예제 #1
0
    def __init__(
        self,
        *,
        start_date: str,
        end_date: str,
        aggregation_unit: AnySpatialUnit,
        label_a: str,
        label_b: str,
        labels: Dict[str, Dict[str, dict]],
        tower_day_of_week_scores: Dict[str, float],
        tower_hour_of_day_scores: List[float],
        tower_cluster_radius: float = 1.0,
        tower_cluster_call_threshold: int = 0,
        event_types: Optional[Union[str, List[str]]],
        subscriber_subset: Union[dict, None] = None,
    ):
        """
        Record the query parameters and build the redacted OD query.

        All arguments are keyword-only. Each one is stored on ``self``
        under its own name, and the resulting query object is stored as
        ``self.q_meaningful_locations_od``.

        Two meaningful-locations objects are created, one for ``label_a``
        and one for ``label_b``; apart from the label they receive exactly
        the same arguments. They are combined into a
        ``MeaningfulLocationsOD`` using ``aggregation_unit`` as the spatial
        unit, which is then wrapped in ``RedactedMeaningfulLocationsOD``.
        """
        # Every input parameter must exist as an attribute on `self`,
        # otherwise marshmallow cannot serialise the object correctly.
        self.start_date = start_date
        self.end_date = end_date
        self.aggregation_unit = aggregation_unit
        self.label_a = label_a
        self.label_b = label_b
        self.labels = labels
        self.tower_day_of_week_scores = tower_day_of_week_scores
        self.tower_hour_of_day_scores = tower_hour_of_day_scores
        self.tower_cluster_radius = tower_cluster_radius
        self.tower_cluster_call_threshold = tower_cluster_call_threshold
        self.event_types = event_types
        self.subscriber_subset = subscriber_subset

        # Arguments shared by both meaningful-locations objects; only the
        # label differs between the two calls below.
        shared_kwargs = {
            "labels": labels,
            "start_date": start_date,
            "end_date": end_date,
            "event_types": event_types,
            "subscriber_subset": subscriber_subset,
            "tower_cluster_call_threshold": tower_cluster_call_threshold,
            "tower_cluster_radius": tower_cluster_radius,
            "tower_day_of_week_scores": tower_day_of_week_scores,
            "tower_hour_of_day_scores": tower_hour_of_day_scores,
        }
        locations_for_a = _make_meaningful_locations_object(
            label=label_a, **shared_kwargs
        )
        locations_for_b = _make_meaningful_locations_object(
            label=label_b, **shared_kwargs
        )

        unredacted_od = MeaningfulLocationsOD(
            meaningful_locations_a=locations_for_a,
            meaningful_locations_b=locations_for_b,
            spatial_unit=aggregation_unit,
        )
        self.q_meaningful_locations_od = RedactedMeaningfulLocationsOD(
            meaningful_locations_od=unredacted_od
        )
예제 #2
0
def test_meaningful_locations_od_results(
    get_dataframe, meaningful_locations_labels
):
    """
    Check the OD matrix between two MeaningfulLocations against known values.

    Both MeaningfulLocations use the "unknown" label on versioned sites;
    the first covers 2016-01-01 to 2016-01-02 and the second covers
    2016-01-02 to 2016-01-03. The OD matrix is aggregated to admin level 1.
    """
    # First set of meaningful locations: 2016-01-01 -> 2016-01-02.
    locations_a = SubscriberLocations(
        start="2016-01-01",
        stop="2016-01-02",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    clusters_a = HartiganCluster(
        calldays=CallDays(subscriber_locations=locations_a),
        radius=1,
    )
    scores_a = EventScore(
        start="2016-01-01",
        stop="2016-01-02",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    mfl_a = MeaningfulLocations(
        clusters=clusters_a,
        scores=scores_a,
        labels=meaningful_locations_labels,
        label="unknown",
    )

    # Second set of meaningful locations: 2016-01-02 -> 2016-01-03.
    locations_b = SubscriberLocations(
        start="2016-01-02",
        stop="2016-01-03",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    clusters_b = HartiganCluster(
        calldays=CallDays(subscriber_locations=locations_b),
        radius=1,
    )
    scores_b = EventScore(
        start="2016-01-02",
        stop="2016-01-03",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    mfl_b = MeaningfulLocations(
        clusters=clusters_b,
        scores=scores_b,
        labels=meaningful_locations_labels,
        label="unknown",
    )

    od_query = MeaningfulLocationsOD(
        meaningful_locations_a=mfl_a,
        meaningful_locations_b=mfl_b,
        spatial_unit=make_spatial_unit("admin", level=1),
    )
    od_df = get_dataframe(od_query)

    # Smoke test: a single admin1 pair carries the expected flow.
    leaves_524_1 = od_df.pcod_from == "524 1"
    arrives_524_4 = od_df.pcod_to == "524 4"
    matching_values = od_df[leaves_524_1 & arrives_524_4].value.tolist()
    assert matching_values[0] == pytest.approx(16.490807)

    # The flows over all pairs must add up to the expected total.
    assert od_df.value.sum() == pytest.approx(454.0)
예제 #3
0
def test_column_names_meaningful_locations_od(
    exemplar_spatial_unit_param,
    get_column_names_from_run,
    meaningful_locations_labels,
):
    """
    Check that column_names agrees with the columns obtained by running the
    OD query between two MeaningfulLocations.

    Spatial units that are not polygons are marked as an expected failure
    before any query object is built.
    """
    if not exemplar_spatial_unit_param.is_polygon:
        pytest.xfail(
            f"The spatial unit {exemplar_spatial_unit_param} is not supported as an aggregation unit for ODs between MeaningfulLocations."
        )

    # "evening" meaningful locations for 2016-01-01 -> 2016-01-02.
    locations_a = SubscriberLocations(
        start="2016-01-01",
        stop="2016-01-02",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    clusters_a = HartiganCluster(
        calldays=CallDays(subscriber_locations=locations_a),
        radius=1,
    )
    scores_a = EventScore(
        start="2016-01-01",
        stop="2016-01-02",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    mfl_a = MeaningfulLocations(
        clusters=clusters_a,
        scores=scores_a,
        labels=meaningful_locations_labels,
        label="evening",
    )

    # "unknown" meaningful locations for the same date range.
    locations_b = SubscriberLocations(
        start="2016-01-01",
        stop="2016-01-02",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    clusters_b = HartiganCluster(
        calldays=CallDays(subscriber_locations=locations_b),
        radius=1,
    )
    scores_b = EventScore(
        start="2016-01-01",
        stop="2016-01-02",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    mfl_b = MeaningfulLocations(
        clusters=clusters_b,
        scores=scores_b,
        labels=meaningful_locations_labels,
        label="unknown",
    )

    od_query = MeaningfulLocationsOD(
        meaningful_locations_a=mfl_a,
        meaningful_locations_b=mfl_b,
        spatial_unit=exemplar_spatial_unit_param,
    )

    columns_from_run = get_column_names_from_run(od_query)
    assert columns_from_run == od_query.column_names
def test_meaningful_locations_od_redaction(
    get_dataframe, meaningful_locations_labels
):
    """
    Check that the redacted OD between MeaningfulLocations only yields
    values greater than 15.
    """
    # First set of meaningful locations: 2016-01-01 -> 2016-01-02.
    locations_a = SubscriberLocations(
        start="2016-01-01",
        stop="2016-01-02",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    clusters_a = HartiganCluster(
        calldays=CallDays(subscriber_locations=locations_a),
        radius=1,
    )
    scores_a = EventScore(
        start="2016-01-01",
        stop="2016-01-02",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    mfl_a = MeaningfulLocations(
        clusters=clusters_a,
        scores=scores_a,
        labels=meaningful_locations_labels,
        label="unknown",
    )

    # Second set of meaningful locations: 2016-01-02 -> 2016-01-03.
    locations_b = SubscriberLocations(
        start="2016-01-02",
        stop="2016-01-03",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    clusters_b = HartiganCluster(
        calldays=CallDays(subscriber_locations=locations_b),
        radius=1,
    )
    scores_b = EventScore(
        start="2016-01-02",
        stop="2016-01-03",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    mfl_b = MeaningfulLocations(
        clusters=clusters_b,
        scores=scores_b,
        labels=meaningful_locations_labels,
        label="unknown",
    )

    unredacted_od = MeaningfulLocationsOD(
        meaningful_locations_a=mfl_a,
        meaningful_locations_b=mfl_b,
        spatial_unit=make_spatial_unit("admin", level=1),
    )
    redacted_od = RedactedMeaningfulLocationsOD(
        meaningful_locations_od=unredacted_od
    )
    redacted_df = get_dataframe(redacted_od)

    # Every value left in the aggregate must be strictly greater than 15.
    exceeds_threshold = redacted_df.value > 15
    assert all(exceeds_threshold)