def __init__(
    self,
    *,
    start_date: str,
    end_date: str,
    aggregation_unit: AnySpatialUnit,
    label_a: str,
    label_b: str,
    labels: Dict[str, Dict[str, dict]],
    tower_day_of_week_scores: Dict[str, float],
    tower_hour_of_day_scores: List[float],
    tower_cluster_radius: float = 1.0,
    tower_cluster_call_threshold: int = 0,
    event_types: Optional[Union[str, List[str]]],
    subscriber_subset: Union[dict, None] = None,
):
    # Note: all input parameters need to be defined as attributes on `self`
    # so that marshmallow can serialise the object correctly.
    self.start_date = start_date
    self.end_date = end_date
    self.aggregation_unit = aggregation_unit
    self.event_types = event_types
    self.label_a = label_a
    self.label_b = label_b
    self.labels = labels
    self.tower_day_of_week_scores = tower_day_of_week_scores
    self.tower_hour_of_day_scores = tower_hour_of_day_scores
    self.tower_cluster_radius = tower_cluster_radius
    self.tower_cluster_call_threshold = tower_cluster_call_threshold
    self.subscriber_subset = subscriber_subset

    common_params = dict(
        labels=labels,
        start_date=start_date,
        end_date=end_date,
        event_types=event_types,
        subscriber_subset=subscriber_subset,
        tower_cluster_call_threshold=tower_cluster_call_threshold,
        tower_cluster_radius=tower_cluster_radius,
        tower_day_of_week_scores=tower_day_of_week_scores,
        tower_hour_of_day_scores=tower_hour_of_day_scores,
    )
    locs_a = _make_meaningful_locations_object(label=label_a, **common_params)
    locs_b = _make_meaningful_locations_object(label=label_b, **common_params)

    self.q_meaningful_locations_od = RedactedMeaningfulLocationsOD(
        meaningful_locations_od=MeaningfulLocationsOD(
            meaningful_locations_a=locs_a,
            meaningful_locations_b=locs_b,
            spatial_unit=aggregation_unit,
        )
    )
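# Illustrative sketch only: `_make_meaningful_locations_object` is defined
# elsewhere in this package. The hypothetical helper below shows roughly what
# such a helper is assumed to build, based on how the tests below construct
# MeaningfulLocations by hand. Keyword arguments (e.g. `call_threshold`,
# `score_hour`, `score_dow`, `table`) and import paths are assumptions and may
# differ from the real implementation.
from flowmachine.core import make_spatial_unit
from flowmachine.features import (
    CallDays,
    EventScore,
    HartiganCluster,
    MeaningfulLocations,
    SubscriberLocations,
)


def _example_make_meaningful_locations_object(
    *,
    start_date,
    end_date,
    label,
    labels,
    event_types,
    subscriber_subset,
    tower_cluster_call_threshold,
    tower_cluster_radius,
    tower_day_of_week_scores,
    tower_hour_of_day_scores,
):
    # Locate each subscriber at versioned cell sites over the window...
    subscriber_locations = SubscriberLocations(
        start=start_date,
        stop=end_date,
        spatial_unit=make_spatial_unit("versioned-site"),
        table=event_types,
        subscriber_subset=subscriber_subset,
    )
    # ...cluster their call days around towers...
    clusters = HartiganCluster(
        calldays=CallDays(subscriber_locations=subscriber_locations),
        radius=tower_cluster_radius,
        call_threshold=tower_cluster_call_threshold,
    )
    # ...score events by hour of day and day of week...
    scores = EventScore(
        start=start_date,
        stop=end_date,
        spatial_unit=make_spatial_unit("versioned-site"),
        score_hour=tower_hour_of_day_scores,
        score_dow=tower_day_of_week_scores,
        table=event_types,
        subscriber_subset=subscriber_subset,
    )
    # ...and apply the user-supplied labels to produce meaningful locations.
    return MeaningfulLocations(
        clusters=clusters, scores=scores, labels=labels, label=label
    )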
def test_meaningful_locations_od_results(get_dataframe, meaningful_locations_labels):
    """
    Test that OD on MeaningfulLocations returns expected results and counts
    clusters per subscriber correctly.
    """
    mfl_a = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(
                subscriber_locations=SubscriberLocations(
                    start="2016-01-01",
                    stop="2016-01-02",
                    spatial_unit=make_spatial_unit("versioned-site"),
                )
            ),
            radius=1,
        ),
        scores=EventScore(
            start="2016-01-01",
            stop="2016-01-02",
            spatial_unit=make_spatial_unit("versioned-site"),
        ),
        labels=meaningful_locations_labels,
        label="unknown",
    )
    mfl_b = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(
                subscriber_locations=SubscriberLocations(
                    start="2016-01-02",
                    stop="2016-01-03",
                    spatial_unit=make_spatial_unit("versioned-site"),
                )
            ),
            radius=1,
        ),
        scores=EventScore(
            start="2016-01-02",
            stop="2016-01-03",
            spatial_unit=make_spatial_unit("versioned-site"),
        ),
        labels=meaningful_locations_labels,
        label="unknown",
    )

    mfl_od = MeaningfulLocationsOD(
        meaningful_locations_a=mfl_a,
        meaningful_locations_b=mfl_b,
        spatial_unit=make_spatial_unit("admin", level=1),
    )
    mfl_od_df = get_dataframe(mfl_od)

    # Smoke test that one admin1 region pair gets the expected result
    regional_flow = mfl_od_df[
        (mfl_od_df.pcod_from == "524 1") & (mfl_od_df.pcod_to == "524 4")
    ].value.tolist()[0]
    assert regional_flow == pytest.approx(16.490_807)
    assert mfl_od_df.value.sum() == pytest.approx(454.0)
def test_column_names_meaningful_locations_od(
    exemplar_spatial_unit_param, get_column_names_from_run, meaningful_locations_labels
):
    """Test that the column_names property matches head(0) for an OD matrix between meaningful locations."""
    if not exemplar_spatial_unit_param.is_polygon:
        pytest.xfail(
            f"The spatial unit {exemplar_spatial_unit_param} is not supported as an aggregation unit for ODs between MeaningfulLocations."
        )
    mfl_a = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(
                subscriber_locations=SubscriberLocations(
                    start="2016-01-01",
                    stop="2016-01-02",
                    spatial_unit=make_spatial_unit("versioned-site"),
                )
            ),
            radius=1,
        ),
        scores=EventScore(
            start="2016-01-01",
            stop="2016-01-02",
            spatial_unit=make_spatial_unit("versioned-site"),
        ),
        labels=meaningful_locations_labels,
        label="evening",
    )
    mfl_b = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(
                subscriber_locations=SubscriberLocations(
                    start="2016-01-01",
                    stop="2016-01-02",
                    spatial_unit=make_spatial_unit("versioned-site"),
                )
            ),
            radius=1,
        ),
        scores=EventScore(
            start="2016-01-01",
            stop="2016-01-02",
            spatial_unit=make_spatial_unit("versioned-site"),
        ),
        labels=meaningful_locations_labels,
        label="unknown",
    )
    mfl_od = MeaningfulLocationsOD(
        meaningful_locations_a=mfl_a,
        meaningful_locations_b=mfl_b,
        spatial_unit=exemplar_spatial_unit_param,
    )

    assert get_column_names_from_run(mfl_od) == mfl_od.column_names
def test_meaningful_locations_od_redaction(get_dataframe, meaningful_locations_labels):
    """
    Test that OD on MeaningfulLocations is redacted so that no aggregate
    count of 15 or below is returned.
    """
    mfl_a = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(
                subscriber_locations=SubscriberLocations(
                    start="2016-01-01",
                    stop="2016-01-02",
                    spatial_unit=make_spatial_unit("versioned-site"),
                )
            ),
            radius=1,
        ),
        scores=EventScore(
            start="2016-01-01",
            stop="2016-01-02",
            spatial_unit=make_spatial_unit("versioned-site"),
        ),
        labels=meaningful_locations_labels,
        label="unknown",
    )
    mfl_b = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(
                subscriber_locations=SubscriberLocations(
                    start="2016-01-02",
                    stop="2016-01-03",
                    spatial_unit=make_spatial_unit("versioned-site"),
                )
            ),
            radius=1,
        ),
        scores=EventScore(
            start="2016-01-02",
            stop="2016-01-03",
            spatial_unit=make_spatial_unit("versioned-site"),
        ),
        labels=meaningful_locations_labels,
        label="unknown",
    )

    mfl_od = RedactedMeaningfulLocationsOD(
        meaningful_locations_od=MeaningfulLocationsOD(
            meaningful_locations_a=mfl_a,
            meaningful_locations_b=mfl_b,
            spatial_unit=make_spatial_unit("admin", level=1),
        )
    )
    mfl_od_df = get_dataframe(mfl_od)

    # Aggregate should not include any counts of 15 or below
    assert all(mfl_od_df.value > 15)
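# Illustrative sketch only: the tests above build near-identical
# MeaningfulLocations objects by hand. A hypothetical factory such as the one
# below (not part of the test suite) could remove that duplication while
# exercising exactly the same constructors.
def _example_meaningful_locations(start, stop, labels, label):
    """Build a MeaningfulLocations query over versioned sites for the given start/stop window."""
    return MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(
                subscriber_locations=SubscriberLocations(
                    start=start,
                    stop=stop,
                    spatial_unit=make_spatial_unit("versioned-site"),
                )
            ),
            radius=1,
        ),
        scores=EventScore(
            start=start,
            stop=stop,
            spatial_unit=make_spatial_unit("versioned-site"),
        ),
        labels=labels,
        label=label,
    )


# Example usage mirroring test_meaningful_locations_od_results:
#   mfl_a = _example_meaningful_locations(
#       "2016-01-01", "2016-01-02", meaningful_locations_labels, "unknown"
#   )
#   mfl_b = _example_meaningful_locations(
#       "2016-01-02", "2016-01-03", meaningful_locations_labels, "unknown"
#   )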