def test_total_location_events_column_names(exemplar_level_param, interval, direction):
    """Test that column_names property of TotalLocationEvents matches head(0)"""
    tle = TotalLocationEvents(
        "2016-01-01",
        "2016-01-04",
        **exemplar_level_param,
        interval=interval,
        direction=direction,
    )
    assert tle.head(0).columns.tolist() == tle.column_names

def test_only_incoming(self):
    """
    TotalLocationEvents() can get activity, ignoring outgoing calls.
    """
    te = TotalLocationEvents(
        "2016-01-01", "2016-01-04", level="versioned-site", direction="in"
    )
    df = te.get_dataframe()
    self.assertIs(type(df), DataFrame)
    # Test one of the values
    df.date = df.date.astype(str)
    val = list(
        df[(df.date == "2016-01-01") & (df.site_id == "6qpN0p") & (df.hour == 0)].total
    )[0]
    self.assertEqual(val, 2)

def test_ignore_texts(self):
    """
    TotalLocationEvents() can get the total activity at versioned-site level, excluding texts.
    """
    te = TotalLocationEvents(
        "2016-01-01", "2016-01-04", level="versioned-site", table="events.calls"
    )
    df = te.get_dataframe()
    self.assertIs(type(df), DataFrame)
    # Test one of the values
    df.date = df.date.astype(str)
    val = list(
        df[(df.date == "2016-01-01") & (df.site_id == "0xqNDj") & (df.hour == 3)].total
    )[0]
    self.assertEqual(val, 3)

def test_bad_interval_raises_error():
    """Total location events raises an error for a bad interval."""
    with pytest.raises(ValueError):
        TotalLocationEvents(
            "2016-01-01", "2016-01-04", level="versioned-site", interval="BAD_INTERVAL"
        )

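# Companion sketch of the happy path for the test above, assuming "day",
# "hour" and "min" are the recognised intervals (as the daily, hourly and
# min-by-min tests in this section suggest); import paths are assumptions.
import pytest

from flowmachine.features import TotalLocationEvents


@pytest.mark.parametrize("good_interval", ["day", "hour", "min"])
def test_good_interval_is_accepted(good_interval):
    """A recognised interval constructs without raising."""
    TotalLocationEvents(
        "2016-01-01", "2016-01-04", level="versioned-site", interval=good_interval
    )
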
def test_events_at_cell_level(self):
    """
    TotalLocationEvents() returns data at the level of the versioned site.
    """
    te = TotalLocationEvents("2016-01-01", "2016-01-04", level="versioned-site")
    df = te.get_dataframe()
    self.assertIs(type(df), DataFrame)
    # Test one of the values
    df.date = df.date.astype(str)
    val = list(
        df[(df.date == "2016-01-03") & (df.site_id == "zArRjg") & (df.hour == 17)].total
    )[0]
    self.assertEqual(val, 3)

def test_events_daily(self):
    """
    TotalLocationEvents() can get activity on a daily level.
    """
    te = TotalLocationEvents(
        "2016-01-01", "2016-01-04", level="versioned-site", interval="day"
    )
    df = te.get_dataframe()
    self.assertIs(type(df), DataFrame)
    self.assertEqual(
        list(df.columns), ["site_id", "version", "lon", "lat", "date", "total"]
    )
    # Test one of the values
    df.date = df.date.astype(str)
    val = list(df[(df.date == "2016-01-03") & (df.site_id == "B8OaG5")].total)[0]
    self.assertEqual(val, 95)

def test_bad_direction_raises_error():
    """Total location events raises an error for a bad direction."""
    with pytest.raises(ValueError):
        TotalLocationEvents(
            "2016-01-01",
            "2016-01-04",
            spatial_unit=make_spatial_unit("versioned-site"),
            interval="min",
            direction="BAD_DIRECTION",
        )

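# Companion sketch for the direction check above, assuming flowmachine itself
# accepts "in", "out" and "both" (construct_query_object below maps the API's
# "all" onto "both"); import paths are assumptions.
import pytest

from flowmachine.core import make_spatial_unit
from flowmachine.features import TotalLocationEvents


@pytest.mark.parametrize("good_direction", ["in", "out", "both"])
def test_good_direction_is_accepted(good_direction):
    """A recognised direction constructs without raising."""
    TotalLocationEvents(
        "2016-01-01",
        "2016-01-04",
        spatial_unit=make_spatial_unit("versioned-site"),
        interval="min",
        direction=good_direction,
    )
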
def test_events_daily(get_dataframe):
    """
    TotalLocationEvents() can get activity on a daily level.
    """
    te = TotalLocationEvents(
        "2016-01-01", "2016-01-04", level="versioned-site", interval="day"
    )
    df = get_dataframe(te)
    # Test one of the values
    df.date = df.date.astype(str)
    val = list(df[(df.date == "2016-01-03") & (df.site_id == "B8OaG5")].total)[0]
    assert val == 95

def test_events_at_cell_level(get_dataframe):
    """
    TotalLocationEvents() returns data at the level of the versioned site.
    """
    te = TotalLocationEvents("2016-01-01", "2016-01-04", level="versioned-site")
    df = get_dataframe(te)
    # Test one of the values
    df.date = df.date.astype(str)
    val = list(
        df[(df.date == "2016-01-03") & (df.site_id == "zArRjg") & (df.hour == 17)].total
    )[0]
    assert val == 3

def test_events_min(self):
    """
    TotalLocationEvents() can get events on a min-by-min basis.
    """
    te = TotalLocationEvents(
        "2016-01-01", "2016-01-04", level="versioned-site", interval="min"
    )
    df = te.get_dataframe()
    self.assertIs(type(df), DataFrame)
    self.assertEqual(
        list(df.columns),
        ["site_id", "version", "lon", "lat", "date", "hour", "min", "total"],
    )
    # Test one of the values
    df.date = df.date.astype(str)
    val = list(
        df[
            (df.date == "2016-01-03")
            & (df.site_id == "zdNQx2")
            & (df.hour == 15)
            & (df["min"] == 20)
        ].total
    )[0]
    self.assertEqual(val, 1)

def test_events_at_cell_level(get_dataframe):
    """
    TotalLocationEvents() returns data at the level of the cell.
    """
    te = TotalLocationEvents(
        "2016-01-01", "2016-01-04", spatial_unit=make_spatial_unit("cell")
    )
    df = get_dataframe(te)
    # Test one of the values
    df.date = df.date.astype(str)
    val = list(
        df[
            (df.date == "2016-01-03")
            & (df.location_id == "1Gc6RSfZ")
            & (df.hour == 17)
        ].value
    )[0]
    assert val == 4

def _flowmachine_query_obj(self):
    """
    Return the underlying flowmachine TotalLocationEvents object.

    Returns
    -------
    Query
    """
    return TotalLocationEvents(
        start=self.start_date,
        stop=self.end_date,
        direction=self.direction,
        table=self.event_types,
        level=self.aggregation_unit,
        subscriber_subset=self.subscriber_subset,
    )

def test_only_incoming(get_dataframe):
    """
    TotalLocationEvents() can get activity, ignoring outgoing calls.
    """
    te = TotalLocationEvents(
        "2016-01-01",
        "2016-01-04",
        spatial_unit=make_spatial_unit("versioned-site"),
        direction="in",
    )
    df = get_dataframe(te)
    # Test one of the values
    df.date = df.date.astype(str)
    val = list(
        df[(df.date == "2016-01-01") & (df.site_id == "6qpN0p") & (df.hour == 0)].value
    )[0]
    assert val == 2

def _flowmachine_query_obj(self):
    """
    Return the underlying flowmachine RedactedTotalEvents object.

    Returns
    -------
    Query
    """
    return RedactedTotalEvents(
        total_events=TotalLocationEvents(
            start=self.start_date,
            stop=self.end_date,
            interval=self.interval,
            direction=self.direction,
            table=self.event_types,
            spatial_unit=self.aggregation_unit,
            subscriber_subset=self.subscriber_subset,
        )
    )

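# Minimal construction sketch mirroring the two _flowmachine_query_obj
# variants above: the redacted wrapper is what gets exposed, so callers never
# see the raw per-location counts. RedactedTotalEvents and make_spatial_unit
# are taken from the surrounding code; the import paths and example arguments
# are assumptions.
from flowmachine.core import make_spatial_unit
from flowmachine.features import TotalLocationEvents
from flowmachine.features.location.redacted_total_events import RedactedTotalEvents

redacted_counts = RedactedTotalEvents(
    total_events=TotalLocationEvents(
        "2016-01-01",
        "2016-01-04",
        interval="day",
        direction="both",
        table="all",
        spatial_unit=make_spatial_unit("cell"),
    )
)
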
def test_events_min(get_dataframe):
    """
    TotalLocationEvents() can get events on a min-by-min basis.
    """
    te = TotalLocationEvents(
        "2016-01-01", "2016-01-04", level="versioned-site", interval="min"
    )
    df = get_dataframe(te)
    # Test one of the values
    df.date = df.date.astype(str)
    val = list(
        df[
            (df.date == "2016-01-03")
            & (df.site_id == "zdNQx2")
            & (df.hour == 15)
            & (df["min"] == 20)
        ].total
    )[0]
    assert val == 1

def test_ignore_texts(get_dataframe):
    """
    TotalLocationEvents() can get the total activity at versioned-site level, excluding texts.
    """
    te = TotalLocationEvents(
        "2016-01-01",
        "2016-01-04",
        spatial_unit=make_spatial_unit("versioned-site"),
        table="events.calls",
    )
    df = get_dataframe(te)
    # Test one of the values
    df.date = df.date.astype(str)
    val = list(
        df[(df.date == "2016-01-01") & (df.site_id == "0xqNDj") & (df.hour == 3)].value
    )[0]
    assert val == 3

def test_all_above_threshold(get_dataframe):
    """
    TotalLocationEvents() can get activity on a daily level but only above threshold.
    """
    te = RedactedTotalEvents(
        total_events=TotalLocationEvents(
            "2016-01-01",
            "2016-01-02",
            spatial_unit=make_spatial_unit("cell"),
            interval="day",
            table=["events.calls"],
        )
    )
    us = get_dataframe(
        RedactedUniqueSubscriberCounts(
            unique_subscriber_counts=UniqueSubscriberCounts(
                "2016-01-01", "2016-01-02", table=["events.calls"]
            )
        )
    )
    te_df = get_dataframe(te)
    # Redaction should leave only rows with a count above 15, and the same set
    # of locations should survive in both redacted outputs.
    assert all(te_df.value > 15)
    assert set(us.location_id) == set(te_df.location_id)

def construct_query_object(query_kind, params):  # pragma: no cover
    """
    Create an instance of the appropriate subclass of flowmachine.core.query.Query

    Parameters
    ----------
    query_kind : str
        The kind of query to be constructed. Example: "daily_location".
    params : dict
        Parameters to use in the query construction.

    Returns
    -------
    flowmachine.core.query.Query
    """
    params = deepcopy(
        params
    )  # Operate on a copy to avoid mutating the passed-in dict, which might change the redis lookup
    error_msg_prefix = (
        f"Error when constructing query of kind {query_kind} with parameters {params}"
    )
    try:
        subscriber_subset = params["subscriber_subset"]
        if subscriber_subset == "all":
            params["subscriber_subset"] = None
        else:
            if isinstance(subscriber_subset, dict):
                raise NotImplementedError("Proper subsetting not implemented yet.")
            else:
                raise QueryProxyError(
                    f"{error_msg_prefix}: 'Cannot construct {query_kind} subset from given input: {subscriber_subset}'"
                )
    except KeyError:
        pass  # No subset param

    if "daily_location" == query_kind:
        date = params["date"]
        method = params["daily_location_method"]
        level = params["aggregation_unit"]
        subscriber_subset = params["subscriber_subset"]

        allowed_methods = ["last", "most-common"]
        allowed_levels = ["admin0", "admin1", "admin2", "admin3", "admin4"]
        if method not in allowed_methods:
            raise QueryProxyError(
                f"{error_msg_prefix}: 'Unrecognised method '{method}', must be one of: {allowed_methods}'"
            )
        if level not in allowed_levels:
            raise QueryProxyError(
                f"{error_msg_prefix}: 'Unrecognised level '{level}', must be one of: {allowed_levels}'"
            )
        try:
            q = daily_location(
                date=date,
                method=method,
                level=level,
                subscriber_subset=subscriber_subset,
            )
        except Exception as e:
            raise QueryProxyError(f"{error_msg_prefix}: '{e}'")
    elif "location_event_counts" == query_kind:
        start_date = params["start_date"]
        end_date = params["end_date"]
        interval = params["interval"]
        level = params["aggregation_unit"]
        subscriber_subset = params["subscriber_subset"]
        direction = params["direction"]
        event_types = params["event_types"]

        allowed_intervals = TotalLocationEvents.allowed_intervals
        allowed_directions = ["in", "out", "all"]
        allowed_levels = [
            "admin0",
            "admin1",
            "admin2",
            "admin3",
            "admin4",
            "site",
            "cell",
        ]
        if interval not in allowed_intervals:
            raise QueryProxyError(
                f"{error_msg_prefix}: 'Unrecognised interval '{interval}', must be one of: {allowed_intervals}'"
            )
        if level not in allowed_levels:
            raise QueryProxyError(
                f"{error_msg_prefix}: 'Unrecognised level '{level}', must be one of: {allowed_levels}'"
            )
        if level in ["cell", "site"]:
            level = f"versioned-{level}"
        if direction not in allowed_directions:
            raise QueryProxyError(
                f"{error_msg_prefix}: 'Unrecognised direction '{direction}', must be one of: {allowed_directions}'"
            )
        if direction == "all":
            direction = "both"
        try:
            q = TotalLocationEvents(
                start=start_date,
                stop=end_date,
                direction=direction,
                table=event_types,
                level=level,
                subscriber_subset=subscriber_subset,
            )
            logger.debug(f"Made TotalLocationEvents query. {q.__dict__}")
        except Exception as e:
            raise QueryProxyError(f"{error_msg_prefix}: '{e}'")
    elif "modal_location" == query_kind:
        locations = params["locations"]
        aggregation_unit = params["aggregation_unit"]
        try:
            location_objects = []
            for loc in locations:
                # Use local names here so we don't clobber the outer
                # query_kind/params, which are reused for logging below.
                loc_query_kind = loc["query_kind"]
                if loc_query_kind != "daily_location":
                    raise QueryProxyError(
                        f"{error_msg_prefix}: Currently modal location takes only daily locations as input."
                    )
                if aggregation_unit != loc["params"]["aggregation_unit"]:
                    raise QueryProxyError(
                        f"{error_msg_prefix}: Modal location aggregation unit must be the same as the ones of all input locations."
                    )
                dl = construct_query_object(loc_query_kind, loc["params"])
                location_objects.append(dl)
            q = ModalLocation(*location_objects)
        except Exception as e:
            raise QueryProxyError(f"{error_msg_prefix}: '{e}'")
    elif "flows" == query_kind:
        aggregation_unit = params["aggregation_unit"]
        try:
            from_location = params["from_location"]
            to_location = params["to_location"]
            if (
                aggregation_unit != from_location["params"]["aggregation_unit"]
                or aggregation_unit != to_location["params"]["aggregation_unit"]
            ):
                raise QueryProxyError(
                    f"{error_msg_prefix}: Flow aggregation unit must be the same as the ones for from_location and to_location."
                )
            from_location_object = construct_query_object(
                from_location["query_kind"], from_location["params"]
            )
            to_location_object = construct_query_object(
                to_location["query_kind"], to_location["params"]
            )
            q = Flows(from_location_object, to_location_object)
        except Exception as e:
            raise QueryProxyError(f"FIXME (flows): {e}")
    elif "meaningful_locations_aggregate" == query_kind:
        aggregation_unit = params["aggregation_unit"]
        mfl = params["meaningful_locations"]
        try:
            q = MeaningfulLocationsAggregate(
                meaningful_locations=construct_query_object(**mfl),
                level=aggregation_unit,
            )
        except Exception as e:
            raise QueryProxyError(f"FIXME (meaningful_location_aggregate): {e}")
    elif "meaningful_locations_od_matrix" == query_kind:
        aggregation_unit = params["aggregation_unit"]
        mfl_a = params["meaningful_locations_a"]
        mfl_b = params["meaningful_locations_b"]
        try:
            q = MeaningfulLocationsOD(
                meaningful_locations_a=construct_query_object(**mfl_a),
                meaningful_locations_b=construct_query_object(**mfl_b),
                level=aggregation_unit,
            )
        except Exception as e:
            raise QueryProxyError(f"FIXME (meaningful_location_od_matrix): {e}")
    elif "meaningful_locations" == query_kind:
        label = params["label"]
        scores = params["scores"]
        labels = params["labels"]
        clusters = params["clusters"]
        try:
            q = MeaningfulLocations(
                clusters=construct_query_object(**clusters),
                labels=labels,
                scores=construct_query_object(**scores),
                label=label,
            )
        except Exception as e:
            raise QueryProxyError(f"FIXME (meaningful_locations): {e}")
    elif "event_score" == query_kind:
        try:
            q = EventScore(**params)
        except Exception as e:
            raise QueryProxyError(f"FIXME (event_score): {e}")
    elif "hartigan_cluster" == query_kind:
        call_days = params.pop("call_days")
        try:
            q = HartiganCluster(calldays=construct_query_object(**call_days), **params)
        except Exception as e:
            raise QueryProxyError(f"FIXME (hartigan_cluster): {e}")
    elif "call_days" == query_kind:
        sls = params.pop("subscriber_locations")
        try:
            q = CallDays(subscriber_locations=construct_query_object(**sls))
        except Exception as e:
            raise QueryProxyError(f"FIXME (call_days): {e}")
    elif "subscriber_locations" == query_kind:
        try:
            q = subscriber_locations(**params)
        except Exception as e:
            raise QueryProxyError(f"FIXME (subscriber_locations): {e}")
    elif "geography" == query_kind:
        aggregation_unit = params["aggregation_unit"]
        allowed_aggregation_units = ["admin0", "admin1", "admin2", "admin3", "admin4"]
        if aggregation_unit not in allowed_aggregation_units:
            raise InvalidGeographyError(
                f"{error_msg_prefix}: 'Unrecognised aggregation unit '{aggregation_unit}', "
                f"must be one of: {allowed_aggregation_units}'"
            )
        try:
            q = GeoTable(
                name=aggregation_unit,
                schema="geography",
                columns=[f"{aggregation_unit}name", f"{aggregation_unit}pcod", "geom"],
            )
        except Exception as e:
            raise QueryProxyError(f"{error_msg_prefix}: '{e}'")
    else:
        error_msg = f"Unsupported query kind: '{query_kind}'"
        logger.error(error_msg)
        raise QueryProxyError(error_msg)

    logger.debug(f"Made {query_kind}: {params}")
    return q
