def test_invalidate_cascade(flowmachine_connect):
    """
    Test that invalidation does not cascade if cascade=False.
    """
    dl1 = daily_location("2016-01-01")
    dl1.store().result()
    hl1 = HomeLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    hl1.store().result()
    hl2 = HomeLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    flow = Flows(hl1, hl2)
    flow.store().result()
    dl1.invalidate_db_cache(cascade=False)
    assert not dl1.is_stored
    assert hl1.is_stored
    assert flow.is_stored
    in_cache = bool(
        flowmachine_connect.fetch(
            f"SELECT * FROM cache.cached WHERE query_id='{dl1.md5}'"
        )
    )
    assert not in_cache
    in_cache = bool(
        flowmachine_connect.fetch(
            f"SELECT * FROM cache.cached WHERE query_id='{hl1.md5}'"
        )
    )
    assert in_cache
    has_deps = bool(flowmachine_connect.fetch("SELECT * FROM cache.dependencies"))
    assert has_deps
def test_invalidate_cache_midchain(flowmachine_connect):
    """
    Test that invalidating a query in the middle of a chain drops the
    top of the chain and this link, but not the bottom.
    """
    dl1 = daily_location("2016-01-01")
    dl1.store().result()
    hl1 = HomeLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    hl1.store().result()
    hl2 = HomeLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    flow = Flows(hl1, hl2)
    flow.store().result()
    hl1.invalidate_db_cache()
    assert dl1.is_stored
    assert not hl1.is_stored
    assert not flow.is_stored
    in_cache = bool(
        flowmachine_connect.fetch(
            f"SELECT * FROM cache.cached WHERE query_id='{dl1.md5}'"
        )
    )
    assert in_cache
    in_cache = bool(
        flowmachine_connect.fetch(
            f"SELECT * FROM cache.cached WHERE query_id='{hl1.md5}'"
        )
    )
    assert not in_cache
    in_cache = bool(
        flowmachine_connect.fetch(
            f"SELECT * FROM cache.cached WHERE query_id='{flow.md5}'"
        )
    )
    assert not in_cache
    has_deps = bool(flowmachine_connect.fetch("SELECT * FROM cache.dependencies"))
    assert has_deps  # Daily location deps should remain
def test_invalidate_cache_multi(flowmachine_connect):
    """
    Test that invalidating a simple query that is part of a bigger one
    drops both tables, cleans up dependencies and removes both from cache.
    """
    dl1 = daily_location("2016-01-01")
    dl1.store().result()
    hl1 = HomeLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    hl1.store().result()
    dl1.invalidate_db_cache()
    assert not dl1.is_stored
    assert not hl1.is_stored
    in_cache = bool(
        flowmachine_connect.fetch(
            f"SELECT * FROM cache.cached WHERE query_id='{dl1.md5}'"
        )
    )
    assert not in_cache
    in_cache = bool(
        flowmachine_connect.fetch(
            f"SELECT * FROM cache.cached WHERE query_id='{hl1.md5}'"
        )
    )
    assert not in_cache
    has_deps = bool(flowmachine_connect.fetch("SELECT * FROM cache.dependencies"))
    assert has_deps
def test_daily_locs_errors():
    """
    daily_location() errors when we ask for a date that does not exist.
    """
    with pytest.raises(MissingDateError):
        daily_location("2016-01-31")
def test_column_names_flow(exemplar_spatial_unit_param):
    """Test that column_names property matches head(0) for Flows."""
    flow = Flows(
        daily_location("2016-01-01", spatial_unit=exemplar_spatial_unit_param),
        daily_location("2016-01-01", spatial_unit=exemplar_spatial_unit_param),
    )
    assert flow.head(0).columns.tolist() == flow.column_names
def test_flows_geojson(get_dataframe):
    """
    Test geojson works for flows with non-standard column names.
    """
    dl = daily_location(
        "2016-01-01",
        spatial_unit=make_spatial_unit(
            "admin", level=2, region_id_column_name="admin2name"
        ),
    )
    dl2 = daily_location(
        "2016-01-02",
        spatial_unit=make_spatial_unit(
            "admin", level=2, region_id_column_name="admin2name"
        ),
    )
    fl = Flows(dl, dl2)
    js = fl.to_geojson()
    df = get_dataframe(fl)
    check_features = [js["features"][0], js["features"][5], js["features"][7]]
    for feature in check_features:
        outflows = feature["properties"]["outflows"]
        df_src = df[
            df.admin2name_from == feature["properties"]["admin2name"]
        ].set_index("admin2name_to")
        for dest, tot in outflows.items():
            assert tot == df_src.loc[dest]["value"]
def test_ambiguity_is_an_error():
    """
    Join raises an error if resulting columns are ambiguous.
    """
    with pytest.raises(ValueError):
        daily_location("2016-01-01").join(
            daily_location("2016-01-01"), on_left="subscriber"
        )
def test_dl_count_sum_equal_or_less_than_period(get_dataframe):
    """
    Sum of LocationVisits per subscriber should not be more than the total
    number of days between 'start_date' and 'stop_date'.
    """
    # test 1
    days = 7
    start_date = "2016-01-01"
    stop_date = "2016-01-07"
    lv = LocationVisits(
        DayTrajectories(
            *[
                daily_location(d, level="admin3", method="last")
                for d in list_of_dates(start_date, stop_date)
            ]
        )
    )
    df = get_dataframe(lv)
    assert df[df["subscriber"] == df.iloc[0, 0]]["dl_count"].sum() <= days
    # test 2
    days = 3
    start_date = "2016-01-01"
    stop_date = "2016-01-03"
    lv = LocationVisits(
        DayTrajectories(
            *[
                daily_location(d, level="admin3", method="last")
                for d in list_of_dates(start_date, stop_date)
            ]
        )
    )
    df = get_dataframe(lv)
    assert df[df["subscriber"] == df.iloc[0, 0]]["dl_count"].sum() <= days
def test_invalidate_cache_midchain(flowmachine_connect):
    """
    Test that invalidating a query in the middle of a chain drops the
    top of the chain and this link, but not the bottom.
    """
    dl1 = daily_location("2016-01-01")
    dl1.store().result()
    hl1 = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    hl1.store().result()
    hl2 = ModalLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    flow = Flows(hl1, hl2)
    flow.store().result()
    assert dl1.is_stored
    assert hl1.is_stored
    assert flow.is_stored
    hl1.invalidate_db_cache()
    assert dl1.is_stored
    assert not hl1.is_stored
    assert not flow.is_stored
    assert cache_table_exists(get_db(), dl1.query_id)
    assert not cache_table_exists(get_db(), hl1.query_id)
    assert not cache_table_exists(get_db(), flow.query_id)
    has_deps = bool(get_db().fetch("SELECT * FROM cache.dependencies"))
    assert has_deps  # Daily location deps should remain
def test_subsetting_of_query(get_dataframe):
    """
    Check that query ids and length of results of some subsetted queries
    are as expected.
    """
    selected_subscriber_ids = [
        "1jwYL3Nl1Y46lNeQ",
        "nLvm2gVnEdg7lzqX",
        "jwKJorl0yBrZX5N8",
    ]
    custom_query = CustomQuery(
        "SELECT duration, msisdn as subscriber FROM events.calls WHERE duration < 10",
        ["duration", "subscriber"],
    )
    subsetter_1 = SubscriberSubsetterForAllSubscribers()
    subsetter_2 = SubscriberSubsetterForExplicitSubset(selected_subscriber_ids)
    subsetter_3 = SubscriberSubsetterForFlowmachineQuery(custom_query)
    dl_0 = daily_location(date="2016-01-01")
    dl_1 = daily_location(date="2016-01-01", subscriber_subset=subsetter_1)
    dl_2 = daily_location(date="2016-01-01", subscriber_subset=subsetter_2)
    dl_3 = daily_location(date="2016-01-01", subscriber_subset=subsetter_3)
    assert 499 == len(get_dataframe(dl_0))
    assert 499 == len(get_dataframe(dl_1))
    assert 3 == len(get_dataframe(dl_2))
    assert 26 == len(get_dataframe(dl_3))
def test_invalidate_cascade(flowmachine_connect):
    """
    Test that invalidation does not cascade if cascade=False.
    """
    dl1 = daily_location("2016-01-01")
    dl1.store().result()
    hl1 = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    hl1.store().result()
    hl2 = ModalLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    flow = Flows(hl1, hl2)
    flow.store().result()
    assert dl1.is_stored
    assert hl1.is_stored
    assert flow.is_stored
    dl1.invalidate_db_cache(cascade=False)
    assert not dl1.is_stored
    assert hl1.is_stored
    assert flow.is_stored
    assert not cache_table_exists(get_db(), dl1.query_id)
    assert cache_table_exists(get_db(), hl1.query_id)
    has_deps = bool(get_db().fetch("SELECT * FROM cache.dependencies"))
    assert has_deps
def test_gets_mixins():
    """
    Test that a random sample gets applicable mixins.
    """
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    flow = Flows(dl1, dl2)
    assert isinstance(flow.random_sample(size=10), GraphMixin)
def test_flows_raise_error(self):
    """
    Flows() raises an error if the location levels are different.
    """
    dl1 = daily_location("2016-01-01", level="admin3")
    dl2 = daily_location("2016-01-01", level="admin2")
    with self.assertRaises(ValueError):
        Flows(dl1, dl2)
def setUp(self):
    self.dl1 = daily_location("2016-01-01")
    self.dl2 = daily_location("2016-01-02")
    self.dl3 = daily_location("2016-01-07")
    self.flowA = Flows(self.dl1, self.dl2)
    self.flowB = Flows(self.dl1, self.dl3)
    self.relfl = self.flowA - self.flowB
    self.df_rel = self.relfl.get_dataframe()
@pytest.fixture
def flows(flowmachine_connect):
    """Fixture providing two flows."""
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    dl3 = daily_location("2016-01-07")
    flow_a = Flows(dl1, dl2)
    flow_b = Flows(dl1, dl3)
    yield flow_a, flow_b
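# A minimal sketch (not part of the original suite) of how the `flows` fixture
# above might be consumed. The test name and the exact assertions are
# illustrative assumptions; only `column_names` and the `get_dataframe`
# fixture are taken from the surrounding tests.
def test_flows_fixture_columns(flows, get_dataframe):
    flow_a, flow_b = flows
    # Both flows are built from daily locations at the same spatial unit,
    # so they should expose identical column names, and the realised
    # dataframe should carry exactly those columns.
    assert flow_a.column_names == flow_b.column_names
    assert get_dataframe(flow_a).columns.tolist() == flow_a.column_names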
def setUp(self):
    self.dl1 = daily_location("2016-01-01")
    self.dl2 = daily_location("2016-01-02")
    self.stub1 = limit("2016-01-01")
    self.stub2 = limit("2016-01-01", offset=5)
    self.subset_q = CustomQuery("SELECT msisdn FROM events.calls LIMIT 10")
def test_bad_multiplier():
    """
    Multiplying by something which isn't a Flows() or scalar raises a TypeError.
    """
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    flowA = Flows(dl1, dl2)
    with pytest.raises(TypeError):
        relfl = flowA * "A"
def test_column_names_inout(query, exemplar_spatial_unit_param):
    """Test that column_names property matches head(0) for InFlow & OutFlow."""
    flow = Flows(
        daily_location("2016-01-01", spatial_unit=exemplar_spatial_unit_param),
        daily_location("2016-01-01", spatial_unit=exemplar_spatial_unit_param),
    )
    query_instance = query(flow)
    assert query_instance.head(0).columns.tolist() == query_instance.column_names
def test_bad_exp():
    """
    Raising to a power which isn't a float or an int raises a TypeError.
    """
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    flowA = Flows(dl1, dl2)
    with pytest.raises(TypeError):
        relfl = flowA ** "A"
def test_bad_divisor():
    """
    Dividing by something which isn't a Flows() or scalar raises a TypeError.
    """
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    flowA = Flows(dl1, dl2)
    with pytest.raises(TypeError):
        relfl = flowA / "A"
def test_flow_math_store(op, exemplar_level_param, flowmachine_connect):
    """
    Storing works for flows added together at all levels.
    """
    dl1 = daily_location("2016-01-01", **exemplar_level_param)
    dl2 = daily_location("2016-01-02", **exemplar_level_param)
    fl = op(Flows(dl1, dl2), Flows(dl1, dl2))
    fl.store().result()
    assert flowmachine_connect.has_table(*fl.table_name.split(".")[::-1])
def test_column_names_math(query, exemplar_level_param):
    """Test that column_names property matches head(0) for FlowMath."""
    flow = Flows(
        daily_location("2016-01-01", **exemplar_level_param),
        daily_location("2016-01-01", **exemplar_level_param),
    )
    query_instance = query(flow, flow)
    assert query_instance.head(0).columns.tolist() == query_instance.column_names
def test_run_modal_location_query(send_zmq_message_and_receive_reply):
    """
    Can run modal location query and receive a successful response including the query_id.
    """
    msg = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "modal_location",
                "locations": [
                    {
                        "query_kind": "daily_location",
                        "date": "2016-01-01",
                        "method": "most-common",
                        "aggregation_unit": "admin3",
                        "subscriber_subset": None,
                    },
                    {
                        "query_kind": "daily_location",
                        "date": "2016-01-02",
                        "method": "most-common",
                        "aggregation_unit": "admin3",
                        "subscriber_subset": None,
                    },
                ],
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg)
    q = SpatialAggregate(
        locations=ModalLocation(
            daily_location(
                date="2016-01-01",
                method="most-common",
                spatial_unit=make_spatial_unit("admin", level=3),
                subscriber_subset=None,
            ),
            daily_location(
                date="2016-01-02",
                method="most-common",
                spatial_unit=make_spatial_unit("admin", level=3),
                subscriber_subset=None,
            ),
        )
    )
    expected_query_id = q.md5
    assert "success" == reply["status"]
    assert expected_query_id == reply["payload"]["query_id"]
    assert ["query_id"] == list(reply["payload"].keys())
def test_flows_raise_error():
    """
    Flows() raises an error if the location levels are different.
    """
    dl1 = daily_location(
        "2016-01-01", spatial_unit=make_spatial_unit("admin", level=3)
    )
    dl2 = daily_location(
        "2016-01-01", spatial_unit=make_spatial_unit("admin", level=2)
    )
    with pytest.raises(ValueError):
        Flows(dl1, dl2)
def test_edgelist(get_dataframe):
    """Test that an EdgeList object can be created."""
    query = daily_location("2016-01-01").aggregate()
    wrapped = EdgeList(query)
    df = get_dataframe(wrapped)
    assert all(df.set_index(["name_from"]).loc["Rasuwa"]["count"] == 11)
    query = daily_location("2016-01-01").aggregate()
    wrapped = EdgeList(query, left_handed=False)
    df = get_dataframe(wrapped)
    assert all(df.set_index(["name_to"]).loc["Rasuwa"]["count"] == 11)
def test_flows_geo_augmented_query_raises_error():
    """
    Test that a ValueError is raised when attempting to get geojson for a
    flows query with no geography data.
    """
    dl = daily_location("2016-01-01", spatial_unit=make_spatial_unit("cell"))
    dl2 = daily_location("2016-01-02", spatial_unit=make_spatial_unit("cell"))
    fl = Flows(dl, dl2)
    with pytest.raises(ValueError):
        fl.to_geojson_string()
def test_inflow_value(get_dataframe):
    """
    One of the values for the inflows.
    """
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    flow = Flows(dl1, dl2)
    inflow = flow.inflow()
    df = get_dataframe(inflow)
    assert df.set_index("name_to").loc["Okhaldhunga"][0] == 20
def test_outflow_value(get_dataframe):
    """
    One of the values for the outflows.
    """
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    flow = Flows(dl1, dl2)
    outflow = flow.outflow()
    df = get_dataframe(outflow)
    assert df.set_index("name_from").loc["Sankhuwasabha"][0] == 24
def test_inflow_value(get_dataframe):
    """
    One of the values for the inflows.
    """
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    flow = Flows(dl1, dl2)
    inflow = flow.inflow()
    df = get_dataframe(inflow)
    assert df.set_index("pcod_to").loc["524 1 03 13"][0] == 20
def test_shrink_to_size_respects_dry_run(flowmachine_connect):
    """
    Test that shrink_below_size doesn't remove anything during a dry run.
    """
    dl = daily_location("2016-01-01").store().result()
    dl2 = daily_location("2016-01-02").store().result()
    removed_queries = shrink_below_size(flowmachine_connect, 0, dry_run=True)
    assert 2 == len(removed_queries)
    assert dl.is_stored
    assert dl2.is_stored