def test_invalidate_cache_midchain(flowmachine_connect):
    """
    Invalidating the middle link of a stored chain removes that link and the
    query built on top of it, but leaves the link underneath in place.
    """

    def has_cache_record(query):
        # Whether fetching this query's row from cache.cached returns anything.
        rows = flowmachine_connect.fetch(
            f"SELECT * FROM cache.cached WHERE query_id='{query.md5}'"
        )
        return bool(rows)

    bottom = daily_location("2016-01-01")
    bottom.store().result()
    middle = HomeLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    middle.store().result()
    other_home = HomeLocation(
        daily_location("2016-01-03"), daily_location("2016-01-04")
    )
    top = Flows(middle, other_home)
    top.store().result()

    middle.invalidate_db_cache()

    assert bottom.is_stored
    assert not middle.is_stored
    assert not top.is_stored
    assert has_cache_record(bottom)
    assert not has_cache_record(middle)
    assert not has_cache_record(top)
    # Daily location deps should remain
    assert bool(flowmachine_connect.fetch("SELECT * FROM cache.dependencies"))
def test_invalidate_cascade(flowmachine_connect):
    """
    With cascade=False, invalidating a stored query leaves the queries that
    were built on top of it stored.
    """

    def has_cache_record(query):
        # Whether fetching this query's row from cache.cached returns anything.
        rows = flowmachine_connect.fetch(
            f"SELECT * FROM cache.cached WHERE query_id='{query.md5}'"
        )
        return bool(rows)

    bottom = daily_location("2016-01-01")
    bottom.store().result()
    middle = HomeLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    middle.store().result()
    other_home = HomeLocation(
        daily_location("2016-01-03"), daily_location("2016-01-04")
    )
    top = Flows(middle, other_home)
    top.store().result()

    bottom.invalidate_db_cache(cascade=False)

    assert not bottom.is_stored
    assert middle.is_stored
    assert top.is_stored
    assert not has_cache_record(bottom)
    assert has_cache_record(middle)
    assert bool(flowmachine_connect.fetch("SELECT * FROM cache.dependencies"))
def test_invalidate_cascade(flowmachine_connect):
    """
    With cascade=False, invalidating a stored query leaves the queries that
    were built on top of it stored.
    """
    bottom = daily_location("2016-01-01")
    bottom.store().result()
    middle = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    middle.store().result()
    other_modal = ModalLocation(
        daily_location("2016-01-03"), daily_location("2016-01-04")
    )
    top = Flows(middle, other_modal)
    top.store().result()

    # Everything in the chain is stored before the invalidation.
    for stored_query in (bottom, middle, top):
        assert stored_query.is_stored

    bottom.invalidate_db_cache(cascade=False)

    assert not bottom.is_stored
    for survivor in (middle, top):
        assert survivor.is_stored
    assert not cache_table_exists(get_db(), bottom.query_id)
    assert cache_table_exists(get_db(), middle.query_id)
    assert bool(get_db().fetch("SELECT * FROM cache.dependencies"))
def test_invalidate_cache_midchain(flowmachine_connect):
    """
    Invalidating the middle link of a stored chain removes that link and the
    query built on top of it, but leaves the link underneath in place.
    """
    bottom = daily_location("2016-01-01")
    bottom.store().result()
    middle = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    middle.store().result()
    other_modal = ModalLocation(
        daily_location("2016-01-03"), daily_location("2016-01-04")
    )
    top = Flows(middle, other_modal)
    top.store().result()

    # Everything in the chain is stored before the invalidation.
    for stored_query in (bottom, middle, top):
        assert stored_query.is_stored

    middle.invalidate_db_cache()

    assert bottom.is_stored
    for dropped in (middle, top):
        assert not dropped.is_stored
    assert cache_table_exists(get_db(), bottom.query_id)
    assert not cache_table_exists(get_db(), middle.query_id)
    assert not cache_table_exists(get_db(), top.query_id)
    # Daily location deps should remain
    assert bool(get_db().fetch("SELECT * FROM cache.dependencies"))
def setUp(self):
    """
    Build three daily locations, two Flows sharing the first of them, the
    difference of those Flows, and the dataframe of that difference.
    """
    first_day = daily_location("2016-01-01")
    second_day = daily_location("2016-01-02")
    later_day = daily_location("2016-01-07")
    self.dl1, self.dl2, self.dl3 = first_day, second_day, later_day

    self.flowA = Flows(first_day, second_day)
    self.flowB = Flows(first_day, later_day)

    difference = self.flowA - self.flowB
    self.relfl = difference
    self.df_rel = difference.get_dataframe()
def test_gets_mixins():
    """
    A random sample drawn from a Flows query is an instance of GraphMixin.
    """
    origin = daily_location("2016-01-01")
    destination = daily_location("2016-01-02")
    sample = Flows(origin, destination).random_sample(size=10)
    assert isinstance(sample, GraphMixin)
def flows(flowmachine_connect):
    """Yield a pair of Flows queries that share the same first location."""
    origin = daily_location("2016-01-01")
    next_day = daily_location("2016-01-02")
    following_week = daily_location("2016-01-07")
    yield Flows(origin, next_day), Flows(origin, following_week)
def test_flow_math_store(op, exemplar_level_param, flowmachine_connect):
    """
    The result of applying `op` to two Flows can be stored, for the level
    described by `exemplar_level_param`.
    """
    origin = daily_location("2016-01-01", **exemplar_level_param)
    destination = daily_location("2016-01-02", **exemplar_level_param)
    combined = op(Flows(origin, destination), Flows(origin, destination))
    combined.store().result()
    # has_table receives the dotted table name's parts in reverse order.
    reversed_parts = combined.table_name.split(".")[::-1]
    assert flowmachine_connect.has_table(*reversed_parts)
def test_outflow_value(get_dataframe):
    """
    Check one known value of the outflows.

    The outflow of the Flows between the 2016-01-01 and 2016-01-02 daily
    locations is expected to report 24 for "Sankhuwasabha".
    """
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    flow = Flows(dl1, dl2)
    outflow = flow.outflow()
    df = get_dataframe(outflow)
    # DataFrame.ix was removed from pandas; .loc is the label-based
    # replacement. .iloc[0] takes the first remaining value positionally
    # (assumes exactly one row carries this label).
    assert df.set_index("name_from").loc["Sankhuwasabha"].iloc[0] == 24
def test_undirected():
    """
    to_networkx() emits a UserWarning when asked for an undirected graph, and
    the resulting graph links "Arghakhanchi" to "Sankhuwasabha".
    """
    origin = daily_location("2016-01-01")
    destination = daily_location("2016-01-02")
    flow = Flows(origin, destination)
    with pytest.warns(UserWarning):
        undirected = flow.to_networkx(directed_graph=False)
    assert "Sankhuwasabha" in undirected.neighbors("Arghakhanchi")
def test_inflow_value(get_dataframe):
    """
    Check one known value of the inflows.

    The inflow of the Flows between the 2016-01-01 and 2016-01-02 daily
    locations is expected to report 20 for "Okhaldhunga".
    """
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    flow = Flows(dl1, dl2)
    inflow = flow.inflow()
    df = get_dataframe(inflow)
    # DataFrame.ix was removed from pandas; .loc is the label-based
    # replacement. .iloc[0] takes the first remaining value positionally
    # (assumes exactly one row carries this label).
    assert df.set_index("name_to").loc["Okhaldhunga"].iloc[0] == 20
def test_inflow_value(get_dataframe):
    """
    Check one known value of the inflows.

    The inflow of the Flows between the 2016-01-01 and 2016-01-02 daily
    locations is expected to report 20 for the location "524 1 03 13".
    """
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    flow = Flows(dl1, dl2)
    inflow = flow.inflow()
    df = get_dataframe(inflow)
    # .iloc[0] makes the positional lookup explicit instead of relying on
    # integer keys falling back to positions on a label-indexed Series
    # (assumes exactly one row carries this label).
    assert df.set_index("pcod_to").loc["524 1 03 13"].iloc[0] == 20
def test_nx_object():
    """
    The graph returned by to_networkx() has the expected node, with one
    expected neighbour present and one expected non-neighbour absent.
    """
    origin = daily_location("2016-01-01")
    destination = daily_location("2016-01-02")
    graph = Flows(origin, destination).to_networkx()
    hub = "524 3 09 50"
    assert graph.has_node(hub)
    # neighbors() is called afresh for each membership check.
    assert "524 5 14 73" in graph.neighbors(hub)
    assert "524 1 02 09" not in graph.neighbors(hub)
def test_nx_object():
    """
    The graph returned by to_networkx() has the expected node, with one
    expected neighbour present and one expected non-neighbour absent.
    """
    origin = daily_location("2016-01-01")
    destination = daily_location("2016-01-02")
    graph = Flows(origin, destination).to_networkx()
    hub = "Arghakhanchi"
    assert graph.has_node(hub)
    # neighbors() is called afresh for each membership check.
    assert "Dadeldhura" in graph.neighbors(hub)
    assert "Sankhuwasabha" not in graph.neighbors(hub)
def test_do_cache_nested(flowmachine_connect):
    """
    Writing cache metadata for a Flows built from nested subqueries makes
    cache_table_exists report it.
    """
    first_modal = ModalLocation(
        daily_location("2016-01-01"), daily_location("2016-01-02")
    )
    second_modal = ModalLocation(
        daily_location("2016-01-03"), daily_location("2016-01-04")
    )
    nested_flow = Flows(first_modal, second_modal)
    nested_flow._db_store_cache_metadata()
    assert cache_table_exists(flowmachine_connect, nested_flow.md5)
def test_a_mul_b_no_b_flow(get_dataframe):
    """
    No row is returned if the flow is not in both A and B.

    The Humla -> Kapilbastu flow is expected to be missing from the product,
    so taking the first "count" value of the filtered frame raises IndexError.
    """
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    dl3 = daily_location("2016-01-07")
    flowA = Flows(dl1, dl2)
    flowB = Flows(dl1, dl3)
    relfl = flowA * flowB
    df_rel = get_dataframe(relfl)
    counts = df_rel[(df_rel.name_from == "Humla") & (df_rel.name_to == "Kapilbastu")][
        "count"
    ].values
    # Only the indexing is expected to raise, so the filtering stays outside
    # the pytest.raises block.
    with pytest.raises(IndexError):
        counts[0]
def test_a_mul_b(get_dataframe):
    """
    The product of two Flows() has the expected count for Bajhang -> Myagdi.
    """
    origin = daily_location("2016-01-01")
    next_day = daily_location("2016-01-02")
    following_week = daily_location("2016-01-07")
    product = Flows(origin, next_day) * Flows(origin, following_week)
    frame = get_dataframe(product)
    wanted = (frame.name_from == "Bajhang") & (frame.name_to == "Myagdi")
    first_count = frame[wanted]["count"].values[0]
    assert first_count == 12
def test_a_plus_b_no_b_flow(get_dataframe):
    """
    A flow present on only one side of an addition keeps its original count
    in the sum.
    """
    origin = daily_location("2016-01-01")
    next_day = daily_location("2016-01-02")
    following_week = daily_location("2016-01-07")
    left = Flows(origin, next_day)
    right = Flows(origin, following_week)
    frame = get_dataframe(left + right)
    wanted = (frame.name_from == "Humla") & (frame.name_to == "Kapilbastu")
    first_count = frame[wanted]["count"].values[0]
    assert first_count == 2
def test_a_plus_b(get_dataframe):
    """
    The sum of two Flows() has the expected count for Bajhang -> Myagdi.
    """
    origin = daily_location("2016-01-01")
    next_day = daily_location("2016-01-02")
    following_week = daily_location("2016-01-07")
    left = Flows(origin, next_day)
    right = Flows(origin, following_week)
    frame = get_dataframe(left + right)
    wanted = (frame.name_from == "Bajhang") & (frame.name_to == "Myagdi")
    first_count = frame[wanted]["count"].values[0]
    assert first_count == 7
def test_store_cache_nested(flowmachine_connect):
    """
    Storing a Flows built from nested subqueries marks it as stored and makes
    cache_table_exists report it.
    """
    first_modal = ModalLocation(
        daily_location("2016-01-01"), daily_location("2016-01-02")
    )
    second_modal = ModalLocation(
        daily_location("2016-01-03"), daily_location("2016-01-04")
    )
    nested_flow = Flows(first_modal, second_modal)
    nested_flow.store().result()
    # Should be stored
    assert nested_flow.is_stored
    assert cache_table_exists(get_db(), nested_flow.query_id)
def test_average_self(get_dataframe):
    """
    Adding a Flows() to itself and dividing by two is the same as the
    original Flows().
    """
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    flowA = Flows(dl1, dl2)
    index_columns = ["name_from", "name_to"]
    avged = get_dataframe((flowA + flowA) / 2).set_index(index_columns).sort_index()
    orig = get_dataframe(flowA).set_index(index_columns).sort_index()
    compare = avged == orig
    # Require every non-index column to match, not only the first one.
    assert compare.all().all()
def test_inoutflow_with_double_column_location():
    """
    inflow() and outflow() report the expected column names when the
    locations use the "versioned-site" spatial unit.
    """
    origin = daily_location(
        "2016-01-01", spatial_unit=make_spatial_unit("versioned-site")
    )
    destination = daily_location(
        "2016-01-02", spatial_unit=make_spatial_unit("versioned-site")
    )
    flow = Flows(origin, destination)

    inflow_columns = flow.inflow().column_names
    assert inflow_columns == ["site_id_to", "version_to", "lon_to", "lat_to", "total"]

    outflow_columns = flow.outflow().column_names
    assert outflow_columns == [
        "site_id_from",
        "version_from",
        "lon_from",
        "lat_from",
        "total",
    ]
def test_a_div_b_no_b_flow(get_dataframe):
    """
    Rows where there is not an exact match are not returned.

    The Humla -> Kapilbastu flow is expected to be missing from the quotient,
    so taking the first "count" value of the filtered frame raises IndexError.
    """
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    dl3 = daily_location("2016-01-07")
    flowA = Flows(dl1, dl2)
    flowB = Flows(dl1, dl3)
    relfl = flowA / flowB
    df_rel = get_dataframe(relfl)
    counts = df_rel[(df_rel.name_from == "Humla") & (df_rel.name_to == "Kapilbastu")][
        "count"
    ].values
    # Only the indexing is expected to raise, so the filtering stays outside
    # the pytest.raises block.
    with pytest.raises(IndexError):
        counts[0]
def test_do_cache_nested(flowmachine_connect):
    """
    Writing cache metadata for a Flows built from nested subqueries leaves a
    matching row in cache.cached.
    """
    first_home = HomeLocation(
        daily_location("2016-01-01"), daily_location("2016-01-02")
    )
    second_home = HomeLocation(
        daily_location("2016-01-03"), daily_location("2016-01-04")
    )
    nested_flow = Flows(first_home, second_home)
    nested_flow._db_store_cache_metadata()
    cache_rows = flowmachine_connect.fetch(
        f"SELECT * FROM cache.cached WHERE query_id='{nested_flow.md5}'"
    )
    assert bool(cache_rows)
def test_deps_cache_broken_chain():
    """
    For a Query -> not_cached -> cached chain, the stored dependencies of the
    top query include the cached query at the bottom.
    """
    stored_bottom = daily_location("2016-01-01")
    stored_bottom.store().result()
    unstored_first = ModalLocation(
        daily_location("2016-01-01"), daily_location("2016-01-02")
    )
    unstored_second = ModalLocation(
        daily_location("2016-01-03"), daily_location("2016-01-04")
    )
    top = Flows(unstored_first, unstored_second)
    expected_id = stored_bottom.query_id
    assert len(top._get_stored_dependencies()) == 8
    assert any(
        dependency.query_id == expected_id
        for dependency in top._get_stored_dependencies()
    )
def test_summation(get_dataframe):
    """
    sum() over a list of Flows() has the expected count for Bajhang -> Myagdi.
    """
    origin = daily_location("2016-01-01")
    next_day = daily_location("2016-01-02")
    following_week = daily_location("2016-01-07")
    left = Flows(origin, next_day)
    right = Flows(origin, following_week)
    total = sum([left, right, left])
    frame = get_dataframe(total)
    wanted = (frame.name_from == "Bajhang") & (frame.name_to == "Myagdi")
    first_count = frame[wanted]["count"].values[0]
    assert first_count == 10
def test_flows_raise_error(self):
    """
    Flows() raises a ValueError when its two locations use different levels.
    """
    admin3_location = daily_location("2016-01-01", level="admin3")
    admin2_location = daily_location("2016-01-01", level="admin2")
    # Callable form of assertRaises: invokes Flows(admin3_location, admin2_location).
    self.assertRaises(ValueError, Flows, admin3_location, admin2_location)
def test_store_cache_nested(flowmachine_connect):
    """
    Storing a Flows built from nested subqueries marks it as stored and
    leaves a matching row in cache.cached.
    """
    first_home = HomeLocation(
        daily_location("2016-01-01"), daily_location("2016-01-02")
    )
    second_home = HomeLocation(
        daily_location("2016-01-03"), daily_location("2016-01-04")
    )
    nested_flow = Flows(first_home, second_home)
    nested_flow.store().result()
    # Should be stored
    assert nested_flow.is_stored
    cache_rows = flowmachine_connect.fetch(
        f"SELECT * FROM cache.cached WHERE query_id='{nested_flow.md5}'"
    )
    assert bool(cache_rows)
def test_deps_cache_chain():
    """
    For a Query -> cached1 -> cached2 chain, the stored dependencies of the
    top query include cached1 and exclude cached2.
    """
    bottom = daily_location("2016-01-01")
    stored_modal = ModalLocation(
        daily_location("2016-01-01"), daily_location("2016-01-02")
    )
    stored_modal.store().result()
    other_modal = ModalLocation(
        daily_location("2016-01-03"), daily_location("2016-01-04")
    )
    top = Flows(stored_modal, other_modal)
    unwanted_id = bottom.query_id
    wanted_id = stored_modal.query_id
    assert len(top._get_stored_dependencies()) == 6
    assert wanted_id in {dep.query_id for dep in top._get_stored_dependencies()}
    assert unwanted_id not in {dep.query_id for dep in top._get_stored_dependencies()}
def test_bad_divisor():
    """
    Dividing by something which isn't a Flows() or scalar raises a TypeError.
    """
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    flowA = Flows(dl1, dl2)
    with pytest.raises(TypeError):
        # The result is irrelevant; only the raised exception is checked.
        flowA / "A"