Example #1
0
def test_invalidate_cascade(flowmachine_connect):
    """
    Test that invalidation does not cascade if cascade=False.

    """
    def cached(query):
        # True if the query's id is present in the cache.cached table.
        return bool(flowmachine_connect.fetch(
            f"SELECT * FROM cache.cached WHERE query_id='{query.md5}'"))

    dl1 = daily_location("2016-01-01")
    dl1.store().result()
    home_loc = HomeLocation(daily_location("2016-01-01"),
                            daily_location("2016-01-02"))
    home_loc.store().result()
    other_home_loc = HomeLocation(daily_location("2016-01-03"),
                                  daily_location("2016-01-04"))
    flow = Flows(home_loc, other_home_loc)
    flow.store().result()

    dl1.invalidate_db_cache(cascade=False)

    # Only the invalidated query itself should have been dropped.
    assert not dl1.is_stored
    assert home_loc.is_stored
    assert flow.is_stored
    assert not cached(dl1)
    assert cached(home_loc)
    # Dependency records for the still-stored queries remain.
    assert bool(flowmachine_connect.fetch("SELECT * FROM cache.dependencies"))
Example #2
0
def test_invalidate_cache_midchain(flowmachine_connect):
    """
    Test that invalidating a query in the middle of a chain drops the
    top of the chain and this link, but not the bottom.

    """
    def cached(query):
        # True if the query's id appears in the cache.cached table.
        return bool(flowmachine_connect.fetch(
            f"SELECT * FROM cache.cached WHERE query_id='{query.md5}'"))

    bottom = daily_location("2016-01-01")
    bottom.store().result()
    mid = HomeLocation(daily_location("2016-01-01"),
                       daily_location("2016-01-02"))
    mid.store().result()
    other = HomeLocation(daily_location("2016-01-03"),
                         daily_location("2016-01-04"))
    top = Flows(mid, other)
    top.store().result()

    mid.invalidate_db_cache()

    # The invalidated link and everything above it are gone; the bottom stays.
    assert bottom.is_stored
    assert not mid.is_stored
    assert not top.is_stored
    assert cached(bottom)
    assert not cached(mid)
    assert not cached(top)
    has_deps = bool(
        flowmachine_connect.fetch("SELECT * FROM cache.dependencies"))
    assert has_deps  # Daily location deps should remain
Example #3
0
def test_invalidate_cache_multi(flowmachine_connect):
    """
    Test that invalidating a simple query that is part of
    a bigger one drops both tables and removes both from cache.

    """
    def cached(query):
        # True if the query's id appears in the cache.cached table.
        return bool(flowmachine_connect.fetch(
            f"SELECT * FROM cache.cached WHERE query_id='{query.md5}'"))

    dl1 = daily_location("2016-01-01")
    dl1.store().result()
    home_loc = HomeLocation(daily_location("2016-01-01"),
                            daily_location("2016-01-02"))
    home_loc.store().result()

    dl1.invalidate_db_cache()

    assert not dl1.is_stored
    assert not home_loc.is_stored
    assert not cached(dl1)
    assert not cached(home_loc)
    # NOTE(review): dependency rows are asserted to remain here — presumably
    # they belong to other still-stored queries (e.g. the 2016-01-02 daily
    # location stored as part of home_loc); confirm against the cache schema.
    assert bool(flowmachine_connect.fetch("SELECT * FROM cache.dependencies"))
Example #4
0
def test_daily_locs_errors():
    """
    Requesting a daily location for a date absent from the data raises
    MissingDateError.
    """
    missing_date = "2016-01-31"  # no events exist for this date in the test set
    with pytest.raises(MissingDateError):
        daily_location(missing_date)
Example #5
0
def test_column_names_flow(exemplar_spatial_unit_param):
    """Test that the column_names property matches head(0) for Flows."""
    origin = daily_location("2016-01-01",
                            spatial_unit=exemplar_spatial_unit_param)
    destination = daily_location("2016-01-01",
                                 spatial_unit=exemplar_spatial_unit_param)
    flow = Flows(origin, destination)
    assert flow.column_names == flow.head(0).columns.tolist()
Example #6
0
def test_flows_geojson(get_dataframe):
    """
    Test geojson works for flows with non-standard column names.
    """
    origin = daily_location(
        "2016-01-01",
        spatial_unit=make_spatial_unit("admin",
                                       level=2,
                                       region_id_column_name="admin2name"),
    )
    destination = daily_location(
        "2016-01-02",
        spatial_unit=make_spatial_unit("admin",
                                       level=2,
                                       region_id_column_name="admin2name"),
    )
    fl = Flows(origin, destination)
    js = fl.to_geojson()
    df = get_dataframe(fl)
    # Spot-check a few geojson features against the dataframe representation.
    for idx in (0, 5, 7):
        props = js["features"][idx]["properties"]
        per_source = df[df.admin2name_from ==
                        props["admin2name"]].set_index("admin2name_to")
        for dest, tot in props["outflows"].items():
            assert tot == per_source.loc[dest]["value"]
def test_ambiguity_is_an_error():
    """
    Join raises an error if resulting columns are ambiguous.
    """
    left = daily_location("2016-01-01")
    right = daily_location("2016-01-01")
    with pytest.raises(ValueError):
        left.join(right, on_left="subscriber")
Example #8
0
def test_dl_count_sum_equal_or_less_than_period(get_dataframe):
    """
    Sum of LocationVisits per subscriber should not be more than total
    number of days between 'start_date' and 'stop_date'.
    """
    # (days in period, start date, stop date) — both bodies were duplicated
    # verbatim in the original; a parameter loop removes the duplication.
    cases = [
        (7, "2016-01-01", "2016-01-07"),
        (3, "2016-01-01", "2016-01-03"),
    ]
    for days, start_date, stop_date in cases:
        lv = LocationVisits(
            DayTrajectories(*[
                daily_location(d, level="admin3", method="last")
                for d in list_of_dates(start_date, stop_date)
            ]))
        df = get_dataframe(lv)
        # The first subscriber's total daily-location count cannot exceed
        # the number of days in the period.
        assert df[df["subscriber"] == df.iloc[0, 0]]["dl_count"].sum() <= days
Example #9
0
def test_invalidate_cache_midchain(flowmachine_connect):
    """
    Test that invalidating a query in the middle of a chain drops the
    top of the chain and this link, but not the bottom.

    """
    bottom = daily_location("2016-01-01")
    bottom.store().result()
    mid = ModalLocation(daily_location("2016-01-01"),
                        daily_location("2016-01-02"))
    mid.store().result()
    other = ModalLocation(daily_location("2016-01-03"),
                          daily_location("2016-01-04"))
    top = Flows(mid, other)
    top.store().result()
    for query in (bottom, mid, top):
        assert query.is_stored

    mid.invalidate_db_cache()

    # The invalidated link and the chain above it are gone; the bottom stays.
    assert bottom.is_stored
    assert not mid.is_stored
    assert not top.is_stored
    assert cache_table_exists(get_db(), bottom.query_id)
    assert not cache_table_exists(get_db(), mid.query_id)
    assert not cache_table_exists(get_db(), top.query_id)
    has_deps = bool(get_db().fetch("SELECT * FROM cache.dependencies"))
    assert has_deps  # Daily location deps should remain
def test_subsetting_of_query(get_dataframe):
    """
    Check that query ids and length of results of some subsetted queries are as expected.
    """
    chosen_subscribers = [
        "1jwYL3Nl1Y46lNeQ",
        "nLvm2gVnEdg7lzqX",
        "jwKJorl0yBrZX5N8",
    ]
    short_calls = CustomQuery(
        "SELECT duration, msisdn as subscriber FROM events.calls WHERE duration < 10",
        ["duration", "subscriber"],
    )

    # Unsubsetted baseline.
    assert 499 == len(get_dataframe(daily_location(date="2016-01-01")))

    # Each subsetter paired with the row count it should yield.
    expectations = [
        (SubscriberSubsetterForAllSubscribers(), 499),
        (SubscriberSubsetterForExplicitSubset(chosen_subscribers), 3),
        (SubscriberSubsetterForFlowmachineQuery(short_calls), 26),
    ]
    for subsetter, expected_len in expectations:
        dl = daily_location(date="2016-01-01", subscriber_subset=subsetter)
        assert expected_len == len(get_dataframe(dl))
Example #11
0
def test_invalidate_cascade(flowmachine_connect):
    """
    Test that invalidation does not cascade if cascade=False.

    """
    dl = daily_location("2016-01-01")
    dl.store().result()
    modal = ModalLocation(daily_location("2016-01-01"),
                          daily_location("2016-01-02"))
    modal.store().result()
    other_modal = ModalLocation(daily_location("2016-01-03"),
                                daily_location("2016-01-04"))
    flow = Flows(modal, other_modal)
    flow.store().result()
    for query in (dl, modal, flow):
        assert query.is_stored

    dl.invalidate_db_cache(cascade=False)

    # Only the invalidated query itself is dropped.
    assert not dl.is_stored
    assert modal.is_stored
    assert flow.is_stored
    assert not cache_table_exists(get_db(), dl.query_id)
    assert cache_table_exists(get_db(), modal.query_id)
    assert bool(get_db().fetch("SELECT * FROM cache.dependencies"))
Example #12
0
def test_gets_mixins():
    """
    Test that a random sample gets applicable mixins.
    """
    flow = Flows(daily_location("2016-01-01"), daily_location("2016-01-02"))
    sample = flow.random_sample(size=10)
    assert isinstance(sample, GraphMixin)
Example #13
0
 def test_flows_raise_error(self):
     """
     Flows() raises error if location levels are different.
     """
     admin3_loc = daily_location("2016-01-01", level="admin3")
     admin2_loc = daily_location("2016-01-01", level="admin2")
     with self.assertRaises(ValueError):
         Flows(admin3_loc, admin2_loc)
Example #14
0
 def setUp(self):
     """Build two flows sharing an origin and their difference as a dataframe."""
     self.dl1 = daily_location("2016-01-01")
     self.dl2 = daily_location("2016-01-02")
     self.dl3 = daily_location("2016-01-07")
     # Two flows with the same origin day but different destination days.
     self.flowA = Flows(self.dl1, self.dl2)
     self.flowB = Flows(self.dl1, self.dl3)
     # Relative flow: per-link difference between flowA and flowB.
     self.relfl = self.flowA - self.flowB
     self.df_rel = self.relfl.get_dataframe()
def flows(flowmachine_connect):
    """Fixture providing two flows."""
    origin = daily_location("2016-01-01")
    first_destination = daily_location("2016-01-02")
    second_destination = daily_location("2016-01-07")
    # Both flows share the same origin day.
    yield Flows(origin, first_destination), Flows(origin, second_destination)
Example #16
0
    def setUp(self):
        """Create two daily locations, two limit stubs and a subset query."""
        self.dl1 = daily_location("2016-01-01")
        self.dl2 = daily_location("2016-01-02")

        # Small stub queries used as cheap join/subset partners in tests.
        self.stub1 = limit("2016-01-01")
        self.stub2 = limit("2016-01-01", offset=5)

        self.subset_q = CustomQuery("SELECT msisdn FROM events.calls LIMIT 10")
def test_bad_multiplier():
    """
    Multiplying by something which isn't a Flows() or scalar raises a TypeError.
    """
    # Docstring previously said "value error" but the code (correctly)
    # expects TypeError; the unused `relfl` binding is also dropped.
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    flowA = Flows(dl1, dl2)
    with pytest.raises(TypeError):
        flowA * "A"
Example #18
0
def test_column_names_inout(query, exemplar_spatial_unit_param):
    """Test that column_names property matches head(0) for InFlow & OutFlow."""
    origin = daily_location("2016-01-01",
                            spatial_unit=exemplar_spatial_unit_param)
    destination = daily_location("2016-01-01",
                                 spatial_unit=exemplar_spatial_unit_param)
    instance = query(Flows(origin, destination))
    assert instance.column_names == instance.head(0).columns.tolist()
Example #19
0
def test_bad_exp():
    """
    Raising to something which isn't a float or an int raises a TypeError.
    """
    # Docstring previously claimed ValueError but the code (correctly)
    # expects TypeError; the unused `relfl` binding is also dropped.
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    flowA = Flows(dl1, dl2)
    with pytest.raises(TypeError):
        flowA ** "A"
Example #20
0
def test_bad_divisor():
    """
    Dividing by something which isn't a Flows() or scalar raises a TypeError.
    """
    # Docstring previously claimed ValueError but the code (correctly)
    # expects TypeError; the unused `relfl` binding is also dropped.
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    flowA = Flows(dl1, dl2)
    with pytest.raises(TypeError):
        flowA / "A"
Example #21
0
def test_flow_math_store(op, exemplar_level_param, flowmachine_connect):
    """
    Storing works for flows added together at all levels
    """
    origin = daily_location("2016-01-01", **exemplar_level_param)
    destination = daily_location("2016-01-02", **exemplar_level_param)
    combined = op(Flows(origin, destination), Flows(origin, destination))
    combined.store().result()
    # table_name is "schema.table"; has_table takes (table, schema).
    schema, table = combined.table_name.split(".")
    assert flowmachine_connect.has_table(table, schema)
Example #22
0
def test_column_names_math(query, exemplar_level_param):
    """Test that column_names property matches head(0) for FlowMath."""
    origin = daily_location("2016-01-01", **exemplar_level_param)
    destination = daily_location("2016-01-01", **exemplar_level_param)
    flow = Flows(origin, destination)
    instance = query(flow, flow)
    assert instance.column_names == instance.head(0).columns.tolist()
Example #23
0
def test_run_modal_location_query(send_zmq_message_and_receive_reply):
    """
    Can run modal location query and receive successful response including the query_id.
    """
    def dl_params(date):
        # Message-side parameters for one daily location sub-query.
        return {
            "query_kind": "daily_location",
            "date": date,
            "method": "most-common",
            "aggregation_unit": "admin3",
            "subscriber_subset": None,
        }

    msg = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "modal_location",
                "locations": [dl_params("2016-01-01"), dl_params("2016-01-02")],
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg)

    def dl_query(date):
        # Flowmachine-side equivalent of dl_params, used to derive the
        # expected query id.
        return daily_location(
            date=date,
            method="most-common",
            spatial_unit=make_spatial_unit("admin", level=3),
            subscriber_subset=None,
        )

    expected_query_id = SpatialAggregate(locations=ModalLocation(
        dl_query("2016-01-01"), dl_query("2016-01-02"))).md5

    assert "success" == reply["status"]
    assert expected_query_id == reply["payload"]["query_id"]
    assert ["query_id"] == list(reply["payload"].keys())
Example #24
0
def test_flows_raise_error():
    """
    Flows() raises error if location levels are different.
    """
    admin3_loc = daily_location(
        "2016-01-01", spatial_unit=make_spatial_unit("admin", level=3))
    admin2_loc = daily_location(
        "2016-01-01", spatial_unit=make_spatial_unit("admin", level=2))
    with pytest.raises(ValueError):
        Flows(admin3_loc, admin2_loc)
Example #25
0
def test_edgelist(get_dataframe):
    """Test that an EdgeList object can be created."""
    # Default (left-handed) orientation indexes on the source name.
    agg = daily_location("2016-01-01").aggregate()
    left_df = get_dataframe(EdgeList(agg))
    assert all(left_df.set_index(["name_from"]).loc["Rasuwa"]["count"] == 11)
    # Right-handed orientation indexes on the destination name instead.
    agg = daily_location("2016-01-01").aggregate()
    right_df = get_dataframe(EdgeList(agg, left_handed=False))
    assert all(right_df.set_index(["name_to"]).loc["Rasuwa"]["count"] == 11)
Example #26
0
def test_flows_geo_augmented_query_raises_error():
    """
    Test that a ValueError is raised when attempting to get geojson for a flows
    query with no geography data.
    """
    cell_flow = Flows(
        daily_location("2016-01-01", spatial_unit=make_spatial_unit("cell")),
        daily_location("2016-01-02", spatial_unit=make_spatial_unit("cell")),
    )
    with pytest.raises(ValueError):
        cell_flow.to_geojson_string()
Example #27
0
def test_inflow_value(get_dataframe):
    """
    One of the values for the inflows.
    """
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    flow = Flows(dl1, dl2)
    inflow = flow.inflow()
    df = get_dataframe(inflow)
    # .ix was deprecated and then removed from pandas; .loc is the
    # label-based replacement (matches the sibling inflow test's usage).
    assert df.set_index("name_to").loc["Okhaldhunga"][0] == 20
Example #28
0
def test_outflow_value(get_dataframe):
    """
    One of the values for the outflows.
    """
    dl1 = daily_location("2016-01-01")
    dl2 = daily_location("2016-01-02")
    flow = Flows(dl1, dl2)
    outflow = flow.outflow()
    df = get_dataframe(outflow)
    # .ix was deprecated and then removed from pandas; .loc is the
    # label-based replacement (matches the sibling inflow test's usage).
    assert df.set_index("name_from").loc["Sankhuwasabha"][0] == 24
def test_inflow_value(get_dataframe):
    """
    One of the values for the inflows.
    """
    flow = Flows(daily_location("2016-01-01"), daily_location("2016-01-02"))
    df = get_dataframe(flow.inflow())
    # Spot-check a single destination pcode's inflow total.
    assert 20 == df.set_index("pcod_to").loc["524 1 03 13"][0]
Example #30
0
def test_shrink_to_size_respects_dry_run(flowmachine_connect):
    """
    Test that shrink_below_size doesn't remove anything during a dry run.
    """
    stored = [
        daily_location("2016-01-01").store().result(),
        daily_location("2016-01-02").store().result(),
    ]
    removed_queries = shrink_below_size(flowmachine_connect, 0, dry_run=True)
    # Both queries are reported as removal candidates...
    assert len(removed_queries) == 2
    # ...but nothing was actually dropped.
    for query in stored:
        assert query.is_stored