# Example 1
def test_unmoving_at_reference_location_values(get_dataframe):
    """Check the unmoving-at-reference flag for two hand-picked subscribers."""
    query = UnmovingAtReferenceLocation(
        locations=UniqueLocations(
            SubscriberLocations(
                "2016-01-01",
                "2016-01-01 10:00",
                spatial_unit=make_spatial_unit("admin", level=3),
            )
        ),
        reference_locations=LastLocation("2016-01-01", "2016-01-02"),
    )
    flags = get_dataframe(query).set_index("subscriber")
    # One subscriber is known to have moved away from their reference location,
    # the other to have stayed put.
    assert not flags.loc["038OVABN11Ak4W5P"].value
    assert flags.loc["3XKdxqvyNxO2vLD1"].value
# Example 2
def test_subscriber_location_entropy(get_dataframe):
    """
    Test some hand picked periods and tables.
    """
    entropy = get_dataframe(
        LocationEntropy("2016-01-01", "2016-01-08")
    ).set_index("subscriber")
    assert entropy.loc["0DB8zw67E9mZAPK2"].entropy == pytest.approx(2.996_587)

    # Same subscriber, narrower window and a coarser spatial unit.
    entropy = get_dataframe(
        LocationEntropy(
            "2016-01-02",
            "2016-01-05",
            spatial_unit=make_spatial_unit("admin", level=1),
        )
    ).set_index("subscriber")
    assert entropy.loc["0DB8zw67E9mZAPK2"].entropy == pytest.approx(1.214_889_6)
# Example 3
def test_invalid_statistic_raises_error():
    """
    Test that passing an invalid statistic raises an error.
    """
    locations = SubscriberLocations(
        "2016-01-01", "2016-01-07", spatial_unit=make_spatial_unit("lon-lat")
    )
    with pytest.raises(
        ValueError, match="'NOT_A_STATISTIC' is not a valid statistic"
    ):
        DistanceSeries(subscriber_locations=locations, statistic="NOT_A_STATISTIC")
def get_spatial_unit_obj(aggregation_unit_string) -> GeomSpatialUnit:
    """
    Given an aggregation unit string (as validated by AggregationUnit()),
    return a FlowMachine spatial unit object.

    Raises
    ------
    NotImplementedError
        If the aggregation unit is not an admin-style unit.
    ValueError
        If an admin-style unit carries no trailing level number.
    """
    import re  # local import keeps this fix self-contained

    if "admin" in aggregation_unit_string:
        # Parse the full trailing number rather than only the final character,
        # so levels of 10 or more (e.g. "admin10") are handled correctly.
        level_match = re.search(r"(\d+)$", aggregation_unit_string)
        if level_match is None:
            raise ValueError(
                f"Could not parse an admin level from '{aggregation_unit_string}'."
            )
        spatial_unit_args = {
            "spatial_unit_type": "admin",
            "level": int(level_match.group(1)),
        }
    else:
        raise NotImplementedError(
            f"The helper function `get_spatial_unit_obj` does not support aggregation units of type '{aggregation_unit_string}'."
        )
    return make_spatial_unit(**spatial_unit_args)
# Example 5
def test_no_cast_for_below_day(get_dataframe):
    """
    Test that results aren't cast to date for smaller time buckets.
    """
    locations = SubscriberLocations(
        "2016-01-01", "2016-01-02", spatial_unit=make_spatial_unit("lon-lat")
    )
    series = get_dataframe(
        DistanceSeries(subscriber_locations=locations, time_bucket="hour")
    )
    # An hourly bucket must keep full datetime resolution.
    assert isinstance(series.datetime[0], datetime)
# Example 6
def test_unmoving_at_reference_location_counts_column_names(
        get_column_names_from_run):
    """The counts query should yield exactly pcod and value columns."""
    unique_locs = UniqueLocations(
        SubscriberLocations(
            "2016-01-01",
            "2016-01-01 10:00",
            spatial_unit=make_spatial_unit("admin", level=3),
        )
    )
    counts = UnmovingAtReferenceLocationCounts(
        UnmovingAtReferenceLocation(
            locations=unique_locs,
            reference_locations=LastLocation("2016-01-01", "2016-01-02"),
        )
    )
    assert get_column_names_from_run(counts) == ["pcod", "value"]
def test_join_with_polygon(get_dataframe, get_length):
    """
    Test that flowmachine.JoinToLocation can get the (arbitrary) polygon
    of each cell.
    """
    subscriber_locs = SubscriberLocations(
        "2016-01-05", "2016-01-07", spatial_unit=make_spatial_unit("cell")
    )
    polygon_unit = make_spatial_unit(
        "polygon",
        region_id_column_name="admin3pcod",
        geom_table="geography.admin3",
        geom_column="geom",
    )
    joined = get_dataframe(
        JoinToLocation(subscriber_locs, spatial_unit=polygon_unit)
    )

    assert sorted(joined.columns) == sorted(
        ["admin3pcod", "location_id", "subscriber", "time"]
    )
    # The join must not drop any rows.
    assert len(joined) == get_length(subscriber_locs)
def test_column_names_meaningful_locations(get_column_names_from_run,
                                           meaningful_locations_labels):
    """Test that column_names property matches head(0) for meaningfullocations"""
    clusters = HartiganCluster(
        calldays=CallDays(
            subscriber_locations=SubscriberLocations(
                start="2016-01-01",
                stop="2016-01-02",
                spatial_unit=make_spatial_unit("versioned-site"),
            )
        ),
        radius=1,
    )
    scores = EventScore(
        start="2016-01-01",
        stop="2016-01-02",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    mfl = MeaningfulLocations(
        clusters=clusters,
        scores=scores,
        labels=meaningful_locations_labels,
        label="evening",
    )
    # The declared column_names must agree with what a run actually produces.
    assert get_column_names_from_run(mfl) == mfl.column_names
def test_join_with_versioned_cells(get_dataframe, get_length):
    """
    Test that flowmachine.JoinToLocation can fetch the cell version.
    """
    subscriber_locs = SubscriberLocations(
        "2016-01-05", "2016-01-07", spatial_unit=make_spatial_unit("cell")
    )
    joined = get_dataframe(
        JoinToLocation(
            subscriber_locs, spatial_unit=make_spatial_unit("versioned-cell")
        )
    )
    # As our database is complete we should not drop any rows
    assert len(joined) == get_length(subscriber_locs)

    at_moving_site = joined.location_id.isin(moving_sites)
    # These should all be version zero, these are the towers before the changeover date, or those that
    # have not moved.
    should_be_version_zero = joined[(joined.time <= move_date) | ~at_moving_site]
    # These should all be one, they are the ones after the change over time that have moved.
    should_be_version_one = joined[(joined.time > move_date) & at_moving_site]

    assert (should_be_version_zero.version == 0).all()
    assert (should_be_version_one.version == 1).all()
# Example 10
def test_active_at_reference_location_counts_column_names(
        get_column_names_from_run):
    """The counts query should yield exactly pcod and value columns."""
    unique_locs = UniqueLocations(
        SubscriberLocations(
            "2016-01-01",
            "2016-01-02",
            spatial_unit=make_spatial_unit("admin", level=3),
        )
    )
    counts = ActiveAtReferenceLocationCounts(
        ActiveAtReferenceLocation(
            subscriber_locations=unique_locs,
            reference_locations=daily_location("2016-01-03"),
        )
    )
    assert get_column_names_from_run(counts) == ["pcod", "value"]
# Example 11
    def __init__(
        self,
        start,
        stop,
        *,
        location,
        spatial_unit: AnySpatialUnit = make_spatial_unit("cell"),
        hours="all",
        table="all",
        subscriber_identifier="msisdn",
        ignore_nulls=True,
        subscriber_subset=None,
    ):
        """
        Normalise the date bounds, validate the location/spatial-unit
        combination, and delegate the subscriber-location lookup to a
        SubscriberLocations sub-query.
        """
        # location='any' is only meaningful when working with raw cells.
        if location == "any" and spatial_unit != make_spatial_unit("cell"):
            raise ValueError(
                "Invalid parameter combination: location='any' can only be used with cell spatial unit."
            )

        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.location = location

        self.ul = SubscriberLocations(
            self.start,
            self.stop,
            spatial_unit=spatial_unit,
            hours=hours,
            table=table,
            subscriber_identifier=subscriber_identifier,
            ignore_nulls=ignore_nulls,
            subscriber_subset=subscriber_subset,
        )

        # Surface the sub-query's resolved table and identifier on this object.
        self.table = self.ul.table
        self.subscriber_identifier = self.ul.subscriber_identifier

        super().__init__()
def test_can_get_pcods(get_dataframe):
    """
    SubscriberLocations() can make queries at the p-code level.
    """
    pcod_unit = make_spatial_unit(
        "polygon",
        region_id_column_name="admin3pcod",
        geom_table="geography.admin3",
    )
    subscriber_pcod = SubscriberLocations(
        "2016-01-01 13:30:30", "2016-01-02 16:25:00", spatial_unit=pcod_unit
    )
    result = get_dataframe(subscriber_pcod)
    # Nepali admin3 p-codes all begin with the country prefix 524.
    assert result.admin3pcod[0].startswith("524")
def test_join_with_lon_lat(get_dataframe):
    """
    Test that flowmachine.JoinToLocation can get the lon-lat values of the cell
    """
    ul = SubscriberLocations(
        "2016-01-05", "2016-01-07", spatial_unit=make_spatial_unit("cell")
    )
    df = get_dataframe(JoinToLocation(ul, spatial_unit=make_spatial_unit("lon-lat")))

    expected_cols = sorted(["subscriber", "time", "location_id", "lon", "lat"])
    assert sorted(df.columns) == expected_cols
    # Pick out one cell that moves location and assert that the
    # lon-lats are right
    focal_cell = "dJb0Wd"
    lon1, lat1 = (83.09284486, 27.648837800000003)
    lon2, lat2 = (83.25769074752517, 27.661443318109132)
    post_move = df[(df.time > move_date) & (df["location_id"] == focal_cell)]
    pre_move = df[(df.time < move_date) & (df["location_id"] == focal_cell)]
    # And check them all one-by-one.
    # BUG FIX: these np.isclose(...).all() results were previously computed but
    # never asserted, so the coordinate checks could never fail.
    assert np.isclose(pre_move.lon, lon1).all()
    assert np.isclose(pre_move.lat, lat1).all()
    assert np.isclose(post_move.lon, lon2).all()
    assert np.isclose(post_move.lat, lat2).all()
def test_reprojection():
    """
    Test that in db reprojection works.

    """
    dl = daily_location(
        "2016-01-01", "2016-01-02", spatial_unit=make_spatial_unit("lon-lat")
    ).aggregate()
    js = dl.to_geojson(crs=2770)  # OSGB36
    # Compare the reprojected coordinates with pytest.approx: exact float
    # equality is fragile across PostGIS/proj library versions.
    assert js["features"][0]["geometry"]["coordinates"] == pytest.approx(
        [-8094697.51781301, 9465052.88370377]
    )
    assert js["properties"]["crs"] == proj4string(dl.connection, 2770)
# Example 15
def test_handles_list_of_locations(get_dataframe):
    """
    FirstLocation() subsets data based on a list of locations, rather than a single one.
    """
    dfl = FirstLocation(
        "2016-01-01",
        "2016-01-04",
        # BUG FIX: a missing comma previously concatenated the last two site ids
        # into the single (nonexistent) site "m9jL23LVnDQL".
        location=["QeBRM8", "m9jL23", "LVnDQL"],
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    df = get_dataframe(dfl)

    df.set_index("subscriber", inplace=True)
    assert str(df.loc["038OVABN11Ak4W5P", "time"]) == "2016-01-01 05:02:10+00:00"
# Example 16
def test_whether_scores_are_within_score_bounds(get_dataframe):
    """
    Test whether the scores are within the bounds of maximum and minimum scores.
    """
    scores = get_dataframe(
        EventScore(
            start="2016-01-01",
            stop="2016-01-05",
            spatial_unit=make_spatial_unit("versioned-site"),
        )
    )
    bounded = scores[["score_hour", "score_dow"]]
    # Both score columns are defined on the interval [-1, 1].
    assert all(bounded.max() <= [1, 1])
    assert all(bounded.min() >= [-1, -1])
# Example 17
def test_last_loc_lon_lat(get_dataframe):
    """
    LastLocation() can make queries at the lon-lat level.
    """
    result = get_dataframe(
        LastLocation(
            "2016-01-01", "2016-01-02", spatial_unit=make_spatial_unit("lon-lat")
        )
    ).set_index("subscriber")
    subscriber_row = result.loc["yqw50eNyEwOxNDGL"]
    assert float(subscriber_row.lon) == pytest.approx(83.09669810947962)
    assert float(subscriber_row.lat) == pytest.approx(29.135638957790576)
def test_unique_visitor_counts(get_dataframe):
    """
    Values test for unique visitor counts.
    """
    reference_counts = ActiveAtReferenceLocationCounts(
        ActiveAtReferenceLocation(
            subscriber_locations=UniqueLocations(
                SubscriberLocations(
                    "2016-01-01",
                    "2016-01-02",
                    spatial_unit=make_spatial_unit("admin", level=3),
                )
            ),
            reference_locations=daily_location("2016-01-03"),
        )
    )
    subscriber_counts = UniqueSubscriberCounts(
        "2016-01-01",
        "2016-01-02",
        spatial_unit=make_spatial_unit("admin", level=3),
    )
    visitors = get_dataframe(
        UniqueVisitorCounts(reference_counts, subscriber_counts)
    ).set_index("pcod")
    # Hand-picked regions with known visitor totals.
    assert visitors.loc["524 1 01 04"].value == 66
    assert visitors.loc["524 3 08 44"].value == 170
def test_min_displacement_zero(get_dataframe):
    """
    When time period for diplacement and home location are the same min displacement
    should be zero for all subscribers
    """
    reference = daily_location(
        "2016-01-01", spatial_unit=make_spatial_unit("lon-lat")
    )
    displacement = Displacement(
        "2016-01-01", "2016-01-07", reference_location=reference, statistic="min"
    )
    # Every subscriber's minimum displacement should be exactly zero, so the
    # sum over all of them is zero too.
    assert get_dataframe(displacement).value.sum() == 0
def test_returns_expected_values(stat, sub_a_expected, sub_b_expected,
                                 get_dataframe):
    """
    Test that we get expected return values for the various statistics
    """
    sub_a_id, sub_b_id = "j6QYNbMJgAwlVORP", "NG1km5NzBg5JD8nj"
    reference = daily_location(
        "2016-01-01", spatial_unit=make_spatial_unit("lon-lat")
    )
    series = get_dataframe(
        DistanceSeries(
            subscriber_locations=SubscriberLocations(
                "2016-01-01",
                "2016-01-07",
                spatial_unit=make_spatial_unit("lon-lat"),
            ),
            reference_location=reference,
            statistic=stat,
        )
    ).set_index(["subscriber", "datetime"])
    assert series.loc[(sub_a_id, datetime(2016, 1, 1))].value == pytest.approx(
        sub_a_expected
    )
    assert series.loc[(sub_b_id, datetime(2016, 1, 6))].value == pytest.approx(
        sub_b_expected
    )
# Example 21
def test_invalid_time_bucket_raises_error():
    """
    Test that passing an invalid time bucket raises an error.
    """
    locations = SubscriberLocations(
        "2016-01-01", "2016-01-07", spatial_unit=make_spatial_unit("lon-lat")
    )
    with pytest.raises(
        ValueError, match="'NOT_A_BUCKET' is not a valid value for time_bucket"
    ):
        DistanceSeries(subscriber_locations=locations, time_bucket="NOT_A_BUCKET")
def test_some_results(get_dataframe):
    """
    DistanceMatrix() returns a dataframe that contains hand-picked results.
    """
    distances = get_dataframe(
        DistanceMatrix(spatial_unit=make_spatial_unit("versioned-site"))
    ).set_index(["site_id_from", "version_from", "site_id_to", "version_to"])
    # Hand-picked site/version pairs with known distances.
    expected = {
        ("8wPojr", 1, "GN2k0G", 0): 789.23239740488,
        ("8wPojr", 0, "GN2k0G", 0): 769.20155628077,
        ("8wPojr", 1, "DbWg4K", 0): 757.97771793683,
    }
    for key, distance in expected.items():
        assert distances.loc[key]["value"] == pytest.approx(distance)
# Example 23
def test_unmoving_values(get_dataframe):
    """Check the unmoving flag for two hand-picked subscribers."""
    unmoving = Unmoving(
        locations=UniqueLocations(
            SubscriberLocations(
                "2016-01-01",
                "2016-01-01 10:00",
                spatial_unit=make_spatial_unit("admin", level=3),
            )
        )
    )
    flags = get_dataframe(unmoving).set_index("subscriber")
    # One subscriber is known to have moved in the window, the other not.
    assert not flags.loc["038OVABN11Ak4W5P"].value
    assert flags.loc["0Gl95NRLjW2aw8pW"].value
# Example 24
def test_contact_reference_location_no_spatial_unit_raises():
    """ Test ValueError is raised for contact_location without spatial_unit attribute. """
    cb = ContactBalance("2016-01-01", "2016-01-03")
    # by encapsulating ModalLocations in a CustomQuery we remove the spatial_unit
    # attribute from it which should raise an error
    ml = ModalLocation(*[
        daily_location(
            d,
            spatial_unit=make_spatial_unit("versioned-cell"),
            subscriber_subset=cb.counterparts_subset(include_subscribers=True),
        ) for d in list_of_dates("2016-01-01", "2016-01-03")
    ])
    ml = CustomQuery(ml.get_query(), ml.column_names)
    with pytest.raises(ValueError):
        # F841 fix: the constructor call alone triggers the error; the result
        # was previously bound to an unused variable.
        ContactReferenceLocationStats(cb, ml)
# Example 25
def test_contact_reference_location_bad_spatial_unit_raises():
    """
    Test InvalidSpatialUnitError is raised for contact_location with
    non-compliant spatial unit.
    """
    cb = ContactBalance("2016-01-01", "2016-01-03")
    ml = ModalLocation(*[
        daily_location(
            d,
            spatial_unit=make_spatial_unit("admin", level=3),
            subscriber_subset=cb.counterparts_subset(include_subscribers=True),
        ) for d in list_of_dates("2016-01-01", "2016-01-03")
    ])
    with pytest.raises(InvalidSpatialUnitError):
        # F841 fix: the constructor call alone triggers the error; the result
        # was previously bound to an unused variable.
        ContactReferenceLocationStats(cb, ml)
# Example 26
def test_cluster_is_within_envelope(get_dataframe):
    """
    Test that all the clusters are within the enveloped formed by all the towers in the cluster.
    """
    calldays = CallDays(
        SubscriberLocations(
            "2016-01-01",
            "2016-01-04",
            spatial_unit=make_spatial_unit("versioned-site"),
        )
    )
    clusters = HartiganCluster(calldays=calldays, radius=50).to_geopandas()
    sites = Sites().to_geopandas().set_index(["site_id", "version"])
    towers = GeoSeries(clusters.apply(lambda row: get_geom_point(row, sites), 1))
    # Every cluster geometry must intersect its own tower envelope.
    assert all(clusters.intersects(towers))
# Example 27
def test_all_options_hartigan():
    """
    Test whether Hartigan works when changing all options.
    """
    calldays = CallDays(
        SubscriberLocations(
            "2016-01-01",
            "2016-01-04",
            spatial_unit=make_spatial_unit("versioned-site"),
        )
    )
    clusters = HartiganCluster(
        calldays=calldays, radius=50, buffer=2, call_threshold=2
    ).to_geopandas()
    expected_columns = {
        "subscriber",
        "geometry",
        "rank",
        "calldays",
        "site_id",
        "version",
        "centroid",
    }
    assert set(clusters.columns) == expected_columns
# Example 28
def test_call_threshold_works(get_dataframe):
    """
    Test whether a call threshold above 1 limits the number of clusters.
    """
    calldays = CallDays(
        SubscriberLocations(
            "2016-01-01",
            "2016-01-04",
            spatial_unit=make_spatial_unit("versioned-site"),
        )
    )
    unthresholded = HartiganCluster(calldays=calldays, radius=50).to_geopandas()
    # There must be at least one single-call-day cluster for the threshold to prune.
    assert any(unthresholded.calldays == 1)
    thresholded = get_dataframe(
        HartiganCluster(calldays=calldays, radius=50, call_threshold=2)
    )
    assert len(unthresholded) > len(thresholded)
def test_meaningful_locations_aggregate_disallowed_spatial_unit_raises():
    """ Test that a bad spatial unit raises an InvalidSpatialUnitError"""

    with pytest.raises(InvalidSpatialUnitError):
        # F841 fix: the constructor call alone triggers the error; the result
        # was previously bound to an unused variable.
        MeaningfulLocationsAggregate(
            meaningful_locations=MeaningfulLocations(
                clusters=HartiganCluster(
                    calldays=CallDays(subscriber_locations=SubscriberLocations(
                        start="2016-01-01",
                        stop="2016-01-02",
                        spatial_unit=make_spatial_unit("versioned-site"),
                    )),
                    radius=1,
                ),
                scores=EventScore(
                    start="2016-01-01",
                    stop="2016-01-02",
                    spatial_unit=make_spatial_unit("versioned-site"),
                ),
                # NOTE(review): `labels` is not defined in this view — presumably
                # a module-level constant or a missing fixture; confirm.
                labels=labels,
                label="evening",
            ),
            spatial_unit=make_spatial_unit("lon-lat"),
        )
# Example 30
def test_default_indexes():
    """
    Check that default indexing columns are correct
    """
    assert daily_location("2016-01-01", "2016-01-02").index_cols == [
        ["pcod"],
        '"subscriber"',
    ]
    lon_lat_dl = daily_location(
        "2016-01-01", "2016-01-02", spatial_unit=make_spatial_unit("lon-lat")
    )
    assert lon_lat_dl.index_cols == [["lon", "lat"], '"subscriber"']
    assert SubscriberDegree("2016-01-01", "2016-01-02").index_cols == [
        '"subscriber"'
    ]