def test_smooth(get_dataframe):
    """
    Check the iterated median filter output for one subscriber against
    values that were calculated independently of the query.
    """
    expected_values = [9343367.56611] * 5 + [9221492.17419]
    locations = SubscriberLocations(
        "2016-01-01",
        "2016-01-07",
        spatial_unit=make_spatial_unit("lon-lat"),
        hours=(20, 0),
    )
    # Chain: minimum distance series -> imputed series -> median filter.
    imputed = ImputedDistanceSeries(
        distance_series=DistanceSeries(
            subscriber_locations=locations, statistic="min"
        )
    )
    median_filter = IterativeMedianFilter(
        query_to_filter=imputed, filter_window_size=3
    )
    result = get_dataframe(median_filter)
    subscriber_values = result.set_index("subscriber").loc["038OVABN11Ak4W5P"].value
    assert subscriber_values.tolist() == pytest.approx(expected_values)
Exemple #2
0
def test_returns_expected_values(stat, sub_a_expected, sub_b_expected,
                                 get_dataframe):
    """
    Test that we get expected return values for the various statistics
    when distances are measured from a daily location reference.

    ``stat`` is passed to DistanceSeries as ``statistic``; ``sub_a_expected``
    and ``sub_b_expected`` are the values expected for the two subscribers
    checked below. These arguments are presumably supplied by a parametrize
    decorator that is not visible in this part of the file.
    """
    sub_a_id, sub_b_id = "j6QYNbMJgAwlVORP", "NG1km5NzBg5JD8nj"
    rl = daily_location("2016-01-01",
                        spatial_unit=make_spatial_unit("lon-lat"))
    df = get_dataframe(
        DistanceSeries(
            subscriber_locations=SubscriberLocations(
                "2016-01-01",
                "2016-01-07",
                spatial_unit=make_spatial_unit("lon-lat"),
            ),
            reference_location=rl,
            statistic=stat,
        ))
    # Convert the datetime column with pandas and sort the
    # (subscriber, datetime) index before looking rows up by label.
    df = (df.assign(datetime=pd.to_datetime(df.datetime)).set_index(
        ["subscriber", "datetime"]).sort_index())
    assert df.loc[sub_a_id].loc["2016-01-01"].value == pytest.approx(
        sub_a_expected)
    assert df.loc[(sub_b_id,
                   datetime(2016, 1,
                            6))].value == pytest.approx(sub_b_expected)
Exemple #3
0
def test_error_when_subs_locations_not_point_geom():
    """
    A ValueError should be raised when the subscriber locations use a
    spatial unit without longitude/latitude columns (here admin level 3).
    """
    expected_message = "does not have longitude/latitude columns"
    with pytest.raises(ValueError, match=expected_message):
        admin_locations = SubscriberLocations(
            "2016-01-01",
            "2016-01-07",
            spatial_unit=make_spatial_unit("admin", level=3),
        )
        DistanceSeries(subscriber_locations=admin_locations)
Exemple #4
0
def test_no_cast_for_below_day(get_dataframe):
    """
    With an hourly time bucket, the datetime column should hold datetimes
    rather than being cast to dates.
    """
    hourly_series = DistanceSeries(
        subscriber_locations=SubscriberLocations(
            "2016-01-01",
            "2016-01-02",
            spatial_unit=make_spatial_unit("lon-lat"),
        ),
        time_bucket="hour",
    )
    result = get_dataframe(hourly_series)
    first_value = result.datetime[0]
    assert isinstance(first_value, datetime)
Exemple #5
0
def test_invalid_statistic_raises_error():
    """
    An unrecognised statistic name should be rejected with a ValueError.
    """
    expected_message = "'NOT_A_STATISTIC' is not a valid statistic"
    with pytest.raises(ValueError, match=expected_message):
        locations = SubscriberLocations(
            "2016-01-01",
            "2016-01-07",
            spatial_unit=make_spatial_unit("lon-lat"),
        )
        DistanceSeries(subscriber_locations=locations, statistic="NOT_A_STATISTIC")
Exemple #6
0
def test_invalid_time_bucket_raises_error():
    """
    An unrecognised time bucket should be rejected with a ValueError.
    """
    expected_message = "'NOT_A_BUCKET' is not a valid value for time_bucket"
    with pytest.raises(ValueError, match=expected_message):
        locations = SubscriberLocations(
            "2016-01-01",
            "2016-01-07",
            spatial_unit=make_spatial_unit("lon-lat"),
        )
        DistanceSeries(subscriber_locations=locations, time_bucket="NOT_A_BUCKET")
Exemple #7
0
def test_invalid_reference_raises_error():
    """
    A reference location given as a plain string should be rejected with a
    ValueError.
    """
    expected_message = "Argument 'reference_location' should be an instance of BaseLocation class or a tuple of two floats. Got: str"
    with pytest.raises(ValueError, match=expected_message):
        locations = SubscriberLocations(
            "2016-01-01",
            "2016-01-07",
            spatial_unit=make_spatial_unit("lon-lat"),
        )
        DistanceSeries(
            subscriber_locations=locations,
            reference_location="NOT_A_LOCATION",
        )
def test_bad_window(size, match):
    """
    Test some median unfriendly window sizes raise errors.

    ``size`` is passed as ``filter_window_size`` and ``match`` is the pattern
    the ValueError message must match. Both are presumably supplied by a
    parametrize decorator that is not visible in this part of the file.
    """
    # Build the inputs outside the raises block so that a ValueError from the
    # setup cannot be mistaken for the window size error under test.
    sl = SubscriberLocations(
        "2016-01-01",
        "2016-01-07",
        spatial_unit=make_spatial_unit("lon-lat"),
        hours=(20, 0),
    )
    ds = DistanceSeries(subscriber_locations=sl, statistic="min")
    imputed = ImputedDistanceSeries(distance_series=ds)
    with pytest.raises(ValueError, match=match):
        IterativeMedianFilter(
            query_to_filter=imputed,
            filter_window_size=size,
        )
def test_column_must_exist(column_arg):
    """
    Check errors for required columns.

    ``column_arg`` is the name of an IterativeMedianFilter keyword argument;
    it is set to a column name that does not exist, and the ValueError
    message is expected to match the argument name. The values are
    presumably supplied by a parametrize decorator that is not visible in
    this part of the file.
    """
    # Build the inputs outside the raises block so that a ValueError from the
    # setup cannot be mistaken for the missing column error under test.
    sl = SubscriberLocations(
        "2016-01-01",
        "2016-01-07",
        spatial_unit=make_spatial_unit("lon-lat"),
        hours=(20, 0),
    )
    ds = DistanceSeries(subscriber_locations=sl, statistic="min")
    imputed = ImputedDistanceSeries(distance_series=ds)
    with pytest.raises(ValueError, match=column_arg):
        IterativeMedianFilter(
            query_to_filter=imputed,
            filter_window_size=3,
            **{column_arg: "NOT_A_VALID_COLUMN"},
        )
Exemple #10
0
def test_returns_expected_values_fixed_point(stat, sub_a_expected,
                                             sub_b_expected, get_dataframe):
    """
    Check the values returned for each statistic when no reference_location
    is passed, i.e. with the default reference of DistanceSeries (presumably
    the fixed point 0, 0 - confirm against DistanceSeries).
    """
    subscriber_a = "j6QYNbMJgAwlVORP"
    subscriber_b = "NG1km5NzBg5JD8nj"
    series = DistanceSeries(
        subscriber_locations=SubscriberLocations(
            "2016-01-01",
            "2016-01-07",
            spatial_unit=make_spatial_unit("lon-lat"),
        ),
        statistic=stat,
    )
    indexed = get_dataframe(series).set_index(["subscriber", "datetime"])

    value_a = indexed.loc[(subscriber_a, date(2016, 1, 1))].value
    assert value_a == pytest.approx(sub_a_expected)

    value_b = indexed.loc[(subscriber_b, date(2016, 1, 6))].value
    assert value_b == pytest.approx(sub_b_expected)
Exemple #11
0
def test_impute(get_dataframe):
    """
    Test that ImputedDistanceSeries agrees with imputing the series in pandas.

    For every subscriber with more than 3 distinct datetimes in the raw
    distance series, the subscriber's rows are sorted by datetime and passed
    through the ``fill_in_dates`` helper. The resulting values are compared
    with that subscriber's values from the ImputedDistanceSeries query.
    """
    sl = SubscriberLocations(
        "2016-01-01",
        "2016-01-07",
        spatial_unit=make_spatial_unit("lon-lat"),
        hours=(20, 0),
    )
    ds = DistanceSeries(subscriber_locations=sl, statistic="min")
    ds_df = get_dataframe(ds)
    imputed_df = get_dataframe(ImputedDistanceSeries(distance_series=ds))
    for sub in ds_df.subscriber.drop_duplicates():
        # Filter once per subscriber and reuse for both the check and the input.
        sub_rows = ds_df[ds_df.subscriber == sub]
        # Subscribers with 3 or fewer distinct datetimes are not compared.
        if sub_rows.datetime.nunique() <= 3:
            continue
        expected = fill_in_dates(
            sub_rows.sort_values("datetime"), 3, sl.start, sl.stop
        )
        assert expected.value.values.tolist() == pytest.approx(
            imputed_df[imputed_df.subscriber == sub].value.tolist())
Exemple #12
0
def test_error_on_spatial_unit_mismatch():
    """
    Test that error is raised if the reference location's spatial unit differs
    from the spatial unit of the subscriber locations.

    Here the reference location uses admin level 3 while the subscriber
    locations use lon-lat.
    """
    # Build both inputs outside the raises block so that only the
    # DistanceSeries constructor can produce the expected error.
    rl = daily_location("2016-01-01",
                        spatial_unit=make_spatial_unit("admin", level=3))
    sl = SubscriberLocations(
        "2016-01-01",
        "2016-01-07",
        spatial_unit=make_spatial_unit("lon-lat"),
    )

    with pytest.raises(
            ValueError,
            match=
            "reference_location must have the same spatial unit as subscriber_locations.",
    ):
        DistanceSeries(
            subscriber_locations=sl,
            reference_location=rl,
        )