def test_smooth(get_dataframe):
    """
    Check the iterated median filter output for one subscriber against
    values that were calculated independently.
    """
    evening_locations = SubscriberLocations(
        "2016-01-01",
        "2016-01-07",
        spatial_unit=make_spatial_unit("lon-lat"),
        hours=(20, 0),
    )
    min_distances = DistanceSeries(
        subscriber_locations=evening_locations, statistic="min"
    )
    median_filter = IterativeMedianFilter(
        query_to_filter=ImputedDistanceSeries(distance_series=min_distances),
        filter_window_size=3,
    )
    per_subscriber = get_dataframe(median_filter).set_index("subscriber")
    smoothed_values = per_subscriber.loc["038OVABN11Ak4W5P"].value.tolist()

    # Five identical leading values followed by one differing final value.
    plateau, final = 9343367.56611, 9221492.17419
    expected_values = [plateau] * 5 + [final]
    assert smoothed_values == pytest.approx(expected_values)
def test_returns_expected_values(stat, sub_a_expected, sub_b_expected, get_dataframe):
    """
    Test that we get expected return values for the various statistics
    when distances are measured from a daily location.

    ``stat`` is passed to DistanceSeries as the statistic, and
    ``sub_a_expected`` / ``sub_b_expected`` are the values expected for the
    two subscribers checked below (presumably supplied by a parametrization
    defined outside this block - confirm).
    """
    sub_a_id, sub_b_id = "j6QYNbMJgAwlVORP", "NG1km5NzBg5JD8nj"
    rl = daily_location("2016-01-01", spatial_unit=make_spatial_unit("lon-lat"))
    df = get_dataframe(
        DistanceSeries(
            subscriber_locations=SubscriberLocations(
                "2016-01-01",
                "2016-01-07",
                spatial_unit=make_spatial_unit("lon-lat"),
            ),
            reference_location=rl,
            statistic=stat,
        )
    )
    # Convert the datetime column with pandas before indexing on it, then
    # sort so lookups run against an ordered (subscriber, datetime) index.
    df = (
        df.assign(datetime=pd.to_datetime(df.datetime))
        .set_index(["subscriber", "datetime"])
        .sort_index()
    )
    assert df.loc[sub_a_id].loc["2016-01-01"].value == pytest.approx(sub_a_expected)
    assert df.loc[(sub_b_id, datetime(2016, 1, 6))].value == pytest.approx(
        sub_b_expected
    )
def test_error_when_subs_locations_not_point_geom():
    """
    A DistanceSeries built on subscriber locations with an admin level 3
    spatial unit (rather than lon-lat) should raise a ValueError.
    """
    expected_message = "does not have longitude/latitude columns"
    with pytest.raises(ValueError, match=expected_message):
        admin_locations = SubscriberLocations(
            "2016-01-01",
            "2016-01-07",
            spatial_unit=make_spatial_unit("admin", level=3),
        )
        DistanceSeries(subscriber_locations=admin_locations)
def test_no_cast_for_below_day(get_dataframe):
    """
    With an hourly time bucket the datetime column should still hold
    datetime values rather than being cast to dates.
    """
    two_day_locations = SubscriberLocations(
        "2016-01-01",
        "2016-01-02",
        spatial_unit=make_spatial_unit("lon-lat"),
    )
    hourly_series = DistanceSeries(
        subscriber_locations=two_day_locations,
        time_bucket="hour",
    )
    df = get_dataframe(hourly_series)
    first_timestamp = df.datetime[0]
    assert isinstance(first_timestamp, datetime)
def test_invalid_statistic_raises_error():
    """
    Constructing a DistanceSeries with an unrecognised statistic should
    raise a ValueError naming the bad value.
    """
    expected_message = "'NOT_A_STATISTIC' is not a valid statistic"
    with pytest.raises(ValueError, match=expected_message):
        locations = SubscriberLocations(
            "2016-01-01",
            "2016-01-07",
            spatial_unit=make_spatial_unit("lon-lat"),
        )
        DistanceSeries(subscriber_locations=locations, statistic="NOT_A_STATISTIC")
def test_invalid_time_bucket_raises_error():
    """
    Constructing a DistanceSeries with an unrecognised time bucket should
    raise a ValueError naming the bad value.
    """
    expected_message = "'NOT_A_BUCKET' is not a valid value for time_bucket"
    with pytest.raises(ValueError, match=expected_message):
        locations = SubscriberLocations(
            "2016-01-01",
            "2016-01-07",
            spatial_unit=make_spatial_unit("lon-lat"),
        )
        DistanceSeries(subscriber_locations=locations, time_bucket="NOT_A_BUCKET")
def test_invalid_reference_raises_error():
    """
    Passing a plain string as the reference location should raise a
    ValueError describing the accepted argument types.
    """
    # Split only for line length; the concatenated pattern is unchanged.
    expected_message = (
        "Argument 'reference_location' should be an instance of "
        "BaseLocation class or a tuple of two floats. Got: str"
    )
    with pytest.raises(ValueError, match=expected_message):
        locations = SubscriberLocations(
            "2016-01-01",
            "2016-01-07",
            spatial_unit=make_spatial_unit("lon-lat"),
        )
        DistanceSeries(
            subscriber_locations=locations,
            reference_location="NOT_A_LOCATION",
        )
def test_bad_window(size, match):
    """
    Test some median unfriendly window sizes raise errors.

    ``size`` is passed as ``filter_window_size`` and ``match`` is the
    pattern the resulting ValueError message must match (both presumably
    supplied by a parametrization defined outside this block - confirm).
    """
    # Build the input queries outside the raises block so that only the
    # filter construction can satisfy the expected ValueError.
    sl = SubscriberLocations(
        "2016-01-01",
        "2016-01-07",
        spatial_unit=make_spatial_unit("lon-lat"),
        hours=(20, 0),
    )
    ds = DistanceSeries(subscriber_locations=sl, statistic="min")
    imputed = ImputedDistanceSeries(distance_series=ds)
    with pytest.raises(ValueError, match=match):
        IterativeMedianFilter(
            query_to_filter=imputed,
            filter_window_size=size,
        )
def test_column_must_exist(column_arg):
    """
    Check errors for required columns.

    ``column_arg`` is the name of the IterativeMedianFilter keyword argument
    that is given a non-existent column name; the raised ValueError message
    must match that argument name.
    """
    # Build the input queries outside the raises block so that only the
    # filter construction can satisfy the expected ValueError.
    sl = SubscriberLocations(
        "2016-01-01",
        "2016-01-07",
        spatial_unit=make_spatial_unit("lon-lat"),
        hours=(20, 0),
    )
    ds = DistanceSeries(subscriber_locations=sl, statistic="min")
    imputed = ImputedDistanceSeries(distance_series=ds)
    with pytest.raises(ValueError, match=column_arg):
        IterativeMedianFilter(
            query_to_filter=imputed,
            filter_window_size=3,
            **{column_arg: "NOT_A_VALID_COLUMN"},
        )
def test_returns_expected_values_fixed_point(stat, sub_a_expected, sub_b_expected, get_dataframe):
    """
    Check the values returned for each statistic when no reference location
    is passed, so DistanceSeries falls back to its default reference
    (0, 0 according to the original test description - confirm against
    DistanceSeries).
    """
    first_sub, second_sub = "j6QYNbMJgAwlVORP", "NG1km5NzBg5JD8nj"
    locations = SubscriberLocations(
        "2016-01-01",
        "2016-01-07",
        spatial_unit=make_spatial_unit("lon-lat"),
    )
    series = DistanceSeries(subscriber_locations=locations, statistic=stat)
    indexed = get_dataframe(series).set_index(["subscriber", "datetime"])

    # (index key, expected value) pairs, checked in the original order.
    checks = (
        ((first_sub, date(2016, 1, 1)), sub_a_expected),
        ((second_sub, date(2016, 1, 6)), sub_b_expected),
    )
    for key, expected in checks:
        assert indexed.loc[key].value == pytest.approx(expected)
def test_impute(get_dataframe):
    """
    Test that ImputedDistanceSeries matches imputation done in Python.

    For every subscriber with more than 3 distinct datetimes in the raw
    distance series, the values produced by the ``fill_in_dates`` helper
    (defined elsewhere in this file) are compared with the values the
    ImputedDistanceSeries query returns for that subscriber.
    """
    sl = SubscriberLocations(
        "2016-01-01",
        "2016-01-07",
        spatial_unit=make_spatial_unit("lon-lat"),
        hours=(20, 0),
    )
    ds = DistanceSeries(subscriber_locations=sl, statistic="min")
    ds_df = get_dataframe(ds)
    sql = get_dataframe(ImputedDistanceSeries(distance_series=ds))
    for sub in ds_df.subscriber.drop_duplicates():
        # Filter once per subscriber and reuse the rows for both the
        # eligibility check and the Python-side imputation.
        sub_rows = ds_df[ds_df.subscriber == sub]
        if sub_rows.datetime.nunique() > 3:
            to_be_imputed = sub_rows.sort_values("datetime")
            imputed = fill_in_dates(to_be_imputed, 3, sl.start, sl.stop)
            assert imputed.value.values.tolist() == pytest.approx(
                sql[sql.subscriber == sub].value.tolist()
            )
def test_error_on_spatial_unit_mismatch():
    """
    A reference location with an admin level 3 spatial unit combined with
    lon-lat subscriber locations should raise a ValueError about the
    mismatched spatial units.
    """
    admin_reference = daily_location(
        "2016-01-01", spatial_unit=make_spatial_unit("admin", level=3)
    )
    expected_message = (
        "reference_location must have the same spatial unit as subscriber_locations."
    )
    with pytest.raises(ValueError, match=expected_message):
        lon_lat_locations = SubscriberLocations(
            "2016-01-01",
            "2016-01-07",
            spatial_unit=make_spatial_unit("lon-lat"),
        )
        DistanceSeries(
            subscriber_locations=lon_lat_locations,
            reference_location=admin_reference,
        )