def test_load_series():
    """Load a 15-minute series at its own frequency and check its last value."""
    dt = datetime(2019, 1, 29, 15, 15)
    last = dt + timedelta(minutes=30)
    s = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" replaces the "15T" alias, which pandas has deprecated.
            index=pd.date_range(dt, last, freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
    )
    series = s.load_series(expected_frequency=timedelta(minutes=15))
    assert series.loc[last] == 3
def test_load_series_with_missing_data():
    """Expect NaNData when the data holds a NaN and no interpolation is configured."""
    dt = datetime(2019, 1, 29, 15, 15)
    s = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" replaces the "15T" alias, which pandas has deprecated.
            index=pd.date_range(dt, dt + timedelta(minutes=30), freq="15min"),
            data=[1, np.nan, 3],
        ),
        name="mydata",
    )
    with pytest.raises(NaNData) as e_info:
        s.load_series(expected_frequency=timedelta(minutes=15))
    # Checked after the block: e_info is only populated once the error is caught.
    assert "Nan values" in str(e_info.value)
def test_load_series_with_frequency_resampling():
    """Load three 15-minute values at an hourly frequency and expect their mean."""
    # NOTE(review): a function with this exact name (taking `down_or_up`) is
    # defined again further down in this file. Python keeps only the last
    # definition, so this test appears to be shadowed -- confirm whether it
    # should be renamed or removed.
    dt = datetime(2019, 1, 29, 15, 15)
    s = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" replaces the "15T" alias, which pandas has deprecated.
            index=pd.date_range(dt, dt + timedelta(minutes=30), freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
    )
    series = s.load_series(expected_frequency=timedelta(hours=1))
    assert len(series) == 1
    # Use .iloc: an integer key on a datetime-indexed Series is a deprecated
    # positional fallback in pandas.
    assert series.iloc[0] == 2  # the mean
def test_load_series_with_expected_time_window():
    """Load a timezone-aware series with a time window check matching its exact span."""
    dt = datetime(2019, 1, 29, 15, 15, tzinfo=pytz.utc)
    last = dt + timedelta(minutes=30)
    s = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" replaces the "15T" alias, which pandas has deprecated.
            index=pd.date_range(dt, last, freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
    )
    series = s.load_series(
        expected_frequency=timedelta(minutes=15),
        check_time_window=(dt, last),
    )
    assert series.loc[last] == 3
def test_load_series_with_interpolation():
    """Expect the NaN between 1 and 3 to become 2 with "time" interpolation."""
    dt = datetime(2019, 1, 29, 15, 15)
    s = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" replaces the "15T" alias, which pandas has deprecated.
            index=pd.date_range(dt, dt + timedelta(minutes=30), freq="15min"),
            data=[1, np.nan, 3],
        ),
        name="mydata",
        interpolation_config={"method": "time"},
    )
    series = s.load_series(expected_frequency=timedelta(minutes=15))
    assert len(series) == 3
    # Use .iloc: an integer key on a datetime-indexed Series is a deprecated
    # positional fallback in pandas.
    assert series.iloc[1] == 2  # the interpolated value
def test_load_series_with_non_existing_custom_frequency_resampling():
    """Expect IncompatibleModelSpecs when the resampling aggregation name is unknown."""
    # NOTE(review): a function with this exact name (taking `down_or_up`) is
    # defined again further down in this file. Python keeps only the last
    # definition, so this test appears to be shadowed -- confirm whether it
    # should be renamed or removed.
    dt = datetime(2019, 1, 29, 15, 15)
    s = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" replaces the "15T" alias, which pandas has deprecated.
            index=pd.date_range(dt, dt + timedelta(minutes=30), freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
        resampling_config={"aggregation": "GGG"},
    )
    with pytest.raises(IncompatibleModelSpecs) as e_info:
        s.load_series(expected_frequency=timedelta(hours=1))
    # Checked after the block: e_info is only populated once the error is caught.
    assert "Cannot find resampling aggregation GGG" in str(e_info.value)
def test_load_series_with_non_existing_interpolation():
    """Expect IncompatibleModelSpecs when the interpolation method is unknown."""
    dt = datetime(2019, 1, 29, 15, 15)
    s = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" replaces the "15T" alias, which pandas has deprecated.
            index=pd.date_range(dt, dt + timedelta(minutes=30), freq="15min"),
            data=[1, np.nan, 3],
        ),
        name="mydata",
        interpolation_config={"method": "GGG"},
    )
    with pytest.raises(IncompatibleModelSpecs) as e_info:
        s.load_series(expected_frequency=timedelta(minutes=15))
    # Checked after the block: e_info is only populated once the error is caught.
    expected_message = (
        "Cannot call interpolate function with arguments {'method': 'GGG'}"
    )
    assert expected_message in str(e_info.value)
def test_load_series_with_transformation():
    """Check the feature transformation is applied only with transform_features=True.

    Without the flag the middle value stays 2; with it, the value is
    multiplied by the configured factor of 11.
    """
    dt = datetime(2019, 1, 29, 15, 15)
    middle = dt + timedelta(minutes=15)
    s = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" replaces the "15T" alias, which pandas has deprecated.
            index=pd.date_range(dt, dt + timedelta(minutes=30), freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
        feature_transformation=MyMultiplicationTransformation(factor=11),
    )

    untransformed = s.load_series(expected_frequency=timedelta(minutes=15))
    assert untransformed.loc[middle] == 2

    transformed = s.load_series(
        expected_frequency=timedelta(minutes=15),
        transform_features=True,
    )
    assert transformed.loc[middle] == 2 * 11
def test_load_series_with_frequency_resampling(down_or_up: str):
    """Resample a 15-minute series to 1 hour ("down") or 5 minutes (otherwise).

    The mean of the loaded series is expected to stay at 2 in both directions.

    NOTE(review): ``down_or_up`` is presumably supplied by a pytest
    parametrization or fixture that is not visible here -- confirm.
    """
    dt = datetime(2019, 1, 29, 15, 15)
    s = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" replaces the "15T" alias, which pandas has deprecated.
            index=pd.date_range(dt, dt + timedelta(minutes=30), freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
    )
    downsampling = down_or_up == "down"
    series = s.load_series(
        expected_frequency=timedelta(hours=1) if downsampling else timedelta(minutes=5)
    )
    # Name the expectation so the assert cannot be misread as `assert X if ...`.
    expected_length = 1 if downsampling else 9
    assert len(series) == expected_length
    assert series.mean() == 2  # the mean remains the same
def test_load_series_with_larger_expected_time_window():
    """Expect MissingData when the checked window extends past both ends of the data."""
    dt = datetime(2019, 1, 29, 15, 15, tzinfo=pytz.utc)
    s = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" replaces the "15T" alias, which pandas has deprecated.
            index=pd.date_range(dt, dt + timedelta(minutes=30), freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
    )
    # The window starts 15 minutes before and ends 15 minutes after the data.
    too_wide_window = (dt - timedelta(minutes=15), dt + timedelta(minutes=45))
    with pytest.raises(MissingData) as e_info:
        s.load_series(
            expected_frequency=timedelta(minutes=15),
            check_time_window=too_wide_window,
        )
    # Checked after the block: e_info is only populated once the error is caught.
    message = str(e_info.value)
    assert "starts too late" in message
    assert "ends too early" in message
def test_load_series_with_non_existing_custom_frequency_resampling(down_or_up: str):
    """Expect IncompatibleModelSpecs for an unknown down- or upsampling method.

    NOTE(review): ``down_or_up`` is presumably supplied by a pytest
    parametrization or fixture that is not visible here -- confirm.
    """
    dt = datetime(2019, 1, 29, 15, 15)
    s = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" replaces the "15T" alias, which pandas has deprecated.
            index=pd.date_range(dt, dt + timedelta(minutes=30), freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
        resampling_config={f"{down_or_up}sampling_method": "GGG"},
    )
    target_frequency = (
        timedelta(hours=1) if down_or_up == "down" else timedelta(minutes=5)
    )
    with pytest.raises(IncompatibleModelSpecs) as e_info:
        s.load_series(expected_frequency=target_frequency)
    # Checked after the block: e_info is only populated once the error is caught.
    assert f"Cannot find {down_or_up}sampling method GGG" in str(e_info.value)
def test_load_series_with_custom_frequency_resampling(down_or_up: str):
    """Resample with "sum" / "reverse_sum" and expect the total to stay at 6.

    NOTE(review): ``down_or_up`` is presumably supplied by a pytest
    parametrization or fixture that is not visible here -- confirm.
    """
    dt = datetime(2019, 1, 29, 15, 15)
    s = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" replaces the "15T" alias, which pandas has deprecated.
            index=pd.date_range(dt, dt + timedelta(minutes=30), freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
        resampling_config={
            "downsampling_method": "sum",
            "upsampling_method": "reverse_sum",
        },
    )
    downsampling = down_or_up == "down"
    series = s.load_series(
        expected_frequency=timedelta(hours=1) if downsampling else timedelta(minutes=5)
    )
    # Name the expectation so the assert cannot be misread as `assert X if ...`.
    expected_length = 1 if downsampling else 9
    assert len(series) == expected_length
    assert sum(series) == 6  # the sum remains the same
def test_load_series_with_instantaneous_measurements(down_or_up: str, exp_series):
    """
    Test resampling of instantaneous measurements.

    The specs use an event resolution of zero, "first" as downsampling method
    and "pad" interpolation limited to 3 steps. The loaded series is compared
    against ``exp_series``.

    NOTE(review): ``down_or_up`` and ``exp_series`` are presumably supplied by
    a pytest parametrization or fixtures that are not visible here -- confirm.
    """
    dt = datetime(2019, 1, 29, 15, 30)
    s = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" replaces the "15T" alias, which pandas has deprecated.
            index=pd.date_range(dt, dt + timedelta(minutes=30), freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
        resampling_config=dict(
            downsampling_method="first",
            event_resolution=timedelta(hours=0),
        ),
        interpolation_config=dict(method="pad", limit=3),
    )
    downsampling = down_or_up == "down"
    series = s.load_series(
        expected_frequency=timedelta(hours=1) if downsampling else timedelta(minutes=5)
    )
    # Name the expectation so the assert cannot be misread as `assert X if ...`.
    expected_length = 2 if downsampling else 7
    assert len(series) == expected_length
    pd.testing.assert_series_equal(series, exp_series)
def test_load_series_without_datetime_index():
    """Expect an error mentioning "DatetimeIndex" for data without a datetime index."""
    plain_data = pd.Series([1, 2, 3])
    # Construction and loading both stay inside the block: either may raise.
    with pytest.raises(Exception) as e_info:
        specs = ObjectSeriesSpecs(data=plain_data, name="mydata")
        specs.load_series(expected_frequency=timedelta(hours=1))
    error_text = str(e_info.value)
    assert "DatetimeIndex" in error_text