예제 #1
0
def test_load_series():
    """Load a three-point, 15-minute series at its own frequency and check the last value."""
    start = datetime(2019, 1, 29, 15, 15)
    end = start + timedelta(minutes=30)
    specs = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" instead of the alias "15T", which newer pandas deprecates.
            index=pd.date_range(start, end, freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
    )
    loaded = specs.load_series(expected_frequency=timedelta(minutes=15))
    assert loaded.loc[end] == 3
예제 #2
0
def test_load_series_with_missing_data():
    """A NaN in the data, with no interpolation configured, must raise NaNData."""
    start = datetime(2019, 1, 29, 15, 15)
    specs = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" instead of the alias "15T", which newer pandas deprecates.
            index=pd.date_range(start, start + timedelta(minutes=30), freq="15min"),
            data=[1, np.nan, 3],
        ),
        name="mydata",
    )
    with pytest.raises(NaNData) as excinfo:
        specs.load_series(expected_frequency=timedelta(minutes=15))
    message = str(excinfo.value)
    assert "Nan values" in message
예제 #3
0
def test_load_series_with_frequency_resampling():
    """Requesting an hourly frequency collapses the three 15-minute values into one."""
    start = datetime(2019, 1, 29, 15, 15)
    specs = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" instead of the alias "15T", which newer pandas deprecates.
            index=pd.date_range(start, start + timedelta(minutes=30), freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
    )
    series = specs.load_series(expected_frequency=timedelta(hours=1))
    assert len(series) == 1
    # Positional access must go through .iloc: integer keys on a
    # datetime-indexed Series are no longer treated as positions by pandas.
    assert series.iloc[0] == 2  # the mean
예제 #4
0
def test_load_series_with_expected_time_window():
    """Loading succeeds when the checked time window matches the data's first and last timestamps."""
    start = datetime(2019, 1, 29, 15, 15, tzinfo=pytz.utc)
    end = start + timedelta(minutes=30)
    specs = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" instead of the alias "15T", which newer pandas deprecates.
            index=pd.date_range(start, end, freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
    )
    loaded = specs.load_series(
        expected_frequency=timedelta(minutes=15),
        check_time_window=(start, end),
    )
    assert loaded.loc[end] == 3
예제 #5
0
def test_load_series_with_interpolation():
    """With a "time" interpolation config, the NaN in the middle is filled in on load."""
    start = datetime(2019, 1, 29, 15, 15)
    specs = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" instead of the alias "15T", which newer pandas deprecates.
            index=pd.date_range(start, start + timedelta(minutes=30), freq="15min"),
            data=[1, np.nan, 3],
        ),
        name="mydata",
        interpolation_config={"method": "time"},
    )

    series = specs.load_series(expected_frequency=timedelta(minutes=15))
    assert len(series) == 3
    # Positional access must go through .iloc: integer keys on a
    # datetime-indexed Series are no longer treated as positions by pandas.
    assert series.iloc[1] == 2  # the interpolated value
예제 #6
0
def test_load_series_with_non_existing_custom_frequency_resampling():
    """An unknown resampling aggregation name must raise IncompatibleModelSpecs."""
    start = datetime(2019, 1, 29, 15, 15)
    specs = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" instead of the alias "15T", which newer pandas deprecates.
            index=pd.date_range(start, start + timedelta(minutes=30), freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
        resampling_config={"aggregation": "GGG"},
    )

    # The hourly target differs from the 15-minute data frequency.
    with pytest.raises(IncompatibleModelSpecs) as excinfo:
        specs.load_series(expected_frequency=timedelta(hours=1))
    message = str(excinfo.value)
    assert "Cannot find resampling aggregation GGG" in message
예제 #7
0
def test_load_series_with_non_existing_interpolation():
    """An unknown interpolation method must raise IncompatibleModelSpecs."""
    start = datetime(2019, 1, 29, 15, 15)
    specs = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" instead of the alias "15T", which newer pandas deprecates.
            index=pd.date_range(start, start + timedelta(minutes=30), freq="15min"),
            data=[1, np.nan, 3],
        ),
        name="mydata",
        interpolation_config={"method": "GGG"},
    )

    with pytest.raises(IncompatibleModelSpecs) as excinfo:
        specs.load_series(expected_frequency=timedelta(minutes=15))
    message = str(excinfo.value)
    assert (
        "Cannot call interpolate function with arguments {'method': 'GGG'}"
        in message
    )
예제 #8
0
def test_load_series_with_transformation():
    """The feature transformation is applied only when transform_features=True is passed."""
    start = datetime(2019, 1, 29, 15, 15)
    middle = start + timedelta(minutes=15)
    specs = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" instead of the alias "15T", which newer pandas deprecates.
            index=pd.date_range(start, start + timedelta(minutes=30), freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
        feature_transformation=MyMultiplicationTransformation(factor=11),
    )

    # Without the flag, the raw value comes back.
    untransformed = specs.load_series(expected_frequency=timedelta(minutes=15))
    assert untransformed.loc[middle] == 2

    # With the flag, the value is multiplied by the configured factor.
    transformed = specs.load_series(
        expected_frequency=timedelta(minutes=15),
        transform_features=True,
    )
    assert transformed.loc[middle] == 2 * 11
예제 #9
0
def test_load_series_with_frequency_resampling(down_or_up: str):
    """Resample a 15-minute series down (to 1 hour) or up (to 5 minutes).

    :param down_or_up: "down" requests an hourly frequency; any other value
        requests a 5-minute frequency.
    """
    start = datetime(2019, 1, 29, 15, 15)
    specs = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" instead of the alias "15T", which newer pandas deprecates.
            index=pd.date_range(start, start + timedelta(minutes=30), freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
    )

    downsampling = down_or_up == "down"
    target_frequency = timedelta(hours=1) if downsampling else timedelta(minutes=5)
    series = specs.load_series(expected_frequency=target_frequency)

    # Spell out the expected length instead of a conditional expression
    # inside the assert, which is easy to misread.
    expected_length = 1 if downsampling else 9
    assert len(series) == expected_length
    assert series.mean() == 2  # the mean remains the same
예제 #10
0
def test_load_series_with_larger_expected_time_window():
    """A checked time window wider than the data on both sides must raise MissingData."""
    start = datetime(2019, 1, 29, 15, 15, tzinfo=pytz.utc)
    specs = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" instead of the alias "15T", which newer pandas deprecates.
            index=pd.date_range(start, start + timedelta(minutes=30), freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
    )
    # The window starts 15 minutes before and ends 15 minutes after the data.
    too_wide_window = (
        start - timedelta(minutes=15),
        start + timedelta(minutes=45),
    )
    with pytest.raises(MissingData) as excinfo:
        specs.load_series(
            expected_frequency=timedelta(minutes=15),
            check_time_window=too_wide_window,
        )
    message = str(excinfo.value)
    assert "starts too late" in message
    assert "ends too early" in message
예제 #11
0
def test_load_series_with_non_existing_custom_frequency_resampling(down_or_up: str):
    """An unknown down- or upsampling method must raise IncompatibleModelSpecs.

    :param down_or_up: "down" requests an hourly frequency; any other value
        requests a 5-minute frequency. The value is also used to build the
        resampling config key and the expected error message.
    """
    start = datetime(2019, 1, 29, 15, 15)
    specs = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" instead of the alias "15T", which newer pandas deprecates.
            index=pd.date_range(start, start + timedelta(minutes=30), freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
        resampling_config={f"{down_or_up}sampling_method": "GGG"},
    )

    if down_or_up == "down":
        target_frequency = timedelta(hours=1)
    else:
        target_frequency = timedelta(minutes=5)

    with pytest.raises(IncompatibleModelSpecs) as excinfo:
        specs.load_series(expected_frequency=target_frequency)
    message = str(excinfo.value)
    assert f"Cannot find {down_or_up}sampling method GGG" in message
예제 #12
0
def test_load_series_with_custom_frequency_resampling(down_or_up: str):
    """Resample with "sum" / "reverse_sum" methods and check the total is preserved.

    :param down_or_up: "down" requests an hourly frequency; any other value
        requests a 5-minute frequency.
    """
    start = datetime(2019, 1, 29, 15, 15)
    specs = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" instead of the alias "15T", which newer pandas deprecates.
            index=pd.date_range(start, start + timedelta(minutes=30), freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
        resampling_config={
            "downsampling_method": "sum",
            "upsampling_method": "reverse_sum",
        },
    )

    downsampling = down_or_up == "down"
    target_frequency = timedelta(hours=1) if downsampling else timedelta(minutes=5)
    series = specs.load_series(expected_frequency=target_frequency)

    # Spell out the expected length instead of a conditional expression
    # inside the assert, which is easy to misread.
    expected_length = 1 if downsampling else 9
    assert len(series) == expected_length
    assert sum(series) == 6  # the sum remains the same
예제 #13
0
def test_load_series_with_instantaneous_measurements(down_or_up: str, exp_series):
    """Test resampling of instantaneous measurements.

    The series is configured with a zero event resolution, "first" as the
    downsampling method and "pad" interpolation limited to 3 steps.

    :param down_or_up: "down" requests an hourly frequency; any other value
        requests a 5-minute frequency.
    :param exp_series: the series the loaded result is compared against
        (presumably supplied by a parametrization or fixture outside this
        block -- confirm).
    """
    start = datetime(2019, 1, 29, 15, 30)
    specs = ObjectSeriesSpecs(
        data=pd.Series(
            # "15min" instead of the alias "15T", which newer pandas deprecates.
            index=pd.date_range(start, start + timedelta(minutes=30), freq="15min"),
            data=[1, 2, 3],
        ),
        name="mydata",
        resampling_config=dict(
            downsampling_method="first",
            event_resolution=timedelta(hours=0),
        ),
        interpolation_config=dict(method="pad", limit=3),
    )

    downsampling = down_or_up == "down"
    target_frequency = timedelta(hours=1) if downsampling else timedelta(minutes=5)
    series = specs.load_series(expected_frequency=target_frequency)

    # Spell out the expected length instead of a conditional expression
    # inside the assert, which is easy to misread.
    expected_length = 2 if downsampling else 7
    assert len(series) == expected_length
    pd.testing.assert_series_equal(series, exp_series)
예제 #14
0
def test_load_series_without_datetime_index():
    """A series without a datetime index must fail with an error mentioning "DatetimeIndex"."""
    # Default integer index, i.e. not a DatetimeIndex.
    plain_series = pd.Series([1, 2, 3])
    one_hour = timedelta(hours=1)
    # Construction and loading both stay inside the block: either one may
    # be the call that raises.
    with pytest.raises(Exception) as excinfo:
        specs = ObjectSeriesSpecs(data=plain_series, name="mydata")
        specs.load_series(expected_frequency=one_hour)
    message = str(excinfo.value)
    assert "DatetimeIndex" in message