Example #1
    def test_random_ts_and_window_size(self, df: pd.DataFrame,
                                       window_size: int):
        ma_feature = MovingAverage(window_size=window_size)
        df_ma = ma_feature.fit_transform(df)
        expected_df_ma = self._correct_ma(df, window_size)

        testing.assert_frame_equal(expected_df_ma, df_ma)
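The @given decorator that feeds df and window_size into this test is not shown. A minimal hypothesis harness sketch that could drive the same check; the strategies below and the gtime import path are assumptions, not the suite's own:

import pandas as pd
from hypothesis import given, strategies as st
from hypothesis.extra.pandas import column, data_frames, range_indexes
from pandas import testing

from gtime.feature_extraction import MovingAverage  # import path assumed

# illustrative strategies: small finite-float frames, small window sizes
frames = data_frames(
    columns=[column("x", elements=st.floats(-1e6, 1e6), dtype=float)],
    index=range_indexes(min_size=3, max_size=30),
)

@given(df=frames, window_size=st.integers(min_value=1, max_value=5))
def test_ma_matches_pandas(df: pd.DataFrame, window_size: int):
    # same oracle as _correct_ma in Example #8: a plain rolling mean
    result = MovingAverage(window_size=window_size).fit_transform(df)
    expected = df.rolling(window_size).mean().add_suffix("__MovingAverage")
    testing.assert_frame_equal(expected, result, check_names=False)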
Example #2
    def test_invalid_window_size(self):
        window_size = -1
        df = pd.DataFrame.from_dict({"x0": [0, 1, 2, 3, 4, 5]})

        ma_feature = MovingAverage(window_size=window_size)

        with pytest.raises(ValueError):
            ma_feature.fit_transform(df)
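The same contract can be exercised outside pytest; a small standalone sketch (gtime import path assumed):

import pandas as pd

from gtime.feature_extraction import MovingAverage  # import path assumed

df = pd.DataFrame({"x0": [0, 1, 2, 3, 4, 5]})
try:
    # a negative window size is rejected at transform time
    MovingAverage(window_size=-1).fit_transform(df)
except ValueError as err:
    print(f"rejected as expected: {err}")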
Example #3
    def test_positive_window_size(self):
        window_size = 2
        df = pd.DataFrame.from_dict({"x": [0, 1, 2, 3, 4, 5]})

        ma_feature = MovingAverage(window_size=window_size)
        df_ma = ma_feature.fit_transform(df)
        output_name = "x__" + ma_feature.__class__.__name__
        expected_df_ma = pd.DataFrame.from_dict(
            {output_name: [np.nan, 0.5, 1.5, 2.5, 3.5, 4.5]})

        testing.assert_frame_equal(expected_df_ma, df_ma, check_names=False)
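The expected frame is just pandas' rolling mean with a suffixed column name, the same computation as the _correct_ma oracle in Example #8. A plain-pandas sketch with no gtime dependency:

import pandas as pd

df = pd.DataFrame({"x": [0, 1, 2, 3, 4, 5]})
expected = df.rolling(2).mean().add_suffix("__MovingAverage")
print(expected["x__MovingAverage"].tolist())
# [nan, 0.5, 1.5, 2.5, 3.5, 4.5]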
Example #4
    def test_multi_columns_window_size(self):
        window_size = 2
        df = pd.DataFrame.from_dict({
            "x0": [0, 1, 2, 3, 4, 5],
            "x1": [7, 8, 9, 10, 11, 12]
        })

        ma_feature = MovingAverage(window_size=window_size)
        feature_name = ma_feature.__class__.__name__

        df_ma = ma_feature.fit_transform(df)
        expected_df_ma = pd.DataFrame({
            f"x0__{feature_name}": [np.nan, 0.5, 1.5, 2.5, 3.5, 4.5],
            f"x1__{feature_name}": [np.nan, 7.5, 8.5, 9.5, 10.5, 11.5],
        })

        testing.assert_frame_equal(expected_df_ma, df_ma, check_names=False)
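A note on check_names=False: it relaxes only the .name attributes of the index and columns, not the column labels or the values themselves. A standalone illustration in plain pandas:

import pandas as pd
from pandas import testing

left = pd.DataFrame({"a": [1.0, 2.0]})
right = left.copy()
right.columns.name = "features"  # only the .name attribute differs

testing.assert_frame_equal(left, right, check_names=False)  # passes
# with check_names=True (the default) the same call raises AssertionError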
Example #5
def features1():
    return [
        ("shift_0", Shift(0), make_column_selector(dtype_include=np.number)),
        ("shift_1", Shift(1), make_column_selector(dtype_include=np.number)),
        (
            "moving_average_3",
            MovingAverage(window_size=3),
            make_column_selector(dtype_include=np.number),
        ),
    ]


def test_feature_creation_transform():
    data = testing.makeTimeDataFrame(freq="s")

    shift = Shift(1)
    ma = MovingAverage(window_size=3)

    col_name = 'A'

    fc = FeatureCreation([
        ('s1', shift, [col_name]),
        ('ma3', ma, [col_name]),
    ])
    res = fc.fit(data).transform(data)

    assert_array_equal(res.columns.values, [
        f's1__{col_name}__{shift.__class__.__name__}',
        f'ma3__{col_name}__{ma.__class__.__name__}'
    ])
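The composed column names follow the pattern <transformer_name>__<column>__<ClassName>. A standalone sketch of the same pipeline on synthetic data; the module paths and the PeriodIndex requirement are assumptions based on giotto-time's documentation:

import pandas as pd

from gtime.compose import FeatureCreation            # paths assumed
from gtime.feature_extraction import MovingAverage, Shift

idx = pd.period_range(start="2020-01-01", periods=6, freq="D")
data = pd.DataFrame({"A": range(6)}, index=idx, dtype=float)

fc = FeatureCreation([
    ("s1", Shift(1), ["A"]),
    ("ma3", MovingAverage(window_size=3), ["A"]),
])
res = fc.fit(data).transform(data)
print(res.columns.tolist())  # ['s1__A__Shift', 'ma3__A__MovingAverage']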
Example #6
from typing import Tuple

import numpy as np
import pandas as pd
from hypothesis import given
from sklearn.compose import make_column_selector
from gtime.compose import FeatureCreation
from gtime.feature_extraction import MovingAverage, Shift
from gtime.utils.hypothesis.feature_matrices import (  # module path inferred from usage
    X_y_matrices, numpy_X_y_matrices, numpy_X_matrices,
)
from gtime.utils.hypothesis.general_strategies import (
    shape_X_y_matrices, ordered_pair, shape_matrix,
)

df_transformer = FeatureCreation(
    [
        ("shift_0", Shift(0), make_column_selector(dtype_include=np.number)),
        ("shift_1", Shift(1), make_column_selector(dtype_include=np.number)),
        (
            "moving_average_3",
            MovingAverage(window_size=3),
            make_column_selector(dtype_include=np.number),
        ),
    ]
)


class TestXyMatrices:
    @given(X_y_matrices(horizon=3, df_transformer=df_transformer))
    def test_X_shape_correct(self, X_y: Tuple[pd.DataFrame, pd.DataFrame]):
        X, y = X_y
        assert X.shape[1] == len(df_transformer.transformers_)

    @given(X_y_matrices(horizon=3, df_transformer=df_transformer))
    def test_shape_consistent(self, X_y: Tuple[pd.DataFrame, pd.DataFrame]):
        X, y = X_y
        # the original body is truncated here; X and y are built over the
        # same index, so the natural assertion is equal row counts
        assert X.shape[0] == y.shape[0]
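For context on these assertions: y carries the next horizon values of the target, one column per step, over the same index as X. A hand-rolled illustration of that construction in plain pandas (gtime's own builder is assumed to behave like this, per the horizon parameter):

import pandas as pd

idx = pd.period_range("2020-01-01", periods=6, freq="D")
ts = pd.DataFrame({"ts": range(6)}, index=idx, dtype=float)

horizon = 3
y = pd.concat(
    {f"y_{k}": ts["ts"].shift(-k) for k in range(1, horizon + 1)}, axis=1
)
print(y.shape)  # (6, 3): same row count as the input, one column per step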
Example #8
    def _correct_ma(self, df: pd.DataFrame, window_size: int) -> pd.DataFrame:
        # Oracle for the MovingAverage tests: a plain pandas rolling mean,
        # with the "__MovingAverage" suffix the transformer adds to columns.
        return (
            df.rolling(window_size)
            .mean()
            .add_suffix("__" + MovingAverage.__name__)
        )
Example #9
class TestCVPipeline:
    @given(
        models=models_grid(),
        n_splits=st.integers(min_value=2, max_value=10),
        blocking=st.booleans(),
        metrics=metrics(),
    )
    def test_constructor(self, models, n_splits, blocking, metrics):
        cv_pipeline = CVPipeline(models_sets=models,
                                 n_splits=n_splits,
                                 blocking=blocking,
                                 metrics=metrics)
        list_len = np.sum(
            [np.prod([len(y) for y in x.values()]) for x in models.values()])
        assert list_len == len(cv_pipeline.model_list)
        assert len(metrics) == len(cv_pipeline.metrics)
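The expected length is the size of each model's parameter grid, summed over models. The arithmetic, worked on the grid used in test_fit_predict below (string keys stand in for the model classes):

import numpy as np

models = {
    "Naive": {"horizon": [3]},            # 1 combination
    "AR": {"horizon": [3], "p": [2, 3]},  # 1 * 2 combinations
}
list_len = np.sum(
    [np.prod([len(v) for v in grid.values()]) for grid in models.values()]
)
print(list_len)  # 3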

    @pytest.mark.parametrize("models", [{
        Naive: {
            "horizon": [3]
        },
        AR: {
            "horizon": [3],
            "p": [2, 3]
        }
    }])
    @pytest.mark.parametrize("metrics", [{"RMSE": rmse, "MAE": mae}])
    @pytest.mark.parametrize("n_splits", [3, 5])
    @pytest.mark.parametrize("blocking", [True, False])
    @pytest.mark.parametrize("seed", [5, 1000])
    def test_fit_predict(self, models, n_splits, blocking, metrics, seed):
        cv_pipeline = CVPipeline(models_sets=models,
                                 n_splits=n_splits,
                                 blocking=blocking,
                                 metrics=metrics)
        np.random.seed(seed)
        idx = pd.period_range(start="2011-01-01", end="2012-01-01")
        df = pd.DataFrame(np.random.standard_normal((len(idx), 1)),
                          index=idx,
                          columns=["1"])
        cv_pipeline.fit(df)
        assert cv_pipeline.cv_results_.shape == (
            len(cv_pipeline.model_list) * len(metrics),
            4,
        )
        y_pred = cv_pipeline.predict()
        horizon = cv_pipeline.best_model_.horizon
        assert y_pred.shape == (horizon, horizon)
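Decoding the two shape assertions: cv_results_ holds one row per (model, metric) pair with four columns, and predict() returns one column per forecast step and, as the (horizon, horizon) assertion implies, one row per held-out timestamp. The arithmetic for this parametrization:

# Naive (1 combo) + AR over p in [2, 3] (2 combos), and 2 metrics
n_models = 1 + 2
n_metrics = len({"RMSE": "rmse", "MAE": "mae"})
horizon = 3
print((n_models * n_metrics, 4))  # expected cv_results_.shape: (6, 4)
print((horizon, horizon))         # expected y_pred.shape: (3, 3)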

    @pytest.mark.parametrize(
        "models",
        [{
            TimeSeriesForecastingModel: {
                "features": [
                    [("s3", Shift(1), ["1"])],
                    [("ma10", MovingAverage(10), ["1"])],
                ],
                "horizon": [4],
                "model": [NaiveForecaster(),
                          DriftForecaster()],
            }
        }],
    )
    @pytest.mark.parametrize("metrics", [{"RMSE": rmse, "MAE": mae}])
    @pytest.mark.parametrize("n_splits", [5])
    def test_model_assembly(self, models, n_splits, metrics):
        cv_pipeline = CVPipeline(models_sets=models,
                                 n_splits=n_splits,
                                 metrics=metrics)
        idx = pd.period_range(start="2011-01-01", end="2012-01-01")
        df = pd.DataFrame(np.random.standard_normal((len(idx), 1)),
                          index=idx,
                          columns=["1"])
        cv_pipeline.fit(df)
        assert cv_pipeline.cv_results_.shape == (
            len(cv_pipeline.model_list) * len(metrics),
            4,
        )
        y_pred = cv_pipeline.predict()
        horizon = cv_pipeline.best_model_.horizon
        assert y_pred.shape == (horizon, horizon)

    @pytest.mark.parametrize("models", [{
        Naive: {
            "horizon": [3]
        },
        AR: {
            "horizon": [3],
            "p": [2, 3]
        }
    }])
    @pytest.mark.parametrize("refit",
                             ["all", "best", ["Naive: {'horizon': 3}"]])
    def test_models_refit(self, models, refit):
        cv_pipeline = CVPipeline(models_sets=models)
        idx = pd.period_range(start="2011-01-01", end="2012-01-01")
        df = pd.DataFrame(np.random.standard_normal((len(idx), 1)),
                          index=idx,
                          columns=["1"])
        cv_pipeline.fit(df, refit=refit)
        assert cv_pipeline.cv_results_.shape == (
            len(cv_pipeline.model_list),
            4,
        )
        y_pred = cv_pipeline.predict()
        horizon = cv_pipeline.best_model_.horizon
        assert y_pred.shape == (horizon, horizon)
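On the refit parametrization: besides "all" and "best", an explicit list of model keys is accepted, and the key format appears to be "<ClassName>: <param dict>" (inferred from the refit list above). Building such a key:

# key format inferred from the parametrized refit list, not documented here
params = {"horizon": 3}
key = f"Naive: {params}"
print(key)  # Naive: {'horizon': 3}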