def test_random_ts_and_window_size(self, df: pd.DataFrame, window_size: int):
    ma_feature = MovingAverage(window_size=window_size)
    df_ma = ma_feature.fit_transform(df)
    expected_df_ma = self._correct_ma(df, window_size)
    testing.assert_frame_equal(expected_df_ma, df_ma)
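# Presumably this test runs under a hypothesis @given decorator (not shown in
# this excerpt) that supplies `df` and `window_size`. A generic sketch of such
# strategies, using hypothesis.extra.pandas rather than gtime's own helpers
# (the strategy choices here are assumptions, not the project's actual ones):
#
#     from hypothesis import given, strategies as st
#     from hypothesis.extra.pandas import column, data_frames, range_indexes
#
#     @given(
#         df=data_frames(
#             [column("x0", dtype=float)], index=range_indexes(min_size=1)
#         ),
#         window_size=st.integers(min_value=1, max_value=20),
#     )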
def test_invalid_window_size(self):
    window_size = -1
    df = pd.DataFrame.from_dict({"x0": [0, 1, 2, 3, 4, 5]})
    ma_feature = MovingAverage(window_size=window_size)
    with pytest.raises(ValueError):
        ma_feature.fit_transform(df)
def test_positive_window_size(self):
    window_size = 2
    df = pd.DataFrame.from_dict({"x": [0, 1, 2, 3, 4, 5]})
    ma_feature = MovingAverage(window_size=window_size)
    df_ma = ma_feature.fit_transform(df)
    output_name = "x__" + ma_feature.__class__.__name__
    expected_df_ma = pd.DataFrame.from_dict(
        {output_name: [np.nan, 0.5, 1.5, 2.5, 3.5, 4.5]}
    )
    testing.assert_frame_equal(expected_df_ma, df_ma, check_names=False)
def test_multi_columns_window_size(self):
    window_size = 2
    df = pd.DataFrame.from_dict(
        {"x0": [0, 1, 2, 3, 4, 5], "x1": [7, 8, 9, 10, 11, 12]}
    )
    ma_feature = MovingAverage(window_size=window_size)
    feature_name = ma_feature.__class__.__name__
    df_ma = ma_feature.fit_transform(df)
    expected_df_ma = pd.DataFrame(
        {
            f"x0__{feature_name}": [np.nan, 0.5, 1.5, 2.5, 3.5, 4.5],
            f"x1__{feature_name}": [np.nan, 7.5, 8.5, 9.5, 10.5, 11.5],
        }
    )
    testing.assert_frame_equal(expected_df_ma, df_ma, check_names=False)
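# For reference, a minimal sketch of the behaviour the tests above exercise
# (inferred only from the API as used here): fit_transform computes a trailing
# rolling mean and renames each column to "<column>__MovingAverage", so the
# first window_size - 1 rows are NaN.
#
#     df = pd.DataFrame({"x": [0, 1, 2, 3]})
#     MovingAverage(window_size=2).fit_transform(df)
#     #    x__MovingAverage
#     # 0               NaN
#     # 1               0.5
#     # 2               1.5
#     # 3               2.5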
def features1():
    # Each entry is a (name, transformer, columns) triple, as consumed by
    # FeatureCreation (the same convention as sklearn's ColumnTransformer).
    return [
        ("shift_0", Shift(0), make_column_selector(dtype_include=np.number)),
        ("shift_1", Shift(1), make_column_selector(dtype_include=np.number)),
        (
            "moving_average_3",
            MovingAverage(window_size=3),
            make_column_selector(dtype_include=np.number),
        ),
    ]
def test_feature_creation_transform():
    data = testing.makeTimeDataFrame(freq="s")
    shift = Shift(1)
    ma = MovingAverage(window_size=3)
    col_name = 'A'
    fc = FeatureCreation(
        [
            ('s1', shift, [col_name]),
            ('ma3', ma, [col_name]),
        ]
    )
    res = fc.fit(data).transform(data)
    assert_array_equal(
        res.columns.values,
        [
            f's1__{col_name}__{shift.__class__.__name__}',
            f'ma3__{col_name}__{ma.__class__.__name__}',
        ],
    )
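# The assertion above pins down FeatureCreation's output naming scheme: each
# produced column is "<transformer name>__<input column>__<feature class name>".
# A minimal sketch using the same API as this test:
#
#     data = testing.makeTimeDataFrame(freq="s")  # columns A, B, C, D
#     fc = FeatureCreation([('s1', Shift(1), ['A'])])
#     fc.fit(data).transform(data).columns
#     # Index(['s1__A__Shift'], dtype='object')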
    numpy_X_y_matrices,
    numpy_X_matrices,
)
from gtime.utils.hypothesis.general_strategies import (
    shape_X_y_matrices,
    ordered_pair,
    shape_matrix,
)

df_transformer = FeatureCreation(
    [
        ("shift_0", Shift(0), make_column_selector(dtype_include=np.number)),
        ("shift_1", Shift(1), make_column_selector(dtype_include=np.number)),
        (
            "moving_average_3",
            MovingAverage(window_size=3),
            make_column_selector(dtype_include=np.number),
        ),
    ]
)


class TestXyMatrices:
    @given(X_y_matrices(horizon=3, df_transformer=df_transformer))
    def test_X_shape_correct(self, X_y: Tuple[pd.DataFrame, pd.DataFrame]):
        X, y = X_y
        assert X.shape[1] == len(df_transformer.transformers_)

    @given(X_y_matrices(horizon=3, df_transformer=df_transformer))
    def test_shape_consistent(self, X_y: Tuple[pd.DataFrame, pd.DataFrame]):
        X, y = X_y
        # The original excerpt is truncated here; the natural consistency
        # check, assumed from the test name, is that X and y share rows:
        assert X.shape[0] == y.shape[0]
def _correct_ma(self, df: pd.DataFrame, window_size: int) -> pd.DataFrame:
    # Reference implementation: a plain pandas trailing rolling mean, with
    # each column renamed to "<column>__MovingAverage".
    return (
        df.rolling(window_size)
        .mean()
        .add_suffix("__" + MovingAverage().__class__.__name__)
    )
class TestCVPipeline:
    @given(
        models=models_grid(),
        n_splits=st.integers(min_value=2, max_value=10),
        blocking=st.booleans(),
        metrics=metrics(),
    )
    def test_constructor(self, models, n_splits, blocking, metrics):
        cv_pipeline = CVPipeline(
            models_sets=models, n_splits=n_splits, blocking=blocking, metrics=metrics
        )
        list_len = np.sum(
            [np.prod([len(y) for y in x.values()]) for x in models.values()]
        )
        assert list_len == len(cv_pipeline.model_list)
        assert len(metrics) == len(cv_pipeline.metrics)

    @pytest.mark.parametrize(
        "models", [{Naive: {"horizon": [3]}, AR: {"horizon": [3], "p": [2, 3]}}]
    )
    @pytest.mark.parametrize("metrics", [{"RMSE": rmse, "MAE": mae}])
    @pytest.mark.parametrize("n_splits", [3, 5])
    @pytest.mark.parametrize("blocking", [True, False])
    @pytest.mark.parametrize("seed", [5, 1000])
    def test_fit_predict(self, models, n_splits, blocking, metrics, seed):
        cv_pipeline = CVPipeline(
            models_sets=models, n_splits=n_splits, blocking=blocking, metrics=metrics
        )
        np.random.seed(seed)
        idx = pd.period_range(start="2011-01-01", end="2012-01-01")
        df = pd.DataFrame(
            np.random.standard_normal((len(idx), 1)), index=idx, columns=["1"]
        )
        cv_pipeline.fit(df)
        assert cv_pipeline.cv_results_.shape == (
            len(cv_pipeline.model_list) * len(metrics),
            4,
        )
        y_pred = cv_pipeline.predict()
        horizon = cv_pipeline.best_model_.horizon
        assert y_pred.shape == (horizon, horizon)

    @pytest.mark.parametrize(
        "models",
        [
            {
                TimeSeriesForecastingModel: {
                    "features": [
                        [("s3", Shift(1), ["1"])],
                        [("ma10", MovingAverage(10), ["1"])],
                    ],
                    "horizon": [4],
                    "model": [NaiveForecaster(), DriftForecaster()],
                }
            }
        ],
    )
    @pytest.mark.parametrize("metrics", [{"RMSE": rmse, "MAE": mae}])
    @pytest.mark.parametrize("n_splits", [5])
    def test_model_assembly(self, models, n_splits, metrics):
        cv_pipeline = CVPipeline(models_sets=models, n_splits=n_splits, metrics=metrics)
        idx = pd.period_range(start="2011-01-01", end="2012-01-01")
        df = pd.DataFrame(
            np.random.standard_normal((len(idx), 1)), index=idx, columns=["1"]
        )
        cv_pipeline.fit(df)
        assert cv_pipeline.cv_results_.shape == (
            len(cv_pipeline.model_list) * len(metrics),
            4,
        )
        y_pred = cv_pipeline.predict()
        horizon = cv_pipeline.best_model_.horizon
        assert y_pred.shape == (horizon, horizon)

    @pytest.mark.parametrize(
        "models", [{Naive: {"horizon": [3]}, AR: {"horizon": [3], "p": [2, 3]}}]
    )
    @pytest.mark.parametrize("refit", ["all", "best", ["Naive: {'horizon': 3}"]])
    def test_models_refit(self, models, refit):
        cv_pipeline = CVPipeline(models_sets=models)
        idx = pd.period_range(start="2011-01-01", end="2012-01-01")
        df = pd.DataFrame(
            np.random.standard_normal((len(idx), 1)), index=idx, columns=["1"]
        )
        cv_pipeline.fit(df, refit=refit)
        assert cv_pipeline.cv_results_.shape == (
            len(cv_pipeline.model_list),
            4,
        )
        y_pred = cv_pipeline.predict()
        horizon = cv_pipeline.best_model_.horizon
        assert y_pred.shape == (horizon, horizon)
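# Sketch of the grid expansion checked in test_constructor (an assumption
# about CVPipeline internals, inferred from the sum-of-products assertion):
# a models_sets dict such as
#
#     {Naive: {"horizon": [3]}, AR: {"horizon": [3], "p": [2, 3]}}
#
# expands into one candidate model per parameter combination,
#
#     Naive(horizon=3), AR(horizon=3, p=2), AR(horizon=3, p=3)
#
# so len(cv_pipeline.model_list) == 1 + 2 == 3, and cv_results_ holds one row
# per (model, metric) pair, which is why its shape is
# (len(model_list) * len(metrics), 4) in the tests above.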