Example #1
0
def get_dummy_series(
    ts_length: int, lt_end_value: int = 10, st_value_offset: int = 10
) -> TimeSeries:
    """Build a dummy series as the sum of three generated series of equal length.

    The generators ``lt``, ``st`` and ``rt`` are imported outside this view
    (presumably linear, sine and random-walk generators -- TODO confirm).

    Args:
        ts_length: Length passed to each of the three generators.
        lt_end_value: Forwarded to ``lt`` as ``end_value``.
        st_value_offset: Forwarded to ``st`` as ``value_y_offset``.

    Returns:
        The result of ``lt(...) + st(...) + rt(...)``.
    """
    lt_part = lt(length=ts_length, end_value=lt_end_value)
    st_part = st(length=ts_length, value_y_offset=st_value_offset)
    rt_part = rt(length=ts_length)
    return lt_part + st_part + rt_part
Example #2
0
    def test_gridsearch_n_jobs(self):
        """
        Check that gridsearch picks the same best parameters with n_jobs=1
        (single worker) and with n_jobs=-1 (multiple workers).
        """
        np.random.seed(1)
        ts_length = 100
        split_index = round(ts_length * 0.8)

        dummy_series = (
            lt(length=ts_length, end_value=1)
            + st(length=ts_length, value_y_offset=0)
            + rt(length=ts_length)
        )

        # First 80% of the points are used for training, the rest for validation.
        ts_train = dummy_series[:split_index]
        ts_val = dummy_series[split_index:]

        arima_case = {
            "model": ARIMA,  # ExtendedForecastingModel
            "parameters": {
                'p': [18, 4, 8],
                'q': [1, 2, 3],
            },
        }
        block_rnn_case = {
            "model": BlockRNNModel,  # TorchForecastingModel
            "parameters": {
                'input_chunk_length': [1, 3, 5, 10],
                'output_chunk_length': [1, 3, 5, 10],
                'n_epochs': [1, 5],
                # a fixed random_state is necessary to avoid randomness among
                # runs with the same parameters
                'random_state': [42],
            },
        }
        test_cases = [arima_case, block_rnn_case]

        for case in test_cases:
            model_cls = case["model"]
            grid = case["parameters"]

            best_per_run = []
            for n_jobs in (1, -1):
                # Reset NumPy's global seed so both runs start from the same state.
                np.random.seed(1)
                _, best_params = model_cls.gridsearch(
                    parameters=grid,
                    series=ts_train,
                    val_series=ts_val,
                    n_jobs=n_jobs,
                )
                best_per_run.append(best_params)

            single_worker_best, multi_worker_best = best_per_run
            self.assertEqual(single_worker_best, multi_worker_best)
Example #3
0
 def test_gridsearch_multi(self):
     """Run ``TCNModel.gridsearch`` on a series built by stacking two generated series.

     Nothing is asserted on the result: the test passes as long as the
     search over the three ``kernel_size`` candidates does not raise.
     """
     st_component = st(length=40, value_y_offset=10)
     lt_component = lt(length=40, end_value=20)
     stacked_series = st_component.stack(lt_component)

     # Only kernel_size has more than one candidate value.
     param_grid = {
         "input_chunk_length": [12],
         "output_chunk_length": [3],
         "n_epochs": [1],
         "batch_size": [1],
         "kernel_size": [2, 3, 4],
     }
     TCNModel.gridsearch(
         param_grid,
         stacked_series,
         forecast_horizon=3,
         metric=mape,
     )
Example #4
0
 def test_gridsearch_multi(self):
     """Run ``TCNModel.gridsearch`` on a series built by stacking two generated series.

     Nothing is asserted on the result: the test passes as long as the
     search over the three ``kernel_size`` candidates does not raise.
     """
     st_component = st(length=40, value_y_offset=10)
     lt_component = lt(length=40, end_value=20)
     stacked_series = st_component.stack(lt_component)

     # Only kernel_size has more than one candidate value.
     param_grid = {
         'n_epochs': [1],
         'batch_size': [1],
         'input_size': [2],
         'output_length': [3],
         'output_size': [2],
         'kernel_size': [2, 3, 4],
     }
     TCNModel.gridsearch(
         param_grid,
         stacked_series,
         forecast_horizon=3,
         metric=mape,
         use_full_output_length=True,
     )
Example #5
0
    def test_gridsearch(self):
        """
        For Theta, FFT and ExponentialSmoothing, assert that
        ``compare_best_against_random`` (defined outside this view) returns a
        truthy value for the given parameter grid on a dummy series.
        """
        np.random.seed(1)
        ts_length = 50
        dummy_series = (
            lt(length=ts_length, end_value=10)
            + st(length=ts_length, value_y_offset=10)
            + rt(length=ts_length)
        )

        # (model class, parameter grid) pairs, checked in this order.
        search_spaces = [
            (Theta, {'theta': list(range(3, 10))}),
            (
                FFT,
                {
                    'nr_freqs_to_keep': [10, 50, 100],
                    'trend': [None, 'poly', 'exp'],
                },
            ),
            (ExponentialSmoothing, {'seasonal_periods': list(range(5, 10))}),
        ]

        for model_cls, grid in search_spaces:
            self.assertTrue(
                compare_best_against_random(model_cls, grid, dummy_series))
Example #6
0
    def test_backtest_regression(self):
        """
        Backtest regression models on a sine target with covariates and check
        that the r2 score exceeds 0.95 for several kinds of `start` value
        (timestamp, int, float, default), and that too small `start` values
        raise a ValueError.
        """
        gaussian = gt(mean=2, length=50)
        sine = st(length=50)
        features = gaussian.stack(sine)
        features_multivariate = (gaussian + sine).stack(gaussian).stack(sine)
        target = sine

        # Re-wrap the covariates after renaming the columns labelled "0", "1" (and "2").
        features_df = features.pd_dataframe().rename(
            {"0": "Value0", "1": "Value1"}, axis=1)
        features = TimeSeries(features_df)

        multivariate_df = features_multivariate.pd_dataframe().rename(
            {"0": "Value0", "1": "Value1", "2": "Value2"}, axis=1)
        features_multivariate = TimeSeries(multivariate_df)

        start_date = pd.Timestamp('20000201')

        # univariate feature test
        linear_model = LinearRegressionModel(lags=None, lags_exog=[0, 1])
        score = linear_model.backtest(
            series=target,
            covariates=features,
            start=start_date,
            forecast_horizon=3,
            metric=r2_score,
            last_points_only=True,
        )
        self.assertGreater(score, 0.95)

        # Using an int or float value for start
        for start in (30, 0.5):
            forest = RandomForest(lags=12, lags_exog=[0])
            score = forest.backtest(
                series=target,
                covariates=features,
                start=start,
                forecast_horizon=3,
                metric=r2_score,
            )
            self.assertGreater(score, 0.95)

        # Using a too small start value
        for too_small_start in (0, 0.01):
            with self.assertRaises(ValueError):
                RandomForest(lags=12).backtest(
                    series=target,
                    start=too_small_start,
                    forecast_horizon=3,
                )

        # Using RandomForest's start default value
        forest = RandomForest(lags=12)
        score = forest.backtest(
            series=target,
            forecast_horizon=3,
            metric=r2_score,
        )
        self.assertGreater(score, 0.95)

        # multivariate feature test
        forest = RandomForest(lags=12, lags_exog=[0, 1])
        score = forest.backtest(
            series=target,
            covariates=features_multivariate,
            start=start_date,
            forecast_horizon=3,
            metric=r2_score,
        )
        self.assertGreater(score, 0.95)

        # multivariate with stride
        forest = RandomForest(lags=12, lags_exog=[0])
        score = forest.backtest(
            series=target,
            covariates=features_multivariate,
            start=start_date,
            forecast_horizon=3,
            metric=r2_score,
            last_points_only=True,
            stride=3,
        )
        self.assertGreater(score, 0.95)
Example #7
0
    def test_backtest_regression(self):
        """
        Backtest regression models on a sine target with future covariates.

        Covers: a timestamp `start`, an explicit `train_length`, int and float
        `start` values, too small `start` values (expected to raise
        ValueError), the default `start`, multivariate covariates with two
        different training windows, and a strided backtest. Each score is
        computed with `r2_score` and compared against a fixed lower bound.
        """
        # Seed NumPy's global RNG before any series is generated.
        np.random.seed(4)

        gaussian_series = gt(mean=2, length=50)
        sine_series = st(length=50)
        features = gaussian_series.stack(sine_series)
        features_multivariate = (
            (gaussian_series + sine_series).stack(gaussian_series).stack(sine_series)
        )
        target = sine_series

        features = features.with_columns_renamed(
            features.components, ["Value0", "Value1"]
        )

        features_multivariate = features_multivariate.with_columns_renamed(
            features_multivariate.components, ["Value0", "Value1", "Value2"]
        )

        # univariate feature test
        score = LinearRegressionModel(
            lags=None, lags_future_covariates=[0, -1]
        ).backtest(
            series=target,
            future_covariates=features,
            start=pd.Timestamp("20000201"),
            forecast_horizon=3,
            metric=r2_score,
            last_points_only=True,
        )
        self.assertGreater(score, 0.9)

        # univariate feature test + train length
        score = LinearRegressionModel(
            lags=None, lags_future_covariates=[0, -1]
        ).backtest(
            series=target,
            future_covariates=features,
            start=pd.Timestamp("20000201"),
            train_length=20,
            forecast_horizon=3,
            metric=r2_score,
            last_points_only=True,
        )
        self.assertGreater(score, 0.9)

        # Using an int or float value for start
        score = RandomForest(
            lags=12, lags_future_covariates=[0], random_state=0
        ).backtest(
            series=target,
            future_covariates=features,
            start=30,
            forecast_horizon=3,
            metric=r2_score,
        )
        self.assertGreater(score, 0.9)

        score = RandomForest(
            lags=12, lags_future_covariates=[0], random_state=0
        ).backtest(
            series=target,
            future_covariates=features,
            start=0.5,
            forecast_horizon=3,
            metric=r2_score,
        )
        self.assertGreater(score, 0.9)

        # Using a too small start value
        with self.assertRaises(ValueError):
            RandomForest(lags=12).backtest(series=target, start=0, forecast_horizon=3)

        with self.assertRaises(ValueError):
            RandomForest(lags=12).backtest(
                series=target, start=0.01, forecast_horizon=3
            )

        # Using RandomForest's start default value
        score = RandomForest(lags=12, random_state=0).backtest(
            series=target, forecast_horizon=3, metric=r2_score
        )
        self.assertGreater(score, 0.95)

        # multivariate feature test
        score = RandomForest(
            lags=12, lags_future_covariates=[0, -1], random_state=0
        ).backtest(
            series=target,
            future_covariates=features_multivariate,
            start=pd.Timestamp("20000201"),
            forecast_horizon=3,
            metric=r2_score,
        )
        self.assertGreater(score, 0.94)

        # multivariate feature test with train window 35
        score_35 = RandomForest(
            lags=12, lags_future_covariates=[0, -1], random_state=0
        ).backtest(
            series=target,
            train_length=35,
            future_covariates=features_multivariate,
            start=pd.Timestamp("20000201"),
            forecast_horizon=3,
            metric=r2_score,
        )
        # The score is a lazy %-formatting argument, so the message needs a
        # matching placeholder; without it the record cannot be formatted.
        logger.info(
            "Score for multivariate feature test with train window 35 is: %s",
            score_35,
        )
        self.assertGreater(score_35, 0.92)

        # multivariate feature test with train window 45
        score_45 = RandomForest(
            lags=12, lags_future_covariates=[0, -1], random_state=0
        ).backtest(
            series=target,
            train_length=45,
            future_covariates=features_multivariate,
            start=pd.Timestamp("20000201"),
            forecast_horizon=3,
            metric=r2_score,
        )
        logger.info(
            "Score for multivariate feature test with train window 45 is: %s",
            score_45,
        )
        self.assertGreater(score_45, 0.94)
        # The longer training window must score strictly higher than the shorter one.
        self.assertGreater(score_45, score_35)

        # multivariate with stride
        score = RandomForest(
            lags=12, lags_future_covariates=[0], random_state=0
        ).backtest(
            series=target,
            future_covariates=features_multivariate,
            start=pd.Timestamp("20000201"),
            forecast_horizon=3,
            metric=r2_score,
            last_points_only=True,
            stride=3,
        )
        self.assertGreater(score, 0.9)
Example #8
0
    def test_backtest_regression(self):
        """
        Backtest ``StandardRegressionModel(15)`` with ``r2_score`` as metric and
        expect a score of exactly 1.0 for several kinds of `start` value, for
        multivariate features and targets, and for strided historical forecasts.
        Too small `start` values are expected to raise a ValueError.
        """
        gaussian = gt(mean=2, length=50)
        sine = st(length=50)
        features = [gaussian + sine, gaussian]
        features_multivariate = [
            (gaussian + sine).stack(gaussian),
            gaussian,
        ]
        target = st(length=50)
        start_date = pd.Timestamp('20000201')

        # univariate feature test
        model = StandardRegressionModel(15)
        score = model.backtest(features, target, start_date, 3, metric=r2_score)
        self.assertEqual(score, 1.0)

        # Using an int or float value for start
        for start in (30, 0.5):
            model = StandardRegressionModel(15)
            score = model.backtest(
                features,
                target,
                start=start,
                forecast_horizon=3,
                metric=r2_score,
            )
            self.assertEqual(score, 1.0)

        # Using a too small start value
        for too_small_start in (0, 0.01):
            with self.assertRaises(ValueError):
                StandardRegressionModel(15).backtest(
                    features,
                    target,
                    start=too_small_start,
                    forecast_horizon=3,
                )

        # Using StandardRegressionModel's start default value
        model = StandardRegressionModel(15)
        score = model.backtest(
            features,
            target,
            forecast_horizon=3,
            metric=r2_score,
        )
        self.assertEqual(score, 1.0)

        # multivariate feature test
        model = StandardRegressionModel(15)
        score = model.backtest(
            features_multivariate,
            target,
            start_date,
            forecast_horizon=3,
            metric=r2_score,
        )
        self.assertEqual(score, 1.0)

        # multivariate target
        model = StandardRegressionModel(15)
        score = model.backtest(
            features_multivariate,
            target.stack(target),
            start_date,
            forecast_horizon=3,
            metric=r2_score,
        )
        self.assertEqual(score, 1.0)

        # multivariate target with stride
        model = StandardRegressionModel(15)
        hist = model.historical_forecasts(
            features_multivariate,
            target.stack(target),
            start_date,
            forecast_horizon=3,
            stride=3,
            last_points_only=True,
        )
        self.assertEqual(r2_score(target.stack(target), hist), 1.0)
        # Consecutive forecast timestamps must be 3 days apart.
        step = hist.time_index()[1] - hist.time_index()[0]
        self.assertEqual(step.days, 3)
Example #9
0
    def test_backtest_regression(self):
        """
        Backtest ``StandardRegressionModel(15)`` and expect ``r2_score`` between
        the returned predictions and the target to be exactly 1.0 for several
        kinds of `start` value, for multivariate features and targets, and for
        a strided backtest. Too small `start` values are expected to raise a
        ValueError.
        """
        gaussian = gt(mean=2, length=50)
        sine = st(length=50)
        features = [gaussian + sine, gaussian]
        features_multivariate = [
            (gaussian + sine).stack(gaussian),
            gaussian,
        ]
        target = st(length=50)
        start_date = pd.Timestamp('20000201')

        # univariate feature test
        model = StandardRegressionModel(15)
        pred = model.backtest(features, target, start_date, 3)
        self.assertEqual(r2_score(pred, target), 1.0)

        # Using an int or float value for start
        for start in (30, 0.5):
            model = StandardRegressionModel(15)
            pred = model.backtest(
                features,
                target,
                start=start,
                forecast_horizon=3,
            )
            self.assertEqual(r2_score(pred, target), 1.0)

        # Using a too small start value
        for too_small_start in (0, 0.01):
            with self.assertRaises(ValueError):
                StandardRegressionModel(15).backtest(
                    features,
                    target,
                    start=too_small_start,
                    forecast_horizon=3,
                )

        # Using StandardRegressionModel's start default value
        model = StandardRegressionModel(15)
        pred = model.backtest(features, target, forecast_horizon=3)
        self.assertEqual(r2_score(pred, target), 1.0)

        # multivariate feature test
        model = StandardRegressionModel(15)
        pred = model.backtest(features_multivariate, target, start_date, 3)
        self.assertEqual(r2_score(pred, target), 1.0)

        # multivariate target
        model = StandardRegressionModel(15)
        pred = model.backtest(
            features_multivariate,
            target.stack(target),
            start_date,
            3,
        )
        self.assertEqual(r2_score(pred, target.stack(target)), 1.0)

        # multivariate target with stride
        model = StandardRegressionModel(15)
        pred = model.backtest(
            features_multivariate,
            target.stack(target),
            start_date,
            3,
            stride=3,
        )
        self.assertEqual(r2_score(pred, target.stack(target)), 1.0)
        # Consecutive prediction timestamps must be 3 days apart.
        step = pred.time_index()[1] - pred.time_index()[0]
        self.assertEqual(step.days, 3)