예제 #1
0
def get_dummy_series(
    ts_length: int, lt_end_value: int = 10, st_value_offset: int = 10
) -> TimeSeries:
    """Build a synthetic series by summing three generated components.

    Args:
        ts_length: ``length`` forwarded to each of the ``lt``, ``st`` and
            ``rt`` generators.
        lt_end_value: ``end_value`` forwarded to ``lt``.
        st_value_offset: ``value_y_offset`` forwarded to ``st``.

    Returns:
        The ``lt`` output plus the ``st`` output plus the ``rt`` output.
    """
    # NOTE(review): lt/st/rt are aliases imported elsewhere; presumably the
    # linear, sine and random-walk generators -- confirm against the imports.
    combined = lt(length=ts_length, end_value=lt_end_value)
    combined = combined + st(length=ts_length, value_y_offset=st_value_offset)
    return combined + rt(length=ts_length)
예제 #2
0
    def test_gridsearch_n_jobs(self):
        """
        Check that gridsearch with multiple workers (n_jobs=-1) returns the same
        best parameters as the single-worker run (n_jobs=1).
        """
        ts_length = 100
        split_at = round(ts_length * 0.8)

        np.random.seed(1)
        lt_part = lt(length=ts_length, end_value=1)
        st_part = st(length=ts_length, value_y_offset=0)
        rt_part = rt(length=ts_length)
        full_series = lt_part + st_part + rt_part

        # first 80% of the points for training, the remainder for validation
        train_part = full_series[:split_at]
        val_part = full_series[split_at:]

        arima_case = {
            "model": ARIMA,  # ExtendedForecastingModel
            "parameters": {"p": [18, 4, 8], "q": [1, 2, 3]},
        }
        rnn_case = {
            "model": BlockRNNModel,  # TorchForecastingModel
            "parameters": {
                "input_chunk_length": [1, 3, 5, 10],
                "output_chunk_length": [1, 3, 5, 10],
                "n_epochs": [1, 5],
                # necessary to avoid randomness among runs with same parameters
                "random_state": [42],
            },
        }

        for case in (arima_case, rnn_case):
            model_cls = case["model"]
            grid = case["parameters"]

            best_per_run = []
            for worker_count in (1, -1):
                # reseed NumPy before every search so both runs start alike
                np.random.seed(1)
                _, best = model_cls.gridsearch(
                    parameters=grid,
                    series=train_part,
                    val_series=val_part,
                    n_jobs=worker_count,
                )
                best_per_run.append(best)

            single_worker_best, multi_worker_best = best_per_run
            self.assertEqual(single_worker_best, multi_worker_best)
예제 #3
0
 def test_gridsearch_multi(self):
     """Run ``TCNModel.gridsearch`` over three kernel sizes on a stacked series.

     Nothing is asserted about the return value; the test only requires that
     the call completes without raising.
     """
     series_length = 40
     base = st(length=series_length, value_y_offset=10)
     extra = lt(length=series_length, end_value=20)
     stacked_series = base.stack(extra)

     # only kernel_size has more than one candidate value
     search_space = dict(
         input_chunk_length=[12],
         output_chunk_length=[3],
         n_epochs=[1],
         batch_size=[1],
         kernel_size=[2, 3, 4],
     )
     TCNModel.gridsearch(
         search_space,
         stacked_series,
         forecast_horizon=3,
         metric=mape,
     )
예제 #4
0
    def test_forecasting_residuals(self):
        """Check ``NaiveSeasonal(K=1).residuals`` on a constant and a linear series."""
        model = NaiveSeasonal(K=1)

        # constant input: every residual must be (almost) zero
        flat_residuals = model.residuals(ct(length=20))
        expected_zeros = np.zeros(len(flat_residuals))
        np.testing.assert_almost_equal(
            flat_residuals.univariate_values(), expected_zeros
        )

        # linear input: residuals must all be equal (zero first difference) ...
        sloped_residuals = model.residuals(lt(length=20))
        residual_values = sloped_residuals.univariate_values()
        np.testing.assert_almost_equal(
            np.diff(residual_values), np.zeros(len(sloped_residuals) - 1)
        )
        # ... and strictly greater than zero
        np.testing.assert_array_less(
            np.zeros(len(sloped_residuals)), residual_values
        )
예제 #5
0
 def test_gridsearch_multi(self):
     """Run ``TCNModel.gridsearch`` over three kernel sizes on a stacked series.

     Nothing is asserted about the return value; the test only requires that
     the call completes without raising.
     """
     series_length = 40
     base = st(length=series_length, value_y_offset=10)
     extra = lt(length=series_length, end_value=20)
     stacked_series = base.stack(extra)

     # only kernel_size has more than one candidate value
     search_space = dict(
         n_epochs=[1],
         batch_size=[1],
         input_size=[2],
         output_length=[3],
         output_size=[2],
         kernel_size=[2, 3, 4],
     )
     TCNModel.gridsearch(
         search_space,
         stacked_series,
         forecast_horizon=3,
         metric=mape,
         use_full_output_length=True,
     )
예제 #6
0
    def test_gridsearch(self):
        """Assert ``compare_best_against_random`` is truthy for three model classes.

        The same generated series is used with Theta, FFT and
        ExponentialSmoothing, each paired with its own parameter grid.
        """
        np.random.seed(1)
        ts_length = 50
        lt_part = lt(length=ts_length, end_value=10)
        st_part = st(length=ts_length, value_y_offset=10)
        rt_part = rt(length=ts_length)
        dummy_series = lt_part + st_part + rt_part

        # (model class, parameter grid) pairs, checked in this order
        searches = (
            (Theta, {"theta": list(range(3, 10))}),
            (
                FFT,
                {
                    "nr_freqs_to_keep": [10, 50, 100],
                    "trend": [None, "poly", "exp"],
                },
            ),
            (ExponentialSmoothing, {"seasonal_periods": list(range(5, 10))}),
        )
        for model_cls, grid in searches:
            outcome = compare_best_against_random(model_cls, grid, dummy_series)
            self.assertTrue(outcome)
예제 #7
0
    def test_backtest_forecasting(self):
        """Exercise ``backtest`` / ``historical_forecasts`` argument handling.

        Covers ``NaiveDrift().backtest`` with accepted and rejected ``start``,
        ``forecast_horizon`` and ``overlap_end`` combinations, then, when
        ``TORCH_AVAILABLE`` is truthy, ``TCNModel`` on the single and the
        stacked series.
        """
        linear_series = lt(length=50)
        linear_series_multi = linear_series.stack(linear_series)

        feb_01 = pd.Timestamp('20000201')
        feb_16 = pd.Timestamp('20000216')
        feb_17 = pd.Timestamp('20000217')

        # univariate model + univariate series
        score = NaiveDrift().backtest(
            linear_series, start=feb_01, forecast_horizon=3, metric=r2_score
        )
        self.assertEqual(score, 1.0)

        # starting on feb_17 with horizon 3 is rejected, whether overlap_end is
        # left at its default or explicitly set to False
        for rejected in (
            dict(start=feb_17, forecast_horizon=3),
            dict(start=feb_17, forecast_horizon=3, overlap_end=False),
        ):
            with self.assertRaises(ValueError):
                NaiveDrift().backtest(linear_series, **rejected)

        # calls that must complete without raising
        for accepted in (
            dict(start=feb_16, forecast_horizon=3),
            dict(start=feb_17, forecast_horizon=3, overlap_end=True),
            # Using forecast_horizon default value
            dict(start=feb_16),
            dict(start=feb_17, overlap_end=True),
            # Using an int or float value for start
            dict(start=30),
            dict(start=0.7, overlap_end=True),
        ):
            NaiveDrift().backtest(linear_series, **accepted)

        # Using invalid start and/or forecast_horizon values
        for expected_error, invalid in (
            (ValueError, dict(start=0.7, forecast_horizon=-1)),
            (ValueError, dict(start=-0.7, forecast_horizon=1)),
            (ValueError, dict(start=100)),
            (ValueError, dict(start=1.2)),
            (TypeError, dict(start='wrong type')),
            (ValueError, dict(start=49, forecast_horizon=2, overlap_end=False)),
        ):
            with self.assertRaises(expected_error):
                NaiveDrift().backtest(linear_series, **invalid)

        # univariate model + multivariate series
        with self.assertRaises(AssertionError):
            NaiveDrift().backtest(
                linear_series_multi, start=feb_01, forecast_horizon=3
            )

        # everything below needs the torch-backed TCNModel
        if not TORCH_AVAILABLE:
            return

        jan_25 = pd.Timestamp('20000125')
        last_point_kwargs = dict(
            start=jan_25,
            forecast_horizon=3,
            verbose=False,
            last_points_only=True,
        )

        # multivariate model + univariate series
        tcn_model = TCNModel(
            input_chunk_length=12, output_chunk_length=1, batch_size=1, n_epochs=1
        )
        pred = tcn_model.historical_forecasts(linear_series, **last_point_kwargs)
        self.assertEqual(pred.width, 1)
        self.assertEqual(pred.end_time(), linear_series.end_time())

        # multivariate model + multivariate series
        # (same model instance as above, now given the stacked series)
        with self.assertRaises(ValueError):
            tcn_model.backtest(
                linear_series_multi,
                start=jan_25,
                forecast_horizon=3,
                verbose=False,
            )

        tcn_model = TCNModel(
            input_chunk_length=12, output_chunk_length=3, batch_size=1, n_epochs=1
        )
        pred = tcn_model.historical_forecasts(
            linear_series_multi, **last_point_kwargs
        )
        self.assertEqual(pred.width, 2)
        self.assertEqual(pred.end_time(), linear_series.end_time())
예제 #8
0
    def test_backtest_forecasting(self):
        """Exercise ``backtest`` / ``historical_forecasts`` argument handling.

        Covers ``NaiveDrift().backtest`` with accepted and rejected ``start``,
        ``forecast_horizon``, ``overlap_end`` and ``train_length`` values, on a
        series from ``lt`` and on a copy rebuilt with
        ``TimeSeries.from_values``; then, when ``TORCH_AVAILABLE`` is truthy,
        ``TCNModel`` on the single and the stacked series.
        """
        linear_series = lt(length=50)
        linear_series_int = TimeSeries.from_values(linear_series.values())
        linear_series_multi = linear_series.stack(linear_series)

        feb_01 = pd.Timestamp("20000201")
        feb_16 = pd.Timestamp("20000216")
        feb_17 = pd.Timestamp("20000217")
        r2_from_feb_01 = dict(start=feb_01, forecast_horizon=3, metric=r2_score)

        # univariate model + univariate series
        score = NaiveDrift().backtest(linear_series, **r2_from_feb_01)
        self.assertEqual(score, 1.0)

        # very large train length should not affect the backtest
        score = NaiveDrift().backtest(
            linear_series, train_length=10000, **r2_from_feb_01
        )
        self.assertEqual(score, 1.0)

        # window of size 2 is too small for naive drift
        with self.assertRaises(ValueError):
            NaiveDrift().backtest(linear_series, train_length=2, **r2_from_feb_01)

        # test that it also works for time series that are not Datetime-indexed
        score = NaiveDrift().backtest(
            linear_series_int, start=0.7, forecast_horizon=3, metric=r2_score
        )
        self.assertEqual(score, 1.0)

        # starting on feb_17 with horizon 3 and overlap_end=False is rejected
        with self.assertRaises(ValueError):
            NaiveDrift().backtest(
                linear_series, start=feb_17, forecast_horizon=3, overlap_end=False
            )

        # calls that must complete without raising
        for accepted in (
            dict(start=feb_16, forecast_horizon=3),
            dict(start=feb_17, forecast_horizon=3, overlap_end=True),
            # Using forecast_horizon default value
            dict(start=feb_16),
            dict(start=feb_17, overlap_end=True),
            # Using an int or float value for start
            dict(start=30),
            dict(start=0.7, overlap_end=True),
            # Set custom train window length
            dict(train_length=10, start=30),
        ):
            NaiveDrift().backtest(linear_series, **accepted)

        # Using invalid start, forecast_horizon and/or train_length values
        for expected_error, invalid in (
            (ValueError, dict(start=0.7, forecast_horizon=-1)),
            (ValueError, dict(start=-0.7, forecast_horizon=1)),
            (ValueError, dict(start=100)),
            (ValueError, dict(start=1.2)),
            (TypeError, dict(start="wrong type")),
            (ValueError, dict(train_length=0, start=0.5)),
            (TypeError, dict(train_length=1.2, start=0.5)),
            (TypeError, dict(train_length="wrong type", start=0.5)),
            (ValueError, dict(start=49, forecast_horizon=2, overlap_end=False)),
        ):
            with self.assertRaises(expected_error):
                NaiveDrift().backtest(linear_series, **invalid)

        # univariate model + multivariate series
        with self.assertRaises(AssertionError):
            NaiveDrift().backtest(
                linear_series_multi, start=feb_01, forecast_horizon=3
            )

        # everything below needs the torch-backed TCNModel
        if not TORCH_AVAILABLE:
            return

        jan_25 = pd.Timestamp("20000125")
        last_point_kwargs = dict(
            start=jan_25,
            forecast_horizon=3,
            verbose=False,
            last_points_only=True,
        )

        # multivariate model + univariate series
        tcn_model = TCNModel(
            input_chunk_length=12, output_chunk_length=1, batch_size=1, n_epochs=1
        )
        pred = tcn_model.historical_forecasts(linear_series, **last_point_kwargs)
        self.assertEqual(pred.width, 1)
        self.assertEqual(pred.end_time(), linear_series.end_time())

        # multivariate model + multivariate series
        # (same model instance as above, now given the stacked series)
        with self.assertRaises(ValueError):
            tcn_model.backtest(
                linear_series_multi,
                start=jan_25,
                forecast_horizon=3,
                verbose=False,
            )

        tcn_model = TCNModel(
            input_chunk_length=12, output_chunk_length=3, batch_size=1, n_epochs=1
        )
        pred = tcn_model.historical_forecasts(
            linear_series_multi, **last_point_kwargs
        )
        self.assertEqual(pred.width, 2)
        self.assertEqual(pred.end_time(), linear_series.end_time())
예제 #9
0
    def test_backtest_forecasting(self):
        """Exercise ``backtest`` with valid and invalid argument combinations.

        Uses ``NaiveDrift`` on a 50-point series from ``lt`` and on that series
        stacked with itself, then, when ``TORCH_AVAILABLE`` is truthy,
        ``TCNModel`` with several constructor configurations.

        NOTE(review): most calls pass ``None`` as the second positional
        argument, and the last TCN calls pass a series there instead;
        presumably this is an optional target series -- confirm against the
        ``backtest`` signature, which is not visible here.
        """
        linear_series = lt(length=50)
        linear_series_multi = linear_series.stack(linear_series)

        # univariate model + univariate series
        # (all arguments positional: series, None, start timestamp, 3)
        pred = NaiveDrift().backtest(linear_series, None,
                                     pd.Timestamp('20000201'), 3)
        self.assertEqual(r2_score(pred, linear_series), 1.0)

        # start 2000-02-17 with horizon 3 must raise, both with trim_to_series
        # left unset and with trim_to_series=True
        with self.assertRaises(ValueError):
            NaiveDrift().backtest(linear_series,
                                  None,
                                  start=pd.Timestamp('20000217'),
                                  forecast_horizon=3)
        with self.assertRaises(ValueError):
            NaiveDrift().backtest(linear_series,
                                  None,
                                  start=pd.Timestamp('20000217'),
                                  forecast_horizon=3,
                                  trim_to_series=True)
        # one day earlier, or the same start with trim_to_series=False, is
        # expected to run without raising
        NaiveDrift().backtest(linear_series,
                              None,
                              start=pd.Timestamp('20000216'),
                              forecast_horizon=3)
        NaiveDrift().backtest(linear_series,
                              None,
                              pd.Timestamp('20000217'),
                              forecast_horizon=3,
                              trim_to_series=False)

        # Using forecast_horizon default value
        NaiveDrift().backtest(linear_series,
                              None,
                              start=pd.Timestamp('20000216'))
        NaiveDrift().backtest(linear_series,
                              None,
                              pd.Timestamp('20000217'),
                              trim_to_series=False)

        # Using an int or float value for start
        NaiveDrift().backtest(linear_series, None, start=30)
        NaiveDrift().backtest(linear_series,
                              None,
                              start=0.7,
                              trim_to_series=False)

        # Using invalid start and/or forecast_horizon values
        with self.assertRaises(ValueError):
            NaiveDrift().backtest(linear_series,
                                  None,
                                  start=0.7,
                                  forecast_horizon=-1)
        # same values (0.7, -1) as the call above, this time passed positionally
        with self.assertRaises(ValueError):
            NaiveDrift().backtest(linear_series, None, 0.7, -1)

        # start=100 on a 50-point series, start=1.2 as a float, and a string
        # start are all expected to be rejected
        with self.assertRaises(ValueError):
            NaiveDrift().backtest(linear_series, None, start=100)
        with self.assertRaises(ValueError):
            NaiveDrift().backtest(linear_series, None, start=1.2)
        with self.assertRaises(TypeError):
            NaiveDrift().backtest(linear_series, None, start='wrong type')

        # start=49 with horizon 2 and trim_to_series=True must raise
        with self.assertRaises(ValueError):
            NaiveDrift().backtest(linear_series,
                                  None,
                                  start=49,
                                  forecast_horizon=2,
                                  trim_to_series=True)

        # univariate model + multivariate series
        with self.assertRaises(AssertionError):
            NaiveDrift().backtest(linear_series_multi, None,
                                  pd.Timestamp('20000201'), 3)

        # multivariate model + univariate series
        if TORCH_AVAILABLE:
            tcn_model = TCNModel(batch_size=1, n_epochs=1)
            pred = tcn_model.backtest(linear_series,
                                      None,
                                      pd.Timestamp('20000125'),
                                      3,
                                      verbose=False)
            self.assertEqual(pred.width, 1)

            # multivariate model + multivariate series
            # the model built above with default sizes must reject the stacked
            # series
            with self.assertRaises(ValueError):
                tcn_model.backtest(linear_series_multi,
                                   None,
                                   pd.Timestamp('20000125'),
                                   3,
                                   verbose=False)
            # input_size=2, output_length=3, output_size left at its default
            tcn_model = TCNModel(batch_size=1,
                                 n_epochs=1,
                                 input_size=2,
                                 output_length=3)
            # with None as second argument and use_full_output_length=False
            # this configuration is still expected to raise
            with self.assertRaises(ValueError):
                tcn_model.backtest(linear_series_multi,
                                   None,
                                   pd.Timestamp('20000125'),
                                   3,
                                   verbose=False,
                                   use_full_output_length=False)
            # passing a one-column selection as second argument yields a
            # width-1 prediction, here with horizon 1 ...
            pred = tcn_model.backtest(linear_series_multi,
                                      linear_series_multi[['0']],
                                      pd.Timestamp('20000125'),
                                      1,
                                      verbose=False,
                                      use_full_output_length=True)
            self.assertEqual(pred.width, 1)
            # ... and here with horizon 3 and the other column
            pred = tcn_model.backtest(linear_series_multi,
                                      linear_series_multi[['1']],
                                      pd.Timestamp('20000125'),
                                      3,
                                      verbose=False,
                                      use_full_output_length=True)
            self.assertEqual(pred.width, 1)
            # output_size=2 with the full stacked series as second argument
            # yields a width-2 prediction
            tcn_model = TCNModel(batch_size=1,
                                 n_epochs=1,
                                 input_size=2,
                                 output_length=3,
                                 output_size=2)
            pred = tcn_model.backtest(linear_series_multi,
                                      linear_series_multi,
                                      pd.Timestamp('20000125'),
                                      3,
                                      verbose=False,
                                      use_full_output_length=True)
            self.assertEqual(pred.width, 2)