예제 #1
0
 def test_gridsearch_multi(self):
     """Run a small TCN grid search over a two-component stacked series.

     Only ``kernel_size`` is varied (three candidates); every other
     hyper-parameter has a single value. There are no assertions: the test
     passes as long as ``TCNModel.gridsearch`` completes without raising.
     """
     # NOTE(review): ``st`` / ``lt`` are presumably series generators imported
     # elsewhere in the file -- confirm against the import block.
     first_component = st(length=40, value_y_offset=10)
     second_component = lt(length=40, end_value=20)
     stacked_series = first_component.stack(second_component)

     search_space = dict(
         input_chunk_length=[12],
         output_chunk_length=[3],
         n_epochs=[1],
         batch_size=[1],
         kernel_size=[2, 3, 4],
     )
     TCNModel.gridsearch(
         search_space, stacked_series, forecast_horizon=3, metric=mape
     )
예제 #2
0
 def test_call_predict_global_models_multivariate_input_no_covariates(self):
     """Fit a naive ensemble on ``self.seq1`` and predict 5 steps for it.

     The ensemble combines an RNN, a TCN and an N-BEATS model, each trained
     for a single epoch. No covariates are supplied. The test only checks
     that ``fit`` and ``predict`` run without raising.
     """
     members = [
         RNNModel(12, n_epochs=1),
         TCNModel(10, 2, n_epochs=1),
         NBEATSModel(10, 2, n_epochs=1),
     ]
     ensemble = NaiveEnsembleModel(members)

     # NOTE(review): ``self.seq1`` is defined outside this snippet; the test
     # name suggests it holds the multi-series input -- confirm in setUp.
     ensemble.fit(self.seq1)
     ensemble.predict(n=5, series=self.seq1)
예제 #3
0
    def test_call_predict_global_models_univariate_input_no_covariates(self):
        """Check predict-before-fit fails, then fit/predict on ``self.series1``.

        The ensemble combines an RNN, a TCN and an N-BEATS model, each trained
        for a single epoch, and no covariates are used.
        """
        members = [
            RNNModel(12, n_epochs=1),
            TCNModel(10, 2, n_epochs=1),
            NBEATSModel(10, 2, n_epochs=1),
        ]
        ensemble = NaiveEnsembleModel(members)

        # Predicting with an ensemble that has not been fitted must raise.
        self.assertRaises(Exception, ensemble.predict, 5)

        # Once fitted, the very same call is expected to go through.
        ensemble.fit(self.series1)
        ensemble.predict(5)
예제 #4
0
        def _batch_prediction_test_helper_function(self, targets):
            """Assert that ``predict`` results do not depend on ``batch_size``.

            A TCN model is fitted on ``targets[0]`` (with
            ``self.covariates_past`` as past covariates), then used to
            forecast 160 steps for every series in ``targets``, once with the
            default batch size and once for each batch size in
            ``1 .. 4 * len(targets) - 1``. Every batched forecast must match
            the default forecast.

            Args:
                targets: sequence of target series. The first one is used for
                    training; all of them are forecast.
            """
            epsilon = 1e-4
            horizon = 160

            model = TCNModel(
                input_chunk_length=50,
                output_chunk_length=10,
                n_epochs=10,
                random_state=0,
            )
            model.fit(series=targets[0], past_covariates=self.covariates_past)

            # One covariate series per target; identical for every call below.
            covariates = [self.covariates] * len(targets)
            preds_default = model.predict(
                n=horizon,
                series=targets,
                past_covariates=covariates,
                batch_size=None,
            )

            # make batch size large enough to test stacking samples
            for batch_size in range(1, 4 * len(targets)):
                preds = model.predict(
                    n=horizon,
                    series=targets,
                    past_covariates=covariates,
                    batch_size=batch_size,
                )
                self.assertEqual(len(preds), len(preds_default))
                for pred, reference in zip(preds, preds_default):
                    # Compare the total *absolute* deviation: a signed sum
                    # would let positive and negative errors cancel out and
                    # would accept arbitrarily large negative deviations.
                    deviation = abs((pred - reference).values()).sum()
                    self.assertLess(deviation, epsilon)
예제 #5
0
 def test_call_predict_global_models_multivariate_input_with_covariates(self):
     """Fit a naive ensemble with covariates, then predict on truncated input.

     The ensemble (RNN, TCN, N-BEATS; one epoch each) is fitted on
     ``self.seq1`` with ``self.cov1``. Prediction then uses only the first 12
     points of each target and the first 14 points of each covariate series.
     The test only checks that nothing raises.
     """
     members = [
         RNNModel(12, n_epochs=1),
         TCNModel(10, 2, n_epochs=1),
         NBEATSModel(10, 2, n_epochs=1),
     ]
     ensemble = NaiveEnsembleModel(members)
     ensemble.fit(self.seq1, self.cov1)

     # Shortened copies of the training data used as prediction input.
     short_targets = [target[:12] for target in self.seq1]
     short_covariates = [covariate[:14] for covariate in self.cov1]

     ensemble.predict(
         n=2,
         series=short_targets,
         past_covariates=short_covariates,
     )
예제 #6
0
    def test_backtest_forecasting(self):
        """Exercise ``backtest`` / ``historical_forecasts`` on linear series.

        Covers, in order:
          * perfect R2 scores of ``NaiveDrift`` on a linear series (datetime
            and integer indexed), including an oversized ``train_length``;
          * accepted and rejected combinations of ``start``,
            ``forecast_horizon``, ``train_length`` and ``overlap_end``;
          * rejection of a stacked (two-component) series by ``NaiveDrift``;
          * when ``TORCH_AVAILABLE`` is true, ``TCNModel`` historical
            forecasts on the single and the stacked series.
        """
        ramp = lt(length=50)
        ramp_int_index = TimeSeries.from_values(ramp.values())
        ramp_stacked = ramp.stack(ramp)

        feb_01 = pd.Timestamp("20000201")
        feb_16 = pd.Timestamp("20000216")
        feb_17 = pd.Timestamp("20000217")

        # univariate model + univariate series
        r2 = NaiveDrift().backtest(
            ramp, start=feb_01, forecast_horizon=3, metric=r2_score
        )
        self.assertEqual(r2, 1.0)

        # very large train length should not affect the backtest
        r2 = NaiveDrift().backtest(
            ramp,
            train_length=10000,
            start=feb_01,
            forecast_horizon=3,
            metric=r2_score,
        )
        self.assertEqual(r2, 1.0)

        # window of size 2 is too small for naive drift
        with self.assertRaises(ValueError):
            NaiveDrift().backtest(
                ramp,
                train_length=2,
                start=feb_01,
                forecast_horizon=3,
                metric=r2_score,
            )

        # test that it also works for time series that are not Datetime-indexed
        r2 = NaiveDrift().backtest(
            ramp_int_index, start=0.7, forecast_horizon=3, metric=r2_score
        )
        self.assertEqual(r2, 1.0)

        # Starting on the 17th with horizon 3 is rejected unless the forecast
        # is allowed to overlap the end of the series.
        with self.assertRaises(ValueError):
            NaiveDrift().backtest(
                ramp, start=feb_17, forecast_horizon=3, overlap_end=False
            )
        NaiveDrift().backtest(ramp, start=feb_16, forecast_horizon=3)
        NaiveDrift().backtest(
            ramp, start=feb_17, forecast_horizon=3, overlap_end=True
        )

        # Using forecast_horizon default value
        NaiveDrift().backtest(ramp, start=feb_16)
        NaiveDrift().backtest(ramp, start=feb_17, overlap_end=True)

        # Using an int or float value for start
        NaiveDrift().backtest(ramp, start=30)
        NaiveDrift().backtest(ramp, start=0.7, overlap_end=True)

        # Set custom train window length
        NaiveDrift().backtest(ramp, train_length=10, start=30)

        # Using invalid start, forecast_horizon and/or train_length values:
        # each entry is (expected exception, keyword arguments to backtest).
        rejected_calls = [
            (ValueError, dict(start=0.7, forecast_horizon=-1)),
            (ValueError, dict(start=-0.7, forecast_horizon=1)),
            (ValueError, dict(start=100)),
            (ValueError, dict(start=1.2)),
            (TypeError, dict(start="wrong type")),
            (ValueError, dict(train_length=0, start=0.5)),
            (TypeError, dict(train_length=1.2, start=0.5)),
            (TypeError, dict(train_length="wrong type", start=0.5)),
            (ValueError, dict(start=49, forecast_horizon=2, overlap_end=False)),
        ]
        for expected_error, backtest_kwargs in rejected_calls:
            with self.assertRaises(expected_error):
                NaiveDrift().backtest(ramp, **backtest_kwargs)

        # univariate model + multivariate series
        with self.assertRaises(AssertionError):
            NaiveDrift().backtest(ramp_stacked, start=feb_01, forecast_horizon=3)

        # The remaining checks need the torch-based TCN model.
        if not TORCH_AVAILABLE:
            return

        jan_25 = pd.Timestamp("20000125")
        forecast_kwargs = dict(
            start=jan_25,
            forecast_horizon=3,
            verbose=False,
            last_points_only=True,
        )

        # multivariate model + univariate series
        one_step_tcn = TCNModel(
            input_chunk_length=12, output_chunk_length=1, batch_size=1, n_epochs=1
        )
        forecast = one_step_tcn.historical_forecasts(ramp, **forecast_kwargs)
        self.assertEqual(forecast.width, 1)
        self.assertEqual(forecast.end_time(), ramp.end_time())

        # multivariate model + multivariate series
        with self.assertRaises(ValueError):
            one_step_tcn.backtest(
                ramp_stacked,
                start=jan_25,
                forecast_horizon=3,
                verbose=False,
            )

        # A fresh model with a 3-step output chunk handles the stacked series.
        three_step_tcn = TCNModel(
            input_chunk_length=12, output_chunk_length=3, batch_size=1, n_epochs=1
        )
        forecast = three_step_tcn.historical_forecasts(
            ramp_stacked, **forecast_kwargs
        )
        self.assertEqual(forecast.width, 2)
        self.assertEqual(forecast.end_time(), ramp.end_time())
예제 #7
0
        def test_future_covariates(self):
            """Check that covariates help and that horizon limits are enforced.

            Three things are verified:
              1. A TCN trained with ``self.covariates_past`` reaches a lower
                 MAPE on ``self.target_future`` over a 160-step forecast than
                 an identically configured TCN trained without covariates.
              2. The covariate-aware TCN can forecast 165 steps with
                 ``self.covariates`` but raises ``ValueError`` for 166.
              3. An RNN trained with future covariates can forecast 160 steps
                 but raises ``ValueError`` for 161.
            """
            tcn_kwargs = dict(
                input_chunk_length=50,
                output_chunk_length=5,
                n_epochs=20,
                random_state=0,
            )

            # models with future covariates should produce better predictions
            # over a long forecasting horizon than a model trained with no
            # covariates
            model = TCNModel(**tcn_kwargs)
            model.fit(series=self.target_past)
            long_pred_no_cov = model.predict(n=160)

            model = TCNModel(**tcn_kwargs)
            model.fit(series=self.target_past,
                      past_covariates=self.covariates_past)
            long_pred_with_cov = model.predict(n=160,
                                               past_covariates=self.covariates)
            self.assertGreater(
                mape(self.target_future, long_pred_no_cov),
                mape(self.target_future, long_pred_with_cov),
                "Models with future covariates should produce better predictions.",
            )

            # block models can predict up to self.output_chunk_length points
            # beyond the last future covariate...
            model.predict(n=165, past_covariates=self.covariates)

            # ... not more. The same covariates are passed so that the
            # ValueError is caused by the horizon exceeding the available
            # covariates rather than by covariates being absent altogether.
            with self.assertRaises(ValueError):
                model.predict(n=166, past_covariates=self.covariates)

            # recurrent models can only predict data points for time steps
            # where future covariates are available
            model = RNNModel(12, n_epochs=1)
            model.fit(series=self.target_past,
                      future_covariates=self.covariates_past)
            model.predict(n=160, future_covariates=self.covariates)
            with self.assertRaises(ValueError):
                model.predict(n=161, future_covariates=self.covariates)
예제 #8
0
# Script fragment: configure the run, build the TCN model and prepare one
# output folder per parameter. ``style``, ``input_length``, ``output_length``,
# ``parameters``, ``approach`` and ``n_chunks`` are defined outside this
# fragment.
exogenous_input = 'Median'

# The number of windows depends on the selected style.
if style == 'all':
    n_windows = 5
elif style == '20_percent':
    n_windows = 1
else:
    raise ValueError('The style has to be "all" or "20_percent".')

# Note: Only use filler for now, remove after resampling script is fixed
filler = MissingValuesFiller()

# Create model
model = TCNModel(
    input_chunk_length=input_length,
    output_chunk_length=output_length,
    batch_size=input_length
)  # batch_size must be <= input_length (bug fixed in Darts version 0.9.0)

for parameter in parameters:
    print(
        f'\n##############################\nCurrent Parameter: {parameter.upper()}\n'
        f'##############################\n',
        file=sys.stderr)

    start_time = time.time()

    # Create sub folder for each parameter. ``makedirs`` with ``exist_ok``
    # also creates missing parent folders and does not fail if the folder
    # appears between an existence check and its creation.
    parameter_dir = f'./data/{approach}/{n_chunks}_chunks/{style}/{parameter}'
    os.makedirs(parameter_dir, exist_ok=True)
예제 #9
0
def get_tcn_model(dataset=None, plot=False, verbose=False):
    """Grid-search and fit a TCN model, optionally reporting its backtest.

    The input is split 80/20 into train and validation parts, scaled with a
    ``Scaler`` fitted on the train part, and used for a ``TCNModel``
    grid search scored by MSE on the validation part. The selected model is
    then fitted on the scaled train part.

    Args:
        dataset: data accepted by ``pd.DataFrame.from_dict`` that yields the
            columns ``'time_interval'`` and ``'count'``. When ``None``, the
            file ``"jeans_day.csv"`` is read from the working directory.
        plot: when true, print a 7-step forecast and backtest metrics
            (R2, MAPE, MASE, MAE, RMSE) and show a plot of the backtest
            against the actual series.
        verbose: forwarded to ``TCNModel.gridsearch`` and
            ``historical_forecasts``.

    Returns:
        A two-element list ``[tcn_model, params]`` holding the fitted model
        and the parameter combination selected by the grid search. The list
        is returned whether or not ``plot`` is set.
    """
    if dataset is None:
        df = pd.read_csv("jeans_day.csv")
    else:
        df = pd.DataFrame.from_dict(dataset)
    ts = TimeSeries.from_dataframe(df,
                                   time_col='time_interval',
                                   value_cols=['count'])

    train, val = ts.split_after(0.8)  # 80% train, 20% val

    # Fit the scaler on the training part only, then apply it everywhere.
    scaler = Scaler()
    train_transformed = scaler.fit_transform(train)
    val_transformed = scaler.transform(val)
    ts_transformed = scaler.transform(ts)

    param_grid = {
        'kernel_size': [4, 6],
        'num_filters': [10],
        'random_state': [0, 1],
        'input_chunk_length': [14],
        'output_chunk_length': [1],
        'dilation_base': [2, 3],
        'n_epochs': [100],
        'dropout': [0],
        'loss_fn': [MSELoss()],
        'weight_norm': [True],
    }
    search_result = TCNModel.gridsearch(parameters=param_grid,
                                        series=train_transformed,
                                        val_series=val_transformed,
                                        verbose=verbose,
                                        metric=mse)

    # Index instead of unpacking so that extra trailing elements in the
    # grid-search result (if any) are tolerated.
    tcn_model = search_result[0]
    best_params = search_result[1]
    tcn_model.fit(series=train_transformed)

    if plot:
        backtest = tcn_model.historical_forecasts(series=ts_transformed,
                                                  start=0.8,
                                                  forecast_horizon=1,
                                                  stride=1,
                                                  retrain=False,
                                                  verbose=verbose)
        # Bring everything back to the original scale before scoring.
        val = scaler.inverse_transform(val_transformed)
        backtest = scaler.inverse_transform(backtest)
        train = scaler.inverse_transform(train_transformed)

        print(scaler.inverse_transform(tcn_model.predict(7)))
        # NOTE(review): the first backtest point is dropped before scoring,
        # presumably to align it with ``val`` -- confirm the alignment.
        aligned_backtest = backtest[1:]
        print(f"R2: {r2_score(val, aligned_backtest, intersect=False)}")
        print(f"MAPE: {mape(val, aligned_backtest)}")
        print(f"MASE: {mase(val, aligned_backtest, train)}")
        print(f"MAE: {mae(val, aligned_backtest)}")
        print(f"RMSE: {np.sqrt(mse(val, aligned_backtest))}")

        backtest.plot(label='backtest')
        ts.plot(label='actual')
        plt.title("H&M Daily, TCN Model")
        plt.xlabel("Date")
        plt.ylabel("Count")
        plt.legend()
        plt.show()

    return [tcn_model, best_params]
예제 #10
0
 def test_input_models_global_models(self):
     """Check that a naive ensemble can be built from three unfitted models.

     The test passes when constructing ``NaiveEnsembleModel`` from an RNN,
     a TCN and an N-BEATS model does not raise.
     """
     candidate_models = [RNNModel(12), TCNModel(10, 2), NBEATSModel(10, 2)]
     NaiveEnsembleModel(candidate_models)