def test_gridsearch_multi(self):
    """Gridsearch should run end-to-end on a multivariate (stacked) target series."""
    # Stack two synthetic components to obtain a multivariate series.
    multivariate_series = st(length=40, value_y_offset=10).stack(
        lt(length=40, end_value=20)
    )
    # Only kernel_size actually varies; everything else is pinned to keep the run fast.
    param_grid = {
        "input_chunk_length": [12],
        "output_chunk_length": [3],
        "n_epochs": [1],
        "batch_size": [1],
        "kernel_size": [2, 3, 4],
    }
    TCNModel.gridsearch(
        param_grid, multivariate_series, forecast_horizon=3, metric=mape
    )
def test_call_predict_global_models_multivariate_input_no_covariates(self):
    """Fit and predict a naive ensemble of global models on multivariate input."""
    member_models = [
        RNNModel(12, n_epochs=1),
        TCNModel(10, 2, n_epochs=1),
        NBEATSModel(10, 2, n_epochs=1),
    ]
    ensemble = NaiveEnsembleModel(member_models)
    ensemble.fit(self.seq1)
    ensemble.predict(n=5, series=self.seq1)
def test_call_predict_global_models_univariate_input_no_covariates(self):
    """Predict must fail on an unfitted ensemble and succeed after fit()."""
    ensemble = NaiveEnsembleModel(
        [
            RNNModel(12, n_epochs=1),
            TCNModel(10, 2, n_epochs=1),
            NBEATSModel(10, 2, n_epochs=1),
        ]
    )
    # Calling predict before fit is an error.
    with self.assertRaises(Exception):
        ensemble.predict(5)
    ensemble.fit(self.series1)
    ensemble.predict(5)
def _batch_prediction_test_helper_function(self, targets):
    """Check that predictions are numerically independent of ``batch_size``.

    Fits one TCN model, computes reference predictions with the default
    batch size, then re-predicts with every batch size from 1 up to
    ``4 * len(targets) - 1`` and asserts the outputs match the reference.

    Parameters
    ----------
    targets
        List of target series; the model is fit on ``targets[0]``.
    """
    epsilon = 1e-4
    model = TCNModel(
        input_chunk_length=50,
        output_chunk_length=10,
        n_epochs=10,
        random_state=0,
    )
    model.fit(series=targets[0], past_covariates=self.covariates_past)
    # Reference predictions with the default (None) batch size.
    preds_default = model.predict(
        n=160,
        series=targets,
        past_covariates=[self.covariates] * len(targets),
        batch_size=None,
    )
    # make batch size large enough to test stacking samples
    for batch_size in range(1, 4 * len(targets)):
        preds = model.predict(
            n=160,
            series=targets,
            past_covariates=[self.covariates] * len(targets),
            batch_size=batch_size,
        )
        for i in range(len(targets)):
            # Bug fix: compare *absolute* differences. The original summed
            # signed differences, so positive and negative deviations could
            # cancel out and hide large per-point discrepancies.
            total_abs_diff = abs((preds[i] - preds_default[i]).values()).sum()
            self.assertLess(total_abs_diff, epsilon)
def test_call_predict_global_models_multivariate_input_with_covariates(
        self):
    """Ensemble predict with past covariates on truncated target/covariate slices."""
    ensemble = NaiveEnsembleModel(
        [
            RNNModel(12, n_epochs=1),
            TCNModel(10, 2, n_epochs=1),
            NBEATSModel(10, 2, n_epochs=1),
        ]
    )
    ensemble.fit(self.seq1, self.cov1)
    # Truncate so that the covariates extend past the target series.
    target_slices = [s[:12] for s in self.seq1]
    covariate_slices = [c[:14] for c in self.cov1]
    ensemble.predict(
        n=2, series=target_slices, past_covariates=covariate_slices
    )
def test_backtest_forecasting(self):
    """Exercise backtest()/historical_forecasts() across series types, start
    specifications, train lengths, and invalid-argument error conditions."""
    linear_series = lt(length=50)
    # Same values, but with an integer (RangeIndex) index instead of Datetime.
    linear_series_int = TimeSeries.from_values(linear_series.values())
    linear_series_multi = linear_series.stack(linear_series)

    # univariate model + univariate series
    score = NaiveDrift().backtest(
        linear_series,
        start=pd.Timestamp("20000201"),
        forecast_horizon=3,
        metric=r2_score,
    )
    # Drift forecasts a linear series perfectly -> R^2 of exactly 1.
    self.assertEqual(score, 1.0)

    # very large train length should not affect the backtest
    score = NaiveDrift().backtest(
        linear_series,
        train_length=10000,
        start=pd.Timestamp("20000201"),
        forecast_horizon=3,
        metric=r2_score,
    )
    self.assertEqual(score, 1.0)

    # window of size 2 is too small for naive drift
    with self.assertRaises(ValueError):
        NaiveDrift().backtest(
            linear_series,
            train_length=2,
            start=pd.Timestamp("20000201"),
            forecast_horizon=3,
            metric=r2_score,
        )

    # test that it also works for time series that are not Datetime-indexed
    score = NaiveDrift().backtest(
        linear_series_int, start=0.7, forecast_horizon=3, metric=r2_score
    )
    self.assertEqual(score, 1.0)

    # Forecasting past the series end is only allowed with overlap_end=True.
    with self.assertRaises(ValueError):
        NaiveDrift().backtest(
            linear_series,
            start=pd.Timestamp("20000217"),
            forecast_horizon=3,
            overlap_end=False,
        )
    NaiveDrift().backtest(
        linear_series, start=pd.Timestamp("20000216"), forecast_horizon=3
    )
    NaiveDrift().backtest(
        linear_series,
        start=pd.Timestamp("20000217"),
        forecast_horizon=3,
        overlap_end=True,
    )

    # Using forecast_horizon default value
    NaiveDrift().backtest(linear_series, start=pd.Timestamp("20000216"))
    NaiveDrift().backtest(
        linear_series, start=pd.Timestamp("20000217"), overlap_end=True
    )

    # Using an int or float value for start
    NaiveDrift().backtest(linear_series, start=30)
    NaiveDrift().backtest(linear_series, start=0.7, overlap_end=True)

    # Set custom train window length
    NaiveDrift().backtest(linear_series, train_length=10, start=30)

    # Using invalid start and/or forecast_horizon values
    with self.assertRaises(ValueError):
        NaiveDrift().backtest(linear_series, start=0.7, forecast_horizon=-1)
    with self.assertRaises(ValueError):
        NaiveDrift().backtest(linear_series, start=-0.7, forecast_horizon=1)

    with self.assertRaises(ValueError):
        NaiveDrift().backtest(linear_series, start=100)
    with self.assertRaises(ValueError):
        NaiveDrift().backtest(linear_series, start=1.2)
    with self.assertRaises(TypeError):
        NaiveDrift().backtest(linear_series, start="wrong type")
    with self.assertRaises(ValueError):
        NaiveDrift().backtest(linear_series, train_length=0, start=0.5)
    with self.assertRaises(TypeError):
        NaiveDrift().backtest(linear_series, train_length=1.2, start=0.5)
    with self.assertRaises(TypeError):
        NaiveDrift().backtest(linear_series, train_length="wrong type", start=0.5)
    with self.assertRaises(ValueError):
        NaiveDrift().backtest(
            linear_series, start=49, forecast_horizon=2, overlap_end=False
        )

    # univariate model + multivariate series
    with self.assertRaises(AssertionError):
        NaiveDrift().backtest(
            linear_series_multi, start=pd.Timestamp("20000201"), forecast_horizon=3
        )

    # multivariate model + univariate series
    # NOTE(review): the remaining checks need torch; skipped when unavailable.
    if TORCH_AVAILABLE:
        tcn_model = TCNModel(
            input_chunk_length=12, output_chunk_length=1, batch_size=1, n_epochs=1
        )
        pred = tcn_model.historical_forecasts(
            linear_series,
            start=pd.Timestamp("20000125"),
            forecast_horizon=3,
            verbose=False,
            last_points_only=True,
        )
        self.assertEqual(pred.width, 1)
        self.assertEqual(pred.end_time(), linear_series.end_time())

        # multivariate model + multivariate series
        # output_chunk_length=1 cannot serve forecast_horizon=3 without retraining.
        with self.assertRaises(ValueError):
            tcn_model.backtest(
                linear_series_multi,
                start=pd.Timestamp("20000125"),
                forecast_horizon=3,
                verbose=False,
            )

        tcn_model = TCNModel(
            input_chunk_length=12, output_chunk_length=3, batch_size=1, n_epochs=1
        )
        pred = tcn_model.historical_forecasts(
            linear_series_multi,
            start=pd.Timestamp("20000125"),
            forecast_horizon=3,
            verbose=False,
            last_points_only=True,
        )
        self.assertEqual(pred.width, 2)
        self.assertEqual(pred.end_time(), linear_series.end_time())
def test_future_covariates(self):
    """Covariate-aware models should beat a covariate-free baseline over a
    long horizon, and prediction lengths must respect covariate availability."""
    # models with covariates should produce better predictions over a long
    # forecasting horizon than a model trained with no covariates
    model = TCNModel(
        input_chunk_length=50,
        output_chunk_length=5,
        n_epochs=20,
        random_state=0,
    )
    model.fit(series=self.target_past)
    long_pred_no_cov = model.predict(n=160)

    model = TCNModel(
        input_chunk_length=50,
        output_chunk_length=5,
        n_epochs=20,
        random_state=0,
    )
    model.fit(series=self.target_past, past_covariates=self.covariates_past)
    long_pred_with_cov = model.predict(n=160, past_covariates=self.covariates)
    self.assertTrue(
        mape(self.target_future, long_pred_no_cov)
        > mape(self.target_future, long_pred_with_cov),
        "Models with future covariates should produce better predictions.",
    )

    # block models can predict up to self.output_chunk_length points beyond
    # the last covariate...
    model.predict(n=165, past_covariates=self.covariates)

    # ... not more. Bug fix: the negative case must use the same covariates
    # as the positive case above; it previously passed an unrelated series
    # and no covariates, so the ValueError fired for the wrong reason.
    with self.assertRaises(ValueError):
        model.predict(n=166, past_covariates=self.covariates)

    # recurrent models can only predict data points for time steps where
    # future covariates are available
    model = RNNModel(12, n_epochs=1)
    model.fit(series=self.target_past, future_covariates=self.covariates_past)
    model.predict(n=160, future_covariates=self.covariates)
    with self.assertRaises(ValueError):
        model.predict(n=161, future_covariates=self.covariates)
# NOTE(review): this is a fragment of a flat experiment script; `style`,
# `input_length`, `output_length`, `parameters`, `approach` and `n_chunks`
# are defined outside this view — presumably CLI/config values; confirm.
exogenous_input = 'Median'

# Select how many evaluation windows to run for the chosen style.
if style == 'all':
    n_windows = 5
elif style == '20_percent':
    n_windows = 1
else:
    raise ValueError('The style has to be "all" or "20_percent".')

# Note: Only use filler for now, remove after resampling script is fixed
filler = MissingValuesFiller()

# Create model
model = TCNModel(
    input_chunk_length=input_length,
    output_chunk_length=output_length,
    batch_size=input_length
)  # batch_size must be <= input_length (bug fixed in Darts version 0.9.0)

for parameter in parameters:
    # Progress banner goes to stderr so it is not mixed with piped output.
    print(
        f'\n##############################\nCurrent Parameter: {parameter.upper()}\n'
        f'##############################\n',
        file=sys.stderr)
    start_time = time.time()

    # Create sub folder for each parameter
    if not os.path.isdir(
            f'./data/{approach}/{n_chunks}_chunks/{style}/{parameter}'):
        os.mkdir(f'./data/{approach}/{n_chunks}_chunks/{style}/{parameter}')
def get_tcn_model(dataset=None, plot=False, verbose=False):
    """Grid-search, fit and return the best TCN model for the given data.

    Parameters
    ----------
    dataset : dict-like, optional
        Data convertible via ``pd.DataFrame.from_dict``; when omitted,
        ``jeans_day.csv`` is read from the working directory.
    plot : bool
        When True, additionally backtests the model, prints error metrics
        and shows a plot of backtest vs. actuals.
    verbose : bool
        Forwarded to gridsearch / historical_forecasts.

    Returns
    -------
    list
        ``[fitted_model, best_params]``. Bug fix: previously the function
        implicitly returned ``None`` when ``plot=True``; it now returns the
        pair in both cases.
    """
    if dataset is None:
        df = pd.read_csv("jeans_day.csv")
    else:
        df = pd.DataFrame.from_dict(dataset)

    ts = TimeSeries.from_dataframe(
        df, time_col='time_interval', value_cols=['count'])
    train, val = ts.split_after(0.8)  # 80% train, 20% val

    # Scale on the training split only, then apply to val / full series.
    scaler = Scaler()
    train_transformed = scaler.fit_transform(train)
    val_transformed = scaler.transform(val)
    ts_transformed = scaler.transform(ts)

    # Hyper-parameter grid for TCNModel.gridsearch (dict literal instead of
    # the original item-by-item assignments).
    params = {
        'kernel_size': [4, 6],
        'num_filters': [10],
        'random_state': [0, 1],
        'input_chunk_length': [14],
        'output_chunk_length': [1],
        'dilation_base': [2, 3],
        'n_epochs': [100],
        'dropout': [0],
        'loss_fn': [MSELoss()],
        'weight_norm': [True],
    }
    # gridsearch returns (best_model, best_params, ...); index rather than
    # unpack so the code works regardless of the tuple's exact length.
    tcn = TCNModel.gridsearch(parameters=params,
                              series=train_transformed,
                              val_series=val_transformed,
                              verbose=verbose,
                              metric=mse)
    params = tcn[1]
    tcn_model = tcn[0]
    tcn_model.fit(series=train_transformed)

    if plot:
        backtest = tcn_model.historical_forecasts(series=ts_transformed,
                                                  start=0.8,
                                                  forecast_horizon=1,
                                                  stride=1,
                                                  retrain=False,
                                                  verbose=verbose)
        # Report metrics on the original (inverse-transformed) scale.
        val = scaler.inverse_transform(val_transformed)
        backtest = scaler.inverse_transform(backtest)
        train = scaler.inverse_transform(train_transformed)
        print(scaler.inverse_transform(tcn_model.predict(7)))
        print("R2: {}".format(r2_score(val, backtest[1:], intersect=False)))
        print("MAPE: {}".format(mape(val, backtest[1:])))
        print("MASE: {}".format(mase(val, backtest[1:], train)))
        print("MAE: {}".format(mae(val, backtest[1:])))
        print("RMSE: {}".format(np.sqrt(mse(val, backtest[1:]))))
        backtest.plot(label='backtest')
        ts.plot(label='actual')
        plt.title("H&M Daily, TCN Model")
        plt.xlabel("Date")
        plt.ylabel("Count")
        plt.legend()
        plt.show()

    # Bug fix: return unconditionally (was only returned on the non-plot path).
    return [tcn_model, params]
def test_input_models_global_models(self):
    """NaiveEnsembleModel must accept a list of unfitted global models."""
    global_models = [RNNModel(12), TCNModel(10, 2), NBEATSModel(10, 2)]
    NaiveEnsembleModel(global_models)