def get_dummy_series(
    ts_length: int, lt_end_value: int = 10, st_value_offset: int = 10
) -> TimeSeries:
    """Build a test series by summing the outputs of ``lt``, ``st`` and ``rt``.

    Parameters
    ----------
    ts_length
        Length passed to each of the three generators.
    lt_end_value
        Forwarded to ``lt`` as ``end_value``.
    st_value_offset
        Forwarded to ``st`` as ``value_y_offset``.

    Returns
    -------
    TimeSeries
        The sum ``lt(...) + st(...) + rt(...)``.
    """
    # The components are created in the same order in which they are summed.
    lt_component = lt(length=ts_length, end_value=lt_end_value)
    st_component = st(length=ts_length, value_y_offset=st_value_offset)
    rt_component = rt(length=ts_length)
    return lt_component + st_component + rt_component
def test_gridsearch_n_jobs(self):
    """
    Check that gridsearch run with multiple workers selects the same
    best parameters as the single-worker run.
    """
    np.random.seed(1)
    ts_length = 100
    # The first 80% of the points are used for training, the rest for validation.
    split_idx = round(ts_length * 0.8)

    dummy_series = (
        lt(length=ts_length, end_value=1)
        + st(length=ts_length, value_y_offset=0)
        + rt(length=ts_length)
    )
    ts_train, ts_val = dummy_series[:split_idx], dummy_series[split_idx:]

    test_cases = [
        {
            "model": ARIMA,  # ExtendedForecastingModel
            "parameters": {"p": [18, 4, 8], "q": [1, 2, 3]},
        },
        {
            "model": BlockRNNModel,  # TorchForecastingModel
            "parameters": {
                "input_chunk_length": [1, 3, 5, 10],
                "output_chunk_length": [1, 3, 5, 10],
                "n_epochs": [1, 5],
                # fixed so that runs with the same parameters are not random
                "random_state": [42],
            },
        },
    ]

    for case in test_cases:
        model_cls, grid = case["model"], case["parameters"]

        # Single-worker run first (n_jobs=1), then n_jobs=-1; numpy is
        # re-seeded before each search.
        found_params = []
        for n_jobs in (1, -1):
            np.random.seed(1)
            _, best = model_cls.gridsearch(
                parameters=grid,
                series=ts_train,
                val_series=ts_val,
                n_jobs=n_jobs,
            )
            found_params.append(best)

        single_worker_best, multi_worker_best = found_params
        self.assertEqual(single_worker_best, multi_worker_best)
def test_gridsearch_multi(self):
    """Run a TCNModel grid search on a series made by stacking two series.

    There are no assertions: the test passes when ``gridsearch`` completes
    without raising.
    """
    st_component = st(length=40, value_y_offset=10)
    lt_component = lt(length=40, end_value=20)
    dummy_series = st_component.stack(lt_component)

    # Only ``kernel_size`` has more than one candidate value.
    tcn_params = {
        "input_chunk_length": [12],
        "output_chunk_length": [3],
        "n_epochs": [1],
        "batch_size": [1],
        "kernel_size": [2, 3, 4],
    }

    TCNModel.gridsearch(
        tcn_params,
        dummy_series,
        forecast_horizon=3,
        metric=mape,
    )
def test_forecasting_residuals(self):
    """Check ``NaiveSeasonal(K=1).residuals`` on a ``ct`` and an ``lt`` series."""
    model = NaiveSeasonal(K=1)

    # Zero residuals are expected on the series produced by ``ct``.
    flat_residuals = model.residuals(ct(length=20))
    np.testing.assert_almost_equal(
        flat_residuals.univariate_values(),
        np.zeros(len(flat_residuals)),
    )

    # Constant, strictly positive residuals are expected on the ``lt`` series.
    trend_residuals = model.residuals(lt(length=20))
    n_residuals = len(trend_residuals)

    # Constant: consecutive differences are all (almost) zero.
    np.testing.assert_almost_equal(
        np.diff(trend_residuals.univariate_values()),
        np.zeros(n_residuals - 1),
    )
    # Positive: zero is strictly less than every residual.
    np.testing.assert_array_less(
        np.zeros(n_residuals),
        trend_residuals.univariate_values(),
    )
def test_gridsearch_multi(self):
    """Run a TCNModel grid search on a series made by stacking two series.

    There are no assertions: the test passes when ``gridsearch`` completes
    without raising.
    """
    st_component = st(length=40, value_y_offset=10)
    lt_component = lt(length=40, end_value=20)
    dummy_series = st_component.stack(lt_component)

    # Only ``kernel_size`` has more than one candidate value.
    tcn_params = {
        "n_epochs": [1],
        "batch_size": [1],
        "input_size": [2],
        "output_length": [3],
        "output_size": [2],
        "kernel_size": [2, 3, 4],
    }

    TCNModel.gridsearch(
        tcn_params,
        dummy_series,
        forecast_horizon=3,
        metric=mape,
        use_full_output_length=True,
    )
def test_gridsearch(self):
    """Assert ``compare_best_against_random`` is truthy for Theta, FFT and ExponentialSmoothing."""
    np.random.seed(1)
    ts_length = 50
    dummy_series = (
        lt(length=ts_length, end_value=10)
        + st(length=ts_length, value_y_offset=10)
        + rt(length=ts_length)
    )

    # (model class, parameter grid) pairs, checked in this order.
    search_spaces = [
        (Theta, {"theta": list(range(3, 10))}),
        (
            FFT,
            {
                "nr_freqs_to_keep": [10, 50, 100],
                "trend": [None, "poly", "exp"],
            },
        ),
        (ExponentialSmoothing, {"seasonal_periods": list(range(5, 10))}),
    ]

    for model_cls, grid in search_spaces:
        outcome = compare_best_against_random(model_cls, grid, dummy_series)
        self.assertTrue(outcome)
def test_backtest_forecasting(self):
    """Check ``backtest`` / ``historical_forecasts`` argument handling.

    NaiveDrift is run against a univariate and a stacked series; when
    ``TORCH_AVAILABLE`` is truthy, TCNModel is checked as well.
    """
    univariate = lt(length=50)
    multivariate = univariate.stack(univariate)

    start_0201 = pd.Timestamp('20000201')
    start_0216 = pd.Timestamp('20000216')
    start_0217 = pd.Timestamp('20000217')

    def drift_backtest(**kwargs):
        # A fresh NaiveDrift instance is created for every call.
        return NaiveDrift().backtest(univariate, **kwargs)

    # univariate model + univariate series
    score = drift_backtest(start=start_0201, forecast_horizon=3, metric=r2_score)
    self.assertEqual(score, 1.0)

    # This start with horizon 3 must be rejected, both with the default
    # overlap_end and with overlap_end=False.
    rejected_starts = (
        {"start": start_0217, "forecast_horizon": 3},
        {"start": start_0217, "forecast_horizon": 3, "overlap_end": False},
    )
    for kwargs in rejected_starts:
        with self.assertRaises(ValueError):
            drift_backtest(**kwargs)

    accepted_calls = (
        {"start": start_0216, "forecast_horizon": 3},
        {"start": start_0217, "forecast_horizon": 3, "overlap_end": True},
        # Using forecast_horizon default value
        {"start": start_0216},
        {"start": start_0217, "overlap_end": True},
        # Using an int or float value for start
        {"start": 30},
        {"start": 0.7, "overlap_end": True},
    )
    for kwargs in accepted_calls:
        drift_backtest(**kwargs)

    # Using invalid start and/or forecast_horizon values
    invalid_calls = (
        (ValueError, {"start": 0.7, "forecast_horizon": -1}),
        (ValueError, {"start": -0.7, "forecast_horizon": 1}),
        (ValueError, {"start": 100}),
        (ValueError, {"start": 1.2}),
        (TypeError, {"start": 'wrong type'}),
        (ValueError, {"start": 49, "forecast_horizon": 2, "overlap_end": False}),
    )
    for expected_error, kwargs in invalid_calls:
        with self.assertRaises(expected_error):
            drift_backtest(**kwargs)

    # univariate model + multivariate series
    with self.assertRaises(AssertionError):
        NaiveDrift().backtest(multivariate, start=start_0201, forecast_horizon=3)

    if not TORCH_AVAILABLE:
        return

    start_0125 = pd.Timestamp('20000125')
    forecast_kwargs = {
        "start": start_0125,
        "forecast_horizon": 3,
        "verbose": False,
    }

    # multivariate model + univariate series
    tcn_model = TCNModel(
        input_chunk_length=12, output_chunk_length=1, batch_size=1, n_epochs=1
    )
    pred = tcn_model.historical_forecasts(
        univariate, last_points_only=True, **forecast_kwargs
    )
    self.assertEqual(pred.width, 1)
    self.assertEqual(pred.end_time(), univariate.end_time())

    # multivariate model + multivariate series
    # (the model used above must reject the stacked series)
    with self.assertRaises(ValueError):
        tcn_model.backtest(multivariate, **forecast_kwargs)

    tcn_model = TCNModel(
        input_chunk_length=12, output_chunk_length=3, batch_size=1, n_epochs=1
    )
    pred = tcn_model.historical_forecasts(
        multivariate, last_points_only=True, **forecast_kwargs
    )
    self.assertEqual(pred.width, 2)
    self.assertEqual(pred.end_time(), univariate.end_time())
def test_backtest_forecasting(self):
    """Check ``backtest`` / ``historical_forecasts`` argument handling.

    NaiveDrift is run against a univariate series, a copy of it rebuilt
    with ``TimeSeries.from_values``, and a stacked series; when
    ``TORCH_AVAILABLE`` is truthy, TCNModel is checked as well.
    """
    univariate = lt(length=50)
    univariate_int = TimeSeries.from_values(univariate.values())
    multivariate = univariate.stack(univariate)

    start_0201 = pd.Timestamp("20000201")
    start_0216 = pd.Timestamp("20000216")
    start_0217 = pd.Timestamp("20000217")

    def drift_backtest(series=univariate, **kwargs):
        # A fresh NaiveDrift instance is created for every call.
        return NaiveDrift().backtest(series, **kwargs)

    # univariate model + univariate series
    score = drift_backtest(start=start_0201, forecast_horizon=3, metric=r2_score)
    self.assertEqual(score, 1.0)

    # very large train length should not affect the backtest
    score = drift_backtest(
        train_length=10000, start=start_0201, forecast_horizon=3, metric=r2_score
    )
    self.assertEqual(score, 1.0)

    # window of size 2 is too small for naive drift
    with self.assertRaises(ValueError):
        drift_backtest(
            train_length=2, start=start_0201, forecast_horizon=3, metric=r2_score
        )

    # test that it also works for time series that are not Datetime-indexed
    score = drift_backtest(
        univariate_int, start=0.7, forecast_horizon=3, metric=r2_score
    )
    self.assertEqual(score, 1.0)

    with self.assertRaises(ValueError):
        drift_backtest(start=start_0217, forecast_horizon=3, overlap_end=False)

    accepted_calls = (
        {"start": start_0216, "forecast_horizon": 3},
        {"start": start_0217, "forecast_horizon": 3, "overlap_end": True},
        # Using forecast_horizon default value
        {"start": start_0216},
        {"start": start_0217, "overlap_end": True},
        # Using an int or float value for start
        {"start": 30},
        {"start": 0.7, "overlap_end": True},
        # Set custom train window length
        {"train_length": 10, "start": 30},
    )
    for kwargs in accepted_calls:
        drift_backtest(**kwargs)

    # Using invalid start, forecast_horizon and/or train_length values
    invalid_calls = (
        (ValueError, {"start": 0.7, "forecast_horizon": -1}),
        (ValueError, {"start": -0.7, "forecast_horizon": 1}),
        (ValueError, {"start": 100}),
        (ValueError, {"start": 1.2}),
        (TypeError, {"start": "wrong type"}),
        (ValueError, {"train_length": 0, "start": 0.5}),
        (TypeError, {"train_length": 1.2, "start": 0.5}),
        (TypeError, {"train_length": "wrong type", "start": 0.5}),
        (ValueError, {"start": 49, "forecast_horizon": 2, "overlap_end": False}),
    )
    for expected_error, kwargs in invalid_calls:
        with self.assertRaises(expected_error):
            drift_backtest(**kwargs)

    # univariate model + multivariate series
    with self.assertRaises(AssertionError):
        drift_backtest(multivariate, start=start_0201, forecast_horizon=3)

    if not TORCH_AVAILABLE:
        return

    start_0125 = pd.Timestamp("20000125")
    forecast_kwargs = {
        "start": start_0125,
        "forecast_horizon": 3,
        "verbose": False,
    }

    # multivariate model + univariate series
    tcn_model = TCNModel(
        input_chunk_length=12, output_chunk_length=1, batch_size=1, n_epochs=1
    )
    pred = tcn_model.historical_forecasts(
        univariate, last_points_only=True, **forecast_kwargs
    )
    self.assertEqual(pred.width, 1)
    self.assertEqual(pred.end_time(), univariate.end_time())

    # multivariate model + multivariate series
    # (the model used above must reject the stacked series)
    with self.assertRaises(ValueError):
        tcn_model.backtest(multivariate, **forecast_kwargs)

    tcn_model = TCNModel(
        input_chunk_length=12, output_chunk_length=3, batch_size=1, n_epochs=1
    )
    pred = tcn_model.historical_forecasts(
        multivariate, last_points_only=True, **forecast_kwargs
    )
    self.assertEqual(pred.width, 2)
    self.assertEqual(pred.end_time(), univariate.end_time())
def test_backtest_forecasting(self):
    """Check ``backtest`` argument handling for NaiveDrift and TCNModel.

    Every call passes ``None`` as the second positional argument, except the
    final TCNModel calls, which pass a series there. The TCNModel checks only
    run when ``TORCH_AVAILABLE`` is truthy.
    """
    univariate = lt(length=50)
    multivariate = univariate.stack(univariate)

    start_0201 = pd.Timestamp('20000201')
    start_0216 = pd.Timestamp('20000216')
    start_0217 = pd.Timestamp('20000217')

    def drift_backtest(*args, **kwargs):
        # A fresh NaiveDrift instance is created for every call.
        return NaiveDrift().backtest(univariate, *args, **kwargs)

    # univariate model + univariate series
    pred = drift_backtest(None, start_0201, 3)
    self.assertEqual(r2_score(pred, univariate), 1.0)

    # This start with horizon 3 must be rejected, both with the default
    # trim_to_series and with trim_to_series=True.
    rejected_starts = (
        {"start": start_0217, "forecast_horizon": 3},
        {"start": start_0217, "forecast_horizon": 3, "trim_to_series": True},
    )
    for kwargs in rejected_starts:
        with self.assertRaises(ValueError):
            drift_backtest(None, **kwargs)

    # Each entry is (positional args, keyword args); the positional/keyword
    # split of every call is kept exactly as written.
    accepted_calls = (
        ((None,), {"start": start_0216, "forecast_horizon": 3}),
        ((None, start_0217), {"forecast_horizon": 3, "trim_to_series": False}),
        # Using forecast_horizon default value
        ((None,), {"start": start_0216}),
        ((None, start_0217), {"trim_to_series": False}),
        # Using an int or float value for start
        ((None,), {"start": 30}),
        ((None,), {"start": 0.7, "trim_to_series": False}),
    )
    for args, kwargs in accepted_calls:
        drift_backtest(*args, **kwargs)

    # Using invalid start and/or forecast_horizon values
    invalid_calls = (
        (ValueError, (None,), {"start": 0.7, "forecast_horizon": -1}),
        (ValueError, (None, 0.7, -1), {}),
        (ValueError, (None,), {"start": 100}),
        (ValueError, (None,), {"start": 1.2}),
        (TypeError, (None,), {"start": 'wrong type'}),
        (
            ValueError,
            (None,),
            {"start": 49, "forecast_horizon": 2, "trim_to_series": True},
        ),
    )
    for expected_error, args, kwargs in invalid_calls:
        with self.assertRaises(expected_error):
            drift_backtest(*args, **kwargs)

    # univariate model + multivariate series
    with self.assertRaises(AssertionError):
        NaiveDrift().backtest(multivariate, None, start_0201, 3)

    if not TORCH_AVAILABLE:
        return

    start_0125 = pd.Timestamp('20000125')

    # multivariate model + univariate series
    tcn_model = TCNModel(batch_size=1, n_epochs=1)
    pred = tcn_model.backtest(univariate, None, start_0125, 3, verbose=False)
    self.assertEqual(pred.width, 1)

    # multivariate model + multivariate series
    # (the model used above must reject the stacked series)
    with self.assertRaises(ValueError):
        tcn_model.backtest(multivariate, None, start_0125, 3, verbose=False)

    tcn_model = TCNModel(batch_size=1, n_epochs=1, input_size=2, output_length=3)
    with self.assertRaises(ValueError):
        tcn_model.backtest(
            multivariate,
            None,
            start_0125,
            3,
            verbose=False,
            use_full_output_length=False,
        )

    # One component of the stacked series is passed as the second argument;
    # the prediction must then have width 1.
    for component, horizon in (('0', 1), ('1', 3)):
        pred = tcn_model.backtest(
            multivariate,
            multivariate[[component]],
            start_0125,
            horizon,
            verbose=False,
            use_full_output_length=True,
        )
        self.assertEqual(pred.width, 1)

    tcn_model = TCNModel(
        batch_size=1, n_epochs=1, input_size=2, output_length=3, output_size=2
    )
    pred = tcn_model.backtest(
        multivariate,
        multivariate,
        start_0125,
        3,
        verbose=False,
        use_full_output_length=True,
    )
    self.assertEqual(pred.width, 2)