def helper_test_models_accuracy(self, series, past_covariates, min_rmse_model):
    """Assert that every model in ``self.models`` forecasts accurately enough.

    The target and the past covariates are each split with
    ``train_test_split(..., 70)``. For every output chunk length in ``(1, 5)``
    and every model class in ``self.models``, a fresh model is built, fitted on
    the training part and asked to forecast ``len(test_series)`` steps. The
    mean RMSE between that forecast and the test part must not exceed the
    bound stored at the same position in ``min_rmse_model``.

    Parameters
    ----------
    series
        Target data passed to ``train_test_split``.
    past_covariates
        Past covariates; the training part is used for fitting, the full
        object is handed to ``predict``.
    min_rmse_model
        Indexable of RMSE upper bounds, aligned by position with
        ``self.models``.
    """
    train_series, test_series = train_test_split(series, 70)
    train_past_covariates, _ = train_test_split(past_covariates, 70)
    horizon = len(test_series)

    for chunk_length in (1, 5):
        for position, model_cls in enumerate(self.models):
            model_instance = model_cls(
                lags=12,
                lags_past_covariates=2,
                output_chunk_length=chunk_length,
            )
            model_instance.fit(
                series=train_series,
                past_covariates=train_past_covariates,
            )
            forecast = model_instance.predict(
                n=horizon,
                series=train_series,
                past_covariates=past_covariates,
            )

            # np.mean folds several scores (multi-series case) into a single
            # number and leaves a scalar score unchanged.
            mean_rmse = np.mean(rmse(forecast, test_series))

            rmse_bound = min_rmse_model[position]
            self.assertTrue(
                mean_rmse <= rmse_bound,
                f"{str(model_instance)} model was not able to predict data as well as expected. "
                f"A mean rmse score of {mean_rmse} was recorded.",
            )
def test_kalman_missing_values(self):
    """A Kalman filter should reconstruct a noisy sine that contains NaN gaps.

    A 100-step noisy sine is built, two stretches of it are replaced by NaN,
    the filter is fitted on the last 50 steps (which contain no NaN) and then
    applied to the whole gapped series. The RMSE between the filtered output
    and the gap-free series must stay below 0.1.
    """
    wave = tg.sine_timeseries(length=100, value_frequency=0.05)
    noise = 0.1 * tg.gaussian_timeseries(length=100)
    noisy_sine = wave + noise

    # Punch two holes (indices 20-21 and 28-39) into the extracted values.
    gapped_values = noisy_sine.values()
    for gap in (slice(20, 22), slice(28, 40)):
        gapped_values[gap] = np.nan

    # Rebuild both series from raw values so they are constructed the same way.
    sine_holes = TimeSeries.from_values(gapped_values)
    reference = TimeSeries.from_values(noisy_sine.values())

    kalman = KalmanFilter(dim_x=2)
    # The last 50 steps lie after both holes, so the fit sees no NaN.
    kalman.fit(sine_holes[-50:])

    # Filtering the gapped series must succeed ...
    reconstructed = kalman.filter(sine_holes, num_samples=100)

    # ... and the reconstruction error must be sufficiently small.
    self.assertLess(rmse(reconstructed, reference), 0.1)
def helper_test_models_accuracy(self, model_instance, n, series, past_covariates, min_rmse):
    """Assert that ``model_instance`` forecasts with an RMSE of at most ``min_rmse``.

    The target and the past covariates are both split at the timestamp
    ``20010101`` via ``train_test_split``. The model is fitted on the training
    parts, asked for an ``n``-step forecast (with the full past covariates),
    and the RMSE between the test part and that forecast is compared against
    ``min_rmse``.

    Parameters
    ----------
    model_instance
        Already constructed model exposing ``fit`` and ``predict``.
    n
        Number of steps to forecast.
    series
        Target data passed to ``train_test_split``.
    past_covariates
        Past covariates; the training part is used for fitting, the full
        object for prediction.
    min_rmse
        Upper bound the recorded RMSE is allowed to reach.
    """
    split_point = pd.Timestamp("20010101")
    train_series, test_series = train_test_split(series, split_point)
    train_past_covariates, _ = train_test_split(past_covariates, split_point)

    model_instance.fit(
        series=train_series,
        past_covariates=train_past_covariates,
    )
    forecast = model_instance.predict(n=n, past_covariates=past_covariates)

    current_rmse = rmse(test_series, forecast)
    within_bound = current_rmse <= min_rmse
    self.assertTrue(
        within_bound,
        f"Model was not able to denoise data. A rmse score of {current_rmse} was recorded.",
    )
def test_multiple_ts(self):
    """Exercise ``RegressionModel`` with several target series and covariates.

    Three checks are performed:

    1. A model fitted on two target series with stacked past covariates
       returns forecasts of the requested length (10).
    2. With a single noisy training series, adding future covariates to the
       past covariates yields a lower RMSE than past covariates alone.
    3. With both covariates, training on two noisy series yields a lower
       RMSE than training on one.
    """
    lags = 4
    lags_past_covariates = 3
    model = RegressionModel(lags=lags, lags_past_covariates=lags_past_covariates)

    target_series = tg.linear_timeseries(start_value=0, end_value=49, length=50)
    past_covariates = tg.linear_timeseries(start_value=100, end_value=149, length=50)
    past_covariates = past_covariates.stack(
        tg.linear_timeseries(start_value=400, end_value=449, length=50)
    )

    # Only the training halves are needed for this first check.
    target_train, _ = target_series.split_after(0.7)
    past_covariates_train, _ = past_covariates.split_after(0.7)

    model.fit(
        series=[target_train, target_train + 0.5],
        past_covariates=[past_covariates_train, past_covariates_train + 0.5],
    )
    predictions = model.predict(
        10,
        series=[target_train, target_train + 0.5],
        past_covariates=[past_covariates, past_covariates + 0.5],
    )

    # Report the length that is actually being checked (the first forecast),
    # not the number of forecasts returned.
    self.assertEqual(
        len(predictions[0]), 10, f"Found {len(predictions[0])} instead"
    )

    # multiple TS, both future and past covariates, checking that both covariates lead to better results than
    # using a single one (target series = past_cov + future_cov + noise)
    np.random.seed(42)

    linear_ts_1 = tg.linear_timeseries(start_value=10, end_value=59, length=50)
    linear_ts_2 = tg.linear_timeseries(start_value=40, end_value=89, length=50)

    past_covariates = tg.sine_timeseries(length=50) * 10
    future_covariates = tg.sine_timeseries(length=50, value_frequency=0.015) * 50

    target_series_1 = linear_ts_1 + 4 * past_covariates + 2 * future_covariates
    target_series_2 = linear_ts_2 + 4 * past_covariates + 2 * future_covariates

    # The noisy targets are the clean targets plus Gaussian noise; the two
    # noise draws stay in this order so the seeded sequence is unchanged.
    target_series_1_noise = target_series_1 + tg.gaussian_timeseries(std=7, length=50)
    target_series_2_noise = target_series_2 + tg.gaussian_timeseries(std=7, length=50)

    # Clean series provide the evaluation targets, noisy series the training data.
    _, target_test_1 = target_series_1.split_after(0.7)
    _, target_test_2 = target_series_2.split_after(0.7)
    target_train_1_noise, _ = target_series_1_noise.split_after(0.7)
    target_train_2_noise, _ = target_series_2_noise.split_after(0.7)

    # testing improved denoise with multiple TS

    # test 1: with single TS, 2 covariates should be better than one
    model = RegressionModel(lags=3, lags_past_covariates=5)
    model.fit([target_train_1_noise], [past_covariates])

    prediction_past_only = model.predict(
        n=len(target_test_1),
        series=[target_train_1_noise, target_train_2_noise],
        past_covariates=[past_covariates] * 2,
    )

    model = RegressionModel(
        lags=3, lags_past_covariates=5, lags_future_covariates=(5, 0)
    )
    model.fit([target_train_1_noise], [past_covariates], [future_covariates])
    prediction_past_and_future = model.predict(
        n=len(target_test_1),
        series=[target_train_1_noise, target_train_2_noise],
        past_covariates=[past_covariates] * 2,
        future_covariates=[future_covariates] * 2,
    )

    error_past_only = rmse(
        [target_test_1, target_test_2],
        prediction_past_only,
        inter_reduction=np.mean,
    )
    error_both = rmse(
        [target_test_1, target_test_2],
        prediction_past_and_future,
        inter_reduction=np.mean,
    )

    # assertGreater prints both errors on failure, unlike assertTrue(a > b).
    self.assertGreater(error_past_only, error_both)

    # test 2: with both covariates, 2 TS should learn more than one (with little noise)
    model = RegressionModel(
        lags=3, lags_past_covariates=5, lags_future_covariates=(5, 0)
    )
    model.fit(
        [target_train_1_noise, target_train_2_noise],
        [past_covariates] * 2,
        [future_covariates] * 2,
    )
    prediction_past_and_future_multi_ts = model.predict(
        n=len(target_test_1),
        series=[target_train_1_noise, target_train_2_noise],
        past_covariates=[past_covariates] * 2,
        future_covariates=[future_covariates] * 2,
    )
    error_both_multi_ts = rmse(
        [target_test_1, target_test_2],
        prediction_past_and_future_multi_ts,
        inter_reduction=np.mean,
    )

    self.assertGreater(error_both, error_both_multi_ts)
# Everything up to 2020-08-10 is used for training, the remainder for validation.
split_date = pd.Timestamp('20200810')
train, val = series.split_after(split_date)

from darts.models import ExponentialSmoothing

# Fit exponential smoothing on the training part and forecast the validation span.
model = ExponentialSmoothing()
model.fit(train)
prediction = model.predict(len(val))

import matplotlib.pyplot as plt

# Draw the full series together with the forecast.
series.plot(label='actual', lw=3)
prediction.plot(label='forecast', lw=3)
plt.legend()
plt.xlabel('Year')

from darts.models.prophet import Prophet

# Backtest each candidate model from the split date on with a 3-step horizon.
models = [ExponentialSmoothing(), Prophet()]
backtests = [
    candidate.backtest(series, forecast_horizon=3, start=split_date)
    for candidate in models
]

from darts.metrics import mape, rmse

# Plot every backtest, labelled with the model and its RMSE against the series.
# No new figure is opened here, so these curves follow the plots drawn above.
for i, m in enumerate(models):
    backtest = backtests[i]
    err = rmse(backtest, series)
    label = '{}, RMSE={:.2f}'.format(m, err)
    backtest.plot(label=label, lw=3)

plt.title('Predictive verification of 3 methods')
plt.legend()
plt.savefig("result2_pcr_positive_daily.png", dpi=120, bbox_inches='tight')