Example #1
        def helper_test_models_accuracy(self, series, past_covariates,
                                        min_rmse_model):
            # for every model, check that it forecasts the target with a mean RMSE of at most `min_rmse_model[idx]`
            train_series, test_series = train_test_split(series, 70)
            train_past_covariates, _ = train_test_split(past_covariates, 70)

            for output_chunk_length in [1, 5]:
                for idx, model in enumerate(self.models):
                    model_instance = model(
                        lags=12,
                        lags_past_covariates=2,
                        output_chunk_length=output_chunk_length,
                    )
                    model_instance.fit(series=train_series,
                                       past_covariates=train_past_covariates)
                    prediction = model_instance.predict(
                        n=len(test_series),
                        series=train_series,
                        past_covariates=past_covariates,
                    )
                    current_rmse = rmse(prediction, test_series)
                    # in case of multi-series take mean rmse
                    mean_rmse = np.mean(current_rmse)
                    self.assertTrue(
                        mean_rmse <= min_rmse_model[idx],
                        f"{str(model_instance)} model was not able to predict data as well as expected. "
                        f"A mean rmse score of {mean_rmse} was recorded.",
                    )
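
For orientation, a hedged sketch of how this helper might be called from a test method. `self.series` and `self.past_covariates` are hypothetical fixtures, and the threshold list (one maximum mean RMSE per entry in `self.models`) is made up:

        def test_models_accuracy(self):
            # hypothetical fixtures and thresholds: one maximum mean RMSE per model in self.models
            self.helper_test_models_accuracy(self.series, self.past_covariates,
                                             min_rmse_model=[1.0, 1.0, 1.5])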
Example #2
    def test_kalman_missing_values(self):
        sine = tg.sine_timeseries(
            length=100,
            value_frequency=0.05) + 0.1 * tg.gaussian_timeseries(length=100)
        values = sine.values()
        values[20:22] = np.nan
        values[28:40] = np.nan
        sine_holes = TimeSeries.from_values(values)
        sine = TimeSeries.from_values(sine.values())

        kf = KalmanFilter(dim_x=2)  # dim_x: size of the Kalman filter state vector
        kf.fit(sine_holes[-50:])  # fit on the part with no holes

        # reconstruction should succeed despite the NaN holes
        filtered_series = kf.filter(sine_holes, num_samples=100)

        # reconstruction error should be sufficiently small
        self.assertLess(rmse(filtered_series, sine), 0.1)
Example #3
    def helper_test_models_accuracy(self, model_instance, n, series,
                                    past_covariates, min_rmse):
        # test whether the given model instance predicts the target with an RMSE of at most `min_rmse`
        train_series, test_series = train_test_split(series,
                                                     pd.Timestamp("20010101"))
        train_past_covariates, _ = train_test_split(past_covariates,
                                                    pd.Timestamp("20010101"))

        model_instance.fit(series=train_series,
                           past_covariates=train_past_covariates)
        prediction = model_instance.predict(n=n,
                                            past_covariates=past_covariates)
        current_rmse = rmse(test_series, prediction)

        self.assertTrue(
            current_rmse <= min_rmse,
            f"Model was not able to denoise data. A rmse score of {current_rmse} was recorded.",
        )
Example #4
        def test_multiple_ts(self):
            lags = 4
            lags_past_covariates = 3
            model = RegressionModel(lags=lags,
                                    lags_past_covariates=lags_past_covariates)

            target_series = tg.linear_timeseries(start_value=0,
                                                 end_value=49,
                                                 length=50)
            past_covariates = tg.linear_timeseries(start_value=100,
                                                   end_value=149,
                                                   length=50)
            past_covariates = past_covariates.stack(
                tg.linear_timeseries(start_value=400, end_value=449,
                                     length=50))

            target_train, target_test = target_series.split_after(0.7)
            past_covariates_train, past_covariates_test = past_covariates.split_after(
                0.7)
            model.fit(
                series=[target_train, target_train + 0.5],
                past_covariates=[
                    past_covariates_train, past_covariates_train + 0.5
                ],
            )

            predictions = model.predict(
                10,
                series=[target_train, target_train + 0.5],
                past_covariates=[past_covariates, past_covariates + 0.5],
            )

            self.assertEqual(len(predictions[0]), 10,
                             f"Found {len(predictions[0])} instead")

            # multiple TS, both future and past covariates, checking that both covariates lead to better results than
            # using a single one (target series = past_cov + future_cov + noise)
            np.random.seed(42)

            linear_ts_1 = tg.linear_timeseries(start_value=10,
                                               end_value=59,
                                               length=50)
            linear_ts_2 = tg.linear_timeseries(start_value=40,
                                               end_value=89,
                                               length=50)

            past_covariates = tg.sine_timeseries(length=50) * 10
            future_covariates = (
                tg.sine_timeseries(length=50, value_frequency=0.015) * 50)

            target_series_1 = linear_ts_1 + 4 * past_covariates + 2 * future_covariates
            target_series_2 = linear_ts_2 + 4 * past_covariates + 2 * future_covariates

            target_series_1_noise = (linear_ts_1 + 4 * past_covariates +
                                     2 * future_covariates +
                                     tg.gaussian_timeseries(std=7, length=50))

            target_series_2_noise = (linear_ts_2 + 4 * past_covariates +
                                     2 * future_covariates +
                                     tg.gaussian_timeseries(std=7, length=50))

            target_train_1, target_test_1 = target_series_1.split_after(0.7)
            target_train_2, target_test_2 = target_series_2.split_after(0.7)

            (
                target_train_1_noise,
                target_test_1_noise,
            ) = target_series_1_noise.split_after(0.7)
            (
                target_train_2_noise,
                target_test_2_noise,
            ) = target_series_2_noise.split_after(0.7)

            # testing improved denoising with multiple TS

            # test 1: with single TS, 2 covariates should be better than one
            model = RegressionModel(lags=3, lags_past_covariates=5)
            model.fit([target_train_1_noise], [past_covariates])

            prediction_past_only = model.predict(
                n=len(target_test_1),
                series=[target_train_1_noise, target_train_2_noise],
                past_covariates=[past_covariates] * 2,
            )

            model = RegressionModel(lags=3,
                                    lags_past_covariates=5,
                                    lags_future_covariates=(5, 0))
            model.fit([target_train_1_noise], [past_covariates],
                      [future_covariates])
            prediction_past_and_future = model.predict(
                n=len(target_test_1),
                series=[target_train_1_noise, target_train_2_noise],
                past_covariates=[past_covariates] * 2,
                future_covariates=[future_covariates] * 2,
            )

            error_past_only = rmse(
                [target_test_1, target_test_2],
                prediction_past_only,
                inter_reduction=np.mean,
            )
            error_both = rmse(
                [target_test_1, target_test_2],
                prediction_past_and_future,
                inter_reduction=np.mean,
            )

            self.assertTrue(error_past_only > error_both)
            # test 2: with both covariates, 2 TS should learn more than one (with little noise)
            model = RegressionModel(lags=3,
                                    lags_past_covariates=5,
                                    lags_future_covariates=(5, 0))
            model.fit(
                [target_train_1_noise, target_train_2_noise],
                [past_covariates] * 2,
                [future_covariates] * 2,
            )
            prediction_past_and_future_multi_ts = model.predict(
                n=len(target_test_1),
                series=[target_train_1_noise, target_train_2_noise],
                past_covariates=[past_covariates] * 2,
                future_covariates=[future_covariates] * 2,
            )
            error_both_multi_ts = rmse(
                [target_test_1, target_test_2],
                prediction_past_and_future_multi_ts,
                inter_reduction=np.mean,
            )

            self.assertTrue(error_both > error_both_multi_ts)
import pandas as pd

# `series` is assumed to be a darts TimeSeries prepared earlier (daily PCR-positive counts)
train, val = series.split_after(pd.Timestamp('20200810'))

from darts.models import ExponentialSmoothing

model = ExponentialSmoothing()
model.fit(train)
prediction = model.predict(len(val))

import matplotlib.pyplot as plt

series.plot(label='actual', lw=3)
prediction.plot(label='forecast', lw=3)
plt.legend()
plt.xlabel('Year')

from darts.models.prophet import Prophet

models = [ExponentialSmoothing(), Prophet()]
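# (older darts API) backtest() returns a TimeSeries of historical forecasts,
# obtained by re-forecasting `forecast_horizon` steps ahead from each point after `start`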
backtests = [
    model.backtest(series, start=pd.Timestamp('20200810'), forecast_horizon=3)
    for model in models
]

from darts.metrics import mape, rmse
for i, m in enumerate(models):
    err = rmse(backtests[i], series)
    backtests[i].plot(lw=3, label='{}, RMSE={:.2f}'.format(m, err))
plt.title('Predictive verification of 2 methods')
plt.legend()

plt.savefig("result2_pcr_positive_daily.png", bbox_inches='tight', dpi=120)