Example #1
    def test_seasonality_inference(self):

        # test `seasonal_periods` inference for datetime indices
        freq_str_seasonality_periods_tuples = [
            ("D", 7),
            ("H", 24),
            ("M", 12),
            ("W", 52),
            ("Q", 4),
            ("B", 5),
        ]
        for freq_str, periods in freq_str_seasonality_periods_tuples:
            self.helper_test_seasonality_inference(freq_str, periods)

        # test default selection for integer index
        series = TimeSeries.from_values(np.arange(1, 30, 1))
        model = ExponentialSmoothing()
        model.fit(series)
        self.assertEqual(model.seasonal_periods, 12)

        # test that a model which previously inferred a seasonal period infers it again when fitted on a new series
        series1 = tg.sine_timeseries(length=100, freq="M")
        series2 = tg.sine_timeseries(length=100, freq="D")
        model = ExponentialSmoothing()
        model.fit(series1)
        model.fit(series2)
        self.assertEqual(model.seasonal_periods, 7)
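A minimal usage sketch of the behavior exercised above (hedged: it assumes the darts API as used in the test, with `tg` standing for `darts.utils.timeseries_generation`); the seasonal period is inferred from the index frequency when not given, or can be fixed explicitly:

    from darts.models import ExponentialSmoothing
    from darts.utils import timeseries_generation as tg

    daily = tg.sine_timeseries(length=100, freq="D")  # daily datetime index

    # left unset, seasonal_periods is inferred from the frequency (7 for daily data)
    model = ExponentialSmoothing()
    model.fit(daily)

    # or it can be fixed explicitly
    model = ExponentialSmoothing(seasonal_periods=7)
    model.fit(daily)
    forecast = model.predict(n=14)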
Example #2
        def helper_generate_multivariate_case_data(self, season_length,
                                                   n_repeat):
            """generates multivariate test case data. Target series is a sine wave stacked with a repeating
            linear curve of equal seasonal length. Covariates are datetime attributes for 'hours'.
            """

            # generate sine wave
            ts_sine = tg.sine_timeseries(
                value_frequency=1 / season_length,
                length=n_repeat * season_length,
                freq="h",
            )

            # generate repeating linear curve
            ts_linear = tg.linear_timeseries(0,
                                             1,
                                             length=season_length,
                                             start=ts_sine.end_time() +
                                             ts_sine.freq)
            for i in range(n_repeat - 1):
                start = ts_linear.end_time() + ts_linear.freq
                new_ts = tg.linear_timeseries(0,
                                              1,
                                              length=season_length,
                                              start=start)
                ts_linear = ts_linear.append(new_ts)
            ts_linear = TimeSeries.from_times_and_values(
                times=ts_sine.time_index, values=ts_linear.values())

            # create multivariate TimeSeries by stacking sine and linear curves
            ts = ts_sine.stack(ts_linear)

            # create train/test sets
            val_length = 10 * season_length
            ts_train, ts_val = ts[:-val_length], ts[-val_length:]

            # scale data
            scaler_ts = Scaler()
            ts_train_scaled = scaler_ts.fit_transform(ts_train)
            ts_val_scaled = scaler_ts.transform(ts_val)
            ts_scaled = scaler_ts.transform(ts)

            # generate long enough covariates (past and future covariates will be the same for simplicity)
            long_enough_ts = tg.sine_timeseries(value_frequency=1 /
                                                season_length,
                                                length=1000,
                                                freq=ts.freq)
            covariates = tg.datetime_attribute_timeseries(long_enough_ts,
                                                          attribute="hour")
            scaler_covs = Scaler()
            covariates_scaled = scaler_covs.fit_transform(covariates)
            return ts_scaled, ts_train_scaled, ts_val_scaled, covariates_scaled
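The covariates in this helper come from `tg.datetime_attribute_timeseries`. A minimal sketch of what that call produces (hedged; it assumes the same darts API used in the helper above):

    from darts.utils import timeseries_generation as tg

    hourly = tg.sine_timeseries(length=48, freq="h")
    hour_cov = tg.datetime_attribute_timeseries(hourly, attribute="hour")
    # hour_cov shares the time index of `hourly` and holds the hour of day (0..23)
    # for each timestamp; it is typically scaled before being used as a covariate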
Example #3
    def test_itakura_window(self):
        n = 6
        m = 5
        slope = 1.5

        window = dtw.Itakura(max_slope=slope)
        window.init_size(n, m)

        cells = list(window)
        self.assertEqual(
            cells,
            [
                (1, 1),
                (1, 2),
                (2, 1),
                (2, 2),
                (2, 3),
                (3, 1),
                (3, 2),
                (3, 3),
                (3, 4),
                (4, 2),
                (4, 3),
                (4, 4),
                (5, 2),
                (5, 3),
                (5, 4),
                (5, 5),
                (6, 4),
                (6, 5),
            ],
        )

        sizes = [(10, 43), (543, 45), (34, 11)]

        for n, m in sizes:
            slope = m / n + 1

            series1 = tg.sine_timeseries(length=n,
                                         value_frequency=1 / n,
                                         value_phase=0)
            series2 = tg.sine_timeseries(length=m,
                                         value_frequency=1 / m,
                                         value_phase=np.pi / 4)

            dist = dtw.dtw(series1, series2,
                           window=dtw.Itakura(slope)).mean_distance()
            self.assertGreater(1, dist)
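A minimal sketch of the DTW call exercised above (hedged; `dtw` is assumed to come from `darts.dataprocessing`, and only the calls already shown in the test are relied upon):

    import numpy as np
    from darts.dataprocessing import dtw
    from darts.utils import timeseries_generation as tg

    n, m = 50, 40
    series1 = tg.sine_timeseries(length=n, value_frequency=1 / n, value_phase=0)
    series2 = tg.sine_timeseries(length=m, value_frequency=1 / m, value_phase=np.pi / 4)

    # constrain the warping path with an Itakura parallelogram window
    alignment = dtw.dtw(series1, series2, window=dtw.Itakura(max_slope=m / n + 1))
    print(alignment.mean_distance())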
Example #4
        def test_static_covariates_support(self):
            target_multi = concatenate(
                [tg.sine_timeseries(length=10, freq="h")] * 2, axis=1)

            target_multi = target_multi.with_static_covariates(
                pd.DataFrame([[0.0, 1.0], [2.0, 3.0]], index=["st1", "st2"]))

            # should work with cyclic encoding for time index
            model = TFTModel(
                input_chunk_length=3,
                output_chunk_length=4,
                add_encoders={"cyclic": {
                    "future": "hour"
                }},
                pl_trainer_kwargs={"fast_dev_run": True},
            )
            model.fit(target_multi, verbose=False)
            assert len(model.model.static_variables) == len(
                target_multi.static_covariates.columns)

            model.predict(n=1, series=target_multi, verbose=False)

            # raise an error when trained with static covariates of wrong dimensionality
            target_multi = target_multi.with_static_covariates(
                pd.concat([target_multi.static_covariates] * 2, axis=1))
            with pytest.raises(ValueError):
                model.predict(n=1, series=target_multi, verbose=False)

            # raise an error when trained with static covariates and trying to predict without
            target_multi = target_multi.with_static_covariates(None)
            with pytest.raises(ValueError):
                model.predict(n=1, series=target_multi, verbose=False)
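For context, a minimal sketch of attaching static covariates to a series (hedged; it relies only on the `with_static_covariates` and `static_covariates` API already used in the test above):

    import pandas as pd
    from darts.utils import timeseries_generation as tg

    series = tg.sine_timeseries(length=10, freq="h")
    # one row per component; this univariate series gets two static values, st1 and st2
    series = series.with_static_covariates(pd.DataFrame([[0.0, 1.0]], columns=["st1", "st2"]))
    print(series.static_covariates)  # pandas DataFrame with one row per component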
Example #5
    def helper_relevant_attributes(self, freq, length,
                                   period_attributes_tuples):

        # test random walk
        random_walk_ts = tg.random_walk_timeseries(freq=freq, length=length)
        self.assertEqual(_find_relevant_timestamp_attributes(random_walk_ts),
                         set())

        for period, relevant_attributes in period_attributes_tuples:

            # test seasonal period with no noise
            seasonal_ts = tg.sine_timeseries(freq=freq,
                                             value_frequency=1 / period,
                                             length=length)
            self.assertEqual(
                _find_relevant_timestamp_attributes(seasonal_ts),
                relevant_attributes,
                "failed to recognize season in non-noisy timeseries",
            )

            # test seasonal period with added Gaussian noise
            seasonal_noisy_ts = seasonal_ts + tg.gaussian_timeseries(
                freq=freq, length=length)
            self.assertEqual(
                _find_relevant_timestamp_attributes(seasonal_noisy_ts),
                relevant_attributes,
                "failed to recognize season in noisy timeseries",
            )
Example #6
        def test_future_covariate_handling(self):
            ts_time_index = tg.sine_timeseries(length=2, freq="h")
            ts_integer_index = TimeSeries.from_values(
                values=ts_time_index.values())

            # model requires future covariates without cyclic encoding
            model = TFTModel(input_chunk_length=1, output_chunk_length=1)
            with self.assertRaises(ValueError):
                model.fit(ts_time_index, verbose=False)

            # should work with cyclic encoding for time index
            model = TFTModel(
                input_chunk_length=1,
                output_chunk_length=1,
                add_encoders={"cyclic": {
                    "future": "hour"
                }},
            )
            model.fit(ts_time_index, verbose=False)

            # should work with relative index both with time index and integer index
            model = TFTModel(input_chunk_length=1,
                             output_chunk_length=1,
                             add_relative_index=True)
            model.fit(ts_time_index, verbose=False)
            model.fit(ts_integer_index, verbose=False)
Example #7
    def test_kalman_samples(self):
        kf = KalmanFilter(dim_x=1)

        series = tg.sine_timeseries(length=30, value_frequency=0.1)

        kf.fit(series)
        prediction = kf.filter(series, num_samples=10)

        self.assertEqual(prediction.width, 1)
        self.assertEqual(prediction.n_samples, 10)
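A minimal sketch of probabilistic Kalman filtering as exercised above (hedged; it assumes the darts API used in the test, plus `quantile_timeseries` for collapsing the samples):

    from darts.models import KalmanFilter
    from darts.utils import timeseries_generation as tg

    sine = tg.sine_timeseries(length=100, value_frequency=0.05)
    noisy = sine + 0.1 * tg.gaussian_timeseries(length=100)

    kf = KalmanFilter(dim_x=1)
    kf.fit(noisy)

    # num_samples > 1 yields a stochastic TimeSeries with one value per Monte Carlo draw
    filtered = kf.filter(noisy, num_samples=100)
    median = filtered.quantile_timeseries(0.5)  # deterministic median series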
Example #8
    def test_gaussian_process_multivariate(self):
        gpf = GaussianProcessFilter()

        sine_ts = tg.sine_timeseries(length=30, value_frequency=0.1)
        noise_ts = tg.gaussian_timeseries(length=30) * 0.1
        ts = sine_ts.stack(noise_ts)

        prediction = gpf.filter(ts)

        self.assertEqual(prediction.width, 2)
Example #9
    def test_kalman_covariates(self):
        kf = KalmanFilter(dim_x=2)

        series = tg.sine_timeseries(length=30, value_frequency=0.1)
        covariates = -series.copy()

        kf.fit(series, covariates=covariates)
        prediction = kf.filter(series, covariates=covariates)

        self.assertEqual(prediction.width, 1)
        self.assertEqual(prediction.n_samples, 1)
Example #10
    def test_kalman_multivariate(self):
        kf = KalmanFilter(dim_x=3)

        sine_ts = tg.sine_timeseries(length=30, value_frequency=0.1)
        noise_ts = tg.gaussian_timeseries(length=30) * 0.1
        series = sine_ts.stack(noise_ts)

        kf.fit(series)
        prediction = kf.filter(series)

        self.assertEqual(prediction.width, 2)
        self.assertEqual(prediction.n_samples, 1)
Example #11
    def test_routine(start, end=None, length=None):
        # testing for correct value range
        sine_ts = sine_timeseries(
            start=start,
            end=end,
            length=length,
            value_amplitude=value_amplitude,
            value_y_offset=value_y_offset,
        )
        self.assertTrue(
            (sine_ts <= value_y_offset + value_amplitude).all().all())
        self.assertTrue(
            (sine_ts >= value_y_offset - value_amplitude).all().all())
        self.assertEqual(len(sine_ts), length_assert)
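The bounds checked above follow directly from the generator's parametrization. A minimal sketch (hedged; only `tg.sine_timeseries` arguments already used in these examples are assumed):

    from darts.utils import timeseries_generation as tg

    # a sine wave with the given amplitude, vertical offset, frequency (cycles per step)
    # and phase (radians); values stay within value_y_offset +/- value_amplitude
    ts = tg.sine_timeseries(
        length=100,
        value_frequency=0.05,
        value_amplitude=2.0,
        value_y_offset=5.0,
        value_phase=0.0,
    )
    assert float(ts.values().max()) <= 5.0 + 2.0
    assert float(ts.values().min()) >= 5.0 - 2.0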
Example #12
    def test_moving_average_multivariate(self):
        ma = MovingAverage(window=3)
        sine_ts = tg.sine_timeseries(length=30, value_frequency=0.1)
        noise_ts = tg.gaussian_timeseries(length=30) * 0.1
        ts = sine_ts.stack(noise_ts)
        ts_filtered = ma.filter(ts)

        self.assertGreater(
            np.mean(np.abs(ts.values()[:, 0])),
            np.mean(np.abs(ts_filtered.values()[:, 0])),
        )
        self.assertGreater(
            np.mean(np.abs(ts.values()[:, 1])),
            np.mean(np.abs(ts_filtered.values()[:, 1])),
        )
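A minimal sketch of the moving-average filter used above (hedged; the `MovingAverage` import path and its rename to `MovingAverageFilter` in newer darts releases are assumptions):

    from darts.models import MovingAverage  # named MovingAverageFilter in newer darts releases
    from darts.utils import timeseries_generation as tg

    sine = tg.sine_timeseries(length=30, value_frequency=0.1)
    noisy = sine + 0.1 * tg.gaussian_timeseries(length=30)

    ma = MovingAverage(window=3)  # smooth over a 3-point window
    smoothed = ma.filter(noisy)   # no fit() needed; the filter is stateless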
Example #13
        def test_performance(self):
            # test TCN performance on dummy time series
            ts = tg.sine_timeseries(length=100) + tg.linear_timeseries(
                length=100, end_value=2
            )
            train, test = ts[:90], ts[90:]
            model = TCNModel(
                input_chunk_length=12,
                output_chunk_length=10,
                n_epochs=300,
                random_state=0,
            )
            model.fit(train)
            pred = model.predict(n=10)

            self.assertTrue(mae(pred, test) < 0.3)
Example #14
    def test_kalman_given_kf(self):
        nfoursid_ss = state_space.StateSpace(a=np.eye(2),
                                             b=np.ones((2, 1)),
                                             c=np.ones((1, 2)),
                                             d=np.ones((1, 1)))
        nfoursid_kf = kalman.Kalman(nfoursid_ss, np.ones((3, 3)) * 0.1)
        kf = KalmanFilter(dim_x=1, kf=nfoursid_kf)

        series = tg.sine_timeseries(length=30, value_frequency=0.1)

        prediction = kf.filter(series, covariates=-series.copy())

        self.assertEqual(kf.dim_u, 1)
        self.assertEqual(kf.dim_x, 2)
        self.assertEqual(prediction.width, 1)
        self.assertEqual(prediction.n_samples, 1)
Example #15
        def helper_test_freq_coversion(self, test_cases):
            for freq, period in test_cases.items():
                ts_sine = tg.sine_timeseries(
                    value_frequency=1 / period, length=3, freq=freq
                )
                # this should not raise an error if frequency is known
                _ = Prophet._freq_to_days(freq=ts_sine.freq_str)

            self.assertAlmostEqual(
                Prophet._freq_to_days(freq="30S"),
                30 * Prophet._freq_to_days(freq="S"),
                delta=10e-9,
            )

            # check bad frequency string
            with self.assertRaises(ValueError):
                _ = Prophet._freq_to_days(freq="30SS")
Example #16
    def denoising_input(self):
        np.random.seed(self.RANDOM_SEED)

        ts_periodic = tg.sine_timeseries(length=500)
        ts_gaussian = tg.gaussian_timeseries(length=500)
        ts_random_walk = tg.random_walk_timeseries(length=500)

        ts_cov1 = ts_periodic.stack(ts_gaussian)
        ts_cov1 = ts_cov1.pd_dataframe()
        ts_cov1.columns = ["Periodic", "Gaussian"]
        ts_cov1 = TimeSeries.from_dataframe(ts_cov1)
        ts_sum1 = ts_periodic + ts_gaussian

        ts_cov2 = ts_sum1.stack(ts_random_walk)
        ts_sum2 = ts_sum1 + ts_random_walk

        return ts_sum1, ts_cov1, ts_sum2, ts_cov2
Example #17
    def test_kalman_missing_values(self):
        sine = tg.sine_timeseries(
            length=100,
            value_frequency=0.05) + 0.1 * tg.gaussian_timeseries(length=100)
        values = sine.values()
        values[20:22] = np.nan
        values[28:40] = np.nan
        sine_holes = TimeSeries.from_values(values)
        sine = TimeSeries.from_values(sine.values())

        kf = KalmanFilter(dim_x=2)
        kf.fit(sine_holes[-50:])  # fit on the part with no holes

        # reconstruction should succeed
        filtered_series = kf.filter(sine_holes, num_samples=100)

        # reconstruction error should be sufficiently small
        self.assertLess(rmse(filtered_series, sine), 0.1)
Example #18
class BoxCoxTestCase(unittest.TestCase):

    sine_series = sine_timeseries(length=50,
                                  value_y_offset=5,
                                  value_frequency=0.05)
    lin_series = linear_timeseries(start_value=1, end_value=10, length=50)
    multi_series = sine_series.stack(lin_series)

    def test_boxbox_lambda(self):
        boxcox = BoxCox()

        boxcox.fit(self.multi_series, 0.3)
        self.assertEqual(boxcox._lmbda, [0.3, 0.3])

        boxcox.fit(self.multi_series, [0.3, 0.4])
        self.assertEqual(boxcox._lmbda, [0.3, 0.4])

        with self.assertRaises(ValueError):
            boxcox.fit(self.multi_series, [0.2, 0.4, 0.5])

        boxcox.fit(self.multi_series, optim_method='mle')
        lmbda1 = boxcox._lmbda
        boxcox.fit(self.multi_series, optim_method='pearsonr')
        lmbda2 = boxcox._lmbda

        self.assertNotEqual(lmbda1.array, lmbda2.array)

    def test_boxcox_transform(self):
        log_mapper = Mapper(lambda x: log(x))
        boxcox = BoxCox()

        transformed1 = log_mapper.transform(self.sine_series)
        transformed2 = boxcox.fit(self.sine_series,
                                  lmbda=0).transform(self.sine_series)

        self.assertEqual(transformed1, transformed2)

    def test_boxcox_inverse(self):
        boxcox = BoxCox()
        transformed = boxcox.fit_transform(self.multi_series)
        back = boxcox.inverse_transform(transformed)
        pd.testing.assert_frame_equal(self.multi_series._df,
                                      back._df,
                                      check_exact=False)
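A minimal round-trip sketch of the transformer tested above (hedged; only the `fit_transform` and `inverse_transform` calls already shown are relied upon):

    from darts.dataprocessing.transformers import BoxCox
    from darts.utils.timeseries_generation import linear_timeseries

    series = linear_timeseries(start_value=1, end_value=10, length=50)  # strictly positive values

    boxcox = BoxCox()
    transformed = boxcox.fit_transform(series)  # lambda is estimated from the data
    restored = boxcox.inverse_transform(transformed)
    # `restored` should match `series` up to floating-point error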
Example #19
class RegressionEnsembleModelsTestCase(DartsBaseTestClass):

    RANDOM_SEED = 111

    sine_series = tg.sine_timeseries(value_frequency=(1 / 5),
                                     value_y_offset=10,
                                     length=50)
    lin_series = tg.linear_timeseries(length=50)

    combined = sine_series + lin_series

    seq1 = [_make_ts(0), _make_ts(10), _make_ts(20)]
    cov1 = [_make_ts(5), _make_ts(15), _make_ts(25)]

    seq2 = [_make_ts(0, 20), _make_ts(10, 20), _make_ts(20, 20)]
    cov2 = [_make_ts(5, 30), _make_ts(15, 30), _make_ts(25, 30)]

    # dummy feature and target TimeSeries instances
    ts_periodic = tg.sine_timeseries(length=500)
    ts_gaussian = tg.gaussian_timeseries(length=500)
    ts_random_walk = tg.random_walk_timeseries(length=500)

    ts_cov1 = ts_periodic.stack(ts_gaussian)
    ts_cov1 = ts_cov1.pd_dataframe()
    ts_cov1.columns = ["Periodic", "Gaussian"]
    ts_cov1 = TimeSeries.from_dataframe(ts_cov1)
    ts_sum1 = ts_periodic + ts_gaussian

    ts_cov2 = ts_sum1.stack(ts_random_walk)
    ts_sum2 = ts_sum1 + ts_random_walk

    def get_local_models(self):
        return [NaiveDrift(), NaiveSeasonal(5), NaiveSeasonal(10)]

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def get_global_models(self, output_chunk_length=5):
        return [
            RNNModel(
                input_chunk_length=20,
                output_chunk_length=output_chunk_length,
                n_epochs=1,
                random_state=42,
            ),
            BlockRNNModel(
                input_chunk_length=20,
                output_chunk_length=output_chunk_length,
                n_epochs=1,
                random_state=42,
            ),
        ]

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_accepts_different_regression_models(self):
        regr1 = LinearRegression()
        regr2 = RandomForestRegressor()
        regr3 = RandomForest(lags_future_covariates=[0])

        model0 = RegressionEnsembleModel(self.get_local_models(), 10)
        model1 = RegressionEnsembleModel(self.get_local_models(), 10, regr1)
        model2 = RegressionEnsembleModel(self.get_local_models(), 10, regr2)
        model3 = RegressionEnsembleModel(self.get_local_models(), 10, regr3)

        models = [model0, model1, model2, model3]
        for model in models:
            model.fit(series=self.combined)
            model.predict(10)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_accepts_one_model(self):
        regr1 = LinearRegression()
        regr2 = RandomForest(lags_future_covariates=[0])

        model0 = RegressionEnsembleModel([self.get_local_models()[0]], 10)
        model1 = RegressionEnsembleModel([self.get_local_models()[0]], 10,
                                         regr1)
        model2 = RegressionEnsembleModel([self.get_local_models()[0]], 10,
                                         regr2)

        models = [model0, model1, model2]
        for model in models:
            model.fit(series=self.combined)
            model.predict(10)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_n_points(self):
        regr = LinearRegressionModel(lags_future_covariates=[0])

        # same values
        ensemble = RegressionEnsembleModel(self.get_local_models(), 5, regr)

        # too big value to perform the split
        ensemble = RegressionEnsembleModel(self.get_local_models(), 100)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)

        ensemble = RegressionEnsembleModel(self.get_local_models(), 50)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)

        # too big value considering min_train_series_length
        ensemble = RegressionEnsembleModel(self.get_local_models(), 45)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_torch_models_retrain(self):
        model1 = BlockRNNModel(input_chunk_length=12,
                               output_chunk_length=1,
                               random_state=0,
                               n_epochs=2)
        model2 = BlockRNNModel(input_chunk_length=12,
                               output_chunk_length=1,
                               random_state=0,
                               n_epochs=2)

        ensemble = RegressionEnsembleModel([model1], 5)
        ensemble.fit(self.combined)

        model1_fitted = ensemble.models[0]
        forecast1 = model1_fitted.predict(10)

        model2.fit(self.combined)
        forecast2 = model2.predict(10)

        self.assertAlmostEqual(sum(forecast1.values() - forecast2.values())[0],
                               0.0,
                               places=2)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_predict_global_models_univar(self):
        ensemble_models = self.get_global_models(output_chunk_length=10)
        ensemble_models.append(RegressionModel(lags=1))
        ensemble = RegressionEnsembleModel(ensemble_models, 10)
        ensemble.fit(series=self.combined)
        ensemble.predict(10)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_predict_global_models_multivar_no_covariates(self):
        ensemble_models = self.get_global_models(output_chunk_length=10)
        ensemble_models.append(RegressionModel(lags=1))
        ensemble = RegressionEnsembleModel(ensemble_models, 10)
        ensemble.fit(self.seq1)
        ensemble.predict(10, self.seq1)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_predict_global_models_multivar_with_covariates(self):
        ensemble_models = self.get_global_models(output_chunk_length=10)
        ensemble_models.append(
            RegressionModel(lags=1, lags_past_covariates=[-1]))
        ensemble = RegressionEnsembleModel(ensemble_models, 10)
        ensemble.fit(self.seq1, self.cov1)
        ensemble.predict(10, self.seq2, self.cov2)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def helper_test_models_accuracy(self, model_instance, n, series,
                                    past_covariates, min_rmse):
        # test whether the model predicts the target with an RMSE of at most `min_rmse`
        train_series, test_series = train_test_split(series,
                                                     pd.Timestamp("20010101"))
        train_past_covariates, _ = train_test_split(past_covariates,
                                                    pd.Timestamp("20010101"))

        model_instance.fit(series=train_series,
                           past_covariates=train_past_covariates)
        prediction = model_instance.predict(n=n,
                                            past_covariates=past_covariates)
        current_rmse = rmse(test_series, prediction)

        self.assertTrue(
            current_rmse <= min_rmse,
            f"Model was not able to denoise data. A rmse score of {current_rmse} was recorded.",
        )

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def denoising_input(self):
        np.random.seed(self.RANDOM_SEED)

        ts_periodic = tg.sine_timeseries(length=500)
        ts_gaussian = tg.gaussian_timeseries(length=500)
        ts_random_walk = tg.random_walk_timeseries(length=500)

        ts_cov1 = ts_periodic.stack(ts_gaussian)
        ts_cov1 = ts_cov1.pd_dataframe()
        ts_cov1.columns = ["Periodic", "Gaussian"]
        ts_cov1 = TimeSeries.from_dataframe(ts_cov1)
        ts_sum1 = ts_periodic + ts_gaussian

        ts_cov2 = ts_sum1.stack(ts_random_walk)
        ts_sum2 = ts_sum1 + ts_random_walk

        return ts_sum1, ts_cov1, ts_sum2, ts_cov2

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_ensemble_models_denoising(self):
        # test whether the ensemble correctly denoises ts_sum1, using ts_cov1 as past covariates
        # WARNING: this test isn't numerically stable, changing self.RANDOM_SEED can lead to exploding coefficients
        horizon = 10
        ts_sum1, ts_cov1, _, _ = self.denoising_input()
        torch.manual_seed(self.RANDOM_SEED)

        ensemble_models = [
            RNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            BlockRNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            RegressionModel(lags_past_covariates=[-1]),
        ]

        ensemble = RegressionEnsembleModel(ensemble_models, horizon)
        self.helper_test_models_accuracy(ensemble, horizon, ts_sum1, ts_cov1,
                                         3)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_ensemble_models_denoising_multi_input(self):
        # test whether the ensemble correctly denoises ts_sum2, using ts_cov2 as past covariates
        # WARNING: this test isn't numerically stable, changing self.RANDOM_SEED can lead to exploding coefficients
        horizon = 10
        _, _, ts_sum2, ts_cov2 = self.denoising_input()
        torch.manual_seed(self.RANDOM_SEED)

        ensemble_models = [
            RNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            BlockRNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            RegressionModel(lags_past_covariates=[-1]),
            RegressionModel(lags_past_covariates=[-1]),
        ]

        ensemble = RegressionEnsembleModel(ensemble_models, horizon)
        self.helper_test_models_accuracy(ensemble, horizon, ts_sum2, ts_cov2,
                                         3)
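A minimal sketch of the ensemble API exercised by this test case (hedged; the keyword names `forecasting_models` and `regression_train_n_points` are assumptions matching the positional arguments used above):

    from darts.models import NaiveDrift, NaiveSeasonal, RegressionEnsembleModel
    from darts.utils import timeseries_generation as tg

    series = tg.sine_timeseries(
        value_frequency=1 / 5, value_y_offset=10, length=50
    ) + tg.linear_timeseries(length=50)

    # the last 10 training points are held out to fit the ensembling regression
    ensemble = RegressionEnsembleModel(
        forecasting_models=[NaiveDrift(), NaiveSeasonal(K=5)],
        regression_train_n_points=10,
    )
    ensemble.fit(series)
    forecast = ensemble.predict(10)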
Example #20
    class RegressionModelsTestCase(DartsBaseTestClass):

        np.random.seed(42)

        # default regression models
        models = [
            RandomForest, LinearRegressionModel, RegressionModel, LightGBMModel
        ]

        # register likelihood regression models
        QuantileLightGBMModel = partialclass(
            LightGBMModel,
            likelihood="quantile",
            quantiles=[0.05, 0.5, 0.95],
            random_state=42,
        )
        PoissonLightGBMModel = partialclass(LightGBMModel,
                                            likelihood="poisson",
                                            random_state=42)
        QuantileLinearRegressionModel = partialclass(
            LinearRegressionModel,
            likelihood="quantile",
            quantiles=[0.05, 0.5, 0.95],
            random_state=42,
        )
        PoissonLinearRegressionModel = partialclass(LinearRegressionModel,
                                                    likelihood="poisson",
                                                    random_state=42)
        # targets for poisson regression must be positive, so we exclude them for some tests
        models.extend([
            QuantileLightGBMModel,
            QuantileLinearRegressionModel,
            PoissonLightGBMModel,
            PoissonLinearRegressionModel,
        ])

        # dummy feature and target TimeSeries instances
        target_series, past_covariates, future_covariates = dummy_timeseries(
            length=100,
            n_series=3,
            comps_target=3,
            comps_pcov=2,
            comps_fcov=1,
            multiseries_offset=10,
            pcov_offset=0,
            fcov_offset=0,
        )
        # shift sines to positive values for poisson regressors
        sine_univariate1 = tg.sine_timeseries(length=100) + 1.5
        sine_univariate2 = tg.sine_timeseries(length=100,
                                              value_phase=1.5705) + 1.5
        sine_univariate3 = tg.sine_timeseries(length=100,
                                              value_phase=0.78525) + 1.5
        sine_univariate4 = tg.sine_timeseries(length=100,
                                              value_phase=0.392625) + 1.5
        sine_univariate5 = tg.sine_timeseries(length=100,
                                              value_phase=0.1963125) + 1.5
        sine_univariate6 = tg.sine_timeseries(length=100,
                                              value_phase=0.09815625) + 1.5
        sine_multivariate1 = sine_univariate1.stack(sine_univariate2)
        sine_multivariate2 = sine_univariate2.stack(sine_univariate3)
        sine_multiseries1 = [
            sine_univariate1, sine_univariate2, sine_univariate3
        ]
        sine_multiseries2 = [
            sine_univariate4, sine_univariate5, sine_univariate6
        ]

        lags_1 = {"target": [-3, -2, -1], "past": [-4, -2], "future": [-5, 2]}

        def test_model_construction(self):
            for model in self.models:
                # TESTING SINGLE INT
                # testing lags
                model_instance = model(lags=5)
                self.assertEqual(model_instance.lags.get("target"),
                                 [-5, -4, -3, -2, -1])
                # testing lags_past_covariates
                model_instance = model(lags=None, lags_past_covariates=3)
                self.assertEqual(model_instance.lags.get("past"), [-3, -2, -1])
                # testing lags_future_covariates
                model_instance = model(lags=None,
                                       lags_future_covariates=(3, 5))
                self.assertEqual(model_instance.lags.get("future"),
                                 [-3, -2, -1, 0, 1, 2, 3, 4])

                # TESTING LIST of int
                # lags
                values = [-5, -3, -1]
                model_instance = model(lags=values)
                self.assertEqual(model_instance.lags.get("target"), values)
                # testing lags_past_covariates
                model_instance = model(lags_past_covariates=values)
                self.assertEqual(model_instance.lags.get("past"), values)
                # testing lags_future_covariates

                with self.assertRaises(ValueError):
                    model()
                with self.assertRaises(ValueError):
                    model(lags=0)
                with self.assertRaises(ValueError):
                    model(lags=[-1, 0])
                with self.assertRaises(ValueError):
                    model(lags=[3, 5])
                with self.assertRaises(ValueError):
                    model(lags=[-3, -5.0])
                with self.assertRaises(ValueError):
                    model(lags=-5)
                with self.assertRaises(ValueError):
                    model(lags=3.6)
                with self.assertRaises(ValueError):
                    model(lags=None, lags_past_covariates=False)
                with self.assertRaises(ValueError):
                    model(lags=None)
                with self.assertRaises(ValueError):
                    model(lags=5, lags_future_covariates=True)
                with self.assertRaises(ValueError):
                    model(lags=5, lags_future_covariates=(1, -3))
                with self.assertRaises(ValueError):
                    model(lags=5, lags_future_covariates=(1, 2, 3))
                with self.assertRaises(ValueError):
                    model(lags=5, lags_future_covariates=(1, True))
                with self.assertRaises(ValueError):
                    model(lags=5, lags_future_covariates=(1, 1.0))

        def test_training_data_creation(self):
            # testing _get_training_data function
            model_instance = RegressionModel(
                lags=self.lags_1["target"],
                lags_past_covariates=self.lags_1["past"],
                lags_future_covariates=self.lags_1["future"],
            )

            max_samples_per_ts = 17

            training_samples, training_labels = model_instance._create_lagged_data(
                target_series=self.target_series,
                past_covariates=self.past_covariates,
                future_covariates=self.future_covariates,
                max_samples_per_ts=max_samples_per_ts,
            )

            # checking number of dimensions
            self.assertEqual(len(training_samples.shape),
                             2)  # samples, features
            self.assertEqual(len(training_labels.shape),
                             2)  # samples, components (multivariate)
            self.assertEqual(training_samples.shape[0],
                             training_labels.shape[0])
            self.assertEqual(training_samples.shape[0],
                             len(self.target_series) * max_samples_per_ts)
            self.assertEqual(
                training_samples.shape[1],
                len(self.lags_1["target"]) * self.target_series[0].width +
                len(self.lags_1["past"]) * self.past_covariates[0].width +
                len(self.lags_1["future"]) * self.future_covariates[0].width,
            )

            # check last sample
            self.assertListEqual(
                list(training_samples[0, :]),
                [
                    79.0,
                    179.0,
                    279.0,
                    80.0,
                    180.0,
                    280.0,
                    81.0,
                    181.0,
                    281.0,
                    10078.0,
                    10178.0,
                    10080.0,
                    10180.0,
                    20077.0,
                    20084.0,
                ],
            )
            self.assertListEqual(list(training_labels[0]), [82, 182, 282])

        def test_prediction_data_creation(self):

            # assigning correct names to variables
            series = [ts[:-50] for ts in self.target_series]
            output_chunk_length = 5
            n = 12

            # prediction preprocessing start
            covariates = {
                "past": (self.past_covariates, self.lags_1.get("past")),
                "future": (self.future_covariates, self.lags_1.get("future")),
            }

            # dictionary containing covariate data over time span required for prediction
            covariate_matrices = {}
            # dictionary containing covariate lags relative to minimum covariate lag
            relative_cov_lags = {}
            # number of prediction steps given forecast horizon and output_chunk_length
            n_pred_steps = math.ceil(n / output_chunk_length)
            for cov_type, (covs, lags) in covariates.items():
                if covs is not None:
                    relative_cov_lags[cov_type] = np.array(lags) - lags[0]
                    covariate_matrices[cov_type] = []
                    for idx, (ts, cov) in enumerate(zip(series, covs)):
                        first_pred_ts = ts.end_time() + 1 * ts.freq
                        last_pred_ts = (first_pred_ts + (
                            (n_pred_steps - 1) * output_chunk_length) *
                                        ts.freq)
                        first_req_ts = first_pred_ts + lags[0] * ts.freq
                        last_req_ts = last_pred_ts + lags[-1] * ts.freq

                        # not enough covariate data checks excluded, they are tested elsewhere

                        if cov.has_datetime_index:
                            covariate_matrices[cov_type].append(
                                cov[first_req_ts:last_req_ts].values())
                        else:
                            # include last_req_ts when slicing series with integer indices
                            covariate_matrices[cov_type].append(
                                cov[first_req_ts:last_req_ts + 1].values())

                    covariate_matrices[cov_type] = np.stack(
                        covariate_matrices[cov_type])

            series_matrix = None
            if "target" in self.lags_1:
                series_matrix = np.stack(
                    [ts[self.lags_1["target"][0]:].values() for ts in series])
            # prediction preprocessing end

            # tests
            self.assertTrue(
                all([
                    lag >= 0 for lags in relative_cov_lags.values()
                    for lag in lags
                ]))
            self.assertEqual(
                covariate_matrices["past"].shape,
                (
                    len(series),
                    relative_cov_lags["past"][-1] +
                    (n_pred_steps - 1) * output_chunk_length + 1,
                    covariates["past"][0][0].width,
                ),
            )
            self.assertEqual(
                covariate_matrices["future"].shape,
                (
                    len(series),
                    relative_cov_lags["future"][-1] +
                    (n_pred_steps - 1) * output_chunk_length + 1,
                    covariates["future"][0][0].width,
                ),
            )
            self.assertEqual(
                series_matrix.shape,
                (len(series), -self.lags_1["target"][0], series[0].width),
            )
            self.assertListEqual(
                list(covariate_matrices["past"][0, :, 0]),
                [
                    10047.0,
                    10048.0,
                    10049.0,
                    10050.0,
                    10051.0,
                    10052.0,
                    10053.0,
                    10054.0,
                    10055.0,
                    10056.0,
                    10057.0,
                    10058.0,
                    10059.0,
                ],
            )
            self.assertListEqual(
                list(covariate_matrices["future"][0, :, 0]),
                [
                    20046.0,
                    20047.0,
                    20048.0,
                    20049.0,
                    20050.0,
                    20051.0,
                    20052.0,
                    20053.0,
                    20054.0,
                    20055.0,
                    20056.0,
                    20057.0,
                    20058.0,
                    20059.0,
                    20060.0,
                    20061.0,
                    20062.0,
                    20063.0,
                ],
            )
            self.assertListEqual(list(series_matrix[0, :, 0]),
                                 [48.0, 49.0, 50.0])

        def test_models_runnability(self):
            train_y, test_y = self.sine_univariate1.split_before(0.7)
            for model in self.models:
                # testing past covariates
                with self.assertRaises(ValueError):
                    # testing lags_past_covariates None but past_covariates during training
                    model_instance = model(lags=4, lags_past_covariates=None)
                    model_instance.fit(
                        series=self.sine_univariate1,
                        past_covariates=self.sine_multivariate1,
                    )

                with self.assertRaises(ValueError):
                    # testing lags_past_covariates but no past_covariates during fit
                    model_instance = model(lags=4, lags_past_covariates=3)
                    model_instance.fit(series=self.sine_univariate1)

                # testing future_covariates
                with self.assertRaises(ValueError):
                    # testing lags_future_covariates None but future_covariates during training
                    model_instance = model(lags=4, lags_future_covariates=None)
                    model_instance.fit(
                        series=self.sine_univariate1,
                        future_covariates=self.sine_multivariate1,
                    )

                with self.assertRaises(ValueError):
                    # testing lags_future_covariates but no future_covariates during fit
                    model_instance = model(lags=4, lags_future_covariates=3)
                    model_instance.fit(series=self.sine_univariate1)

                # testing input_dim
                model_instance = model(lags=4, lags_past_covariates=2)
                model_instance.fit(
                    series=train_y,
                    past_covariates=self.sine_univariate1.stack(
                        self.sine_univariate1),
                )

                self.assertEqual(model_instance.input_dim, {
                    "target": 1,
                    "past": 2,
                    "future": None
                })

                with self.assertRaises(ValueError):
                    prediction = model_instance.predict(n=len(test_y) + 2)

                # while it should work with n = 1
                prediction = model_instance.predict(n=1)
                self.assertTrue(
                    len(prediction) == 1,
                    f"Expected length 1, found {len(prediction)} instead",
                )

        def test_fit(self):
            for model in self.models:

                # test fitting both on univariate and multivariate timeseries
                for series in [self.sine_univariate1, self.sine_multivariate2]:
                    with self.assertRaises(ValueError):
                        model_instance = model(lags=4, lags_past_covariates=4)
                        model_instance.fit(
                            series=series,
                            past_covariates=self.sine_multivariate1)
                        model_instance.predict(n=10)

                    model_instance = model(lags=12)
                    model_instance.fit(series=series)
                    self.assertEqual(model_instance.lags.get("past"), None)

                    model_instance = model(lags=12, lags_past_covariates=12)
                    model_instance.fit(series=series,
                                       past_covariates=self.sine_multivariate1)
                    self.assertEqual(len(model_instance.lags.get("past")), 12)

                    model_instance = model(lags=12,
                                           lags_future_covariates=(0, 1))
                    model_instance.fit(
                        series=series,
                        future_covariates=self.sine_multivariate1)
                    self.assertEqual(len(model_instance.lags.get("future")), 1)

                    model_instance = model(lags=12,
                                           lags_past_covariates=[-1, -4, -6])
                    model_instance.fit(series=series,
                                       past_covariates=self.sine_multivariate1)
                    self.assertEqual(len(model_instance.lags.get("past")), 3)

                    model_instance = model(
                        lags=12,
                        lags_past_covariates=[-1, -4, -6],
                        lags_future_covariates=[-2, 0],
                    )
                    model_instance.fit(
                        series=series,
                        past_covariates=self.sine_multivariate1,
                        future_covariates=self.sine_multivariate1,
                    )
                    self.assertEqual(len(model_instance.lags.get("past")), 3)

        def helper_test_models_accuracy(self, series, past_covariates,
                                        min_rmse_model):
            # for every model, test whether it predicts the target with an RMSE at or below the corresponding value in `min_rmse_model`
            train_series, test_series = train_test_split(series, 70)
            train_past_covariates, _ = train_test_split(past_covariates, 70)

            for output_chunk_length in [1, 5]:
                for idx, model in enumerate(self.models):
                    model_instance = model(
                        lags=12,
                        lags_past_covariates=2,
                        output_chunk_length=output_chunk_length,
                    )
                    model_instance.fit(series=train_series,
                                       past_covariates=train_past_covariates)
                    prediction = model_instance.predict(
                        n=len(test_series),
                        series=train_series,
                        past_covariates=past_covariates,
                    )
                    current_rmse = rmse(prediction, test_series)
                    # in case of multi-series take mean rmse
                    mean_rmse = np.mean(current_rmse)
                    self.assertTrue(
                        mean_rmse <= min_rmse_model[idx],
                        f"{str(model_instance)} model was not able to predict data as well as expected. "
                        f"A mean rmse score of {mean_rmse} was recorded.",
                    )

        def test_models_accuracy_univariate(self):
            # for every model, and different output_chunk_lengths test whether it predicts the univariate time series
            # as well as expected
            self.helper_test_models_accuracy(
                self.sine_univariate1,
                self.sine_univariate2,
                [0.03, 1e-13, 1e-13, 0.3, 0.5, 0.8, 0.4, 0.4],
            )

        def test_models_accuracy_multivariate(self):
            # for every model, and different output_chunk_lengths test whether it predicts the multivariate time series
            # as well as expected
            self.helper_test_models_accuracy(
                self.sine_multivariate1,
                self.sine_multivariate2,
                [0.3, 1e-13, 1e-13, 0.4, 0.4, 0.8, 0.4, 0.4],
            )

        def test_models_accuracy_multiseries_multivariate(self):
            # for every model, and different output_chunk_lengths test whether it predicts the multiseries, multivariate
            # time series as well as expected
            self.helper_test_models_accuracy(
                self.sine_multiseries1,
                self.sine_multiseries2,
                [0.05, 1e-13, 1e-13, 0.05, 0.4, 0.8, 0.4, 0.4],
            )

        def test_historical_forecast(self):
            model = self.models[1](lags=5)
            result = model.historical_forecasts(
                series=self.sine_univariate1,
                future_covariates=None,
                start=0.8,
                forecast_horizon=1,
                stride=1,
                retrain=True,
                overlap_end=False,
                last_points_only=True,
                verbose=False,
            )
            self.assertEqual(len(result), 21)

            model = self.models[1](lags=5, lags_past_covariates=5)
            result = model.historical_forecasts(
                series=self.sine_univariate1,
                past_covariates=self.sine_multivariate1,
                start=0.8,
                forecast_horizon=1,
                stride=1,
                retrain=True,
                overlap_end=False,
                last_points_only=True,
                verbose=False,
            )
            self.assertEqual(len(result), 21)

            model = self.models[1](lags=5,
                                   lags_past_covariates=5,
                                   output_chunk_length=5)
            result = model.historical_forecasts(
                series=self.sine_univariate1,
                past_covariates=self.sine_multivariate1,
                start=0.8,
                forecast_horizon=1,
                stride=1,
                retrain=True,
                overlap_end=False,
                last_points_only=True,
                verbose=False,
            )
            self.assertEqual(len(result), 21)

        def test_multioutput_wrapper(self):
            lags = 12
            models = [
                (RegressionModel(lags=lags), True),
                (RegressionModel(lags=lags, model=LinearRegression()), True),
                (RegressionModel(lags=lags,
                                 model=RandomForestRegressor()), True),
                (
                    RegressionModel(lags=lags,
                                    model=HistGradientBoostingRegressor()),
                    False,
                ),
            ]

            for model, supports_multioutput_natively in models:
                model.fit(series=self.sine_multivariate1)
                if supports_multioutput_natively:
                    self.assertFalse(
                        isinstance(model.model, MultiOutputRegressor))
                else:
                    self.assertTrue(
                        isinstance(model.model, MultiOutputRegressor))

        def test_regression_model(self):
            lags = 12
            models = [
                RegressionModel(lags=lags),
                RegressionModel(lags=lags, model=LinearRegression()),
                RegressionModel(lags=lags, model=RandomForestRegressor()),
                RegressionModel(lags=lags,
                                model=HistGradientBoostingRegressor()),
            ]

            for model in models:
                model.fit(series=self.sine_univariate1)
                self.assertEqual(len(model.lags.get("target")), lags)
                model.predict(n=10)

        def test_multiple_ts(self):
            lags = 4
            lags_past_covariates = 3
            model = RegressionModel(lags=lags,
                                    lags_past_covariates=lags_past_covariates)

            target_series = tg.linear_timeseries(start_value=0,
                                                 end_value=49,
                                                 length=50)
            past_covariates = tg.linear_timeseries(start_value=100,
                                                   end_value=149,
                                                   length=50)
            past_covariates = past_covariates.stack(
                tg.linear_timeseries(start_value=400, end_value=449,
                                     length=50))

            target_train, target_test = target_series.split_after(0.7)
            past_covariates_train, past_covariates_test = past_covariates.split_after(
                0.7)
            model.fit(
                series=[target_train, target_train + 0.5],
                past_covariates=[
                    past_covariates_train, past_covariates_train + 0.5
                ],
            )

            predictions = model.predict(
                10,
                series=[target_train, target_train + 0.5],
                past_covariates=[past_covariates, past_covariates + 0.5],
            )

            self.assertEqual(len(predictions[0]), 10,
                             f"Found {len(predictions)} instead")

            # multiple TS, both future and past covariates, checking that both covariates lead to better results than
            # using a single one (target series = past_cov + future_cov + noise)
            np.random.seed(42)

            linear_ts_1 = tg.linear_timeseries(start_value=10,
                                               end_value=59,
                                               length=50)
            linear_ts_2 = tg.linear_timeseries(start_value=40,
                                               end_value=89,
                                               length=50)

            past_covariates = tg.sine_timeseries(length=50) * 10
            future_covariates = (
                tg.sine_timeseries(length=50, value_frequency=0.015) * 50)

            target_series_1 = linear_ts_1 + 4 * past_covariates + 2 * future_covariates
            target_series_2 = linear_ts_2 + 4 * past_covariates + 2 * future_covariates

            target_series_1_noise = (linear_ts_1 + 4 * past_covariates +
                                     2 * future_covariates +
                                     tg.gaussian_timeseries(std=7, length=50))

            target_series_2_noise = (linear_ts_2 + 4 * past_covariates +
                                     2 * future_covariates +
                                     tg.gaussian_timeseries(std=7, length=50))

            target_train_1, target_test_1 = target_series_1.split_after(0.7)
            target_train_2, target_test_2 = target_series_2.split_after(0.7)

            (
                target_train_1_noise,
                target_test_1_noise,
            ) = target_series_1_noise.split_after(0.7)
            (
                target_train_2_noise,
                target_test_2_noise,
            ) = target_series_2_noise.split_after(0.7)

            # testing improved denoise with multiple TS

            # test 1: with single TS, 2 covariates should be better than one
            model = RegressionModel(lags=3, lags_past_covariates=5)
            model.fit([target_train_1_noise], [past_covariates])

            prediction_past_only = model.predict(
                n=len(target_test_1),
                series=[target_train_1_noise, target_train_2_noise],
                past_covariates=[past_covariates] * 2,
            )

            model = RegressionModel(lags=3,
                                    lags_past_covariates=5,
                                    lags_future_covariates=(5, 0))
            model.fit([target_train_1_noise], [past_covariates],
                      [future_covariates])
            prediction_past_and_future = model.predict(
                n=len(target_test_1),
                series=[target_train_1_noise, target_train_2_noise],
                past_covariates=[past_covariates] * 2,
                future_covariates=[future_covariates] * 2,
            )

            error_past_only = rmse(
                [target_test_1, target_test_2],
                prediction_past_only,
                inter_reduction=np.mean,
            )
            error_both = rmse(
                [target_test_1, target_test_2],
                prediction_past_and_future,
                inter_reduction=np.mean,
            )

            self.assertTrue(error_past_only > error_both)
            # test 2: with both covariates, 2 TS should learn more than one (with little noise)
            model = RegressionModel(lags=3,
                                    lags_past_covariates=5,
                                    lags_future_covariates=(5, 0))
            model.fit(
                [target_train_1_noise, target_train_2_noise],
                [past_covariates] * 2,
                [future_covariates] * 2,
            )
            prediction_past_and_future_multi_ts = model.predict(
                n=len(target_test_1),
                series=[target_train_1_noise, target_train_2_noise],
                past_covariates=[past_covariates] * 2,
                future_covariates=[future_covariates] * 2,
            )
            error_both_multi_ts = rmse(
                [target_test_1, target_test_2],
                prediction_past_and_future_multi_ts,
                inter_reduction=np.mean,
            )

            self.assertTrue(error_both > error_both_multi_ts)

        def test_only_future_covariates(self):

            model = RegressionModel(lags_future_covariates=[-2])

            target_series = tg.linear_timeseries(start_value=0,
                                                 end_value=49,
                                                 length=50)
            covariates = tg.linear_timeseries(start_value=100,
                                              end_value=149,
                                              length=50)
            covariates = covariates.stack(
                tg.linear_timeseries(start_value=400, end_value=449,
                                     length=50))

            target_train, target_test = target_series.split_after(0.7)
            covariates_train, covariates_test = covariates.split_after(0.7)
            model.fit(
                series=[target_train, target_train + 0.5],
                future_covariates=[covariates_train, covariates_train + 0.5],
            )

            predictions = model.predict(
                10,
                series=[target_train, target_train + 0.5],
                future_covariates=[covariates, covariates + 0.5],
            )

            self.assertEqual(len(predictions[0]), 10,
                             f"Found {len(predictions[0])} instead")

        def test_not_enough_covariates(self):

            target_series = tg.linear_timeseries(start_value=0,
                                                 end_value=100,
                                                 length=50)
            past_covariates = tg.linear_timeseries(start_value=100,
                                                   end_value=200,
                                                   length=50)
            future_covariates = tg.linear_timeseries(start_value=200,
                                                     end_value=300,
                                                     length=50)

            model = RegressionModel(
                lags_past_covariates=[-10],
                lags_future_covariates=[-5, 5],
                output_chunk_length=7,
            )
            model.fit(
                series=target_series,
                past_covariates=past_covariates,
                future_covariates=future_covariates,
                max_samples_per_ts=1,
            )

            # output_chunk_length, required past_offset, required future_offset
            test_cases = [
                (1, 0, 13),
                (5, -4, 9),
                (7, -2, 11),
            ]
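            # (illustrative note, not part of the original test, assuming that
            #  RegressionModel auto-regresses in chunks of output_chunk_length)
            # With the target ending at index t_end, the last predicted chunk starts at
            #   t_last = t_end + 1 + (ceil(n / output_chunk_length) - 1) * output_chunk_length
            # Past covariates must then reach index t_last + max(lags_past_covariates),
            # and future covariates must reach t_last + max(lags_future_covariates),
            # which is where the required offsets listed above come from.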
            for (output_chunk_length, req_past_offset,
                 req_future_offset) in test_cases:
                model = RegressionModel(
                    lags_past_covariates=[-10],
                    lags_future_covariates=[-4, 3],
                    output_chunk_length=output_chunk_length,
                )
                model.fit(
                    series=target_series,
                    past_covariates=past_covariates,
                    future_covariates=future_covariates,
                )

                # check that given the required offsets no ValueError is raised
                model.predict(
                    10,
                    series=target_series[:-25],
                    past_covariates=past_covariates[:-25 + req_past_offset],
                    future_covariates=future_covariates[:-25 +
                                                        req_future_offset],
                )
                # check that one less past covariate time step causes ValueError
                with self.assertRaises(ValueError):
                    model.predict(
                        10,
                        series=target_series[:-25],
                        past_covariates=past_covariates[:-26 +
                                                        req_past_offset],
                        future_covariates=future_covariates[:-25 +
                                                            req_future_offset],
                    )
                # check that one less future covariate time step causes ValueError
                with self.assertRaises(ValueError):
                    model.predict(
                        10,
                        series=target_series[:-25],
                        past_covariates=past_covariates[:-25 +
                                                        req_past_offset],
                        future_covariates=future_covariates[:-26 +
                                                            req_future_offset],
                    )

        @patch.object(
            darts.models.forecasting.gradient_boosted_model.lgb.LGBMRegressor,
            "fit")
        def test_gradient_boosted_model_with_eval_set(self, lgb_fit_patch):
            """Test whether these evaluation set parameters are passed to LGBRegressor"""
            model = LightGBMModel(lags=4, lags_past_covariates=2)
            model.fit(
                series=self.sine_univariate1,
                past_covariates=self.sine_multivariate1,
                val_series=self.sine_univariate1,
                val_past_covariates=self.sine_multivariate1,
                early_stopping_rounds=2,
            )

            lgb_fit_patch.assert_called_once()

            assert lgb_fit_patch.call_args[1]["eval_set"] is not None
            assert lgb_fit_patch.call_args[1]["early_stopping_rounds"] == 2
예제 #21
0
        def test_multiple_ts(self):
            lags = 4
            lags_past_covariates = 3
            model = RegressionModel(lags=lags,
                                    lags_past_covariates=lags_past_covariates)

            target_series = tg.linear_timeseries(start_value=0,
                                                 end_value=49,
                                                 length=50)
            past_covariates = tg.linear_timeseries(start_value=100,
                                                   end_value=149,
                                                   length=50)
            past_covariates = past_covariates.stack(
                tg.linear_timeseries(start_value=400, end_value=449,
                                     length=50))

            target_train, target_test = target_series.split_after(0.7)
            past_covariates_train, past_covariates_test = past_covariates.split_after(
                0.7)
            model.fit(
                series=[target_train, target_train + 0.5],
                past_covariates=[
                    past_covariates_train, past_covariates_train + 0.5
                ],
            )

            predictions = model.predict(
                10,
                series=[target_train, target_train + 0.5],
                past_covariates=[past_covariates, past_covariates + 0.5],
            )

            self.assertEqual(len(predictions[0]), 10,
                             f"Found {len(predictions)} instead")

            # multiple TS, both future and past covariates, checking that both covariates lead to better results than
            # using a single one (target series = linear trend + 4 * past_cov + 2 * future_cov + noise)
            np.random.seed(42)

            linear_ts_1 = tg.linear_timeseries(start_value=10,
                                               end_value=59,
                                               length=50)
            linear_ts_2 = tg.linear_timeseries(start_value=40,
                                               end_value=89,
                                               length=50)

            past_covariates = tg.sine_timeseries(length=50) * 10
            future_covariates = (
                tg.sine_timeseries(length=50, value_frequency=0.015) * 50)

            target_series_1 = linear_ts_1 + 4 * past_covariates + 2 * future_covariates
            target_series_2 = linear_ts_2 + 4 * past_covariates + 2 * future_covariates

            target_series_1_noise = (linear_ts_1 + 4 * past_covariates +
                                     2 * future_covariates +
                                     tg.gaussian_timeseries(std=7, length=50))

            target_series_2_noise = (linear_ts_2 + 4 * past_covariates +
                                     2 * future_covariates +
                                     tg.gaussian_timeseries(std=7, length=50))

            target_train_1, target_test_1 = target_series_1.split_after(0.7)
            target_train_2, target_test_2 = target_series_2.split_after(0.7)

            (
                target_train_1_noise,
                target_test_1_noise,
            ) = target_series_1_noise.split_after(0.7)
            (
                target_train_2_noise,
                target_test_2_noise,
            ) = target_series_2_noise.split_after(0.7)

            # testing improved denoising with multiple TS

            # test 1: with a single training series, using both covariate types should beat using past covariates only
            model = RegressionModel(lags=3, lags_past_covariates=5)
            model.fit([target_train_1_noise], [past_covariates])

            prediction_past_only = model.predict(
                n=len(target_test_1),
                series=[target_train_1_noise, target_train_2_noise],
                past_covariates=[past_covariates] * 2,
            )

            model = RegressionModel(lags=3,
                                    lags_past_covariates=5,
                                    lags_future_covariates=(5, 0))
            model.fit([target_train_1_noise], [past_covariates],
                      [future_covariates])
            prediction_past_and_future = model.predict(
                n=len(target_test_1),
                series=[target_train_1_noise, target_train_2_noise],
                past_covariates=[past_covariates] * 2,
                future_covariates=[future_covariates] * 2,
            )

            error_past_only = rmse(
                [target_test_1, target_test_2],
                prediction_past_only,
                inter_reduction=np.mean,
            )
            error_both = rmse(
                [target_test_1, target_test_2],
                prediction_past_and_future,
                inter_reduction=np.mean,
            )

            self.assertTrue(error_past_only > error_both)
            # test 2: with both covariates, training on 2 series should do better than training on one (given little noise)
            model = RegressionModel(lags=3,
                                    lags_past_covariates=5,
                                    lags_future_covariates=(5, 0))
            model.fit(
                [target_train_1_noise, target_train_2_noise],
                [past_covariates] * 2,
                [future_covariates] * 2,
            )
            prediction_past_and_future_multi_ts = model.predict(
                n=len(target_test_1),
                series=[target_train_1_noise, target_train_2_noise],
                past_covariates=[past_covariates] * 2,
                future_covariates=[future_covariates] * 2,
            )
            error_both_multi_ts = rmse(
                [target_test_1, target_test_2],
                prediction_past_and_future_multi_ts,
                inter_reduction=np.mean,
            )

            self.assertTrue(error_both > error_both_multi_ts)
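
Note on the RMSE comparisons above: inter_reduction=np.mean averages the per-series scores into a single number. A minimal sketch of that behaviour (illustration only; the series and "forecasts" below are made up, and it assumes the darts metrics API used by these tests, where the default inter_reduction returns one score per series):

import numpy as np
import darts.utils.timeseries_generation as tg
from darts.metrics import rmse

# two arbitrary "actual" series and slightly perturbed "forecasts" (made up for illustration)
actual = [tg.sine_timeseries(length=30), tg.linear_timeseries(length=30)]
preds = [s + 0.1 for s in actual]

per_series = rmse(actual, preds)                         # one RMSE per series
averaged = rmse(actual, preds, inter_reduction=np.mean)  # single averaged score
assert np.isclose(averaged, np.mean(per_series))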
예제 #22
0
class EnsembleModelsTestCase(DartsBaseTestClass):
    series1 = tg.sine_timeseries(value_frequency=(1 / 5),
                                 value_y_offset=10,
                                 length=50)
    series2 = tg.linear_timeseries(length=50)

    seq1 = [_make_ts(0), _make_ts(10), _make_ts(20)]
    cov1 = [_make_ts(5), _make_ts(15), _make_ts(25)]

    def test_untrained_models(self):
        model = NaiveDrift()
        _ = NaiveEnsembleModel([model])

        # an already-trained model should raise an error
        model.fit(self.series1)
        with self.assertRaises(ValueError):
            NaiveEnsembleModel([model])

    def test_input_models_local_models(self):
        with self.assertRaises(ValueError):
            NaiveEnsembleModel([])
        with self.assertRaises(ValueError):
            NaiveEnsembleModel(
                [NaiveDrift, NaiveSeasonal, Theta, ExponentialSmoothing])
        with self.assertRaises(ValueError):
            NaiveEnsembleModel(
                [NaiveDrift(), NaiveSeasonal,
                 Theta(),
                 ExponentialSmoothing()])
        NaiveEnsembleModel(
            [NaiveDrift(),
             NaiveSeasonal(),
             Theta(),
             ExponentialSmoothing()])

    def test_call_predict_local_models(self):
        naive_ensemble = NaiveEnsembleModel([NaiveSeasonal(), Theta()])
        with self.assertRaises(Exception):
            naive_ensemble.predict(5)
        naive_ensemble.fit(self.series1)
        naive_ensemble.predict(5)

    def test_predict_ensemble_local_models(self):
        naive = NaiveSeasonal(K=5)
        theta = Theta()
        naive_ensemble = NaiveEnsembleModel([naive, theta])
        naive_ensemble.fit(self.series1 + self.series2)
        forecast_naive_ensemble = naive_ensemble.predict(5)
        naive.fit(self.series1 + self.series2)
        theta.fit(self.series1 + self.series2)
        forecast_mean = 0.5 * naive.predict(5) + 0.5 * theta.predict(5)

        self.assertTrue(
            np.array_equal(forecast_naive_ensemble.values(),
                           forecast_mean.values()))

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_input_models_global_models(self):
        NaiveEnsembleModel([RNNModel(12), TCNModel(10, 2), NBEATSModel(10, 2)])

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_call_predict_global_models_univariate_input_no_covariates(self):
        naive_ensemble = NaiveEnsembleModel([
            RNNModel(12, n_epochs=1),
            TCNModel(10, 2, n_epochs=1),
            NBEATSModel(10, 2, n_epochs=1),
        ])
        with self.assertRaises(Exception):
            naive_ensemble.predict(5)

        naive_ensemble.fit(self.series1)
        naive_ensemble.predict(5)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_call_predict_global_models_multivariate_input_no_covariates(self):
        naive_ensemble = NaiveEnsembleModel([
            RNNModel(12, n_epochs=1),
            TCNModel(10, 2, n_epochs=1),
            NBEATSModel(10, 2, n_epochs=1),
        ])
        naive_ensemble.fit(self.seq1)
        naive_ensemble.predict(n=5, series=self.seq1)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_call_predict_global_models_multivariate_input_with_covariates(
            self):
        naive_ensemble = NaiveEnsembleModel([
            RNNModel(12, n_epochs=1),
            TCNModel(10, 2, n_epochs=1),
            NBEATSModel(10, 2, n_epochs=1),
        ])
        naive_ensemble.fit(self.seq1, self.cov1)
        predict_series = [s[:12] for s in self.seq1]
        predict_covariates = [c[:14] for c in self.cov1]
        naive_ensemble.predict(n=2,
                               series=predict_series,
                               past_covariates=predict_covariates)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_input_models_mixed(self):
        with self.assertRaises(ValueError):
            NaiveEnsembleModel([NaiveDrift(), Theta(), RNNModel(12)])

    def test_fit_multivar_ts_with_local_models(self):
        naive = NaiveEnsembleModel(
            [NaiveDrift(),
             NaiveSeasonal(),
             Theta(),
             ExponentialSmoothing()])
        with self.assertRaises(ValueError):
            naive.fit(self.seq1)

    def test_fit_univar_ts_with_covariates_for_local_models(self):
        naive = NaiveEnsembleModel(
            [NaiveDrift(),
             NaiveSeasonal(),
             Theta(),
             ExponentialSmoothing()])
        with self.assertRaises(ValueError):
            naive.fit(self.series1, self.series2)
예제 #23
0
class ReconciliationTestCase(unittest.TestCase):
    __test__ = True

    @classmethod
    def setUpClass(cls):
        logging.disable(logging.CRITICAL)

    np.random.seed(42)
    """ test case with a more intricate hierarchy """
    LENGTH = 200
    total_series = (tg.sine_timeseries(value_frequency=0.03, length=LENGTH) +
                    1 + tg.gaussian_timeseries(length=LENGTH) * 0.2)
    bottom_1 = total_series / 3 + tg.gaussian_timeseries(length=LENGTH) * 0.01
    bottom_2 = 2 * total_series / 3 + tg.gaussian_timeseries(
        length=LENGTH) * 0.01
    series = concatenate([total_series, bottom_1, bottom_2], axis=1)
    hierarchy = {"sine_1": ["sine"], "sine_2": ["sine"]}
    series = series.with_hierarchy(hierarchy)

    # get a single forecast
    model = LinearRegressionModel(lags=30, output_chunk_length=10)
    model.fit(series)
    pred = model.predict(n=20)

    # get a backtest forecast to get residuals
    pred_back = model.historical_forecasts(series,
                                           start=0.75,
                                           forecast_horizon=10)
    intersection = series.slice_intersect(pred_back)
    residuals = intersection - pred_back
    """ test case with a more intricate hierarchy """
    components_complex = ["total", "a", "b", "x", "y", "ax", "ay", "bx", "by"]

    hierarchy_complex = {
        "ax": ["a", "x"],
        "ay": ["a", "y"],
        "bx": ["b", "x"],
        "by": ["b", "y"],
        "a": ["total"],
        "b": ["total"],
        "x": ["total"],
        "y": ["total"],
    }

    series_complex = TimeSeries.from_values(
        values=np.random.rand(50, len(components_complex), 5),
        columns=components_complex,
        hierarchy=hierarchy_complex,
    )

    def _assert_reconciliation(self, fitted_recon):
        pred_r = fitted_recon.transform(self.pred)
        np.testing.assert_almost_equal(
            pred_r["sine"].values(copy=False),
            (pred_r["sine_1"] + pred_r["sine_2"]).values(copy=False),
        )

    def _assert_reconciliation_complex(self, fitted_recon):
        reconciled = fitted_recon.transform(self.series_complex)

        def _assert_comps(comp, comps):
            np.testing.assert_almost_equal(
                reconciled[comp].values(copy=False),
                sum(reconciled[c] for c in comps).values(copy=False),
            )

        _assert_comps("a", ["ax", "ay"])
        _assert_comps("b", ["bx", "by"])
        _assert_comps("x", ["ax", "bx"])
        _assert_comps("y", ["ay", "by"])
        _assert_comps("total", ["ax", "ay", "bx", "by"])
        _assert_comps("total", ["a", "b"])
        _assert_comps("total", ["x", "y"])

    def test_bottom_up(self):
        recon = BottomUpReconciliator()
        self._assert_reconciliation(recon)

    def test_top_down(self):
        # should work when fitting on training series
        recon = TopDownReconciliator()
        recon.fit(self.series)
        self._assert_reconciliation(recon)

        # or when fitting on forecasts
        recon = TopDownReconciliator()
        recon.fit(self.pred)
        self._assert_reconciliation(recon)

    def test_mint(self):
        # ols
        recon = MinTReconciliator("ols")
        recon.fit(self.series)
        self._assert_reconciliation(recon)

        # wls_struct
        recon = MinTReconciliator("wls_struct")
        recon.fit(self.series)
        self._assert_reconciliation(recon)

        # wls_var
        recon = MinTReconciliator("wls_var")
        recon.fit(self.residuals)
        self._assert_reconciliation(recon)

        # mint_cov
        recon = MinTReconciliator("mint_cov")
        recon.fit(self.residuals)
        self._assert_reconciliation(recon)

        # wls_val
        recon = MinTReconciliator("wls_val")
        recon.fit(self.series)
        self._assert_reconciliation(recon)

    def test_summation_matrix(self):
        np.testing.assert_equal(
            _get_summation_matrix(self.series_complex),
            np.array([
                [1, 1, 1, 1],
                [1, 1, 0, 0],
                [0, 0, 1, 1],
                [1, 0, 1, 0],
                [0, 1, 0, 1],
                [1, 0, 0, 0],
                [0, 1, 0, 0],
                [0, 0, 1, 0],
                [0, 0, 0, 1],
            ]),
        )

    def test_hierarchy_preserved_after_predict(self):
        self.assertEqual(self.pred.hierarchy, self.series.hierarchy)

    def test_more_intricate_hierarchy(self):
        recon = BottomUpReconciliator()
        self._assert_reconciliation_complex(recon)

        recon = TopDownReconciliator()
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)

        recon = MinTReconciliator("ols")
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)

        recon = MinTReconciliator("wls_struct")
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)

        recon = MinTReconciliator("wls_val")
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)
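
The summation matrix asserted in test_summation_matrix encodes how every component of the hierarchy is built from the bottom-level components (ax, ay, bx, by). A minimal numpy sketch of that relationship, reusing the component ordering from the test above (illustrative only; this is not the darts implementation):

import numpy as np

# rows follow components_complex = ["total", "a", "b", "x", "y", "ax", "ay", "bx", "by"],
# columns follow the bottom-level components ["ax", "ay", "bx", "by"]
S = np.array([
    [1, 1, 1, 1],  # total = ax + ay + bx + by
    [1, 1, 0, 0],  # a     = ax + ay
    [0, 0, 1, 1],  # b     = bx + by
    [1, 0, 1, 0],  # x     = ax + bx
    [0, 1, 0, 1],  # y     = ay + by
    [1, 0, 0, 0],  # ax
    [0, 1, 0, 0],  # ay
    [0, 0, 1, 0],  # bx
    [0, 0, 0, 1],  # by
])

bottom = np.array([1.0, 2.0, 3.0, 4.0])  # arbitrary bottom-level values
reconciled = S @ bottom                  # every level is consistent with the bottom level
assert reconciled[0] == bottom.sum()            # "total"
assert reconciled[1] == bottom[0] + bottom[1]   # "a" = ax + ay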
예제 #24
0
        def helper_test_prophet_model(self, period, freq, compare_all_models=False):
            """Test which includes adding custom seasonalities and future covariates. The tests compare the output of
            univariate and stochastic forecasting with the validation timeseries and Prophet's base model output.

            The underlying curve to forecast is a sine timeseries multiplied with another sine timeseries.
            The curve shape repeats every 2*period timesteps (i.e. for period=24 hours -> seasonal_periods=48).
            We take the second sine wave as a covariate for the model.
            With the added custom seasonality and covariate, the model should have a very accurate forecast.
            """
            repetitions = 8
            ts_sine1 = tg.sine_timeseries(
                value_frequency=1 / period, length=period * repetitions, freq=freq
            )
            ts_sine2 = tg.sine_timeseries(
                value_frequency=1 / (period * 2), length=period * repetitions, freq=freq
            )
            ts_sine = ts_sine1 * ts_sine2
            covariate = ts_sine2

            split = int(-period * repetitions / 2)
            train, val = ts_sine[:split], ts_sine[split:]
            train_cov, val_cov = covariate[:split], covariate[split:]

            suppress_auto_seasonality = {
                "daily_seasonality": False,
                "weekly_seasonality": False,
                "yearly_seasonality": False,
            }
            custom_seasonality = {
                "name": "custom",
                "seasonal_periods": int(2 * period),
                "fourier_order": 4,
            }
            model = Prophet(
                add_seasonalities=custom_seasonality,
                seasonality_mode="additive",
                **suppress_auto_seasonality
            )

            model.fit(train, future_covariates=train_cov)

            # univariate, stochastic and Prophet's base model forecast
            pred_darts = model.predict(
                n=len(val), num_samples=1, future_covariates=val_cov
            )
            compare_preds = [pred_darts]

            if compare_all_models:
                pred_darts_stochastic = model.predict(
                    n=len(val), num_samples=200, future_covariates=val_cov
                )
                pred_raw_df = model.predict_raw(n=len(val), future_covariates=val_cov)
                pred_raw = TimeSeries.from_dataframe(
                    pred_raw_df[["ds", "yhat"]], time_col="ds"
                )
                compare_preds += [
                    pred_darts_stochastic.quantile_timeseries(0.5),
                    pred_raw,
                ]

            # all predictions should fit the underlying curve very well
            for pred in compare_preds:
                for val_i, pred_i in zip(
                    val.univariate_values(), pred.univariate_values()
                ):
                    self.assertAlmostEqual(val_i, pred_i, delta=0.1)
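
A quick standalone check of the docstring's claim that the sine product repeats every 2*period timesteps (purely illustrative numpy, mirroring the construction in the test but independent of darts and Prophet):

import numpy as np

period = 24
t = np.arange(8 * period)
# same construction as in the test: sine with period `period` times sine with period 2*period
curve = np.sin(2 * np.pi * t / period) * np.sin(2 * np.pi * t / (2 * period))

# product-to-sum: the result only contains frequencies 1/(2*period) and 3/(2*period),
# so the curve repeats every 2*period samples
np.testing.assert_allclose(curve[: 2 * period], curve[2 * period : 4 * period], atol=1e-12)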
예제 #25
0
    class GlobalForecastingModelsTestCase(DartsBaseTestClass):
        # forecasting horizon used in runnability tests
        forecasting_horizon = 12

        np.random.seed(42)
        torch.manual_seed(42)

        # some arbitrary static covariates
        static_covariates = pd.DataFrame([[0.0, 1.0]], columns=["st1", "st2"])

        # real timeseries for functionality tests
        ts_passengers = (AirPassengersDataset().load().with_static_covariates(
            static_covariates))
        scaler = Scaler()
        ts_passengers = scaler.fit_transform(ts_passengers)
        ts_pass_train, ts_pass_val = ts_passengers[:-36], ts_passengers[-36:]

        # an additional noisy series
        ts_pass_train_1 = ts_pass_train + 0.01 * tg.gaussian_timeseries(
            length=len(ts_pass_train),
            freq=ts_pass_train.freq_str,
            start=ts_pass_train.start_time(),
        )

        # an additional time series serving as covariates
        year_series = tg.datetime_attribute_timeseries(ts_passengers,
                                                       attribute="year")
        month_series = tg.datetime_attribute_timeseries(ts_passengers,
                                                        attribute="month")
        scaler_dt = Scaler()
        time_covariates = scaler_dt.fit_transform(
            year_series.stack(month_series))
        time_covariates_train, time_covariates_val = (
            time_covariates[:-36],
            time_covariates[-36:],
        )

        # an artificial time series that is highly dependent on covariates
        ts_length = 400
        split_ratio = 0.6
        sine_1_ts = tg.sine_timeseries(length=ts_length)
        sine_2_ts = tg.sine_timeseries(length=ts_length, value_frequency=0.05)
        sine_3_ts = tg.sine_timeseries(length=ts_length,
                                       value_frequency=0.003,
                                       value_amplitude=5)
        linear_ts = tg.linear_timeseries(length=ts_length,
                                         start_value=3,
                                         end_value=8)

        covariates = sine_3_ts.stack(sine_2_ts).stack(linear_ts)
        covariates_past, _ = covariates.split_after(split_ratio)

        target = sine_1_ts + sine_2_ts + linear_ts + sine_3_ts
        target_past, target_future = target.split_after(split_ratio)

        def test_save_model_parameters(self):
            # model creation parameters were saved before; check that the re-created model has the same params as the original
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                self.assertEqual(model._model_params,
                                 model.untrained_model()._model_params)

        def test_single_ts(self):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(
                    input_chunk_length=IN_LEN,
                    output_chunk_length=OUT_LEN,
                    random_state=0,
                    **kwargs,
                )
                model.fit(self.ts_pass_train)
                pred = model.predict(n=36)
                mape_err = mape(self.ts_pass_val, pred)
                self.assertTrue(
                    mape_err < err,
                    "Model {} produces errors too high (one time "
                    "series). Error = {}".format(model_cls, mape_err),
                )
                self.assertTrue(
                    pred.static_covariates.equals(
                        self.ts_passengers.static_covariates))

        def test_multi_ts(self):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(
                    input_chunk_length=IN_LEN,
                    output_chunk_length=OUT_LEN,
                    random_state=0,
                    **kwargs,
                )
                model.fit([self.ts_pass_train, self.ts_pass_train_1])
                with self.assertRaises(ValueError):
                    # when model is fit from >1 series, one must provide a series in argument
                    model.predict(n=1)
                pred = model.predict(n=36, series=self.ts_pass_train)
                mape_err = mape(self.ts_pass_val, pred)
                self.assertTrue(
                    mape_err < err,
                    "Model {} produces errors too high (several time "
                    "series). Error = {}".format(model_cls, mape_err),
                )

                # check prediction for several time series
                pred_list = model.predict(
                    n=36, series=[self.ts_pass_train, self.ts_pass_train_1])
                self.assertTrue(
                    len(pred_list) == 2,
                    f"Model {model_cls} did not return a list of prediction",
                )
                for pred in pred_list:
                    mape_err = mape(self.ts_pass_val, pred)
                    self.assertTrue(
                        mape_err < err,
                        "Model {} produces errors too high (several time series 2). "
                        "Error = {}".format(model_cls, mape_err),
                    )

        def test_covariates(self):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(
                    input_chunk_length=IN_LEN,
                    output_chunk_length=OUT_LEN,
                    random_state=0,
                    **kwargs,
                )

                # Here we rely on the fact that all non-Dual models currently are Past models
                cov_name = ("future_covariates" if isinstance(
                    model, DualCovariatesTorchModel) else "past_covariates")
                cov_kwargs = {
                    cov_name:
                    [self.time_covariates_train, self.time_covariates_train]
                }
                model.fit(series=[self.ts_pass_train, self.ts_pass_train_1],
                          **cov_kwargs)
                with self.assertRaises(ValueError):
                    # when model is fit from >1 series, one must provide a series in argument
                    model.predict(n=1)

                with self.assertRaises(ValueError):
                    # when model is fit using multiple covariates, covariates are required at prediction time
                    model.predict(n=1, series=self.ts_pass_train)

                cov_kwargs_train = {cov_name: self.time_covariates_train}
                cov_kwargs_notrain = {cov_name: self.time_covariates}
                with self.assertRaises(ValueError):
                    # when model is fit using covariates, n cannot be greater than output_chunk_length...
                    model.predict(n=13,
                                  series=self.ts_pass_train,
                                  **cov_kwargs_train)

                # ... unless future covariates are provided
                pred = model.predict(n=13,
                                     series=self.ts_pass_train,
                                     **cov_kwargs_notrain)

                pred = model.predict(n=12,
                                     series=self.ts_pass_train,
                                     **cov_kwargs_notrain)
                mape_err = mape(self.ts_pass_val, pred)
                self.assertTrue(
                    mape_err < err,
                    "Model {} produces errors too high (several time "
                    "series with covariates). Error = {}".format(
                        model_cls, mape_err),
                )

                # when model is fit using 1 training and 1 covariate series, time series args are optional
                if model._is_probabilistic:
                    continue
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                model.fit(series=self.ts_pass_train, **cov_kwargs_train)
                pred1 = model.predict(1)
                pred2 = model.predict(1, series=self.ts_pass_train)
                pred3 = model.predict(1, **cov_kwargs_train)
                pred4 = model.predict(1,
                                      **cov_kwargs_train,
                                      series=self.ts_pass_train)
                self.assertEqual(pred1, pred2)
                self.assertEqual(pred1, pred3)
                self.assertEqual(pred1, pred4)

        def test_future_covariates(self):
            # models trained with covariates should produce better predictions over a long forecasting horizon
            # than a model trained without covariates
            model = TCNModel(
                input_chunk_length=50,
                output_chunk_length=5,
                n_epochs=20,
                random_state=0,
            )

            model.fit(series=self.target_past)
            long_pred_no_cov = model.predict(n=160)

            model = TCNModel(
                input_chunk_length=50,
                output_chunk_length=5,
                n_epochs=20,
                random_state=0,
            )
            model.fit(series=self.target_past,
                      past_covariates=self.covariates_past)
            long_pred_with_cov = model.predict(n=160,
                                               past_covariates=self.covariates)
            self.assertTrue(
                mape(self.target_future, long_pred_no_cov) > mape(
                    self.target_future, long_pred_with_cov),
                "Models with future covariates should produce better predictions.",
            )

            # block models can predict up to self.output_chunk_length points beyond the end of the past covariates...
            model.predict(n=165, past_covariates=self.covariates)

            # ... not more
            with self.assertRaises(ValueError):
                model.predict(n=166, past_covariates=self.covariates)

            # recurrent models can only predict data points for time steps where future covariates are available
            model = RNNModel(12, n_epochs=1)
            model.fit(series=self.target_past,
                      future_covariates=self.covariates_past)
            model.predict(n=160, future_covariates=self.covariates)
            with self.assertRaises(ValueError):
                model.predict(n=161, future_covariates=self.covariates)

        def test_batch_predictions(self):
            # predicting multiple time series at once needs to work for arbitrary batch sizes
            # univariate case
            targets_univar = [
                self.target_past,
                self.target_past[:60],
                self.target_past[:80],
            ]
            self._batch_prediction_test_helper_function(targets_univar)

            # multivariate case
            targets_multivar = [tgt.stack(tgt) for tgt in targets_univar]
            self._batch_prediction_test_helper_function(targets_multivar)

        def _batch_prediction_test_helper_function(self, targets):
            epsilon = 1e-4
            model = TCNModel(
                input_chunk_length=50,
                output_chunk_length=10,
                n_epochs=10,
                random_state=0,
            )
            model.fit(series=targets[0], past_covariates=self.covariates_past)
            preds_default = model.predict(
                n=160,
                series=targets,
                past_covariates=[self.covariates] * len(targets),
                batch_size=None,
            )

            # sweep batch sizes, including ones large enough to require stacking samples
            for batch_size in range(1, 4 * len(targets)):
                preds = model.predict(
                    n=160,
                    series=targets,
                    past_covariates=[self.covariates] * len(targets),
                    batch_size=batch_size,
                )
                for i in range(len(targets)):
                    self.assertLess(
                        sum(sum((preds[i] - preds_default[i]).values())),
                        epsilon)

        def test_predict_from_dataset_unsupported_input(self):
            # an exception should be thrown if an unsupported type is passed
            unsupported_type = "unsupported_type"
            # just need to test this with one model
            model_cls, kwargs, err = models_cls_kwargs_errs[0]
            model = model_cls(input_chunk_length=IN_LEN,
                              output_chunk_length=OUT_LEN,
                              **kwargs)
            model.fit([self.ts_pass_train, self.ts_pass_train_1])

            with self.assertRaises(ValueError):
                model.predict_from_dataset(
                    n=1, input_series_dataset=unsupported_type)

        def test_prediction_with_different_n(self):
            # test model predictions for n < out_len, n == out_len and n > out_len
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                self.assertTrue(
                    isinstance(
                        model,
                        (
                            PastCovariatesTorchModel,
                            DualCovariatesTorchModel,
                            MixedCovariatesTorchModel,
                        ),
                    ),
                    "unit test not yet defined for the given {X}CovariatesTorchModel.",
                )

                if isinstance(model, PastCovariatesTorchModel):
                    past_covs, future_covs = self.covariates, None
                elif isinstance(model, DualCovariatesTorchModel):
                    past_covs, future_covs = None, self.covariates
                else:
                    past_covs, future_covs = self.covariates, self.covariates

                model.fit(
                    self.target_past,
                    past_covariates=past_covs,
                    future_covariates=future_covs,
                    epochs=1,
                )

                # test prediction for n < out_len, n == out_len and n > out_len
                for n in [OUT_LEN - 1, OUT_LEN, 2 * OUT_LEN - 1]:
                    pred = model.predict(n=n,
                                         past_covariates=past_covs,
                                         future_covariates=future_covs)
                    self.assertEqual(len(pred), n)

        def test_same_result_with_different_n_jobs(self):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)

                multiple_ts = [self.ts_pass_train] * 10

                model.fit(multiple_ts)

                # save random state for two successive identical predictions
                if model._is_probabilistic():
                    random_state = deepcopy(model._random_instance)
                else:
                    random_state = None

                pred1 = model.predict(n=36, series=multiple_ts, n_jobs=1)

                if random_state is not None:
                    model._random_instance = random_state

                pred2 = model.predict(
                    n=36, series=multiple_ts,
                    n_jobs=-1)  # assuming > 1 core available in the machine
                self.assertEqual(
                    pred1,
                    pred2,
                    "Model {} produces different predictions with different number of jobs",
                )

        @patch(
            "darts.models.forecasting.torch_forecasting_model.TorchForecastingModel._init_trainer"
        )
        def test_fit_with_constr_epochs(self, init_trainer):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                multiple_ts = [self.ts_pass_train] * 10
                model.fit(multiple_ts)

                init_trainer.assert_called_with(max_epochs=kwargs["n_epochs"],
                                                trainer_params=ANY)

        @patch(
            "darts.models.forecasting.torch_forecasting_model.TorchForecastingModel._init_trainer"
        )
        def test_fit_with_fit_epochs(self, init_trainer):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                multiple_ts = [self.ts_pass_train] * 10
                epochs = 3

                model.fit(multiple_ts, epochs=epochs)
                init_trainer.assert_called_with(max_epochs=epochs,
                                                trainer_params=ANY)

                model.total_epochs = epochs
                # continue training
                model.fit(multiple_ts, epochs=epochs)
                init_trainer.assert_called_with(max_epochs=epochs,
                                                trainer_params=ANY)

        @patch(
            "darts.models.forecasting.torch_forecasting_model.TorchForecastingModel._init_trainer"
        )
        def test_fit_from_dataset_with_epochs(self, init_trainer):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                multiple_ts = [self.ts_pass_train] * 10
                train_dataset = model._build_train_dataset(
                    multiple_ts,
                    past_covariates=None,
                    future_covariates=None,
                    max_samples_per_ts=None,
                )
                epochs = 3

                model.fit_from_dataset(train_dataset, epochs=epochs)
                init_trainer.assert_called_with(max_epochs=epochs,
                                                trainer_params=ANY)

                # continue training
                model.fit_from_dataset(train_dataset, epochs=epochs)
                init_trainer.assert_called_with(max_epochs=epochs,
                                                trainer_params=ANY)

        def test_predict_after_fit_from_dataset(self):
            model_cls, kwargs, _ = models_cls_kwargs_errs[0]
            model = model_cls(input_chunk_length=IN_LEN,
                              output_chunk_length=OUT_LEN,
                              **kwargs)

            multiple_ts = [self.ts_pass_train] * 10
            train_dataset = model._build_train_dataset(
                multiple_ts,
                past_covariates=None,
                future_covariates=None,
                max_samples_per_ts=None,
            )
            model.fit_from_dataset(train_dataset, epochs=3)

            # test predict() works after fit_from_dataset()
            model.predict(n=1, series=multiple_ts[0])

        def test_sample_smaller_than_batch_size(self):
            """
            Checking that the TorchForecastingModels do not crash even if the number of available samples for training
            is strictly lower than the selected batch_size
            """
            # TS with 50 timestamps. TorchForecastingModels will use the SequentialDataset for producing training
            # samples, which means we will have 50 - (20 + 2) + 1 = 29 samples, which is < 32 (batch_size). The model
            # should still train on those samples and not crash in any way
            ts = linear_timeseries(start_value=0, end_value=1, length=50)

            model = RNNModel(input_chunk_length=20,
                             output_chunk_length=2,
                             n_epochs=2,
                             batch_size=32)
            model.fit(ts)

        def test_max_samples_per_ts(self):
            """
            Checking that we can fit TorchForecastingModels with max_samples_per_ts, without crash
            """

            ts = linear_timeseries(start_value=0, end_value=1, length=50)

            model = RNNModel(input_chunk_length=20,
                             output_chunk_length=2,
                             n_epochs=2,
                             batch_size=32)

            model.fit(ts, max_samples_per_ts=5)

        def test_residuals(self):
            """
            Torch models should not fail when computing residuals on a series
            long enough to accommodate at least one training sample.
            """
            ts = linear_timeseries(start_value=0, end_value=1, length=38)

            model = NBEATSModel(
                input_chunk_length=24,
                output_chunk_length=12,
                num_stacks=2,
                num_blocks=1,
                num_layers=1,
                layer_widths=2,
                n_epochs=2,
            )

            model.residuals(ts)
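
As a side note on test_sample_smaller_than_batch_size above, the sample-count arithmetic generalizes as in the sketch below (assuming the usual sliding-window behaviour of darts' sequential training datasets; the helper name is made up for illustration):

def n_training_samples(series_length, input_chunk_length, output_chunk_length):
    # one sample per position of an (input_chunk_length + output_chunk_length)-wide window
    return series_length - (input_chunk_length + output_chunk_length) + 1

# the configuration used in the test: 50 timestamps and a 20 + 2 window -> 29 samples < batch_size of 32
assert n_training_samples(50, 20, 2) == 29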
예제 #26
0
 def test_moving_average_univariate(self):
     ma = MovingAverage(window=3, centered=False)
     sine_ts = tg.sine_timeseries(length=30, value_frequency=0.1)
     sine_filtered = ma.filter(sine_ts)
     self.assertGreater(np.mean(np.abs(sine_ts.values())),
                        np.mean(np.abs(sine_filtered.values())))
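
The assertion above holds because averaging neighbouring samples of a zero-mean sine partially cancels them. A standalone numpy sketch of the same effect, using the same window size and frequency as the test but independent of the darts MovingAverage filter:

import numpy as np

t = np.arange(30)
x = np.sin(2 * np.pi * 0.1 * t)                          # sine with value_frequency=0.1
smoothed = np.convolve(x, np.ones(3) / 3, mode="valid")  # length-3 moving average

# the smoothed signal has a smaller mean absolute value than the original
assert np.mean(np.abs(smoothed)) < np.mean(np.abs(x))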
예제 #27
0
 def helper_test_seasonality_inference(self, freq_string,
                                       expected_seasonal_periods):
     series = tg.sine_timeseries(length=200, freq=freq_string)
     model = ExponentialSmoothing()
     model.fit(series)
     self.assertEqual(model.seasonal_periods, expected_seasonal_periods)
예제 #28
0
class BoxCoxTestCase(unittest.TestCase):

    sine_series = sine_timeseries(length=50,
                                  value_y_offset=5,
                                  value_frequency=0.05)
    lin_series = linear_timeseries(start_value=1, end_value=10, length=50)
    multi_series = sine_series.stack(lin_series)

    def test_boxbox_lambda(self):
        boxcox = BoxCox(lmbda=0.3)

        boxcox.fit(self.multi_series)
        self.assertEqual(boxcox._fitted_params, [[0.3, 0.3]])

        boxcox = BoxCox(lmbda=[0.3, 0.4])
        boxcox.fit(self.multi_series)
        self.assertEqual(boxcox._fitted_params, [[0.3, 0.4]])

        with self.assertRaises(ValueError):
            boxcox = BoxCox(lmbda=[0.2, 0.4, 0.5])
            boxcox.fit(self.multi_series)

        boxcox = BoxCox(optim_method="mle")
        boxcox.fit(self.multi_series)
        lmbda1 = boxcox._fitted_params[0].tolist()

        boxcox = BoxCox(optim_method="pearsonr")
        boxcox.fit(self.multi_series)
        lmbda2 = boxcox._fitted_params[0].tolist()

        self.assertNotEqual(lmbda1, lmbda2)

    def test_boxcox_transform(self):
        log_mapper = Mapper(lambda x: np.log(x))
        boxcox = BoxCox(lmbda=0)

        transformed1 = log_mapper.transform(self.sine_series)
        transformed2 = boxcox.fit(self.sine_series).transform(self.sine_series)

        np.testing.assert_almost_equal(
            transformed1.all_values(copy=False),
            transformed2.all_values(copy=False),
            decimal=4,
        )

    def test_boxcox_inverse(self):
        boxcox = BoxCox()
        transformed = boxcox.fit_transform(self.multi_series)
        back = boxcox.inverse_transform(transformed)
        pd.testing.assert_frame_equal(self.multi_series.pd_dataframe(),
                                      back.pd_dataframe(),
                                      check_exact=False)

    def test_boxcox_multi_ts(self):

        test_cases = [
            ([[0.2, 0.4], [0.3, 0.6]]),  # full lambda
            (0.4),  # single value
            None,  # None
        ]

        for lmbda in test_cases:
            box_cox = BoxCox(lmbda=lmbda)
            transformed = box_cox.fit_transform(
                [self.multi_series, self.multi_series])
            back = box_cox.inverse_transform(transformed)
            pd.testing.assert_frame_equal(
                self.multi_series.pd_dataframe(),
                back[0].pd_dataframe(),
                check_exact=False,
            )
            pd.testing.assert_frame_equal(
                self.multi_series.pd_dataframe(),
                back[1].pd_dataframe(),
                check_exact=False,
            )

    def test_boxcox_multiple_calls_to_fit(self):
        """
        This test checks that fitting the transformer a second time recalculates the lambdas
        instead of keeping the old ones
        """
        box_cox = BoxCox()

        box_cox.fit(self.sine_series)
        lambda1 = deepcopy(box_cox._fitted_params)[0].tolist()

        box_cox.fit(self.lin_series)
        lambda2 = deepcopy(box_cox._fitted_params)[0].tolist()

        self.assertNotEqual(
            lambda1, lambda2,
            "Lambdas should change when the transformer is retrained")

    def test_multivariate_stochastic_series(self):
        transformer = BoxCox()
        vals = np.random.rand(10, 5, 10)
        series = TimeSeries.from_values(vals)

        new_series = transformer.fit_transform(series)
        series_back = transformer.inverse_transform(new_series)

        # Test inverse transform
        np.testing.assert_allclose(series.all_values(),
                                   series_back.all_values())
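
test_boxcox_transform above relies on the Box-Cox transform with lmbda=0 coinciding with the natural logarithm. A short numpy sketch of that identity (standard textbook definition of the one-parameter Box-Cox transform; not taken from the darts source):

import numpy as np

def box_cox(x, lmbda):
    # one-parameter Box-Cox: (x**lmbda - 1) / lmbda for lmbda != 0, log(x) at lmbda == 0
    return np.log(x) if lmbda == 0 else (x ** lmbda - 1.0) / lmbda

x = np.linspace(1.0, 10.0, 5)
np.testing.assert_allclose(box_cox(x, 0), np.log(x))
# the lmbda -> 0 limit of the general formula is also log(x)
np.testing.assert_allclose(box_cox(x, 1e-8), np.log(x), rtol=1e-5)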
예제 #29
0
class DynamicTimeWarpingTestCase(DartsBaseTestClass):
    length = 20
    freq = 1 / length
    series1 = tg.sine_timeseries(length=length,
                                 value_frequency=freq,
                                 value_phase=0,
                                 value_y_offset=5)
    series2 = tg.sine_timeseries(length=length,
                                 value_frequency=freq,
                                 value_phase=np.pi / 4,
                                 value_y_offset=5)

    def test_shift(self):
        input1 = [
            1,
            1,
            1,
            1,
            1.2,
            1.4,
            1.2,
            1,
            1,
            1,
            1,
            1,
            1,
            1.2,
            1.4,
            1.6,
            1.8,
            1.6,
            1.4,
            1.2,
            1,
            1,
        ]
        input2 = [1] + input1[:-1]

        expected_path = ([(0, 0)] + list(
            (i - 1, i) for i in range(1, len(input1))) +
                         [(len(input1) - 1, len(input2) - 1)])

        series1 = _series_from_values(input1)
        series2 = _series_from_values(input2)

        exact_alignment = dtw.dtw(series1, series2, multi_grid_radius=-1)

        self.assertEqual(
            exact_alignment.distance(),
            0,
            "Minimum cost between two shifted series should be 0",
        )
        self.assertTrue(np.array_equal(exact_alignment.path(), expected_path),
                        "Incorrect path")

    def test_multi_grid(self):
        size = 2**5 - 1  # test odd size
        freq = 1 / size
        input1 = np.cos(np.arange(size) * 2 * np.pi * freq)
        input2 = np.sin(np.arange(size) * 2 * np.pi *
                        freq) + 0.1 * np.random.random(size=size)

        series1 = _series_from_values(input1)
        series2 = _series_from_values(input2)

        exact_distance = dtw.dtw(series1, series2,
                                 multi_grid_radius=-1).distance()
        approx_distance = dtw.dtw(series1, series2,
                                  multi_grid_radius=1).distance()

        self.assertAlmostEqual(exact_distance, approx_distance, 3)

    def test_sakoe_chiba_window(self):
        window = 2
        alignment = dtw.dtw(self.series1,
                            self.series2,
                            window=dtw.SakoeChiba(window_size=2))
        path = alignment.path()

        for i, j in path:
            self.assertGreaterEqual(window, abs(i - j))

    def test_itakura_window(self):
        n = 6
        m = 5
        slope = 1.5

        window = dtw.Itakura(max_slope=slope)
        window.init_size(n, m)

        cells = list(window)
        self.assertEqual(
            cells,
            [
                (1, 1),
                (1, 2),
                (2, 1),
                (2, 2),
                (2, 3),
                (3, 1),
                (3, 2),
                (3, 3),
                (3, 4),
                (4, 2),
                (4, 3),
                (4, 4),
                (5, 2),
                (5, 3),
                (5, 4),
                (5, 5),
                (6, 4),
                (6, 5),
            ],
        )

        sizes = [(10, 43), (543, 45), (34, 11)]

        for n, m in sizes:
            slope = m / n + 1

            series1 = tg.sine_timeseries(length=n,
                                         value_frequency=1 / n,
                                         value_phase=0)
            series2 = tg.sine_timeseries(length=m,
                                         value_frequency=1 / m,
                                         value_phase=np.pi / 4)

            dist = dtw.dtw(series1, series2,
                           window=dtw.Itakura(slope)).mean_distance()
            self.assertGreater(1, dist)

    def test_warp(self):
        # Support different time dimension names
        xa1 = self.series1.data_array().rename({"time": "time1"})
        xa2 = self.series2.data_array().rename({"time": "time2"})

        static_covs = pd.DataFrame([[0.0, 1.0]], columns=["st1", "st2"])
        series1 = TimeSeries.from_xarray(xa1).with_static_covariates(
            static_covs)
        series2 = TimeSeries.from_xarray(xa2).with_static_covariates(
            static_covs)

        alignment = dtw.dtw(series1, series2)

        warped1, warped2 = alignment.warped()
        self.assertAlmostEqual(alignment.mean_distance(),
                               mae(warped1, warped2))
        assert warped1.static_covariates.equals(series1.static_covariates)
        assert warped2.static_covariates.equals(series2.static_covariates)
        """
        See DTWAlignment.warped for why this functionality is currently disabled

        #Mutually Exclusive Option
        with self.assertRaises(ValueError):
            alignment.warped(take_dates=True, range_index=True)

        #Take_dates does not support indexing by RangeIndex
        with self.assertRaises(ValueError):
            xa3 = xa1.copy()
            xa3["time1"] = pd.RangeIndex(0, len(self.series1))

            dtw.dtw(TimeSeries.from_xarray(xa3), series2).warped(take_dates=True)


        warped1, warped2 = alignment.warped(take_dates=True)
        self.assertTrue(np.all(warped1.time_index == warped2.time_index))
        """

    def test_metric(self):
        metric1 = dtw_metric(self.series1, self.series2, metric=mae)
        metric2 = dtw_metric(self.series1, self.series2, metric=mape)

        self.assertGreater(0.5, metric1)
        self.assertGreater(5, metric2)

    def test_nans(self):
        with self.assertRaises(ValueError):
            series1 = _series_from_values([np.nan, 0, 1, 2, 3])
            series2 = _series_from_values([0, 1, 2, 3, 4])

            dtw.dtw(series1, series2)

    def test_plot(self):
        align = dtw.dtw(self.series2, self.series1)
        align.plot()
        align.plot_alignment()

    def test_multivariate(self):
        n = 2

        values1 = np.repeat(self.series1.univariate_values(), n)
        values2 = np.repeat(self.series2.univariate_values(), n)

        values1 = values1.reshape((-1, n))
        values2 = values2.reshape((-1, n))

        multi_series1 = TimeSeries.from_values(values1)
        multi_series2 = TimeSeries.from_values(values2)

        radius = 2

        alignment_uni = dtw.dtw(self.series1,
                                self.series2,
                                multi_grid_radius=radius)
        alignment_multi = dtw.dtw(multi_series1,
                                  multi_series2,
                                  multi_grid_radius=radius)

        self.assertTrue(np.all(alignment_uni.path() == alignment_multi.path()))
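
For reference, a minimal textbook DTW distance in pure numpy, illustrating what distance() measures in test_shift above: the sum of pointwise distances along the best alignment path, which is zero for a series aligned against a one-step-shifted copy of itself. This is an illustration only and differs from the darts implementation in API and features:

import numpy as np

def dtw_distance(a, b):
    # classic dynamic-programming DTW with absolute-difference point cost
    n, m = len(a), len(b)
    cost = np.full((n + 1, m + 1), np.inf)
    cost[0, 0] = 0.0
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            d = abs(a[i - 1] - b[j - 1])
            cost[i, j] = d + min(cost[i - 1, j], cost[i, j - 1], cost[i - 1, j - 1])
    return cost[n, m]

x = np.array([1, 1, 1.2, 1.4, 1.2, 1, 1])
y = np.concatenate(([x[0]], x[:-1]))  # one-step shifted copy, as in test_shift
assert dtw_distance(x, y) == 0.0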
예제 #30
0
class BoxCoxTestCase(unittest.TestCase):

    sine_series = sine_timeseries(length=50, value_y_offset=5, value_frequency=0.05)
    lin_series = linear_timeseries(start_value=1, end_value=10, length=50)
    multi_series = sine_series.stack(lin_series)

    def test_boxbox_lambda(self):
        boxcox = BoxCox(lmbda=0.3)

        boxcox.fit(self.multi_series)
        self.assertEqual(boxcox._fitted_params, [[0.3, 0.3]])

        boxcox = BoxCox(lmbda=[0.3, 0.4])
        boxcox.fit(self.multi_series)
        self.assertEqual(boxcox._fitted_params, [[0.3, 0.4]])

        with self.assertRaises(ValueError):
            boxcox = BoxCox(lmbda=[0.2, 0.4, 0.5])
            boxcox.fit(self.multi_series)

        boxcox = BoxCox(optim_method='mle')
        boxcox.fit(self.multi_series)
        lmbda1 = boxcox._fitted_params[0].tolist()

        boxcox = BoxCox(optim_method='pearsonr')
        boxcox.fit(self.multi_series)
        lmbda2 = boxcox._fitted_params[0].tolist()

        self.assertNotEqual(lmbda1, lmbda2)

    def test_boxcox_transform(self):
        log_mapper = Mapper(lambda x: np.log(x))
        boxcox = BoxCox(lmbda=0)

        transformed1 = log_mapper.transform(self.sine_series)
        transformed2 = boxcox.fit(self.sine_series).transform(self.sine_series)

        self.assertEqual(transformed1, transformed2)

    def test_boxcox_inverse(self):
        boxcox = BoxCox()
        transformed = boxcox.fit_transform(self.multi_series)
        back = boxcox.inverse_transform(transformed)
        pd.testing.assert_frame_equal(self.multi_series._df, back._df, check_exact=False)

    def test_boxcox_multi_ts(self):

        test_cases = [
            ([[0.2, 0.4], [0.3, 0.6]]),  # full lambda
            (0.4),  # single value
            None  # None
        ]

        for lmbda in test_cases:
            box_cox = BoxCox(lmbda=lmbda)
            transformed = box_cox.fit_transform([self.multi_series, self.multi_series])
            back = box_cox.inverse_transform(transformed)
            pd.testing.assert_frame_equal(self.multi_series._df, back[0]._df, check_exact=False)
            pd.testing.assert_frame_equal(self.multi_series._df, back[1]._df, check_exact=False)

    def test_boxcox_multiple_calls_to_fit(self):
        """
        This test checks that fitting the transformer a second time recalculates the lambdas
        instead of keeping the old ones
        """
        box_cox = BoxCox()

        box_cox.fit(self.sine_series)
        lambda1 = deepcopy(box_cox._fitted_params)[0].tolist()

        box_cox.fit(self.lin_series)
        lambda2 = deepcopy(box_cox._fitted_params)[0].tolist()

        self.assertNotEqual(lambda1, lambda2, "Lambdas should change when the transformer is retrained")