Example #1
    def helper_relevant_attributes(self, freq, length,
                                   period_attributes_tuples):

        # test random walk
        random_walk_ts = tg.random_walk_timeseries(freq=freq, length=length)
        self.assertEqual(_find_relevant_timestamp_attributes(random_walk_ts),
                         set())

        for period, relevant_attributes in period_attributes_tuples:

            # test seasonal period with no noise
            seasonal_ts = tg.sine_timeseries(freq=freq,
                                             value_frequency=1 / period,
                                             length=length)
            self.assertEqual(
                _find_relevant_timestamp_attributes(seasonal_ts),
                relevant_attributes,
                "failed to recognize season in non-noisy timeseries",
            )

            # test seasonal period with noise
            seasonal_noisy_ts = seasonal_ts + tg.gaussian_timeseries(
                freq=freq, length=length)
            self.assertEqual(
                _find_relevant_timestamp_attributes(seasonal_noisy_ts),
                relevant_attributes,
                "failed to recognize season in noisy timeseries",
            )
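
A minimal standalone sketch of the darts generators these tests exercise (assuming `darts.utils.timeseries_generation` is imported as `tg`, as the test module does):

from darts.utils import timeseries_generation as tg

# a daily series with a weekly season, plus additive Gaussian noise
seasonal_ts = tg.sine_timeseries(freq="D", value_frequency=1 / 7, length=100)
noisy_ts = seasonal_ts + tg.gaussian_timeseries(freq="D", length=100)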
Example #2
    def test_stationarity_tests(self):
        series_1 = constant_timeseries(start=0, end=9999).stack(
            constant_timeseries(start=0, end=9999))

        series_2 = TimeSeries.from_values(
            np.random.uniform(0, 1, (1000, 2, 1000)))
        series_3 = gaussian_timeseries(start=0, end=9999)

        # Test univariate requirement (multivariate input should raise)
        with self.assertRaises(AssertionError):
            stationarity_tests(series_1)
        with self.assertRaises(AssertionError):
            stationarity_test_adf(series_1)
        with self.assertRaises(AssertionError):
            stationarity_test_kpss(series_1)

        # Test deterministic requirement (stochastic input should raise)
        with self.assertRaises(AssertionError):
            stationarity_tests(series_2)
        with self.assertRaises(AssertionError):
            stationarity_test_adf(series_2)
        with self.assertRaises(AssertionError):
            stationarity_test_kpss(series_2)

        # Test basics
        self.assertTrue(stationarity_test_kpss(series_3)[1] > 0.05)
        self.assertTrue(stationarity_test_adf(series_3)[1] < 0.05)
        self.assertTrue(stationarity_tests(series_3))
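
For reference, the p-value conventions behind the last three assertions, as a hedged standalone sketch (these helpers wrap the statsmodels ADF and KPSS tests, so index [1] of each result is the p-value, as the assertions above already rely on; the import locations are those used by the darts test suite):

from darts.utils.statistics import stationarity_test_adf, stationarity_test_kpss
from darts.utils.timeseries_generation import gaussian_timeseries

series = gaussian_timeseries(start=0, end=999)  # stationary white noise
adf_p = stationarity_test_adf(series)[1]   # ADF H0 = unit root; a small p-value rejects it
kpss_p = stationarity_test_kpss(series)[1] # KPSS H0 = stationarity; a large p-value keeps it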
Example #3
    def test_gaussian_process_multivariate(self):
        gpf = GaussianProcessFilter()

        sine_ts = tg.sine_timeseries(length=30, value_frequency=0.1)
        noise_ts = tg.gaussian_timeseries(length=30) * 0.1
        ts = sine_ts.stack(noise_ts)

        prediction = gpf.filter(ts)

        self.assertEqual(prediction.width, 2)
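
The filtered series keeps the input width because one Gaussian process is fitted per component. A hedged usage sketch, continuing from the example above; the `kernel` and `alpha` arguments are assumptions based on the sklearn GaussianProcessRegressor this filter wraps:

from sklearn.gaussian_process.kernels import RBF

gpf = GaussianProcessFilter(kernel=RBF(), alpha=0.1)  # extra kwargs forwarded to the regressor (assumption)
filtered = gpf.filter(ts)
assert filtered.width == ts.width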
Example #4
    def test_granger_causality(self):
        series_cause_1 = constant_timeseries(start=0, end=9999).stack(
            constant_timeseries(start=0, end=9999))
        series_cause_2 = gaussian_timeseries(start=0, end=9999)
        series_effect_1 = constant_timeseries(start=0, end=999)
        series_effect_2 = TimeSeries.from_values(np.random.uniform(
            0, 1, 10000))
        series_effect_3 = TimeSeries.from_values(
            np.random.uniform(0, 1, (1000, 2, 1000)))
        series_effect_4 = constant_timeseries(start=pd.Timestamp("2000-01-01"),
                                              length=10000)

        # Test univariate requirement (multivariate input should raise)
        with self.assertRaises(AssertionError):
            granger_causality_tests(series_cause_1,
                                    series_effect_1,
                                    10,
                                    verbose=False)
        with self.assertRaises(AssertionError):
            granger_causality_tests(series_effect_1,
                                    series_cause_1,
                                    10,
                                    verbose=False)

        # Test deterministic requirement (stochastic input should raise)
        with self.assertRaises(AssertionError):
            granger_causality_tests(series_cause_1,
                                    series_effect_3,
                                    10,
                                    verbose=False)
        with self.assertRaises(AssertionError):
            granger_causality_tests(series_effect_3,
                                    series_cause_1,
                                    10,
                                    verbose=False)

        # Test mismatched time axes (RangeIndex vs. DatetimeIndex)
        with self.assertRaises(ValueError):
            granger_causality_tests(series_cause_2,
                                    series_effect_4,
                                    10,
                                    verbose=False)

        # Test Granger basics
        tests = granger_causality_tests(series_effect_2,
                                        series_effect_2,
                                        10,
                                        verbose=False)
        self.assertTrue(tests[1][0]["ssr_ftest"][1] > 0.99)
        tests = granger_causality_tests(series_cause_2,
                                        series_effect_2,
                                        10,
                                        verbose=False)
        self.assertTrue(tests[1][0]["ssr_ftest"][1] > 0.01)
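
A hedged note on reading the result: `granger_causality_tests` returns the statsmodels result dict keyed by lag, where each entry's first element maps test names to `(statistic, p_value, df_denom, df_num)` tuples, so the expressions above pick the p-value of the SSR F-test at lag 1. Continuing from the variables above:

tests = granger_causality_tests(series_cause_2, series_effect_2, 10, verbose=False)
f_stat, p_value, df_denom, df_num = tests[1][0]["ssr_ftest"]  # results at lag 1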
Example #5
    def test_kalman_multivariate(self):
        kf = KalmanFilter(dim_x=3)

        sine_ts = tg.sine_timeseries(length=30, value_frequency=0.1)
        noise_ts = tg.gaussian_timeseries(length=30) * 0.1
        series = sine_ts.stack(noise_ts)

        kf.fit(series)
        prediction = kf.filter(series)

        self.assertEqual(prediction.width, 2)
        self.assertEqual(prediction.n_samples, 1)
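
A short sketch of the same flow with a probabilistic output; the `num_samples` argument is taken from Example #9 below, not assumed:

kf = KalmanFilter(dim_x=3)
kf.fit(series)
probabilistic = kf.filter(series, num_samples=100)  # n_samples becomes 100 instead of 1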
Example #6
    def test_exogenous_variables_support(self):
        # test case with pd.DatetimeIndex
        target_dt_idx = self.ts_gaussian
        fc_dt_idx = self.ts_gaussian_long

        # test case with numerical pd.RangeIndex
        target_num_idx = TimeSeries.from_times_and_values(
            times=tg._generate_index(start=0, length=len(self.ts_gaussian)),
            values=self.ts_gaussian.all_values(copy=False),
        )
        fc_num_idx = TimeSeries.from_times_and_values(
            times=tg._generate_index(start=0,
                                     length=len(self.ts_gaussian_long)),
            values=self.ts_gaussian_long.all_values(copy=False),
        )

        for target, future_covariates in zip([target_dt_idx, target_num_idx],
                                             [fc_dt_idx, fc_num_idx]):
            for model in dual_models:
                # skip models which do not support RangeIndex
                if isinstance(target.time_index, pd.RangeIndex):
                    try:
                        # _supports_range_index raises a ValueError if model does not support RangeIndex
                        model._supports_range_index()
                    except ValueError:
                        continue

                # Test model runnability and proper future covariates slicing
                model.fit(target, future_covariates=future_covariates)
                prediction = model.predict(self.forecasting_horizon,
                                           future_covariates=future_covariates)

                self.assertTrue(len(prediction) == self.forecasting_horizon)

                # Test mismatch in length between exogenous variables and forecasting horizon
                with self.assertRaises(ValueError):
                    model.predict(
                        self.forecasting_horizon,
                        future_covariates=tg.gaussian_timeseries(
                            start=future_covariates.start_time(),
                            length=self.forecasting_horizon - 1,
                        ),
                    )

                # Test mismatch in time-index/length between series and exogenous variables
                with self.assertRaises(ValueError):
                    model.fit(target, future_covariates=target[:-1])
                with self.assertRaises(ValueError):
                    model.fit(target[1:], future_covariates=target[:-1])
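
A minimal standalone sketch of the fit/predict pattern under test, assuming any darts model that accepts future covariates (ARIMA is used here purely as an illustration):

from darts.models import ARIMA
from darts.utils import timeseries_generation as tg

target = tg.gaussian_timeseries(length=100, mean=50)
future_covariates = tg.gaussian_timeseries(length=110)  # must extend past the forecast horizon
model = ARIMA()
model.fit(target, future_covariates=future_covariates)
pred = model.predict(5, future_covariates=future_covariates)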
Example #7
    def test_moving_average_multivariate(self):
        ma = MovingAverage(window=3)
        sine_ts = tg.sine_timeseries(length=30, value_frequency=0.1)
        noise_ts = tg.gaussian_timeseries(length=30) * 0.1
        ts = sine_ts.stack(noise_ts)
        ts_filtered = ma.filter(ts)

        self.assertGreater(
            np.mean(np.abs(ts.values()[:, 0])),
            np.mean(np.abs(ts_filtered.values()[:, 0])),
        )
        self.assertGreater(
            np.mean(np.abs(ts.values()[:, 1])),
            np.mean(np.abs(ts_filtered.values()[:, 1])),
        )
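
A hedged usage note: only the `window` argument is taken from the test above. The filter smooths each component independently and preserves the series width, which is what the amplitude assertions check:

ma = MovingAverage(window=5)  # wider window -> stronger smoothing
ts_smoothed = ma.filter(ts)   # same width and time axis as the input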
Example #8
    def denoising_input(self):
        np.random.seed(self.RANDOM_SEED)

        ts_periodic = tg.sine_timeseries(length=500)
        ts_gaussian = tg.gaussian_timeseries(length=500)
        ts_random_walk = tg.random_walk_timeseries(length=500)

        ts_cov1 = ts_periodic.stack(ts_gaussian)
        ts_cov1 = ts_cov1.pd_dataframe()
        ts_cov1.columns = ["Periodic", "Gaussian"]
        ts_cov1 = TimeSeries.from_dataframe(ts_cov1)
        ts_sum1 = ts_periodic + ts_gaussian

        ts_cov2 = ts_sum1.stack(ts_random_walk)
        ts_sum2 = ts_sum1 + ts_random_walk

        return ts_sum1, ts_cov1, ts_sum2, ts_cov2
Example #9
    def test_kalman_missing_values(self):
        sine = tg.sine_timeseries(
            length=100,
            value_frequency=0.05) + 0.1 * tg.gaussian_timeseries(length=100)
        values = sine.values()
        values[20:22] = np.nan
        values[28:40] = np.nan
        sine_holes = TimeSeries.from_values(values)
        sine = TimeSeries.from_values(sine.values())

        kf = KalmanFilter(dim_x=2)
        kf.fit(sine_holes[-50:])  # fit on the part with no holes

        # reconstruction should succeed
        filtered_series = kf.filter(sine_holes, num_samples=100)

        # reconstruction error should be sufficiently small
        self.assertLess(rmse(filtered_series, sine), 0.1)
Example #10
    class ProbabilisticRegressionModelsTestCase(DartsBaseTestClass):
        models_cls_kwargs_errs = [
            (
                LightGBMModel,
                {
                    "lags": 2,
                    "likelihood": "quantile",
                    "random_state": 42
                },
                0.4,
            ),
            (
                LightGBMModel,
                {
                    "lags": 2,
                    "likelihood": "quantile",
                    "quantiles": [0.1, 0.3, 0.5, 0.7, 0.9],
                    "random_state": 42,
                },
                0.4,
            ),
            (
                LightGBMModel,
                {
                    "lags": 2,
                    "likelihood": "poisson",
                    "random_state": 42
                },
                0.6,
            ),
            (
                LinearRegressionModel,
                {
                    "lags": 2,
                    "likelihood": "quantile",
                    "random_state": 42
                },
                0.6,
            ),
            (
                LinearRegressionModel,
                {
                    "lags": 2,
                    "likelihood": "poisson",
                    "random_state": 42
                },
                0.6,
            ),
        ]

        constant_ts = tg.constant_timeseries(length=200, value=0.5)
        constant_noisy_ts = constant_ts + tg.gaussian_timeseries(length=200,
                                                                 std=0.1)
        constant_multivar_ts = constant_ts.stack(constant_ts)
        constant_noisy_multivar_ts = constant_noisy_ts.stack(constant_noisy_ts)
        num_samples = 5

        def test_fit_predict_determinism(self):

            for model_cls, model_kwargs, _ in self.models_cls_kwargs_errs:
                # test whether the first predictions of two models initialized with the same random state are identical
                model = model_cls(**model_kwargs)
                model.fit(self.constant_noisy_multivar_ts)
                pred1 = model.predict(n=10, num_samples=2).values()

                model = model_cls(**model_kwargs)
                model.fit(self.constant_noisy_multivar_ts)
                pred2 = model.predict(n=10, num_samples=2).values()

                self.assertTrue((pred1 == pred2).all())

                # test whether the next prediction of the same model is different
                pred3 = model.predict(n=10, num_samples=2).values()
                self.assertTrue((pred2 != pred3).any())

        def test_probabilistic_forecast_accuracy(self):
            for model_cls, model_kwargs, err in self.models_cls_kwargs_errs:
                self.helper_test_probabilistic_forecast_accuracy(
                    model_cls,
                    model_kwargs,
                    err,
                    self.constant_ts,
                    self.constant_noisy_ts,
                )
                if issubclass(model_cls, GlobalForecastingModel):
                    self.helper_test_probabilistic_forecast_accuracy(
                        model_cls,
                        model_kwargs,
                        err,
                        self.constant_multivar_ts,
                        self.constant_noisy_multivar_ts,
                    )

        def helper_test_probabilistic_forecast_accuracy(
                self, model_cls, model_kwargs, err, ts, noisy_ts):
            model = model_cls(**model_kwargs)
            model.fit(noisy_ts[:100])
            pred = model.predict(n=100, num_samples=100)

            # test accuracy of the median prediction compared to the noiseless ts
            mae_err_median = mae(ts[100:], pred)
            self.assertLess(mae_err_median, err)

            # test accuracy for increasing quantiles between 0.7 and 1 (accuracy should roughly decrease, i.e. MAE should roughly increase)
            tested_quantiles = [0.7, 0.8, 0.9, 0.99]
            mae_err = mae_err_median
            for quantile in tested_quantiles:
                new_mae = mae(ts[100:],
                              pred.quantile_timeseries(quantile=quantile))
                self.assertLess(mae_err, new_mae + 0.1)
                mae_err = new_mae

            # test accuracy for decreasing quantiles between 0.3 and 0 (accuracy should roughly decrease, i.e. MAE should roughly increase)
            tested_quantiles = [0.3, 0.2, 0.1, 0.01]
            mae_err = mae_err_median
            for quantile in tested_quantiles:
                new_mae = mae(ts[100:],
                              pred.quantile_timeseries(quantile=quantile))
                self.assertLess(mae_err, new_mae + 0.1)
                mae_err = new_mae
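
A condensed sketch of the probabilistic regression pattern this test case drives, using only calls that appear above:

constant_noisy_ts = tg.constant_timeseries(length=200, value=0.5) + tg.gaussian_timeseries(length=200, std=0.1)

model = LinearRegressionModel(lags=2, likelihood="quantile", random_state=42)
model.fit(constant_noisy_ts)
pred = model.predict(n=100, num_samples=100)     # stochastic forecast
median = pred.quantile_timeseries(quantile=0.5)
q90 = pred.quantile_timeseries(quantile=0.9)     # tail quantiles drift away from the truth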
Example #11
        def test_coverage(self):
            torch.manual_seed(0)
            input_chunk_lengths = range(20, 50)
            kernel_sizes = range(2, 5)
            dilation_bases = range(2, 5)

            for kernel_size in kernel_sizes:
                for dilation_base in dilation_bases:
                    if dilation_base > kernel_size:
                        continue
                    for input_chunk_length in input_chunk_lengths:

                        # create model with all weights set to one
                        model = TCNModel(
                            input_chunk_length=input_chunk_length,
                            output_chunk_length=1,
                            kernel_size=kernel_size,
                            dilation_base=dilation_base,
                            weight_norm=False,
                            n_epochs=1,
                        )

                        # we have to fit the model on a dummy series in order to create the internal nn.Module
                        model.fit(tg.gaussian_timeseries(length=100))

                        for res_block in model.model.res_blocks:
                            res_block.conv1.weight = torch.nn.Parameter(
                                torch.ones(
                                    res_block.conv1.weight.shape, dtype=torch.float64
                                )
                            )
                            res_block.conv2.weight = torch.nn.Parameter(
                                torch.ones(
                                    res_block.conv2.weight.shape, dtype=torch.float64
                                )
                            )

                        model.model.eval()

                        # also disable MC Dropout:
                        model.model.set_mc_dropout(False)

                        input_tensor = torch.zeros(
                            [1, input_chunk_length, 1], dtype=torch.float64
                        )
                        zero_output = model.model.forward((input_tensor, None))[
                            0, -1, 0
                        ]

                        # test for full coverage
                        for i in range(input_chunk_length):
                            input_tensor[0, i, 0] = 1
                            curr_output = model.model.forward((input_tensor, None))[
                                0, -1, 0
                            ]
                            self.assertNotEqual(zero_output, curr_output)
                            input_tensor[0, i, 0] = 0

                        # create model with all weights set to one and one layer fewer than is automatically detected
                        model_2 = TCNModel(
                            input_chunk_length=input_chunk_length,
                            output_chunk_length=1,
                            kernel_size=kernel_size,
                            dilation_base=dilation_base,
                            weight_norm=False,
                            num_layers=model.model.num_layers - 1,
                            n_epochs=1,
                        )

                        # we have to fit the model on a dummy series in order to create the internal nn.Module
                        model_2.fit(tg.gaussian_timeseries(length=100))

                        for res_block in model_2.model.res_blocks:
                            res_block.conv1.weight = torch.nn.Parameter(
                                torch.ones(
                                    res_block.conv1.weight.shape, dtype=torch.float64
                                )
                            )
                            res_block.conv2.weight = torch.nn.Parameter(
                                torch.ones(
                                    res_block.conv2.weight.shape, dtype=torch.float64
                                )
                            )

                        model_2.model.eval()

                        # also disable MC Dropout:
                        model_2.model.set_mc_dropout(False)

                        input_tensor = torch.zeros(
                            [1, input_chunk_length, 1], dtype=torch.float64
                        )
                        zero_output = model_2.model.forward((input_tensor, None))[
                            0, -1, 0
                        ]

                        # test for incomplete coverage
                        uncovered_input_found = False
                        if model_2.model.num_layers == 1:
                            continue
                        for i in range(input_chunk_length):
                            input_tensor[0, i, 0] = 1
                            curr_output = model_2.model.forward((input_tensor, None))[
                                0, -1, 0
                            ]
                            if zero_output == curr_output:
                                uncovered_input_found = True
                                break
                            input_tensor[0, i, 0] = 0
                        self.assertTrue(uncovered_input_found)
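
The coverage check above probes the TCN receptive field empirically. For intuition, a closed form for the usual TCN construction (an assumption here, not taken from the darts source) with n residual blocks, each applying two dilated convolutions of kernel size k and dilation base b, is r = 1 + 2(k-1)(b^n - 1)/(b - 1); "full coverage" corresponds to r >= input_chunk_length:

def receptive_field(kernel_size: int, dilation_base: int, num_layers: int) -> int:
    # r = 1 + 2*(k-1)*(b**n - 1)/(b - 1), assuming two dilated convs per residual block
    k, b, n = kernel_size, dilation_base, num_layers
    return 1 + 2 * (k - 1) * (b**n - 1) // (b - 1)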
Example #12
class LocalForecastingModelsTestCase(DartsBaseTestClass):

    # forecasting horizon used in runnability tests
    forecasting_horizon = 5

    # dummy timeseries for runnability tests
    np.random.seed(1)
    ts_gaussian = tg.gaussian_timeseries(length=100, mean=50)
    # for testing covariate slicing
    ts_gaussian_long = tg.gaussian_timeseries(
        length=len(ts_gaussian) + 2 * forecasting_horizon,
        start=ts_gaussian.start_time() -
        forecasting_horizon * ts_gaussian.freq,
        mean=50,
    )

    # real timeseries for functionality tests
    ts_passengers = AirPassengersDataset().load()
    ts_pass_train, ts_pass_val = ts_passengers.split_after(
        pd.Timestamp("19570101"))

    # real multivariate timeseries for functionality tests
    ts_ice_heater = IceCreamHeaterDataset().load()
    ts_ice_heater_train, ts_ice_heater_val = ts_ice_heater.split_after(
        split_point=0.7)

    def test_save_model_parameters(self):
        # model creation parameters were saved upon creation; check that a re-created model has the same params as the original
        for model, _ in models:
            self.assertTrue(
                model._model_params == model.untrained_model()._model_params)

    def test_models_runnability(self):
        for model, _ in models:
            prediction = model.fit(self.ts_gaussian).predict(
                self.forecasting_horizon)
            self.assertTrue(len(prediction) == self.forecasting_horizon)

    def test_models_performance(self):
        # for every model, check whether its errors do not exceed the given bounds
        for model, max_mape in models:
            np.random.seed(1)  # some models are probabilistic...
            model.fit(self.ts_pass_train)
            prediction = model.predict(len(self.ts_pass_val))
            current_mape = mape(prediction, self.ts_pass_val)
            self.assertTrue(
                current_mape < max_mape,
                "{} model exceeded the maximum MAPE of {}. "
                "with a MAPE of {}".format(str(model), max_mape, current_mape),
            )

    def test_multivariate_models_performance(self):
        # for every model, check whether its errors do not exceed the given bounds
        for model, max_mape in multivariate_models:
            np.random.seed(1)
            model.fit(self.ts_ice_heater_train)
            prediction = model.predict(len(self.ts_ice_heater_val))
            current_mape = mape(prediction, self.ts_ice_heater_val)
            self.assertTrue(
                current_mape < max_mape,
                "{} model exceeded the maximum MAPE of {}. "
                "with a MAPE of {}".format(str(model), max_mape, current_mape),
            )

    def test_multivariate_input(self):
        es_model = ExponentialSmoothing()
        ts_passengers_enhanced = self.ts_passengers.add_datetime_attribute(
            "month")
        with self.assertRaises(AssertionError):
            es_model.fit(ts_passengers_enhanced)
        es_model.fit(ts_passengers_enhanced["#Passengers"])
        with self.assertRaises(KeyError):
            es_model.fit(ts_passengers_enhanced["2"])

    def test_exogenous_variables_support(self):
        # test case with pd.DatetimeIndex
        target_dt_idx = self.ts_gaussian
        fc_dt_idx = self.ts_gaussian_long

        # test case with numerical pd.RangeIndex
        target_num_idx = TimeSeries.from_times_and_values(
            times=tg._generate_index(start=0, length=len(self.ts_gaussian)),
            values=self.ts_gaussian.all_values(copy=False),
        )
        fc_num_idx = TimeSeries.from_times_and_values(
            times=tg._generate_index(start=0,
                                     length=len(self.ts_gaussian_long)),
            values=self.ts_gaussian_long.all_values(copy=False),
        )

        for target, future_covariates in zip([target_dt_idx, target_num_idx],
                                             [fc_dt_idx, fc_num_idx]):
            for model in dual_models:
                # skip models which do not support RangeIndex
                if isinstance(target.time_index, pd.RangeIndex):
                    try:
                        # _supports_range_index raises a ValueError if model does not support RangeIndex
                        model._supports_range_index()
                    except ValueError:
                        continue

                # Test model runnability and proper future covariates slicing
                model.fit(target, future_covariates=future_covariates)
                prediction = model.predict(self.forecasting_horizon,
                                           future_covariates=future_covariates)

                self.assertTrue(len(prediction) == self.forecasting_horizon)

                # Test mismatch in length between exogenous variables and forecasting horizon
                with self.assertRaises(ValueError):
                    model.predict(
                        self.forecasting_horizon,
                        future_covariates=tg.gaussian_timeseries(
                            start=future_covariates.start_time(),
                            length=self.forecasting_horizon - 1,
                        ),
                    )

                # Test mismatch in time-index/length between series and exogenous variables
                with self.assertRaises(ValueError):
                    model.fit(target, future_covariates=target[:-1])
                with self.assertRaises(ValueError):
                    model.fit(target[1:], future_covariates=target[:-1])

    def test_dummy_series(self):
        values = np.random.uniform(low=-10, high=10, size=100)
        ts = TimeSeries.from_dataframe(pd.DataFrame({"V1": values}))

        varima = VARIMA(trend="t")
        with self.assertRaises(ValueError):
            varima.fit(series=ts)

        if PMDARIMA_AVAILABLE:
            autoarima = AutoARIMA(trend="t")
            with self.assertRaises(ValueError):
                autoarima.fit(series=ts)
Example #13
class RegressionEnsembleModelsTestCase(DartsBaseTestClass):

    RANDOM_SEED = 111

    sine_series = tg.sine_timeseries(value_frequency=(1 / 5),
                                     value_y_offset=10,
                                     length=50)
    lin_series = tg.linear_timeseries(length=50)

    combined = sine_series + lin_series

    seq1 = [_make_ts(0), _make_ts(10), _make_ts(20)]
    cov1 = [_make_ts(5), _make_ts(15), _make_ts(25)]

    seq2 = [_make_ts(0, 20), _make_ts(10, 20), _make_ts(20, 20)]
    cov2 = [_make_ts(5, 30), _make_ts(15, 30), _make_ts(25, 30)]

    # dummy feature and target TimeSeries instances
    ts_periodic = tg.sine_timeseries(length=500)
    ts_gaussian = tg.gaussian_timeseries(length=500)
    ts_random_walk = tg.random_walk_timeseries(length=500)

    ts_cov1 = ts_periodic.stack(ts_gaussian)
    ts_cov1 = ts_cov1.pd_dataframe()
    ts_cov1.columns = ["Periodic", "Gaussian"]
    ts_cov1 = TimeSeries.from_dataframe(ts_cov1)
    ts_sum1 = ts_periodic + ts_gaussian

    ts_cov2 = ts_sum1.stack(ts_random_walk)
    ts_sum2 = ts_sum1 + ts_random_walk

    def get_local_models(self):
        return [NaiveDrift(), NaiveSeasonal(5), NaiveSeasonal(10)]

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def get_global_models(self, output_chunk_length=5):
        return [
            RNNModel(
                input_chunk_length=20,
                output_chunk_length=output_chunk_length,
                n_epochs=1,
                random_state=42,
            ),
            BlockRNNModel(
                input_chunk_length=20,
                output_chunk_length=output_chunk_length,
                n_epochs=1,
                random_state=42,
            ),
        ]

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_accepts_different_regression_models(self):
        regr1 = LinearRegression()
        regr2 = RandomForestRegressor()
        regr3 = RandomForest(lags_future_covariates=[0])

        model0 = RegressionEnsembleModel(self.get_local_models(), 10)
        model1 = RegressionEnsembleModel(self.get_local_models(), 10, regr1)
        model2 = RegressionEnsembleModel(self.get_local_models(), 10, regr2)
        model3 = RegressionEnsembleModel(self.get_local_models(), 10, regr3)

        models = [model0, model1, model2, model3]
        for model in models:
            model.fit(series=self.combined)
            model.predict(10)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_accepts_one_model(self):
        regr1 = LinearRegression()
        regr2 = RandomForest(lags_future_covariates=[0])

        model0 = RegressionEnsembleModel([self.get_local_models()[0]], 10)
        model1 = RegressionEnsembleModel([self.get_local_models()[0]], 10,
                                         regr1)
        model2 = RegressionEnsembleModel([self.get_local_models()[0]], 10,
                                         regr2)

        models = [model0, model1, model2]
        for model in models:
            model.fit(series=self.combined)
            model.predict(10)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_n_points(self):
        regr = LinearRegressionModel(lags_future_covariates=[0])

        # same values
        ensemble = RegressionEnsembleModel(self.get_local_models(), 5, regr)

        # value too large to perform the split
        ensemble = RegressionEnsembleModel(self.get_local_models(), 100)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)

        ensemble = RegressionEnsembleModel(self.get_local_models(), 50)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)

        # value too large considering min_train_series_length
        ensemble = RegressionEnsembleModel(self.get_local_models(), 45)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_torch_models_retrain(self):
        model1 = BlockRNNModel(input_chunk_length=12,
                               output_chunk_length=1,
                               random_state=0,
                               n_epochs=2)
        model2 = BlockRNNModel(input_chunk_length=12,
                               output_chunk_length=1,
                               random_state=0,
                               n_epochs=2)

        ensemble = RegressionEnsembleModel([model1], 5)
        ensemble.fit(self.combined)

        model1_fitted = ensemble.models[0]
        forecast1 = model1_fitted.predict(10)

        model2.fit(self.combined)
        forecast2 = model2.predict(10)

        self.assertAlmostEqual(sum(forecast1.values() - forecast2.values())[0],
                               0.0,
                               places=2)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_predict_global_models_univar(self):
        ensemble_models = self.get_global_models(output_chunk_length=10)
        ensemble_models.append(RegressionModel(lags=1))
        ensemble = RegressionEnsembleModel(ensemble_models, 10)
        ensemble.fit(series=self.combined)
        ensemble.predict(10)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_predict_global_models_multivar_no_covariates(self):
        ensemble_models = self.get_global_models(output_chunk_length=10)
        ensemble_models.append(RegressionModel(lags=1))
        ensemble = RegressionEnsembleModel(ensemble_models, 10)
        ensemble.fit(self.seq1)
        ensemble.predict(10, self.seq1)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_predict_global_models_multivar_with_covariates(self):
        ensemble_models = self.get_global_models(output_chunk_length=10)
        ensemble_models.append(
            RegressionModel(lags=1, lags_past_covariates=[-1]))
        ensemble = RegressionEnsembleModel(ensemble_models, 10)
        ensemble.fit(self.seq1, self.cov1)
        ensemble.predict(10, self.seq2, self.cov2)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def helper_test_models_accuracy(self, model_instance, n, series,
                                    past_covariates, min_rmse):
        # test whether the model predicts the target with an RMSE of at most `min_rmse`
        train_series, test_series = train_test_split(series,
                                                     pd.Timestamp("20010101"))
        train_past_covariates, _ = train_test_split(past_covariates,
                                                    pd.Timestamp("20010101"))

        model_instance.fit(series=train_series,
                           past_covariates=train_past_covariates)
        prediction = model_instance.predict(n=n,
                                            past_covariates=past_covariates)
        current_rmse = rmse(test_series, prediction)

        self.assertTrue(
            current_rmse <= min_rmse,
            f"Model was not able to denoise data. A rmse score of {current_rmse} was recorded.",
        )

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def denoising_input(self):
        np.random.seed(self.RANDOM_SEED)

        ts_periodic = tg.sine_timeseries(length=500)
        ts_gaussian = tg.gaussian_timeseries(length=500)
        ts_random_walk = tg.random_walk_timeseries(length=500)

        ts_cov1 = ts_periodic.stack(ts_gaussian)
        ts_cov1 = ts_cov1.pd_dataframe()
        ts_cov1.columns = ["Periodic", "Gaussian"]
        ts_cov1 = TimeSeries.from_dataframe(ts_cov1)
        ts_sum1 = ts_periodic + ts_gaussian

        ts_cov2 = ts_sum1.stack(ts_random_walk)
        ts_sum2 = ts_sum1 + ts_random_walk

        return ts_sum1, ts_cov1, ts_sum2, ts_cov2

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_ensemble_models_denoising(self):
        # test whether the ensemble correctly denoises ts_sum1 using ts_cov1 as past covariates
        # WARNING: this test isn't numerically stable, changing self.RANDOM_SEED can lead to exploding coefficients
        horizon = 10
        ts_sum1, ts_cov1, _, _ = self.denoising_input()
        torch.manual_seed(self.RANDOM_SEED)

        ensemble_models = [
            RNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            BlockRNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            RegressionModel(lags_past_covariates=[-1]),
        ]

        ensemble = RegressionEnsembleModel(ensemble_models, horizon)
        self.helper_test_models_accuracy(ensemble, horizon, ts_sum1, ts_cov1,
                                         3)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_ensemble_models_denoising_multi_input(self):
        # test whether the ensemble correctly denoises ts_sum2 using ts_cov2 as past covariates
        # WARNING: this test isn't numerically stable, changing self.RANDOM_SEED can lead to exploding coefficients
        horizon = 10
        _, _, ts_sum2, ts_cov2 = self.denoising_input()
        torch.manual_seed(self.RANDOM_SEED)

        ensemble_models = [
            RNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            BlockRNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            RegressionModel(lags_past_covariates=[-1]),
            RegressionModel(lags_past_covariates=[-1]),
        ]

        ensemble = RegressionEnsembleModel(ensemble_models, horizon)
        self.helper_test_models_accuracy(ensemble, horizon, ts_sum2, ts_cov2,
                                         3)
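
A minimal standalone sketch of the ensembling pattern under test; the keyword names are assumptions matching the positional arguments used above:

from darts.models import NaiveDrift, NaiveSeasonal, RegressionEnsembleModel
from darts.utils import timeseries_generation as tg

series = tg.sine_timeseries(value_frequency=1 / 5, value_y_offset=10, length=50)
ensemble = RegressionEnsembleModel(
    forecasting_models=[NaiveDrift(), NaiveSeasonal(5)],
    regression_train_n_points=10,  # tail points reserved for fitting the mixing regression
)
ensemble.fit(series)
pred = ensemble.predict(10)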
Example #14
class ReconciliationTestCase(unittest.TestCase):
    __test__ = True

    @classmethod
    def setUpClass(cls):
        logging.disable(logging.CRITICAL)

    np.random.seed(42)
    """ test case with a more intricate hierarchy """
    LENGTH = 200
    total_series = (tg.sine_timeseries(value_frequency=0.03, length=LENGTH) +
                    1 + tg.gaussian_timeseries(length=LENGTH) * 0.2)
    bottom_1 = total_series / 3 + tg.gaussian_timeseries(length=LENGTH) * 0.01
    bottom_2 = 2 * total_series / 3 + tg.gaussian_timeseries(
        length=LENGTH) * 0.01
    series = concatenate([total_series, bottom_1, bottom_2], axis=1)
    hierarchy = {"sine_1": ["sine"], "sine_2": ["sine"]}
    series = series.with_hierarchy(hierarchy)

    # get a single forecast
    model = LinearRegressionModel(lags=30, output_chunk_length=10)
    model.fit(series)
    pred = model.predict(n=20)

    # get a backtest forecast to get residuals
    pred_back = model.historical_forecasts(series,
                                           start=0.75,
                                           forecast_horizon=10)
    intersection = series.slice_intersect(pred_back)
    residuals = intersection - pred_back
    """ test case with a more intricate hierarchy """
    components_complex = ["total", "a", "b", "x", "y", "ax", "ay", "bx", "by"]

    hierarchy_complex = {
        "ax": ["a", "x"],
        "ay": ["a", "y"],
        "bx": ["b", "x"],
        "by": ["b", "y"],
        "a": ["total"],
        "b": ["total"],
        "x": ["total"],
        "y": ["total"],
    }

    series_complex = TimeSeries.from_values(
        values=np.random.rand(50, len(components_complex), 5),
        columns=components_complex,
        hierarchy=hierarchy_complex,
    )

    def _assert_reconciliation(self, fitted_recon):
        pred_r = fitted_recon.transform(self.pred)
        np.testing.assert_almost_equal(
            pred_r["sine"].values(copy=False),
            (pred_r["sine_1"] + pred_r["sine_2"]).values(copy=False),
        )

    def _assert_reconciliation_complex(self, fitted_recon):
        reconciled = fitted_recon.transform(self.series_complex)

        def _assert_comps(comp, comps):
            np.testing.assert_almost_equal(
                reconciled[comp].values(copy=False),
                sum(reconciled[c] for c in comps).values(copy=False),
            )

        _assert_comps("a", ["ax", "ay"])
        _assert_comps("b", ["bx", "by"])
        _assert_comps("x", ["ax", "bx"])
        _assert_comps("y", ["ay", "by"])
        _assert_comps("total", ["ax", "ay", "bx", "by"])
        _assert_comps("total", ["a", "b"])
        _assert_comps("total", ["x", "y"])

    def test_bottom_up(self):
        recon = BottomUpReconciliator()
        self._assert_reconciliation(recon)

    def test_top_down(self):
        # should work when fitting on training series
        recon = TopDownReconciliator()
        recon.fit(self.series)
        self._assert_reconciliation(recon)

        # or when fitting on forecasts
        recon = TopDownReconciliator()
        recon.fit(self.pred)
        self._assert_reconciliation(recon)

    def test_mint(self):
        # ols
        recon = MinTReconciliator("ols")
        recon.fit(self.series)
        self._assert_reconciliation(recon)

        # wls_struct
        recon = MinTReconciliator("wls_struct")
        recon.fit(self.series)
        self._assert_reconciliation(recon)

        # wls_var
        recon = MinTReconciliator("wls_var")
        recon.fit(self.residuals)
        self._assert_reconciliation(recon)

        # mint_cov
        recon = MinTReconciliator("mint_cov")
        recon.fit(self.residuals)
        self._assert_reconciliation(recon)

        # wls_val
        recon = MinTReconciliator("wls_val")
        recon.fit(self.series)
        self._assert_reconciliation(recon)

    def test_summation_matrix(self):
        np.testing.assert_equal(
            _get_summation_matrix(self.series_complex),
            np.array([
                [1, 1, 1, 1],
                [1, 1, 0, 0],
                [0, 0, 1, 1],
                [1, 0, 1, 0],
                [0, 1, 0, 1],
                [1, 0, 0, 0],
                [0, 1, 0, 0],
                [0, 0, 1, 0],
                [0, 0, 0, 1],
            ]),
        )

    def test_hierarchy_preserved_after_predict(self):
        self.assertEqual(self.pred.hierarchy, self.series.hierarchy)

    def test_more_intricate_hierarchy(self):
        recon = BottomUpReconciliator()
        self._assert_reconciliation_complex(recon)

        recon = TopDownReconciliator()
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)

        recon = MinTReconciliator("ols")
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)

        recon = MinTReconciliator("wls_struct")
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)

        recon = MinTReconciliator("wls_val")
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)
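
A condensed sketch of the reconciliation workflow these assertions verify: reconciliators are fittable transformers, and the wls_var / mint_cov variants are fitted on backtest residuals rather than on the series itself (exactly as the test does above):

recon = MinTReconciliator("wls_var")
recon.fit(residuals)                     # estimated from backtest residuals
reconciled_pred = recon.transform(pred)
# after reconciliation, parents equal the sum of their children, e.g.
# reconciled_pred["sine"] ~= reconciled_pred["sine_1"] + reconciled_pred["sine_2"]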
Example #15
class ProbabilisticTorchModelsTestCase(DartsBaseTestClass):
    np.random.seed(0)

    constant_ts = tg.constant_timeseries(length=200, value=0.5)
    constant_noisy_ts = constant_ts + tg.gaussian_timeseries(length=200,
                                                             std=0.1)
    constant_multivar_ts = constant_ts.stack(constant_ts)
    constant_noisy_multivar_ts = constant_noisy_ts.stack(constant_noisy_ts)
    num_samples = 5

    def test_fit_predict_determinism(self):

        for model_cls, model_kwargs, _ in models_cls_kwargs_errs:

            # test whether the first predictions of two models initialized with the same random state are identical
            model = model_cls(**model_kwargs)
            model.fit(self.constant_noisy_ts)
            pred1 = model.predict(n=10, num_samples=2).values()

            model = model_cls(**model_kwargs)
            model.fit(self.constant_noisy_ts)
            pred2 = model.predict(n=10, num_samples=2).values()

            self.assertTrue((pred1 == pred2).all())

            # test whether the next prediction of the same model is different
            pred3 = model.predict(n=10, num_samples=2).values()
            self.assertTrue((pred2 != pred3).any())

    def test_probabilistic_forecast_accuracy(self):
        for model_cls, model_kwargs, err in models_cls_kwargs_errs:
            self.helper_test_probabilistic_forecast_accuracy(
                model_cls, model_kwargs, err, self.constant_ts,
                self.constant_noisy_ts)
            if issubclass(model_cls, GlobalForecastingModel):
                self.helper_test_probabilistic_forecast_accuracy(
                    model_cls,
                    model_kwargs,
                    err,
                    self.constant_multivar_ts,
                    self.constant_noisy_multivar_ts,
                )

    def helper_test_probabilistic_forecast_accuracy(self, model_cls,
                                                    model_kwargs, err, ts,
                                                    noisy_ts):
        model = model_cls(**model_kwargs)
        model.fit(noisy_ts[:100])
        pred = model.predict(n=100, num_samples=100)

        # test accuracy of the median prediction compared to the noiseless ts
        mae_err_median = mae(ts[100:], pred)
        self.assertLess(mae_err_median, err)

        # test accuracy for increasing quantiles between 0.7 and 1 (accuracy should roughly decrease, i.e. MAE should roughly increase)
        tested_quantiles = [0.7, 0.8, 0.9, 0.99]
        mae_err = mae_err_median
        for quantile in tested_quantiles:
            new_mae = mae(ts[100:],
                          pred.quantile_timeseries(quantile=quantile))
            self.assertLess(mae_err, new_mae + 0.1)
            mae_err = new_mae

        # test accuracy for decreasing quantiles between 0.3 and 0 (accuracy should roughly decrease, i.e. MAE should roughly increase)
        tested_quantiles = [0.3, 0.2, 0.1, 0.01]
        mae_err = mae_err_median
        for quantile in tested_quantiles:
            new_mae = mae(ts[100:],
                          pred.quantile_timeseries(quantile=quantile))
            self.assertLess(mae_err, new_mae + 0.1)
            mae_err = new_mae

    """ More likelihood tests
    """
    if TORCH_AVAILABLE:
        np.random.seed(42)
        torch.manual_seed(42)

        real_series = TimeSeries.from_values(np.random.randn(100, 2) + [0, 5])
        vals = real_series.all_values()

        real_pos_series = TimeSeries.from_values(
            np.where(vals > 0, vals, -vals))
        discrete_pos_series = TimeSeries.from_values(
            np.random.randint(low=0, high=11, size=(100, 2)))
        binary_series = TimeSeries.from_values(
            np.random.randint(low=0, high=2, size=(100, 2)))
        bounded_series = TimeSeries.from_values(
            np.random.beta(2, 5, size=(100, 2)))
        simplex_series = bounded_series["0"].stack(1.0 - bounded_series["0"])

        lkl_series = (
            (GaussianLikelihood(), real_series, 0.1, 3),
            (PoissonLikelihood(), discrete_pos_series, 2, 2),
            (NegativeBinomialLikelihood(), discrete_pos_series, 0.5, 0.5),
            (BernoulliLikelihood(), binary_series, 0.15, 0.15),
            (GammaLikelihood(), real_pos_series, 0.3, 0.3),
            (GumbelLikelihood(), real_series, 0.2, 3),
            (LaplaceLikelihood(), real_series, 0.3, 4),
            (BetaLikelihood(), bounded_series, 0.1, 0.1),
            (ExponentialLikelihood(), real_pos_series, 0.3, 2),
            (DirichletLikelihood(), simplex_series, 0.3, 0.3),
            (GeometricLikelihood(), discrete_pos_series, 1, 1),
            (CauchyLikelihood(), real_series, 3, 11),
            (ContinuousBernoulliLikelihood(), bounded_series, 0.1, 0.1),
            (HalfNormalLikelihood(), real_pos_series, 0.3, 8),
            (LogNormalLikelihood(), real_pos_series, 0.3, 1),
            (WeibullLikelihood(), real_pos_series, 0.2, 2.5),
            (QuantileRegression(), real_series, 0.2, 1),
        )

        def test_likelihoods_and_resulting_mean_forecasts(self):
            def _get_avgs(series):
                return np.mean(series.all_values()[:, 0, :]), np.mean(
                    series.all_values()[:, 1, :])

            for lkl, series, diff1, diff2 in self.lkl_series:
                model = RNNModel(input_chunk_length=5, likelihood=lkl)
                model.fit(series, epochs=50)
                pred = model.predict(n=50, num_samples=50)

                avgs_orig, avgs_pred = _get_avgs(series), _get_avgs(pred)
                self.assertLess(
                    abs(avgs_orig[0] - avgs_pred[0]),
                    diff1,
                    "The difference between the mean forecast and the mean series is larger "
                    "than expected on component 0 for distribution {}".format(
                        lkl),
                )
                self.assertLess(
                    abs(avgs_orig[1] - avgs_pred[1]),
                    diff2,
                    "The difference between the mean forecast and the mean series is larger "
                    "than expected on component 1 for distribution {}".format(
                        lkl),
                )

        def test_stochastic_inputs(self):
            model = RNNModel(input_chunk_length=5)
            model.fit(self.constant_ts, epochs=2)

            # build a stochastic series
            target_vals = self.constant_ts.values()
            stochastic_vals = np.random.normal(loc=target_vals,
                                               scale=1.0,
                                               size=(len(self.constant_ts),
                                                     100))
            stochastic_vals = np.expand_dims(stochastic_vals, axis=1)
            stochastic_series = TimeSeries.from_times_and_values(
                self.constant_ts.time_index, stochastic_vals)

            # A deterministic model forecasting a stochastic series
            # should return stochastic samples
            preds = [
                model.predict(series=stochastic_series, n=10) for _ in range(2)
            ]

            # random samples should differ
            self.assertFalse(
                np.all(preds[0].values() == preds[1].values()))
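
A minimal sketch of the likelihood mechanism the `lkl_series` table parametrizes, using only calls that appear in the test:

model = RNNModel(input_chunk_length=5, likelihood=GaussianLikelihood())
model.fit(real_series, epochs=50)
pred = model.predict(n=50, num_samples=50)  # samples drawn from the fitted distribution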
Example #16
    class GlobalForecastingModelsTestCase(DartsBaseTestClass):
        # forecasting horizon used in runnability tests
        forecasting_horizon = 12

        np.random.seed(42)
        torch.manual_seed(42)

        # some arbitrary static covariates
        static_covariates = pd.DataFrame([[0.0, 1.0]], columns=["st1", "st2"])

        # real timeseries for functionality tests
        ts_passengers = (AirPassengersDataset().load().with_static_covariates(
            static_covariates))
        scaler = Scaler()
        ts_passengers = scaler.fit_transform(ts_passengers)
        ts_pass_train, ts_pass_val = ts_passengers[:-36], ts_passengers[-36:]

        # an additional noisy series
        ts_pass_train_1 = ts_pass_train + 0.01 * tg.gaussian_timeseries(
            length=len(ts_pass_train),
            freq=ts_pass_train.freq_str,
            start=ts_pass_train.start_time(),
        )

        # an additional time series serving as covariates
        year_series = tg.datetime_attribute_timeseries(ts_passengers,
                                                       attribute="year")
        month_series = tg.datetime_attribute_timeseries(ts_passengers,
                                                        attribute="month")
        scaler_dt = Scaler()
        time_covariates = scaler_dt.fit_transform(
            year_series.stack(month_series))
        time_covariates_train, time_covariates_val = (
            time_covariates[:-36],
            time_covariates[-36:],
        )

        # an artificial time series that is highly dependent on covariates
        ts_length = 400
        split_ratio = 0.6
        sine_1_ts = tg.sine_timeseries(length=ts_length)
        sine_2_ts = tg.sine_timeseries(length=ts_length, value_frequency=0.05)
        sine_3_ts = tg.sine_timeseries(length=ts_length,
                                       value_frequency=0.003,
                                       value_amplitude=5)
        linear_ts = tg.linear_timeseries(length=ts_length,
                                         start_value=3,
                                         end_value=8)

        covariates = sine_3_ts.stack(sine_2_ts).stack(linear_ts)
        covariates_past, _ = covariates.split_after(split_ratio)

        target = sine_1_ts + sine_2_ts + linear_ts + sine_3_ts
        target_past, target_future = target.split_after(split_ratio)

        def test_save_model_parameters(self):
            # model creation parameters were saved upon creation; check that a re-created model has the same params as the original
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                self.assertEqual(model._model_params,
                                 model.untrained_model()._model_params)

        def test_single_ts(self):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(
                    input_chunk_length=IN_LEN,
                    output_chunk_length=OUT_LEN,
                    random_state=0,
                    **kwargs,
                )
                model.fit(self.ts_pass_train)
                pred = model.predict(n=36)
                mape_err = mape(self.ts_pass_val, pred)
                self.assertTrue(
                    mape_err < err,
                    "Model {} produces errors too high (one time "
                    "series). Error = {}".format(model_cls, mape_err),
                )
                self.assertTrue(
                    pred.static_covariates.equals(
                        self.ts_passengers.static_covariates))

        def test_multi_ts(self):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(
                    input_chunk_length=IN_LEN,
                    output_chunk_length=OUT_LEN,
                    random_state=0,
                    **kwargs,
                )
                model.fit([self.ts_pass_train, self.ts_pass_train_1])
                with self.assertRaises(ValueError):
                    # when a model is fit on >1 series, a series must be passed to predict()
                    model.predict(n=1)
                pred = model.predict(n=36, series=self.ts_pass_train)
                mape_err = mape(self.ts_pass_val, pred)
                self.assertTrue(
                    mape_err < err,
                    "Model {} produces errors too high (several time "
                    "series). Error = {}".format(model_cls, mape_err),
                )

                # check prediction for several time series
                pred_list = model.predict(
                    n=36, series=[self.ts_pass_train, self.ts_pass_train_1])
                self.assertTrue(
                    len(pred_list) == 2,
                    f"Model {model_cls} did not return a list of prediction",
                )
                for pred in pred_list:
                    mape_err = mape(self.ts_pass_val, pred)
                    self.assertTrue(
                        mape_err < err,
                        "Model {} produces errors too high (several time series 2). "
                        "Error = {}".format(model_cls, mape_err),
                    )

        def test_covariates(self):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(
                    input_chunk_length=IN_LEN,
                    output_chunk_length=OUT_LEN,
                    random_state=0,
                    **kwargs,
                )

                # Here we rely on the fact that all non-Dual models currently are Past models
                cov_name = ("future_covariates" if isinstance(
                    model, DualCovariatesTorchModel) else "past_covariates")
                cov_kwargs = {
                    cov_name:
                    [self.time_covariates_train, self.time_covariates_train]
                }
                model.fit(series=[self.ts_pass_train, self.ts_pass_train_1],
                          **cov_kwargs)
                with self.assertRaises(ValueError):
                    # when a model is fit on >1 series, a series must be passed to predict()
                    model.predict(n=1)

                with self.assertRaises(ValueError):
                    # when model is fit using multiple covariates, covariates are required at prediction time
                    model.predict(n=1, series=self.ts_pass_train)

                cov_kwargs_train = {cov_name: self.time_covariates_train}
                cov_kwargs_notrain = {cov_name: self.time_covariates}
                with self.assertRaises(ValueError):
                    # when model is fit using covariates, n cannot be greater than output_chunk_length...
                    model.predict(n=13,
                                  series=self.ts_pass_train,
                                  **cov_kwargs_train)

                # ... unless covariates extending far enough into the future are provided
                pred = model.predict(n=13,
                                     series=self.ts_pass_train,
                                     **cov_kwargs_notrain)

                pred = model.predict(n=12,
                                     series=self.ts_pass_train,
                                     **cov_kwargs_notrain)
                mape_err = mape(self.ts_pass_val, pred)
                self.assertTrue(
                    mape_err < err,
                    "Model {} produces errors too high (several time "
                    "series with covariates). Error = {}".format(
                        model_cls, mape_err),
                )

                # when model is fit using 1 training and 1 covariate series, time series args are optional
                if model._is_probabilistic:
                    continue
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                model.fit(series=self.ts_pass_train, **cov_kwargs_train)
                pred1 = model.predict(1)
                pred2 = model.predict(1, series=self.ts_pass_train)
                pred3 = model.predict(1, **cov_kwargs_train)
                pred4 = model.predict(1,
                                      **cov_kwargs_train,
                                      series=self.ts_pass_train)
                self.assertEqual(pred1, pred2)
                self.assertEqual(pred1, pred3)
                self.assertEqual(pred1, pred4)
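
The covariate rules exercised above can also be seen in isolation. Below is a minimal sketch, assuming the same darts API these examples use; the model choice and series lengths are illustrative, not taken from the test:

from darts.models import NBEATSModel
from darts.utils import timeseries_generation as tg

target = tg.sine_timeseries(length=100, value_frequency=0.1)
covs = tg.linear_timeseries(length=120)  # extends 20 steps past the target

model = NBEATSModel(input_chunk_length=24, output_chunk_length=12, n_epochs=1)
model.fit(target, past_covariates=covs)

# n <= output_chunk_length: the covariate range seen in training suffices
model.predict(n=12, past_covariates=covs)

# n > output_chunk_length: auto-regression consumes past covariates inside
# the forecast horizon, so covs must extend beyond the target (it does here)
model.predict(n=20, past_covariates=covs)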

        def test_future_covariates(self):
            # models trained with covariates should produce better predictions over a long
            # forecasting horizon than a model trained without covariates
            model = TCNModel(
                input_chunk_length=50,
                output_chunk_length=5,
                n_epochs=20,
                random_state=0,
            )

            model.fit(series=self.target_past)
            long_pred_no_cov = model.predict(n=160)

            model = TCNModel(
                input_chunk_length=50,
                output_chunk_length=5,
                n_epochs=20,
                random_state=0,
            )
            model.fit(series=self.target_past,
                      past_covariates=self.covariates_past)
            long_pred_with_cov = model.predict(n=160,
                                               past_covariates=self.covariates)
            self.assertTrue(
                mape(self.target_future, long_pred_no_cov) > mape(
                    self.target_future, long_pred_with_cov),
                "Models with future covariates should produce better predictions.",
            )

            # block models can predict up to output_chunk_length points beyond the last past covariate
            # (see the sketch after this test)...
            model.predict(n=165, past_covariates=self.covariates)

            # ... but not more
            with self.assertRaises(ValueError):
                model.predict(n=166, past_covariates=self.covariates)

            # recurrent models can only predict data points for time steps where future covariates are available
            model = RNNModel(12, n_epochs=1)
            model.fit(series=self.target_past,
                      future_covariates=self.covariates_past)
            model.predict(n=160, future_covariates=self.covariates)
            with self.assertRaises(ValueError):
                model.predict(n=161, future_covariates=self.covariates)
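
The horizon bounds asserted above follow a simple rule. A sketch, with hypothetical helper names and the offset inferred from the n values in this test (covariates ending 160 steps after the training target):

def max_horizon_block(cov_overhang, output_chunk_length):
    # hypothetical helper: a block model can forecast up to
    # output_chunk_length points beyond the last past covariate
    return cov_overhang + output_chunk_length

def max_horizon_recurrent(cov_overhang):
    # hypothetical helper: a recurrent model needs a future covariate
    # for every forecasted step
    return cov_overhang

assert max_horizon_block(160, 5) == 165   # n=165 passes, n=166 raises
assert max_horizon_recurrent(160) == 160  # n=160 passes, n=161 raises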

        def test_batch_predictions(self):
            # predicting multiple time series at once needs to work for arbitrary batch sizes
            # univariate case
            targets_univar = [
                self.target_past,
                self.target_past[:60],
                self.target_past[:80],
            ]
            self._batch_prediction_test_helper_function(targets_univar)

            # multivariate case
            targets_multivar = [tgt.stack(tgt) for tgt in targets_univar]
            self._batch_prediction_test_helper_function(targets_multivar)

        def _batch_prediction_test_helper_function(self, targets):
            epsilon = 1e-4
            model = TCNModel(
                input_chunk_length=50,
                output_chunk_length=10,
                n_epochs=10,
                random_state=0,
            )
            model.fit(series=targets[0], past_covariates=self.covariates_past)
            preds_default = model.predict(
                n=160,
                series=targets,
                past_covariates=[self.covariates] * len(targets),
                batch_size=None,
            )

            # try batch sizes from 1 up to several times the number of series to exercise sample stacking
            for batch_size in range(1, 4 * len(targets)):
                preds = model.predict(
                    n=160,
                    series=targets,
                    past_covariates=[self.covariates] * len(targets),
                    batch_size=batch_size,
                )
                for i in range(len(targets)):
                    # the absolute deviation from the default-batch-size
                    # predictions should be negligible for every series
                    self.assertLess(
                        np.abs((preds[i] - preds_default[i]).values()).sum(),
                        epsilon)

        def test_predict_from_dataset_unsupported_input(self):
            # an exception should be thrown if an unsupported type is passed
            unsupported_type = "unsupported_type"
            # just need to test this with one model
            model_cls, kwargs, err = models_cls_kwargs_errs[0]
            model = model_cls(input_chunk_length=IN_LEN,
                              output_chunk_length=OUT_LEN,
                              **kwargs)
            model.fit([self.ts_pass_train, self.ts_pass_train_1])

            with self.assertRaises(ValueError):
                model.predict_from_dataset(
                    n=1, input_series_dataset=unsupported_type)

        def test_prediction_with_different_n(self):
            # test model predictions for n < out_len, n == out_len and n > out_len
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                self.assertTrue(
                    isinstance(
                        model,
                        (
                            PastCovariatesTorchModel,
                            DualCovariatesTorchModel,
                            MixedCovariatesTorchModel,
                        ),
                    ),
                    "unit test not yet defined for the given {X}CovariatesTorchModel.",
                )

                if isinstance(model, PastCovariatesTorchModel):
                    past_covs, future_covs = self.covariates, None
                elif isinstance(model, DualCovariatesTorchModel):
                    past_covs, future_covs = None, self.covariates
                else:
                    past_covs, future_covs = self.covariates, self.covariates

                model.fit(
                    self.target_past,
                    past_covariates=past_covs,
                    future_covariates=future_covs,
                    epochs=1,
                )

                # test prediction for n < out_len, n == out_len and n > out_len
                for n in [OUT_LEN - 1, OUT_LEN, 2 * OUT_LEN - 1]:
                    pred = model.predict(n=n,
                                         past_covariates=past_covs,
                                         future_covariates=future_covs)
                    self.assertEqual(len(pred), n)
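
When n exceeds output_chunk_length, these models forecast auto-regressively, feeding each predicted chunk back in as input. A sketch of the resulting pass count, stated as an assumption consistent with the n values tested above:

import math

def n_forward_passes(n, output_chunk_length):
    # each pass emits output_chunk_length points, so an n-step horizon
    # needs ceil(n / output_chunk_length) passes
    return math.ceil(n / output_chunk_length)

assert n_forward_passes(11, 12) == 1  # n < out_len: one truncated pass
assert n_forward_passes(12, 12) == 1  # n == out_len: exactly one pass
assert n_forward_passes(23, 12) == 2  # n > out_len: auto-regression kicks in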

        def test_same_result_with_different_n_jobs(self):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)

                multiple_ts = [self.ts_pass_train] * 10

                model.fit(multiple_ts)

                # save the random state so that two successive predictions are identical
                if model._is_probabilistic():
                    random_state = deepcopy(model._random_instance)
                else:
                    random_state = None

                pred1 = model.predict(n=36, series=multiple_ts, n_jobs=1)

                if random_state is not None:
                    model._random_instance = random_state

                pred2 = model.predict(
                    n=36, series=multiple_ts,
                    n_jobs=-1)  # assuming more than one core is available on the machine
                self.assertEqual(
                    pred1,
                    pred2,
                    "Model {} produces different predictions with different numbers of jobs".format(
                        model_cls),
                )
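
The deepcopy/restore step above is what makes two probabilistic predictions comparable. The same pattern with a plain numpy generator, as a sketch rather than darts internals:

from copy import deepcopy

import numpy as np

rng = np.random.RandomState(0)
saved = deepcopy(rng)             # snapshot the generator state

draw1 = rng.normal(size=3)
rng = saved                       # restore the snapshot
draw2 = rng.normal(size=3)

assert np.allclose(draw1, draw2)  # identical draws, as asserted for pred1/pred2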

        @patch(
            "darts.models.forecasting.torch_forecasting_model.TorchForecastingModel._init_trainer"
        )
        def test_fit_with_constr_epochs(self, init_trainer):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                multiple_ts = [self.ts_pass_train] * 10
                model.fit(multiple_ts)

                init_trainer.assert_called_with(max_epochs=kwargs["n_epochs"],
                                                trainer_params=ANY)

        @patch(
            "darts.models.forecasting.torch_forecasting_model.TorchForecastingModel._init_trainer"
        )
        def test_fit_with_fit_epochs(self, init_trainer):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                multiple_ts = [self.ts_pass_train] * 10
                epochs = 3

                model.fit(multiple_ts, epochs=epochs)
                init_trainer.assert_called_with(max_epochs=epochs,
                                                trainer_params=ANY)

                model.total_epochs = epochs
                # continue training
                model.fit(multiple_ts, epochs=epochs)
                init_trainer.assert_called_with(max_epochs=epochs,
                                                trainer_params=ANY)

        @patch(
            "darts.models.forecasting.torch_forecasting_model.TorchForecastingModel._init_trainer"
        )
        def test_fit_from_dataset_with_epochs(self, init_trainer):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                multiple_ts = [self.ts_pass_train] * 10
                train_dataset = model._build_train_dataset(
                    multiple_ts,
                    past_covariates=None,
                    future_covariates=None,
                    max_samples_per_ts=None,
                )
                epochs = 3

                model.fit_from_dataset(train_dataset, epochs=epochs)
                init_trainer.assert_called_with(max_epochs=epochs,
                                                trainer_params=ANY)

                # continue training
                model.fit_from_dataset(train_dataset, epochs=epochs)
                init_trainer.assert_called_with(max_epochs=epochs,
                                                trainer_params=ANY)
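
All three epoch tests use the same mocking pattern: patch the trainer-initialization hook and assert on the keywords it received. A generic, self-contained sketch of that pattern with illustrative names, not darts code:

from unittest.mock import ANY, patch

class Runner:
    def run(self, max_epochs):
        self._init_trainer(max_epochs=max_epochs, trainer_params={"foo": 1})

    def _init_trainer(self, max_epochs, trainer_params):
        pass

with patch.object(Runner, "_init_trainer") as init_trainer:
    Runner().run(max_epochs=3)
    # ANY matches the params dict without pinning its exact contents
    init_trainer.assert_called_with(max_epochs=3, trainer_params=ANY)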

        def test_predict_after_fit_from_dataset(self):
            model_cls, kwargs, _ = models_cls_kwargs_errs[0]
            model = model_cls(input_chunk_length=IN_LEN,
                              output_chunk_length=OUT_LEN,
                              **kwargs)

            multiple_ts = [self.ts_pass_train] * 10
            train_dataset = model._build_train_dataset(
                multiple_ts,
                past_covariates=None,
                future_covariates=None,
                max_samples_per_ts=None,
            )
            model.fit_from_dataset(train_dataset, epochs=3)

            # test predict() works after fit_from_dataset()
            model.predict(n=1, series=multiple_ts[0])

        def test_sample_smaller_than_batch_size(self):
            """
            Checking that the TorchForecastingModels do not crash even if the number of available samples for training
            is strictly lower than the selected batch_size
            """
            # TS with 50 timestamps. TorchForecastingModels will use the SequentialDataset for producing training
            # samples, which means we will have 50 - (20 + 2) + 1 = 29 samples, which is < 32 (batch_size). The model
            # should still train on those samples and not crash in any way (see the sketch after this method)
            ts = linear_timeseries(start_value=0, end_value=1, length=50)

            model = RNNModel(input_chunk_length=20,
                             output_chunk_length=2,
                             n_epochs=2,
                             batch_size=32)
            model.fit(ts)
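
The sample count in the comment above comes from sliding an (input + output) window over the series. A quick check of that arithmetic with a hypothetical helper:

def n_training_samples(series_length, input_chunk_length, output_chunk_length):
    # one sample per placement of a window of length input + output
    return series_length - (input_chunk_length + output_chunk_length) + 1

assert n_training_samples(50, 20, 2) == 29  # 29 < batch_size=32, as the test relies on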

        def test_max_samples_per_ts(self):
            """
            Checking that we can fit TorchForecastingModels with max_samples_per_ts without crashing
            """

            ts = linear_timeseries(start_value=0, end_value=1, length=50)

            model = RNNModel(input_chunk_length=20,
                             output_chunk_length=2,
                             n_epochs=2,
                             batch_size=32)

            model.fit(ts, max_samples_per_ts=5)

        def test_residuals(self):
            """
            Torch models should not fail when computing residuals on a series
            long enough to accommodate at least one training sample.
            """
            ts = linear_timeseries(start_value=0, end_value=1, length=38)

            model = NBEATSModel(
                input_chunk_length=24,
                output_chunk_length=12,
                num_stacks=2,
                num_blocks=1,
                num_layers=1,
                layer_widths=2,
                n_epochs=2,
            )

            model.residuals(ts)
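
Conceptually, residuals() subtracts historical forecasts from the actual values. A toy sketch of the idea with illustrative numbers, not the library implementation:

actuals = [10.0, 11.0, 12.5]
forecasts = [9.5, 11.2, 12.0]  # hypothetical historical forecasts for the same steps
residuals = [round(a - f, 6) for a, f in zip(actuals, forecasts)]
assert residuals == [0.5, -0.2, 0.5]
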
Ejemplo n.º 17
0
 # nested helper excerpted from a larger test method: `self` and `length_assert`
 # are captured from the enclosing scope (see the sketch below)
 def test_routine(start, end=None, length=None):
     gaussian_ts = gaussian_timeseries(start=start,
                                       end=end,
                                       length=length)
     self.assertEqual(len(gaussian_ts), length_assert)
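
As noted in the comment, the helper above relies on its enclosing scope. A sketch of how the full test method plausibly looks; the method name and length value are assumed, not taken from the original:

def test_gaussian_timeseries(self):
    length_assert = 100  # assumed value; defined in the enclosing scope

    def test_routine(start, end=None, length=None):
        gaussian_ts = gaussian_timeseries(start=start, end=end, length=length)
        self.assertEqual(len(gaussian_ts), length_assert)

    test_routine(start=0, length=length_assert)   # explicit length
    test_routine(start=0, end=length_assert - 1)  # inclusive integer end index
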
Ejemplo n.º 18
0
        def test_multiple_ts(self):
            lags = 4
            lags_past_covariates = 3
            model = RegressionModel(lags=lags,
                                    lags_past_covariates=lags_past_covariates)

            target_series = tg.linear_timeseries(start_value=0,
                                                 end_value=49,
                                                 length=50)
            past_covariates = tg.linear_timeseries(start_value=100,
                                                   end_value=149,
                                                   length=50)
            past_covariates = past_covariates.stack(
                tg.linear_timeseries(start_value=400, end_value=449,
                                     length=50))

            target_train, target_test = target_series.split_after(0.7)
            past_covariates_train, past_covariates_test = past_covariates.split_after(
                0.7)
            model.fit(
                series=[target_train, target_train + 0.5],
                past_covariates=[
                    past_covariates_train, past_covariates_train + 0.5
                ],
            )

            predictions = model.predict(
                10,
                series=[target_train, target_train + 0.5],
                past_covariates=[past_covariates, past_covariates + 0.5],
            )

            self.assertEqual(len(predictions[0]), 10,
                             f"Found {len(predictions[0])} points instead")

            # multiple TS, both future and past covariates: checking that using both covariates leads to better
            # results than using a single one (target series = linear trend + 4 * past_cov + 2 * future_cov + noise)
            np.random.seed(42)

            linear_ts_1 = tg.linear_timeseries(start_value=10,
                                               end_value=59,
                                               length=50)
            linear_ts_2 = tg.linear_timeseries(start_value=40,
                                               end_value=89,
                                               length=50)

            past_covariates = tg.sine_timeseries(length=50) * 10
            future_covariates = (
                tg.sine_timeseries(length=50, value_frequency=0.015) * 50)

            target_series_1 = linear_ts_1 + 4 * past_covariates + 2 * future_covariates
            target_series_2 = linear_ts_2 + 4 * past_covariates + 2 * future_covariates

            target_series_1_noise = (linear_ts_1 + 4 * past_covariates +
                                     2 * future_covariates +
                                     tg.gaussian_timeseries(std=7, length=50))

            target_series_2_noise = (linear_ts_2 + 4 * past_covariates +
                                     2 * future_covariates +
                                     tg.gaussian_timeseries(std=7, length=50))

            target_train_1, target_test_1 = target_series_1.split_after(0.7)
            target_train_2, target_test_2 = target_series_2.split_after(0.7)

            (
                target_train_1_noise,
                target_test_1_noise,
            ) = target_series_1_noise.split_after(0.7)
            (
                target_train_2_noise,
                target_test_2_noise,
            ) = target_series_2_noise.split_after(0.7)

            # testing improved denoising with multiple TS

            # test 1: with a single training series, using both covariates should beat past covariates alone
            model = RegressionModel(lags=3, lags_past_covariates=5)
            model.fit([target_train_1_noise], [past_covariates])

            prediction_past_only = model.predict(
                n=len(target_test_1),
                series=[target_train_1_noise, target_train_2_noise],
                past_covariates=[past_covariates] * 2,
            )

            model = RegressionModel(lags=3,
                                    lags_past_covariates=5,
                                    lags_future_covariates=(5, 0))
            model.fit([target_train_1_noise], [past_covariates],
                      [future_covariates])
            prediction_past_and_future = model.predict(
                n=len(target_test_1),
                series=[target_train_1_noise, target_train_2_noise],
                past_covariates=[past_covariates] * 2,
                future_covariates=[future_covariates] * 2,
            )

            error_past_only = rmse(
                [target_test_1, target_test_2],
                prediction_past_only,
                inter_reduction=np.mean,
            )
            error_both = rmse(
                [target_test_1, target_test_2],
                prediction_past_and_future,
                inter_reduction=np.mean,
            )

            self.assertTrue(error_past_only > error_both)
            # test 2: with both covariates, training on two series should beat training on one (noise is moderate)
            model = RegressionModel(lags=3,
                                    lags_past_covariates=5,
                                    lags_future_covariates=(5, 0))
            model.fit(
                [target_train_1_noise, target_train_2_noise],
                [past_covariates] * 2,
                [future_covariates] * 2,
            )
            prediction_past_and_future_multi_ts = model.predict(
                n=len(target_test_1),
                series=[target_train_1_noise, target_train_2_noise],
                past_covariates=[past_covariates] * 2,
                future_covariates=[future_covariates] * 2,
            )
            error_both_multi_ts = rmse(
                [target_test_1, target_test_2],
                prediction_past_and_future_multi_ts,
                inter_reduction=np.mean,
            )

            self.assertTrue(error_both > error_both_multi_ts)
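
The rmse calls above first score each (test series, prediction) pair and then collapse the per-series scores with inter_reduction. A sketch of that reduction, assuming per-series metrics as the list arguments suggest:

import numpy as np

per_series_rmse = [1.2, 0.8]        # one rmse per (test series, prediction) pair
reduced = np.mean(per_series_rmse)  # what inter_reduction=np.mean collapses to
assert np.isclose(reduced, 1.0)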