Example #1
    def test_generate_index(self):
        def test_routine(start, end=None, length=None, freq="D"):
            # check the length, the start/end values, and that the index is sorted (monotonically increasing)
            index = _generate_index(start=start,
                                    end=end,
                                    length=length,
                                    freq=freq)
            self.assertEqual(len(index), length_assert)
            self.assertTrue(index.is_monotonic_increasing)
            self.assertTrue(index[0] == start_assert)
            self.assertTrue(index[-1] == end_assert)

        for length_assert in [1, 2, 5, 10, 100]:
            for start_pos in [0, 1]:
                # pandas.RangeIndex
                start_assert, end_assert = start_pos, start_pos + length_assert - 1
                test_routine(start=start_assert, length=length_assert, freq="")
                test_routine(start=start_assert,
                             length=length_assert,
                             freq="D")
                test_routine(start=start_assert, end=end_assert)
                test_routine(start=start_assert, end=end_assert, freq="D")
                test_routine(start=None,
                             end=end_assert,
                             length=length_assert,
                             freq="BH")
                # pandas.DatetimeIndex
                start_date = pd.DatetimeIndex(["2000-01-01"], freq="D")
                start_date += start_date.freq * start_pos
                dates = _generate_index(start=start_date[0],
                                        length=length_assert)
                start_assert, end_assert = dates[0], dates[-1]
                test_routine(start=start_assert, length=length_assert)
                test_routine(start=start_assert, end=end_assert)
                test_routine(start=None,
                             end=end_assert,
                             length=length_assert,
                             freq="D")

        # `start`, `end` and `length` cannot all be set simultaneously
        with self.assertRaises(ValueError):
            _generate_index(start=0, end=9, length=10)
        # same as above but `start` defaults to timestamp '2000-01-01' in all timeseries generation functions
        with self.assertRaises(ValueError):
            linear_timeseries(end=9, length=10)

        # exactly two of [`start`, `end`, `length`] must be set
        with self.assertRaises(ValueError):
            test_routine(start=0)
        with self.assertRaises(ValueError):
            test_routine(start=None, end=1)
        with self.assertRaises(ValueError):
            test_routine(start=None, end=None, length=10)

        # `start` and `end` must have the same type
        with self.assertRaises(ValueError):
            test_routine(start=0, end=pd.Timestamp("2000-01-01"))
        with self.assertRaises(ValueError):
            test_routine(start=pd.Timestamp("2000-01-01"), end=10)
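For reference, a minimal sketch of calling the tested helper directly, outside the test routine (assuming `_generate_index` is importable from `darts.utils.timeseries_generation`, which the test above implies); exactly two of `start`, `end` and `length` are passed per call:

import pandas as pd
from darts.utils.timeseries_generation import _generate_index

# integer start -> pandas.RangeIndex with 5 entries, as in the RangeIndex branch above
int_index = _generate_index(start=0, length=5, freq="D")

# timestamp start -> pandas.DatetimeIndex with daily frequency, as in the DatetimeIndex branch above
time_index = _generate_index(start=pd.Timestamp("2000-01-01"), length=5)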
Example #2
        def test_only_future_covariates(self):

            model = RegressionModel(lags_future_covariates=[-2])

            target_series = tg.linear_timeseries(start_value=0,
                                                 end_value=49,
                                                 length=50)
            covariates = tg.linear_timeseries(start_value=100,
                                              end_value=149,
                                              length=50)
            covariates = covariates.stack(
                tg.linear_timeseries(start_value=400, end_value=449,
                                     length=50))

            target_train, target_test = target_series.split_after(0.7)
            covariates_train, covariates_test = covariates.split_after(0.7)
            model.fit(
                series=[target_train, target_train + 0.5],
                future_covariates=[covariates_train, covariates_train + 0.5],
            )

            predictions = model.predict(
                10,
                series=[target_train, target_train + 0.5],
                future_covariates=[covariates, covariates + 0.5],
            )

            self.assertEqual(len(predictions[0]), 10,
                             f"Found {len(predictions[0])} instead")
Example #3
        def helper_generate_multivariate_case_data(self, season_length,
                                                   n_repeat):
            """generates multivariate test case data. Target series is a sine wave stacked with a repeating
            linear curve of equal seasonal length. Covariates are datetime attributes for 'hours'.
            """

            # generate sine wave
            ts_sine = tg.sine_timeseries(
                value_frequency=1 / season_length,
                length=n_repeat * season_length,
                freq="h",
            )

            # generate repeating linear curve
            ts_linear = tg.linear_timeseries(0,
                                             1,
                                             length=season_length,
                                             start=ts_sine.end_time() +
                                             ts_sine.freq)
            for i in range(n_repeat - 1):
                start = ts_linear.end_time() + ts_linear.freq
                new_ts = tg.linear_timeseries(0,
                                              1,
                                              length=season_length,
                                              start=start)
                ts_linear = ts_linear.append(new_ts)
            ts_linear = TimeSeries.from_times_and_values(
                times=ts_sine.time_index, values=ts_linear.values())

            # create multivariate TimeSeries by stacking sine and linear curves
            ts = ts_sine.stack(ts_linear)

            # create train/test sets
            val_length = 10 * season_length
            ts_train, ts_val = ts[:-val_length], ts[-val_length:]

            # scale data
            scaler_ts = Scaler()
            ts_train_scaled = scaler_ts.fit_transform(ts_train)
            ts_val_scaled = scaler_ts.transform(ts_val)
            ts_scaled = scaler_ts.transform(ts)

            # generate long enough covariates (past and future covariates will be the same for simplicity)
            long_enough_ts = tg.sine_timeseries(value_frequency=1 /
                                                season_length,
                                                length=1000,
                                                freq=ts.freq)
            covariates = tg.datetime_attribute_timeseries(long_enough_ts,
                                                          attribute="hour")
            scaler_covs = Scaler()
            covariates_scaled = scaler_covs.fit_transform(covariates)
            return ts_scaled, ts_train_scaled, ts_val_scaled, covariates_scaled
Example #4
    def test_callable_encoder(self):
        """Test `CallableIndexEncoder`"""
        ts = tg.linear_timeseries(length=24, freq="A")
        input_chunk_length = 12
        output_chunk_length = 6

        # ===> test callable index encoder <===
        encoder_params = {
            "custom": {
                "past":
                [lambda index: index.year, lambda index: index.year - 1]
            }
        }
        encs = SequentialEncoder(
            add_encoders=encoder_params,
            input_chunk_length=input_chunk_length,
            output_chunk_length=output_chunk_length,
            takes_past_covariates=True,
            takes_future_covariates=True,
        )

        t1, _ = encs.encode_train(ts)
        self.assertTrue((ts.time_index.year.values == t1[0].values()[:,
                                                                     0]).all())
        self.assertTrue(
            (ts.time_index.year.values - 1 == t1[0].values()[:, 1]).all())
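In typical use the same encoder specification is passed to a forecasting model rather than to `SequentialEncoder` directly; a sketch under the assumption that the model forwards an `add_encoders` argument (as Darts forecasting models generally do), with an arbitrary model choice and chunk lengths:

from darts.models import TCNModel

model = TCNModel(
    input_chunk_length=12,
    output_chunk_length=6,
    add_encoders={
        "custom": {"past": [lambda index: index.year, lambda index: index.year - 1]}
    },
)
# model.fit(ts) would then build the encoded past covariates automatically.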
Example #5
    def test_with_static_covariates_multivariate(self):
        ts = linear_timeseries(length=10)
        ts_multi = ts.stack(ts)
        static_covs = pd.DataFrame([[0.0, 1.0], [0.0, 1.0]],
                                   columns=["st1", "st2"])

        # from univariate static covariates
        ts_multi = ts_multi.with_static_covariates(static_covs.loc[0])
        assert ts_multi.static_covariates.index.equals(
            pd.Index([DEFAULT_GLOBAL_STATIC_COV_NAME]))
        assert ts_multi.static_covariates.columns.equals(static_covs.columns)
        np.testing.assert_almost_equal(
            ts_multi.static_covariates_values(copy=False),
            static_covs.loc[0:0].values)

        # from multivariate static covariates
        ts_multi = ts_multi.with_static_covariates(static_covs)
        assert ts_multi.static_covariates.index.equals(ts_multi.components)
        assert ts_multi.static_covariates.columns.equals(static_covs.columns)
        np.testing.assert_almost_equal(
            ts_multi.static_covariates_values(copy=False), static_covs.values)

        # raise an error if the multivariate static covariates' number of rows doesn't match the number of components in the series
        with pytest.raises(ValueError):
            _ = ts_multi.with_static_covariates(
                pd.concat([static_covs] * 2, axis=0))
Example #6
    def test_concatenate_dim_samples(self):
        """
        Test concatenation with static covariates along sample dimension (axis=2)
        Along the sample dimension, we only take the static covariates of the first series (the components and
        time axis don't change).
        """
        static_covs_left = pd.DataFrame([[0, 1]], columns=["st1",
                                                           "st2"]).astype(int)
        static_covs_right = pd.DataFrame([[3, 4]], columns=["st3",
                                                            "st4"]).astype(int)

        ts_left = linear_timeseries(
            length=10).with_static_covariates(static_covs_left)
        ts_right = linear_timeseries(
            length=10).with_static_covariates(static_covs_right)

        ts_concat = concatenate([ts_left, ts_right], axis=2)
        assert ts_concat.static_covariates.equals(ts_left.static_covariates)
Example #7
    def test_concatenate_dim_time(self):
        """
        Test concatenation with static covariates along time dimension (axis=0)
        Along the time dimension, we only take the static covariates of the first series (as static covariates are
        time-independent).
        """
        static_covs_left = pd.DataFrame([[0, 1]], columns=["st1",
                                                           "st2"]).astype(int)
        static_covs_right = pd.DataFrame([[3, 4]], columns=["st3",
                                                            "st4"]).astype(int)

        ts_left = linear_timeseries(
            length=10).with_static_covariates(static_covs_left)
        ts_right = linear_timeseries(
            length=10, start=ts_left.end_time() +
            ts_left.freq).with_static_covariates(static_covs_right)

        ts_concat = concatenate([ts_left, ts_right], axis=0)
        assert ts_concat.static_covariates.equals(ts_left.static_covariates)
Example #8
    def test_stack(self):
        ts_uni = linear_timeseries(length=10)
        ts_multi = ts_uni.stack(ts_uni)

        static_covs_uni1 = pd.DataFrame([[0, 1]], columns=["st1",
                                                           "st2"]).astype(int)
        static_covs_uni2 = pd.DataFrame([[3, 4]], columns=["st3",
                                                           "st4"]).astype(int)
        static_covs_uni3 = pd.DataFrame([[2, 3, 4]],
                                        columns=["st1", "st2",
                                                 "st3"]).astype(int)

        static_covs_multi = pd.DataFrame([[0, 0], [1, 1]],
                                         columns=["st1", "st2"]).astype(int)

        ts_uni = ts_uni.with_static_covariates(static_covs_uni1)
        ts_multi = ts_multi.with_static_covariates(static_covs_multi)

        # valid static covariates for concatenation/stack
        ts_stacked1 = ts_uni.stack(ts_uni)
        assert ts_stacked1.static_covariates.index.equals(
            ts_stacked1.components)
        np.testing.assert_almost_equal(
            ts_stacked1.static_covariates_values(copy=False),
            pd.concat([ts_uni.static_covariates] * 2, axis=0).values,
        )

        # valid static covariates for concatenation/stack: first only has static covs
        # -> this gives multivar ts with univar static covs
        ts_stacked2 = ts_uni.stack(ts_uni.with_static_covariates(None))
        np.testing.assert_almost_equal(
            ts_stacked2.static_covariates_values(copy=False),
            ts_uni.static_covariates_values(copy=False),
        )

        # mismatch between column names
        with pytest.raises(ValueError):
            _ = ts_uni.stack(ts_uni.with_static_covariates(static_covs_uni2))

        # mismatch between number of covariates
        with pytest.raises(ValueError):
            _ = ts_uni.stack(ts_uni.with_static_covariates(static_covs_uni3))

        # valid univar ts with univar static covariates + multivar ts with multivar static covariates
        ts_stacked3 = ts_uni.stack(ts_multi)
        np.testing.assert_almost_equal(
            ts_stacked3.static_covariates_values(copy=False),
            pd.concat([ts_uni.static_covariates, ts_multi.static_covariates],
                      axis=0).values,
        )

        # invalid univar ts with univar static covariates + multivar ts with univar static covariates
        with pytest.raises(ValueError):
            _ = ts_uni.stack(ts_multi.with_static_covariates(static_covs_uni1))
Example #9
    def test_ts_from_x(self):
        ts = linear_timeseries(length=10).with_static_covariates(
            pd.Series([0.0, 1.0], index=["st1", "st2"]))

        self.helper_test_cov_transfer(ts,
                                      TimeSeries.from_xarray(ts.data_array()))
        self.helper_test_cov_transfer(
            ts,
            TimeSeries.from_dataframe(ts.pd_dataframe(),
                                      static_covariates=ts.static_covariates),
        )
        # ts.pd_series() loses component names -> static covariates have different component names
        self.helper_test_cov_transfer_values(
            ts,
            TimeSeries.from_series(ts.pd_series(),
                                   static_covariates=ts.static_covariates),
        )
        self.helper_test_cov_transfer(
            ts,
            TimeSeries.from_times_and_values(
                times=ts.time_index,
                values=ts.all_values(),
                columns=ts.components,
                static_covariates=ts.static_covariates,
            ),
        )

        self.helper_test_cov_transfer(
            ts,
            TimeSeries.from_values(
                values=ts.all_values(),
                columns=ts.components,
                static_covariates=ts.static_covariates,
            ),
        )

        f_csv = os.path.join(self.temp_work_dir, "temp_ts.csv")
        f_pkl = os.path.join(self.temp_work_dir, "temp_ts.pkl")
        ts.to_csv(f_csv)
        ts.to_pickle(f_pkl)
        ts_json = ts.to_json()

        self.helper_test_cov_transfer(
            ts,
            TimeSeries.from_csv(f_csv,
                                time_col="time",
                                static_covariates=ts.static_covariates),
        )
        self.helper_test_cov_transfer(ts, TimeSeries.from_pickle(f_pkl))
        self.helper_test_cov_transfer(
            ts,
            TimeSeries.from_json(ts_json,
                                 static_covariates=ts.static_covariates))
Example #10
    def test_scalers_with_static_covariates(self):
        ts = linear_timeseries(start_value=1.0, end_value=2.0, length=10)
        static_covs = pd.Series([0.0, 2.0], index=["st1", "st2"])
        ts = ts.with_static_covariates(static_covs)

        for scaler_cls in [Scaler, BoxCox]:
            scaler = scaler_cls()
            ts_scaled = scaler.fit_transform(ts)
            assert ts_scaled.static_covariates.equals(ts.static_covariates)

            ts_inv = scaler.inverse_transform(ts_scaled)
            assert ts_inv.static_covariates.equals(ts.static_covariates)
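Darts data transformers can also be applied to a list of series; a short sketch under that assumption (the transformer then returns a list, and static covariates should be carried through element-wise, exactly as in the single-series case above):

import pandas as pd
from darts.dataprocessing.transformers import Scaler
from darts.utils.timeseries_generation import linear_timeseries

ts_a = linear_timeseries(start_value=1.0, end_value=2.0, length=10).with_static_covariates(
    pd.Series([0.0, 2.0], index=["st1", "st2"]))
ts_b = ts_a + 1.0

scaler = Scaler()
ts_a_scaled, ts_b_scaled = scaler.fit_transform([ts_a, ts_b])
assert ts_a_scaled.static_covariates.equals(ts_a.static_covariates)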
Example #11
        def test_max_samples_per_ts(self):
            """
            Check that TorchForecastingModels can be fit with max_samples_per_ts without crashing.
            """

            ts = linear_timeseries(start_value=0, end_value=1, length=50)

            model = RNNModel(input_chunk_length=20,
                             output_chunk_length=2,
                             n_epochs=2,
                             batch_size=32)

            model.fit(ts, max_samples_per_ts=5)
Example #12
        def test_sample_smaller_than_batch_size(self):
            """
            Checking that the TorchForecastingModels do not crash even if the number of available samples for training
            is strictly lower than the selected batch_size
            """
            # TS with 50 timestamps. TorchForecastingModels will use the SequentialDataset for producing training
            # samples, which means we will have 50 - 22 - 2 + 1 = 27 samples, which is < 32 (batch_size). The model
            # should still train on those samples and not crash in any way
            ts = linear_timeseries(start_value=0, end_value=1, length=50)

            model = RNNModel(input_chunk_length=20,
                             output_chunk_length=2,
                             n_epochs=2,
                             batch_size=32)
            model.fit(ts)
Example #13
        def test_performance(self):
            # test TCN performance on dummy time series
            ts = tg.sine_timeseries(length=100) + tg.linear_timeseries(
                length=100, end_value=2
            )
            train, test = ts[:90], ts[90:]
            model = TCNModel(
                input_chunk_length=12,
                output_chunk_length=10,
                n_epochs=300,
                random_state=0,
            )
            model.fit(train)
            pred = model.predict(n=10)

            self.assertTrue(mae(pred, test) < 0.3)
Example #14
 def test_routine(start, end=None, length=None):
     # testing for start value, end value and delta between two adjacent entries
     linear_ts = linear_timeseries(
         start=start,
         end=end,
         length=length,
         start_value=start_value,
         end_value=end_value,
     )
     self.assertEqual(linear_ts.values()[0][0], start_value)
     self.assertEqual(linear_ts.values()[-1][0], end_value)
     self.assertAlmostEqual(
         linear_ts.values()[-1][0] - linear_ts.values()[-2][0],
         (end_value - start_value) / (length_assert - 1),
     )
     self.assertEqual(len(linear_ts), length_assert)
Example #15
    def test_map_with_timestamp(self):
        series = linear_timeseries(start_value=1,
                                   length=12,
                                   freq='MS',
                                   start_ts=pd.Timestamp('2000-01-01'),
                                   end_value=12)  # noqa: E501
        zeroes = constant_timeseries(value=0.0,
                                     length=12,
                                     freq='MS',
                                     start_ts=pd.Timestamp('2000-01-01'))

        def function(ts, x):
            return x - ts.month

        new_series = series.map(function)
        self.assertEqual(new_series, zeroes)
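`TimeSeries.map` also accepts a plain one-argument function applied element-wise to the values (the test above uses the two-argument (timestamp, value) form); a small sketch:

import numpy as np
from darts.utils.timeseries_generation import linear_timeseries

series = linear_timeseries(start_value=1, end_value=12, length=12)
doubled = series.map(lambda x: 2.0 * x)   # element-wise on the values
logged = series.map(np.log)               # NumPy ufuncs with one input also work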
Example #16
    def test_static_covariates_values(self):
        ts = linear_timeseries(length=10)
        static_covs = pd.DataFrame([[0.0, 1.0]], columns=["st1", "st2"])
        ts = ts.with_static_covariates(static_covs)

        # changing values of copy should not change original DataFrame
        vals = ts.static_covariates_values(copy=True)
        vals[:] = -1.0
        assert (ts.static_covariates_values(copy=False) != -1.0).all()

        # changing values of view should change original DataFrame
        vals = ts.static_covariates_values(copy=False)
        vals[:] = -1.0
        assert (ts.static_covariates_values(copy=False) == -1.0).all()

        ts = ts.with_static_covariates(None)
        assert ts.static_covariates_values() is None
Example #17
    def test_map_wrong_fn(self):
        series = linear_timeseries(start_value=1,
                                   length=12,
                                   freq='MS',
                                   start_ts=pd.Timestamp('2000-01-01'),
                                   end_value=12)  # noqa: E501

        def add(x, y, z):
            return x + y + z

        with self.assertRaises(ValueError):
            series.map(add)

        ufunc_add = np.frompyfunc(add, 3, 1)

        with self.assertRaises(ValueError):
            series.map(ufunc_add)
Example #18
class MappersTestCase(unittest.TestCase):

    @staticmethod
    def func(x):
        return x + 10

    @staticmethod
    def inverse_func(x):
        return x - 10

    @staticmethod
    def ts_func(ts, x):
        return x - ts.month

    @staticmethod
    def inverse_ts_func(ts, x):
        return x + ts.month

    plus_ten = Mapper(func.__func__)
    plus_ten_invertible = InvertibleMapper(func.__func__, inverse_func.__func__)

    subtract_month = Mapper(ts_func.__func__)
    subtract_month_invertible = InvertibleMapper(ts_func.__func__, inverse_ts_func.__func__)

    lin_series = linear_timeseries(start_value=1, length=12, freq='MS', start_ts=pd.Timestamp('2000-01-01'), end_value=12)  # noqa: E501
    zeroes = constant_timeseries(value=0.0, length=12, freq='MS', start_ts=pd.Timestamp('2000-01-01'))
    tens = constant_timeseries(value=10.0, length=12, freq='MS', start_ts=pd.Timestamp('2000-01-01'))

    def test_mapper(self):
        transformed = self.plus_ten.transform(self.zeroes)
        self.assertEqual(transformed, self.tens)

    def test_invertible_mapper(self):
        transformed = self.plus_ten_invertible.transform(self.lin_series)
        back = self.plus_ten_invertible.inverse_transform(transformed)
        self.assertEqual(back, self.lin_series)

    def test_mapper_with_timestamp(self):
        transformed = self.subtract_month.transform(self.lin_series)
        self.assertEqual(transformed, self.zeroes)

    def test_invertible_mapper_with_timestamp(self):
        transformed = self.subtract_month_invertible.transform(self.lin_series)
        back = self.subtract_month_invertible.inverse_transform(transformed)
        self.assertEqual(back, self.lin_series)
Example #19
        def test_residuals(self):
            """
            Torch models should not fail when computing residuals on a series
            long enough to accommodate at least one training sample.
            """
            ts = linear_timeseries(start_value=0, end_value=1, length=38)

            model = NBEATSModel(
                input_chunk_length=24,
                output_chunk_length=12,
                num_stacks=2,
                num_blocks=1,
                num_layers=1,
                layer_widths=2,
                n_epochs=2,
            )

            model.residuals(ts)
Example #20
class BoxCoxTestCase(unittest.TestCase):

    sine_series = sine_timeseries(length=50,
                                  value_y_offset=5,
                                  value_frequency=0.05)
    lin_series = linear_timeseries(start_value=1, end_value=10, length=50)
    multi_series = sine_series.stack(lin_series)

    def test_boxcox_lambda(self):
        boxcox = BoxCox()

        boxcox.fit(self.multi_series, 0.3)
        self.assertEqual(boxcox._lmbda, [0.3, 0.3])

        boxcox.fit(self.multi_series, [0.3, 0.4])
        self.assertEqual(boxcox._lmbda, [0.3, 0.4])

        with self.assertRaises(ValueError):
            boxcox.fit(self.multi_series, [0.2, 0.4, 0.5])

        boxcox.fit(self.multi_series, optim_method='mle')
        lmbda1 = boxcox._lmbda
        boxcox.fit(self.multi_series, optim_method='pearsonr')
        lmbda2 = boxcox._lmbda

        self.assertNotEqual(lmbda1.array, lmbda2.array)

    def test_boxcox_transform(self):
        log_mapper = Mapper(lambda x: log(x))
        boxcox = BoxCox()

        transformed1 = log_mapper.transform(self.sine_series)
        transformed2 = boxcox.fit(self.sine_series,
                                  lmbda=0).transform(self.sine_series)

        self.assertEqual(transformed1, transformed2)

    def test_boxcox_inverse(self):
        boxcox = BoxCox()
        transformed = boxcox.fit_transform(self.multi_series)
        back = boxcox.inverse_transform(transformed)
        pd.testing.assert_frame_equal(self.multi_series._df,
                                      back._df,
                                      check_exact=False)
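The `lmbda=0` case exercised in `test_boxcox_transform` is the standard Box-Cox limit in which the transform reduces to the natural logarithm; a quick numerical check of that identity, independent of Darts:

import numpy as np
from scipy.special import boxcox

x = np.array([1.0, 2.0, 5.0, 10.0])
np.testing.assert_allclose(boxcox(x, 0.0), np.log(x))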
Example #21
    def test_with_static_covariates_univariate(self):
        ts = linear_timeseries(length=10)
        static_covs_series = pd.Series([0.0, 1.0], index=["st1", "st2"])
        static_covs_df = pd.DataFrame([[0.0, 1.0]], columns=["st1", "st2"])

        # check immutable
        ts.with_static_covariates(static_covs_series)
        assert not ts.has_static_covariates

        # from Series
        ts = ts.with_static_covariates(static_covs_series)
        assert ts.has_static_covariates
        np.testing.assert_almost_equal(
            ts.static_covariates_values(copy=False),
            np.expand_dims(static_covs_series.values, -1).T,
        )
        assert ts.static_covariates.index.equals(ts.components)

        # from DataFrame
        ts = ts.with_static_covariates(static_covs_df)
        assert ts.has_static_covariates
        np.testing.assert_almost_equal(ts.static_covariates_values(copy=False),
                                       static_covs_df.values)
        assert ts.static_covariates.index.equals(ts.components)

        # with None
        ts = ts.with_static_covariates(None)
        assert ts.static_covariates is None
        assert not ts.has_static_covariates

        # only pd.Series, pd.DataFrame or None
        with pytest.raises(ValueError):
            _ = ts.with_static_covariates([1, 2, 3])

        # multivariate does not work with univariate TimeSeries
        with pytest.raises(ValueError):
            static_covs_multi = pd.concat([static_covs_series] * 2, axis=1).T
            _ = ts.with_static_covariates(static_covs_multi)
Example #22
        def test_multiple_ts(self):
            lags = 4
            lags_past_covariates = 3
            model = RegressionModel(lags=lags,
                                    lags_past_covariates=lags_past_covariates)

            target_series = tg.linear_timeseries(start_value=0,
                                                 end_value=49,
                                                 length=50)
            past_covariates = tg.linear_timeseries(start_value=100,
                                                   end_value=149,
                                                   length=50)
            past_covariates = past_covariates.stack(
                tg.linear_timeseries(start_value=400, end_value=449,
                                     length=50))

            target_train, target_test = target_series.split_after(0.7)
            past_covariates_train, past_covariates_test = past_covariates.split_after(
                0.7)
            model.fit(
                series=[target_train, target_train + 0.5],
                past_covariates=[
                    past_covariates_train, past_covariates_train + 0.5
                ],
            )

            predictions = model.predict(
                10,
                series=[target_train, target_train + 0.5],
                past_covariates=[past_covariates, past_covariates + 0.5],
            )

            self.assertEqual(len(predictions[0]), 10,
                             f"Found {len(predictions)} instead")

            # multiple TS, both future and past covariates, checking that both covariates lead to better results than
            # using a single one (target series = past_cov + future_cov + noise)
            np.random.seed(42)

            linear_ts_1 = tg.linear_timeseries(start_value=10,
                                               end_value=59,
                                               length=50)
            linear_ts_2 = tg.linear_timeseries(start_value=40,
                                               end_value=89,
                                               length=50)

            past_covariates = tg.sine_timeseries(length=50) * 10
            future_covariates = (
                tg.sine_timeseries(length=50, value_frequency=0.015) * 50)

            target_series_1 = linear_ts_1 + 4 * past_covariates + 2 * future_covariates
            target_series_2 = linear_ts_2 + 4 * past_covariates + 2 * future_covariates

            target_series_1_noise = (linear_ts_1 + 4 * past_covariates +
                                     2 * future_covariates +
                                     tg.gaussian_timeseries(std=7, length=50))

            target_series_2_noise = (linear_ts_2 + 4 * past_covariates +
                                     2 * future_covariates +
                                     tg.gaussian_timeseries(std=7, length=50))

            target_train_1, target_test_1 = target_series_1.split_after(0.7)
            target_train_2, target_test_2 = target_series_2.split_after(0.7)

            (
                target_train_1_noise,
                target_test_1_noise,
            ) = target_series_1_noise.split_after(0.7)
            (
                target_train_2_noise,
                target_test_2_noise,
            ) = target_series_2_noise.split_after(0.7)

            # testing improved denoising with multiple TS

            # test 1: with single TS, 2 covariates should be better than one
            model = RegressionModel(lags=3, lags_past_covariates=5)
            model.fit([target_train_1_noise], [past_covariates])

            prediction_past_only = model.predict(
                n=len(target_test_1),
                series=[target_train_1_noise, target_train_2_noise],
                past_covariates=[past_covariates] * 2,
            )

            model = RegressionModel(lags=3,
                                    lags_past_covariates=5,
                                    lags_future_covariates=(5, 0))
            model.fit([target_train_1_noise], [past_covariates],
                      [future_covariates])
            prediction_past_and_future = model.predict(
                n=len(target_test_1),
                series=[target_train_1_noise, target_train_2_noise],
                past_covariates=[past_covariates] * 2,
                future_covariates=[future_covariates] * 2,
            )

            error_past_only = rmse(
                [target_test_1, target_test_2],
                prediction_past_only,
                inter_reduction=np.mean,
            )
            error_both = rmse(
                [target_test_1, target_test_2],
                prediction_past_and_future,
                inter_reduction=np.mean,
            )

            self.assertTrue(error_past_only > error_both)
            # test 2: with both covariates, 2 TS should learn more than one (with little noise)
            model = RegressionModel(lags=3,
                                    lags_past_covariates=5,
                                    lags_future_covariates=(5, 0))
            model.fit(
                [target_train_1_noise, target_train_2_noise],
                [past_covariates] * 2,
                [future_covariates] * 2,
            )
            prediction_past_and_future_multi_ts = model.predict(
                n=len(target_test_1),
                series=[target_train_1_noise, target_train_2_noise],
                past_covariates=[past_covariates] * 2,
                future_covariates=[future_covariates] * 2,
            )
            error_both_multi_ts = rmse(
                [target_test_1, target_test_2],
                prediction_past_and_future_multi_ts,
                inter_reduction=np.mean,
            )

            self.assertTrue(error_both > error_both_multi_ts)
Example #23
class RegressionEnsembleModelsTestCase(DartsBaseTestClass):

    RANDOM_SEED = 111

    sine_series = tg.sine_timeseries(value_frequency=(1 / 5),
                                     value_y_offset=10,
                                     length=50)
    lin_series = tg.linear_timeseries(length=50)

    combined = sine_series + lin_series

    seq1 = [_make_ts(0), _make_ts(10), _make_ts(20)]
    cov1 = [_make_ts(5), _make_ts(15), _make_ts(25)]

    seq2 = [_make_ts(0, 20), _make_ts(10, 20), _make_ts(20, 20)]
    cov2 = [_make_ts(5, 30), _make_ts(15, 30), _make_ts(25, 30)]

    # dummy feature and target TimeSeries instances
    ts_periodic = tg.sine_timeseries(length=500)
    ts_gaussian = tg.gaussian_timeseries(length=500)
    ts_random_walk = tg.random_walk_timeseries(length=500)

    ts_cov1 = ts_periodic.stack(ts_gaussian)
    ts_cov1 = ts_cov1.pd_dataframe()
    ts_cov1.columns = ["Periodic", "Gaussian"]
    ts_cov1 = TimeSeries.from_dataframe(ts_cov1)
    ts_sum1 = ts_periodic + ts_gaussian

    ts_cov2 = ts_sum1.stack(ts_random_walk)
    ts_sum2 = ts_sum1 + ts_random_walk

    def get_local_models(self):
        return [NaiveDrift(), NaiveSeasonal(5), NaiveSeasonal(10)]

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def get_global_models(self, output_chunk_length=5):
        return [
            RNNModel(
                input_chunk_length=20,
                output_chunk_length=output_chunk_length,
                n_epochs=1,
                random_state=42,
            ),
            BlockRNNModel(
                input_chunk_length=20,
                output_chunk_length=output_chunk_length,
                n_epochs=1,
                random_state=42,
            ),
        ]

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_accepts_different_regression_models(self):
        regr1 = LinearRegression()
        regr2 = RandomForestRegressor()
        regr3 = RandomForest(lags_future_covariates=[0])

        model0 = RegressionEnsembleModel(self.get_local_models(), 10)
        model1 = RegressionEnsembleModel(self.get_local_models(), 10, regr1)
        model2 = RegressionEnsembleModel(self.get_local_models(), 10, regr2)
        model3 = RegressionEnsembleModel(self.get_local_models(), 10, regr3)

        models = [model0, model1, model2, model3]
        for model in models:
            model.fit(series=self.combined)
            model.predict(10)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_accepts_one_model(self):
        regr1 = LinearRegression()
        regr2 = RandomForest(lags_future_covariates=[0])

        model0 = RegressionEnsembleModel([self.get_local_models()[0]], 10)
        model1 = RegressionEnsembleModel([self.get_local_models()[0]], 10,
                                         regr1)
        model2 = RegressionEnsembleModel([self.get_local_models()[0]], 10,
                                         regr2)

        models = [model0, model1, model2]
        for model in models:
            model.fit(series=self.combined)
            model.predict(10)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_n_points(self):
        regr = LinearRegressionModel(lags_future_covariates=[0])

        # same values
        ensemble = RegressionEnsembleModel(self.get_local_models(), 5, regr)

        # too big value to perform the split
        ensemble = RegressionEnsembleModel(self.get_local_models(), 100)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)

        ensemble = RegressionEnsembleModel(self.get_local_models(), 50)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)

        # too big value considering min_train_series_length
        ensemble = RegressionEnsembleModel(self.get_local_models(), 45)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_torch_models_retrain(self):
        model1 = BlockRNNModel(input_chunk_length=12,
                               output_chunk_length=1,
                               random_state=0,
                               n_epochs=2)
        model2 = BlockRNNModel(input_chunk_length=12,
                               output_chunk_length=1,
                               random_state=0,
                               n_epochs=2)

        ensemble = RegressionEnsembleModel([model1], 5)
        ensemble.fit(self.combined)

        model1_fitted = ensemble.models[0]
        forecast1 = model1_fitted.predict(10)

        model2.fit(self.combined)
        forecast2 = model2.predict(10)

        self.assertAlmostEqual(sum(forecast1.values() - forecast2.values())[0],
                               0.0,
                               places=2)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_predict_global_models_univar(self):
        ensemble_models = self.get_global_models(output_chunk_length=10)
        ensemble_models.append(RegressionModel(lags=1))
        ensemble = RegressionEnsembleModel(ensemble_models, 10)
        ensemble.fit(series=self.combined)
        ensemble.predict(10)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_predict_global_models_multivar_no_covariates(self):
        ensemble_models = self.get_global_models(output_chunk_length=10)
        ensemble_models.append(RegressionModel(lags=1))
        ensemble = RegressionEnsembleModel(ensemble_models, 10)
        ensemble.fit(self.seq1)
        ensemble.predict(10, self.seq1)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_predict_global_models_multivar_with_covariates(self):
        ensemble_models = self.get_global_models(output_chunk_length=10)
        ensemble_models.append(
            RegressionModel(lags=1, lags_past_covariates=[-1]))
        ensemble = RegressionEnsembleModel(ensemble_models, 10)
        ensemble.fit(self.seq1, self.cov1)
        ensemble.predict(10, self.seq2, self.cov2)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def helper_test_models_accuracy(self, model_instance, n, series,
                                    past_covariates, min_rmse):
        # for every model, test whether it predicts the target with an RMSE of at most `min_rmse`
        train_series, test_series = train_test_split(series,
                                                     pd.Timestamp("20010101"))
        train_past_covariates, _ = train_test_split(past_covariates,
                                                    pd.Timestamp("20010101"))

        model_instance.fit(series=train_series,
                           past_covariates=train_past_covariates)
        prediction = model_instance.predict(n=n,
                                            past_covariates=past_covariates)
        current_rmse = rmse(test_series, prediction)

        self.assertTrue(
            current_rmse <= min_rmse,
            f"Model was not able to denoise data. A rmse score of {current_rmse} was recorded.",
        )

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def denoising_input(self):
        np.random.seed(self.RANDOM_SEED)

        ts_periodic = tg.sine_timeseries(length=500)
        ts_gaussian = tg.gaussian_timeseries(length=500)
        ts_random_walk = tg.random_walk_timeseries(length=500)

        ts_cov1 = ts_periodic.stack(ts_gaussian)
        ts_cov1 = ts_cov1.pd_dataframe()
        ts_cov1.columns = ["Periodic", "Gaussian"]
        ts_cov1 = TimeSeries.from_dataframe(ts_cov1)
        ts_sum1 = ts_periodic + ts_gaussian

        ts_cov2 = ts_sum1.stack(ts_random_walk)
        ts_sum2 = ts_sum1 + ts_random_walk

        return ts_sum1, ts_cov1, ts_sum2, ts_cov2

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_ensemble_models_denoising(self):
        # test whether the ensemble correctly denoises ts_sum1, using ts_cov1 as past covariates
        # WARNING: this test isn't numerically stable, changing self.RANDOM_SEED can lead to exploding coefficients
        horizon = 10
        ts_sum1, ts_cov1, _, _ = self.denoising_input()
        torch.manual_seed(self.RANDOM_SEED)

        ensemble_models = [
            RNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            BlockRNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            RegressionModel(lags_past_covariates=[-1]),
        ]

        ensemble = RegressionEnsembleModel(ensemble_models, horizon)
        self.helper_test_models_accuracy(ensemble, horizon, ts_sum1, ts_cov1,
                                         3)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_ensemble_models_denoising_multi_input(self):
        # test whether the ensemble correctly denoises ts_sum2, using ts_cov2 as past covariates
        # WARNING: this test isn't numerically stable, changing self.RANDOM_SEED can lead to exploding coefficients
        horizon = 10
        _, _, ts_sum2, ts_cov2 = self.denoising_input()
        torch.manual_seed(self.RANDOM_SEED)

        ensemble_models = [
            RNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            BlockRNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            RegressionModel(lags_past_covariates=[-1]),
            RegressionModel(lags_past_covariates=[-1]),
        ]

        ensemble = RegressionEnsembleModel(ensemble_models, horizon)
        self.helper_test_models_accuracy(ensemble, horizon, ts_sum2, ts_cov2,
                                         3)
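A minimal standalone sketch of the pattern exercised above (the positional arguments mirror the calls in this test class: a list of forecasting models followed by the number of points reserved for training the ensemble's regression model):

from darts.models import NaiveDrift, NaiveSeasonal, RegressionEnsembleModel
from darts.utils import timeseries_generation as tg

series = tg.sine_timeseries(value_frequency=1 / 5, value_y_offset=10, length=50) + tg.linear_timeseries(length=50)

ensemble = RegressionEnsembleModel([NaiveDrift(), NaiveSeasonal(5)], 10)
ensemble.fit(series=series)
forecast = ensemble.predict(10)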
Example #24
    class GlobalForecastingModelsTestCase(DartsBaseTestClass):
        # forecasting horizon used in runnability tests
        forecasting_horizon = 12

        np.random.seed(42)
        torch.manual_seed(42)

        # some arbitrary static covariates
        static_covariates = pd.DataFrame([[0.0, 1.0]], columns=["st1", "st2"])

        # real timeseries for functionality tests
        ts_passengers = (AirPassengersDataset().load().with_static_covariates(
            static_covariates))
        scaler = Scaler()
        ts_passengers = scaler.fit_transform(ts_passengers)
        ts_pass_train, ts_pass_val = ts_passengers[:-36], ts_passengers[-36:]

        # an additional noisy series
        ts_pass_train_1 = ts_pass_train + 0.01 * tg.gaussian_timeseries(
            length=len(ts_pass_train),
            freq=ts_pass_train.freq_str,
            start=ts_pass_train.start_time(),
        )

        # an additional time series serving as covariates
        year_series = tg.datetime_attribute_timeseries(ts_passengers,
                                                       attribute="year")
        month_series = tg.datetime_attribute_timeseries(ts_passengers,
                                                        attribute="month")
        scaler_dt = Scaler()
        time_covariates = scaler_dt.fit_transform(
            year_series.stack(month_series))
        time_covariates_train, time_covariates_val = (
            time_covariates[:-36],
            time_covariates[-36:],
        )

        # an artificial time series that is highly dependent on covariates
        ts_length = 400
        split_ratio = 0.6
        sine_1_ts = tg.sine_timeseries(length=ts_length)
        sine_2_ts = tg.sine_timeseries(length=ts_length, value_frequency=0.05)
        sine_3_ts = tg.sine_timeseries(length=ts_length,
                                       value_frequency=0.003,
                                       value_amplitude=5)
        linear_ts = tg.linear_timeseries(length=ts_length,
                                         start_value=3,
                                         end_value=8)

        covariates = sine_3_ts.stack(sine_2_ts).stack(linear_ts)
        covariates_past, _ = covariates.split_after(split_ratio)

        target = sine_1_ts + sine_2_ts + linear_ts + sine_3_ts
        target_past, target_future = target.split_after(split_ratio)

        def test_save_model_parameters(self):
            # model creation parameters were saved before. check if re-created model has same params as original
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                self.assertEqual(model._model_params,
                                 model.untrained_model()._model_params)

        def test_single_ts(self):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(
                    input_chunk_length=IN_LEN,
                    output_chunk_length=OUT_LEN,
                    random_state=0,
                    **kwargs,
                )
                model.fit(self.ts_pass_train)
                pred = model.predict(n=36)
                mape_err = mape(self.ts_pass_val, pred)
                self.assertTrue(
                    mape_err < err,
                    "Model {} produces errors too high (one time "
                    "series). Error = {}".format(model_cls, mape_err),
                )
                self.assertTrue(
                    pred.static_covariates.equals(
                        self.ts_passengers.static_covariates))

        def test_multi_ts(self):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(
                    input_chunk_length=IN_LEN,
                    output_chunk_length=OUT_LEN,
                    random_state=0,
                    **kwargs,
                )
                model.fit([self.ts_pass_train, self.ts_pass_train_1])
                with self.assertRaises(ValueError):
                    # when the model is fit on >1 series, a series must be provided as an argument
                    model.predict(n=1)
                pred = model.predict(n=36, series=self.ts_pass_train)
                mape_err = mape(self.ts_pass_val, pred)
                self.assertTrue(
                    mape_err < err,
                    "Model {} produces errors too high (several time "
                    "series). Error = {}".format(model_cls, mape_err),
                )

                # check prediction for several time series
                pred_list = model.predict(
                    n=36, series=[self.ts_pass_train, self.ts_pass_train_1])
                self.assertTrue(
                    len(pred_list) == 2,
                    f"Model {model_cls} did not return a list of prediction",
                )
                for pred in pred_list:
                    mape_err = mape(self.ts_pass_val, pred)
                    self.assertTrue(
                        mape_err < err,
                        "Model {} produces errors too high (several time series 2). "
                        "Error = {}".format(model_cls, mape_err),
                    )

        def test_covariates(self):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(
                    input_chunk_length=IN_LEN,
                    output_chunk_length=OUT_LEN,
                    random_state=0,
                    **kwargs,
                )

                # Here we rely on the fact that all non-Dual models currently are Past models
                cov_name = ("future_covariates" if isinstance(
                    model, DualCovariatesTorchModel) else "past_covariates")
                cov_kwargs = {
                    cov_name:
                    [self.time_covariates_train, self.time_covariates_train]
                }
                model.fit(series=[self.ts_pass_train, self.ts_pass_train_1],
                          **cov_kwargs)
                with self.assertRaises(ValueError):
                    # when the model is fit on >1 series, a series must be provided as an argument
                    model.predict(n=1)

                with self.assertRaises(ValueError):
                    # when model is fit using multiple covariates, covariates are required at prediction time
                    model.predict(n=1, series=self.ts_pass_train)

                cov_kwargs_train = {cov_name: self.time_covariates_train}
                cov_kwargs_notrain = {cov_name: self.time_covariates}
                with self.assertRaises(ValueError):
                    # when model is fit using covariates, n cannot be greater than output_chunk_length...
                    model.predict(n=13,
                                  series=self.ts_pass_train,
                                  **cov_kwargs_train)

                # ... unless future covariates are provided
                pred = model.predict(n=13,
                                     series=self.ts_pass_train,
                                     **cov_kwargs_notrain)

                pred = model.predict(n=12,
                                     series=self.ts_pass_train,
                                     **cov_kwargs_notrain)
                mape_err = mape(self.ts_pass_val, pred)
                self.assertTrue(
                    mape_err < err,
                    "Model {} produces errors too high (several time "
                    "series with covariates). Error = {}".format(
                        model_cls, mape_err),
                )

                # when model is fit using 1 training and 1 covariate series, time series args are optional
                if model._is_probabilistic:
                    continue
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                model.fit(series=self.ts_pass_train, **cov_kwargs_train)
                pred1 = model.predict(1)
                pred2 = model.predict(1, series=self.ts_pass_train)
                pred3 = model.predict(1, **cov_kwargs_train)
                pred4 = model.predict(1,
                                      **cov_kwargs_train,
                                      series=self.ts_pass_train)
                self.assertEqual(pred1, pred2)
                self.assertEqual(pred1, pred3)
                self.assertEqual(pred1, pred4)

        def test_future_covariates(self):
            # models trained with covariates should produce better predictions over a long forecasting horizon
            # than a model trained with no covariates
            model = TCNModel(
                input_chunk_length=50,
                output_chunk_length=5,
                n_epochs=20,
                random_state=0,
            )

            model.fit(series=self.target_past)
            long_pred_no_cov = model.predict(n=160)

            model = TCNModel(
                input_chunk_length=50,
                output_chunk_length=5,
                n_epochs=20,
                random_state=0,
            )
            model.fit(series=self.target_past,
                      past_covariates=self.covariates_past)
            long_pred_with_cov = model.predict(n=160,
                                               past_covariates=self.covariates)
            self.assertTrue(
                mape(self.target_future, long_pred_no_cov) > mape(
                    self.target_future, long_pred_with_cov),
                "Models with future covariates should produce better predictions.",
            )

            # block models can predict up to self.output_chunk_length points beyond the last past covariate...
            model.predict(n=165, past_covariates=self.covariates)

            # ... not more
            with self.assertRaises(ValueError):
                model.predict(n=166, past_covariates=self.covariates)

            # recurrent models can only predict data points for time steps where future covariates are available
            model = RNNModel(12, n_epochs=1)
            model.fit(series=self.target_past,
                      future_covariates=self.covariates_past)
            model.predict(n=160, future_covariates=self.covariates)
            with self.assertRaises(ValueError):
                model.predict(n=161, future_covariates=self.covariates)

        def test_batch_predictions(self):
            # predicting multiple time series at once needs to work for arbitrary batch sizes
            # univariate case
            targets_univar = [
                self.target_past,
                self.target_past[:60],
                self.target_past[:80],
            ]
            self._batch_prediction_test_helper_function(targets_univar)

            # multivariate case
            targets_multivar = [tgt.stack(tgt) for tgt in targets_univar]
            self._batch_prediction_test_helper_function(targets_multivar)

        def _batch_prediction_test_helper_function(self, targets):
            epsilon = 1e-4
            model = TCNModel(
                input_chunk_length=50,
                output_chunk_length=10,
                n_epochs=10,
                random_state=0,
            )
            model.fit(series=targets[0], past_covariates=self.covariates_past)
            preds_default = model.predict(
                n=160,
                series=targets,
                past_covariates=[self.covariates] * len(targets),
                batch_size=None,
            )

            # make batch size large enough to test stacking samples
            for batch_size in range(1, 4 * len(targets)):
                preds = model.predict(
                    n=160,
                    series=targets,
                    past_covariates=[self.covariates] * len(targets),
                    batch_size=batch_size,
                )
                for i in range(len(targets)):
                    self.assertLess(
                        sum(sum((preds[i] - preds_default[i]).values())),
                        epsilon)

        def test_predict_from_dataset_unsupported_input(self):
            # an exception should be thrown if an unsupported type is passed
            unsupported_type = "unsupported_type"
            # just need to test this with one model
            model_cls, kwargs, err = models_cls_kwargs_errs[0]
            model = model_cls(input_chunk_length=IN_LEN,
                              output_chunk_length=OUT_LEN,
                              **kwargs)
            model.fit([self.ts_pass_train, self.ts_pass_train_1])

            with self.assertRaises(ValueError):
                model.predict_from_dataset(
                    n=1, input_series_dataset=unsupported_type)

        def test_prediction_with_different_n(self):
            # test model predictions for n < out_len, n == out_len and n > out_len
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                self.assertTrue(
                    isinstance(
                        model,
                        (
                            PastCovariatesTorchModel,
                            DualCovariatesTorchModel,
                            MixedCovariatesTorchModel,
                        ),
                    ),
                    "unit test not yet defined for the given {X}CovariatesTorchModel.",
                )

                if isinstance(model, PastCovariatesTorchModel):
                    past_covs, future_covs = self.covariates, None
                elif isinstance(model, DualCovariatesTorchModel):
                    past_covs, future_covs = None, self.covariates
                else:
                    past_covs, future_covs = self.covariates, self.covariates

                model.fit(
                    self.target_past,
                    past_covariates=past_covs,
                    future_covariates=future_covs,
                    epochs=1,
                )

                # test prediction for n < out_len, n == out_len and n > out_len
                for n in [OUT_LEN - 1, OUT_LEN, 2 * OUT_LEN - 1]:
                    pred = model.predict(n=n,
                                         past_covariates=past_covs,
                                         future_covariates=future_covs)
                    self.assertEqual(len(pred), n)

        def test_same_result_with_different_n_jobs(self):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)

                multiple_ts = [self.ts_pass_train] * 10

                model.fit(multiple_ts)

                # save the random state so that two successive predictions of a probabilistic model are identical
                if model._is_probabilistic():
                    random_state = deepcopy(model._random_instance)
                else:
                    random_state = None

                pred1 = model.predict(n=36, series=multiple_ts, n_jobs=1)

                if random_state is not None:
                    model._random_instance = random_state

                pred2 = model.predict(
                    n=36, series=multiple_ts,
                    n_jobs=-1)  # assuming > 1 core is available on the machine
                self.assertEqual(
                    pred1,
                    pred2,
                    "Model {} produces different predictions with different number of jobs",
                )

        @patch(
            "darts.models.forecasting.torch_forecasting_model.TorchForecastingModel._init_trainer"
        )
        def test_fit_with_constr_epochs(self, init_trainer):
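            # _init_trainer is mocked out by the @patch above, so fit() does not actually train;
            # the test only asserts that the n_epochs given to the constructor is forwarded to the
            # trainer as max_epochs.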
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                multiple_ts = [self.ts_pass_train] * 10
                model.fit(multiple_ts)

                init_trainer.assert_called_with(max_epochs=kwargs["n_epochs"],
                                                trainer_params=ANY)

        @patch(
            "darts.models.forecasting.torch_forecasting_model.TorchForecastingModel._init_trainer"
        )
        def test_fit_with_fit_epochs(self, init_trainer):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                multiple_ts = [self.ts_pass_train] * 10
                epochs = 3

                model.fit(multiple_ts, epochs=epochs)
                init_trainer.assert_called_with(max_epochs=epochs,
                                                trainer_params=ANY)

                model.total_epochs = epochs
                # continue training
                model.fit(multiple_ts, epochs=epochs)
                init_trainer.assert_called_with(max_epochs=epochs,
                                                trainer_params=ANY)

        @patch(
            "darts.models.forecasting.torch_forecasting_model.TorchForecastingModel._init_trainer"
        )
        def test_fit_from_dataset_with_epochs(self, init_trainer):
            for model_cls, kwargs, err in models_cls_kwargs_errs:
                model = model_cls(input_chunk_length=IN_LEN,
                                  output_chunk_length=OUT_LEN,
                                  **kwargs)
                multiple_ts = [self.ts_pass_train] * 10
                train_dataset = model._build_train_dataset(
                    multiple_ts,
                    past_covariates=None,
                    future_covariates=None,
                    max_samples_per_ts=None,
                )
                epochs = 3

                model.fit_from_dataset(train_dataset, epochs=epochs)
                init_trainer.assert_called_with(max_epochs=epochs,
                                                trainer_params=ANY)

                # continue training
                model.fit_from_dataset(train_dataset, epochs=epochs)
                init_trainer.assert_called_with(max_epochs=epochs,
                                                trainer_params=ANY)

        def test_predict_after_fit_from_dataset(self):
            model_cls, kwargs, _ = models_cls_kwargs_errs[0]
            model = model_cls(input_chunk_length=IN_LEN,
                              output_chunk_length=OUT_LEN,
                              **kwargs)

            multiple_ts = [self.ts_pass_train] * 10
            train_dataset = model._build_train_dataset(
                multiple_ts,
                past_covariates=None,
                future_covariates=None,
                max_samples_per_ts=None,
            )
            model.fit_from_dataset(train_dataset, epochs=3)

            # test predict() works after fit_from_dataset()
            model.predict(n=1, series=multiple_ts[0])

        def test_sample_smaller_than_batch_size(self):
            """
            Checking that the TorchForecastingModels do not crash even if the number of available samples for training
            is strictly lower than the selected batch_size
            """
            # TS with 50 timestamps. TorchForecastingModels will use the SequentialDataset for producing training
            # samples, which means we will have 50 - 20 - 2 + 1 = 29 samples, which is < 32 (batch_size). The model
            # should still train on those samples and not crash in any way
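            # (More generally, under this assumption the SequentialDataset yields
            # len(ts) - input_chunk_length - output_chunk_length + 1 training samples per series.)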
            ts = linear_timeseries(start_value=0, end_value=1, length=50)

            model = RNNModel(input_chunk_length=20,
                             output_chunk_length=2,
                             n_epochs=2,
                             batch_size=32)
            model.fit(ts)

        def test_max_samples_per_ts(self):
            """
            Checking that we can fit TorchForecastingModels with max_samples_per_ts without crashing
            """

            ts = linear_timeseries(start_value=0, end_value=1, length=50)

            model = RNNModel(input_chunk_length=20,
                             output_chunk_length=2,
                             n_epochs=2,
                             batch_size=32)

            model.fit(ts, max_samples_per_ts=5)

        def test_residuals(self):
            """
            Torch models should not fail when computing residuals on a series
            long enough to accommodate at least one training sample.
            """
            ts = linear_timeseries(start_value=0, end_value=1, length=38)
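            # With input_chunk_length=24 and output_chunk_length=12, a length-38 series leaves
            # 38 - 24 - 12 + 1 = 3 training samples (assuming the usual SequentialDataset behaviour).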

            model = NBEATSModel(
                input_chunk_length=24,
                output_chunk_length=12,
                num_stacks=2,
                num_blocks=1,
                num_layers=1,
                layer_widths=2,
                n_epochs=2,
            )

            model.residuals(ts)
Exemple #25
0
class CovariateIndexGeneratorTestCase(DartsBaseTestClass):
    n_target = 24
    target_time = tg.linear_timeseries(length=n_target, freq="MS")
    cov_time_train = tg.datetime_attribute_timeseries(target_time,
                                                      attribute="month",
                                                      cyclic=True)
    cov_time_train_short = cov_time_train[1:]

    target_int = tg.linear_timeseries(length=n_target, start=2)
    cov_int_train = target_int
    cov_int_train_short = cov_int_train[1:]

    input_chunk_length = 12
    output_chunk_length = 6
    n_short = 6
    n_long = 8

    # pd.DatetimeIndex
    # target covariate for inference dataset for n <= output_chunk_length
    cov_time_inf_short = TimeSeries.from_times_and_values(
        tg._generate_index(
            start=target_time.start_time(),
            length=n_target + n_short,
            freq=target_time.freq,
        ),
        np.arange(n_target + n_short),
    )
    # target covariate for inference dataset for n > output_chunk_length
    cov_time_inf_long = TimeSeries.from_times_and_values(
        tg._generate_index(
            start=target_time.start_time(),
            length=n_target + n_long,
            freq=target_time.freq,
        ),
        np.arange(n_target + n_long),
    )

    # integer index
    # target covariate for inference dataset for n <= output_chunk_length
    cov_int_inf_short = TimeSeries.from_times_and_values(
        tg._generate_index(
            start=target_int.start_time(),
            length=n_target + n_short,
            freq=target_int.freq,
        ),
        np.arange(n_target + n_short),
    )
    # target covariate for inference dataset for n > output_chunk_length
    cov_int_inf_long = TimeSeries.from_times_and_values(
        tg._generate_index(
            start=target_int.start_time(),
            length=n_target + n_long,
            freq=target_int.freq,
        ),
        np.arange(n_target + n_long),
    )

    def helper_test_index_types(self, ig: CovariateIndexGenerator):
        """test the index type of generated index"""
        # pd.DatetimeIndex
        idx = ig.generate_train_series(self.target_time, self.cov_time_train)
        self.assertTrue(isinstance(idx, pd.DatetimeIndex))
        idx = ig.generate_inference_series(self.n_short, self.target_time,
                                           self.cov_time_inf_short)
        self.assertTrue(isinstance(idx, pd.DatetimeIndex))
        idx = ig.generate_train_series(self.target_time, None)
        self.assertTrue(isinstance(idx, pd.DatetimeIndex))

        # pd.RangeIndex
        idx = ig.generate_train_series(self.target_int, self.cov_int_train)
        self.assertTrue(isinstance(idx, pd.RangeIndex))
        idx = ig.generate_inference_series(self.n_short, self.target_int,
                                           self.cov_int_inf_short)
        self.assertTrue(isinstance(idx, pd.RangeIndex))
        idx = ig.generate_train_series(self.target_int, None)
        self.assertTrue(isinstance(idx, pd.RangeIndex))

    def helper_test_index_generator_train(self, ig: CovariateIndexGenerator):
        """
        If covariates are given, the index generators should return the covariate series' index.
        If covariates are not given, the index generators should return the target series' index.
        """
        # pd.DatetimeIndex
        # generated index must be equal to input covariate index
        idx = ig.generate_train_series(self.target_time, self.cov_time_train)
        self.assertTrue(idx.equals(self.cov_time_train.time_index))
        # generated index must be equal to input covariate index
        idx = ig.generate_train_series(self.target_time,
                                       self.cov_time_train_short)
        self.assertTrue(idx.equals(self.cov_time_train_short.time_index))
        # generated index must be equal to input target index when no covariates are defined
        idx = ig.generate_train_series(self.target_time, None)
        self.assertTrue(idx.equals(self.cov_time_train.time_index))

        # integer index
        # generated index must be equal to input covariate index
        idx = ig.generate_train_series(self.target_int, self.cov_int_train)
        self.assertTrue(idx.equals(self.cov_int_train.time_index))
        # generated index must be equal to input covariate index
        idx = ig.generate_train_series(self.target_int,
                                       self.cov_int_train_short)
        self.assertTrue(idx.equals(self.cov_int_train_short.time_index))
        # generated index must be equal to input target index when no covariates are defined
        idx = ig.generate_train_series(self.target_int, None)
        self.assertTrue(idx.equals(self.cov_int_train.time_index))

    def helper_test_index_generator_inference(self, ig, is_past=False):
        """
        For prediction (`n` is given) with past covariates we have to distinguish between two cases:
        1)  if past covariates are given, we can use them as reference
        2)  if past covariates are missing, we need to generate a time index that starts `input_chunk_length`
            before the end of `target` and ends `max(0, n - output_chunk_length)` after the end of `target`

        For prediction (`n` is given) with future covariates we have to distinguish between two cases:
        1)  if future covariates are given, we can use them as reference
        2)  if future covariates are missing, we need to generate a time index that starts `input_chunk_length`
            before the end of `target` and ends `max(n, output_chunk_length)` after the end of `target`
        """

        # check generated inference index without passing covariates when n <= output_chunk_length
        idx = ig.generate_inference_series(self.n_short, self.target_time,
                                           None)
        if is_past:
            n_out = self.input_chunk_length
            last_idx = self.target_time.end_time()
        else:
            n_out = self.input_chunk_length + self.output_chunk_length
            last_idx = self.cov_time_inf_short.end_time()

        self.assertTrue(len(idx) == n_out)
        self.assertTrue(idx[-1] == last_idx)

        # check generated inference index without passing covariates when n > output_chunk_length
        idx = ig.generate_inference_series(self.n_long, self.target_time, None)
        if is_past:
            n_out = self.input_chunk_length + self.n_long - self.output_chunk_length
            last_idx = (self.target_time.end_time() +
                        (self.n_long - self.output_chunk_length) *
                        self.target_time.freq)
        else:
            n_out = self.input_chunk_length + self.n_long
            last_idx = self.cov_time_inf_long.end_time()

        self.assertTrue(len(idx) == n_out)
        self.assertTrue(idx[-1] == last_idx)

        idx = ig.generate_inference_series(self.n_short, self.target_time,
                                           self.cov_time_inf_short)
        self.assertTrue(idx.equals(self.cov_time_inf_short.time_index))
        idx = ig.generate_inference_series(self.n_long, self.target_time,
                                           self.cov_time_inf_long)
        self.assertTrue(idx.equals(self.cov_time_inf_long.time_index))
        idx = ig.generate_inference_series(self.n_short, self.target_int,
                                           self.cov_int_inf_short)
        self.assertTrue(idx.equals(self.cov_int_inf_short.time_index))
        idx = ig.generate_inference_series(self.n_long, self.target_int,
                                           self.cov_int_inf_long)
        self.assertTrue(idx.equals(self.cov_int_inf_long.time_index))

    def test_past_index_generator(self):
        ig = PastCovariateIndexGenerator(self.input_chunk_length,
                                         self.output_chunk_length)
        self.helper_test_index_types(ig)
        self.helper_test_index_generator_train(ig)
        self.helper_test_index_generator_inference(ig, is_past=True)

    def test_future_index_generator(self):
        ig = FutureCovariateIndexGenerator(self.input_chunk_length,
                                           self.output_chunk_length)
        self.helper_test_index_types(ig)
        self.helper_test_index_generator_train(ig)
        self.helper_test_index_generator_inference(ig, is_past=False)
Exemple #26
0
class MappersTestCase(unittest.TestCase):
    @staticmethod
    def func(x):
        return x + 10

    @staticmethod
    def inverse_func(x):
        return x - 10

    @staticmethod
    def ts_func(ts, x):
        return x - ts.month

    @staticmethod
    def inverse_ts_func(ts, x):
        return x + ts.month

    plus_ten = Mapper(func.__func__)
    plus_ten_invertible = InvertibleMapper(func.__func__,
                                           inverse_func.__func__)

    subtract_month = Mapper(ts_func.__func__)
    subtract_month_invertible = InvertibleMapper(ts_func.__func__,
                                                 inverse_ts_func.__func__)

    lin_series = linear_timeseries(
        start_value=1,
        length=12,
        freq="MS",
        start=pd.Timestamp("2000-01-01"),
        end_value=12,
    )  # noqa: E501
    zeroes = constant_timeseries(value=0.0,
                                 length=12,
                                 freq="MS",
                                 start=pd.Timestamp("2000-01-01"))
    tens = constant_timeseries(value=10.0,
                               length=12,
                               freq="MS",
                               start=pd.Timestamp("2000-01-01"))
    twenties = constant_timeseries(value=20.0,
                                   length=12,
                                   freq="MS",
                                   start=pd.Timestamp("2000-01-01"))

    def test_mapper(self):

        test_cases = [
            (self.zeroes, self.tens),
            ([self.zeroes, self.tens], [self.tens, self.twenties]),
        ]

        for to_transform, expected_output in test_cases:
            transformed = self.plus_ten.transform(to_transform)
            self.assertEqual(transformed, expected_output)

    def test_invertible_mapper(self):
        test_cases = [(self.zeroes), ([self.zeroes, self.tens])]

        for data in test_cases:
            transformed = self.plus_ten_invertible.transform(data)
            back = self.plus_ten_invertible.inverse_transform(transformed)
            self.assertEqual(back, data)

    def test_mapper_with_timestamp(self):

        test_cases = [
            (self.lin_series, self.zeroes),
            ([self.lin_series, self.lin_series], [self.zeroes, self.zeroes]),
        ]

        for to_transform, expected_output in test_cases:
            transformed = self.subtract_month.transform(to_transform)
            if isinstance(to_transform, list):
                expected_output = [
                    o.with_columns_renamed(o.components[0], t.components[0])
                    for t, o in zip(transformed, expected_output)
                ]
            else:
                expected_output = expected_output.with_columns_renamed(
                    expected_output.components[0], transformed.components[0])
            self.assertEqual(transformed, expected_output)

    def test_invertible_mapper_with_timestamp(self):

        test_cases = [(self.lin_series), ([self.lin_series, self.lin_series])]

        for data in test_cases:
            transformed = self.subtract_month_invertible.transform(data)
            back = self.subtract_month_invertible.inverse_transform(
                transformed)
            self.assertEqual(back, data)

    def test_invertible_mappers_on_stochastic_series(self):
        vals = np.random.rand(10, 2, 100) + 2
        series = TimeSeries.from_values(vals)

        imapper = InvertibleMapper(np.log, np.exp)
        tr = imapper.transform(series)
        inv_tr = imapper.inverse_transform(tr)

        np.testing.assert_almost_equal(series.all_values(copy=False),
                                       inv_tr.all_values(copy=False))
Exemple #27
0
class BoxCoxTestCase(unittest.TestCase):

    sine_series = sine_timeseries(length=50,
                                  value_y_offset=5,
                                  value_frequency=0.05)
    lin_series = linear_timeseries(start_value=1, end_value=10, length=50)
    multi_series = sine_series.stack(lin_series)

    def test_boxcox_lambda(self):
        boxcox = BoxCox(lmbda=0.3)

        boxcox.fit(self.multi_series)
        self.assertEqual(boxcox._fitted_params, [[0.3, 0.3]])

        boxcox = BoxCox(lmbda=[0.3, 0.4])
        boxcox.fit(self.multi_series)
        self.assertEqual(boxcox._fitted_params, [[0.3, 0.4]])

        with self.assertRaises(ValueError):
            boxcox = BoxCox(lmbda=[0.2, 0.4, 0.5])
            boxcox.fit(self.multi_series)

        boxcox = BoxCox(optim_method="mle")
        boxcox.fit(self.multi_series)
        lmbda1 = boxcox._fitted_params[0].tolist()

        boxcox = BoxCox(optim_method="pearsonr")
        boxcox.fit(self.multi_series)
        lmbda2 = boxcox._fitted_params[0].tolist()

        self.assertNotEqual(lmbda1, lmbda2)

    def test_boxcox_transform(self):
        log_mapper = Mapper(lambda x: np.log(x))
        boxcox = BoxCox(lmbda=0)
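        # The Box-Cox transform is (x**lmbda - 1) / lmbda for lmbda != 0 and log(x) for lmbda == 0,
        # so with lmbda=0 it should coincide with the plain log mapper above.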

        transformed1 = log_mapper.transform(self.sine_series)
        transformed2 = boxcox.fit(self.sine_series).transform(self.sine_series)

        np.testing.assert_almost_equal(
            transformed1.all_values(copy=False),
            transformed2.all_values(copy=False),
            decimal=4,
        )

    def test_boxcox_inverse(self):
        boxcox = BoxCox()
        transformed = boxcox.fit_transform(self.multi_series)
        back = boxcox.inverse_transform(transformed)
        pd.testing.assert_frame_equal(self.multi_series.pd_dataframe(),
                                      back.pd_dataframe(),
                                      check_exact=False)

    def test_boxcox_multi_ts(self):

        test_cases = [
            ([[0.2, 0.4], [0.3, 0.6]]),  # full lambda
            (0.4),  # single value
            None,  # None
        ]

        for lmbda in test_cases:
            box_cox = BoxCox(lmbda=lmbda)
            transformed = box_cox.fit_transform(
                [self.multi_series, self.multi_series])
            back = box_cox.inverse_transform(transformed)
            pd.testing.assert_frame_equal(
                self.multi_series.pd_dataframe(),
                back[0].pd_dataframe(),
                check_exact=False,
            )
            pd.testing.assert_frame_equal(
                self.multi_series.pd_dataframe(),
                back[1].pd_dataframe(),
                check_exact=False,
            )

    def test_boxcox_multiple_calls_to_fit(self):
        """
        This test checks that fitting the transformer a second time recomputes the lambdas
        instead of keeping the old ones.
        """
        box_cox = BoxCox()

        box_cox.fit(self.sine_series)
        lambda1 = deepcopy(box_cox._fitted_params)[0].tolist()

        box_cox.fit(self.lin_series)
        lambda2 = deepcopy(box_cox._fitted_params)[0].tolist()

        self.assertNotEqual(
            lambda1, lambda2,
            "Lambdas should change when the transformer is retrained")

    def test_multivariate_stochastic_series(self):
        transformer = BoxCox()
        vals = np.random.rand(10, 5, 10)
        series = TimeSeries.from_values(vals)

        new_series = transformer.fit_transform(series)
        series_back = transformer.inverse_transform(new_series)

        # Test inverse transform
        np.testing.assert_allclose(series.all_values(),
                                   series_back.all_values())
Exemple #28
0
    def test_pred_length(self):
        series = tg.linear_timeseries(length=100)
        self.helper_test_pred_length(TransformerModel, series)
Exemple #29
0
        def test_not_enough_covariates(self):

            target_series = tg.linear_timeseries(start_value=0,
                                                 end_value=100,
                                                 length=50)
            past_covariates = tg.linear_timeseries(start_value=100,
                                                   end_value=200,
                                                   length=50)
            future_covariates = tg.linear_timeseries(start_value=200,
                                                     end_value=300,
                                                     length=50)

            model = RegressionModel(
                lags_past_covariates=[-10],
                lags_future_covariates=[-5, 5],
                output_chunk_length=7,
            )
            model.fit(
                series=target_series,
                past_covariates=past_covariates,
                future_covariates=future_covariates,
                max_samples_per_ts=1,
            )

            # output_chunk_length, required past_offset, required future_offset
            test_cases = [
                (1, 0, 13),
                (5, -4, 9),
                (7, -2, 11),
            ]
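            # A rough sketch of where these offsets come from (assuming the model forecasts n=10 steps
            # auto-regressively in chunks of `output_chunk_length`): the last chunk starts
            # s = n - ((n - 1) % output_chunk_length) steps after the target end, and the covariates must
            # reach s + max(lag), i.e. req_past_offset = s - 10 and req_future_offset = s + 3 here:
            #   ocl=1: s=10 -> (0, 13);  ocl=5: s=6 -> (-4, 9);  ocl=7: s=8 -> (-2, 11)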
            for (output_chunk_length, req_past_offset,
                 req_future_offset) in test_cases:
                model = RegressionModel(
                    lags_past_covariates=[-10],
                    lags_future_covariates=[-4, 3],
                    output_chunk_length=output_chunk_length,
                )
                model.fit(
                    series=target_series,
                    past_covariates=past_covariates,
                    future_covariates=future_covariates,
                )

                # check that given the required offsets no ValueError is raised
                model.predict(
                    10,
                    series=target_series[:-25],
                    past_covariates=past_covariates[:-25 + req_past_offset],
                    future_covariates=future_covariates[:-25 +
                                                        req_future_offset],
                )
                # check that one less past covariate time step causes ValueError
                with self.assertRaises(ValueError):
                    model.predict(
                        10,
                        series=target_series[:-25],
                        past_covariates=past_covariates[:-26 +
                                                        req_past_offset],
                        future_covariates=future_covariates[:-25 +
                                                            req_future_offset],
                    )
                # check that one less future covariate time step causes ValueError
                with self.assertRaises(ValueError):
                    model.predict(
                        10,
                        series=target_series[:-25],
                        past_covariates=past_covariates[:-25 +
                                                        req_past_offset],
                        future_covariates=future_covariates[:-26 +
                                                            req_future_offset],
                    )
Exemple #30
0
def dummy_timeseries(
    length,
    n_series=1,
    comps_target=1,
    comps_pcov=1,
    comps_fcov=1,
    multiseries_offset=0,
    pcov_offset=0,
    fcov_offset=0,
    comps_stride=100,
    type_stride=10000,
    series_stride=1000000,
    target_start_value=1,
    first_target_start_date=pd.Timestamp("2000-01-01"),
    freq="D",
    integer_index=False,
):

    targets, pcovs, fcovs = [], [], []
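    # For each series, build one target, one past-covariate and one future-covariate series. Values are
    # spread apart via comps_stride / type_stride / series_stride so every component stays identifiable,
    # and start times are shifted by multiseries_offset (per series) plus pcov_offset / fcov_offset.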
    for series_idx in range(n_series):

        target_start_date = (
            series_idx * multiseries_offset
            if integer_index
            else first_target_start_date + pd.Timedelta(series_idx * multiseries_offset, unit=freq)
        )
        pcov_start_date = (
            target_start_date + pcov_offset
            if integer_index
            else target_start_date + pd.Timedelta(pcov_offset, unit=freq)
        )
        fcov_start_date = (
            target_start_date + fcov_offset
            if integer_index
            else target_start_date + pd.Timedelta(fcov_offset, unit=freq)
        )

        target_start_val = target_start_value + series_stride * series_idx
        pcov_start_val = target_start_val + type_stride
        fcov_start_val = target_start_val + 2 * type_stride

        target_ts = None
        pcov_ts = None
        fcov_ts = None

        for idx in range(comps_target):
            start = target_start_val + idx * comps_stride
            curr_ts = tg.linear_timeseries(
                start_value=start,
                end_value=start + length - 1,
                start=target_start_date,
                length=length,
                freq=freq,
                column_name=f"{series_idx}-trgt-{idx}",
            )
            target_ts = target_ts.stack(curr_ts) if target_ts else curr_ts
        for idx in range(comps_pcov):
            start = pcov_start_val + idx * comps_stride
            curr_ts = tg.linear_timeseries(
                start_value=start,
                end_value=start + length - 1,
                start=pcov_start_date,
                length=length,
                freq=freq,
                column_name=f"{series_idx}-pcov-{idx}",
            )
            pcov_ts = pcov_ts.stack(curr_ts) if pcov_ts else curr_ts
        for idx in range(comps_fcov):
            start = fcov_start_val + idx * comps_stride
            curr_ts = tg.linear_timeseries(
                start_value=start,
                end_value=start + length - 1,
                start=fcov_start_date,
                length=length,
                freq=freq,
                column_name=f"{series_idx}-fcov-{idx}",
            )
            fcov_ts = fcov_ts.stack(curr_ts) if fcov_ts else curr_ts

        targets.append(target_ts)
        pcovs.append(pcov_ts)
        fcovs.append(fcov_ts)

    return targets, pcovs, fcovs