def test_generate_index(self):
    """Test `_generate_index` for both integer (RangeIndex) and datetime
    (DatetimeIndex) indexes: length, monotonicity, first/last entries, and
    the validation errors for invalid `start`/`end`/`length` combinations."""

    def test_routine(start, end=None, length=None, freq="D"):
        # testing length, correct start and if sorted (monotonic increasing)
        index = _generate_index(start=start, end=end, length=length, freq=freq)
        self.assertEqual(len(index), length_assert)
        self.assertTrue(index.is_monotonic_increasing)
        self.assertTrue(index[0] == start_assert)
        self.assertTrue(index[-1] == end_assert)

    # `length_assert`, `start_assert`, `end_assert` are read by the closure above
    for length_assert in [1, 2, 5, 10, 100]:
        for start_pos in [0, 1]:
            # pandas.RangeIndex
            # NOTE(review): `freq` appears to be ignored for integer indexes,
            # hence the ""/"D"/"BH" variants — confirm against _generate_index
            start_assert, end_assert = start_pos, start_pos + length_assert - 1
            test_routine(start=start_assert, length=length_assert, freq="")
            test_routine(start=start_assert, length=length_assert, freq="D")
            test_routine(start=start_assert, end=end_assert)
            test_routine(start=start_assert, end=end_assert, freq="D")
            test_routine(start=None, end=end_assert, length=length_assert, freq="BH")

            # pandas.DatetimeIndex
            # shift the start date by `start_pos` frequency steps
            start_date = pd.DatetimeIndex(["2000-01-01"], freq="D")
            start_date += start_date.freq * start_pos
            # dates = _generate_index(start=start_date[0], length=length_assert)
            dates = _generate_index(start=start_date[0], length=length_assert)
            start_assert, end_assert = dates[0], dates[-1]
            test_routine(start=start_assert, length=length_assert)
            test_routine(start=start_assert, end=end_assert)
            test_routine(start=None, end=end_assert, length=length_assert, freq="D")

    # `start`, `end` and `length` cannot both be set simultaneously
    with self.assertRaises(ValueError):
        _generate_index(start=0, end=9, length=10)
    # same as above but `start` defaults to timestamp '2000-01-01' in all timeseries generation functions
    with self.assertRaises(ValueError):
        linear_timeseries(end=9, length=10)

    # exactly two of [`start`, `end`, `length`] must be set
    with self.assertRaises(ValueError):
        test_routine(start=0)
    with self.assertRaises(ValueError):
        test_routine(start=None, end=1)
    with self.assertRaises(ValueError):
        test_routine(start=None, end=None, length=10)

    # `start` and `end` must have same type
    with self.assertRaises(ValueError):
        test_routine(start=0, end=pd.Timestamp("2000-01-01"))
    with self.assertRaises(ValueError):
        test_routine(start=pd.Timestamp("2000-01-01"), end=10)
def test_only_future_covariates(self):
    """Fit/predict a RegressionModel configured with future covariates only."""
    model = RegressionModel(lags_future_covariates=[-2])

    target = tg.linear_timeseries(start_value=0, end_value=49, length=50)
    # two-component future covariates, built by stacking two linear ramps
    fut_cov = tg.linear_timeseries(start_value=100, end_value=149, length=50)
    fut_cov = fut_cov.stack(
        tg.linear_timeseries(start_value=400, end_value=449, length=50))

    target_train, target_test = target.split_after(0.7)
    fut_cov_train, fut_cov_test = fut_cov.split_after(0.7)

    model.fit(
        series=[target_train, target_train + 0.5],
        future_covariates=[fut_cov_train, fut_cov_train + 0.5],
    )

    predictions = model.predict(
        10,
        series=[target_train, target_train + 0.5],
        future_covariates=[fut_cov, fut_cov + 0.5],
    )

    self.assertEqual(len(predictions[0]), 10, f"Found {len(predictions[0])} instead")
def helper_generate_multivariate_case_data(self, season_length, n_repeat):
    """Generates multivariate test case data.

    Target series is a sine wave stacked with a repeating linear curve of
    equal seasonal length. Covariates are datetime attributes for 'hours'.

    Parameters
    ----------
    season_length
        number of time steps per season/period
    n_repeat
        number of seasons to generate

    Returns
    -------
    tuple
        (scaled full series, scaled train split, scaled validation split,
        scaled covariates)
    """
    # generate sine wave
    ts_sine = tg.sine_timeseries(
        value_frequency=1 / season_length,
        length=n_repeat * season_length,
        freq="h",
    )

    # generate repeating linear curve; each repetition starts right after the
    # previous one ends
    ts_linear = tg.linear_timeseries(
        0, 1, length=season_length, start=ts_sine.end_time() + ts_sine.freq
    )
    # loop index unused — we only need (n_repeat - 1) more repetitions
    for _ in range(n_repeat - 1):
        start = ts_linear.end_time() + ts_linear.freq
        new_ts = tg.linear_timeseries(0, 1, length=season_length, start=start)
        ts_linear = ts_linear.append(new_ts)
    # re-anchor the linear curve onto the sine wave's time index so both
    # series are aligned and can be stacked
    ts_linear = TimeSeries.from_times_and_values(
        times=ts_sine.time_index, values=ts_linear.values()
    )

    # create multivariate TimeSeries by stacking sine and linear curves
    ts = ts_sine.stack(ts_linear)

    # create train/test sets
    val_length = 10 * season_length
    ts_train, ts_val = ts[:-val_length], ts[-val_length:]

    # scale data; scaler is fit on the training split only
    scaler_ts = Scaler()
    ts_train_scaled = scaler_ts.fit_transform(ts_train)
    ts_val_scaled = scaler_ts.transform(ts_val)
    ts_scaled = scaler_ts.transform(ts)

    # generate long enough covariates (past and future covariates will be the same for simplicity)
    long_enough_ts = tg.sine_timeseries(
        value_frequency=1 / season_length, length=1000, freq=ts.freq
    )
    covariates = tg.datetime_attribute_timeseries(long_enough_ts, attribute="hour")
    scaler_covs = Scaler()
    covariates_scaled = scaler_covs.fit_transform(covariates)
    return ts_scaled, ts_train_scaled, ts_val_scaled, covariates_scaled
def test_callable_encoder(self):
    """Test `CallableIndexEncoder`"""
    ts = tg.linear_timeseries(length=24, freq="A")
    input_chunk_length = 12
    output_chunk_length = 6

    # ===> test callable index encoder <===
    # two custom callables applied to the time index: the year, and the year
    # shifted back by one
    encoder_params = {
        "custom": {
            "past": [lambda index: index.year, lambda index: index.year - 1]
        }
    }
    encs = SequentialEncoder(
        add_encoders=encoder_params,
        input_chunk_length=input_chunk_length,
        output_chunk_length=output_chunk_length,
        takes_past_covariates=True,
        takes_future_covariates=True,
    )

    t1, _ = encs.encode_train(ts)
    # component 0 holds the raw years, component 1 holds the years minus one
    self.assertTrue((ts.time_index.year.values == t1[0].values()[:, 0]).all())
    self.assertTrue(
        (ts.time_index.year.values - 1 == t1[0].values()[:, 1]).all())
def test_with_static_covariates_multivariate(self):
    """Attach uni- and multivariate static covariates to a multivariate series."""
    ts = linear_timeseries(length=10)
    ts_multi = ts.stack(ts)
    static_covs = pd.DataFrame([[0.0, 1.0], [0.0, 1.0]], columns=["st1", "st2"])

    # from univariate static covariates
    ts_multi = ts_multi.with_static_covariates(static_covs.loc[0])
    # a single static-covariate row is stored under a global default name and
    # shared by all components
    assert ts_multi.static_covariates.index.equals(
        pd.Index([DEFAULT_GLOBAL_STATIC_COV_NAME]))
    assert ts_multi.static_covariates.columns.equals(static_covs.columns)
    np.testing.assert_almost_equal(
        ts_multi.static_covariates_values(copy=False),
        static_covs.loc[0:0].values)

    # from multivariate static covariates
    ts_multi = ts_multi.with_static_covariates(static_covs)
    # one static-covariate row per component, indexed by component name
    assert ts_multi.static_covariates.index.equals(ts_multi.components)
    assert ts_multi.static_covariates.columns.equals(static_covs.columns)
    np.testing.assert_almost_equal(
        ts_multi.static_covariates_values(copy=False), static_covs.values)

    # raise an error if multivariate static covariates columns don't match the number of components in the series
    with pytest.raises(ValueError):
        _ = ts_multi.with_static_covariates(
            pd.concat([static_covs] * 2, axis=0))
def test_concatenate_dim_samples(self):
    """
    Test concatenation with static covariates along sample dimension (axis=2)
    Along sample dimension, we only take the static covariates of the first series
    (as components and time don't change).
    """
    static_covs_left = pd.DataFrame([[0, 1]], columns=["st1", "st2"]).astype(int)
    static_covs_right = pd.DataFrame([[3, 4]], columns=["st3", "st4"]).astype(int)

    ts_left = linear_timeseries(
        length=10).with_static_covariates(static_covs_left)
    ts_right = linear_timeseries(
        length=10).with_static_covariates(static_covs_right)

    # only the left/first series' static covariates should be retained
    ts_concat = concatenate([ts_left, ts_right], axis=2)
    assert ts_concat.static_covariates.equals(ts_left.static_covariates)
def test_concatenate_dim_time(self):
    """
    Test concatenation with static covariates along time dimension (axis=0)
    Along time dimension, we only take the static covariates of the first series
    (as static covariates are time-independent).
    """
    static_covs_left = pd.DataFrame([[0, 1]], columns=["st1", "st2"]).astype(int)
    static_covs_right = pd.DataFrame([[3, 4]], columns=["st3", "st4"]).astype(int)

    ts_left = linear_timeseries(
        length=10).with_static_covariates(static_covs_left)
    # right series starts immediately after the left one so they can be
    # concatenated along time
    ts_right = linear_timeseries(
        length=10,
        start=ts_left.end_time() + ts_left.freq).with_static_covariates(static_covs_right)

    # only the left/first series' static covariates should be retained
    ts_concat = concatenate([ts_left, ts_right], axis=0)
    assert ts_concat.static_covariates.equals(ts_left.static_covariates)
def test_stack(self):
    """Stacking series must combine / validate their static covariates."""
    ts_uni = linear_timeseries(length=10)
    ts_multi = ts_uni.stack(ts_uni)

    static_covs_uni1 = pd.DataFrame([[0, 1]], columns=["st1", "st2"]).astype(int)
    static_covs_uni2 = pd.DataFrame([[3, 4]], columns=["st3", "st4"]).astype(int)
    static_covs_uni3 = pd.DataFrame([[2, 3, 4]],
                                    columns=["st1", "st2", "st3"]).astype(int)
    static_covs_multi = pd.DataFrame([[0, 0], [1, 1]],
                                     columns=["st1", "st2"]).astype(int)

    ts_uni = ts_uni.with_static_covariates(static_covs_uni1)
    ts_multi = ts_multi.with_static_covariates(static_covs_multi)

    # valid static covariates for concatenation/stack
    ts_stacked1 = ts_uni.stack(ts_uni)
    assert ts_stacked1.static_covariates.index.equals(
        ts_stacked1.components)
    np.testing.assert_almost_equal(
        ts_stacked1.static_covariates_values(copy=False),
        pd.concat([ts_uni.static_covariates] * 2, axis=0).values,
    )

    # valid static covariates for concatenation/stack: first only has static covs
    # -> this gives multivar ts with univar static covs
    ts_stacked2 = ts_uni.stack(ts_uni.with_static_covariates(None))
    np.testing.assert_almost_equal(
        ts_stacked2.static_covariates_values(copy=False),
        ts_uni.static_covariates_values(copy=False),
    )

    # mismatch between column names
    with pytest.raises(ValueError):
        _ = ts_uni.stack(ts_uni.with_static_covariates(static_covs_uni2))

    # mismatch between number of covariates
    with pytest.raises(ValueError):
        _ = ts_uni.stack(ts_uni.with_static_covariates(static_covs_uni3))

    # valid univar ts with univar static covariates + multivar ts with multivar static covariates
    ts_stacked3 = ts_uni.stack(ts_multi)
    np.testing.assert_almost_equal(
        ts_stacked3.static_covariates_values(copy=False),
        pd.concat([ts_uni.static_covariates, ts_multi.static_covariates],
                  axis=0).values,
    )

    # invalid univar ts with univar static covariates + multivar ts with univar static covariates
    with pytest.raises(ValueError):
        _ = ts_uni.stack(ts_multi.with_static_covariates(static_covs_uni1))
def test_ts_from_x(self):
    """Static covariates must survive every TimeSeries factory method and
    serialization round trip (xarray, DataFrame, Series, values, csv,
    pickle, json)."""
    ts = linear_timeseries(length=10).with_static_covariates(
        pd.Series([0.0, 1.0], index=["st1", "st2"]))

    self.helper_test_cov_transfer(ts,
                                  TimeSeries.from_xarray(ts.data_array()))
    self.helper_test_cov_transfer(
        ts,
        TimeSeries.from_dataframe(ts.pd_dataframe(),
                                  static_covariates=ts.static_covariates),
    )
    # ts.pd_series() loses component names -> static covariates have different components names
    self.helper_test_cov_transfer_values(
        ts,
        TimeSeries.from_series(ts.pd_series(),
                               static_covariates=ts.static_covariates),
    )
    self.helper_test_cov_transfer(
        ts,
        TimeSeries.from_times_and_values(
            times=ts.time_index,
            values=ts.all_values(),
            columns=ts.components,
            static_covariates=ts.static_covariates,
        ),
    )
    self.helper_test_cov_transfer(
        ts,
        TimeSeries.from_values(
            values=ts.all_values(),
            columns=ts.components,
            static_covariates=ts.static_covariates,
        ),
    )

    # file-based round trips (csv / pickle) plus json string round trip
    f_csv = os.path.join(self.temp_work_dir, "temp_ts.csv")
    f_pkl = os.path.join(self.temp_work_dir, "temp_ts.pkl")
    ts.to_csv(f_csv)
    ts.to_pickle(f_pkl)
    ts_json = ts.to_json()
    self.helper_test_cov_transfer(
        ts,
        TimeSeries.from_csv(f_csv,
                            time_col="time",
                            static_covariates=ts.static_covariates),
    )
    self.helper_test_cov_transfer(ts, TimeSeries.from_pickle(f_pkl))
    self.helper_test_cov_transfer(
        ts,
        TimeSeries.from_json(ts_json, static_covariates=ts.static_covariates))
def test_scalers_with_static_covariates(self):
    """Static covariates must pass through scaling and inverse scaling untouched."""
    series = linear_timeseries(start_value=1.0, end_value=2.0, length=10)
    series = series.with_static_covariates(
        pd.Series([0.0, 2.0], index=["st1", "st2"]))

    for transformer in (Scaler(), BoxCox()):
        # forward transform keeps the static covariates
        scaled = transformer.fit_transform(series)
        assert scaled.static_covariates.equals(series.static_covariates)

        # inverse transform keeps them as well
        restored = transformer.inverse_transform(scaled)
        assert restored.static_covariates.equals(series.static_covariates)
def test_max_samples_per_ts(self):
    """
    Checking that we can fit TorchForecastingModels with max_samples_per_ts,
    without crash
    """
    series = linear_timeseries(start_value=0, end_value=1, length=50)

    rnn = RNNModel(
        input_chunk_length=20,
        output_chunk_length=2,
        n_epochs=2,
        batch_size=32,
    )
    # limiting the number of training samples per series must not crash
    rnn.fit(series, max_samples_per_ts=5)
def test_sample_smaller_than_batch_size(self): """ Checking that the TorchForecastingModels do not crash even if the number of available samples for training is strictly lower than the selected batch_size """ # TS with 50 timestamps. TorchForecastingModels will use the SequentialDataset for producing training # samples, which means we will have 50 - 22 - 2 + 1 = 27 samples, which is < 32 (batch_size). The model # should still train on those samples and not crash in any way ts = linear_timeseries(start_value=0, end_value=1, length=50) model = RNNModel(input_chunk_length=20, output_chunk_length=2, n_epochs=2, batch_size=32) model.fit(ts)
def test_performance(self):
    """TCN should reach a low MAE on a simple sine-plus-trend series."""
    # dummy time series: sine wave on top of a linear trend
    series = tg.sine_timeseries(length=100) + tg.linear_timeseries(
        length=100, end_value=2
    )
    train, test = series[:90], series[90:]

    model = TCNModel(
        input_chunk_length=12,
        output_chunk_length=10,
        n_epochs=300,
        random_state=0,
    )
    model.fit(train)

    forecast = model.predict(n=10)
    self.assertTrue(mae(forecast, test) < 0.3)
def test_routine(start, end=None, length=None):
    # Builds a linear series and checks its start value, end value and the
    # delta between two adjacent entries.
    # NOTE(review): reads `self`, `start_value`, `end_value` and
    # `length_assert` from an enclosing scope — presumably this is a closure
    # defined inside a test method; confirm against the outer function.
    linear_ts = linear_timeseries(
        start=start,
        end=end,
        length=length,
        start_value=start_value,
        end_value=end_value,
    )
    self.assertEqual(linear_ts.values()[0][0], start_value)
    self.assertEqual(linear_ts.values()[-1][0], end_value)
    # adjacent values of a linear ramp differ by the total span divided by (n - 1)
    self.assertAlmostEqual(
        linear_ts.values()[-1][0] - linear_ts.values()[-2][0],
        (end_value - start_value) / (length_assert - 1),
    )
    self.assertEqual(len(linear_ts), length_assert)
def test_map_with_timestamp(self):
    """map() with a two-argument function receives each entry's timestamp."""
    # monthly ramp 1..12 starting in January 2000
    series = linear_timeseries(
        start_value=1,
        length=12,
        freq='MS',
        start_ts=pd.Timestamp('2000-01-01'),
        end_value=12,
    )  # noqa: E501
    zeroes = constant_timeseries(
        value=0.0, length=12, freq='MS', start_ts=pd.Timestamp('2000-01-01'))

    def function(ts, x):
        # value k at month k, so subtracting the month index yields zero
        return x - ts.month

    mapped = series.map(function)
    self.assertEqual(mapped, zeroes)
def test_static_covariates_values(self):
    """`static_covariates_values` returns either a safe copy or a mutable view."""
    series = linear_timeseries(length=10)
    series = series.with_static_covariates(
        pd.DataFrame([[0.0, 1.0]], columns=["st1", "st2"]))

    # mutating a copy must leave the original DataFrame untouched
    copied = series.static_covariates_values(copy=True)
    copied[:] = -1.0
    assert (series.static_covariates_values(copy=False) != -1.0).all()

    # mutating a view must write through to the original DataFrame
    view = series.static_covariates_values(copy=False)
    view[:] = -1.0
    assert (series.static_covariates_values(copy=False) == -1.0).all()

    # without static covariates there is nothing to return
    series = series.with_static_covariates(None)
    assert series.static_covariates_values() is None
def test_map_wrong_fn(self):
    """map() must reject callables taking more than two arguments."""
    series = linear_timeseries(
        start_value=1,
        length=12,
        freq='MS',
        start_ts=pd.Timestamp('2000-01-01'),
        end_value=12,
    )  # noqa: E501

    def add(x, y, z):
        return x + y + z

    # a plain 3-argument function is invalid
    with self.assertRaises(ValueError):
        series.map(add)

    # ... and so is its numpy ufunc equivalent
    ufunc_add = np.frompyfunc(add, 3, 1)
    with self.assertRaises(ValueError):
        series.map(ufunc_add)
class MappersTestCase(unittest.TestCase):
    # Tests for Mapper / InvertibleMapper transformers, with and without
    # timestamp-aware mapping functions.

    @staticmethod
    def func(x):
        return x + 10

    @staticmethod
    def inverse_func(x):
        return x - 10

    @staticmethod
    def ts_func(ts, x):
        return x - ts.month

    @staticmethod
    def inverse_ts_func(ts, x):
        return x + ts.month

    # `.__func__` unwraps the staticmethod descriptor so the plain function
    # can be passed to the Mapper constructors at class-body evaluation time
    plus_ten = Mapper(func.__func__)
    plus_ten_invertible = InvertibleMapper(func.__func__, inverse_func.__func__)

    # timestamp-aware mappers: functions receive (timestamp, value)
    subtract_month = Mapper(ts_func.__func__)
    subtract_month_invertible = InvertibleMapper(ts_func.__func__,
                                                 inverse_ts_func.__func__)

    # monthly series: values 1..12 starting at 2000-01-01
    lin_series = linear_timeseries(start_value=1, length=12, freq='MS', start_ts=pd.Timestamp('2000-01-01'), end_value=12)  # noqa: E501
    zeroes = constant_timeseries(value=0.0, length=12, freq='MS', start_ts=pd.Timestamp('2000-01-01'))
    tens = constant_timeseries(value=10.0, length=12, freq='MS', start_ts=pd.Timestamp('2000-01-01'))

    def test_mapper(self):
        # 0 + 10 == 10 everywhere
        transformed = self.plus_ten.transform(self.zeroes)
        self.assertEqual(transformed, self.tens)

    def test_invertible_mapper(self):
        # transform followed by inverse_transform must be the identity
        transformed = self.plus_ten_invertible.transform(self.lin_series)
        back = self.plus_ten_invertible.inverse_transform(transformed)
        self.assertEqual(back, self.lin_series)

    def test_mapper_with_timestamp(self):
        # value k at month k minus the month index is zero everywhere
        transformed = self.subtract_month.transform(self.lin_series)
        self.assertEqual(transformed, self.zeroes)

    def test_invertible_mapper_with_timestamp(self):
        # timestamp-aware round trip must also be the identity
        transformed = self.subtract_month_invertible.transform(self.lin_series)
        back = self.subtract_month_invertible.inverse_transform(transformed)
        self.assertEqual(back, self.lin_series)
def test_residuals(self):
    """
    Torch models should not fail when computing residuals on a series
    long enough to accommodate at least one training sample.
    """
    # length 38 = input_chunk_length (24) + output_chunk_length (12) + 2,
    # i.e. just enough for residual computation
    ts = linear_timeseries(start_value=0, end_value=1, length=38)

    model = NBEATSModel(
        input_chunk_length=24,
        output_chunk_length=12,
        num_stacks=2,
        num_blocks=1,
        num_layers=1,
        layer_widths=2,
        n_epochs=2,
    )
    model.residuals(ts)
class BoxCoxTestCase(unittest.TestCase):
    # fixtures: a strictly positive sine (offset by 5), a positive linear
    # ramp, and their two-component stack
    sine_series = sine_timeseries(length=50, value_y_offset=5, value_frequency=0.05)
    lin_series = linear_timeseries(start_value=1, end_value=10, length=50)
    multi_series = sine_series.stack(lin_series)

    def test_boxbox_lambda(self):
        # NOTE(review): "boxbox" in the test name looks like a typo for
        # "boxcox"; renaming would change the public test id, so it is kept.
        boxcox = BoxCox()

        # a scalar lambda is broadcast to every component
        boxcox.fit(self.multi_series, 0.3)
        self.assertEqual(boxcox._lmbda, [0.3, 0.3])

        # a list of lambdas must match the number of components
        boxcox.fit(self.multi_series, [0.3, 0.4])
        self.assertEqual(boxcox._lmbda, [0.3, 0.4])

        with self.assertRaises(ValueError):
            boxcox.fit(self.multi_series, [0.2, 0.4, 0.5])

        # the two optimisation methods should find different lambdas
        boxcox.fit(self.multi_series, optim_method='mle')
        lmbda1 = boxcox._lmbda
        boxcox.fit(self.multi_series, optim_method='pearsonr')
        lmbda2 = boxcox._lmbda
        self.assertNotEqual(lmbda1.array, lmbda2.array)

    def test_boxcox_transform(self):
        # Box-Cox with lambda=0 is the natural logarithm
        log_mapper = Mapper(lambda x: log(x))
        boxcox = BoxCox()

        transformed1 = log_mapper.transform(self.sine_series)
        transformed2 = boxcox.fit(self.sine_series, lmbda=0).transform(self.sine_series)

        self.assertEqual(transformed1, transformed2)

    def test_boxcox_inverse(self):
        # transform followed by inverse_transform recovers the original values
        boxcox = BoxCox()
        transformed = boxcox.fit_transform(self.multi_series)
        back = boxcox.inverse_transform(transformed)
        pd.testing.assert_frame_equal(self.multi_series._df, back._df, check_exact=False)
def test_with_static_covariates_univariate(self):
    """Attach static covariates (Series, DataFrame, None) to a univariate series."""
    ts = linear_timeseries(length=10)
    static_covs_series = pd.Series([0.0, 1.0], index=["st1", "st2"])
    static_covs_df = pd.DataFrame([[0.0, 1.0]], columns=["st1", "st2"])

    # check immutable
    ts.with_static_covariates(static_covs_series)
    assert not ts.has_static_covariates

    # from Series
    ts = ts.with_static_covariates(static_covs_series)
    assert ts.has_static_covariates
    np.testing.assert_almost_equal(
        ts.static_covariates_values(copy=False),
        np.expand_dims(static_covs_series.values, -1).T,
    )
    assert ts.static_covariates.index.equals(ts.components)

    # from DataFrame
    ts = ts.with_static_covariates(static_covs_df)
    assert ts.has_static_covariates
    np.testing.assert_almost_equal(ts.static_covariates_values(copy=False),
                                   static_covs_df.values)
    assert ts.static_covariates.index.equals(ts.components)

    # with None
    ts = ts.with_static_covariates(None)
    assert ts.static_covariates is None
    assert not ts.has_static_covariates

    # only pd.Series, pd.DataFrame or None
    with pytest.raises(ValueError):
        _ = ts.with_static_covariates([1, 2, 3])

    # multivariate does not work with univariate TimeSeries
    with pytest.raises(ValueError):
        static_covs_multi = pd.concat([static_covs_series] * 2, axis=1).T
        _ = ts.with_static_covariates(static_covs_multi)
def test_multiple_ts(self):
    """Fit/predict RegressionModel on multiple series with past covariates;
    then check that (1) adding future covariates improves denoising over past
    covariates alone, and (2) training on two noisy series improves over one.
    """
    lags = 4
    lags_past_covariates = 3
    model = RegressionModel(lags=lags,
                            lags_past_covariates=lags_past_covariates)

    target_series = tg.linear_timeseries(start_value=0, end_value=49, length=50)
    past_covariates = tg.linear_timeseries(start_value=100, end_value=149, length=50)
    past_covariates = past_covariates.stack(
        tg.linear_timeseries(start_value=400, end_value=449, length=50))

    target_train, target_test = target_series.split_after(0.7)
    past_covariates_train, past_covariates_test = past_covariates.split_after(
        0.7)
    model.fit(
        series=[target_train, target_train + 0.5],
        past_covariates=[
            past_covariates_train, past_covariates_train + 0.5
        ],
    )

    predictions = model.predict(
        10,
        series=[target_train, target_train + 0.5],
        past_covariates=[past_covariates, past_covariates + 0.5],
    )

    # fixed: the failure message previously interpolated len(predictions)
    # (the number of series, 2) instead of the length of the first forecast
    self.assertEqual(len(predictions[0]), 10,
                     f"Found {len(predictions[0])} instead")

    # multiple TS, both future and past covariates, checking that both covariates lead to better results than
    # using a single one (target series = past_cov + future_cov + noise)
    np.random.seed(42)

    linear_ts_1 = tg.linear_timeseries(start_value=10, end_value=59, length=50)
    linear_ts_2 = tg.linear_timeseries(start_value=40, end_value=89, length=50)

    past_covariates = tg.sine_timeseries(length=50) * 10
    future_covariates = (
        tg.sine_timeseries(length=50, value_frequency=0.015) * 50)

    target_series_1 = linear_ts_1 + 4 * past_covariates + 2 * future_covariates
    target_series_2 = linear_ts_2 + 4 * past_covariates + 2 * future_covariates
    target_series_1_noise = (linear_ts_1 + 4 * past_covariates +
                             2 * future_covariates +
                             tg.gaussian_timeseries(std=7, length=50))
    target_series_2_noise = (linear_ts_2 + 4 * past_covariates +
                             2 * future_covariates +
                             tg.gaussian_timeseries(std=7, length=50))

    target_train_1, target_test_1 = target_series_1.split_after(0.7)
    target_train_2, target_test_2 = target_series_2.split_after(0.7)

    (
        target_train_1_noise,
        target_test_1_noise,
    ) = target_series_1_noise.split_after(0.7)
    (
        target_train_2_noise,
        target_test_2_noise,
    ) = target_series_2_noise.split_after(0.7)

    # testing improved denoise with multiple TS

    # test 1: with single TS, 2 covariates should be better than one
    model = RegressionModel(lags=3, lags_past_covariates=5)
    model.fit([target_train_1_noise], [past_covariates])

    prediction_past_only = model.predict(
        n=len(target_test_1),
        series=[target_train_1_noise, target_train_2_noise],
        past_covariates=[past_covariates] * 2,
    )

    model = RegressionModel(lags=3,
                            lags_past_covariates=5,
                            lags_future_covariates=(5, 0))
    model.fit([target_train_1_noise], [past_covariates], [future_covariates])
    prediction_past_and_future = model.predict(
        n=len(target_test_1),
        series=[target_train_1_noise, target_train_2_noise],
        past_covariates=[past_covariates] * 2,
        future_covariates=[future_covariates] * 2,
    )

    error_past_only = rmse(
        [target_test_1, target_test_2],
        prediction_past_only,
        inter_reduction=np.mean,
    )
    error_both = rmse(
        [target_test_1, target_test_2],
        prediction_past_and_future,
        inter_reduction=np.mean,
    )

    self.assertTrue(error_past_only > error_both)

    # test 2: with both covariates, 2 TS should learn more than one (with little noise)
    model = RegressionModel(lags=3,
                            lags_past_covariates=5,
                            lags_future_covariates=(5, 0))
    model.fit(
        [target_train_1_noise, target_train_2_noise],
        [past_covariates] * 2,
        [future_covariates] * 2,
    )
    prediction_past_and_future_multi_ts = model.predict(
        n=len(target_test_1),
        series=[target_train_1_noise, target_train_2_noise],
        past_covariates=[past_covariates] * 2,
        future_covariates=[future_covariates] * 2,
    )
    error_both_multi_ts = rmse(
        [target_test_1, target_test_2],
        prediction_past_and_future_multi_ts,
        inter_reduction=np.mean,
    )

    self.assertTrue(error_both > error_both_multi_ts)
class RegressionEnsembleModelsTestCase(DartsBaseTestClass):
    # fixed seed so the (numerically fragile) denoising tests are reproducible
    RANDOM_SEED = 111

    # deterministic univariate fixture: sine + linear trend
    sine_series = tg.sine_timeseries(value_frequency=(1 / 5),
                                     value_y_offset=10,
                                     length=50)
    lin_series = tg.linear_timeseries(length=50)
    combined = sine_series + lin_series

    # small multi-series fixtures: targets (seq*) with matching covariates (cov*)
    seq1 = [_make_ts(0), _make_ts(10), _make_ts(20)]
    cov1 = [_make_ts(5), _make_ts(15), _make_ts(25)]
    seq2 = [_make_ts(0, 20), _make_ts(10, 20), _make_ts(20, 20)]
    cov2 = [_make_ts(5, 30), _make_ts(15, 30), _make_ts(25, 30)]

    # dummy feature and target TimeSeries instances
    ts_periodic = tg.sine_timeseries(length=500)
    ts_gaussian = tg.gaussian_timeseries(length=500)
    ts_random_walk = tg.random_walk_timeseries(length=500)

    # round trip through a DataFrame to give the covariate components names
    ts_cov1 = ts_periodic.stack(ts_gaussian)
    ts_cov1 = ts_cov1.pd_dataframe()
    ts_cov1.columns = ["Periodic", "Gaussian"]
    ts_cov1 = TimeSeries.from_dataframe(ts_cov1)
    ts_sum1 = ts_periodic + ts_gaussian

    ts_cov2 = ts_sum1.stack(ts_random_walk)
    ts_sum2 = ts_sum1 + ts_random_walk

    def get_local_models(self):
        # fresh (unfitted) local baseline models for each test
        return [NaiveDrift(), NaiveSeasonal(5), NaiveSeasonal(10)]

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def get_global_models(self, output_chunk_length=5):
        # fresh torch-based global models; a single epoch keeps tests fast
        return [
            RNNModel(
                input_chunk_length=20,
                output_chunk_length=output_chunk_length,
                n_epochs=1,
                random_state=42,
            ),
            BlockRNNModel(
                input_chunk_length=20,
                output_chunk_length=output_chunk_length,
                n_epochs=1,
                random_state=42,
            ),
        ]

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_accepts_different_regression_models(self):
        # the ensemble should accept sklearn regressors as well as darts models
        regr1 = LinearRegression()
        regr2 = RandomForestRegressor()
        regr3 = RandomForest(lags_future_covariates=[0])

        model0 = RegressionEnsembleModel(self.get_local_models(), 10)
        model1 = RegressionEnsembleModel(self.get_local_models(), 10, regr1)
        model2 = RegressionEnsembleModel(self.get_local_models(), 10, regr2)
        model3 = RegressionEnsembleModel(self.get_local_models(), 10, regr3)

        models = [model0, model1, model2, model3]
        for model in models:
            model.fit(series=self.combined)
            model.predict(10)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_accepts_one_model(self):
        # an ensemble over a single forecasting model must also work
        regr1 = LinearRegression()
        regr2 = RandomForest(lags_future_covariates=[0])

        model0 = RegressionEnsembleModel([self.get_local_models()[0]], 10)
        model1 = RegressionEnsembleModel([self.get_local_models()[0]], 10, regr1)
        model2 = RegressionEnsembleModel([self.get_local_models()[0]], 10, regr2)

        models = [model0, model1, model2]
        for model in models:
            model.fit(series=self.combined)
            model.predict(10)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_n_points(self):
        regr = LinearRegressionModel(lags_future_covariates=[0])

        # same values
        ensemble = RegressionEnsembleModel(self.get_local_models(), 5, regr)

        # too big value to perform the split
        ensemble = RegressionEnsembleModel(self.get_local_models(), 100)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)

        ensemble = RegressionEnsembleModel(self.get_local_models(), 50)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)

        # too big value considering min_train_series_length
        ensemble = RegressionEnsembleModel(self.get_local_models(), 45)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_torch_models_retrain(self):
        # the ensemble retrains its member models; the retrained member should
        # match an identically-seeded standalone model trained on the same data
        model1 = BlockRNNModel(input_chunk_length=12,
                               output_chunk_length=1,
                               random_state=0,
                               n_epochs=2)
        model2 = BlockRNNModel(input_chunk_length=12,
                               output_chunk_length=1,
                               random_state=0,
                               n_epochs=2)

        ensemble = RegressionEnsembleModel([model1], 5)
        ensemble.fit(self.combined)

        model1_fitted = ensemble.models[0]
        forecast1 = model1_fitted.predict(10)

        model2.fit(self.combined)
        forecast2 = model2.predict(10)

        self.assertAlmostEqual(sum(forecast1.values() - forecast2.values())[0],
                               0.0,
                               places=2)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_predict_global_models_univar(self):
        ensemble_models = self.get_global_models(output_chunk_length=10)
        ensemble_models.append(RegressionModel(lags=1))
        ensemble = RegressionEnsembleModel(ensemble_models, 10)
        ensemble.fit(series=self.combined)
        ensemble.predict(10)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_predict_global_models_multivar_no_covariates(self):
        ensemble_models = self.get_global_models(output_chunk_length=10)
        ensemble_models.append(RegressionModel(lags=1))
        ensemble = RegressionEnsembleModel(ensemble_models, 10)
        ensemble.fit(self.seq1)
        ensemble.predict(10, self.seq1)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_predict_global_models_multivar_with_covariates(self):
        ensemble_models = self.get_global_models(output_chunk_length=10)
        ensemble_models.append(
            RegressionModel(lags=1, lags_past_covariates=[-1]))
        ensemble = RegressionEnsembleModel(ensemble_models, 10)
        ensemble.fit(self.seq1, self.cov1)
        ensemble.predict(10, self.seq2, self.cov2)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def helper_test_models_accuracy(self, model_instance, n, series,
                                    past_covariates, min_rmse):
        # for every model, test whether it predicts the target with a minimum r2 score of `min_rmse`
        # NOTE(review): this is a helper, not a test — the skipUnless
        # decorator on it has no effect since unittest never collects it.
        train_series, test_series = train_test_split(series,
                                                     pd.Timestamp("20010101"))
        train_past_covariates, _ = train_test_split(past_covariates,
                                                    pd.Timestamp("20010101"))

        model_instance.fit(series=train_series,
                           past_covariates=train_past_covariates)
        prediction = model_instance.predict(n=n,
                                            past_covariates=past_covariates)
        current_rmse = rmse(test_series, prediction)

        self.assertTrue(
            current_rmse <= min_rmse,
            f"Model was not able to denoise data. A rmse score of {current_rmse} was recorded.",
        )

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def denoising_input(self):
        # NOTE(review): helper, not a test — rebuilds the noisy fixtures with
        # a fixed seed so the denoising tests below are reproducible.
        np.random.seed(self.RANDOM_SEED)

        ts_periodic = tg.sine_timeseries(length=500)
        ts_gaussian = tg.gaussian_timeseries(length=500)
        ts_random_walk = tg.random_walk_timeseries(length=500)

        ts_cov1 = ts_periodic.stack(ts_gaussian)
        ts_cov1 = ts_cov1.pd_dataframe()
        ts_cov1.columns = ["Periodic", "Gaussian"]
        ts_cov1 = TimeSeries.from_dataframe(ts_cov1)
        ts_sum1 = ts_periodic + ts_gaussian

        ts_cov2 = ts_sum1.stack(ts_random_walk)
        ts_sum2 = ts_sum1 + ts_random_walk

        return ts_sum1, ts_cov1, ts_sum2, ts_cov2

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_ensemble_models_denoising(self):
        # for every model, test whether it correctly denoises ts_sum using ts_gaussian and ts_sum as inputs
        # WARNING: this test isn't numerically stable, changing self.RANDOM_SEED can lead to exploding coefficients
        horizon = 10
        ts_sum1, ts_cov1, _, _ = self.denoising_input()
        torch.manual_seed(self.RANDOM_SEED)

        ensemble_models = [
            RNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            BlockRNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            RegressionModel(lags_past_covariates=[-1]),
        ]

        ensemble = RegressionEnsembleModel(ensemble_models, horizon)
        self.helper_test_models_accuracy(ensemble, horizon, ts_sum1, ts_cov1, 3)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_ensemble_models_denoising_multi_input(self):
        # for every model, test whether it correctly denoises ts_sum_2 using ts_random_multi and ts_sum_2 as inputs
        # WARNING: this test isn't numerically stable, changing self.RANDOM_SEED can lead to exploding coefficients
        horizon = 10
        _, _, ts_sum2, ts_cov2 = self.denoising_input()
        torch.manual_seed(self.RANDOM_SEED)

        ensemble_models = [
            RNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            BlockRNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            RegressionModel(lags_past_covariates=[-1]),
            RegressionModel(lags_past_covariates=[-1]),
        ]

        ensemble = RegressionEnsembleModel(ensemble_models, horizon)
        self.helper_test_models_accuracy(ensemble, horizon, ts_sum2, ts_cov2, 3)
class GlobalForecastingModelsTestCase(DartsBaseTestClass):
    """Runnability and accuracy tests shared by the global torch forecasting models
    listed in `models_cls_kwargs_errs`."""

    # forecasting horizon used in runnability tests
    forecasting_horizon = 12

    # fix seeds so the class-level fixtures below are deterministic
    np.random.seed(42)
    torch.manual_seed(42)

    # some arbitrary static covariates
    static_covariates = pd.DataFrame([[0.0, 1.0]], columns=["st1", "st2"])

    # real timeseries for functionality tests
    ts_passengers = (
        AirPassengersDataset().load().with_static_covariates(static_covariates)
    )
    scaler = Scaler()
    ts_passengers = scaler.fit_transform(ts_passengers)
    ts_pass_train, ts_pass_val = ts_passengers[:-36], ts_passengers[-36:]

    # an additional noisy series
    ts_pass_train_1 = ts_pass_train + 0.01 * tg.gaussian_timeseries(
        length=len(ts_pass_train),
        freq=ts_pass_train.freq_str,
        start=ts_pass_train.start_time(),
    )

    # an additional time series serving as covariates
    year_series = tg.datetime_attribute_timeseries(ts_passengers, attribute="year")
    month_series = tg.datetime_attribute_timeseries(ts_passengers, attribute="month")
    scaler_dt = Scaler()
    time_covariates = scaler_dt.fit_transform(year_series.stack(month_series))
    time_covariates_train, time_covariates_val = (
        time_covariates[:-36],
        time_covariates[-36:],
    )

    # an artificial time series that is highly dependent on covariates
    ts_length = 400
    split_ratio = 0.6
    sine_1_ts = tg.sine_timeseries(length=ts_length)
    sine_2_ts = tg.sine_timeseries(length=ts_length, value_frequency=0.05)
    sine_3_ts = tg.sine_timeseries(
        length=ts_length, value_frequency=0.003, value_amplitude=5
    )
    linear_ts = tg.linear_timeseries(length=ts_length, start_value=3, end_value=8)
    covariates = sine_3_ts.stack(sine_2_ts).stack(linear_ts)
    covariates_past, _ = covariates.split_after(split_ratio)
    target = sine_1_ts + sine_2_ts + linear_ts + sine_3_ts
    target_past, target_future = target.split_after(split_ratio)

    def test_save_model_parameters(self):
        # model creation parameters were saved before. check if re-created model has same params as original
        # NOTE(review): assertTrue(a, b) treats `b` as the failure *message*, so this
        # only checks truthiness of `_model_params`; assertEqual was likely intended —
        # confirm before changing.
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(
                input_chunk_length=IN_LEN, output_chunk_length=OUT_LEN, **kwargs
            )
            self.assertTrue(
                model._model_params, model.untrained_model()._model_params
            )

    def test_single_ts(self):
        """Fit/predict on a single series and check the error bound and static covariates."""
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(
                input_chunk_length=IN_LEN,
                output_chunk_length=OUT_LEN,
                random_state=0,
                **kwargs,
            )
            model.fit(self.ts_pass_train)
            pred = model.predict(n=36)
            mape_err = mape(self.ts_pass_val, pred)
            self.assertTrue(
                mape_err < err,
                "Model {} produces errors too high (one time "
                "series). Error = {}".format(model_cls, mape_err),
            )
            # static covariates must be carried over onto the forecast
            self.assertTrue(
                pred.static_covariates.equals(self.ts_passengers.static_covariates)
            )

    def test_multi_ts(self):
        """Fit on several series; prediction then requires an explicit `series` argument."""
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(
                input_chunk_length=IN_LEN,
                output_chunk_length=OUT_LEN,
                random_state=0,
                **kwargs,
            )
            model.fit([self.ts_pass_train, self.ts_pass_train_1])
            with self.assertRaises(ValueError):
                # when model is fit from >1 series, one must provide a series in argument
                model.predict(n=1)
            pred = model.predict(n=36, series=self.ts_pass_train)
            mape_err = mape(self.ts_pass_val, pred)
            self.assertTrue(
                mape_err < err,
                "Model {} produces errors too high (several time "
                "series). Error = {}".format(model_cls, mape_err),
            )

            # check prediction for several time series
            pred_list = model.predict(
                n=36, series=[self.ts_pass_train, self.ts_pass_train_1]
            )
            self.assertTrue(
                len(pred_list) == 2,
                f"Model {model_cls} did not return a list of prediction",
            )
            for pred in pred_list:
                mape_err = mape(self.ts_pass_val, pred)
                self.assertTrue(
                    mape_err < err,
                    "Model {} produces errors too high (several time series 2). "
                    "Error = {}".format(model_cls, mape_err),
                )

    def test_covariates(self):
        """Covariate plumbing: required/optional covariate arguments at fit and predict time."""
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(
                input_chunk_length=IN_LEN,
                output_chunk_length=OUT_LEN,
                random_state=0,
                **kwargs,
            )

            # Here we rely on the fact that all non-Dual models currently are Past models
            cov_name = (
                "future_covariates"
                if isinstance(model, DualCovariatesTorchModel)
                else "past_covariates"
            )
            cov_kwargs = {
                cov_name: [self.time_covariates_train, self.time_covariates_train]
            }
            model.fit(series=[self.ts_pass_train, self.ts_pass_train_1], **cov_kwargs)
            with self.assertRaises(ValueError):
                # when model is fit from >1 series, one must provide a series in argument
                model.predict(n=1)

            with self.assertRaises(ValueError):
                # when model is fit using multiple covariates, covariates are required at prediction time
                model.predict(n=1, series=self.ts_pass_train)

            cov_kwargs_train = {cov_name: self.time_covariates_train}
            cov_kwargs_notrain = {cov_name: self.time_covariates}
            with self.assertRaises(ValueError):
                # when model is fit using covariates, n cannot be greater than output_chunk_length...
                model.predict(n=13, series=self.ts_pass_train, **cov_kwargs_train)

            # ... unless future covariates are provided
            pred = model.predict(
                n=13, series=self.ts_pass_train, **cov_kwargs_notrain
            )

            pred = model.predict(
                n=12, series=self.ts_pass_train, **cov_kwargs_notrain
            )
            mape_err = mape(self.ts_pass_val, pred)
            self.assertTrue(
                mape_err < err,
                "Model {} produces errors too high (several time "
                "series with covariates). Error = {}".format(model_cls, mape_err),
            )

            # when model is fit using 1 training and 1 covariate series, time series args are optional
            if model._is_probabilistic:
                continue
            model = model_cls(
                input_chunk_length=IN_LEN, output_chunk_length=OUT_LEN, **kwargs
            )
            model.fit(series=self.ts_pass_train, **cov_kwargs_train)
            pred1 = model.predict(1)
            pred2 = model.predict(1, series=self.ts_pass_train)
            pred3 = model.predict(1, **cov_kwargs_train)
            pred4 = model.predict(1, **cov_kwargs_train, series=self.ts_pass_train)
            self.assertEqual(pred1, pred2)
            self.assertEqual(pred1, pred3)
            self.assertEqual(pred1, pred4)

    def test_future_covariates(self):
        # models with future covariates should produce better predictions over a long forecasting horizon
        # than a model trained with no covariates
        model = TCNModel(
            input_chunk_length=50,
            output_chunk_length=5,
            n_epochs=20,
            random_state=0,
        )

        model.fit(series=self.target_past)
        long_pred_no_cov = model.predict(n=160)

        model = TCNModel(
            input_chunk_length=50,
            output_chunk_length=5,
            n_epochs=20,
            random_state=0,
        )
        model.fit(series=self.target_past, past_covariates=self.covariates_past)
        long_pred_with_cov = model.predict(n=160, past_covariates=self.covariates)
        self.assertTrue(
            mape(self.target_future, long_pred_no_cov)
            > mape(self.target_future, long_pred_with_cov),
            "Models with future covariates should produce better predictions.",
        )

        # block models can predict up to self.output_chunk_length points beyond the last future covariate...
        model.predict(n=165, past_covariates=self.covariates)

        # ... not more
        with self.assertRaises(ValueError):
            # NOTE(review): this call passes `series=` rather than `past_covariates=`;
            # verify it exercises the intended "not enough covariates" error path.
            model.predict(n=166, series=self.ts_pass_train)

        # recurrent models can only predict data points for time steps where future covariates are available
        model = RNNModel(12, n_epochs=1)
        model.fit(series=self.target_past, future_covariates=self.covariates_past)
        model.predict(n=160, future_covariates=self.covariates)
        with self.assertRaises(ValueError):
            model.predict(n=161, future_covariates=self.covariates)

    def test_batch_predictions(self):
        # predicting multiple time series at once needs to work for arbitrary batch sizes
        # univariate case
        targets_univar = [
            self.target_past,
            self.target_past[:60],
            self.target_past[:80],
        ]
        self._batch_prediction_test_helper_function(targets_univar)

        # multivariate case
        targets_multivar = [tgt.stack(tgt) for tgt in targets_univar]
        self._batch_prediction_test_helper_function(targets_multivar)

    def _batch_prediction_test_helper_function(self, targets):
        """Predictions must be (numerically) independent of the chosen batch size."""
        epsilon = 1e-4
        model = TCNModel(
            input_chunk_length=50,
            output_chunk_length=10,
            n_epochs=10,
            random_state=0,
        )
        model.fit(series=targets[0], past_covariates=self.covariates_past)
        # reference prediction with the default batch size
        preds_default = model.predict(
            n=160,
            series=targets,
            past_covariates=[self.covariates] * len(targets),
            batch_size=None,
        )

        # make batch size large enough to test stacking samples
        for batch_size in range(1, 4 * len(targets)):
            preds = model.predict(
                n=160,
                series=targets,
                past_covariates=[self.covariates] * len(targets),
                batch_size=batch_size,
            )
            for i in range(len(targets)):
                self.assertLess(
                    sum(sum((preds[i] - preds_default[i]).values())), epsilon
                )

    def test_predict_from_dataset_unsupported_input(self):
        # an exception should be thrown if an unsupported type is passed
        unsupported_type = "unsupported_type"
        # just need to test this with one model
        model_cls, kwargs, err = models_cls_kwargs_errs[0]
        model = model_cls(
            input_chunk_length=IN_LEN, output_chunk_length=OUT_LEN, **kwargs
        )
        model.fit([self.ts_pass_train, self.ts_pass_train_1])

        with self.assertRaises(ValueError):
            model.predict_from_dataset(n=1, input_series_dataset=unsupported_type)

    def test_prediction_with_different_n(self):
        # test model predictions for n < out_len, n == out_len and n > out_len
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(
                input_chunk_length=IN_LEN, output_chunk_length=OUT_LEN, **kwargs
            )
            self.assertTrue(
                isinstance(
                    model,
                    (
                        PastCovariatesTorchModel,
                        DualCovariatesTorchModel,
                        MixedCovariatesTorchModel,
                    ),
                ),
                "unit test not yet defined for the given {X}CovariatesTorchModel.",
            )

            # pick the covariate arguments matching the model's covariate support
            if isinstance(model, PastCovariatesTorchModel):
                past_covs, future_covs = self.covariates, None
            elif isinstance(model, DualCovariatesTorchModel):
                past_covs, future_covs = None, self.covariates
            else:
                past_covs, future_covs = self.covariates, self.covariates

            model.fit(
                self.target_past,
                past_covariates=past_covs,
                future_covariates=future_covs,
                epochs=1,
            )

            # test prediction for n < out_len, n == out_len and n > out_len
            for n in [OUT_LEN - 1, OUT_LEN, 2 * OUT_LEN - 1]:
                pred = model.predict(
                    n=n, past_covariates=past_covs, future_covariates=future_covs
                )
                self.assertEqual(len(pred), n)

    def test_same_result_with_different_n_jobs(self):
        """Parallel prediction (n_jobs=-1) must match the sequential result."""
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(
                input_chunk_length=IN_LEN, output_chunk_length=OUT_LEN, **kwargs
            )
            multiple_ts = [self.ts_pass_train] * 10
            model.fit(multiple_ts)

            # safe random state for two successive identical predictions
            # NOTE(review): `_is_probabilistic` is *called* here but accessed as a
            # property in test_covariates — confirm which form the model API defines.
            if model._is_probabilistic():
                random_state = deepcopy(model._random_instance)
            else:
                random_state = None

            pred1 = model.predict(n=36, series=multiple_ts, n_jobs=1)

            if random_state is not None:
                model._random_instance = random_state

            pred2 = model.predict(
                n=36, series=multiple_ts, n_jobs=-1
            )  # assuming > 1 core available in the machine
            self.assertEqual(
                pred1,
                pred2,
                "Model {} produces different predictions with different number of jobs",
            )

    @patch(
        "darts.models.forecasting.torch_forecasting_model.TorchForecastingModel._init_trainer"
    )
    def test_fit_with_constr_epochs(self, init_trainer):
        """`n_epochs` passed at construction must reach the trainer as max_epochs."""
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(
                input_chunk_length=IN_LEN, output_chunk_length=OUT_LEN, **kwargs
            )
            multiple_ts = [self.ts_pass_train] * 10
            model.fit(multiple_ts)

            init_trainer.assert_called_with(
                max_epochs=kwargs["n_epochs"], trainer_params=ANY
            )

    @patch(
        "darts.models.forecasting.torch_forecasting_model.TorchForecastingModel._init_trainer"
    )
    def test_fit_with_fit_epochs(self, init_trainer):
        """`epochs` passed to fit() overrides the constructor's n_epochs."""
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(
                input_chunk_length=IN_LEN, output_chunk_length=OUT_LEN, **kwargs
            )
            multiple_ts = [self.ts_pass_train] * 10
            epochs = 3

            model.fit(multiple_ts, epochs=epochs)
            init_trainer.assert_called_with(max_epochs=epochs, trainer_params=ANY)

            model.total_epochs = epochs
            # continue training
            model.fit(multiple_ts, epochs=epochs)
            init_trainer.assert_called_with(max_epochs=epochs, trainer_params=ANY)

    @patch(
        "darts.models.forecasting.torch_forecasting_model.TorchForecastingModel._init_trainer"
    )
    def test_fit_from_dataset_with_epochs(self, init_trainer):
        """Same epoch-forwarding contract as above, but through fit_from_dataset()."""
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(
                input_chunk_length=IN_LEN, output_chunk_length=OUT_LEN, **kwargs
            )
            multiple_ts = [self.ts_pass_train] * 10
            train_dataset = model._build_train_dataset(
                multiple_ts,
                past_covariates=None,
                future_covariates=None,
                max_samples_per_ts=None,
            )
            epochs = 3

            model.fit_from_dataset(train_dataset, epochs=epochs)
            init_trainer.assert_called_with(max_epochs=epochs, trainer_params=ANY)

            # continue training
            model.fit_from_dataset(train_dataset, epochs=epochs)
            init_trainer.assert_called_with(max_epochs=epochs, trainer_params=ANY)

    def test_predit_after_fit_from_dataset(self):
        # NOTE(review): method name typo — "predit" should read "predict"; kept as-is
        # since renaming changes the test id reported by runners.
        model_cls, kwargs, _ = models_cls_kwargs_errs[0]
        model = model_cls(
            input_chunk_length=IN_LEN, output_chunk_length=OUT_LEN, **kwargs
        )
        multiple_ts = [self.ts_pass_train] * 10
        train_dataset = model._build_train_dataset(
            multiple_ts,
            past_covariates=None,
            future_covariates=None,
            max_samples_per_ts=None,
        )
        model.fit_from_dataset(train_dataset, epochs=3)

        # test predict() works after fit_from_dataset()
        model.predict(n=1, series=multiple_ts[0])

    def test_sample_smaller_than_batch_size(self):
        """
        Checking that the TorchForecastingModels do not crash even if the number of available samples for training
        is strictly lower than the selected batch_size
        """
        # TS with 50 timestamps. TorchForecastingModels will use the SequentialDataset for producing training
        # samples, which means we will have 50 - 22 - 2 + 1 = 27 samples, which is < 32 (batch_size). The model
        # should still train on those samples and not crash in any way
        ts = linear_timeseries(start_value=0, end_value=1, length=50)
        model = RNNModel(
            input_chunk_length=20, output_chunk_length=2, n_epochs=2, batch_size=32
        )
        model.fit(ts)

    def test_max_samples_per_ts(self):
        """
        Checking that we can fit TorchForecastingModels with max_samples_per_ts, without crash
        """
        ts = linear_timeseries(start_value=0, end_value=1, length=50)
        model = RNNModel(
            input_chunk_length=20, output_chunk_length=2, n_epochs=2, batch_size=32
        )
        model.fit(ts, max_samples_per_ts=5)

    def test_residuals(self):
        """
        Torch models should not fail when computing residuals on a series
        long enough to accomodate at least one training sample.
        """
        ts = linear_timeseries(start_value=0, end_value=1, length=38)

        model = NBEATSModel(
            input_chunk_length=24,
            output_chunk_length=12,
            num_stacks=2,
            num_blocks=1,
            num_layers=1,
            layer_widths=2,
            n_epochs=2,
        )
        model.residuals(ts)
class CovariateIndexGeneratorTestCase(DartsBaseTestClass):
    """Tests for Past/FutureCovariateIndexGenerator on datetime and integer indexes."""

    n_target = 24
    # target with a monthly datetime index, and a matching cyclic covariate
    target_time = tg.linear_timeseries(length=n_target, freq="MS")
    cov_time_train = tg.datetime_attribute_timeseries(
        target_time, attribute="month", cyclic=True
    )
    cov_time_train_short = cov_time_train[1:]

    # target with an integer (range) index; covariate is the target itself
    target_int = tg.linear_timeseries(length=n_target, start=2)
    cov_int_train = target_int
    cov_int_train_short = cov_int_train[1:]

    input_chunk_length = 12
    output_chunk_length = 6
    n_short = 6
    n_long = 8

    # pd.DatetimeIndex
    # target covariate for inference dataset for n <= output_chunk_length
    cov_time_inf_short = TimeSeries.from_times_and_values(
        tg._generate_index(
            start=target_time.start_time(),
            length=n_target + n_short,
            freq=target_time.freq,
        ),
        np.arange(n_target + n_short),
    )
    # target covariate for inference dataset for n > output_chunk_length
    cov_time_inf_long = TimeSeries.from_times_and_values(
        tg._generate_index(
            start=target_time.start_time(),
            length=n_target + n_long,
            freq=target_time.freq,
        ),
        np.arange(n_target + n_long),
    )

    # integer index
    # target covariate for inference dataset for n <= output_chunk_length
    cov_int_inf_short = TimeSeries.from_times_and_values(
        tg._generate_index(
            start=target_int.start_time(),
            length=n_target + n_short,
            freq=target_int.freq,
        ),
        np.arange(n_target + n_short),
    )
    # target covariate for inference dataset for n > output_chunk_length
    cov_int_inf_long = TimeSeries.from_times_and_values(
        tg._generate_index(
            start=target_int.start_time(),
            length=n_target + n_long,
            freq=target_int.freq,
        ),
        np.arange(n_target + n_long),
    )

    def helper_test_index_types(self, ig: CovariateIndexGenerator):
        """test the index type of generated index"""
        # pd.DatetimeIndex
        idx = ig.generate_train_series(self.target_time, self.cov_time_train)
        self.assertTrue(isinstance(idx, pd.DatetimeIndex))
        idx = ig.generate_inference_series(
            self.n_short, self.target_time, self.cov_time_inf_short
        )
        self.assertTrue(isinstance(idx, pd.DatetimeIndex))
        idx = ig.generate_train_series(self.target_time, None)
        self.assertTrue(isinstance(idx, pd.DatetimeIndex))

        # pd.RangeIndex
        idx = ig.generate_train_series(self.target_int, self.cov_int_train)
        self.assertTrue(isinstance(idx, pd.RangeIndex))
        idx = ig.generate_inference_series(
            self.n_short, self.target_int, self.cov_int_inf_short
        )
        self.assertTrue(isinstance(idx, pd.RangeIndex))
        idx = ig.generate_train_series(self.target_int, None)
        self.assertTrue(isinstance(idx, pd.RangeIndex))

    def helper_test_index_generator_train(self, ig: CovariateIndexGenerator):
        """
        If covariates are given, the index generators should return the covariate series' index.
        If covariates are not given, the index generators should return the target series' index.
        """
        # pd.DatetimeIndex
        # generated index must be equal to input covariate index
        idx = ig.generate_train_series(self.target_time, self.cov_time_train)
        self.assertTrue(idx.equals(self.cov_time_train.time_index))
        # generated index must be equal to input covariate index
        idx = ig.generate_train_series(self.target_time, self.cov_time_train_short)
        self.assertTrue(idx.equals(self.cov_time_train_short.time_index))
        # generated index must be equal to input target index when no covariates are defined
        idx = ig.generate_train_series(self.target_time, None)
        self.assertTrue(idx.equals(self.cov_time_train.time_index))

        # integer index
        # generated index must be equal to input covariate index
        idx = ig.generate_train_series(self.target_int, self.cov_int_train)
        self.assertTrue(idx.equals(self.cov_int_train.time_index))
        # generated index must be equal to input covariate index
        # NOTE(review): this mixes the datetime-indexed `target_time` with the
        # integer-indexed covariates — `self.target_int` was probably intended; confirm.
        idx = ig.generate_train_series(self.target_time, self.cov_int_train_short)
        self.assertTrue(idx.equals(self.cov_int_train_short.time_index))
        # generated index must be equal to input target index when no covariates are defined
        idx = ig.generate_train_series(self.target_int, None)
        self.assertTrue(idx.equals(self.cov_int_train.time_index))

    def helper_test_index_generator_inference(self, ig, is_past=False):
        """
        For prediction (`n` is given) with past covariates we have to distinguish between two cases:
        1)  if past covariates are given, we can use them as reference
        2)  if past covariates are missing, we need to generate a time index that starts `input_chunk_length`
            before the end of `target` and ends `max(0, n - output_chunk_length)` after the end of `target`

        For prediction (`n` is given) with future covariates we have to distinguish between two cases:
        1)  if future covariates are given, we can use them as reference
        2)  if future covariates are missing, we need to generate a time index that starts `input_chunk_length`
            before the end of `target` and ends `max(n, output_chunk_length)` after the end of `target`
        """
        # check generated inference index without passing covariates when n <= output_chunk_length
        idx = ig.generate_inference_series(self.n_short, self.target_time, None)
        if is_past:
            n_out = self.input_chunk_length
            last_idx = self.target_time.end_time()
        else:
            n_out = self.input_chunk_length + self.output_chunk_length
            last_idx = self.cov_time_inf_short.end_time()
        self.assertTrue(len(idx) == n_out)
        self.assertTrue(idx[-1] == last_idx)

        # check generated inference index without passing covariates when n > output_chunk_length
        idx = ig.generate_inference_series(self.n_long, self.target_time, None)
        if is_past:
            n_out = self.input_chunk_length + self.n_long - self.output_chunk_length
            last_idx = (
                self.target_time.end_time()
                + (self.n_long - self.output_chunk_length) * self.target_time.freq
            )
        else:
            n_out = self.input_chunk_length + self.n_long
            last_idx = self.cov_time_inf_long.end_time()
        self.assertTrue(len(idx) == n_out)
        self.assertTrue(idx[-1] == last_idx)

        # when covariates are passed, the generated index must equal the covariate index
        idx = ig.generate_inference_series(
            self.n_short, self.target_time, self.cov_time_inf_short
        )
        self.assertTrue(idx.equals(self.cov_time_inf_short.time_index))
        idx = ig.generate_inference_series(
            self.n_long, self.target_time, self.cov_time_inf_long
        )
        self.assertTrue(idx.equals(self.cov_time_inf_long.time_index))
        idx = ig.generate_inference_series(
            self.n_short, self.target_int, self.cov_int_inf_short
        )
        self.assertTrue(idx.equals(self.cov_int_inf_short.time_index))
        idx = ig.generate_inference_series(
            self.n_long, self.target_int, self.cov_int_inf_long
        )
        self.assertTrue(idx.equals(self.cov_int_inf_long.time_index))

    def test_past_index_generator(self):
        ig = PastCovariateIndexGenerator(
            self.input_chunk_length, self.output_chunk_length
        )
        self.helper_test_index_types(ig)
        self.helper_test_index_generator_train(ig)
        self.helper_test_index_generator_inference(ig, is_past=True)

    def test_future_index_generator(self):
        ig = FutureCovariateIndexGenerator(
            self.input_chunk_length, self.output_chunk_length
        )
        self.helper_test_index_types(ig)
        self.helper_test_index_generator_train(ig)
        self.helper_test_index_generator_inference(ig, is_past=False)
class MappersTestCase(unittest.TestCase):
    """Tests for the Mapper / InvertibleMapper data transformers."""

    @staticmethod
    def func(x):
        return x + 10

    @staticmethod
    def inverse_func(x):
        return x - 10

    @staticmethod
    def ts_func(ts, x):
        return x - ts.month

    @staticmethod
    def inverse_ts_func(ts, x):
        return x + ts.month

    # transformers under test; `.__func__` unwraps the staticmethod descriptor
    plus_ten = Mapper(func.__func__)
    plus_ten_invertible = InvertibleMapper(func.__func__, inverse_func.__func__)
    subtract_month = Mapper(ts_func.__func__)
    subtract_month_invertible = InvertibleMapper(
        ts_func.__func__, inverse_ts_func.__func__
    )

    # fixtures: a 1..12 monthly ramp and three monthly constant series
    lin_series = linear_timeseries(
        start_value=1,
        length=12,
        freq="MS",
        start=pd.Timestamp("2000-01-01"),
        end_value=12,
    )
    zeroes = constant_timeseries(
        value=0.0, length=12, freq="MS", start=pd.Timestamp("2000-01-01")
    )
    tens = constant_timeseries(
        value=10.0, length=12, freq="MS", start=pd.Timestamp("2000-01-01")
    )
    twenties = constant_timeseries(
        value=20.0, length=12, freq="MS", start=pd.Timestamp("2000-01-01")
    )

    def test_mapper(self):
        """`plus_ten` shifts values by +10 for single series and lists of series."""
        cases = (
            (self.zeroes, self.tens),
            ([self.zeroes, self.tens], [self.tens, self.twenties]),
        )
        for data, expected in cases:
            self.assertEqual(self.plus_ten.transform(data), expected)

    def test_invertible_mapper(self):
        """transform followed by inverse_transform is the identity."""
        for data in (self.zeroes, [self.zeroes, self.tens]):
            roundtrip = self.plus_ten_invertible.inverse_transform(
                self.plus_ten_invertible.transform(data)
            )
            self.assertEqual(roundtrip, data)

    def test_mapper_with_timestamp(self):
        """Subtracting each timestamp's month from the 1..12 ramp yields zeroes."""

        def align_names(expected, reference):
            # the transform keeps the input's component name; rename the
            # expected series to match before comparing
            return expected.with_columns_renamed(
                expected.components[0], reference.components[0]
            )

        cases = [
            (self.lin_series, self.zeroes),
            ([self.lin_series, self.lin_series], [self.zeroes, self.zeroes]),
        ]
        for data, expected in cases:
            result = self.subtract_month.transform(data)
            if isinstance(data, list):
                expected = [
                    align_names(exp, res) for res, exp in zip(result, expected)
                ]
            else:
                expected = align_names(expected, result)
            self.assertEqual(result, expected)

    def test_invertible_mapper_with_timestamp(self):
        """Timestamp-aware transform/inverse_transform round-trips exactly."""
        for data in (self.lin_series, [self.lin_series, self.lin_series]):
            roundtrip = self.subtract_month_invertible.inverse_transform(
                self.subtract_month_invertible.transform(data)
            )
            self.assertEqual(roundtrip, data)

    def test_invertible_mappers_on_stochastic_series(self):
        """A log/exp inverse pair round-trips a stochastic (sampled) series."""
        samples = np.random.rand(10, 2, 100) + 2
        series = TimeSeries.from_values(samples)

        log_mapper = InvertibleMapper(np.log, np.exp)
        roundtrip = log_mapper.inverse_transform(log_mapper.transform(series))

        np.testing.assert_almost_equal(
            series.all_values(copy=False), roundtrip.all_values(copy=False)
        )
class BoxCoxTestCase(unittest.TestCase):
    """Tests for the BoxCox data transformer."""

    sine_series = sine_timeseries(
        length=50, value_y_offset=5, value_frequency=0.05
    )
    lin_series = linear_timeseries(start_value=1, end_value=10, length=50)
    multi_series = sine_series.stack(lin_series)

    def test_boxbox_lambda(self):
        """Fitted lambdas honour user-provided values; optimizers yield distinct fits."""
        fixed = BoxCox(lmbda=0.3)
        fixed.fit(self.multi_series)
        self.assertEqual(fixed._fitted_params, [[0.3, 0.3]])

        per_component = BoxCox(lmbda=[0.3, 0.4])
        per_component.fit(self.multi_series)
        self.assertEqual(per_component._fitted_params, [[0.3, 0.4]])

        with self.assertRaises(ValueError):
            # number of lambdas must match the number of components
            mismatched = BoxCox(lmbda=[0.2, 0.4, 0.5])
            mismatched.fit(self.multi_series)

        mle_fit = BoxCox(optim_method="mle")
        mle_fit.fit(self.multi_series)
        pearson_fit = BoxCox(optim_method="pearsonr")
        pearson_fit.fit(self.multi_series)
        self.assertNotEqual(
            mle_fit._fitted_params[0].tolist(),
            pearson_fit._fitted_params[0].tolist(),
        )

    def test_boxcox_transform(self):
        """With lambda=0 the Box-Cox transform coincides with the natural log."""
        via_log = Mapper(lambda x: np.log(x)).transform(self.sine_series)
        zero_lambda = BoxCox(lmbda=0)
        via_boxcox = zero_lambda.fit(self.sine_series).transform(self.sine_series)
        np.testing.assert_almost_equal(
            via_log.all_values(copy=False),
            via_boxcox.all_values(copy=False),
            decimal=4,
        )

    def test_boxcox_inverse(self):
        """fit_transform followed by inverse_transform recovers the input."""
        transformer = BoxCox()
        roundtrip = transformer.inverse_transform(
            transformer.fit_transform(self.multi_series)
        )
        pd.testing.assert_frame_equal(
            self.multi_series.pd_dataframe(),
            roundtrip.pd_dataframe(),
            check_exact=False,
        )

    def test_boxcox_multi_ts(self):
        """Round-trip on a list of series works for full, scalar and auto lambdas."""
        lambda_cases = (
            [[0.2, 0.4], [0.3, 0.6]],  # full lambda
            0.4,  # single value
            None,  # None
        )
        for lmbda in lambda_cases:
            transformer = BoxCox(lmbda=lmbda)
            restored = transformer.inverse_transform(
                transformer.fit_transform([self.multi_series, self.multi_series])
            )
            for recovered in restored:
                pd.testing.assert_frame_equal(
                    self.multi_series.pd_dataframe(),
                    recovered.pd_dataframe(),
                    check_exact=False,
                )

    def test_boxcox_multiple_calls_to_fit(self):
        """
        This test checks whether calling the scaler twice is calculating new
        lambdas instead of keeping the old ones
        """
        transformer = BoxCox()

        transformer.fit(self.sine_series)
        first_lambdas = deepcopy(transformer._fitted_params)[0].tolist()

        transformer.fit(self.lin_series)
        second_lambdas = deepcopy(transformer._fitted_params)[0].tolist()

        self.assertNotEqual(
            first_lambdas,
            second_lambdas,
            "Lambdas should change when the transformer is retrained",
        )

    def test_multivariate_stochastic_series(self):
        """Round-trip on a multivariate stochastic series preserves all samples."""
        transformer = BoxCox()
        samples = np.random.rand(10, 5, 10)
        series = TimeSeries.from_values(samples)

        roundtrip = transformer.inverse_transform(transformer.fit_transform(series))

        # Test inverse transform
        np.testing.assert_allclose(series.all_values(), roundtrip.all_values())
def test_pred_length(self):
    """Delegate to the shared helper to verify prediction-length handling for TransformerModel."""
    self.helper_test_pred_length(TransformerModel, tg.linear_timeseries(length=100))
def test_not_enough_covariates(self):
    """Prediction must fail as soon as one required covariate time step is missing.

    For each output_chunk_length the table below gives the exact past/future
    covariate offsets for which prediction still succeeds; removing a single
    further step from either covariate series must raise a ValueError.
    """
    target_series = tg.linear_timeseries(start_value=0, end_value=100, length=50)
    past_covariates = tg.linear_timeseries(start_value=100, end_value=200, length=50)
    future_covariates = tg.linear_timeseries(
        start_value=200, end_value=300, length=50
    )

    model = RegressionModel(
        lags_past_covariates=[-10],
        lags_future_covariates=[-5, 5],
        output_chunk_length=7,
    )
    model.fit(
        series=target_series,
        past_covariates=past_covariates,
        future_covariates=future_covariates,
        max_samples_per_ts=1,
    )

    # (output_chunk_length, required past_offset, required future_offset)
    boundary_cases = [
        (1, 0, 13),
        (5, -4, 9),
        (7, -2, 11),
    ]
    for ocl, past_off, future_off in boundary_cases:
        model = RegressionModel(
            lags_past_covariates=[-10],
            lags_future_covariates=[-4, 3],
            output_chunk_length=ocl,
        )
        model.fit(
            series=target_series,
            past_covariates=past_covariates,
            future_covariates=future_covariates,
        )

        # with exactly the required offsets no ValueError is raised
        model.predict(
            10,
            series=target_series[:-25],
            past_covariates=past_covariates[:-25 + past_off],
            future_covariates=future_covariates[:-25 + future_off],
        )

        # one past-covariate step fewer must raise
        with self.assertRaises(ValueError):
            model.predict(
                10,
                series=target_series[:-25],
                past_covariates=past_covariates[:-26 + past_off],
                future_covariates=future_covariates[:-25 + future_off],
            )

        # one future-covariate step fewer must raise
        with self.assertRaises(ValueError):
            model.predict(
                10,
                series=target_series[:-25],
                past_covariates=past_covariates[:-25 + past_off],
                future_covariates=future_covariates[:-26 + future_off],
            )
def dummy_timeseries(
    length,
    n_series=1,
    comps_target=1,
    comps_pcov=1,
    comps_fcov=1,
    multiseries_offset=0,
    pcov_offset=0,
    fcov_offset=0,
    comps_stride=100,
    type_stride=10000,
    series_stride=1000000,
    target_start_value=1,
    first_target_start_date=pd.Timestamp("2000-01-01"),
    freq="D",
    integer_index=False,
):
    """Generate deterministic lists of (targets, past covariates, future covariates).

    Every component is a linear ramp of the given `length`. Start values are spread
    out so each (series, type, component) triple is uniquely identifiable:
    `series_stride` separates series, `type_stride` separates target/pcov/fcov,
    and `comps_stride` separates components within one series. Start dates are
    shifted per series by `multiseries_offset` and per covariate type by
    `pcov_offset` / `fcov_offset`, either on an integer index or a datetime index
    with frequency `freq`.

    Returns a 3-tuple of lists (one entry per series): targets, pcovs, fcovs.
    """

    def _shift(base, offset):
        # integer indexes shift by plain addition; datetime indexes via Timedelta
        return base + offset if integer_index else base + pd.Timedelta(offset, unit=freq)

    def _stacked_linear(start_val, start_date, n_comps, label, series_idx):
        # build `n_comps` linear components (offset by `comps_stride` each) and stack them
        ts = None
        for idx in range(n_comps):
            start = start_val + idx * comps_stride
            curr_ts = tg.linear_timeseries(
                start_value=start,
                end_value=start + length - 1,
                start=start_date,
                length=length,
                freq=freq,
                column_name=f"{series_idx}-{label}-{idx}",
            )
            ts = ts.stack(curr_ts) if ts else curr_ts
        return ts

    targets, pcovs, fcovs = [], [], []
    for series_idx in range(n_series):
        target_start_date = (
            series_idx * multiseries_offset
            if integer_index
            else first_target_start_date
            + pd.Timedelta(series_idx * multiseries_offset, unit=freq)
        )
        pcov_start_date = _shift(target_start_date, pcov_offset)
        fcov_start_date = _shift(target_start_date, fcov_offset)

        target_start_val = target_start_value + series_stride * series_idx
        pcov_start_val = target_start_val + type_stride
        fcov_start_val = target_start_val + 2 * type_stride

        targets.append(
            _stacked_linear(target_start_val, target_start_date, comps_target, "trgt", series_idx)
        )
        pcovs.append(
            _stacked_linear(pcov_start_val, pcov_start_date, comps_pcov, "pcov", series_idx)
        )
        fcovs.append(
            _stacked_linear(fcov_start_val, fcov_start_date, comps_fcov, "fcov", series_idx)
        )

    return targets, pcovs, fcovs