def __init__(self, forecasting_models: List[ForecastingModel], regression_train_n_points: int, regression_model=None):
    """
    Class for ensemble models using a regression model for ensembling individual models' predictions.

    The provided regression model must implement fit() and predict() methods
    (e.g. scikit-learn regression models). Note that here the regression model is used to learn how to
    best ensemble the individual forecasting models' forecasts. It is not the same usage of regression
    as in `RegressionModel`, where the regression model is used to produce forecasts based on the
    lagged series.

    Parameters
    ----------
    forecasting_models
        List of forecasting models whose predictions to ensemble
    regression_train_n_points
        The number of points to use to train the regression model
    regression_model
        Any regression model with predict() and fit() methods (e.g. from scikit-learn)
        Default: `darts.model.LinearRegressionModel(fit_intercept=False)`
    """
    super().__init__(forecasting_models)

    if regression_model is None:
        # Default ensembler: darts linear regression over the co-temporal base
        # forecasts only (lag 0 of the "exogenous" series, no intercept).
        regression_model = LinearRegressionModel(lags_exog=0, fit_intercept=False)
    elif not isinstance(regression_model, RegressionModel):
        # scikit-learn-like estimator: wrap it so it exposes the darts
        # forecasting interface. Wrapping unconditionally would double-wrap
        # the default darts model (and any user-supplied RegressionModel),
        # breaking fit(), and would make the validation below vacuous.
        regression_model = RegressionModel(lags_exog=0, model=regression_model)

    # The ensembling regression must consume only the co-temporal forecasts:
    # no lags on the target, exactly lag 0 on the stacked base forecasts.
    raise_if(
        regression_model.lags is not None and regression_model.lags_exog != [0],
        ("`lags` of regression model must be `None` and `lags_exog` must be [0]. Given: {} and {}"
         .format(regression_model.lags, regression_model.lags_exog)))

    self.regression_model = regression_model
    # Number of trailing points of the training series reserved to fit the ensembler.
    self.train_n_points = regression_train_n_points
def test_train_n_points(self):
    """fit() must reject train_n_points values that leave no room to train the base models."""
    # Ensembling regression consuming the stacked base forecasts at lag 0.
    regr = LinearRegressionModel(lags_future_covariates=[0])

    # A small, feasible value together with a custom regression model is accepted.
    ensemble = RegressionEnsembleModel(self.get_local_models(), 5, regr)

    # 100 and 50 are too large to perform the train/regression split;
    # 45 additionally violates the base models' min_train_series_length.
    for n_points in (100, 50, 45):
        ensemble = RegressionEnsembleModel(self.get_local_models(), n_points)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)
def test_backtest_regression(self):
    """Backtest regression models with uni-/multivariate covariates and int/float/Timestamp starts."""
    # Fixtures: a noisy covariate, a sine covariate, and the sine as the target.
    gaussian_series = gt(mean=2, length=50)
    sine_series = st(length=50)
    features = gaussian_series.stack(sine_series)
    features_multivariate = (
        gaussian_series + sine_series).stack(gaussian_series).stack(sine_series)
    target = sine_series

    # Give the stacked components stable names.
    # NOTE(review): constructing TimeSeries directly from a DataFrame is the
    # legacy constructor; newer darts uses TimeSeries.from_dataframe — confirm
    # against the darts version this file targets.
    features = TimeSeries(features.pd_dataframe().rename(
        {
            "0": "Value0",
            "1": "Value1"
        }, axis=1))
    features_multivariate = TimeSeries(
        features_multivariate.pd_dataframe().rename(
            {
                "0": "Value0",
                "1": "Value1",
                "2": "Value2"
            }, axis=1))

    # univariate feature test
    score = LinearRegressionModel(lags=None, lags_exog=[0, 1]).backtest(
        series=target,
        covariates=features,
        start=pd.Timestamp('20000201'),
        forecast_horizon=3,
        metric=r2_score,
        last_points_only=True)
    self.assertGreater(score, 0.95)

    # Using an int or float value for start
    score = RandomForest(lags=12, lags_exog=[0]).backtest(series=target,
                                                          covariates=features,
                                                          start=30,
                                                          forecast_horizon=3,
                                                          metric=r2_score)
    self.assertGreater(score, 0.95)

    score = RandomForest(lags=12, lags_exog=[0]).backtest(series=target,
                                                          covariates=features,
                                                          start=0.5,
                                                          forecast_horizon=3,
                                                          metric=r2_score)
    self.assertGreater(score, 0.95)

    # Using a too small start value: not enough history for lags=12.
    with self.assertRaises(ValueError):
        RandomForest(lags=12).backtest(series=target, start=0, forecast_horizon=3)

    with self.assertRaises(ValueError):
        RandomForest(lags=12).backtest(series=target, start=0.01, forecast_horizon=3)

    # Using RandomForest's start default value
    score = RandomForest(lags=12).backtest(series=target, forecast_horizon=3, metric=r2_score)
    self.assertGreater(score, 0.95)

    # multivariate feature test
    score = RandomForest(lags=12, lags_exog=[0, 1]).backtest(
        series=target,
        covariates=features_multivariate,
        start=pd.Timestamp('20000201'),
        forecast_horizon=3,
        metric=r2_score)
    self.assertGreater(score, 0.95)

    # multivariate with stride
    score = RandomForest(lags=12, lags_exog=[0]).backtest(
        series=target,
        covariates=features_multivariate,
        start=pd.Timestamp('20000201'),
        forecast_horizon=3,
        metric=r2_score,
        last_points_only=True,
        stride=3)
    self.assertGreater(score, 0.95)
def test_backtest_regression(self):
    """Backtest regression models with uni-/multivariate future covariates.

    Fix: the two logger.info calls passed the score as an extra positional
    argument with no %-placeholder in the message; the logging module raises
    a formatting error at emit time and the score is never rendered. They now
    use lazy %s formatting.
    """
    np.random.seed(4)

    # Fixtures: a noisy covariate, a sine covariate, and the sine as the target.
    gaussian_series = gt(mean=2, length=50)
    sine_series = st(length=50)
    features = gaussian_series.stack(sine_series)
    features_multivariate = (
        (gaussian_series + sine_series).stack(gaussian_series).stack(sine_series)
    )
    target = sine_series

    # Give the stacked components stable names.
    features = features.with_columns_renamed(
        features.components, ["Value0", "Value1"]
    )
    features_multivariate = features_multivariate.with_columns_renamed(
        features_multivariate.components, ["Value0", "Value1", "Value2"]
    )

    # univariate feature test
    score = LinearRegressionModel(
        lags=None, lags_future_covariates=[0, -1]
    ).backtest(
        series=target,
        future_covariates=features,
        start=pd.Timestamp("20000201"),
        forecast_horizon=3,
        metric=r2_score,
        last_points_only=True,
    )
    self.assertGreater(score, 0.9)

    # univariate feature test + train length
    score = LinearRegressionModel(
        lags=None, lags_future_covariates=[0, -1]
    ).backtest(
        series=target,
        future_covariates=features,
        start=pd.Timestamp("20000201"),
        train_length=20,
        forecast_horizon=3,
        metric=r2_score,
        last_points_only=True,
    )
    self.assertGreater(score, 0.9)

    # Using an int or float value for start
    score = RandomForest(
        lags=12, lags_future_covariates=[0], random_state=0
    ).backtest(
        series=target,
        future_covariates=features,
        start=30,
        forecast_horizon=3,
        metric=r2_score,
    )
    self.assertGreater(score, 0.9)

    score = RandomForest(
        lags=12, lags_future_covariates=[0], random_state=0
    ).backtest(
        series=target,
        future_covariates=features,
        start=0.5,
        forecast_horizon=3,
        metric=r2_score,
    )
    self.assertGreater(score, 0.9)

    # Using a too small start value: not enough history for lags=12.
    with self.assertRaises(ValueError):
        RandomForest(lags=12).backtest(series=target, start=0, forecast_horizon=3)

    with self.assertRaises(ValueError):
        RandomForest(lags=12).backtest(
            series=target, start=0.01, forecast_horizon=3
        )

    # Using RandomForest's start default value
    score = RandomForest(lags=12, random_state=0).backtest(
        series=target, forecast_horizon=3, metric=r2_score
    )
    self.assertGreater(score, 0.95)

    # multivariate feature test
    score = RandomForest(
        lags=12, lags_future_covariates=[0, -1], random_state=0
    ).backtest(
        series=target,
        future_covariates=features_multivariate,
        start=pd.Timestamp("20000201"),
        forecast_horizon=3,
        metric=r2_score,
    )
    self.assertGreater(score, 0.94)

    # multivariate feature test with train window 35
    score_35 = RandomForest(
        lags=12, lags_future_covariates=[0, -1], random_state=0
    ).backtest(
        series=target,
        train_length=35,
        future_covariates=features_multivariate,
        start=pd.Timestamp("20000201"),
        forecast_horizon=3,
        metric=r2_score,
    )
    # Lazy %s formatting — the original passed score_35 as an extra positional
    # argument without a placeholder, which logging rejects at emit time.
    logger.info(
        "Score for multivariate feature test with train window 35 is: %s", score_35
    )
    self.assertGreater(score_35, 0.92)

    # multivariate feature test with train window 45
    score_45 = RandomForest(
        lags=12, lags_future_covariates=[0, -1], random_state=0
    ).backtest(
        series=target,
        train_length=45,
        future_covariates=features_multivariate,
        start=pd.Timestamp("20000201"),
        forecast_horizon=3,
        metric=r2_score,
    )
    logger.info(
        "Score for multivariate feature test with train window 45 is: %s", score_45
    )
    self.assertGreater(score_45, 0.94)
    # A longer train window is expected to score better here.
    self.assertGreater(score_45, score_35)

    # multivariate with stride
    score = RandomForest(
        lags=12, lags_future_covariates=[0], random_state=0
    ).backtest(
        series=target,
        future_covariates=features_multivariate,
        start=pd.Timestamp("20000201"),
        forecast_horizon=3,
        metric=r2_score,
        last_points_only=True,
        stride=3,
    )
    self.assertGreater(score, 0.9)
(Theta(), 11.3), (Theta(1), 20.2), (Theta(-1), 9.8), (FourTheta(1), 20.2), (FourTheta(-1), 9.8), (FourTheta(trend_mode=TrendMode.EXPONENTIAL), 5.5), (FourTheta(model_mode=ModelMode.MULTIPLICATIVE), 11.4), (FourTheta(season_mode=SeasonalityMode.ADDITIVE), 14.2), (FFT(trend="poly"), 11.4), (NaiveSeasonal(), 32.4), (KalmanForecaster(dim_x=3), 17.0), ] if TORCH_AVAILABLE: models += [ (LinearRegressionModel(lags=12), 11.0), (RandomForest(lags=12, n_estimators=200, max_depth=3), 15.5), ] # forecasting models with exogenous variables support multivariate_models = [ (VARIMA(1, 0, 0), 55.6), (VARIMA(1, 1, 1), 57.0), (KalmanForecaster(dim_x=30), 30.0), ] dual_models = [ARIMA(), StatsForecastAutoARIMA(period=12)] try: from darts.models import Prophet
for m in models_simple] id_fin = id_end + len(test) if id_fin == 0: id_fin = None models_des_predictions = [m.predict(len(test)) * (seasonOut if id_end is None else season[id_end:id_fin]) for m in models_des] model_predictions = models_simple_predictions + models_des_predictions return model_predictions val_predictions = train_pred(id_end=-len(test)) target_val = train.slice_intersect(val_predictions[0]) regr_model = LinearRegressionModel(train_n_points=len(test), model=LassoCV(positive=True, fit_intercept=False)) regr_model.fit(val_predictions, target_val) for mod in models_simple: mod.fit(train) for mod in models_des: mod.fit(train_des) models_simple_predictions = [mod.predict(len(test)) for mod in models_simple] models_des_predictions = [mod.predict(len(test)) * seasonOut for mod in models_des] model_predictions = models_simple_predictions + models_des_predictions # constraint sum equal to 1
class ReconciliationTestCase(unittest.TestCase):
    """Tests for hierarchical forecast reconciliation (bottom-up, top-down, MinT)."""

    # Ensure the runner collects this case.
    __test__ = True

    @classmethod
    def setUpClass(cls):
        # Silence library logging for the whole test case.
        logging.disable(logging.CRITICAL)

    # Seed at class level so the class-body fixtures below are reproducible.
    # NOTE(review): placement before the random fixtures is load-bearing —
    # class-body code runs at class creation, before setUpClass; confirm this
    # matches the original layout.
    np.random.seed(42)

    # NOTE(review): this label says "more intricate hierarchy" but the fixtures
    # below are the SIMPLE two-leaf sine hierarchy — likely a copy-paste of the
    # label used further down; the string is a no-op statement either way.
    """ test case with a more intricate hierarchy """

    # Simple hierarchy: one total split into two bottom-level components.
    LENGTH = 200
    total_series = (tg.sine_timeseries(value_frequency=0.03, length=LENGTH)
                    + 1 + tg.gaussian_timeseries(length=LENGTH) * 0.2)
    bottom_1 = total_series / 3 + tg.gaussian_timeseries(length=LENGTH) * 0.01
    bottom_2 = 2 * total_series / 3 + tg.gaussian_timeseries(
        length=LENGTH) * 0.01
    series = concatenate([total_series, bottom_1, bottom_2], axis=1)
    # Each child maps to the list of its parents.
    hierarchy = {"sine_1": ["sine"], "sine_2": ["sine"]}
    series = series.with_hierarchy(hierarchy)

    # get a single forecast
    model = LinearRegressionModel(lags=30, output_chunk_length=10)
    model.fit(series)
    pred = model.predict(n=20)

    # get a backtest forecast to get residuals (needed by wls_var / mint_cov)
    pred_back = model.historical_forecasts(series, start=0.75,
                                           forecast_horizon=10)
    intersection = series.slice_intersect(pred_back)
    residuals = intersection - pred_back

    """ test case with a more intricate hierarchy """
    # Two-grouping hierarchy: total -> {a, b} and total -> {x, y},
    # with the four leaves being the cross products ax, ay, bx, by.
    components_complex = ["total", "a", "b", "x", "y", "ax", "ay", "bx", "by"]

    hierarchy_complex = {
        "ax": ["a", "x"],
        "ay": ["a", "y"],
        "bx": ["b", "x"],
        "by": ["b", "y"],
        "a": ["total"],
        "b": ["total"],
        "x": ["total"],
        "y": ["total"],
    }

    # Stochastic series: 50 time steps, 9 components, 5 samples.
    series_complex = TimeSeries.from_values(
        values=np.random.rand(50, len(components_complex), 5),
        columns=components_complex,
        hierarchy=hierarchy_complex,
    )

    def _assert_reconciliation(self, fitted_recon):
        # After reconciliation the parent must equal the sum of its children.
        pred_r = fitted_recon.transform(self.pred)
        np.testing.assert_almost_equal(
            pred_r["sine"].values(copy=False),
            (pred_r["sine_1"] + pred_r["sine_2"]).values(copy=False),
        )

    def _assert_reconciliation_complex(self, fitted_recon):
        # Every aggregation relation in the complex hierarchy must hold.
        reconciled = fitted_recon.transform(self.series_complex)

        def _assert_comps(comp, comps):
            # comp must equal the sum of the components in comps.
            np.testing.assert_almost_equal(
                reconciled[comp].values(copy=False),
                sum(reconciled[c] for c in comps).values(copy=False),
            )

        _assert_comps("a", ["ax", "ay"])
        _assert_comps("b", ["bx", "by"])
        _assert_comps("x", ["ax", "bx"])
        _assert_comps("y", ["ay", "by"])
        _assert_comps("total", ["ax", "ay", "bx", "by"])
        _assert_comps("total", ["a", "b"])
        _assert_comps("total", ["x", "y"])

    def test_bottom_up(self):
        # Bottom-up requires no fitting.
        recon = BottomUpReconciliator()
        self._assert_reconciliation(recon)

    def test_top_down(self):
        # should work when fitting on training series
        recon = TopDownReconciliator()
        recon.fit(self.series)
        self._assert_reconciliation(recon)

        # or when fitting on forecasts
        recon = TopDownReconciliator()
        recon.fit(self.pred)
        self._assert_reconciliation(recon)

    def test_mint(self):
        # ols
        recon = MinTReconciliator("ols")
        recon.fit(self.series)
        self._assert_reconciliation(recon)

        # wls_struct
        recon = MinTReconciliator("wls_struct")
        recon.fit(self.series)
        self._assert_reconciliation(recon)

        # wls_var — fitted on residuals, not on the series itself
        recon = MinTReconciliator("wls_var")
        recon.fit(self.residuals)
        self._assert_reconciliation(recon)

        # mint_cov — also fitted on residuals
        recon = MinTReconciliator("mint_cov")
        recon.fit(self.residuals)
        self._assert_reconciliation(recon)

        # wls_val
        recon = MinTReconciliator("wls_val")
        recon.fit(self.series)
        self._assert_reconciliation(recon)

    def test_summation_matrix(self):
        # Rows follow components order (total, a, b, x, y, ax, ay, bx, by);
        # columns are the 4 leaves (ax, ay, bx, by).
        np.testing.assert_equal(
            _get_summation_matrix(self.series_complex),
            np.array([
                [1, 1, 1, 1],
                [1, 1, 0, 0],
                [0, 0, 1, 1],
                [1, 0, 1, 0],
                [0, 1, 0, 1],
                [1, 0, 0, 0],
                [0, 1, 0, 0],
                [0, 0, 1, 0],
                [0, 0, 0, 1],
            ]),
        )

    def test_hierarchy_preserved_after_predict(self):
        # predict() must propagate the hierarchy from the training series.
        self.assertEqual(self.pred.hierarchy, self.series.hierarchy)

    def test_more_intricate_hierarchy(self):
        recon = BottomUpReconciliator()
        self._assert_reconciliation_complex(recon)

        recon = TopDownReconciliator()
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)

        recon = MinTReconciliator("ols")
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)

        recon = MinTReconciliator("wls_struct")
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)

        recon = MinTReconciliator("wls_val")
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)