Code example #1
    def __init__(self,
                 forecasting_models: List[ForecastingModel],
                 regression_train_n_points: int,
                 regression_model=None):
        """
        Class for ensemble models that use a regression model to combine the individual
        models' predictions. The provided regression model must implement fit() and predict()
        methods (e.g. scikit-learn regression models). Note that the regression model here is
        used to learn how to best combine the individual forecasting models' forecasts; this is
        not the same use of regression as in `RegressionModel`, where the regression model
        produces forecasts based on the lagged series.

        Parameters
        ----------
        forecasting_models
            List of forecasting models whose predictions to ensemble
        regression_train_n_points
            The number of points to use to train the regression model
        regression_model
            Any regression model with predict() and fit() methods (e.g. from scikit-learn)
            Default: `darts.models.LinearRegressionModel(fit_intercept=False)`
        """
        super().__init__(forecasting_models)
        if regression_model is None:
            regression_model = LinearRegressionModel(lags_exog=0,
                                                     fit_intercept=False)
        elif not isinstance(regression_model, RegressionModel):
            # wrap a scikit-learn-like model into a darts RegressionModel
            regression_model = RegressionModel(lags_exog=0, model=regression_model)
        raise_if(
            regression_model.lags is not None
            and regression_model.lags_exog != [0],
            ("`lags` of regression model must be `None` and `lags_exog` must be [0]. Given: {} and {}"
             .format(regression_model.lags, regression_model.lags_exog)))
        self.regression_model = regression_model
        self.train_n_points = regression_train_n_points
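
Below is a minimal usage sketch for the class above. The toy sine series, the two base forecasters and the Ridge regressor are illustrative assumptions only; any scikit-learn-style estimator exposing fit() and predict() can be passed as regression_model.

# Illustrative sketch (assumed setup): ensemble two simple forecasters and let a
# scikit-learn Ridge regressor learn how to combine their forecasts, using the
# last 24 training points to fit the ensembling regression.
from sklearn.linear_model import Ridge

from darts.models import ExponentialSmoothing, NaiveSeasonal, RegressionEnsembleModel
from darts.utils.timeseries_generation import sine_timeseries

series = sine_timeseries(length=120, value_frequency=0.05)  # toy target series

ensemble = RegressionEnsembleModel(
    forecasting_models=[NaiveSeasonal(K=20), ExponentialSmoothing()],
    regression_train_n_points=24,  # points reserved for fitting the ensembling regression
    regression_model=Ridge(),      # any model with fit() and predict()
)
ensemble.fit(series)
forecast = ensemble.predict(10)
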
Code example #2
    def test_train_n_points(self):
        regr = LinearRegressionModel(lags_future_covariates=[0])

        # same values
        ensemble = RegressionEnsembleModel(self.get_local_models(), 5, regr)

        # too big value to perform the split
        ensemble = RegressionEnsembleModel(self.get_local_models(), 100)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)

        ensemble = RegressionEnsembleModel(self.get_local_models(), 50)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)

        # too big value considering min_train_series_length
        ensemble = RegressionEnsembleModel(self.get_local_models(), 45)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)
Code example #3
    def test_backtest_regression(self):
        gaussian_series = gt(mean=2, length=50)
        sine_series = st(length=50)
        features = gaussian_series.stack(sine_series)
        features_multivariate = (
            gaussian_series +
            sine_series).stack(gaussian_series).stack(sine_series)
        target = sine_series

        features = TimeSeries(features.pd_dataframe().rename(
            {
                "0": "Value0",
                "1": "Value1"
            }, axis=1))
        features_multivariate = TimeSeries(
            features_multivariate.pd_dataframe().rename(
                {
                    "0": "Value0",
                    "1": "Value1",
                    "2": "Value2"
                }, axis=1))

        # univariate feature test
        score = LinearRegressionModel(lags=None, lags_exog=[0, 1]).backtest(
            series=target,
            covariates=features,
            start=pd.Timestamp('20000201'),
            forecast_horizon=3,
            metric=r2_score,
            last_points_only=True)
        self.assertGreater(score, 0.95)

        # Using an int or float value for start
        score = RandomForest(lags=12,
                             lags_exog=[0]).backtest(series=target,
                                                     covariates=features,
                                                     start=30,
                                                     forecast_horizon=3,
                                                     metric=r2_score)
        self.assertGreater(score, 0.95)

        score = RandomForest(lags=12,
                             lags_exog=[0]).backtest(series=target,
                                                     covariates=features,
                                                     start=0.5,
                                                     forecast_horizon=3,
                                                     metric=r2_score)
        self.assertGreater(score, 0.95)

        # Using a too small start value
        with self.assertRaises(ValueError):
            RandomForest(lags=12).backtest(series=target,
                                           start=0,
                                           forecast_horizon=3)

        with self.assertRaises(ValueError):
            RandomForest(lags=12).backtest(series=target,
                                           start=0.01,
                                           forecast_horizon=3)

        # Using RandomForest's start default value
        score = RandomForest(lags=12).backtest(series=target,
                                               forecast_horizon=3,
                                               metric=r2_score)
        self.assertGreater(score, 0.95)

        # multivariate feature test
        score = RandomForest(lags=12, lags_exog=[0, 1]).backtest(
            series=target,
            covariates=features_multivariate,
            start=pd.Timestamp('20000201'),
            forecast_horizon=3,
            metric=r2_score)
        self.assertGreater(score, 0.95)

        # multivariate with stride
        score = RandomForest(lags=12, lags_exog=[0]).backtest(
            series=target,
            covariates=features_multivariate,
            start=pd.Timestamp('20000201'),
            forecast_horizon=3,
            metric=r2_score,
            last_points_only=True,
            stride=3)
        self.assertGreater(score, 0.95)
Code example #4
    def test_backtest_regression(self):
        np.random.seed(4)

        gaussian_series = gt(mean=2, length=50)
        sine_series = st(length=50)
        features = gaussian_series.stack(sine_series)
        features_multivariate = (
            (gaussian_series + sine_series).stack(gaussian_series).stack(sine_series)
        )
        target = sine_series

        features = features.with_columns_renamed(
            features.components, ["Value0", "Value1"]
        )

        features_multivariate = features_multivariate.with_columns_renamed(
            features_multivariate.components, ["Value0", "Value1", "Value2"]
        )

        # univariate feature test
        score = LinearRegressionModel(
            lags=None, lags_future_covariates=[0, -1]
        ).backtest(
            series=target,
            future_covariates=features,
            start=pd.Timestamp("20000201"),
            forecast_horizon=3,
            metric=r2_score,
            last_points_only=True,
        )
        self.assertGreater(score, 0.9)

        # univariate feature test + train length
        score = LinearRegressionModel(
            lags=None, lags_future_covariates=[0, -1]
        ).backtest(
            series=target,
            future_covariates=features,
            start=pd.Timestamp("20000201"),
            train_length=20,
            forecast_horizon=3,
            metric=r2_score,
            last_points_only=True,
        )
        self.assertGreater(score, 0.9)

        # Using an int or float value for start
        score = RandomForest(
            lags=12, lags_future_covariates=[0], random_state=0
        ).backtest(
            series=target,
            future_covariates=features,
            start=30,
            forecast_horizon=3,
            metric=r2_score,
        )
        self.assertGreater(score, 0.9)

        score = RandomForest(
            lags=12, lags_future_covariates=[0], random_state=0
        ).backtest(
            series=target,
            future_covariates=features,
            start=0.5,
            forecast_horizon=3,
            metric=r2_score,
        )
        self.assertGreater(score, 0.9)

        # Using a too small start value
        with self.assertRaises(ValueError):
            RandomForest(lags=12).backtest(series=target, start=0, forecast_horizon=3)

        with self.assertRaises(ValueError):
            RandomForest(lags=12).backtest(
                series=target, start=0.01, forecast_horizon=3
            )

        # Using RandomForest's start default value
        score = RandomForest(lags=12, random_state=0).backtest(
            series=target, forecast_horizon=3, metric=r2_score
        )
        self.assertGreater(score, 0.95)

        # multivariate feature test
        score = RandomForest(
            lags=12, lags_future_covariates=[0, -1], random_state=0
        ).backtest(
            series=target,
            future_covariates=features_multivariate,
            start=pd.Timestamp("20000201"),
            forecast_horizon=3,
            metric=r2_score,
        )
        self.assertGreater(score, 0.94)

        # multivariate feature test with train window 35
        score_35 = RandomForest(
            lags=12, lags_future_covariates=[0, -1], random_state=0
        ).backtest(
            series=target,
            train_length=35,
            future_covariates=features_multivariate,
            start=pd.Timestamp("20000201"),
            forecast_horizon=3,
            metric=r2_score,
        )
        logger.info(
            "Score for multivariate feature test with train window 35 is: %s", score_35
        )
        self.assertGreater(score_35, 0.92)

        # multivariate feature test with train window 45
        score_45 = RandomForest(
            lags=12, lags_future_covariates=[0, -1], random_state=0
        ).backtest(
            series=target,
            train_length=45,
            future_covariates=features_multivariate,
            start=pd.Timestamp("20000201"),
            forecast_horizon=3,
            metric=r2_score,
        )
        logger.info(
            "Score for multivariate feature test with train window 45 is: %s", score_45
        )
        self.assertGreater(score_45, 0.94)
        self.assertGreater(score_45, score_35)

        # multivariate with stride
        score = RandomForest(
            lags=12, lags_future_covariates=[0], random_state=0
        ).backtest(
            series=target,
            future_covariates=features_multivariate,
            start=pd.Timestamp("20000201"),
            forecast_horizon=3,
            metric=r2_score,
            last_points_only=True,
            stride=3,
        )
        self.assertGreater(score, 0.9)
Code example #5
    (Theta(), 11.3),
    (Theta(1), 20.2),
    (Theta(-1), 9.8),
    (FourTheta(1), 20.2),
    (FourTheta(-1), 9.8),
    (FourTheta(trend_mode=TrendMode.EXPONENTIAL), 5.5),
    (FourTheta(model_mode=ModelMode.MULTIPLICATIVE), 11.4),
    (FourTheta(season_mode=SeasonalityMode.ADDITIVE), 14.2),
    (FFT(trend="poly"), 11.4),
    (NaiveSeasonal(), 32.4),
    (KalmanForecaster(dim_x=3), 17.0),
]

if TORCH_AVAILABLE:
    models += [
        (LinearRegressionModel(lags=12), 11.0),
        (RandomForest(lags=12, n_estimators=200, max_depth=3), 15.5),
    ]

# forecasting models with exogenous variables support
multivariate_models = [
    (VARIMA(1, 0, 0), 55.6),
    (VARIMA(1, 1, 1), 57.0),
    (KalmanForecaster(dim_x=30), 30.0),
]

dual_models = [ARIMA(), StatsForecastAutoARIMA(period=12)]

try:
    from darts.models import Prophet
Code example #6
                                             for m in models_simple]
                id_fin = id_end + len(test)
                if id_fin == 0:
                    id_fin = None
                models_des_predictions = [m.predict(len(test)) *
                                          (seasonOut if id_end is None else season[id_end:id_fin])
                                          for m in models_des]

                model_predictions = models_simple_predictions + models_des_predictions

                return model_predictions

            val_predictions = train_pred(id_end=-len(test))
            target_val = train.slice_intersect(val_predictions[0])

            regr_model = LinearRegressionModel(train_n_points=len(test),
                                               model=LassoCV(positive=True, fit_intercept=False))
            regr_model.fit(val_predictions, target_val)

            for mod in models_simple:
                mod.fit(train)
            for mod in models_des:
                mod.fit(train_des)

            models_simple_predictions = [mod.predict(len(test))
                                         for mod in models_simple]
            models_des_predictions = [mod.predict(len(test)) * seasonOut
                                      for mod in models_des]

            model_predictions = models_simple_predictions + models_des_predictions

            # constraint sum equal to 1
Code example #7
class ReconciliationTestCase(unittest.TestCase):
    __test__ = True

    @classmethod
    def setUpClass(cls):
        logging.disable(logging.CRITICAL)

    np.random.seed(42)
    """ test case with a more intricate hierarchy """
    LENGTH = 200
    total_series = (tg.sine_timeseries(value_frequency=0.03, length=LENGTH) +
                    1 + tg.gaussian_timeseries(length=LENGTH) * 0.2)
    bottom_1 = total_series / 3 + tg.gaussian_timeseries(length=LENGTH) * 0.01
    bottom_2 = 2 * total_series / 3 + tg.gaussian_timeseries(
        length=LENGTH) * 0.01
    series = concatenate([total_series, bottom_1, bottom_2], axis=1)
    hierarchy = {"sine_1": ["sine"], "sine_2": ["sine"]}
    series = series.with_hierarchy(hierarchy)

    # get a single forecast
    model = LinearRegressionModel(lags=30, output_chunk_length=10)
    model.fit(series)
    pred = model.predict(n=20)

    # get a backtest forecast to get residuals
    pred_back = model.historical_forecasts(series,
                                           start=0.75,
                                           forecast_horizon=10)
    intersection = series.slice_intersect(pred_back)
    residuals = intersection - pred_back
    """ test case with a more intricate hierarchy """
    components_complex = ["total", "a", "b", "x", "y", "ax", "ay", "bx", "by"]

    hierarchy_complex = {
        "ax": ["a", "x"],
        "ay": ["a", "y"],
        "bx": ["b", "x"],
        "by": ["b", "y"],
        "a": ["total"],
        "b": ["total"],
        "x": ["total"],
        "y": ["total"],
    }

    series_complex = TimeSeries.from_values(
        values=np.random.rand(50, len(components_complex), 5),
        columns=components_complex,
        hierarchy=hierarchy_complex,
    )

    def _assert_reconciliation(self, fitted_recon):
        pred_r = fitted_recon.transform(self.pred)
        np.testing.assert_almost_equal(
            pred_r["sine"].values(copy=False),
            (pred_r["sine_1"] + pred_r["sine_2"]).values(copy=False),
        )

    def _assert_reconciliation_complex(self, fitted_recon):
        reconciled = fitted_recon.transform(self.series_complex)

        def _assert_comps(comp, comps):
            np.testing.assert_almost_equal(
                reconciled[comp].values(copy=False),
                sum(reconciled[c] for c in comps).values(copy=False),
            )

        _assert_comps("a", ["ax", "ay"])
        _assert_comps("b", ["bx", "by"])
        _assert_comps("x", ["ax", "bx"])
        _assert_comps("y", ["ay", "by"])
        _assert_comps("total", ["ax", "ay", "bx", "by"])
        _assert_comps("total", ["a", "b"])
        _assert_comps("total", ["x", "y"])

    def test_bottom_up(self):
        recon = BottomUpReconciliator()
        self._assert_reconciliation(recon)

    def test_top_down(self):
        # should work when fitting on training series
        recon = TopDownReconciliator()
        recon.fit(self.series)
        self._assert_reconciliation(recon)

        # or when fitting on forecasts
        recon = TopDownReconciliator()
        recon.fit(self.pred)
        self._assert_reconciliation(recon)

    def test_mint(self):
        # ols
        recon = MinTReconciliator("ols")
        recon.fit(self.series)
        self._assert_reconciliation(recon)

        # wls_struct
        recon = MinTReconciliator("wls_struct")
        recon.fit(self.series)
        self._assert_reconciliation(recon)

        # wls_var
        recon = MinTReconciliator("wls_var")
        recon.fit(self.residuals)
        self._assert_reconciliation(recon)

        # mint_cov
        recon = MinTReconciliator("mint_cov")
        recon.fit(self.residuals)
        self._assert_reconciliation(recon)

        # wls_val
        recon = MinTReconciliator("wls_val")
        recon.fit(self.series)
        self._assert_reconciliation(recon)

    def test_summation_matrix(self):
        np.testing.assert_equal(
            _get_summation_matrix(self.series_complex),
            np.array([
                [1, 1, 1, 1],
                [1, 1, 0, 0],
                [0, 0, 1, 1],
                [1, 0, 1, 0],
                [0, 1, 0, 1],
                [1, 0, 0, 0],
                [0, 1, 0, 0],
                [0, 0, 1, 0],
                [0, 0, 0, 1],
            ]),
        )

    def test_hierarchy_preserved_after_predict(self):
        self.assertEqual(self.pred.hierarchy, self.series.hierarchy)

    def test_more_intricate_hierarchy(self):
        recon = BottomUpReconciliator()
        self._assert_reconciliation_complex(recon)

        recon = TopDownReconciliator()
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)

        recon = MinTReconciliator("ols")
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)

        recon = MinTReconciliator("wls_struct")
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)

        recon = MinTReconciliator("wls_val")
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)