Beispiel #1
0
    def _fit(self, X, y=None):
        """Fit transformer to X and y.

        private _fit containing the core logic, called from fit

        Parameters
        ----------
        X : pd.Series or pd.DataFrame
            Data to fit transform to
        y : pd.DataFrame, default=None
            Additional data, e.g., labels for transformation

        Returns
        -------
        self: a fitted instance of the estimator
        """
        self.n_columns_ = self.n_columns
        self.feature_importances_ = None

        # multivariate X
        if not isinstance(X, pd.Series):
            if self.method == "feature-importances":
                self.regressor_ = check_regressor(
                    regressor=self.regressor, random_state=self.random_state
                )
                self._check_n_columns(X)
                # fit regressor with X as exog data and y as endog data (target)
                self.regressor_.fit(X=X, y=y)
                if not hasattr(self.regressor_, "feature_importances_"):
                    raise ValueError(
                        """The given regressor must have an
                        attribute feature_importances_ after fitting."""
                    )
                # create dict with columns name (key) and feauter importance (value)
                d = dict(zip(X.columns, self.regressor_.feature_importances_))
                # sort d descending
                d = {k: d[k] for k in sorted(d, key=d.get, reverse=True)}
                self.feature_importances_ = d
                self.columns_ = list(d.keys())[: self.n_columns_]
            elif self.method == "random":
                self._check_n_columns(X)
                self.columns_ = list(
                    X.sample(
                        n=self.n_columns_, random_state=self.random_state, axis=1
                    ).columns
                )
            elif self.method == "columns":
                if self.columns is None:
                    raise AttributeError("Parameter columns must be given.")
                self.columns_ = self.columns
            elif self.method == "none":
                self.columns_ = None
            elif self.method == "all":
                self.columns_ = list(X.columns)
            else:
                raise ValueError("Incorrect method given. Try another method.")
        return self
Beispiel #2
0
    def fit(self, Z, X=None):
        """Fit the column selection on input series `Z`.

        Parameters
        ----------
        Z : pd.Series, pd.DataFrame
            A time series to apply the transformation on. If `Z` is a
            pd.Series after checking, no selection is made and ``columns_``
            is left unset.
        X : pd.DataFrame, default=None
            Exogenous variables, used with method="feature-importances" as
            the target for fitting the meta-model (regressor).

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the method is unknown, or the fitted regressor exposes no
            ``feature_importances_`` attribute.
        AttributeError
            If method="columns" but ``self.columns`` is None.
        """
        Z = check_series(Z)
        self.n_columns_ = self.n_columns
        self.feature_importances_ = None

        # a selection is only made for multivariate Z
        if not isinstance(Z, pd.Series):
            strategy = self.method
            if strategy == "all":
                self.columns_ = list(Z.columns)
            elif strategy == "none":
                self.columns_ = None
            elif strategy == "columns":
                requested = self.columns
                if requested is None:
                    raise AttributeError("Parameter columns must be given.")
                self.columns_ = requested
            elif strategy == "random":
                self._check_n_columns(Z)
                drawn = Z.sample(
                    n=self.n_columns_, random_state=self.random_state, axis=1
                )
                self.columns_ = list(drawn.columns)
            elif strategy == "feature-importances":
                self.regressor_ = check_regressor(
                    regressor=self.regressor, random_state=self.random_state
                )
                self._check_n_columns(Z)
                X = check_series(X)
                # Z is the exogenous data here, X the endogenous target
                self.regressor_.fit(X=Z, y=X)
                if not hasattr(self.regressor_, "feature_importances_"):
                    raise ValueError("""The given regressor must have an
                        attribute feature_importances_ after fitting.""")
                # map column name -> feature importance, most important first
                scores = dict(
                    zip(Z.columns, self.regressor_.feature_importances_)
                )
                ranked = dict(
                    sorted(
                        scores.items(), key=lambda item: item[1], reverse=True
                    )
                )
                self.feature_importances_ = ranked
                self.columns_ = list(ranked)[: self.n_columns_]
            else:
                raise ValueError("Incorrect method given. Try another method.")

        self._is_fitted = True
        return self
Beispiel #3
0
    def _fit(self, y, X=None, fh=None):
        """Fit the ensemble members and the meta-model that weights them.

        The data is split with ``temporal_train_test_split``. The forecasters
        are fitted on the training part, and their predictions for the test
        part are the features on which the regressor is fitted against
        ``y_test``. The weights are read from the fitted regressor, and the
        forecasters are finally refitted on all data.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        fh : int, list or np.array, optional, default=None
            The forecasters horizon with the steps ahead to to predict.
        X : pd.DataFrame, optional, default=None
            Exogenous variables; split alongside `y` and passed on to the
            forecasters.

        Returns
        -------
        self : returns an instance of self.
        """
        _, forecasters = self._check_forecasters()
        self.regressor_ = check_regressor(
            regressor=self.regressor, random_state=self.random_state
        )

        # split off the data the meta-model is trained on
        if X is None:
            y_train, y_test = temporal_train_test_split(
                y, test_size=self.test_size
            )
            X_train = X_test = None
        else:
            y_train, y_test, X_train, X_test = temporal_train_test_split(
                y, X, test_size=self.test_size
            )

        # fit the ensemble members and predict the test index
        holdout_fh = ForecastingHorizon(y_test.index, is_relative=False)
        self._fit_forecasters(forecasters, y_train, X_train, holdout_fh)
        member_preds = self._predict_forecasters(holdout_fh, X_test)
        X_meta = pd.concat(member_preds, axis=1)

        # meta-model: member predictions as features, y_test as target
        self.regressor_.fit(X=X_meta, y=y_test)

        # for a sklearn Pipeline the weights come from its last step
        final_estimator = (
            self.regressor_.steps[-1][1]
            if isinstance(self.regressor_, Pipeline)
            else self.regressor_
        )
        self.weights_ = _get_weights(final_estimator)

        # refit the forecasters on all data
        self._fit_forecasters(forecasters, y, X, fh)

        return self
Beispiel #4
0
    def _fit(self, y, X=None, fh=None):
        """Fit the forecasters and the final regressor stacked on top of them.

        The training series is split into a training window, used to fit the
        forecasters, and a validation window, used to fit the final regressor
        on the forecasters' predictions. Afterwards the forecasters are
        refitted on the entire series.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        fh : ForecastingHorizon
            The forecasters horizon with the steps ahead to to predict.
            Although the signature defaults to None, a horizon is needed
            here because the validation window is derived from it via
            ``fh.to_relative``.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables. They are sliced with the same positional
            windows as `y`, so they are expected to be aligned with `y`.

        Returns
        -------
        self : returns an instance of self.
        """
        _, forecasters = self._check_forecasters()
        self.regressor_ = check_regressor(
            regressor=self.regressor, random_state=self.random_state
        )

        # split training series into training set to fit forecasters and
        # validation set to fit meta-learner
        cv = SingleWindowSplitter(fh=fh.to_relative(self.cutoff))
        train_window, test_window = next(cv.split(y))
        y_fcst = y.iloc[train_window]
        y_meta = y.iloc[test_window].values
        if X is not None:
            # slice X with the same windows as y so both stay aligned
            X_fcst = X.iloc[train_window]
            X_meta = X.iloc[test_window]
        else:
            X_fcst = X_meta = None

        # fit forecasters on the training window only (y and X alike)
        self._fit_forecasters(forecasters, y_fcst, fh=fh, X=X_fcst)
        meta_features = np.column_stack(
            self._predict_forecasters(fh=fh, X=X_meta)
        )

        # fit final regressor on the validation window
        self.regressor_.fit(meta_features, y_meta)

        # refit forecasters on entire training series
        self._fit_forecasters(forecasters, y, fh=self.fh, X=X)

        return self
Beispiel #5
0
    def _fit(self, y, X=None, fh=None):
        """Fit to training data.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        fh : int, list or np.array, optional, default=None
            The forecasters horizon with the steps ahead to to predict.
        X : pd.DataFrame, optional, default=None
            Exogenous variables are ignored.

        Returns
        -------
        self : returns an instance of self.
        """
        _, forecasters = self._check_forecasters()

        # get training data for meta-model
        if X is not None:
            y_train, y_test, X_train, X_test = temporal_train_test_split(
                y, X, test_size=self.test_size)
        else:
            y_train, y_test = temporal_train_test_split(
                y, test_size=self.test_size)
            X_train, X_test = None, None

        # fit ensemble models
        fh_test = ForecastingHorizon(y_test.index, is_relative=False)
        self._fit_forecasters(forecasters, y_train, X_train, fh_test)

        if self.method == "feature-importance":

            self.regressor_ = check_regressor(regressor=self.regressor,
                                              random_state=self.random_state)
            X_meta = pd.concat(self._predict_forecasters(fh_test, X_test),
                               axis=1)

            # fit meta-model (regressor) on predictions of ensemble models
            # with y_test as endog/target
            self.regressor_.fit(X=X_meta, y=y_test)

            # check if regressor is a sklearn.Pipeline
            if isinstance(self.regressor_, Pipeline):
                # extract regressor from pipeline to access its attributes
                self.weights_ = _get_weights(self.regressor_.steps[-1][1])
            else:
                self.weights_ = _get_weights(self.regressor_)

        elif self.method == "inverse-variance":
            # get in-sample forecasts
            if self.regressor is not None:
                Warning(
                    f"regressor will not be used because ${self.method} is set."
                )
            inv_var = np.array([
                1 / np.var(y_test - y_pred_test)
                for y_pred_test in self._predict_forecasters(fh_test, X)
            ])
            # standardize the inverse variance
            self.weights_ = list(inv_var / np.sum(inv_var))
        else:
            raise NotImplementedError(
                f"Given method {self.method} does not exist, "
                f"please provide valid method parameter.")

        self._fit_forecasters(forecasters, y, X, fh)
        return self