def predict(self, X):
        """Predict using provided Sklearn compatible regressor.

        If `optimize_for_horizon` is truthy, `_predict` is called once for every
        leading slice of `X` (first row, first two rows, ...) and only the last row
        of each result is kept, so every forecasted step comes from a call made for
        exactly that horizon (i.e. len(X)=5 --> horizons 1..5 --> 5 `_predict` calls).
        Otherwise `_predict` is called a single time on the whole of `X`.
        NOTE(review): the per-horizon model fitting presumably happens inside
        `_predict`, which is not visible from this method -- confirm there.

        Parameters
        ----------
        X : pandas.DataFrame
            Input features.

        Returns
        -------
        pandas.DataFrame
            Predictions re-indexed with `X.index` and passed through
            `_clip_predictions`.

        Raises
        ------
        InsufficientDataLengthError
            If `X` has more than ``len(self._X) + 3`` rows.
        """
        max_rows = len(self._X) + 3
        if len(X) > max_rows:
            # The guard rejects inputs that are too LONG, so the message must state
            # an upper bound (it previously claimed "at least").
            raise InsufficientDataLengthError(
                f"`X` must have at most {max_rows} observations. Please provide valid data."
            )

        if self.optimize_for_horizon:
            # One `_predict` call per horizon; keep just the final row of each call.
            preds = pd.concat(
                [self._predict(X.iloc[:index, :]).tail(1) for index in range(1, X.shape[0] + 1)]
            )
        else:
            preds = self._predict(X)
        # Align predictions with the index of the provided features.
        preds.index = X.index
        return self._clip_predictions(preds)
Beispiel #2
0
    def _check_X_y(self, X, y=None):
        # Validation wrapper: checks `X` (and `y` when given) and then delegates to
        # `func`, which comes from an enclosing scope not shown here.
        # The order of the checks determines which error a caller sees first.

        # --- feature frame checks -------------------------------------------
        if not isinstance(X, pd.DataFrame):
            raise TypeError("`X` must be a pandas dataframe.")

        if len(X) < 3:
            too_short_msg = (
                "`X` must have at least 3 observations. "
                "Please provide valid data."
            )
            raise InsufficientDataLengthError(too_short_msg)

        index_is_datetime = pd.api.types.is_datetime64_dtype(X.index)
        if not index_is_datetime:
            raise ValueError(
                f"`X` must contain index of type datetime. Your index is {X.index}"
            )

        # --- no target supplied: nothing more to validate -------------------
        if y is None:
            return func(self, X, y)

        # --- target checks: type first, then length, then dimensionality ----
        accepted_target_types = (pd.Series, np.ndarray)
        if not isinstance(y, accepted_target_types):
            raise TypeError(
                f"`y` must be either pandas series or numpy ndarray. "
                f"You provided `{type(y)}`"
            )

        if len(X) != len(y):
            raise ValueError(
                f"`X` and `y` must have same length. "
                f"`len(X)={len(X)}` and `len(y)={len(y)}`"
            )

        if y.ndim != 1:
            raise ValueError(
                f"`y` must have 1 dimension. "
                f"You provided y with ndim={y.ndim}"
            )

        return func(self, X, y)
Beispiel #3
0
    def _transform_data_to_tsmodel_input_format(self, X, y=None, horizon=None):
        """Transform data into Sklearn API required form and shift them.

        Shift is done in autoregressive format with `lags` columns based on prediction horizon which
        is derived from length of provided input data for `predict` call.

        Lag features are added via `_add_lag_features` using the stored target
        `self._y`; when any column name contains ``_holiday_`` the frame is also
        passed through `_adjust_holidays`. Finally all columns are cast to float.

        Parameters
        ----------
        X : pandas.DataFrame
            Input features.

        y : array_like, (1d)
            Target vector. Only its presence is inspected here: when it is not
            None, the returned target is sliced from the stored `self._y`.

        horizon: int
            Number of steps used to shift the data. Must be provided whenever `y`
            is given, because it is added to `self.lags`.

        Returns
        -------
        X, y
            X - pandas.DataFrame cast to float
            y - ``self._y[self.lags + horizon - 1:]`` when `y` was given, else None

        Raises
        ------
        InsufficientDataLengthError
            If `y` is given and ``self.lags + horizon`` exceeds ``len(self._y)``.
        """
        if y is not None:
            stored_len = len(self._y)
            if self.lags + horizon > stored_len:
                # Report the length that is actually checked (`self._y`) and close
                # the parenthesis after the horizon value.
                raise InsufficientDataLengthError(
                    f"Sum of model lags ({self.lags}) and forecasting horizon ({horizon}) "
                    f"cannot be bigger than length of y ({stored_len}).")
            # Drop the leading targets that have no complete set of lagged values.
            y = self._y[self.lags + horizon - 1:]
        X = self._add_lag_features(X, self._y, horizon)
        if X.filter(like="_holiday_").shape[1] > 0:
            X = self._adjust_holidays(X)
        X = X.astype(float)

        return X, y