Example #1
0
def test_column_ensemble_shape(forecasters, fh):
    """Check that predictions have shape (len(fh), number of columns in y)."""
    # Random integer panel with three named columns, one per ensemble member.
    values = np.random.randint(0, 100, size=(100, 3))
    y = pd.DataFrame(values, columns=list("ABC"))

    ensemble = ColumnEnsembleForecaster(forecasters)
    ensemble.fit(y, fh=fh)
    y_pred = ensemble.predict()

    # One row per horizon step, one column per input series.
    expected_shape = (len(fh), y.shape[1])
    assert y_pred.shape == expected_shape
Example #2
0
    def _fit(self, y, X=None, fh=None):
        """Fit to training data.

        Wraps a single ``_NaiveForecaster`` (configured with this estimator's
        ``strategy``, ``sp`` and ``window_length``) in a
        ``ColumnEnsembleForecaster``, stores it as ``self._forecaster`` and
        fits it on the given data.

        Parameters
        ----------
        y : pd.Series, pd.DataFrame
            Target time series to which to fit the forecaster.
        fh : int, list or np.array, default=None
            The forecasting horizon with the steps ahead to predict.
        X : pd.DataFrame, default=None
            Exogenous variables; forwarded unchanged to the wrapped
            forecaster's ``fit``.

        Returns
        -------
        self : returns an instance of self.
        """
        naive = _NaiveForecaster(
            strategy=self.strategy,
            sp=self.sp,
            window_length=self.window_length,
        )
        self._forecaster = ColumnEnsembleForecaster(naive)
        self._forecaster.fit(y=y, X=X, fh=fh)
        # Honour the documented contract: hand back the fitted estimator.
        return self
Example #3
0
class NaiveForecaster(BaseForecaster):
    """Forecast based on naive assumptions about past trends continuing.

    NaiveForecaster is a forecaster that makes forecasts using simple
    strategies. Two out of three strategies are robust against NaNs. The
    NaiveForecaster can also be used for multivariate data and it then
    applies internally the ColumnEnsembleForecaster, so each column
    is forecasted with the same strategy.

    Internally, this forecaster does the following:
    - obtains the so-called "last window", a 1D array that denotes the
      most recent time window that the forecaster is allowed to use
    - reshapes the last window into a 2D array according to the given
      seasonal periodicity (prepended with NaN values to make it fit);
    - make a prediction for each column, using the given strategy:
      - "last": last non-NaN row
      - "mean": np.nanmean over rows
    - tile the predictions using the seasonal periodicity

    Parameters
    ----------
    strategy : {"last", "mean", "drift"}, default="last"
        Strategy used to make forecasts:

        * "last":   (robust against NaN values)
                    forecast the last value in the
                    training series when sp is 1.
                    When sp is not 1,
                    last value of each season
                    in the last window will be
                    forecasted for each season.
        * "mean":   (robust against NaN values)
                    forecast the mean of last window
                    of training series when sp is 1.
                    When sp is not 1, mean of all values
                    in a season from last window will be
                    forecasted for each season.
        * "drift":  (not robust against NaN values)
                    forecast by fitting a line between the
                    first and last point of the window and
                    extrapolating it into the future.

    sp : int, default=1
        Seasonal periodicity to use in the seasonal forecasting.

    window_length : int or None, default=None
        Window length to use in the `mean` strategy. If None, entire training
        series will be used.

    Examples
    --------
    >>> from sktime.datasets import load_airline
    >>> from sktime.forecasting.naive import NaiveForecaster
    >>> y = load_airline()
    >>> forecaster = NaiveForecaster(strategy="drift")
    >>> forecaster.fit(y)
    NaiveForecaster(...)
    >>> y_pred = forecaster.predict(fh=[1,2,3])
    """

    _tags = {
        "y_inner_mtype": ["pd.DataFrame", "pd.Series"],
        "scitype:y": "both",
        "requires-fh-in-fit": False,
        # NOTE(review): the original comment here read "todo: switch to True if
        # GH1367 is fixed" although the value is already True -- confirm
        # whether that TODO is stale or the value should be False until then.
        "handles-missing-data": True,
    }

    def __init__(self, strategy="last", window_length=None, sp=1):
        # Hyper-parameters are stored verbatim; the wrapped forecaster is only
        # built in _fit.
        self.strategy = strategy
        self.sp = sp
        self.window_length = window_length
        super(NaiveForecaster, self).__init__()

    def _fit(self, y, X=None, fh=None):
        """Fit to training data.

        Wraps a single ``_NaiveForecaster`` (configured with this estimator's
        ``strategy``, ``sp`` and ``window_length``) in a
        ``ColumnEnsembleForecaster``, stores it as ``self._forecaster`` and
        fits it on the given data.

        Parameters
        ----------
        y : pd.Series, pd.DataFrame
            Target time series to which to fit the forecaster.
        fh : int, list or np.array, default=None
            The forecasting horizon with the steps ahead to predict.
        X : pd.DataFrame, default=None
            Exogenous variables; forwarded unchanged to the wrapped
            forecaster's ``fit``.

        Returns
        -------
        self : returns an instance of self.
        """
        naive = _NaiveForecaster(
            strategy=self.strategy,
            sp=self.sp,
            window_length=self.window_length,
        )
        self._forecaster = ColumnEnsembleForecaster(naive)
        self._forecaster.fit(y=y, X=X, fh=fh)
        # Honour the documented contract: hand back the fitted estimator.
        return self

    def _predict(self,
                 fh=None,
                 X=None,
                 return_pred_int=False,
                 alpha=DEFAULT_ALPHA):
        """Forecast time series at future horizon.

        Delegates to the wrapped forecaster and then, for in-sample
        predictions that include the first training time point, overwrites
        that row with the prediction for the second training time point.

        Parameters
        ----------
        fh : int, list, np.array or ForecastingHorizon
            Forecasting horizon
        X : pd.DataFrame, optional (default=None)
            Exogenous time series
        return_pred_int : bool, optional (default=False)
            Forwarded unchanged to the wrapped forecaster's ``predict``.
        alpha : optional (default=DEFAULT_ALPHA)
            Forwarded unchanged to the wrapped forecaster's ``predict``.

        Returns
        -------
        y_pred
            The object returned by the wrapped forecaster's ``predict``,
            with the first in-sample row imputed when possible.
        """
        y_pred = self._forecaster.predict(
            fh=fh,
            X=X,
            return_pred_int=return_pred_int,
            alpha=alpha,
        )

        # self._y is not assigned in this class; presumably it is the training
        # series stored by the base class during fit -- TODO confirm.
        train_index = self._y.index

        # Check for in-sample prediction, if first time point needs to be
        # imputed. The donor row is the second training time point, so only
        # impute when that point exists in the training index AND in the
        # prediction; otherwise the lookup would raise IndexError/KeyError and
        # the first row is left exactly as the wrapped forecaster produced it.
        if (
            len(train_index) > 1
            and train_index[0] in y_pred.index
            and train_index[1] in y_pred.index
        ):
            # fill first row with next row values
            y_pred.loc[train_index[0]] = y_pred.loc[train_index[1]]

        return y_pred

    def _update(self, y, X=None, update_params=True):
        """Update cutoff value and, optionally, fitted parameters.

        Delegates the update to the wrapped forecaster.

        Parameters
        ----------
        y : pd.Series, pd.DataFrame, or np.array
            Target time series to which to fit the forecaster.
        X : pd.DataFrame, optional (default=None)
            Exogenous data
        update_params : bool, optional (default=True)
            whether model parameters should be updated

        Returns
        -------
        self : reference to self
        """
        self._forecaster.update(y=y, X=X, update_params=update_params)
        # Return this estimator (as documented) rather than the wrapped
        # ColumnEnsembleForecaster that the delegate call hands back.
        return self
Example #4
0
def test_invalid_forecasters_indices(forecasters):
    """Check that fitting with invalid forecaster indices raises ValueError."""
    # Random integer panel with three named columns.
    values = np.random.randint(0, 100, size=(100, 3))
    y = pd.DataFrame(values, columns=list("ABC"))

    ensemble = ColumnEnsembleForecaster(forecasters=forecasters)

    # The error message is expected to mention the one-estimator-per-column rule.
    expected_message = r"estimator per column"
    with pytest.raises(ValueError, match=expected_message):
        ensemble.fit(y, fh=[1, 2])