Esempio n. 1
0
    def _split(self, y):
        """Yield ``(train, test)`` windows, led by the initial window if set.

        Parameters
        ----------
        y : index-like
            Time index to split; forwarded to the window-length check and to
            `_get_end`.

        Yields
        ------
        train, test
            For the initial split, integer position arrays built from
            ``np.arange`` and ``fh.to_numpy()``; afterwards, the pairs
            produced by `_split_windows`.

        Raises
        ------
        ValueError
            If `initial_window` is given while `start_with_window` is False,
            or if `initial_window` is not greater than `window_length`.
        """
        step = check_step_length(self.step_length)
        window = check_window_length(self.window_length, "window_length")
        first_window = check_window_length(self.initial_window, "initial_window")
        fh = _check_fh(self.fh)
        _check_window_lengths(y, fh, window, first_window)

        if self.initial_window is not None:
            if not self.start_with_window:
                raise ValueError(
                    "`start_with_window` must be True if `initial_window` is given"
                )
            if self.initial_window <= self.window_length:
                raise ValueError("`initial_window` must greater than `window_length`")

            # With an in-sample horizon the first window is shifted forward so
            # that the in-sample test positions still fall inside the data.
            shift_needed = (
                not fh.is_all_out_of_sample()
                and abs(fh[0]) >= self.initial_window
            )
            first_start = abs(fh[0]) - self.initial_window + 1 if shift_needed else 0
            first_end = first_start + first_window
            yield np.arange(first_start, first_end), first_end + fh.to_numpy() - 1

        start = self._get_start(fh)
        end = _get_end(y, fh)
        regular_windows = self._split_windows(
            start, end, step, window, fh.to_numpy()
        )
        for train, test in regular_windows:
            yield train, test
Esempio n. 2
0
    def _fit(self, X, y=None):
        """Draw the random intervals used to segment the series in X.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, n_features]
            Each cell of X must contain a pandas.Series.
        y : any container with attribute shape, optional, default=None
            ``y.shape[0]`` is passed as the number of time points when
            validating `min_length` and `max_length`; 1 is used if y is None.

        Returns
        -------
        self : RandomIntervalSegmenter
            This estimator.

        Raises
        ------
        ValueError
            If `max_length` is not bigger than `min_length`, or if either one
            is set while ``n_intervals == "random"``.
        """
        n_timepoints = 1 if y is None else y.shape[0]

        # The validated values are written back onto the estimator.
        self.min_length = check_window_length(
            self.min_length, n_timepoints, "min_length"
        )
        self.max_length = check_window_length(
            self.max_length, n_timepoints, "max_length"
        )

        # An unset minimum falls back to 2.
        lower = 2 if self.min_length is None else self.min_length
        if self.max_length is not None and not lower < self.max_length:
            raise ValueError(
                "`max_length` must be bigger than `min_length`.")

        self.input_shape_ = X.shape

        # One shared time index is retrieved for the columns of X.
        # TODO generalise to columns with series of unequal length
        self._time_index = _get_time_index(X)

        # TODO if multiple columns are passed, introduce option to compute
        #  one set of shared intervals,
        #  or rely on ColumnTransformer?
        random_count = self.n_intervals == "random"
        if not random_count:
            self.intervals_ = _rand_intervals_fixed_n(
                self._time_index,
                n_intervals=self.n_intervals,
                min_length=lower,
                max_length=self.max_length,
                random_state=self.random_state,
            )
            return self

        if not (self.min_length is None and self.max_length is None):
            raise ValueError(
                "Setting `min_length` or `max_length` is not yet "
                "implemented for `n_intervals='random'`.")
        self.intervals_ = _rand_intervals_rand_n(
            self._time_index, random_state=self.random_state)
        return self
Esempio n. 3
0
    def _split(self, y: pd.Index) -> SPLIT_GENERATOR_TYPE:
        """Yield the splits of `y`, starting with the initial-window split if set.

        Parameters
        ----------
        y : pd.Index
            Time index to split.

        Yields
        ------
        split
            First, when `initial_window` is not None, the value returned by
            `_split_for_initial_window`; then every ``(train, test)`` pair
            produced by `_split_windows`.
        """
        size = y.shape[0]
        raw_step = check_step_length(self.step_length)
        window = check_window_length(
            window_length=self.window_length, n_timepoints=size, name="window_length"
        )
        first_window = check_window_length(
            window_length=self.initial_window,
            n_timepoints=size,
            name="initial_window",
        )
        fh = _check_fh(self.fh)
        _check_window_lengths(
            y=y, fh=fh, window_length=window, initial_window=first_window
        )

        if self.initial_window is not None:
            yield self._split_for_initial_window(y)

        start = self._get_start(y=y, fh=fh)
        # NOTE(review): the end point from `_get_end` is shifted by 2 here;
        # presumably this turns it into the exclusive bound expected by
        # `_split_windows` - confirm against `_get_end`.
        end = _get_end(y_index=y, fh=fh) + 2
        step = self._get_step_length(x=raw_step)

        regular_windows = self._split_windows(
            start=start,
            end=end,
            step_length=step,
            window_length=window,
            y=y,
            fh=fh.to_numpy(),
        )
        for train, test in regular_windows:
            yield train, test
Esempio n. 4
0
    def fit(self, X, y=None):
        """Fit the transformer by drawing random interval indices.

        Parameters
        ----------
        X : pandas DataFrame of shape [n_samples, n_features]
            Input data; it is checked with ``enforce_univariate=True``.
        y : pandas Series, shape (n_samples, ...), optional
            Targets for supervised learning. Not used by this method.

        Returns
        -------
        self : RandomIntervalSegmenter
            This estimator.

        Raises
        ------
        ValueError
            If `max_length` is not bigger than `min_length`, or if either one
            is set while ``n_intervals == "random"``.
        """
        # Validation only: the returned values are not used here.
        check_window_length(self.min_length, "`min_length`")
        check_window_length(self.max_length, "`max_length`")

        # An unset minimum falls back to 2.
        lower = 2 if self.min_length is None else self.min_length
        if self.max_length is not None and not lower < self.max_length:
            raise ValueError("`max_length` must be bigger than `min_length`.")

        X = check_X(X, enforce_univariate=True)
        self.input_shape_ = X.shape

        # TODO generalise to columns with series of unequal length
        self._time_index = _get_time_index(X)

        # TODO if multiple columns are passed, introduce option to compute
        #  one set of shared intervals,
        #  or rely on ColumnTransformer?
        if self.n_intervals == "random":
            if not (self.min_length is None and self.max_length is None):
                raise ValueError(
                    "Setting `min_length` or `max_length` is not yet "
                    "implemented for `n_intervals='random'`."
                )
            intervals = _rand_intervals_rand_n(
                self._time_index, random_state=self.random_state
            )
        else:
            intervals = _rand_intervals_fixed_n(
                self._time_index,
                n_intervals=self.n_intervals,
                min_length=lower,
                max_length=self.max_length,
                random_state=self.random_state,
            )

        self.intervals_ = intervals
        self._is_fitted = True
        return self
Esempio n. 5
0
    def _split(self, y: Optional[ACCEPTED_Y_TYPES]) -> SPLIT_GENERATOR_TYPE:
        """Yield ``(train, test)`` windows, led by the initial window if set.

        Parameters
        ----------
        y : index-like
            Time index to split. ``y.shape`` is always read; ``y.get_loc`` and
            positional indexing are used when `initial_window` is a timedelta
            or date offset.

        Yields
        ------
        train, test
            For the initial split, integer position arrays built from
            ``np.arange`` and ``fh.to_numpy()``; afterwards, the pairs
            produced by `_split_windows`.

        Raises
        ------
        ValueError
            If `initial_window` is given while `start_with_window` is False,
            or if `initial_window` is not greater than `window_length`.
        """
        n_timepoints = y.shape[0]
        step_length = check_step_length(self.step_length)
        window_length = check_window_length(self.window_length, n_timepoints,
                                            "window_length")
        initial_window = check_window_length(self.initial_window, n_timepoints,
                                             "initial_window")
        fh = _check_fh(self.fh)
        _check_window_lengths(y, fh, window_length, initial_window)

        if self.initial_window is not None:
            if not self.start_with_window:
                raise ValueError(
                    "`start_with_window` must be True if `initial_window` is given"
                )

            if self.initial_window <= self.window_length:
                raise ValueError(
                    "`initial_window` must greater than `window_length`")

            # Express the initial window as an integer number of positions so
            # it can be compared with, and subtracted from, the integer horizon.
            if is_timedelta_or_date_offset(x=self.initial_window):
                initial_window_threshold = y.get_loc(y[0] +
                                                     self.initial_window)
            else:
                initial_window_threshold = self.initial_window

            # For in-sample forecasting horizons, the first split must ensure that
            # in-sample test set is still within the data.
            if not fh.is_all_out_of_sample() and abs(
                    fh[0]) >= initial_window_threshold:
                # Use the integer threshold here: subtracting a timedelta or
                # date offset from an integer raises TypeError. For an integer
                # `initial_window` the threshold is that same integer.
                initial_start = abs(fh[0]) - initial_window_threshold + 1
            else:
                initial_start = 0

            if is_timedelta_or_date_offset(x=initial_window):
                initial_end = y.get_loc(y[initial_start] + initial_window)
            else:
                initial_end = initial_start + initial_window
            train = np.arange(initial_start, initial_end)
            test = initial_end + fh.to_numpy() - 1
            yield train, test

        start = self._get_start(y=y, fh=fh)
        end = _get_end(y=y, fh=fh)
        step_length = self._get_step_length(x=step_length)

        for train, test in self._split_windows(start, end,
                                               step_length, window_length, y,
                                               fh.to_numpy()):
            yield train, test
Esempio n. 6
0
    def _fit(self, y, X=None, fh=None):
        """Fit a clone of the wrapped estimator on the transformed data.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables; passed on to `_transform`.
        fh : int, list or np.array, optional (default=None)
            Not read here; the horizon is taken from ``self.fh``.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        NotImplementedError
            If ``self.fh`` is not entirely out-of-sample relative to the
            cutoff.
        """
        # Only out-of-sample horizons are supported. The horizon is needed
        # for fitting, so this is checked before any other work.
        horizon_is_out_of_sample = self.fh.is_all_out_of_sample(self.cutoff)
        if not horizon_is_out_of_sample:
            raise NotImplementedError(
                "In-sample predictions are not implemented.")

        self.window_length_ = check_window_length(
            self.window_length, n_timepoints=len(y)
        )

        targets, features = self._transform(y, X)

        # The clone is stored before fitting, then fitted on the full target.
        fitted = clone(self.estimator)
        self.estimator_ = fitted
        fitted.fit(features, targets)
        return self
Esempio n. 7
0
    def split_initial(self, y):
        """Split `y` into the initial training window and the remainder.

        Useful during forecasting model selection, when the forecaster is
        fitted on a first part of the data before temporal cross-validation.

        Parameters
        ----------
        y : pd.Series

        Returns
        -------
        initial_training_window : np.array
            Positions ``0 .. initial_window - 1``.
        initial_test_window : np.array
            Positions ``initial_window .. len(y) - 1``.

        Raises
        ------
        ValueError
            If `initial_window` is None.
        """
        if self.initial_window is None:
            raise ValueError(
                "Please specify initial window, found: `initial_window`=None"
            )

        n_initial = check_window_length(self.initial_window)
        train_positions = np.arange(n_initial)
        test_positions = np.arange(n_initial, len(y))
        return train_positions, test_positions
Esempio n. 8
0
    def fit(self, y, X=None, fh=None):
        """Store the data, validate the window settings and fit.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables; stored via `_set_y_X` and handed to `_fit`.
        fh : int, list or np.array, optional (default=None)
            The forecasting horizon with the steps ahead to predict.

        Returns
        -------
        self : returns an instance of self.
        """
        series_length = len(y)
        self._set_y_X(y, X)
        self._set_fh(fh)

        # Validated copies of the hyper-parameters, kept as fitted attributes.
        self.step_length_ = check_step_length(self.step_length)
        self.window_length_ = check_window_length(
            self.window_length, series_length
        )

        self._fit(y, X)
        self._is_fitted = True
        return self
Esempio n. 9
0
    def _fit(self, y, X=None, fh=None):
        """Fit a clone of the wrapped estimator on the transformed data.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables; passed on to `_transform`.
        fh : int, list or np.array, optional (default=None)
            Not read by this method.

        Returns
        -------
        self : returns an instance of self.
        """
        self.window_length_ = check_window_length(
            self.window_length, n_timepoints=len(y)
        )

        targets, features = self._transform(y, X)

        # Flatten the target to 1d to avoid the DataConversion warning from
        # scikit-learn.
        flat_targets = targets.ravel()

        fitted = clone(self.estimator)
        self.estimator_ = fitted
        fitted.fit(features, flat_targets)
        return self
Esempio n. 10
0
    def _split_windows(self, y):
        """Yield a single ``(training_window, test_window)`` pair for `y`.

        The training window ends at ``self._get_end(y) - 1`` (exclusive). It
        starts at position 0 when no window length is set, otherwise
        `window_length` positions before its end.
        """
        window_length = check_window_length(self.window_length)
        fh = self._check_fh()

        end = self._get_end(y) - 1
        if window_length is None:
            start = 0
        else:
            start = end - window_length

        yield np.arange(start, end), end + fh - 1
Esempio n. 11
0
    def _split(self, y):
        """Yield a single ``(train, test)`` pair of integer positions for `y`.

        The training window ends at ``_get_end(y, fh) - 1`` (exclusive). It
        starts at position 0 when no window length is set, otherwise
        `window_length` positions before its end.
        """
        window_length = check_window_length(self.window_length)
        fh = _check_fh(self.fh)

        end = _get_end(y, fh) - 1
        if window_length is None:
            start = 0
        else:
            start = end - window_length

        yield np.arange(start, end), end + fh.to_numpy() - 1
Esempio n. 12
0
    def _split_windows(self, y):
        """Yield one ``(training_window, test_window)`` pair per split point.

        Split points run from ``self._get_start()`` up to (excluding)
        ``self._get_end(y)`` in steps of `step_length`. Each training window
        covers the `window_length` positions before its split point.
        """
        step_length = check_step_length(self.step_length)
        window_length = check_window_length(self.window_length)
        fh = self._check_fh()

        end = self._get_end(y)
        start = self._get_start()
        split_points = range(start, end, step_length)
        for split_point in split_points:
            window_start = split_point - window_length
            yield np.arange(window_start, split_point), split_point + fh - 1
Esempio n. 13
0
    def fit(self, y, X=None, fh=None):
        """Fit one regressor per step of the forecasting horizon.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables. Must be None.
        fh : int, list or np.array, optional (default=None)
            The forecasting horizon with the steps ahead to predict.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        NotImplementedError
            If `X` is given, or if the horizon contains in-sample steps.
        """
        self._set_y_X(y, X)
        if X is not None:
            raise NotImplementedError(
                "Exogenous variables `X` are not yet supported.")
        self._set_fh(fh)
        in_sample_steps = self.fh.to_in_sample(self.cutoff)
        if len(in_sample_steps) > 0:
            raise NotImplementedError(
                "In-sample predictions are not implemented")

        self.step_length_ = check_step_length(self.step_length)
        self.window_length_ = check_window_length(self.window_length)

        # Direct strategy: a separate regressor is fitted for each step
        # ahead, all on the same sliding-window features.
        splitter_params = dict(
            fh=self.fh.to_relative(self.cutoff),
            window_length=self.window_length_,
            step_length=self.step_length_,
            start_with_window=True,
        )
        self._cv = SlidingWindowSplitter(**splitter_params)

        # Tabularise the series with the rolling-window split.
        features, targets = self._transform(y, X)

        # Column i of the target matrix is the target for horizon step i.
        self.regressors_ = []
        for step in range(len(self.fh)):
            step_target = targets[:, step]
            step_regressor = clone(self.regressor)
            step_regressor.fit(features, step_target)
            self.regressors_.append(step_regressor)

        self._is_fitted = True
        return self
Esempio n. 14
0
    def _split(self, y: ACCEPTED_Y_TYPES) -> SPLIT_GENERATOR_TYPE:
        """Yield a single ``(train, test)`` pair of integer positions for `y`.

        The training window ends at ``_get_end(y, fh) - 1`` (exclusive). Its
        start is position 0 when no window length is set, a position looked
        up in `y` when the window length is a timedelta or date offset, and
        ``end - window_length`` otherwise.
        """
        n_timepoints = y.shape[0]
        window_length = check_window_length(self.window_length, n_timepoints)
        fh = _check_fh(self.fh)

        train_end = _get_end(y, fh) - 1
        if window_length is None:
            train_start = 0
        elif not is_timedelta_or_date_offset(x=window_length):
            train_start = train_end - window_length
        else:
            # Step back from the last training time stamp by the offset and
            # start one position after the located time stamp.
            train_start = y.get_loc(y[train_end - 1] - window_length) + 1

        yield np.arange(train_start, train_end), train_end + fh.to_numpy() - 1
Esempio n. 15
0
    def _split_windows(self, y):
        """Yield one ``(training_window, test_window)`` pair per cutoff.

        Raises
        ------
        ValueError
            If the largest cutoff is not smaller than ``len(y)``, or if the
            largest cutoff plus the largest horizon step exceeds ``len(y)``.
        """
        cutoffs = check_cutoffs(self.cutoffs)
        last_cutoff = np.max(cutoffs)
        n_timepoints = len(y)
        if not last_cutoff < n_timepoints:
            raise ValueError("`cutoffs` are out-of-bounds for given `y`.")

        fh = self._check_fh()
        if last_cutoff + np.max(fh) > n_timepoints:
            raise ValueError("`fh` is out-of-bounds for given `cutoffs` and `y`.")

        window_length = check_window_length(self.window_length)

        for cutoff in cutoffs:
            # The training window covers the `window_length` positions that
            # end at the cutoff, cutoff included.
            yield np.arange(cutoff - window_length, cutoff) + 1, cutoff + fh
Esempio n. 16
0
    def _split(self, y):
        """Yield one ``(training_window, test_window)`` pair per cutoff.

        Raises
        ------
        ValueError
            If the largest cutoff is not smaller than ``y.shape[0]``, or if
            the largest cutoff plus the largest horizon step exceeds
            ``y.shape[0]``.
        """
        cutoffs = check_cutoffs(self.cutoffs)
        last_cutoff = np.max(cutoffs)
        n_timepoints = y.shape[0]
        if last_cutoff >= n_timepoints:
            raise ValueError("`cutoffs` are incompatible with given `y`.")

        fh = _check_fh(self.fh)
        if last_cutoff + np.max(fh) > n_timepoints:
            raise ValueError("`fh` is incompatible with given `cutoffs` and `y`.")

        window_length = check_window_length(self.window_length)

        for cutoff in cutoffs:
            # The training window covers the `window_length` positions that
            # end at the cutoff, cutoff included.
            yield np.arange(cutoff - window_length, cutoff) + 1, cutoff + fh
Esempio n. 17
0
    def fit(self, y, X=None, fh=None):
        """Fit a single one-step-ahead regressor on the tabularised series.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables. Must be None.
        fh : int, list or np.array, optional (default=None)
            The forecasting horizon with the steps ahead to predict.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        NotImplementedError
            If `X` is given.
        """
        if X is not None:
            raise NotImplementedError(
                "Exogenous variables `X` are not yet supported.")

        self._set_y_X(y, X)
        self._set_fh(fh)

        self.step_length_ = check_step_length(self.step_length)
        self.window_length_ = check_window_length(self.window_length)

        # Recursive strategy: one estimator is fitted for a one-step-ahead
        # horizon and is later called iteratively to predict further ahead.
        splitter_params = dict(
            fh=1,
            window_length=self.window_length_,
            step_length=self.step_length_,
            start_with_window=True,
        )
        self._cv = SlidingWindowSplitter(**splitter_params)

        # Tabularise the series, then fit the base regressor on a 1d target.
        features, targets = self._transform(y, X)
        fitted = clone(self.regressor)
        fitted.fit(features, targets.ravel())
        self.regressor_ = fitted

        self._is_fitted = True
        return self
Esempio n. 18
0
    def fit(self, y, X=None, fh=None):
        """Fit one multi-output regressor to all steps of the horizon.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables. Must be None.
        fh : int, list or np.array, optional (default=None)
            The forecasting horizon with the steps ahead to predict.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        NotImplementedError
            If `X` is given, or if the horizon contains in-sample steps.
        """
        self._set_y_X(y, X)
        if X is not None:
            raise NotImplementedError(
                "Exogenous variables `X` are not yet supported.")
        self._set_fh(fh)
        in_sample_steps = self.fh.to_in_sample(self.cutoff)
        if len(in_sample_steps) > 0:
            raise NotImplementedError(
                "In-sample predictions are not implemented")

        self.step_length_ = check_step_length(self.step_length)
        self.window_length_ = check_window_length(self.window_length)

        # Multioutput strategy: a single regressor that handles
        # multi-dimensional targets is fitted to all future steps at once.
        splitter_params = dict(
            fh=self.fh.to_relative(self.cutoff),
            window_length=self.window_length_,
            step_length=self.step_length_,
            start_with_window=True,
        )
        self._cv = SlidingWindowSplitter(**splitter_params)

        # Tabularise the series with the rolling-window split, then fit.
        features, targets = self._transform(y, X)
        fitted = clone(self.regressor)
        fitted.fit(features, targets)
        self.regressor_ = fitted

        self._is_fitted = True
        return self
Esempio n. 19
0
    def _split(self, y: ACCEPTED_Y_TYPES) -> SPLIT_GENERATOR_TYPE:
        """Yield one ``(training_window, test_window)`` pair per cutoff.

        Raises
        ------
        ValueError
            If the largest cutoff is not smaller than ``y.shape[0]``, or if
            the largest cutoff plus the largest horizon step exceeds
            ``y.shape[0]``.
        """
        cutoffs = check_cutoffs(self.cutoffs)
        last_cutoff = np.max(cutoffs)
        if last_cutoff >= y.shape[0]:
            raise ValueError("`cutoffs` are incompatible with given `y`.")

        fh = _check_fh(self.fh)
        n_timepoints = y.shape[0]
        if last_cutoff + np.max(fh) > n_timepoints:
            raise ValueError("`fh` is incompatible with given `cutoffs` and `y`.")

        window_length = check_window_length(self.window_length, n_timepoints)

        for cutoff in cutoffs:
            if not is_timedelta_or_date_offset(x=window_length):
                train_start = cutoff - window_length
            else:
                # Step back by the offset from the cutoff time stamp, but
                # never before the first time stamp of `y`.
                earliest = max(y[0], y[cutoff] - window_length)
                train_start = y.get_loc(earliest)
            yield np.arange(train_start, cutoff) + 1, cutoff + fh
Esempio n. 20
0
    def _get_end(self, y):
        """Compute the non-inclusive end of the last window for `y`.

        Returns
        -------
        end : int
            ``len(y) + 1`` when every horizon step is <= 0, otherwise
            ``len(y) - fh[-1] + 1``.

        Raises
        ------
        ValueError
            For a horizon that is not entirely <= 0, if a window length is
            set and ``window_length + fh[-1]`` exceeds ``len(y)``.
        """
        n_timepoints = len(y)
        fh = self._check_fh()
        window_length = check_window_length(self.window_length)

        # A purely in-sample horizon needs no feasibility check.
        if np.all(fh <= 0):
            return n_timepoints + 1

        fh_max = fh[-1]
        too_long = window_length is not None and window_length + fh_max > n_timepoints
        if too_long:
            raise ValueError(
                "The window length and forecasting horizon are "
                "incompatible with the length of `y`")

        # +1 because the end is used as a non-inclusive bound.
        return n_timepoints - fh_max + 1
Esempio n. 21
0
    def _split(self, y: pd.Index) -> SPLIT_GENERATOR_TYPE:
        """Yield a single ``(train, test)`` pair for `y`.

        The training window is built by `_get_train_window` and ends at the
        position returned by `_get_end` (inclusive, since ``end + 1`` is
        passed as the split point). Test positions are ``end + fh`` for an
        integer horizon, and are looked up in `y` otherwise.
        """
        n_timepoints = y.shape[0]
        window_length = check_window_length(self.window_length, n_timepoints)
        fh = _check_fh(self.fh)
        end = _get_end(y_index=y, fh=fh)

        if window_length is None:
            train_start = 0
        elif is_int(window_length):
            train_start = end - window_length + 1
        else:
            # First position whose time stamp lies strictly after
            # ``y[end] - window_length``.
            inside_window = np.argwhere(y > y[end] - window_length)
            train_start = inside_window.flatten()[0]

        train = self._get_train_window(
            y=y, train_start=train_start, split_point=end + 1
        )

        if array_is_int(fh):
            test = end + fh.to_numpy()
        else:
            test_positions = [y.get_loc(y[end] + x) for x in fh.to_pandas()]
            test = np.array(test_positions)

        yield train, test
Esempio n. 22
0
    def _split(self, y: ACCEPTED_Y_TYPES) -> SPLIT_GENERATOR_TYPE:
        """Yield one ``(training_window, test_window)`` pair per cutoff.

        Supports two type combinations: integer `window_length` with integer
        cutoffs, and timedelta/date-offset `window_length` with datetime
        cutoffs.

        Raises
        ------
        TypeError
            If `window_length` and a cutoff are of any other type combination.
        """
        n_timepoints = y.shape[0]
        cutoffs = check_cutoffs(cutoffs=self.cutoffs)
        fh = _check_fh(fh=self.fh)
        window_length = check_window_length(window_length=self.window_length,
                                            n_timepoints=n_timepoints)
        # Cross-validate the inputs against each other before any split is
        # produced.
        _check_cutoffs_fh_window_length(cutoffs=cutoffs,
                                        fh=fh,
                                        window_length=window_length)
        _check_cutoffs_and_y(cutoffs=cutoffs, y=y)
        _check_cutoffs_fh_y(cutoffs=cutoffs, fh=fh, y=y)
        # Loop-invariant extremes, used below only to decide whether the test
        # window holds time stamps that must be mapped to positions.
        max_fh = fh.max()
        max_cutoff = np.max(cutoffs)

        for cutoff in cutoffs:
            if is_int(x=window_length) and is_int(x=cutoff):
                train_start = cutoff - window_length
            elif is_timedelta_or_date_offset(x=window_length) and is_datetime(
                    x=cutoff):
                # Step back by the offset, but never before the first time
                # stamp of `y`; the result is an integer position.
                train_start = y.get_loc(max(y[0], cutoff - window_length))
            else:
                raise TypeError(f"Unsupported combination of types: "
                                f"`window_length`: {type(window_length)}, "
                                f"`cutoff`: {type(cutoff)}")

            # The training window runs from `train_start + 1` up to and
            # including the cutoff position.
            if is_int(x=cutoff):
                training_window = np.arange(train_start, cutoff) + 1
            else:
                training_window = np.arange(train_start, y.get_loc(cutoff)) + 1

            test_window = cutoff + fh.to_numpy()
            if is_datetime(x=max_cutoff) and is_timedelta(x=max_fh):
                # Drop time stamps before the start of `y`, then convert the
                # remaining time stamps to integer positions.
                test_window = test_window[test_window >= y.min()]
                test_window = np.array(
                    [y.get_loc(timestamp) for timestamp in test_window])
            yield training_window, test_window
Esempio n. 23
0
    def _fit(self, y, X=None, fh=None):
        """Validate `strategy`, `sp` and `window_length` against the data.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        X : pd.DataFrame, default=None
            Not used by this method.
        fh : int, list or np.array, default=None
            Not used by this method.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        ValueError
            If `strategy` is unknown; if `window_length` is smaller than `sp`
            for the "last"/"mean" strategies; if `window_length` is 1 for the
            "drift" strategy; or if the resulting window length is larger
            than the stored training series.
        """
        n_timepoints = y.shape[0]

        allowed_strategies = ("last", "mean", "drift")
        if self.strategy not in allowed_strategies:
            raise ValueError(f"Unknown strategy: {self.strategy}. Expected "
                             f"one of: {allowed_strategies}.")

        if self.strategy == "drift":
            if self.sp != 1:
                warn(
                    "For the `drift` strategy, the `sp` value will be ignored."
                )
            self.window_length_ = check_window_length(
                self.window_length, n_timepoints
            )
            # Default to the full series when no window length was given.
            if self.window_length is None:
                self.window_length_ = len(y)
            if self.window_length == 1:
                raise ValueError(f"For the `drift` strategy, "
                                 f"the `window_length`: {self.window_length} "
                                 f"value must be greater than one.")
        else:
            # "last" / "mean": with a seasonal periodicity, the window must
            # be at least `sp` long.
            window_too_short = (
                self.window_length is not None
                and self.sp != 1
                and self.window_length < self.sp
            )
            if window_too_short:
                raise ValueError(f"The `window_length`: "
                                 f"{self.window_length} is smaller than "
                                 f"`sp`: {self.sp}.")
            self.window_length_ = check_window_length(
                self.window_length, n_timepoints
            )
            self.sp_ = check_sp(self.sp)
            # Default to the full series when no window length was given.
            if self.window_length is None:
                self.window_length_ = len(y)

        # The window must fit into the stored training series.
        if self.window_length_ > len(self._y):
            if self.strategy == "last" and self.sp != 1:
                param = "sp"
            else:
                param = "window_length_"
            raise ValueError(
                f"The {param}: {self.window_length_} is larger than "
                f"the training series.")

        return self
Esempio n. 24
0
def _sliding_window_transform(y,
                              window_length,
                              fh,
                              X=None,
                              scitype="tabular-regressor"):
    """Transform time series data `y` and `X` using sliding window.

    See `test_sliding_window_transform_explicit` in test_reduce.py for explicit
    example.

    Parameters
    ----------
    y : pd.Series
        Endogenous time series
    window_length : int
        Window length for transformed feature variables
    fh : ForecastingHorizon
        Forecasting horizon for transformed target variable
    X : pd.DataFrame, optional (default=None)
        Exogenous series.
    scitype : str {"tabular-regressor", "time-series-regressor"}, optional
        Scitype of estimator to use with transformed data.
        - If "tabular-regressor", returns X as tabular 2d array
        - If "time-series-regressor", returns X as panel 3d array

    Returns
    -------
    yt : np.ndarray
        Transformed target variable, one row per complete window.
        NOTE(review): the exact shape depends on what `_check_fh` returns -
        confirm.
    Xt : np.ndarray
        Transformed lagged values of target variable and exogenous variables,
        excluding contemporaneous values. 3d with the window on the last
        axis; reshaped to 2d when `scitype` is "tabular-regressor".

    Raises
    ------
    ValueError
        If ``window_length + fh[-1]`` is not smaller than the number of time
        points.
    """
    # There are different ways to implement this transform. Pre-allocating an
    # array and filling it by iterating over the window length seems to be the most
    # efficient one.
    n_timepoints = y.shape[0]
    window_length = check_window_length(window_length, n_timepoints)

    # `n_timepoints` is re-read from the combined array below.
    z = _concat_y_X(y, X)
    n_timepoints, n_variables = z.shape

    fh = _check_fh(fh)
    # NOTE(review): taking the last element as the maximum assumes the
    # checked horizon is sorted ascending - confirm against `_check_fh`.
    fh_max = fh[-1]

    if window_length + fh_max >= n_timepoints:
        raise ValueError(
            "The `window_length` and `fh` are incompatible with the length of `y`"
        )

    # Get the effective window length accounting for the forecasting horizon.
    effective_window_length = window_length + fh_max

    # Pre-allocate array for sliding windows.
    Zt = np.zeros((
        n_timepoints + effective_window_length,
        n_variables,
        effective_window_length + 1,
    ))

    # Transform data.
    # Slice k of the last axis receives the whole of `z`, written at a row
    # offset that shrinks by one as k grows.
    for k in range(effective_window_length + 1):
        i = effective_window_length - k
        j = n_timepoints + effective_window_length - k
        Zt[i:j, :, k] = z

    # Truncate data, selecting only full windows, discarding incomplete ones.
    Zt = Zt[effective_window_length:-effective_window_length]

    # Return transformed feature and target variables separately. This excludes
    # contemporaneous values of the exogenous variables. Including them would lead to
    # unequal-length data, with more time points for exogenous series than the target
    # series, which is currently not supported.
    # The target is read from variable 0 at the window positions
    # `window_length + fh`. NOTE(review): this presumes `_check_fh` returns
    # integer offsets usable as an array index - confirm.
    yt = Zt[:, 0, window_length + fh]
    Xt = Zt[:, :, :window_length]

    # If the scitype is tabular regression, we have to convert X into a 2d array.
    if scitype == "tabular-regressor":
        return yt, Xt.reshape(Xt.shape[0], -1)
    else:
        return yt, Xt
def test_check_window_length(window_length, n_timepoints, expected):
    """Check that `check_window_length` returns the expected value."""
    result = check_window_length(window_length, n_timepoints)
    assert result == expected
def test_window_length_bad_arg(window_length, n_timepoints):
    """Check that `check_window_length` raises ValueError on the given input."""
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        check_window_length(window_length, n_timepoints)
Esempio n. 27
0
 def _get_start(self):
     """Return the first split point.

     This is the validated window length when `start_with_window` is set,
     and 0 otherwise. The window length is validated in both cases.
     """
     window_length = check_window_length(self.window_length)
     return window_length if self.start_with_window else 0
Esempio n. 28
0
    def _fit(self, y, X=None, fh=None):
        """Fit one estimator per horizon step on an expanding feature window.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables. Must be None; they are not supported here.
        fh : int, list or np.array, optional (default=None)
             Not read here; the horizon is taken from ``self.fh``.

        Returns
        -------
        self : Estimator
            A fitted instance of self.

        Raises
        ------
        NotImplementedError
            If `X` is given, or if the horizon contains in-sample steps.
        """
        # Exogenous variables are not yet supported for the dirrec strategy.
        if X is not None:
            raise NotImplementedError(
                f"{self.__class__.__name__} does not yet support exogenous "
                f"variables `X`.")

        if len(self.fh.to_in_sample(self.cutoff)) > 0:
            raise NotImplementedError(
                "In-sample predictions are not implemented")

        self.window_length_ = check_window_length(self.window_length,
                                                  n_timepoints=len(y))

        # Transform the data using sliding-window.
        yt, Xt = self._transform(y, X)

        # We cast the 2d tabular array into a 3d panel array to handle the data
        # consistently for the reduction to tabular and time-series regression.
        if self._estimator_scitype == "tabular-regressor":
            Xt = np.expand_dims(Xt, axis=1)

        # This only works without exogenous variables. To support exogenous
        # variables, we need additional values for X to fill the array
        # appropriately.
        # The targets are appended after the features along the last axis.
        # NOTE(review): assumes `yt` is 2d (rows x horizon steps) so that it
        # lines up with the 3d `Xt` after `expand_dims` - confirm.
        X_full = np.concatenate([Xt, np.expand_dims(yt, axis=1)], axis=2)

        self.estimators_ = []
        # Size of the last axis of the feature array, i.e. how many entries
        # of `X_full` along that axis come from `Xt`.
        n_timepoints = Xt.shape[2]

        for i in range(len(self.fh)):
            estimator = clone(self.estimator)

            # Slice data using expanding window.
            # For step i the features are followed by the first i target
            # columns.
            X_fit = X_full[:, :, :n_timepoints + i]

            # Convert to 2d tabular array for reduction to tabular regression.
            if self._estimator_scitype == "tabular-regressor":
                X_fit = X_fit.reshape(X_fit.shape[0], -1)

            # Column i of `yt` is the target for horizon step i.
            estimator.fit(X_fit, yt[:, i])
            self.estimators_.append(estimator)

        self._is_fitted = True
        return self