Ejemplo n.º 1
0
def _cutoffs_fh_window_length_types_are_supported(
    cutoffs: VALID_CUTOFF_TYPES,
    fh: FORECASTING_HORIZON_TYPES,
    window_length: ACCEPTED_WINDOW_LENGTH_TYPES,
) -> bool:
    """Tell whether the types of the three inputs can be used together.

    Exactly two combinations are accepted:

    * integer based: `cutoffs` and `fh` are integer arrays and
      `window_length` is an integer;
    * date based: `cutoffs` is a datetime64 array, `fh` holds timedeltas or
      date offsets, and `window_length` is a timedelta or date offset.

    Parameters
    ----------
    cutoffs : np.array or pd.Index
        cutoff points, positive and integer- or datetime-index like
    fh : int, timedelta, list or np.array of ints or timedeltas
    window_length : int or timedelta or pd.DateOffset

    Returns
    -------
    bool
        True if the inputs match one of the two combinations above,
        False otherwise.
    """
    # Both combinations are always evaluated, one after the other.
    integer_based = (
        array_is_int(cutoffs) and array_is_int(fh) and is_int(window_length)
    )
    date_based = (
        array_is_datetime64(cutoffs)
        and array_is_timedelta_or_date_offset(fh)
        and is_timedelta_or_date_offset(window_length)
    )
    return bool(integer_based or date_based)
Ejemplo n.º 2
0
def _check_cutoffs_against_train_windows(cutoffs, windows, y):
    """Assert that every cutoff equals the last element of its train window.

    For integer cutoffs the last entry of each window is compared directly.
    For datetime64 cutoffs the last entry is first looked up in `y.index` and
    converted with `to_datetime64()`.

    Raises
    ------
    ValueError
        If `cutoffs` is neither an integer nor a datetime64 array.
    AssertionError
        If a cutoff differs from the last element of its train window.
    """
    # Pick, once, how the last element of a window maps to a cutoff value.
    if array_is_int(cutoffs):
        array_kwargs = {}

        def _cutoff_of(window):
            return window[-1]

    elif array_is_datetime64(cutoffs):
        array_kwargs = {"dtype": "datetime64"}

        def _cutoff_of(window):
            return y.index[window[-1]].to_datetime64()

    else:
        raise ValueError(
            f"Provided `cutoffs` type is not supported: {type(cutoffs[0])}")

    # Every window after the first one must end exactly on its cutoff.
    expected = np.array(
        [_cutoff_of(window) for window in windows[1:]], **array_kwargs)
    np.testing.assert_array_equal(expected, cutoffs[1:])

    # The first window is handled on its own because it may be empty when
    # `start_with_window=False`.
    first_window = windows[0]
    if len(first_window) > 0:
        np.testing.assert_array_equal(_cutoff_of(first_window), cutoffs[0])
Ejemplo n.º 3
0
def test_single_window_splitter_default_window_length(y, fh):
    """Test SingleWindowSplitter when no `window_length` is passed."""
    splitter = SingleWindowSplitter(fh=fh)
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)
    train_window, test_window = train_windows[0], test_windows[0]

    # A single split whose test window has one entry per horizon step.
    assert n_splits == 1
    checked_fh = check_fh(fh)
    assert test_window.shape[0] == len(checked_fh)

    # Expected size of the train window.
    if splitter.get_fh().is_all_in_sample():
        assert train_window.shape[0] == len(y)
    elif array_is_int(checked_fh):
        assert train_window.shape[0] == len(y) - checked_fh.max()
    else:
        assert train_window.shape[0] == len(
            y[y.index <= y.index.max() - checked_fh.max()])

    # Expected positions of the test window, relative to the end of the
    # train window.
    if array_is_int(checked_fh):
        test_window_expected = train_window[-1] + checked_fh
    else:

        def _position_of(step):
            return y.index.get_loc(y.index[train_window[-1]] + step)

        test_window_expected = np.array(
            [_position_of(step) for step in checked_fh])
    np.testing.assert_array_equal(test_window, test_window_expected)
Ejemplo n.º 4
0
def _get_end(y_index: pd.Index, fh: ForecastingHorizon) -> int:
    """Return the position in `y_index` of the last training point.

    The result `end` is an integer position, so `y_index[end]` is the index
    label of the last training observation and, for a series `y` indexed by
    `y_index`, `y.iloc[end]` is that observation.

    Parameters
    ----------
    y_index : pd.Index
        Index of time series
    fh : ForecastingHorizon
        Forecasting horizon; assumed to be already ordered and checked by the
        caller.

    Returns
    -------
    end : int
        0-indexed integer end of the training window
    """
    n_timepoints = y_index.shape[0]
    assert isinstance(y_index, pd.Index)

    int_horizon = array_is_int(fh)

    # A purely in-sample horizon needs no room after the training data, so
    # the offset is zero (in the horizon's own unit). Otherwise the largest
    # horizon step must still fall inside the data.
    if fh.is_all_in_sample():
        offset = 0 if int_horizon else pd.Timedelta(0)
    else:
        offset = fh[-1]

    if int_horizon:
        return n_timepoints - offset - 1
    return y_index.get_loc(y_index[-1] - offset)
Ejemplo n.º 5
0
def _check_cutoffs_and_y(cutoffs: VALID_CUTOFF_TYPES,
                         y: ACCEPTED_Y_TYPES) -> None:
    """Check that the largest cutoff lies strictly inside `y`.

    Parameters
    ----------
    cutoffs : np.array or pd.Index
        cutoff points, positive and integer- or datetime-index like
    y : pd.Series, pd.DataFrame, np.ndarray, or pd.Index
        coerced and checked version of input y

    Raises
    ------
    ValueError
        if the largest cutoff is not smaller than the number of observations
        (integer cutoffs) or than `np.max(y)` (datetime64 cutoffs)
    TypeError
        if `cutoffs` type is not supported
    """
    largest_cutoff = np.max(cutoffs)

    # Select the bound to compare against and the matching message ending.
    if array_is_int(cutoffs):
        upper_bound = y.shape[0]
        bound_name = "number of observations."
    elif array_is_datetime64(cutoffs):
        # NOTE(review): the maximum is taken over `y` itself, which presumably
        # is an index at this point -- confirm against the callers.
        upper_bound = np.max(y)
        bound_name = "maximum index value of `y`."
    else:
        raise TypeError("Unsupported type of `cutoffs`")

    if largest_cutoff >= upper_bound:
        raise ValueError(
            "`cutoffs` are incompatible with given `y`. "
            "Maximum cutoff is not smaller than the " + bound_name)
Ejemplo n.º 6
0
def check_cutoffs(cutoffs: VALID_CUTOFF_TYPES) -> np.ndarray:
    """Validate the cutoffs and return them sorted.

    Parameters
    ----------
    cutoffs : np.ndarray or pd.Index

    Returns
    -------
    np.ndarray
        The cutoffs in ascending order, as produced by `np.sort`.

    Raises
    ------
    ValueError
        If `cutoffs` is not an instance of `ACCEPTED_CUTOFF_TYPES`.
        If `cutoffs` is empty.
    AssertionError
        If `cutoffs` is accepted by neither `array_is_int` nor
        `array_is_datetime64`.
    """
    if not isinstance(cutoffs, ACCEPTED_CUTOFF_TYPES):
        raise ValueError(
            f"`cutoffs` must be a np.array or pd.Index, but found: {type(cutoffs)}"
        )

    # The element type has to be one of the two supported kinds.
    assert any(
        is_kind(cutoffs) for is_kind in (array_is_int, array_is_datetime64))

    if not len(cutoffs):
        raise ValueError("Found empty `cutoff` array")

    return np.sort(cutoffs)
Ejemplo n.º 7
0
def test_single_window_splitter(y, fh, window_length):
    """Test SingleWindowSplitter with an explicit `window_length`."""
    # Unsupported type combinations must be rejected at construction time.
    if not _inputs_are_supported([fh, window_length]):
        with pytest.raises(TypeError,
                           match="Unsupported combination of types"):
            SingleWindowSplitter(fh=fh, window_length=window_length)
        return

    splitter = SingleWindowSplitter(fh=fh, window_length=window_length)
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)
    train_window, test_window = train_windows[0], test_windows[0]

    assert n_splits == 1
    # The train window spans `window_length`, expressed as a number of steps
    # at daily frequency.
    assert train_window.shape[0] == _coerce_duration_to_int(
        duration=window_length, freq="D")
    checked_fh = check_fh(fh)
    assert test_window.shape[0] == len(checked_fh)

    # Expected positions of the test window, relative to the end of the
    # train window.
    if array_is_int(checked_fh):
        test_window_expected = train_window[-1] + checked_fh
    else:

        def _position_of(step):
            return y.index.get_loc(y.index[train_window[-1]] + step)

        test_window_expected = np.array(
            [_position_of(step) for step in checked_fh])
    np.testing.assert_array_equal(test_window, test_window_expected)
Ejemplo n.º 8
0
    def get_cutoffs(self, y: Optional[ACCEPTED_Y_TYPES] = None) -> np.ndarray:
        """Return the cutoff points.

        Only one train/test split is produced, so the result is a
        one-element, one-dimensional array holding the last train set index:
        the integer position for an integer horizon, otherwise the
        corresponding index value converted with `to_datetime64()`.

        Parameters
        ----------
        y : pd.Series or pd.Index, optional (default=None)
            Time series to split

        Returns
        -------
        cutoffs : np.array
            The array of cutoff points.

        Raises
        ------
        ValueError
            If `y` is None.
        """
        if y is None:
            raise ValueError(
                f"{self.__class__.__name__} requires `y` to compute the cutoffs."
            )

        horizon = _check_fh(self.fh)
        index = get_index_for_series(y)
        end = _get_end(y_index=index, fh=horizon)

        if array_is_int(horizon):
            cutoff = end
        else:
            cutoff = index[end].to_datetime64()
        return np.array([cutoff])
Ejemplo n.º 9
0
    def _split(self, y: pd.Index) -> SPLIT_GENERATOR_TYPE:
        """Yield the single (train, test) pair of window positions for `y`.

        The train window ends at the position returned by `_get_end`. Its
        start depends on `window_length`: position 0 when it is None,
        `window_length` steps back when it is an integer, and otherwise the
        first position whose index value is greater than
        `y[end] - window_length`.
        """
        window_length = check_window_length(self.window_length, y.shape[0])
        fh = _check_fh(self.fh)
        end = _get_end(y_index=y, fh=fh)

        if window_length is None:
            train_start = 0
        elif is_int(window_length):
            train_start = end - window_length + 1
        else:
            inside_window = y > y[end] - window_length
            train_start = np.argwhere(inside_window).flatten()[0]

        train = self._get_train_window(
            y=y, train_start=train_start, split_point=end + 1)

        # Test positions are offsets from the end of the train window: added
        # directly for an integer horizon, looked up in the index otherwise.
        if array_is_int(fh):
            test = end + fh.to_numpy()
        else:
            test = np.array(
                [y.get_loc(y[end] + step) for step in fh.to_pandas()])

        yield train, test
Ejemplo n.º 10
0
def _check_values(values: Union[VALID_FORECASTING_HORIZON_TYPES]) -> pd.Index:
    """Validate forecasting horizon values.

    Validation checks validity and also converts forecasting horizon values
    to supported pandas.Index types if possible.

    Parameters
    ----------
    values : int, timedelta, date offset, list, array, certain pd.Index types
        Forecasting horizon with steps ahead to predict.

    Raises
    ------
    TypeError
        Raised if `values` type is not supported
    ValueError
        Raised if `values` contains duplicates

    Returns
    -------
    values : pd.Index
        Sorted and validated forecasting horizon values.
    """
    # If values are one of the supported pandas index types, nothing has to be
    # converted because the forecasting horizon directly wraps the index.
    # isinstance() does not work here because index types inherit from each
    # other, hence the check for exact type equality.
    if type(values) in VALID_INDEX_TYPES:
        pass

    # A single value becomes a one-element index; it cannot contain duplicates
    # and is trivially sorted, so it is returned right away.
    # `pd.Index(..., dtype="int64")` is used instead of `pd.Int64Index`, which
    # was deprecated in pandas 1.4 and removed in pandas 2.0.
    elif is_int(values):
        return pd.Index([values], dtype="int64")

    elif is_timedelta_or_date_offset(values):
        return pd.Index([values])

    # convert np.array or list to pandas index
    elif is_array(values) and array_is_int(values):
        values = pd.Index(values, dtype="int64")

    elif is_array(values) and array_is_timedelta_or_date_offset(values):
        values = pd.Index(values)

    # otherwise, raise type error
    else:
        valid_types = (
            "int",
            "np.array",
            "list",
            *[f"pd.{index_type.__name__}" for index_type in VALID_INDEX_TYPES],
        )
        raise TypeError(
            f"Invalid `fh`. The type of the passed `fh` values is not supported. "
            f"Please use one of {valid_types}, but found: {type(values)}")

    # check values does not contain duplicates
    if len(values) != values.nunique():
        raise ValueError(
            "Invalid `fh`. The `fh` values must not contain any duplicates.")

    # return sorted values
    return values.sort_values()
Ejemplo n.º 11
0
def _check_cutoffs(cutoffs):
    """Assert that `cutoffs` is a non-empty 1-D integer or datetime64 array."""
    # Container type first, so the attribute accesses below are safe.
    assert isinstance(cutoffs, np.ndarray)
    # Element type must be one of the two supported kinds.
    assert any(
        is_kind(cutoffs) for is_kind in (array_is_int, array_is_datetime64))
    # Exactly one dimension ...
    assert cutoffs.ndim == 1
    # ... holding at least one cutoff.
    assert len(cutoffs) != 0