Ejemplo n.º 1
0
def test_sliding_window_splitter_with_initial_window(y, fh, window_length,
                                                     step_length,
                                                     initial_window):
    """Test SlidingWindowSplitter when an initial window is supplied.

    For supported input combinations the first training window must have the
    size of ``initial_window``, every later training window the size of
    ``window_length``, and every test window ``len(check_fh(fh))`` entries.
    Unsupported combinations must raise a ``TypeError`` on construction.
    """
    splitter_kwargs = {
        "fh": fh,
        "window_length": window_length,
        "step_length": step_length,
        "initial_window": initial_window,
        "start_with_window": True,
    }
    supported = _inputs_are_supported(
        [fh, initial_window, window_length, step_length])

    if not supported:
        # Invalid type combinations are rejected by the constructor itself.
        with pytest.raises(TypeError,
                           match="Unsupported combination of types"):
            SlidingWindowSplitter(**splitter_kwargs)
        return

    cv = SlidingWindowSplitter(**splitter_kwargs)
    train_windows, test_windows, _, n_splits = _check_cv(cv, y)

    # The very first training window is governed by `initial_window` ...
    first_train_size = train_windows[0].shape[0]
    assert first_train_size == _coerce_duration_to_int(
        duration=initial_window, freq="D")

    # ... while all remaining ones share the regular `window_length`.
    later_train_shape = np.vstack(train_windows[1:]).shape
    assert later_train_shape == (
        n_splits - 1,
        _coerce_duration_to_int(duration=window_length, freq="D"),
    )

    test_shape = np.vstack(test_windows).shape
    assert test_shape == (n_splits, len(check_fh(fh)))
Ejemplo n.º 2
0
    def to_relative(self, cutoff=None):
        """Return the forecasting horizon expressed relative to a cutoff.

        Parameters
        ----------
        cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
            Cutoff value is required to convert a relative forecasting
            horizon to an absolute one and vice versa.

        Returns
        -------
        fh : ForecastingHorizon
            Relative representation of forecasting horizon
        """
        # Nothing to convert when the horizon is already relative.
        if self.is_relative:
            return self._new()

        self._check_cutoff(cutoff)
        offsets = self.to_pandas() - cutoff

        # Period/datetime based horizons produce durations after the
        # subtraction; those are turned into integers using the cutoff's unit.
        temporal_types = (pd.PeriodIndex, pd.DatetimeIndex)
        if isinstance(self.to_pandas(), temporal_types):
            offsets = _coerce_duration_to_int(offsets, unit=_get_unit(cutoff))

        return self._new(offsets, is_relative=True)
Ejemplo n.º 3
0
    def to_absolute_int(self, start, cutoff=None):
        """Return absolute values as zero-based integer index starting from `start`.

        Parameters
        ----------
        start : pd.Period, pd.Timestamp, int
            Start value returned as zero.
        cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
            Cutoff value required to convert a relative forecasting
            horizon to an absolute one (and vice versa).

        Returns
        -------
        fh : ForecastingHorizon
            Absolute representation of forecasting horizon as zero-based
            integer index.
        """
        # Only `start` is validated explicitly here; `cutoff` is handed to
        # `to_absolute`, which is where it gets used for the conversion.
        horizon = self.to_absolute(cutoff).to_pandas()
        _check_start(start, horizon)

        # Note: ideally the values would be coerced to periods first (as in
        # `to_relative`) for more reliable arithmetic, but that currently does
        # not work with `update_predict` and incomplete time indices where the
        # `freq` information is lost, see comment on issue #534.
        offsets = horizon - start

        if not isinstance(horizon, (pd.PeriodIndex, pd.DatetimeIndex)):
            return self._new(offsets, is_relative=False)

        # Durations are converted to integers using the frequency of `cutoff`.
        as_int = _coerce_duration_to_int(offsets, freq=_get_freq(cutoff))
        return self._new(as_int, is_relative=False)
Ejemplo n.º 4
0
    def _to_relative(self, y):
        """Return the index of `y` relative to the first entry of `self._y_index`.

        Parameters
        ----------
        y : object exposing an ``index`` attribute
            Its index holds the absolute values to convert.

        Returns
        -------
        relative
            ``y.index`` minus the cutoff; for period/datetime indices the
            resulting durations are coerced to integers.
        """
        index = y.index
        cutoff = self._y_index[0]
        _check_cutoff(cutoff, index)

        if isinstance(index, pd.DatetimeIndex):
            # The freq of the index itself may be None for non-regular
            # indices, so the freq of the cutoff is used instead.
            cutoff_freq = _get_freq(cutoff)

            # Periods give reliable arithmetic and time delta computations.
            index = _coerce_to_period(index, cutoff_freq)
            cutoff = _coerce_to_period(cutoff, cutoff_freq)

        deltas = index - cutoff

        if not isinstance(index, (pd.PeriodIndex, pd.DatetimeIndex)):
            return deltas

        # Durations (time deltas) become integers for the given frequency.
        return _coerce_duration_to_int(deltas, freq=_get_freq(cutoff))
Ejemplo n.º 5
0
def test_sliding_window_splitter_start_with_empty_window(
        y, fh, window_length, step_length):
    """Test SlidingWindowSplitter with ``start_with_window=False``.

    Supported inputs: test windows must have ``len(check_fh(fh))`` entries
    and, once the leading incomplete training windows are dropped, all
    training windows must have the size of ``window_length``. Unsupported
    inputs must raise a ``TypeError`` on construction.
    """
    common_kwargs = {
        "fh": fh,
        "window_length": window_length,
        "step_length": step_length,
        "start_with_window": False,
    }

    if not _inputs_are_supported([fh, window_length, step_length]):
        with pytest.raises(TypeError,
                           match="Unsupported combination of types"):
            SlidingWindowSplitter(initial_window=None, **common_kwargs)
        return

    cv = SlidingWindowSplitter(**common_kwargs)
    train_windows, test_windows, _, n_splits = _check_cv(
        cv, y, allow_empty_window=True)

    test_shape = np.vstack(test_windows).shape
    assert test_shape == (n_splits, len(check_fh(fh)))

    # Skip the leading windows that are not yet filled completely.
    n_incomplete = _get_n_incomplete_windows(window_length, step_length)
    complete_windows = train_windows[n_incomplete:]

    complete_shape = np.vstack(complete_windows).shape
    assert complete_shape == (
        n_splits - n_incomplete,
        _coerce_duration_to_int(duration=window_length, freq="D"),
    )
Ejemplo n.º 6
0
def test_single_window_splitter(y, fh, window_length):
    """Test SingleWindowSplitter.

    Supported inputs must give exactly one split whose training window has
    the size of ``window_length`` and whose test window is located at the
    forecasting horizon counted from the last training position.
    Unsupported inputs must raise a ``TypeError`` on construction.
    """
    if not _inputs_are_supported([fh, window_length]):
        with pytest.raises(TypeError,
                           match="Unsupported combination of types"):
            SingleWindowSplitter(fh=fh, window_length=window_length)
        return

    cv = SingleWindowSplitter(fh=fh, window_length=window_length)
    train_windows, test_windows, _cutoffs, n_splits = _check_cv(cv, y)

    train_window, test_window = train_windows[0], test_windows[0]
    assert n_splits == 1

    train_size = train_window.shape[0]
    assert train_size == _coerce_duration_to_int(
        duration=window_length, freq="D")

    checked_fh = check_fh(fh)
    assert test_window.shape[0] == len(checked_fh)

    if array_is_int(checked_fh):
        # Integer horizon: plain positional offsets from the last train index.
        expected = train_window[-1] + checked_fh
    else:
        # Otherwise add each horizon step to the last training index value
        # and look up where the result sits in the index of `y`.
        locations = [
            y.index.get_loc(y.index[train_window[-1]] + offset)
            for offset in checked_fh
        ]
        expected = np.array(locations)

    np.testing.assert_array_equal(test_window, expected)
Ejemplo n.º 7
0
def _to_relative(fh: ForecastingHorizon, cutoff=None) -> ForecastingHorizon:
    """Return forecasting horizon values relative to a cutoff.

    Parameters
    ----------
    fh : ForecastingHorizon
        Horizon to convert.
    cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
        Cutoff value required to convert a relative forecasting
        horizon to an absolute one (and vice versa).

    Returns
    -------
    fh : ForecastingHorizon
        Relative representation of forecasting horizon.
    """
    # Already relative: no conversion is required.
    if fh.is_relative:
        return fh._new()

    horizon = fh.to_pandas()
    _check_cutoff(cutoff, horizon)

    # The freq of the ForecastingHorizon (or of its wrapped pd.DatetimeIndex)
    # may be None for non-regular indices, so the freq of the cutoff is used.
    cutoff_freq = _get_freq(cutoff)

    if isinstance(horizon, pd.DatetimeIndex):
        # Periods give reliable arithmetic and time delta computations.
        horizon = _coerce_to_period(horizon, cutoff_freq)
        cutoff = _coerce_to_period(cutoff, cutoff_freq)

    # Element-wise subtraction works around a pandas bug with vectorised
    # period subtraction:
    #   periods = pd.period_range(start="2021-01-01", periods=3, freq="2H")
    #   periods - periods[0]
    #   Out: Index([<0 * Hours>, <4 * Hours>, <8 * Hours>], dtype = 'object')
    #   [v - periods[0] for v in periods]
    #   Out: Index([<0 * Hours>, <2 * Hours>, <4 * Hours>], dtype='object')
    # Reported: https://github.com/pandas-dev/pandas/issues/45999
    # Fixed: https://github.com/pandas-dev/pandas/pull/46006
    # (most likely released with pandas 1.5).
    # TODO: v0.12.0: once the lower pandas bound contains the fix, replace
    #  the line below with `relative = absolute - cutoff`.
    deltas = pd.Index([point - cutoff for point in horizon])

    # Durations (time deltas) become integers for the given frequency.
    if isinstance(horizon, (pd.PeriodIndex, pd.DatetimeIndex)):
        deltas = _coerce_duration_to_int(deltas, freq=cutoff_freq)

    return fh._new(deltas, is_relative=True)
Ejemplo n.º 8
0
def test_coerce_duration_to_int(duration):
    """Test coercion of duration to int.

    Checks that the coerced value is of an integer type and, depending on the
    kind of `duration`, that it has the expected value(s).
    """
    ret = _coerce_duration_to_int(duration, unit=_get_unit(duration))

    # check output type is always integer; `isinstance` is used because an
    # exact `type(ret) in (...)` match can never succeed for the abstract
    # `np.integer` (concrete scalars are e.g. `np.int64`)
    assert isinstance(ret, (pd.Int64Index, np.integer, int))

    # check result
    if isinstance(duration, pd.Index):
        np.testing.assert_array_equal(ret, range(3))

    if isinstance(duration, pd.tseries.offsets.BaseOffset):
        # a bare `ret == 3` expression would be evaluated and discarded
        assert ret == 3
Ejemplo n.º 9
0
    def to_absolute_int(self, start, cutoff=None):
        """Return absolute values as zero-based integer index starting from `start`.

        Parameters
        ----------
        start : pd.Period, pd.Timestamp, int
            Start value returned as zero.
        cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
            Cutoff value required to convert a relative forecasting
            horizon to an absolute one (and vice versa).

        Returns
        -------
        fh : ForecastingHorizon
            Absolute representation of forecasting horizon as zero-based
            integer index.
        """
        cutoff_freq = _get_freq(cutoff)

        # Timestamps / datetime indices are coerced to periods throughout for
        # reliable arithmetic operations and computations of time deltas.
        if isinstance(cutoff, pd.Timestamp):
            cutoff = _coerce_to_period(cutoff, freq=cutoff_freq)

        horizon = self.to_absolute(cutoff).to_pandas()
        if isinstance(horizon, pd.DatetimeIndex):
            horizon = _coerce_to_period(horizon, freq=cutoff_freq)

        # Only `start` is checked here; `cutoff` is passed on to
        # `to_absolute` above, where it is used for the conversion.
        if isinstance(start, pd.Timestamp):
            start = _coerce_to_period(start, freq=cutoff_freq)
        _check_start(start, horizon)

        # NOTE(review): an earlier note at this point said that coercing to
        # periods does not work with `update_predict` and incomplete time
        # indices (issue #534); the code above does coerce, so confirm
        # whether that caveat still applies.
        #
        # Element-wise subtraction circumvents a pandas bug:
        #   periods = pd.period_range(start="2021-01-01", periods=3, freq="2H")
        #   periods - periods[0]
        #   Out: Index([<0 * Hours>, <4 * Hours>, <8 * Hours>], dtype = 'object')
        #   [v - periods[0] for v in periods]
        #   Out: Index([<0 * Hours>, <2 * Hours>, <4 * Hours>], dtype='object')
        offsets = pd.Index([point - start for point in horizon])

        if not isinstance(horizon, (pd.PeriodIndex, pd.DatetimeIndex)):
            return self._new(offsets, is_relative=False)

        as_int = _coerce_duration_to_int(offsets, freq=_get_freq(cutoff))
        return self._new(as_int, is_relative=False)
Ejemplo n.º 10
0
def test_expanding_window_splitter(y, fh, initial_window, step_length):
    """Test ExpandingWindowSplitter.

    Test windows must have ``len(check_fh(fh))`` entries, the first training
    window must have the size of ``initial_window`` and the training windows
    must pass ``_check_expanding_windows``.
    """
    splitter_kwargs = {
        "fh": fh,
        "initial_window": initial_window,
        "step_length": step_length,
        "start_with_window": True,
    }
    cv = ExpandingWindowSplitter(**splitter_kwargs)
    train_windows, test_windows, _, n_splits = _check_cv(cv, y)

    test_shape = np.vstack(test_windows).shape
    assert test_shape == (n_splits, len(check_fh(fh)))

    first_train_size = train_windows[0].shape[0]
    assert first_train_size == _coerce_duration_to_int(
        duration=initial_window, freq="D")

    _check_expanding_windows(train_windows)
Ejemplo n.º 11
0
def test_coerce_duration_to_int(duration):
    """Test coercion of duration to int.

    Checks that the coerced value is an integer scalar or an integer index
    and, depending on the kind of `duration`, that it has the expected
    value(s).
    """
    ret = _coerce_duration_to_int(duration, freq=_get_freq(duration))

    # check output type is always integer; `isinstance` is used because an
    # exact `type(ret) in (np.integer, int)` match can never succeed for the
    # abstract `np.integer` (concrete scalars are e.g. `np.int64`)
    assert isinstance(ret, (np.integer, int)) or is_integer_index(ret)

    # check result
    if isinstance(duration, pd.Index):
        np.testing.assert_array_equal(ret, range(3))

    if isinstance(duration, pd.tseries.offsets.BaseOffset):
        assert ret == 3
Ejemplo n.º 12
0
def test_single_window_splitter(y, fh, window_length):
    """Test SingleWindowSplitter.

    Exactly one split is expected; its training window must have the size of
    ``window_length`` and its test window must equal the last training
    position plus the checked forecasting horizon.
    """
    cv = SingleWindowSplitter(fh=fh, window_length=window_length)
    train_windows, test_windows, _cutoffs, n_splits = _check_cv(cv, y)

    train_window, test_window = train_windows[0], test_windows[0]
    assert n_splits == 1

    train_size = train_window.shape[0]
    assert train_size == _coerce_duration_to_int(
        duration=window_length, freq="D")

    test_size = test_window.shape[0]
    assert test_size == len(check_fh(fh))

    # Test positions are offsets from the last position of the train window.
    np.testing.assert_array_equal(
        test_window,
        train_window[-1] + check_fh(fh),
    )
Ejemplo n.º 13
0
def test_sliding_window_splitter_with_initial_window(y, fh, window_length,
                                                     step_length,
                                                     initial_window):
    """Test SlidingWindowSplitter when an initial window is supplied.

    The test is skipped for incompatible ``initial_window`` /
    ``window_length`` pairs. Otherwise the first training window must have
    the size of ``initial_window``, all later ones the size of
    ``window_length`` and every test window ``len(check_fh(fh))`` entries.
    """
    incompatible = _windows_are_incompatible(initial_window, window_length)
    if incompatible:
        pytest.skip(
            "Incompatible initial_window and window_length are tested elsewhere."
        )

    splitter_kwargs = {
        "fh": fh,
        "window_length": window_length,
        "step_length": step_length,
        "initial_window": initial_window,
        "start_with_window": True,
    }
    cv = SlidingWindowSplitter(**splitter_kwargs)
    train_windows, test_windows, _, n_splits = _check_cv(cv, y)

    # The first training window follows `initial_window` ...
    first_train_size = train_windows[0].shape[0]
    assert first_train_size == _coerce_duration_to_int(
        duration=initial_window, freq="D")

    # ... and all remaining ones follow `window_length`.
    later_train_shape = np.vstack(train_windows[1:]).shape
    assert later_train_shape == (
        n_splits - 1,
        _coerce_duration_to_int(duration=window_length, freq="D"),
    )

    test_shape = np.vstack(test_windows).shape
    assert test_shape == (n_splits, len(check_fh(fh)))
Ejemplo n.º 14
0
def test_sliding_window_splitter(y, fh, window_length, step_length):
    """Test SlidingWindowSplitter.

    Every training window must have the size of ``window_length`` and every
    test window ``len(check_fh(fh))`` entries.
    """
    splitter_kwargs = {
        "fh": fh,
        "window_length": window_length,
        "step_length": step_length,
        "start_with_window": True,
    }
    cv = SlidingWindowSplitter(**splitter_kwargs)
    train_windows, test_windows, _, n_splits = _check_cv(cv, y)

    train_shape = np.vstack(train_windows).shape
    assert train_shape == (
        n_splits,
        _coerce_duration_to_int(duration=window_length, freq="D"),
    )

    test_shape = np.vstack(test_windows).shape
    assert test_shape == (n_splits, len(check_fh(fh)))
Ejemplo n.º 15
0
    def to_relative(self, cutoff=None):
        """Return forecasting horizon values relative to a cutoff.

        Parameters
        ----------
        cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
            Cutoff value required to convert a relative forecasting
            horizon to an absolute one (and vice versa).

        Returns
        -------
        fh : ForecastingHorizon
            Relative representation of forecasting horizon.
        """
        # Already relative: no conversion is required.
        if self.is_relative:
            return self._new()

        horizon = self.to_pandas()
        _check_cutoff(cutoff, horizon)

        if isinstance(horizon, pd.DatetimeIndex):
            # The freq of the ForecastingHorizon (or of its wrapped
            # pd.DatetimeIndex) may be None for non-regular indices, so the
            # freq of the cutoff is used instead.
            cutoff_freq = _get_freq(cutoff)

            # Periods give reliable arithmetic and time delta computations.
            horizon = _coerce_to_period(horizon, cutoff_freq)
            cutoff = _coerce_to_period(cutoff, cutoff_freq)

        deltas = horizon - cutoff

        # Durations (time deltas) become integers for the given frequency.
        if isinstance(horizon, (pd.PeriodIndex, pd.DatetimeIndex)):
            deltas = _coerce_duration_to_int(deltas, freq=_get_freq(cutoff))

        return self._new(deltas, is_relative=True)
Ejemplo n.º 16
0
def test_sliding_window_splitter_start_with_empty_window(
        y, fh, window_length, step_length):
    """Test SlidingWindowSplitter with ``start_with_window=False``.

    Test windows must have ``len(check_fh(fh))`` entries and, once the
    leading incomplete training windows are dropped, all training windows
    must have the size of ``window_length``.
    """
    splitter_kwargs = {
        "fh": fh,
        "window_length": window_length,
        "step_length": step_length,
        "start_with_window": False,
    }
    cv = SlidingWindowSplitter(**splitter_kwargs)
    train_windows, test_windows, _, n_splits = _check_cv(
        cv, y, allow_empty_window=True)

    test_shape = np.vstack(test_windows).shape
    assert test_shape == (n_splits, len(check_fh(fh)))

    # Skip the leading windows that are not yet filled completely.
    n_incomplete = _get_n_incomplete_windows(window_length, step_length)
    complete_windows = train_windows[n_incomplete:]

    complete_shape = np.vstack(complete_windows).shape
    assert complete_shape == (
        n_splits - n_incomplete,
        _coerce_duration_to_int(duration=window_length, freq="D"),
    )
Ejemplo n.º 17
0
 def to_absolute_int(self, start, cutoff=None):
     """Return absolute values as a zero-based integer index.

     Parameters
     ----------
     start : pd.Period, pd.Timestamp, int
         Start value
     cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
         Cutoff value is required to convert a relative forecasting
         horizon to an absolute one and vice versa.

     Returns
     -------
     fh : ForecastingHorizon
         Absolute representation of forecasting horizon as zero-based
         integer index
     """
     # `start` is validated with the same check that is used for cutoffs.
     self._check_cutoff(start)

     horizon = self.to_absolute(cutoff).to_pandas()
     offsets = horizon - start

     if not isinstance(horizon, (pd.PeriodIndex, pd.DatetimeIndex)):
         return self._new(offsets, is_relative=False)

     # Durations are converted to integers using the unit of `cutoff`.
     as_int = _coerce_duration_to_int(offsets, unit=_get_unit(cutoff))
     return self._new(as_int, is_relative=False)
Ejemplo n.º 18
0
def _get_n_incomplete_windows(window_length, step_length) -> int:
    """Return ``ceil(window_length / step_length)`` as an int.

    Both arguments are first coerced to integers with ``freq="D"``.
    """
    window_size = _coerce_duration_to_int(duration=window_length, freq="D")
    step_size = _coerce_duration_to_int(duration=step_length, freq="D")
    ratio = window_size / step_size
    return int(np.ceil(ratio))
Ejemplo n.º 19
0
 def _get_step_length(x: NON_FLOAT_WINDOW_LENGTH_TYPES) -> int:
     """Coerce the step length `x` to an integer using ``freq="D"``."""
     step_as_int = _coerce_duration_to_int(duration=x, freq="D")
     return step_as_int
Ejemplo n.º 20
0
def test_coerce_duration_to_int_with_non_allowed_durations(duration):
    """Test that coercing a non-allowed duration raises a ``ValueError``.

    The error message is expected to match "frequency is missing".
    """
    expected_failure = pytest.raises(ValueError, match="frequency is missing")
    with expected_failure:
        _coerce_duration_to_int(duration, freq=_get_freq(duration))