예제 #1
0
def check_sp(sp, enforce_list=False):
    """Validate seasonal periodicity.

    Parameters
    ----------
    sp : int, list or None
        Seasonal periodicity. A list is only accepted when `enforce_list`
        is true; its elements are not inspected by this function.
    enforce_list : bool, optional (default=False)
        If true, a valid integer `sp` is wrapped in a one-element list and
        a list `sp` is passed through unchanged.

    Returns
    -------
    sp : int, list or None
        Validated seasonal periodicity; ``None`` is returned as-is.

    Raises
    ------
    ValueError
        If `sp` is not None and is neither an integer >= 1 nor (with
        `enforce_list`) a list.
    """
    # None means "no seasonality given" and needs no validation.
    if sp is None:
        return sp

    is_valid_int = is_int(sp) and sp >= 1

    if enforce_list:
        if is_valid_int:
            return [sp]
        if isinstance(sp, list):
            return sp
        raise ValueError("`sp` must be an int >= 1, [float/int] or None")

    if is_valid_int:
        return sp
    raise ValueError("`sp` must be an int >= 1 or None")
예제 #2
0
def check_fh_values(values):
    """Validate forecasting horizon values.

    Parameters
    ----------
    values : int, list of int, array of int
        Forecasting horizon with steps ahead to predict.

    Returns
    -------
    fh : numpy array of int
        Sorted and validated forecasting horizon.

    Raises
    ------
    TypeError
        If `values` is not an integer, a list of integers or a 1d integer
        array, or if it is empty or contains duplicates.
    """
    # A single integer becomes a one-element array.
    # NOTE: the `np.int` alias was removed in NumPy 1.24; the builtin `int`
    # is the exact dtype it used to refer to.
    if is_int(values):
        values = np.array([values], dtype=int)

    # Arrays must be at most 1d and of integer dtype.
    elif isinstance(values, np.ndarray):
        if values.ndim > 1:
            raise TypeError(f"`fh` must be a 1d array, but found shape: "
                            f"{values.shape}")

        if not np.issubdtype(values.dtype, np.integer):
            raise TypeError(f"If `fh` is passed as an array, it must "
                            f"be an array of integers, but found an "
                            f"array of type: {values.dtype}")

    # Lists must contain only integers and are converted to an array.
    elif isinstance(values, list):
        if not all(is_int(h) for h in values):
            raise TypeError("If `fh` is passed as a list, "
                            "it has to be a list of integers.")
        values = np.array(values, dtype=int)

    else:
        raise TypeError(f"`fh` has to be either a numpy array, list, "
                        f"or a single integer, but found: {type(values)}")

    # An empty horizon is not allowed.
    if len(values) < 1:
        raise TypeError("`fh` cannot be empty, please specify at least one "
                        "step to forecast.")

    # Duplicates are not allowed.
    if len(values) != len(np.unique(values)):
        raise TypeError("`fh` should not contain duplicates.")

    return np.sort(values)
예제 #3
0
    def fit(self, X, y=None):
        """Fit the transformer by resolving the interval specification.

        Parameters
        ----------
        X : pandas DataFrame of shape [n_samples, n_features]
            Input data; validated with ``check_X(X, enforce_univariate=True)``.
        y : pandas Series, shape (n_samples, ...), optional
            Targets for supervised learning. Not used by this method.

        Returns
        -------
        self : an instance of self.

        Raises
        ------
        ValueError
            If `intervals` is neither a numpy array nor an integer.
        """
        X = check_X(X, enforce_univariate=True)
        self.input_shape_ = X.shape

        # Time index of the input, used when intervals are given as a count.
        self._time_index = get_time_index(X)

        intervals = self.intervals
        if isinstance(intervals, np.ndarray):
            # An array specification is stored unchanged.
            self.intervals_ = intervals
        elif is_int(intervals):
            # An integer asks for that many chunks of the time index.
            self.intervals_ = np.array_split(self._time_index, intervals)
        else:
            raise ValueError(
                f"Intervals must be either an integer, an array with "
                f"start and end points, but found: {intervals}")

        self._is_fitted = True
        return self
예제 #4
0
def _cutoffs_fh_window_length_types_are_supported(
    cutoffs: VALID_CUTOFF_TYPES,
    fh: FORECASTING_HORIZON_TYPES,
    window_length: ACCEPTED_WINDOW_LENGTH_TYPES,
) -> bool:
    """Tell whether the types of the three inputs form a supported combination.

    Two combinations are supported: all inputs are integer-like, or
    `cutoffs` is datetime-like while `fh` and `window_length` are
    timedelta- or date-offset-like.

    Parameters
    ----------
    cutoffs : np.array or pd.Index
        cutoff points, positive and integer- or datetime-index like
    fh : int, timedelta, list or np.array of ints or timedeltas
    window_length : int or timedelta or pd.DateOffset

    Returns
    -------
    bool
        True if the inputs match one of the supported combinations,
        False otherwise.
    """
    integer_inputs = (
        array_is_int(cutoffs)
        and array_is_int(fh)
        and is_int(window_length)
    )
    datetime_inputs = (
        array_is_datetime64(cutoffs)
        and array_is_timedelta_or_date_offset(fh)
        and is_timedelta_or_date_offset(window_length)
    )
    return bool(integer_inputs or datetime_inputs)
예제 #5
0
def check_window_length(window_length):
    """Validate window length.

    Parameters
    ----------
    window_length : int or None
        Window length to validate.

    Returns
    -------
    window_length : int or None
        The input, unchanged, when it is None or an integer >= 1.

    Raises
    ------
    ValueError
        If `window_length` is not None and not an integer >= 1.
    """
    # None is accepted without further checks.
    if window_length is None:
        return window_length

    is_invalid = not is_int(window_length) or window_length < 1
    if is_invalid:
        raise ValueError(
            f"`window_length_` must be a positive integer >= 1 or None, "
            f"but found: {window_length}")
    return window_length
예제 #6
0
def check_step_length(step_length):
    """Validate step length.

    Returns `step_length` unchanged when it is None or an integer >= 1;
    raises ValueError otherwise.
    """
    if step_length is None:
        return step_length

    if not is_int(step_length) or step_length < 1:
        raise ValueError(
            f"`step_length` must be a positive integer >= 1 or None, "
            f"but found: {step_length}")

    return step_length
예제 #7
0
def check_window_properties(windows, allow_empty=False):
    """Assert the common properties expected of a list of windows.

    Every window must be a 1-dimensional numpy array whose elements all
    pass `is_int`; unless `allow_empty` is true, it must also be non-empty.
    """
    assert isinstance(windows, list)

    must_be_non_empty = not allow_empty
    for window in windows:
        assert isinstance(window, np.ndarray)
        for idx in window:
            assert is_int(idx)
        assert window.ndim == 1
        if must_be_non_empty:
            assert len(window) > 0
예제 #8
0
파일: boxcox.py 프로젝트: ckastner/sktime
def _guerrero(x, sp, bounds=None):
    r"""
    Returns lambda estimated by the Guerrero method [Guerrero].
    Parameters
    ----------
    x : ndarray
        Input array. Must be 1-dimensional.
    sp : integer
        Seasonal periodicity value. Must be an integer >= 2
    bounds : {None, (float, float)}, optional
        Bounds on lambda to be used in minimization.
    Returns
    -------
    lambda : float
        Lambda value that minimizes the coefficient of variation of
        variances of the time series in different periods after
        Box-Cox transformation [Guerrero].

    References
    ----------
    [Guerrero] V.M. Guerrero, "Time-series analysis supported by Power
    Transformations ", Journal of Forecasting, Vol. 12, 37-48 (1993)
    https://doi.org/10.1002/for.3980120104
    """

    if sp is None or not is_int(sp) or sp < 2:
        raise ValueError(
            "Guerrero method requires an integer seasonal periodicity (sp) value >= 2."
        )

    x = np.asarray(x)
    if x.ndim != 1:
        raise ValueError("Data must be 1-dimensional.")

    num_obs = len(x)
    len_prefix = num_obs % sp

    x_trimmed = x[len_prefix:]
    x_mat = x_trimmed.reshape((-1, sp))
    x_mean = np.mean(x_mat, axis=1)

    # [Guerrero, Eq.(5)] uses an unbiased estimation for
    # the standard deviation
    x_std = np.std(x_mat, axis=1, ddof=1)

    def _eval_guerrero(lmb, x_std, x_mean):
        x_ratio = x_std / x_mean**(1 - lmb)
        x_ratio_cv = variation(x_ratio)
        return x_ratio_cv

    optimizer = _make_boxcox_optimizer(bounds)
    return optimizer(_eval_guerrero, args=(x_std, x_mean))
예제 #9
0
def _check_cutoffs_against_test_windows(cutoffs, windows, fh, y):
    """Assert that `cutoffs` match the cutoffs implied by the test windows.

    The expected cutoff of a window is its last entry minus the last `fh`
    value. The last entry is used because some windows may be incomplete,
    with no first value, whereas the last value will always be there.
    """
    fh = check_fh(fh)
    last_step = fh[-1]

    if is_int(last_step):
        implied = [window[-1] - last_step for window in windows]
    elif array_is_timedelta_or_date_offset(fh):
        # Window entries are positions into `y.index` in this case.
        implied = [
            (y.index[window[-1]] - last_step).to_datetime64()
            for window in windows
        ]
    else:
        raise ValueError(
            f"Provided `fh` type is not supported: {type(last_step)}")

    expected = np.array(implied)
    np.testing.assert_array_equal(cutoffs, expected)
예제 #10
0
def _check_cutoffs_fh_y(cutoffs: VALID_CUTOFF_TYPES,
                        fh: FORECASTING_HORIZON_TYPES,
                        y: ACCEPTED_Y_TYPES) -> None:
    """Check that `cutoffs`, `fh` and `y` are compatible with each other.

    Two type combinations are handled: the largest cutoff and the largest
    `fh` value are both integers, or they are a datetime and a timedelta
    respectively.

    Parameters
    ----------
    cutoffs : np.array or pd.Index
        Cutoff points, positive and integer- or datetime-index like.
        Type should match the type of `fh` input.
    fh : int, timedelta, list or np.array of ints or timedeltas
        Type should match the type of `cutoffs` input.
    y : pd.Series, pd.DataFrame, np.ndarray, or pd.Index
        coerced and checked version of input y

    Raises
    ------
    ValueError
        if max cutoff plus max `fh` is above the last observation in `y`
    TypeError
        if `cutoffs` and `fh` type combination is not supported
    """
    max_cutoff = np.max(cutoffs)
    max_fh = fh.max()

    if is_int(x=max_cutoff) and is_int(x=max_fh):
        # Integer case: compare against the number of rows in `y`.
        beyond_y = max_cutoff + max_fh > y.shape[0]
    elif is_datetime(x=max_cutoff) and is_timedelta(x=max_fh):
        # Datetime case: compare against the largest value in `y`.
        beyond_y = max_cutoff + max_fh > y.max()
    else:
        raise TypeError("Unsupported type of `cutoffs` and `fh`")

    if beyond_y:
        raise ValueError("`fh` is incompatible with given `cutoffs` and `y`.")
예제 #11
0
    def _split(self, y: ACCEPTED_Y_TYPES) -> SPLIT_GENERATOR_TYPE:
        """Yield one ``(training_window, test_window)`` pair per cutoff.

        `self.cutoffs`, `self.fh` and `self.window_length` are validated
        first, individually and then against each other and against `y`.

        Parameters
        ----------
        y : index-like
            Object being split. It is accessed via ``shape``, ``y[0]``,
            ``min()`` and ``get_loc``.

        Yields
        ------
        training_window : np.ndarray
            Integer positions ``train_start + 1`` up to and including the
            cutoff position.
        test_window : np.ndarray
            ``cutoff + fh`` for integer inputs; for datetime cutoffs with
            timedelta `fh`, the positions in `y` of those timestamps.

        Raises
        ------
        TypeError
            If the types of `window_length` and a cutoff do not form a
            supported combination.
        """
        n_timepoints = y.shape[0]
        cutoffs = check_cutoffs(cutoffs=self.cutoffs)
        fh = _check_fh(fh=self.fh)
        window_length = check_window_length(window_length=self.window_length,
                                            n_timepoints=n_timepoints)
        # Cross-checks between the validated inputs and `y`.
        _check_cutoffs_fh_window_length(cutoffs=cutoffs,
                                        fh=fh,
                                        window_length=window_length)
        _check_cutoffs_and_y(cutoffs=cutoffs, y=y)
        _check_cutoffs_fh_y(cutoffs=cutoffs, fh=fh, y=y)
        max_fh = fh.max()
        max_cutoff = np.max(cutoffs)

        for cutoff in cutoffs:
            if is_int(x=window_length) and is_int(x=cutoff):
                train_start = cutoff - window_length
            elif is_timedelta_or_date_offset(x=window_length) and is_datetime(
                    x=cutoff):
                # Clamp the window start to the first element of `y`, then
                # convert the timestamp to its position.
                train_start = y.get_loc(max(y[0], cutoff - window_length))
            else:
                raise TypeError(f"Unsupported combination of types: "
                                f"`window_length`: {type(window_length)}, "
                                f"`cutoff`: {type(cutoff)}")

            # The `+ 1` shifts the half-open range so that it excludes
            # `train_start` and includes the cutoff position.
            if is_int(x=cutoff):
                training_window = np.arange(train_start, cutoff) + 1
            else:
                training_window = np.arange(train_start, y.get_loc(cutoff)) + 1

            test_window = cutoff + fh.to_numpy()
            if is_datetime(x=max_cutoff) and is_timedelta(x=max_fh):
                # Drop timestamps earlier than the smallest value in `y`,
                # then map the remaining timestamps to positions.
                test_window = test_window[test_window >= y.min()]
                test_window = np.array(
                    [y.get_loc(timestamp) for timestamp in test_window])
            yield training_window, test_window
예제 #12
0
def check_cutoffs(cutoffs):
    """Validate cutoff points.

    Parameters
    ----------
    cutoffs : np.ndarray
        Cutoff points; must be a non-empty 1-dimensional array of integers.

    Returns
    -------
    cutoffs : np.ndarray
        Sorted copy of the validated cutoffs.

    Raises
    ------
    ValueError
        If `cutoffs` is not a numpy array, is not 1-dimensional, contains
        non-integer elements, or is empty.
    """
    if not isinstance(cutoffs, np.ndarray):
        raise ValueError(
            f"`cutoffs` must be a np.array, but found: {type(cutoffs)}")

    # Check dimensionality before touching the elements: iterating a 0-d
    # array raises a TypeError, and iterating an N-d array yields sub-arrays
    # that would be reported as "not integers" instead of "wrong shape".
    if cutoffs.ndim != 1:
        raise ValueError("`cutoffs` must be a 1-dimensional array")

    if not all(is_int(cutoff) for cutoff in cutoffs):
        raise ValueError("All cutoff points must be integers")

    if len(cutoffs) == 0:
        raise ValueError("Found empty `cutoff` array")

    return np.sort(cutoffs)
예제 #13
0
def check_sp(sp):
    """Validate seasonal periodicity.

    Parameters
    ----------
    sp : int or None
        Seasonal periodicity

    Returns
    -------
    sp : int or None
        Validated seasonal periodicity, returned unchanged.

    Raises
    ------
    ValueError
        If `sp` is not None and not an integer >= 1.
    """
    if sp is None:
        return sp

    if not is_int(sp) or sp < 1:
        raise ValueError("`sp` must be a positive integer >= 1 or None")

    return sp
예제 #14
0
def check_step_length(step_length) -> Optional[int]:
    """Validate step length.

    Parameters
    ----------
    step_length : int, timedelta, pd.DateOffset or None
        Step length to validate.

    Returns
    -------
    step_length : int, timedelta, pd.DateOffset or None
        The input, unchanged, when it is None, an integer >= 1, a positive
        timedelta, or a date offset that moves a timestamp forward.

    Raises
    ------
    ValueError
        If `step_length` is of an unsupported type, or is of a supported
        type but not positive.
    """
    if step_length is None:
        return None

    if is_int(step_length):
        if step_length < 1:
            raise ValueError(f"`step_length` must be a integer >= 1, "
                             f"but found: {step_length}")
        return step_length

    if is_timedelta(step_length):
        if step_length <= timedelta(0):
            raise ValueError(f"`step_length` must be a positive timedelta, "
                             f"but found: {step_length}")
        return step_length

    if is_date_offset(step_length):
        # An offset has no sign of its own, so apply it to a reference
        # timestamp and check that the result moved forward.
        origin = pd.Timestamp(0)
        if step_length + origin <= pd.Timestamp(0):
            raise ValueError(
                f"`step_length` must be a positive pd.DateOffset, "
                f"but found: {step_length}")
        return step_length

    raise ValueError(
        f"`step_length` must be an integer, timedelta, pd.DateOffset, or None, "
        f"but found: {type(step_length)}")
예제 #15
0
def _check_lags(lags):
    msg = " ".join([
        "`lags` should be provided as a positive integer scaler, or",
        "a list, tuple or np.ndarray of positive integers,"
        f"but found {type(lags)}.",
    ])
    non_positive_msg = "`lags` should be positive integers."
    if isinstance(lags, int):
        if lags <= 0:
            raise ValueError(non_positive_msg)
        lags = check_array([lags], ensure_2d=False)
    elif isinstance(lags, (list, tuple, np.ndarray)):
        if not all([is_int(lag) for lag in lags]):
            raise TypeError(msg)
        lags = check_array(lags, ensure_2d=False)
        if (lags <= 0).any():
            raise ValueError(non_positive_msg)
    else:
        raise TypeError(msg)

    return lags
예제 #16
0
    def _split(self, y: pd.Index) -> SPLIT_GENERATOR_TYPE:
        """Yield a single ``(train, test)`` pair of windows for `y`.

        The training window ends at the position returned by `_get_end`.
        It starts at 0 when no window length is set, `window_length`
        positions back when the length is an integer, and otherwise at the
        first position whose index value is later than the end value minus
        `window_length`. The test window is that end position shifted by
        `fh`.
        """
        n_obs = y.shape[0]
        window_length = check_window_length(self.window_length, n_obs)
        fh = _check_fh(self.fh)
        last_train = _get_end(y_index=y, fh=fh)

        if window_length is None:
            first_train = 0
        elif is_int(window_length):
            first_train = last_train - window_length + 1
        else:
            inside_window = y > y[last_train] - window_length
            first_train = np.argwhere(inside_window).flatten()[0]

        train = self._get_train_window(
            y=y, train_start=first_train, split_point=last_train + 1
        )

        if array_is_int(fh):
            # Integer horizon: plain positional offsets from the end.
            test = last_train + fh.to_numpy()
        else:
            # Otherwise shift the end value by each step and look up the
            # position of the resulting value in `y`.
            test = np.array(
                [y.get_loc(y[last_train] + step) for step in fh.to_pandas()]
            )

        yield train, test
예제 #17
0
def check_step_length(step_length):
    """Validate step length.

    Parameters
    ----------
    step_length : int or None
        Step length to validate.

    Returns
    -------
    step_length : int or None
        The input, unchanged, when it is None or an integer >= 1.

    Raises
    ------
    ValueError
        If `step_length` is not None and not an integer >= 1.
    """
    if step_length is None:
        return step_length

    is_invalid = not is_int(step_length) or step_length < 1
    if is_invalid:
        raise ValueError(
            f"`step_length` must be a positive integer >= 1 or None, "
            f"but found: {step_length}")
    return step_length
예제 #18
0
def _check_n_splits(n_splits):
    """Assert that `n_splits` passes `is_int` and is greater than zero."""
    assert is_int(n_splits) and n_splits > 0
예제 #19
0
def _check_values(values: Union[VALID_FORECASTING_HORIZON_TYPES]) -> pd.Index:
    """Validate forecasting horizon values.

    Validation checks validity and also converts forecasting horizon values
    to supported pandas.Index types if possible.

    Parameters
    ----------
    values : int, list, array, certain pd.Index types
        Forecasting horizon with steps ahead to predict.

    Returns
    -------
    values : pd.Index
        Sorted and validated forecasting horizon values.

    Raises
    ------
    TypeError
        Raised if `values` type is not supported
    ValueError
        Raised if `values` contains duplicates
    """
    # If values are one of the supported pandas index types, we don't have to
    # do anything, as the forecasting horizon directly wraps the index. Note
    # that isinstance() does not work here because index types inherit from
    # each other, hence we check for type equality.
    if type(values) in VALID_INDEX_TYPES:
        pass

    # Convert single integer to pandas index, no further checks needed.
    # NOTE: `pd.Int64Index` was removed in pandas 2.0; `pd.Index` with an
    # explicit int64 dtype builds the equivalent index (and still returns an
    # Int64Index on pandas 1.x).
    elif is_int(values):
        return pd.Index([values], dtype="int64")

    # A single timedelta or date offset also needs no further checks.
    elif is_timedelta_or_date_offset(values):
        return pd.Index([values])

    # Convert np.array or list to pandas index.
    elif is_array(values) and array_is_int(values):
        values = pd.Index(values, dtype="int64")

    elif is_array(values) and array_is_timedelta_or_date_offset(values):
        values = pd.Index(values)

    # Otherwise, raise type error.
    else:
        valid_types = (
            "int",
            "np.array",
            "list",
            *[f"pd.{index_type.__name__}" for index_type in VALID_INDEX_TYPES],
        )
        raise TypeError(
            f"Invalid `fh`. The type of the passed `fh` values is not supported. "
            f"Please use one of {valid_types}, but found: {type(values)}")

    # Check that values do not contain duplicates.
    if len(values) != values.nunique():
        raise ValueError(
            "Invalid `fh`. The `fh` values must not contain any duplicates.")

    return values.sort_values()
예제 #20
0
def check_n_splits_properties(n_splits):
    """Assert the common properties of `n_splits`: an integer above zero."""
    assert is_int(n_splits) and n_splits > 0
예제 #21
0
def check_cutoff_properties(cutoffs):
    """Assert the common properties expected of cutoffs.

    `cutoffs` must be a non-empty, 1-dimensional numpy array whose elements
    all pass `is_int`.
    """
    assert isinstance(cutoffs, np.ndarray)
    for cutoff in cutoffs:
        assert is_int(cutoff)
    assert cutoffs.ndim == 1
    assert len(cutoffs) > 0