def _split(self, y):
    """Yield a training and a test window for every configured cutoff.

    Parameters
    ----------
    y : object exposing ``shape``
        Data to split; only ``y.shape[0]`` is read here.

    Yields
    ------
    training_window : np.ndarray
        ``np.arange(cutoff - window_length, cutoff) + 1``.
    test_window
        ``cutoff + fh``.

    Raises
    ------
    ValueError
        If the largest cutoff is greater than or equal to ``y.shape[0]``,
        or if the largest cutoff plus the largest ``fh`` value is greater
        than ``y.shape[0]``.

    Notes
    -----
    This is a generator function, so none of the validation below runs
    until the first item is requested.
    """
    cutoffs = check_cutoffs(self.cutoffs)
    last_cutoff = np.max(cutoffs)
    if last_cutoff >= y.shape[0]:
        raise ValueError("`cutoffs` are incompatible with given `y`.")

    fh = _check_fh(self.fh)
    # NOTE(review): the bound is `>` rather than `>=`, so a test index equal
    # to y.shape[0] is let through -- confirm this is intended.
    if last_cutoff + np.max(fh) > y.shape[0]:
        raise ValueError("`fh` is incompatible with given `cutoffs` and `y`.")

    window_length = check_window_length(self.window_length)
    for cutoff in cutoffs:
        first_train = cutoff - window_length
        # The "+ 1" shifts the half-open arange so that it ends on `cutoff`.
        yield np.arange(first_train, cutoff) + 1, cutoff + fh
def _split_windows(self, y):
    """Yield a training and a test window for every configured cutoff.

    Parameters
    ----------
    y : sized object
        Data to split; only ``len(y)`` is read here.

    Yields
    ------
    training_window : np.ndarray
        ``np.arange(cutoff - window_length, cutoff) + 1``.
    test_window
        ``cutoff + fh``.

    Raises
    ------
    ValueError
        If the largest cutoff is not strictly below ``len(y)``, or if the
        largest cutoff plus the largest ``fh`` value is greater than
        ``len(y)``.

    Notes
    -----
    This is a generator function, so none of the validation below runs
    until the first item is requested.
    """
    cutoffs = check_cutoffs(self.cutoffs)
    latest_cutoff = np.max(cutoffs)
    if not latest_cutoff < len(y):
        raise ValueError("`cutoffs` are out-of-bounds for given `y`.")

    fh = self._check_fh()
    furthest_step = latest_cutoff + np.max(fh)
    # NOTE(review): the bound is `>` rather than `>=`, so a test index equal
    # to len(y) is let through -- confirm this is intended.
    if furthest_step > len(y):
        raise ValueError("`fh` is out-of-bounds for given `cutoffs` and `y`.")

    window_length = check_window_length(self.window_length)
    for cutoff in cutoffs:
        # The "+ 1" shifts the half-open arange so that it ends on `cutoff`.
        train_idx = np.arange(cutoff - window_length, cutoff) + 1
        test_idx = cutoff + fh
        yield train_idx, test_idx
def get_cutoffs(self, y: Optional[ACCEPTED_Y_TYPES] = None) -> np.ndarray:
    """Return the cutoff points supplied at construction time.

    The stored ``self.cutoffs`` are passed through ``check_cutoffs`` and the
    result is returned; apart from being sorted in ascending order, the
    cutoffs are the ones given during instance initialization.

    Parameters
    ----------
    y : pd.Series or pd.Index, optional (default=None)
        Time series to split. Not used by this method.

    Returns
    -------
    cutoffs : np.array
        The array of cutoff points.
    """
    validated_cutoffs = check_cutoffs(self.cutoffs)
    return validated_cutoffs
def _split(self, y: ACCEPTED_Y_TYPES) -> SPLIT_GENERATOR_TYPE:
    """Yield a training and a test window for every configured cutoff.

    Parameters
    ----------
    y : ACCEPTED_Y_TYPES
        Data to split. ``y.shape[0]`` is always read; ``y[0]``,
        ``y[cutoff]`` and ``y.get_loc`` are used only when the window
        length is a timedelta or date offset.

    Yields
    ------
    training_window : np.ndarray
        ``np.arange(train_start, cutoff) + 1``.
    test_window
        ``cutoff + fh``.

    Raises
    ------
    ValueError
        If the largest cutoff is greater than or equal to ``y.shape[0]``,
        or if the largest cutoff plus the largest ``fh`` value is greater
        than ``y.shape[0]``.

    Notes
    -----
    This is a generator function, so none of the validation below runs
    until the first item is requested.
    """
    cutoffs = check_cutoffs(self.cutoffs)
    last_cutoff = np.max(cutoffs)
    n_timepoints = y.shape[0]
    if last_cutoff >= n_timepoints:
        raise ValueError("`cutoffs` are incompatible with given `y`.")

    fh = _check_fh(self.fh)
    # NOTE(review): the bound is `>` rather than `>=`, so a test index equal
    # to y.shape[0] is let through -- confirm this is intended.
    if last_cutoff + np.max(fh) > n_timepoints:
        raise ValueError("`fh` is incompatible with given `cutoffs` and `y`.")

    window_length = check_window_length(self.window_length, n_timepoints)
    for cutoff in cutoffs:
        # Offset-like window: step back from the value at `cutoff`, never
        # going earlier than y[0], then translate back with get_loc.
        # Otherwise plain subtraction on the cutoff itself.
        train_start = (
            y.get_loc(max(y[0], y[cutoff] - window_length))
            if is_timedelta_or_date_offset(x=window_length)
            else cutoff - window_length
        )
        # The "+ 1" shifts the half-open arange so that it ends on `cutoff`.
        yield np.arange(train_start, cutoff) + 1, cutoff + fh
def _split(self, y: ACCEPTED_Y_TYPES) -> SPLIT_GENERATOR_TYPE:
    """Yield a training and a test window for every configured cutoff.

    The cutoffs, forecasting horizon and window length are normalised and
    cross-checked by the module's ``check_*`` / ``_check_*`` helpers before
    any window is produced.

    Parameters
    ----------
    y : ACCEPTED_Y_TYPES
        Data to split. ``y.shape[0]`` is always read; ``y[0]``, ``y.min()``
        and ``y.get_loc`` are used on the datetime code paths.

    Yields
    ------
    training_window : np.ndarray
        ``np.arange(train_start, stop) + 1`` where ``stop`` is the cutoff
        itself for integer cutoffs and ``y.get_loc(cutoff)`` otherwise.
    test_window : np.ndarray
        ``cutoff + fh.to_numpy()``; when the largest cutoff is a datetime
        and the largest ``fh`` value is a timedelta, values below
        ``y.min()`` are dropped and the rest are mapped through
        ``y.get_loc``.

    Raises
    ------
    TypeError
        If ``window_length`` and a cutoff are neither both integers nor a
        timedelta/date offset paired with a datetime.

    Notes
    -----
    This is a generator function, so none of the validation below runs
    until the first item is requested.
    """
    n_obs = y.shape[0]
    cutoffs = check_cutoffs(cutoffs=self.cutoffs)
    fh = _check_fh(fh=self.fh)
    window_length = check_window_length(
        window_length=self.window_length, n_timepoints=n_obs
    )
    _check_cutoffs_fh_window_length(
        cutoffs=cutoffs, fh=fh, window_length=window_length
    )
    _check_cutoffs_and_y(cutoffs=cutoffs, y=y)
    _check_cutoffs_fh_y(cutoffs=cutoffs, fh=fh, y=y)

    largest_fh = fh.max()
    latest_cutoff = np.max(cutoffs)

    for cutoff in cutoffs:
        # Work out where the training window starts for this cutoff.
        if is_int(x=window_length) and is_int(x=cutoff):
            first_train = cutoff - window_length
        elif not (
            is_timedelta_or_date_offset(x=window_length)
            and is_datetime(x=cutoff)
        ):
            raise TypeError(f"Unsupported combination of types: "
                            f"`window_length`: {type(window_length)}, "
                            f"`cutoff`: {type(cutoff)}")
        else:
            # Step back from the cutoff, never going earlier than y[0].
            first_train = y.get_loc(max(y[0], cutoff - window_length))

        # Non-integer cutoffs are translated through get_loc first.
        train_stop = cutoff if is_int(x=cutoff) else y.get_loc(cutoff)
        # The "+ 1" shifts the half-open arange so it ends on `train_stop`.
        train_idx = np.arange(first_train, train_stop) + 1

        test_idx = cutoff + fh.to_numpy()
        if is_datetime(x=latest_cutoff) and is_timedelta(x=largest_fh):
            # Drop timestamps that fall before the start of y, then map the
            # remaining ones through get_loc.
            kept = test_idx[test_idx >= y.min()]
            test_idx = np.array([y.get_loc(stamp) for stamp in kept])

        yield train_idx, test_idx
def get_cutoffs(self, y=None):
    """Return the cutoff points.

    ``self.cutoffs`` is passed through ``check_cutoffs`` and the result is
    returned. ``y`` is accepted but not used.
    """
    validated_cutoffs = check_cutoffs(self.cutoffs)
    return validated_cutoffs