def _generate_new_dates(
    n: int, input_series: TimeSeries
) -> Union[pd.DatetimeIndex, pd.RangeIndex]:
    """
    Generates `n` new dates after the end of the specified series.

    The new index starts one step (`input_series.freq`) after the last time stamp of
    `input_series` and keeps the same step and the same time dimension name.

    Parameters
    ----------
    n
        Number of new time steps to generate.
    input_series
        The series after whose end the new index is generated.

    Returns
    -------
    Union[pd.DatetimeIndex, pd.RangeIndex]
        The index returned by `_generate_index` for the `n` new time steps.
    """
    last = input_series.end_time()
    # `freq` is handed to `_generate_index` as the step for both index kinds, so the first new
    # point has to be one `freq` after the end as well. A hard-coded `last + 1` puts the new
    # index off the grid of an integer-indexed series whose step is not 1.
    # NOTE(review): assumes `freq` is the integer step for integer-indexed series -- confirm.
    start = last + input_series.freq
    return _generate_index(
        start=start,
        freq=input_series.freq,
        length=n,
        name=input_series.time_dim,
    )
def _get_matching_index(ts_target: TimeSeries, ts_covariate: TimeSeries, idx: int):
    """
    Translate an index point of `ts_target` into the matching index point of `ts_covariate`.

    The translation is based on the end times of the two (overlapping) series: `idx` is shifted
    by the result of `_index_diff` between the end of `ts_target` and the end of `ts_covariate`.
    Index points are counted from the end of the series. This is used to jointly slice target
    and covariate series in datasets, for both datetime and integer indexed series.

    Note: no check is made that the returned index point actually lies inside `ts_covariate`.

    Parameters
    ----------
    ts_target
        The target series that `idx` refers to.
    ts_covariate
        The covariate series for which the matching index point is computed.
    idx
        An index point of `ts_target`.

    Returns
    -------
    The index point of `ts_covariate` matching `idx`.

    Raises
    ------
    Whatever `raise_if_not` raises when the two series do not share the same `freq`.
    """
    target_freq = ts_target.freq
    frequencies_match = target_freq == ts_covariate.freq
    raise_if_not(
        frequencies_match,
        "The dataset contains some target/covariates series pair that have incompatible "
        'time axes (not the same "freq") and thus cannot be matched',
    )

    # distance between the two series ends, as computed by `_index_diff`
    end_offset = _index_diff(
        self=ts_target.end_time(),
        other=ts_covariate.end_time(),
        freq=target_freq,
    )
    return idx + end_offset
def generate_train_series(
    self, target: TimeSeries, covariate: Optional[TimeSeries] = None
) -> SupportedIndex:
    """Return the time index to use for training and record the reference index if required.

    After delegating to the parent implementation, a reference index is stored on
    `self.reference_index` when a reference index type other than `ReferenceIndexType.NONE`
    is configured and no reference index has been stored yet:

    - `ReferenceIndexType.PREDICTION`: `(len(target) - 1, target.end_time())`
    - any other type: `(-1, target.start_time() - target.freq)`, i.e. the time step
      right before the start of `target`

    Parameters
    ----------
    target
        The target series.
    covariate
        Optionally, the covariate series.

    Returns
    -------
    SupportedIndex
        The time index of `covariate` if it is given, otherwise the time index of `target`.
    """
    super().generate_train_series(target, covariate)

    # only store a reference index once, and only if one was requested
    if (
        self.reference_index_type is not ReferenceIndexType.NONE
        and self.reference_index is None
    ):
        if self.reference_index_type is ReferenceIndexType.PREDICTION:
            reference = (len(target) - 1, target.end_time())
        else:
            # the time step before the start of the target series
            reference = (-1, target.start_time() - target.freq)
        self.reference_index = reference

    if covariate is None:
        return target.time_index
    return covariate.time_index
def generate_train_series(
    self, target: TimeSeries, covariate: Optional[TimeSeries] = None
) -> SupportedIndex:
    """For training (when `n` is `None`) the time index is taken directly from the future
    covariates if they are available, and from the target otherwise.

    As a side effect, when a reference index type other than `ReferenceIndexType.NONE` is
    configured and `self.reference_index` is still `None`, a reference index is saved:
    `(len(target) - 1, target.end_time())` for `ReferenceIndexType.PREDICTION`, and
    `(-1, target.start_time() - target.freq)` for any other type.

    Parameters
    ----------
    target
        The target series.
    covariate
        Optionally, the future covariate series.

    Returns
    -------
    SupportedIndex
        `covariate.time_index` if `covariate` is given, otherwise `target.time_index`.
    """
    super().generate_train_series(target, covariate)

    reference_requested = self.reference_index_type is not ReferenceIndexType.NONE
    if reference_requested and self.reference_index is None:
        use_prediction_point = (
            self.reference_index_type is ReferenceIndexType.PREDICTION
        )
        if use_prediction_point:
            # last point of the target series
            self.reference_index = (len(target) - 1, target.end_time())
        else:
            # one time step before the first point of the target series
            self.reference_index = (-1, target.start_time() - target.freq)

    source_series = target if covariate is None else covariate
    return source_series.time_index
def generate_inference_series(
    self, n: int, target: TimeSeries, covariate: Optional[TimeSeries] = None
) -> SupportedIndex:
    """For prediction (`n` is given) with future covariates there are two cases:

    1)  Future covariates are given: their time index is used as is.
    2)  Future covariates are missing: a time index is generated that covers the last
        `input_chunk_length` time steps of `target` followed by
        `max(n, output_chunk_length)` time steps after the end of `target`.

    Parameters
    ----------
    n
        The forecast horizon.
    target
        The target series.
    covariate
        Optionally, the future covariate series.

    Returns
    -------
    SupportedIndex
        The covariate time index (case 1) or the generated time index (case 2).
    """
    super().generate_inference_series(n, target, covariate)

    # case 1: the covariates already define the index
    if covariate is not None:
        return covariate.time_index

    # case 2: build the index around the end of the target series
    lookback_steps = self.input_chunk_length - 1
    first_step = target.end_time() - target.freq * lookback_steps
    n_steps = self.input_chunk_length + max(n, self.output_chunk_length)
    return _generate_index(start=first_step, length=n_steps, freq=target.freq)
def _covariate_indexer(
    target_idx: int,
    target_series: TimeSeries,
    covariate_series: TimeSeries,
    covariate_type: CovariateType,
    input_chunk_length: int,
    output_chunk_length: int,
    n: int,
):
    """Returns the positions `(covariate_start, covariate_end)` that slice `covariate_series`
    for inference on `target_series`.

    `covariate_start` is the position of the first required time step in
    `covariate_series.time_index`; `covariate_end` is one past the position of the last
    required time step, so the pair can be used directly as slice bounds.

    The required time span is derived from the end of `target_series`:

    - it starts `input_chunk_length` steps before the end of the target (or at the first
      future step when `input_chunk_length == 0`)
    - for `CovariateType.PAST` it ends `max(0, n - output_chunk_length)` steps after the end
      of the target
    - for every other covariate type it ends `max(n, output_chunk_length)` steps after the end
      of the target

    Parameters
    ----------
    target_idx
        Position of the target series in the dataset; only used in error messages.
    target_series
        The target series.
    covariate_series
        The covariate series to be sliced.
    covariate_type
        The type of `covariate_series`.
    input_chunk_length
        Number of past time steps required by the model.
    output_chunk_length
        Number of time steps the model emits per forward step.
    n
        The forecast horizon.

    Raises
    ------
    Whatever `raise_if_not` raises when `covariate_series` starts too late or ends too early
    for the required span.
    """
    # get the main covariate type: CovariateType.PAST or CovariateType.FUTURE.
    # Everything that is not PAST is treated as FUTURE, so no further validation of
    # `main_covariate_type` is needed.
    main_covariate_type = (
        CovariateType.PAST
        if covariate_type is CovariateType.PAST
        else CovariateType.FUTURE
    )

    # we need to use the time index (datetime or integer) here to match the index with the
    # covariate series
    past_start = target_series.time_index[-input_chunk_length]
    past_end = target_series.time_index[-1]
    if main_covariate_type is CovariateType.PAST:
        future_end = past_end + max(0, n - output_chunk_length) * target_series.freq
    else:  # CovariateType.FUTURE
        future_end = past_end + max(n, output_chunk_length) * target_series.freq

    # without any required future step, the "future" part collapses onto the end of the target
    future_start = (
        past_end + target_series.freq if future_end != past_end else future_end
    )

    if input_chunk_length == 0:  # for regression ensemble models
        past_start, past_end = future_start, future_start

    # check if case specific indexes are available
    # NOTE(review): for `CovariateType.FUTURE` only `future_start` is validated here, while
    # `past_start` is still looked up below -- confirm callers guarantee it is present.
    case_start = (
        future_start if covariate_type is CovariateType.FUTURE else past_start
    )
    raise_if_not(
        covariate_series.start_time() <= case_start,
        f"For the given forecasting case, the provided {main_covariate_type.value} covariates at dataset index "
        f"`{target_idx}` do not extend far enough into the past. The {main_covariate_type.value} covariates "
        f"must start at time step `{case_start}`, whereas now they start at time step "
        f"`{covariate_series.start_time()}`.",
    )
    raise_if_not(
        covariate_series.end_time() >= future_end,
        f"For the given forecasting horizon `n={n}`, the provided {main_covariate_type.value} covariates "
        f"at dataset index `{target_idx}` do not extend far enough into the future. As `"
        f"{'n > output_chunk_length' if n > output_chunk_length else 'n <= output_chunk_length'}"
        f"` the {main_covariate_type.value} covariates must end at time step `{future_end}`, "
        f"whereas now they end at time step `{covariate_series.end_time()}`.",
    )

    # extract the index position (index) from time_index value
    covariate_start = covariate_series.time_index.get_loc(past_start)
    covariate_end = covariate_series.time_index.get_loc(future_end) + 1
    return covariate_start, covariate_end
def _multivariate_mase(
    actual_series: TimeSeries,
    pred_series: TimeSeries,
    insample: TimeSeries,
    m: int,
    intersect: bool,
    reduction: Callable[[np.ndarray], float],
):
    """Compute the MASE of every component and reduce the per-component values to one score.

    For each component, the mean absolute error between `actual_series` and `pred_series` is
    divided by the mean absolute `m`-step difference of the corresponding `insample` component.
    If `insample` is stochastic, its 0.5 quantile series is used for the scale.

    Parameters
    ----------
    actual_series
        The series of actual values.
    pred_series
        The series of predicted values; must start one `freq` after the end of `insample`.
    insample
        The series that `pred_series` is the forecast of.
    m
        The lag used for the scaling differences. If `None`, it is inferred with
        `check_seasonality`, falling back to 1 (with a warning) when no seasonality is found.
    intersect
        Forwarded to `_get_values_or_raise`.
    reduction
        Function applied to the list of per-component values to obtain the final result.

    Returns
    -------
    The result of `reduction` applied to the per-component MASE values.

    Raises
    ------
    Whatever `raise_if_not` raises when the widths differ, when `pred_series` does not directly
    follow `insample`, or when the scale of a component is (close to) zero.
    """
    n_components = actual_series.width
    raise_if_not(
        n_components == pred_series.width,
        "The two TimeSeries instances must have the same width.",
        logger,
    )
    raise_if_not(
        n_components == insample.width,
        "The insample TimeSeries must have the same width as the other series.",
        logger,
    )
    expected_pred_start = insample.end_time() + insample.freq
    raise_if_not(
        expected_pred_start == pred_series.start_time(),
        "The pred_series must be the forecast of the insample series",
        logger,
    )

    # a stochastic insample series is summarised by its median
    if insample.is_stochastic:
        insample_ = insample.quantile_timeseries(quantile=0.5)
    else:
        insample_ = insample

    component_scores = []
    for component in range(n_components):
        # old implementation of mase on univariate TimeSeries
        # NOTE(review): the period is inferred from the whole `insample` on the first
        # iteration only and then reused for all remaining components.
        if m is None:
            has_seasonality, m = check_seasonality(insample)
            if not has_seasonality:
                warn(
                    "No seasonality found when computing MASE. Fixing the period to 1.",
                    UserWarning,
                )
                m = 1

        actual_component = actual_series.univariate_component(component)
        pred_component = pred_series.univariate_component(component)
        y_true, y_hat = _get_values_or_raise(
            actual_component,
            pred_component,
            intersect,
            remove_nan_union=False,
        )

        x_t = insample_.univariate_component(component).values()
        abs_errors = np.abs(y_true - y_hat)
        # mean absolute `m`-step difference of the insample values
        insample_diffs = np.abs(x_t[m:] - x_t[:-m])
        scale = np.mean(insample_diffs)
        raise_if_not(
            not np.isclose(scale, 0),
            "cannot use MASE with periodical signals",
            logger,
        )
        component_scores.append(np.mean(abs_errors / scale))

    return reduction(component_scores)