def inverse_transform(
    self, series: Union[TimeSeries, Sequence[TimeSeries]], *args, **kwargs
) -> Union[TimeSeries, List[TimeSeries]]:
    """Apply the inverse transformation to one series or to a sequence of series.

    The per-series work is delegated to ``_parallel_apply`` with
    ``self._n_jobs`` jobs, calling the class-level
    :func:`ts_inverse_transform()` for every element produced by
    ``self._inverse_transform_iterator``.

    Parameters
    ----------
    series
        The series, or sequence of series, to inverse-transform.
    args
        Extra positional arguments forwarded to :func:`ts_inverse_transform()`.
    kwargs
        Extra keyword arguments forwarded to :func:`ts_inverse_transform()`.

        component_mask : Optional[np.ndarray] = None
            Optionally, a 1-D boolean np.ndarray of length ``series.n_components``
            selecting the components the transformer should consider. It is
            forwarded untouched together with the other keyword arguments.

    Returns
    -------
    Union[TimeSeries, List[TimeSeries]]
        The inverse-transformed series when a single ``TimeSeries`` was given,
        otherwise the results for the whole sequence.
    """
    # Only objects that track fitting carry `_fit_called`; for the others
    # there is nothing to verify.
    if hasattr(self, "_fit_called"):
        raise_if_not(
            self._fit_called,
            "fit() must have been called before inverse_transform()",
            logger,
        )

    progress_label = f"Inverse ({self._name})"

    # Normalise the input to a list so both cases share the same code path.
    single_input = isinstance(series, TimeSeries)
    batch = [series] if single_input else series

    progress = _build_tqdm_iterator(
        self._inverse_transform_iterator(batch),
        verbose=self._verbose,
        desc=progress_label,
        total=len(batch),
    )
    results = _parallel_apply(
        progress,
        self.__class__.ts_inverse_transform,
        self._n_jobs,
        args,
        kwargs,
    )

    # Hand back the same "shape" the caller passed in.
    return results[0] if single_input else results
def wrapper_multi_ts_support(*args, **kwargs):
    """Evaluate the wrapped metric ``func`` on one or several (actual, pred) pairs.

    The two series are taken from ``actual_series`` / ``pred_series`` keyword
    arguments when present, otherwise from the leading positional arguments.
    Single series are wrapped into one-element lists, the pairs are evaluated
    through ``_parallel_apply`` and the collected values are passed to the
    ``inter_reduction`` callable (the keyword argument if given, else the
    default declared in the signature of ``func``).
    """
    actual_by_keyword = 'actual_series' in kwargs
    pred_by_keyword = 'pred_series' in kwargs

    actual_series = kwargs['actual_series'] if actual_by_keyword else args[0]
    if pred_by_keyword:
        pred_series = kwargs['pred_series']
    elif actual_by_keyword:
        # the actual series came by keyword, so the prediction is the first positional
        pred_series = args[0]
    else:
        pred_series = args[1]

    # defaults for the execution options are read from the wrapped metric itself
    func_params = signature(func).parameters
    n_jobs = kwargs.pop('n_jobs', func_params['n_jobs'].default)
    verbose = kwargs.pop('verbose', func_params['verbose'].default)

    raise_if_not(isinstance(n_jobs, int), "n_jobs must be an integer")
    raise_if_not(isinstance(verbose, bool), "verbose must be a bool")

    if not isinstance(actual_series, Sequence):
        actual_series = [actual_series]
    if not isinstance(pred_series, Sequence):
        pred_series = [pred_series]

    n_pairs = len(actual_series)
    raise_if_not(
        n_pairs == len(pred_series),
        "The two TimeSeries sequences must have the same length.",
        logger)

    # how many leading positional arguments were consumed by the two series
    num_series_in_args = [actual_by_keyword, pred_by_keyword].count(False)

    # the series are fed through the iterator, not through the keyword arguments
    kwargs.pop('actual_series', None)
    kwargs.pop('pred_series', None)

    pairs = _build_tqdm_iterator(iterable=zip(actual_series, pred_series),
                                 verbose=verbose,
                                 total=n_pairs)

    value_list = _parallel_apply(iterator=pairs,
                                 fn=func,
                                 n_jobs=n_jobs,
                                 fn_args=args[num_series_in_args:],
                                 fn_kwargs=kwargs)

    # in case the reduction is not reducing the metrics sequence to a single value, e.g., if returning the
    # np.ndarray of values with the identity function, we must handle the single TS case, where we should
    # return a single value instead of a np.array of len 1
    if len(value_list) == 1:
        value_list = value_list[0]

    # the signature default is only looked up when no reduction was passed explicitly
    reducer = (kwargs['inter_reduction'] if 'inter_reduction' in kwargs
               else func_params['inter_reduction'].default)
    return reducer(value_list)
def fit(
    self, series: Union[TimeSeries, Sequence[TimeSeries]], *args, **kwargs
) -> "FittableDataTransformer":
    """Fit the transformer on a series or on a sequence of series.

    The fitting results are stored in ``self._fitted_params``. The work is
    delegated to ``_parallel_apply`` with ``self._n_jobs`` jobs, calling the
    class-level :func:`ts_fit` for every element produced by
    ``self._fit_iterator``; ``self._fitted_params`` holds whatever
    ``_parallel_apply`` returns (one entry per series).

    Parameters
    ----------
    series
        The series, or sequence of series, to fit the transformer on.
    args
        Extra positional arguments forwarded to :func:`ts_fit`.
    kwargs
        Extra keyword arguments forwarded to :func:`ts_fit`.

        component_mask : Optional[np.ndarray] = None
            Optionally, a 1-D boolean np.ndarray of length ``series.n_components``
            selecting the components the transformer should consider. It is
            forwarded untouched together with the other keyword arguments.

    Returns
    -------
    FittableDataTransformer
        This transformer instance, to allow call chaining.
    """
    # The flag is raised up front, before any fitting work happens.
    self._fit_called = True

    progress_label = f"Fitting ({self._name})"

    # A lone series is handled as a one-element batch.
    batch = [series] if isinstance(series, TimeSeries) else series

    progress = _build_tqdm_iterator(
        self._fit_iterator(batch),
        verbose=self._verbose,
        desc=progress_label,
        total=len(batch),
    )
    self._fitted_params = _parallel_apply(
        progress, self.__class__.ts_fit, self._n_jobs, args, kwargs
    )
    return self
def transform(
    self, series: Union[TimeSeries, Sequence[TimeSeries]], *args, **kwargs
) -> Union[TimeSeries, List[TimeSeries]]:
    """Transform one series or a sequence of series.

    The per-series work is delegated to ``_parallel_apply`` with
    ``self._n_jobs`` jobs, calling the class-level :func:`ts_transform()` for
    every element produced by ``self._transform_iterator``.

    Parameters
    ----------
    series
        The series, or sequence of series, to transform.
    args
        Extra positional arguments forwarded to each :func:`ts_transform()` call.
    kwargs
        Extra keyword arguments forwarded to each :func:`ts_transform()` call.

    Returns
    -------
    Union[TimeSeries, List[TimeSeries]]
        The transformed series when a single ``TimeSeries`` was given,
        otherwise the results for the whole sequence.
    """
    progress_label = f"Transform ({self._name})"

    # Normalise the input to a list so both cases share the same code path.
    single_input = isinstance(series, TimeSeries)
    batch = [series] if single_input else series

    progress = _build_tqdm_iterator(
        self._transform_iterator(batch),
        verbose=self._verbose,
        desc=progress_label,
        total=len(batch),
    )
    results = _parallel_apply(
        progress, self.__class__.ts_transform, self._n_jobs, args, kwargs
    )

    # Hand back the same "shape" the caller passed in.
    return results[0] if single_input else results
def inverse_transform(self,
                      series: Union[TimeSeries, Sequence[TimeSeries]],
                      *args,
                      **kwargs) -> Union[TimeSeries, List[TimeSeries]]:
    """
    Applies the inverse transformation to a `TimeSeries` or to a `Sequence[TimeSeries]`.

    The per-series work is delegated to `_parallel_apply` with `self._n_jobs` jobs, calling the class-level
    `ts_inverse_transform()` for every element produced by `self._inverse_transform_iterator`.

    Parameters
    ----------
    series
        `TimeSeries` or `Sequence[TimeSeries]` to inverse-transform.
    args
        Extra positional arguments forwarded to `ts_inverse_transform()`
    kwargs
        Extra keyword arguments forwarded to `ts_inverse_transform()`

    Returns
    -------
    Union[TimeSeries, List[TimeSeries]]
        The inverse-transformed series when a single `TimeSeries` was given, otherwise the results for the
        whole sequence.
    """
    # Only objects that track fitting carry `_fit_called`; for the others there is nothing to verify.
    if hasattr(self, "_fit_called"):
        raise_if_not(self._fit_called, "fit() must have been called before inverse_transform()", logger)

    progress_label = "Inverse ({})".format(self._name)

    # Normalise the input to a list so both cases share the same code path.
    single_input = isinstance(series, TimeSeries)
    batch = [series] if single_input else series

    progress = _build_tqdm_iterator(self._inverse_transform_iterator(batch),
                                    verbose=self._verbose,
                                    desc=progress_label,
                                    total=len(batch))
    results = _parallel_apply(progress,
                              self.__class__.ts_inverse_transform,
                              self._n_jobs,
                              args,
                              kwargs)

    # Hand back the same "shape" the caller passed in.
    if single_input:
        return results[0]
    return results
def fit(self,
        series: Union[TimeSeries, Sequence[TimeSeries]],
        *args,
        **kwargs) -> 'FittableDataTransformer':
    """
    Fits the transformer on a `TimeSeries` or on a `Sequence[TimeSeries]` and keeps the result in
    `self._fitted_params`.

    The work is delegated to `_parallel_apply` with `self._n_jobs` jobs, calling the class-level `ts_fit()` for
    every element produced by `self._fit_iterator`; `self._fitted_params` holds whatever `_parallel_apply`
    returns (one entry per `TimeSeries`).

    Parameters
    ----------
    series
        `TimeSeries` or `Sequence[TimeSeries]` against which the transformer is fit.
    args
        Extra positional arguments forwarded to `ts_fit()`
    kwargs
        Extra keyword arguments forwarded to `ts_fit()`

    Returns
    -------
    FittableDataTransformer
        This transformer instance, to allow call chaining.
    """
    # The flag is raised up front, before any fitting work happens.
    self._fit_called = True

    progress_label = "Fitting ({})".format(self._name)

    # A lone series is handled as a one-element batch.
    batch = [series] if isinstance(series, TimeSeries) else series

    progress = _build_tqdm_iterator(self._fit_iterator(batch),
                                    verbose=self._verbose,
                                    desc=progress_label,
                                    total=len(batch))
    self._fitted_params = _parallel_apply(progress,
                                          self.__class__.ts_fit,
                                          self._n_jobs,
                                          args,
                                          kwargs)
    return self
'Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly' ] info_dataset = pd.read_csv('dataset/M4-info.csv', delimiter=',').set_index('M4id') for cat in data_categories[::-1]: # Load TimeSeries from M4 ts_train = pkl.load(open("dataset/train_" + cat + ".pkl", "rb")) ts_test = pkl.load(open("dataset/test_" + cat + ".pkl", "rb")) # Test models on all time series mase_all = [] smape_all = [] m = int(info_dataset.Frequency[cat[0] + "1"]) for train, test in _build_tqdm_iterator(zip(ts_train, ts_test), verbose=True): train_des = train seasonOut = 1 if m > 1: if check_seasonality(train, m=m, max_lag=2 * m): _, season = extract_trend_and_seasonality( train, m, model=ModelMode.MULTIPLICATIVE) train_des = remove_from_series( train, season, model=ModelMode.MULTIPLICATIVE) seasonOut = season[-m:].shift(m) seasonOut = seasonOut.append_values(seasonOut.values()) seasonOut = seasonOut[:len(test)] naive = NaiveDrift() naive2 = NaiveSeasonal(K=1) naiveSeason = NaiveSeasonal(K=m) ses = ExponentialSmoothing(trend=None,
def mase(actual_series: Union[TimeSeries, Sequence[TimeSeries]],
         pred_series: Union[TimeSeries, Sequence[TimeSeries]],
         insample: Union[TimeSeries, Sequence[TimeSeries]],
         m: Optional[int] = 1,
         intersect: bool = True,
         *,
         reduction: Callable[[np.ndarray], float] = np.mean,
         inter_reduction: Callable[[np.ndarray], Union[float, np.ndarray]] = lambda x: x,
         n_jobs: int = 1,
         verbose: bool = False) -> Union[float, np.ndarray]:
    """ Mean Absolute Scaled Error (MASE).

    See `Mean absolute scaled error wikipedia page <https://en.wikipedia.org/wiki/Mean_absolute_scaled_error>`_
    for details about the MASE and how it is computed.

    For every component, the mean absolute forecast error is divided by the mean absolute `m`-step difference
    of the `insample` series. If `insample` is stochastic, its 0.5 quantile series is used to compute that scale.

    Parameters
    ----------
    actual_series
        The `TimeSeries` or `Sequence[TimeSeries]` of actual values.
    pred_series
        The `TimeSeries` or `Sequence[TimeSeries]` of predicted values.
    insample
        The training series used to forecast `pred_series`. `pred_series` has to start exactly one time step
        after the end of `insample`. It provides the scale of the error of a naive forecaster on the training data.
    m
        Optionally, the seasonality to use for differencing.
        `m=1` corresponds to the non-seasonal MASE, whereas `m>1` corresponds to seasonal MASE.
        If `m=None`, it is obtained from `check_seasonality(insample)`; when no seasonality is reported a
        `UserWarning` is emitted and a value of 1 is used.
    intersect
        For time series that are overlapping in time without having the same time index, setting `intersect=True`
        will consider the values only over their common time interval (intersection in time).
    reduction
        Function taking as input a `np.ndarray` and returning a scalar value. It aggregates the per-component
        values in case of multivariate `TimeSeries` instances.
    inter_reduction
        Function taking as input a `np.ndarray` and returning either a scalar value or a `np.ndarray`.
        It aggregates the values of the different series when a `Sequence[TimeSeries]` is evaluated; it is not
        applied when single `TimeSeries` instances are passed. Defaults to the identity function, which returns
        the pairwise metrics. Example: `inter_reduction=np.mean` returns the average of the pairwise metrics.
    n_jobs
        The number of jobs handed to `_parallel_apply`. It is only used when a `Sequence[TimeSeries]` is passed
        as input. Defaults to `1`. The original documentation states that `-1` means using all the available
        processors.
    verbose
        Optionally, whether to print operations progress

    Raises
    ------
    ValueError
        If the input is neither a `TimeSeries` nor a `Sequence[TimeSeries]` (the error is handed to `raise_log`).
        The original documentation also lists this error type for a periodic `insample` series
        ( :math:`X_t = X_{t-m}` ), which is detected through `raise_if_not`.

    Returns
    -------
    float
        The Mean Absolute Scaled Error (MASE)
    """

    def _multivariate_mase(actual_series: TimeSeries,
                           pred_series: TimeSeries,
                           insample: TimeSeries,
                           m: int,
                           intersect: bool,
                           reduction: Callable[[np.ndarray], float]):
        """Compute the MASE of every component of one series triple and reduce the values."""
        n_components = actual_series.width

        raise_if_not(n_components == pred_series.width,
                     "The two TimeSeries instances must have the same width.", logger)
        raise_if_not(n_components == insample.width,
                     "The insample TimeSeries must have the same width as the other series.", logger)
        # the forecast has to begin exactly one step after the training data ends
        raise_if_not(insample.end_time() + insample.freq == pred_series.start_time(),
                     "The pred_series must be the forecast of the insample series", logger)

        # stochastic training data is collapsed to its median before computing the scale
        if insample.is_stochastic:
            insample_ = insample.quantile_timeseries(quantile=0.5)
        else:
            insample_ = insample

        component_scores = []
        for component_idx in range(n_components):
            # old implementation of mase on univariate TimeSeries
            if m is None:
                test_season, m = check_seasonality(insample)
                if not test_season:
                    warn("No seasonality found when computing MASE. Fixing the period to 1.", UserWarning)
                    m = 1

            y_true, y_hat = _get_values_or_raise(actual_series.univariate_component(component_idx),
                                                 pred_series.univariate_component(component_idx),
                                                 intersect)
            train_values = insample_.univariate_component(component_idx).values()

            abs_errors = np.abs(y_true - y_hat)
            # mean absolute m-step difference of the training values
            naive_scale = np.mean(np.abs(train_values[m:] - train_values[:-m]))
            raise_if_not(not np.isclose(naive_scale, 0), "cannot use MASE with periodical signals", logger)

            component_scores.append(np.mean(abs_errors / naive_scale))

        return reduction(component_scores)

    if isinstance(actual_series, TimeSeries):
        # single series: the other two inputs have to be single series as well
        raise_if_not(isinstance(pred_series, TimeSeries), "Expecting pred_series to be TimeSeries")
        raise_if_not(isinstance(insample, TimeSeries), "Expecting insample to be TimeSeries")
        return _multivariate_mase(actual_series=actual_series,
                                  pred_series=pred_series,
                                  insample=insample,
                                  m=m,
                                  intersect=intersect,
                                  reduction=reduction)

    elif isinstance(actual_series, Sequence) and isinstance(actual_series[0], TimeSeries):
        # sequences: all three inputs have to be sequences of series with matching lengths
        raise_if_not(isinstance(pred_series, Sequence) and isinstance(pred_series[0], TimeSeries),
                     "Expecting pred_series to be a Sequence[TimeSeries]")
        raise_if_not(isinstance(insample, Sequence) and isinstance(insample[0], TimeSeries),
                     "Expecting insample to be a Sequence[TimeSeries]")
        raise_if_not(len(pred_series) == len(actual_series) and len(pred_series) == len(insample),
                     "The TimeSeries sequences must have the same length.", logger)

        raise_if_not(isinstance(n_jobs, int), "n_jobs must be an integer")
        raise_if_not(isinstance(verbose, bool), "verbose must be a bool")

        triples = _build_tqdm_iterator(iterable=zip(actual_series, pred_series, insample),
                                       verbose=verbose,
                                       total=len(actual_series))

        # options shared by every triple
        shared_kwargs = {"m": m, "intersect": intersect, "reduction": reduction}
        per_series_scores = _parallel_apply(iterator=triples,
                                            fn=_multivariate_mase,
                                            n_jobs=n_jobs,
                                            fn_args={},
                                            fn_kwargs=shared_kwargs)
        return inter_reduction(per_series_scores)

    else:
        raise_log(ValueError("Input type not supported, only TimeSeries and Sequence[TimeSeries] are accepted."))
def gridsearch(model_class,
               parameters: dict,
               series: TimeSeries,
               covariates: Optional[TimeSeries] = None,
               forecast_horizon: Optional[int] = None,
               start: Union[pd.Timestamp, float, int] = 0.5,
               last_points_only: bool = False,
               val_series: Optional[TimeSeries] = None,
               use_fitted_values: bool = False,
               metric: Callable[[TimeSeries, TimeSeries], float] = metrics.mape,
               reduction: Callable[[np.ndarray], float] = np.mean,
               verbose=False) -> Tuple['ForecastingModel', Dict]:
    """ A function for finding the best hyper-parameters among a given set.

    This function has 3 modes of operation: Expanding window mode, split mode and fitted value mode.
    All three modes evaluate every possible combination of the hyper-parameter values provided in the
    `parameters` dictionary by instantiating `model_class` with each combination, and return the
    best-performing combination with regards to the `metric` function. The `metric` function is expected
    to return an error value, thus the combination resulting in the smallest `metric` output is chosen.
    Exactly one of the three modes has to be selected.

    Expanding window mode (activated when `forecast_horizon` is passed):
    For every hyper-parameter combination, the error is obtained from the model's `backtest` method, called
    with `series`, `covariates`, `start`, `forecast_horizon`, `metric`, `reduction` and `last_points_only`.

    Split mode (activated when `val_series` is passed):
    For every hyper-parameter combination, the model is trained on `series`, asked to predict
    `len(val_series)` steps, and the prediction is compared against `val_series`.

    Fitted value mode (activated when `use_fitted_values` is set to `True`):
    For every hyper-parameter combination, the model is trained on `series` and its `fitted_values` are
    compared against `series`. Not all models have fitted values, and an error is raised if a default
    instance of `model_class` has no `fitted_values` member. Comparing with the fitted values can be a quick
    way to assess the model, but one cannot see if the model is overfitting the series.

    In split mode and fitted value mode the metric is called as `metric(actual, predicted)`.

    Parameters
    ----------
    model_class
        The ForecastingModel subclass to be tuned for 'series'.
    parameters
        A dictionary containing as keys hyperparameter names, and as values lists of values for the
        respective hyperparameter.
    series
        The TimeSeries instance used as input and target for training.
    covariates
        An optional covariate series. It is only passed to `fit` / `predict` if these methods of `model_class`
        declare a `covariates` parameter. It must have the same time axis as `series`.
    forecast_horizon
        The integer value of the forecasting horizon used in expanding window mode.
    start
        The `int`, `float` or `pandas.Timestamp` that represents the starting point in the time index
        of `series` from which predictions will be made to evaluate the model (expanding window mode only).
        For a detailed description of how the different data types are interpreted, please see the
        documentation for `ForecastingModel.backtest`.
    last_points_only
        Whether to use the whole forecasts or only the last point of each forecast to compute the error
        (expanding window mode only).
    val_series
        The TimeSeries instance used for validation in split mode. If provided, this series must start right
        after the end of `series`; so that a proper comparison of the forecast can be made. It must have the
        same number of components as `series`.
    use_fitted_values
        If `True`, uses the comparison with the fitted values.
        Raises an error if `fitted_values` is not an attribute of a `model_class` instance.
    metric
        A function that takes the actual and the predicted TimeSeries (in this order) as inputs and returns
        a float error value.
    reduction
        A reduction function (mapping array to float) describing how to aggregate the errors obtained
        when backtesting (expanding window mode only). By default it'll compute the mean of errors.
    verbose
        Whether to print progress.

    Returns
    -------
    ForecastingModel, Dict
        A tuple containing an untrained 'model_class' instance created from the best-performing hyper-parameters,
        along with a dictionary containing these best hyper-parameters. If no combination produced an error
        below infinity, the dictionary is empty and the model is built with its default parameters.
    """
    # booleans add up as integers: exactly one of the three mode selectors may be active
    raise_if_not(
        (forecast_horizon is not None) + (val_series is not None) + use_fitted_values == 1,
        "Please pass exactly one of the arguments 'forecast_horizon', "
        "'val_series' or 'use_fitted_values'.", logger)

    if use_fitted_values:
        raise_if_not(
            hasattr(model_class(), "fitted_values"),
            "The model must have a fitted_values attribute to compare with the train TimeSeries",
            logger)

    elif val_series is not None:
        raise_if_not(
            series.width == val_series.width,
            "Training and validation series require the same number of components.",
            logger)

    if covariates is not None:
        raise_if_not(
            series.has_same_time_as(covariates),
            'The provided series and covariates must have the '
            'same time axes.')

    min_error = float('inf')
    best_param_combination = {}

    # compute all hyperparameter combinations from selection
    params_cross_product = list(product(*parameters.values()))

    # TODO: We should find a better object oriented way of handling covariates in GlobalForecastingModel
    fit_signature = signature(model_class.fit)
    predict_signature = signature(model_class.predict)

    # covariates are only forwarded to methods that declare a `covariates` parameter
    fit_with_covariates = covariates is not None and 'covariates' in fit_signature.parameters
    predict_with_covariates = covariates is not None and 'covariates' in predict_signature.parameters

    # iterate through all combinations of the provided parameters and choose the best one
    iterator = _build_tqdm_iterator(params_cross_product, verbose)
    for param_combination in iterator:
        param_combination_dict = dict(zip(parameters.keys(), param_combination))
        model = model_class(**param_combination_dict)

        if use_fitted_values:  # fitted value mode
            if fit_with_covariates:
                model.fit(series, covariates=covariates)
            else:
                model.fit(series)
            fitted_values = TimeSeries.from_times_and_values(
                series.time_index(), model.fitted_values)
            # metrics take the actual series first, the estimate second
            error = metric(series, fitted_values)

        elif val_series is None:  # expanding window mode
            error = model.backtest(series,
                                   covariates,
                                   start,
                                   forecast_horizon,
                                   metric=metric,
                                   reduction=reduction,
                                   last_points_only=last_points_only)

        else:  # split mode
            if fit_with_covariates:
                model.fit(series, covariates=covariates)
            else:
                model.fit(series)

            if predict_with_covariates:
                pred = model.predict(n=len(val_series), covariates=covariates)
            else:
                pred = model.predict(n=len(val_series))
            # metrics take the actual series first, the prediction second
            error = metric(val_series, pred)

        if error < min_error:
            min_error = error
            best_param_combination = param_combination_dict

    logger.info('Chosen parameters: ' + str(best_param_combination))

    return model_class(**best_param_combination), best_param_combination
def historical_forecasts(self,
                         series: TimeSeries,
                         covariates: Optional[TimeSeries] = None,
                         start: Union[pd.Timestamp, float, int] = 0.5,
                         forecast_horizon: int = 1,
                         stride: int = 1,
                         retrain: bool = True,
                         overlap_end: bool = False,
                         last_points_only: bool = True,
                         verbose: bool = False) -> Union[TimeSeries, List[TimeSeries]]:
    """ Computes the forecasts the model would have produced at successive points of `series`.

    A list of prediction times is built, beginning at `start` and advancing by `stride` time steps. For each
    prediction time, `series` (and `covariates`, if given) is cut with `drop_after(pred_time)`, the model is
    optionally re-fitted on this training part, and a forecast of length `forecast_horizon` is produced.

    By default, this method returns a single time series made up of the last point of each historical
    forecast, built with a frequency of `series.freq() * stride`. If `last_points_only` is set to False,
    it instead returns the list of the historical forecasts.

    If `retrain` is set to False, this method never calls `fit`; the training part is only handed to `predict`
    (when `predict` declares a `series` parameter), so the model is expected to be usable for prediction already.

    Parameters
    ----------
    series
        The target time series to use to successively train and evaluate the historical forecasts
    covariates
        An optional covariate series. It is only passed to `fit` / `predict` if these methods declare a
        `covariates` parameter. It must have the same time index as `series`.
    start
        The first point of time at which a prediction is computed for a future time. It is converted to a
        time stamp with `get_timestamp_at_point(start, series)`.
        This parameter supports 3 different data types: `float`, `int` and `pandas.Timestamp`.
        In the case of `float`, the parameter will be treated as the proportion of the time series
        that should lie before the first prediction point.
        In the case of `int`, the parameter will be treated as an integer index to the time index of
        `series` that will be used as first prediction time.
        In case of `pandas.Timestamp`, this time stamp will be used to determine the first prediction time
        directly.
    forecast_horizon
        The forecast horizon for the predictions
    stride
        The number of time steps between two consecutive predictions.
    retrain
        Whether to retrain the model for every prediction or not. The original documentation states that
        currently only `TorchForecastingModel` instances such as `RNNModel`, `TCNModel`, `NBEATSModel` and
        `TransformerModel` support setting `retrain` to `False`.
    overlap_end
        Whether the returned forecasts can go beyond the series' end or not. If False, the last prediction
        time is at most the entry `-1 - forecast_horizon` of the time index, otherwise the second-to-last entry.
    last_points_only
        Whether to retain only the last point of each historical forecast.
        If set to True, the method returns a single `TimeSeries` containing the successive point forecasts.
        Otherwise returns a list of historical `TimeSeries` forecasts.
    verbose
        Whether to print progress

    Returns
    -------
    TimeSeries or List[TimeSeries]
        By default, a single TimeSeries instance created from the last point of each individual forecast.
        If `last_points_only` is set to False, a list of the historical forecasts.
    """
    has_covariates = covariates is not None

    if has_covariates:
        raise_if_not(
            series.has_same_time_as(covariates),
            'The provided series and covariates must have the same time index.'
        )

    # prepare the start parameter -> pd.Timestamp
    start = get_timestamp_at_point(start, series)

    # build the prediction times in advance (to be able to use tqdm)
    last_index_position = -2 if overlap_end else -1 - forecast_horizon
    last_valid_pred_time = series.time_index()[last_index_position]

    pred_times = [start]
    while pred_times[-1] < last_valid_pred_time:
        # compute the next prediction time and add it to pred times
        next_pred_time = pred_times[-1] + series.freq() * stride
        pred_times.append(next_pred_time)

    # the last prediction time computed might have overshot last_valid_pred_time
    if pred_times[-1] > last_valid_pred_time:
        del pred_times[-1]

    iterator = _build_tqdm_iterator(pred_times, verbose)

    # Either store the whole forecasts or only the last points of each forecast, depending on last_points_only
    forecasts = []
    last_points_times = []
    last_points_values = []

    # TODO: We should find a better object oriented way of handling covariates in GlobalForecastingModel
    fit_parameters = signature(self.fit).parameters
    predict_parameters = signature(self.predict).parameters

    # what `fit` and `predict` can be given does not change between iterations
    fit_with_covariates = has_covariates and 'covariates' in fit_parameters
    predict_with_covariates = has_covariates and 'covariates' in predict_parameters
    predict_with_series = 'series' in predict_parameters

    # iterate and forecast
    for pred_time in iterator:
        # build the training series
        train = series.drop_after(pred_time)
        if has_covariates:
            train_cov = covariates.drop_after(pred_time)

        if retrain:
            if fit_with_covariates:
                self.fit(series=train, covariates=train_cov)
            else:
                self.fit(series=train)

        # collect the arguments `predict` is able to accept
        predict_kwargs = {'n': forecast_horizon}
        if predict_with_covariates:
            predict_kwargs['series'] = train
            predict_kwargs['covariates'] = train_cov
        elif predict_with_series:
            predict_kwargs['series'] = train
        forecast = self.predict(**predict_kwargs)

        if last_points_only:
            last_points_values.append(forecast.values()[-1])
            last_points_times.append(forecast.end_time())
        else:
            forecasts.append(forecast)

    if not last_points_only:
        return forecasts

    return TimeSeries.from_times_and_values(
        pd.DatetimeIndex(last_points_times),
        np.array(last_points_values),
        freq=series.freq() * stride)
'Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly' ] train_datasets = [] test_datasets = [] for cat in data_categories: train_datasets.append( pd.read_csv('./dataset/train/{}-train.csv'.format(cat), delimiter=',').set_index('V1').T) test_datasets.append( pd.read_csv('./dataset/test/{}-test.csv'.format(cat), delimiter=',').set_index('V1').T) info_dataset = pd.read_csv('./dataset/M4-info.csv', delimiter=',').set_index('M4id') # creating time series for i, dc in _build_tqdm_iterator(enumerate(data_categories), verbose=True): if os.path.isfile("dataset/train_" + dc + ".pkl") and os.path.isfile("dataset/test_" + dc + ".pkl"): print(" TimeSeries already created") continue train_set = train_datasets[i] test_set = test_datasets[i] ts_train = [] ts_test = [] forecast_horizon = test_set.shape[0] if dc == 'Yearly': index = pd.date_range(pd.Timestamp.min, periods=584, freq=info_dataset.SP.str[0][dc[0] + '1']) fallback_index = pd.date_range(pd.Timestamp.min,