Example #1
0
    def helper_test_drop(test_case, test_series: TimeSeries):
        """
        Checks `drop_after` and `drop_before` around a fixed cut-off timestamp.

        Parameters
        ----------
        test_case
            Object providing the `assertEqual` / `assertTrue` assertion methods.
        test_series
            The series under test; it is cut at 2013-01-05.
        """
        cut = pd.Timestamp('20130105')

        # Head: must end exactly one step before the cut and contain nothing at or after it.
        head = test_series.drop_after(cut)
        test_case.assertEqual(head.end_time(), cut - test_series.freq())
        test_case.assertTrue(np.all(head.time_index() < cut))

        # Tail: must start exactly one step after the cut and contain nothing at or before it.
        tail = test_series.drop_before(cut)
        test_case.assertEqual(tail.start_time(), cut + test_series.freq())
        test_case.assertTrue(np.all(tail.time_index() > cut))

        # Neither operation may alter the frequency string of the series.
        for part in (head, tail):
            test_case.assertEqual(test_series.freq_str(), part.freq_str())
Example #2
0
    def helper_test_append_values(test_case, test_series: TimeSeries):
        """
        Checks `append_values` by splitting `test_series` and gluing it back together.

        Covers appending with and without an explicit time index, appending an empty
        list, appending values in shuffled order, and three invalid time indices that
        are expected to raise `ValueError`.

        Parameters
        ----------
        test_case
            Object providing the `assertEqual` / `assertRaises` assertion methods.
        test_series
            The series under test; it is split after 2013-01-06.
        """
        head, tail = test_series.split_after(pd.Timestamp('20130106'))

        # Re-assembling the two halves must give back the original series,
        # whether or not the time index of the appended part is supplied.
        rebuilt_with_index = head.append_values(tail.values(), tail.time_index())
        test_case.assertEqual(rebuilt_with_index, test_series)
        rebuilt_without_index = head.append_values(tail.values())
        test_case.assertEqual(rebuilt_without_index, test_series)

        # Same reconstruction, this time from the drop_after / drop_before pieces.
        left_part = test_series.drop_after(pd.Timestamp('20130105'))
        right_values = test_series.drop_before(pd.Timestamp('20130104')).values()
        test_case.assertEqual(left_part.append_values(right_values), test_series)

        # Appending nothing leaves the series unchanged.
        test_case.assertEqual(head.append_values([]), head)

        # Values and index shuffled with the same permutation must still
        # reconstruct the original series.
        shuffled = np.random.permutation(range(len(tail.values())))
        test_case.assertEqual(
            head.append_values(tail.values()[shuffled],
                               tail.time_index()[shuffled]),
            test_series)

        # Each builder yields a time index that append_values must reject. They are
        # callables so that the index is only built inside the assertRaises block.
        invalid_index_builders = (
            # non consecutive index
            lambda: tail.time_index() + tail.freq(),
            # indices already present in `head`
            lambda: tail.time_index() - 3 * tail.freq(),
            # other frequency
            lambda: pd.date_range('20130107', '20130113', freq='2d'),
        )
        for build_index in invalid_index_builders:
            with test_case.assertRaises(ValueError):
                # The comparison is not reached when the expected error is raised.
                test_case.assertEqual(
                    head.append_values(tail.values(), build_index()),
                    test_series)
Example #3
0
    def historical_forecasts(
            self,
            series: TimeSeries,
            covariates: Optional[TimeSeries] = None,
            start: Union[pd.Timestamp, float, int] = 0.5,
            forecast_horizon: int = 1,
            stride: int = 1,
            retrain: bool = True,
            overlap_end: bool = False,
            last_points_only: bool = True,
            verbose: bool = False) -> Union[TimeSeries, List[TimeSeries]]:
        """
        Replays the forecasts this model would have produced over the history of `series`.

        A list of prediction times is built, starting at `start` and advancing by
        `stride` time steps. For each prediction time, `series` (and `covariates`, if given)
        is truncated with `drop_after`, the model is optionally re-fitted on the truncated
        data, and a forecast of length `forecast_horizon` is requested from `predict`.

        With `last_points_only=True` (the default) only the final point of every forecast is
        kept, and these points are assembled into one time series whose frequency is
        `series.freq() * stride`. Otherwise the individual forecasts are returned as a list.

        With `retrain=True` (the default) the model is fitted on the truncated series before
        every prediction. With `retrain=False` this method never calls `fit`; the truncated
        series is only handed to `predict`, provided `predict` accepts a `series` argument.

        Parameters
        ----------
        series
            The target time series from which the training sets are built and forecast.
        covariates
            An optional covariate series. It must have the same time index as `series`.
            It is forwarded to `fit` / `predict` only if those methods accept a `covariates`
            parameter.
        start
            The first prediction time.
            This parameter supports 3 different data types: `float`, `int` and `pandas.Timestamp`.
            A `float` is treated as the proportion of the time series that should lie before
            the first prediction point. An `int` is treated as an integer index into the time
            index of `series`. A `pandas.Timestamp` is used directly.
        forecast_horizon
            The number of points requested from `predict` at each prediction time.
        stride
            The number of time steps between two consecutive prediction times.
        retrain
            Whether to call `fit` before every prediction.
        overlap_end
            If False, the last prediction time is bounded by the entry of the time index at
            position `-1 - forecast_horizon`; if True, by the entry at position `-2`.
        last_points_only
            Whether to retain only the last point of each historical forecast.
            If set to True, the method returns a single `TimeSeries` containing the successive
            point forecasts. Otherwise it returns a list of historical `TimeSeries` forecasts.
        verbose
            Whether to print progress.

        Returns
        -------
        TimeSeries or List[TimeSeries]
            By default, a single TimeSeries instance created from the last point of each
            individual forecast. If `last_points_only` is set to False, a list of the
            historical forecasts.
        """
        has_covariates = covariates is not None
        if has_covariates:
            raise_if_not(
                series.has_same_time_as(covariates),
                'The provided series and covariates must have the same time index.'
            )

        # normalise the start parameter into a pd.Timestamp
        first_pred_time = get_timestamp_at_point(start, series)

        # all prediction times are computed up front so that a progress bar can be shown
        last_valid_position = -2 if overlap_end else -1 - forecast_horizon
        last_valid_pred_time = series.time_index()[last_valid_position]

        candidate = first_pred_time
        pred_times = [candidate]
        while candidate < last_valid_pred_time:
            candidate = candidate + series.freq() * stride
            pred_times.append(candidate)

        # with stride > 1 the final step may jump past the last valid prediction time
        if candidate > last_valid_pred_time:
            pred_times.pop()

        iterator = _build_tqdm_iterator(pred_times, verbose)

        # TODO: We should find a better object oriented way of handling covariates in GlobalForecastingModel
        # Inspect once which keyword arguments this model's fit/predict accept.
        fit_params = signature(self.fit).parameters
        predict_params = signature(self.predict).parameters
        fit_with_covariates = has_covariates and 'covariates' in fit_params
        predict_with_covariates = has_covariates and 'covariates' in predict_params
        # when covariates are passed to predict, the series is always passed along as well
        predict_with_series = predict_with_covariates or 'series' in predict_params

        # Depending on last_points_only, either the full forecasts are collected
        # or only the time stamp and value of their last point.
        forecasts = []
        last_points_times = []
        last_points_values = []

        for pred_time in iterator:
            # truncate the inputs for this prediction time
            train = series.drop_after(pred_time)
            train_cov = covariates.drop_after(pred_time) if has_covariates else None

            if retrain:
                fit_kwargs = {'series': train}
                if fit_with_covariates:
                    fit_kwargs['covariates'] = train_cov
                self.fit(**fit_kwargs)

            predict_kwargs = {'n': forecast_horizon}
            if predict_with_series:
                predict_kwargs['series'] = train
            if predict_with_covariates:
                predict_kwargs['covariates'] = train_cov
            forecast = self.predict(**predict_kwargs)

            if last_points_only:
                last_points_values.append(forecast.values()[-1])
                last_points_times.append(forecast.end_time())
            else:
                forecasts.append(forecast)

        if not last_points_only:
            return forecasts

        return TimeSeries.from_times_and_values(
            pd.DatetimeIndex(last_points_times),
            np.array(last_points_values),
            freq=series.freq() * stride)