def predict(self):
        """
        Use the gluon dataset of training to predict future values and
        concat all forecasts timeseries of different identifiers and quantiles together
        """
        forecasts = self.predictor.predict(self.gluon_dataset)
        forecasts_list = list(forecasts)

        forecasts_timeseries = self._compute_forecasts_timeseries(
            forecasts_list)

        multiple_df = concat_timeseries_per_identifiers(forecasts_timeseries)

        self.forecasts_df = concat_all_timeseries(multiple_df)

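        # retrieve the time column name and the identifier columns from the first timeseries of the dataset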
        self.time_column_name = self.gluon_dataset.list_data[0][
            TIMESERIES_KEYS.TIME_COLUMN_NAME]
        self.identifiers_columns = (
            list(self.gluon_dataset.list_data[0][TIMESERIES_KEYS.IDENTIFIERS].keys())
            if TIMESERIES_KEYS.IDENTIFIERS in self.gluon_dataset.list_data[0]
            else [])

        if self.include_history:
            self.forecasts_df = self._include_history(
                self.frequency, history_length_limit=self.history_length_limit)

        self.forecasts_df = add_row_origin(self.forecasts_df,
                                           both=ROW_ORIGIN.FORECAST,
                                           left_only=ROW_ORIGIN.HISTORY)

        self.forecasts_df = self.forecasts_df.rename(
            columns={"index": self.time_column_name})

    def _include_history(self, frequency, history_length_limit=None):
        """Include the historical data on which the model was trained to the forecasts dataframe.

        Args:
            frequency (str): Used to reconstruct the date range (because a gluon ListDataset only store the start date).
            history_length_limit (int): Maximum number of values to retrieve from historical data per timeseries. Default to None which means all.

        Returns:
            DataFrame containing both the historical data and the forecasted values.
        """
        history_timeseries = self._retrieve_history_timeseries(
            frequency, history_length_limit)
        multiple_df = concat_timeseries_per_identifiers(history_timeseries)
        history_df = concat_all_timeseries(multiple_df)
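        # indicator=True adds a "_merge" column ("left_only" for history-only rows, "both" for rows that also
        # have a forecast), which add_row_origin then maps to ROW_ORIGIN values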
        return history_df.merge(self.forecasts_df,
                                on=["index"] + self.identifiers_columns,
                                how="left",
                                indicator=True)
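A minimal pandas sketch of the indicator-based merge used in _include_history, with toy data (the column names below are illustrative, not the plugin's): history-only rows come out tagged "left_only" and rows that also have a forecast are tagged "both", which is what add_row_origin then appears to map to ROW_ORIGIN.HISTORY and ROW_ORIGIN.FORECAST.

import pandas as pd

history_df = pd.DataFrame({
    "index": pd.date_range("2021-01-01", periods=3, freq="D"),
    "store": ["A", "A", "A"],          # identifier column
    "sales": [10.0, 12.0, 11.0],       # historical target
})
forecasts_df = pd.DataFrame({
    "index": pd.date_range("2021-01-03", periods=1, freq="D"),
    "store": ["A"],
    "sales_median": [11.5],            # forecasted quantile
})

merged = history_df.merge(forecasts_df,
                          on=["index", "store"],
                          how="left",
                          indicator=True)
print(merged["_merge"].tolist())  # ['left_only', 'left_only', 'both']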
Example #3
    def train_evaluate(self,
                       train_list_dataset,
                       test_list_dataset,
                       make_forecasts=False,
                       retrain=False):
        """Train Model on train_list_dataset and evaluate it on test_list_dataset. Then retrain on test_list_dataset if retrain=True.

        Args:
            train_list_dataset (gluonts.dataset.common.ListDataset): ListDataset created with the GluonDataset class.
            test_list_dataset (gluonts.dataset.common.ListDataset): ListDataset created with the GluonDataset class.
            make_forecasts (bool, optional): Whether to make the evaluation forecasts and return them. Defaults to False.
            retrain (bool, optional): Whether to retrain model on test_list_dataset after the evaluation. Defaults to False.

        Returns:
            Evaluation metrics DataFrame for each target and aggregated.
            List of timeseries identifiers column names. Empty list if none found in train_list_dataset.
            DataFrame of predictions for the last prediction_length timesteps of the test_list_dataset timeseries if make_forecasts is True.
        """
        logger.info(f"Evaluating {self.get_label()} model performance...")
        start = perf_counter()
        evaluation_predictor = self._train_estimator(train_list_dataset)

        agg_metrics, item_metrics, forecasts = self._make_evaluation_predictions(
            evaluation_predictor, test_list_dataset)
        self.evaluation_time = perf_counter() - start
        logger.info(
            f"Evaluating {self.get_label()} model performance: Done in {self.evaluation_time:.2f} seconds"
        )

        if retrain:
            self.train(test_list_dataset)

        metrics, identifiers_columns = self._format_metrics(
            agg_metrics, item_metrics, train_list_dataset)

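            # build one median forecast timeseries per identifier over the evaluation horizon
            # and concatenate them into a single dataframe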
        if make_forecasts:
            median_forecasts_timeseries = self._compute_median_forecasts_timeseries(
                forecasts, train_list_dataset)
            multiple_df = concat_timeseries_per_identifiers(
                median_forecasts_timeseries)
            forecasts_df = concat_all_timeseries(multiple_df)
            return metrics, identifiers_columns, forecasts_df

        return metrics, identifiers_columns

    def predict(self):
        """
        Use the GluonTS dataset that was used for training to predict future values,
        then concatenate the forecast timeseries of all identifiers and quantiles together.
        """
        model_handler = ModelHandler(self.model_name)
        if (self.model_name
                and not model_handler.can_use_external_feature()
                and TIMESERIES_KEYS.FEAT_DYNAMIC_REAL in self.gluon_dataset.list_data[0]):
            # remove external features from the ListDataset used for predictions if the model cannot use them
            gluon_dataset_without_external_features = remove_unused_external_features(
                self.gluon_dataset, self.frequency)
            forecasts = self.predictor.predict(
                gluon_dataset_without_external_features)
        else:
            forecasts = self.predictor.predict(self.gluon_dataset)

        forecasts_list = list(forecasts)

        forecasts_timeseries = self._compute_forecasts_timeseries(
            forecasts_list)

        multiple_df = concat_timeseries_per_identifiers(forecasts_timeseries)

        self.forecasts_df = concat_all_timeseries(multiple_df)

        self.time_column_name = self.gluon_dataset.list_data[0][
            TIMESERIES_KEYS.TIME_COLUMN_NAME]
        self.identifiers_columns = (
            list(self.gluon_dataset.list_data[0][TIMESERIES_KEYS.IDENTIFIERS].keys())
            if TIMESERIES_KEYS.IDENTIFIERS in self.gluon_dataset.list_data[0]
            else [])

        if self.include_history:
            self.forecasts_df = self._include_history(
                self.frequency, history_length_limit=self.history_length_limit)

        self.forecasts_df = add_row_origin(self.forecasts_df,
                                           both=ROW_ORIGIN.FORECAST,
                                           left_only=ROW_ORIGIN.HISTORY)

        self.forecasts_df = self.forecasts_df.rename(
            columns={"index": self.time_column_name})