def run_test(
    env: TrainEnv, predictor: Predictor, test_dataset: Dataset
) -> None:
    len_original = maybe_len(test_dataset)

    test_dataset = TransformedDataset(
        base_dataset=test_dataset,
        transformations=[
            FilterTransformation(
                lambda x: x["target"].shape[-1] > predictor.prediction_length
            )
        ],
    )

    len_filtered = len(test_dataset)

    if len_original is not None and len_original > len_filtered:
        logger.warning(
            f"Not all time-series in the test-channel have "
            f"enough data to be used for evaluation. Proceeding with "
            f"{len_filtered}/{len_original} "
            f"(~{int(len_filtered / len_original * 100)}%) items."
        )

    forecast_it, ts_it = backtest.make_evaluation_predictions(
        dataset=test_dataset, predictor=predictor, num_samples=100
    )

    agg_metrics, _item_metrics = Evaluator()(
        ts_iterator=ts_it,
        fcst_iterator=forecast_it,
        num_series=len(test_dataset),
    )

    # we only log aggregate metrics for now as item metrics may be very large
    for name, score in agg_metrics.items():
        logger.info(f"#test_score ({env.current_host}, {name}): {score}")
def create_validation_data_loader(
    self,
    data: Dataset,
    **kwargs,
) -> DataLoader:
    input_names = get_hybrid_forward_input_names(DeepStateTrainingNetwork)
    with env._let(max_idle_transforms=maybe_len(data) or 0):
        instance_splitter = self._create_instance_splitter("validation")
    return ValidationDataLoader(
        dataset=data,
        transform=instance_splitter + SelectFields(input_names),
        batch_size=self.batch_size,
        stack_fn=partial(batchify, ctx=self.trainer.ctx, dtype=self.dtype),
    )
def run_test(
    env: TrainEnv, predictor: Predictor, test_dataset: Dataset
) -> None:
    len_original = maybe_len(test_dataset)

    test_dataset = TransformedDataset(
        test_dataset,
        FilterTransformation(
            lambda x: x["target"].shape[-1] > predictor.prediction_length
        ),
    )

    len_filtered = len(test_dataset)

    if len_original is not None and len_original > len_filtered:
        logger.warning(
            f"Not all time-series in the test-channel have "
            f"enough data to be used for evaluation. Proceeding with "
            f"{len_filtered}/{len_original} "
            f"(~{int(len_filtered / len_original * 100)}%) items."
        )

    forecast_it, ts_it = backtest.make_evaluation_predictions(
        dataset=test_dataset, predictor=predictor, num_samples=100
    )

    if isinstance(predictor, RepresentableBlockPredictor) and isinstance(
        predictor.forecast_generator, QuantileForecastGenerator
    ):
        quantiles = predictor.forecast_generator.quantiles
        logger.info(f"Using quantiles `{quantiles}` for evaluation.")
        evaluator = Evaluator(quantiles=quantiles)
    else:
        evaluator = Evaluator()

    agg_metrics, item_metrics = evaluator(
        ts_iterator=ts_it,
        fcst_iterator=forecast_it,
        num_series=len(test_dataset),
    )

    # we only log aggregate metrics for now as item metrics may be very large
    for name, score in agg_metrics.items():
        logger.info(f"#test_score ({env.current_host}, {name}): {score}")

    # store metrics
    with open(env.path.model / "agg_metrics.json", "w") as agg_metric_file:
        json.dump(agg_metrics, agg_metric_file)
    with open(env.path.model / "item_metrics.csv", "w") as item_metrics_file:
        item_metrics.to_csv(item_metrics_file, index=False)
def create_training_data_loader(
    self,
    data: Dataset,
    **kwargs,
) -> DataLoader:
    with env._let(max_idle_transforms=maybe_len(data) or 0):
        train_transform = (
            self._create_instance_splitter("training")
            + self._create_post_split_transform()
            + SelectFields(["past_target", "valid_length"])
        )
    return TrainDataLoader(
        train_transform.apply(Cyclic(data)),
        batch_size=self.batch_size,
        stack_fn=self._stack_fn(),
        decode_fn=partial(as_in_context, ctx=self.trainer.ctx),
    )
def create_training_data_loader(
    self,
    data: Dataset,
    **kwargs,
) -> DataLoader:
    input_names = get_hybrid_forward_input_names(GPVARTrainingNetwork)
    with env._let(max_idle_transforms=maybe_len(data) or 0):
        instance_splitter = self._create_instance_splitter("training")
    return TrainDataLoader(
        dataset=data,
        transform=instance_splitter + SelectFields(input_names),
        batch_size=self.batch_size,
        stack_fn=partial(batchify, ctx=self.trainer.ctx, dtype=self.dtype),
        decode_fn=partial(as_in_context, ctx=self.trainer.ctx),
        **kwargs,
    )
def create_validation_data_loader(
    self,
    data: Dataset,
    **kwargs,
) -> DataLoader:
    with env._let(max_idle_transforms=maybe_len(data) or 0):
        validation_transform = (
            self._create_instance_splitter("validation")
            + self._create_post_split_transform()
            + SelectFields(["past_target", "valid_length"])
        )
    return ValidationDataLoader(
        validation_transform.apply(data),
        batch_size=self.batch_size,
        stack_fn=self._stack_fn(),
    )
def generate_forecasts(
    predictor: Predictor,
    dataset: Dataset,
    num_samples: int = 100,
    parallelize: bool = False,
) -> Tuple[QuantileForecasts, float]:
    """
    Generates the predictions of the given predictor for the provided dataset.
    The returned prediction object provides the forecasts along with some
    metadata.

    Args:
        predictor: The predictor which is used to make forecasts.
        dataset: The GluonTS dataset which is used for testing.
        num_samples: The number of samples to use for making predictions.
        parallelize: Whether predictions ought to be parallelized.

    Returns:
        The forecasts for the dataset.
        The average latency for generating a single forecast.
    """
    if parallelize:
        predictor = ParallelizedPredictor(
            predictor, num_workers=os.cpu_count()
        )

    # First, perform the predictions...
    tic = time.time()
    forecast_pred, _ = make_evaluation_predictions(
        dataset, predictor, num_samples
    )

    # ...and compute the quantiles
    quantiles = [f"0.{i+1}" for i in range(9)]
    forecasts = []
    for i, forecast in tqdm(
        enumerate(forecast_pred),
        total=maybe_len(dataset),
        disable=not env.use_tqdm,
    ):
        result = None
        if isinstance(forecast, QuantileForecast):
            if forecast.forecast_keys == quantiles:
                result = forecast
        elif isinstance(forecast, SampleForecast):
            quantile_forecast = forecast.to_quantile_forecast(
                quantiles
            )  # type: ignore
            result = quantile_forecast

        if result is None:
            # If none of the above checks produced a quantile forecast, we
            # resort to a method that should work on all types of forecasts
            result = QuantileForecast(
                forecast_arrays=np.stack(
                    [forecast.quantile(q) for q in quantiles], axis=0
                ),
                start_date=forecast.start_date,
                freq=forecast.freq,
                forecast_keys=quantiles,
                item_id=forecast.item_id,
            )
        if result.item_id is None:
            result.item_id = i
        forecasts.append(result)
    toc = time.time()

    # Then, we compute the prediction latency
    latency = (toc - tic) / len(dataset)
    if parallelize:
        # We observed that N CPUs only brought a speedup of ~N/2
        latency = latency * (cast(int, os.cpu_count()) / 2)

    # And convert the list of forecasts into a QuantileForecasts object
    quantile_forecasts = QuantileForecasts(
        values=np.stack([f.forecast_array for f in forecasts]),
        start_dates=np.array([f.start_date for f in forecasts]),
        item_ids=np.array([str(f.item_id) for f in forecasts]),
        freq=to_offset(forecasts[0].freq),  # type: ignore
        quantiles=forecasts[0].forecast_keys,
    )
    return quantile_forecasts, latency
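# Usage sketch (an assumption, not part of the source): how generate_forecasts
# might be called on an existing test split. `my_predictor` and `test_data` are
# hypothetical placeholders for a GluonTS Predictor and Dataset built elsewhere.
forecasts, latency = generate_forecasts(
    predictor=my_predictor,
    dataset=test_data,
    num_samples=100,
    parallelize=False,  # True wraps the predictor in a ParallelizedPredictor
)
# latency is the average wall-clock time per series; forecasts bundles the
# deciles (0.1 ... 0.9) for every series in the dataset.
print(f"average latency per forecast: {latency:.4f}s")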
def backtest_metrics(
    train_dataset: Optional[Dataset],
    test_dataset: Dataset,
    forecaster: Union[Estimator, Predictor],
    evaluator=Evaluator(
        quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
    ),
    num_samples: int = 100,
    logging_file: Optional[str] = None,
    use_symbol_block_predictor: bool = False,
):
    """
    Parameters
    ----------
    train_dataset
        Dataset to use for training.
    test_dataset
        Dataset to use for testing.
    forecaster
        An estimator or a predictor to use for generating predictions.
    evaluator
        Evaluator to use.
    num_samples
        Number of samples to use when generating sample-based forecasts.
    logging_file
        If specified, information of the backtest is redirected to this file.
    use_symbol_block_predictor
        Use a :class:`SymbolBlockPredictor` during testing.

    Returns
    -------
    tuple
        A tuple of aggregate metrics and per-time-series metrics obtained by
        training `forecaster` on `train_dataset` and evaluating the resulting
        predictor with the provided `evaluator` on `test_dataset`.
    """
    if logging_file is not None:
        log_formatter = logging.Formatter(
            "[%(asctime)s %(levelname)s %(thread)d] %(message)s",
            datefmt="%m/%d/%Y %H:%M:%S",
        )
        logger = logging.getLogger(__name__)
        handler = logging.FileHandler(logging_file)
        handler.setFormatter(log_formatter)
        logger.addHandler(handler)
    else:
        logger = logging.getLogger(__name__)

    if train_dataset is not None:
        train_statistics = calculate_dataset_statistics(train_dataset)
        serialize_message(logger, train_dataset_stats_key, train_statistics)

    test_statistics = calculate_dataset_statistics(test_dataset)
    serialize_message(logger, test_dataset_stats_key, test_statistics)

    if isinstance(forecaster, Estimator):
        serialize_message(logger, estimator_key, forecaster)
        assert train_dataset is not None
        predictor = forecaster.train(train_dataset)

        if isinstance(forecaster, GluonEstimator) and isinstance(
            predictor, GluonPredictor
        ):
            inference_data_loader = InferenceDataLoader(
                dataset=test_dataset,
                transform=predictor.input_transform,
                batch_size=forecaster.trainer.batch_size,
                ctx=forecaster.trainer.ctx,
                dtype=forecaster.dtype,
            )
            if forecaster.trainer.hybridize:
                predictor.hybridize(batch=next(iter(inference_data_loader)))

            if use_symbol_block_predictor:
                predictor = predictor.as_symbol_block_predictor(
                    batch=next(iter(inference_data_loader))
                )
    else:
        predictor = forecaster

    forecast_it, ts_it = make_evaluation_predictions(
        test_dataset, predictor=predictor, num_samples=num_samples
    )

    agg_metrics, item_metrics = evaluator(
        ts_it, forecast_it, num_series=maybe_len(test_dataset)
    )

    # we only log aggregate metrics for now as item metrics may be very large
    for name, value in agg_metrics.items():
        serialize_message(logger, f"metric-{name}", value)

    if logging_file is not None:
        # Close the file handler to avoid leaving the file open.
        # https://stackoverflow.com/questions/24816456/python-logging-wont-shutdown
        logger.removeHandler(handler)
        del logger, handler

    return agg_metrics, item_metrics
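# Usage sketch (assumed, not from the source): a full backtest with an Estimator,
# so the function trains first and then evaluates. `my_estimator`, `train_data`
# and `test_data` are hypothetical placeholders for concrete GluonTS objects.
agg_metrics, item_metrics = backtest_metrics(
    train_dataset=train_data,
    test_dataset=test_data,
    forecaster=my_estimator,
    evaluator=Evaluator(quantiles=(0.1, 0.5, 0.9)),
    num_samples=100,
    logging_file="backtest.log",  # hypothetical path; enables the FileHandler branch
)
# metric names depend on the Evaluator configuration, so just list them all
for name, value in agg_metrics.items():
    print(name, value)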
def backtest_metrics(
    train_dataset: Optional[Dataset],
    test_dataset: Dataset,
    forecaster: Union[Estimator, Predictor],
    evaluator=Evaluator(
        quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
    ),
    num_samples: int = 100,
    logging_file: Optional[str] = None,
    use_symbol_block_predictor: Optional[bool] = False,
    num_workers: Optional[int] = None,
    num_prefetch: Optional[int] = None,
    **kwargs,
):
    """
    Parameters
    ----------
    train_dataset
        Dataset to use for training.
    test_dataset
        Dataset to use for testing.
    forecaster
        An estimator or a predictor to use for generating predictions.
    evaluator
        Evaluator to use.
    num_samples
        Number of samples to use when generating sample-based forecasts.
    logging_file
        If specified, information of the backtest is redirected to this file.
    use_symbol_block_predictor
        Use a :class:`SymbolBlockPredictor` during testing.
    num_workers
        The number of multiprocessing workers to use for data preprocessing.
        By default 0, in which case no multiprocessing is used.
    num_prefetch
        The number of batches to prefetch; only effective if `num_workers` > 0.
        If `num_prefetch` > 0, worker processes prefetch batches before data is
        requested from the iterator. A larger prefetch buffer gives smoother
        throughput but consumes more shared memory; a very small one may forfeit
        the benefit of multiple workers, in which case consider reducing
        `num_workers`. Defaults to `num_workers * 2`.

    Returns
    -------
    tuple
        A tuple of aggregate metrics and per-time-series metrics obtained by
        training `forecaster` on `train_dataset` and evaluating the resulting
        predictor with the provided `evaluator` on `test_dataset`.
    """
    if logging_file is not None:
        log_formatter = logging.Formatter(
            "[%(asctime)s %(levelname)s %(thread)d] %(message)s",
            datefmt="%m/%d/%Y %H:%M:%S",
        )
        logger = logging.getLogger(__name__)
        handler = logging.FileHandler(logging_file)
        handler.setFormatter(log_formatter)
        logger.addHandler(handler)
    else:
        logger = logging.getLogger(__name__)

    if train_dataset is not None:
        train_statistics = calculate_dataset_statistics(train_dataset)
        serialize_message(logger, train_dataset_stats_key, train_statistics)

    test_statistics = calculate_dataset_statistics(test_dataset)
    serialize_message(logger, test_dataset_stats_key, test_statistics)

    if isinstance(forecaster, Estimator):
        serialize_message(logger, estimator_key, forecaster)
        assert train_dataset is not None
        predictor = forecaster.train(train_dataset)

        if isinstance(forecaster, GluonEstimator) and isinstance(
            predictor, GluonPredictor
        ):
            inference_data_loader = InferenceDataLoader(
                dataset=test_dataset,
                transform=predictor.input_transform,
                batch_size=forecaster.trainer.batch_size,
                ctx=forecaster.trainer.ctx,
                dtype=forecaster.dtype,
                num_workers=num_workers,
                num_prefetch=num_prefetch,
                **kwargs,
            )
            if forecaster.trainer.hybridize:
                predictor.hybridize(batch=next(iter(inference_data_loader)))

            if use_symbol_block_predictor:
                predictor = predictor.as_symbol_block_predictor(
                    batch=next(iter(inference_data_loader))
                )
    else:
        predictor = forecaster

    forecast_it, ts_it = make_evaluation_predictions(
        test_dataset, predictor=predictor, num_samples=num_samples
    )

    agg_metrics, item_metrics = evaluator(
        ts_it, forecast_it, num_series=maybe_len(test_dataset)
    )

    # we only log aggregate metrics for now as item metrics may be very large
    for name, value in agg_metrics.items():
        serialize_message(logger, f"metric-{name}", value)

    if logging_file is not None:
        # Close the file handler to avoid leaving the file open.
        # https://stackoverflow.com/questions/24816456/python-logging-wont-shutdown
        logger.removeHandler(handler)
        del logger, handler

    return agg_metrics, item_metrics
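# Usage sketch for the variant above that forwards num_workers/num_prefetch to
# the InferenceDataLoader (assumed call; all objects are hypothetical placeholders).
agg_metrics, item_metrics = backtest_metrics(
    train_dataset=train_data,
    test_dataset=test_data,
    forecaster=my_estimator,
    num_samples=100,
    num_workers=4,   # 0 or None keeps preprocessing in the main process
    num_prefetch=8,  # defaults to num_workers * 2 when left unset
)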
def backtest_metrics(
    test_dataset: Dataset,
    predictor: Predictor,
    evaluator=Evaluator(
        quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
    ),
    num_samples: int = 100,
    logging_file: Optional[str] = None,
):
    """
    Parameters
    ----------
    test_dataset
        Dataset to use for testing.
    predictor
        The predictor to test.
    evaluator
        Evaluator to use.
    num_samples
        Number of samples to use when generating sample-based forecasts.
    logging_file
        If specified, information of the backtest is redirected to this file.

    Returns
    -------
    tuple
        A tuple of aggregate metrics and per-time-series metrics obtained by
        evaluating `predictor` on `test_dataset` with the provided `evaluator`.
    """
    if logging_file is not None:
        log_formatter = logging.Formatter(
            "[%(asctime)s %(levelname)s %(thread)d] %(message)s",
            datefmt="%m/%d/%Y %H:%M:%S",
        )
        logger = logging.getLogger(__name__)
        handler = logging.FileHandler(logging_file)
        handler.setFormatter(log_formatter)
        logger.addHandler(handler)
    else:
        logger = logging.getLogger(__name__)

    test_statistics = calculate_dataset_statistics(test_dataset)
    serialize_message(logger, test_dataset_stats_key, test_statistics)

    forecast_it, ts_it = make_evaluation_predictions(
        test_dataset, predictor=predictor, num_samples=num_samples
    )

    agg_metrics, item_metrics = evaluator(
        ts_it, forecast_it, num_series=maybe_len(test_dataset)
    )

    # we only log aggregate metrics for now as item metrics may be very large
    for name, value in agg_metrics.items():
        serialize_message(logger, f"metric-{name}", value)

    if logging_file is not None:
        # Close the file handler to avoid leaving the file open.
        # https://stackoverflow.com/questions/24816456/python-logging-wont-shutdown
        logger.removeHandler(handler)
        del logger, handler

    return agg_metrics, item_metrics
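# Usage sketch (assumption): this variant takes a ready predictor, so it pairs
# naturally with a model restored from disk. Predictor.deserialize is the standard
# GluonTS loading entry point; "model_dir" and `test_data` are hypothetical.
from pathlib import Path

predictor = Predictor.deserialize(Path("model_dir"))
agg_metrics, item_metrics = backtest_metrics(
    test_dataset=test_data,
    predictor=predictor,
    num_samples=100,
)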
def run_test(
    env: TrainEnv,
    predictor: Predictor,
    test_dataset: Dataset,
    hyperparameters: dict,
) -> None:
    len_original = maybe_len(test_dataset)

    test_dataset = FilterTransformation(
        lambda x: x["target"].shape[-1] > predictor.prediction_length
    ).apply(test_dataset)

    len_filtered = len(test_dataset)

    if len_original is not None and len_original > len_filtered:
        logger.warning(
            f"Not all time-series in the test-channel have "
            f"enough data to be used for evaluation. Proceeding with "
            f"{len_filtered}/{len_original} "
            f"(~{int(len_filtered / len_original * 100)}%) items."
        )

    forecast_it, ts_it = backtest.make_evaluation_predictions(
        dataset=test_dataset, predictor=predictor, num_samples=100
    )

    test_quantiles = (
        [
            Quantile.parse(quantile).name
            for quantile in hyperparameters["test_quantiles"]
        ]
        if "test_quantiles" in hyperparameters
        else None
    )

    forecast_generator = getattr(predictor, "forecast_generator", None)
    if isinstance(forecast_generator, QuantileForecastGenerator):
        predictor_quantiles = forecast_generator.quantiles
        if test_quantiles is None:
            test_quantiles = predictor_quantiles
        elif not set(test_quantiles).issubset(predictor_quantiles):
            logger.warning(
                f"Some of the evaluation quantiles `{test_quantiles}` are "
                f"not in the computed quantile forecasts `{predictor_quantiles}`."
            )
            test_quantiles = predictor_quantiles

    if test_quantiles is not None:
        logger.info(f"Using quantiles `{test_quantiles}` for evaluation.")
        evaluator = Evaluator(quantiles=test_quantiles)
    else:
        evaluator = Evaluator()

    agg_metrics, item_metrics = evaluator(
        ts_iterator=ts_it,
        fcst_iterator=forecast_it,
        num_series=len(test_dataset),
    )

    # we only log aggregate metrics for now as item metrics may be very large
    for name, score in agg_metrics.items():
        logger.info(f"#test_score ({env.current_host}, {name}): {score}")

    # store metrics
    with open(env.path.model / "agg_metrics.json", "w") as agg_metric_file:
        json.dump(agg_metrics, agg_metric_file)
    with open(env.path.model / "item_metrics.csv", "w") as item_metrics_file:
        item_metrics.to_csv(item_metrics_file, index=False)
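# Sketch of the hyperparameters contract assumed by run_test above (illustrative
# only): "test_quantiles" is optional, and each entry must be something
# Quantile.parse accepts, e.g. floats or strings such as "0.5". The `env`,
# `predictor`, and `test_dataset` arguments are placeholders matching the
# function signature.
hyperparameters = {
    "test_quantiles": [0.1, 0.5, 0.9],
}
run_test(env, predictor, test_dataset, hyperparameters)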