Example #1
def run_test(env: TrainEnv, predictor: Predictor,
             test_dataset: Dataset) -> None:
    len_original = len(test_dataset)

    test_dataset = TransformedDataset(
        base_dataset=test_dataset,
        transformations=[
            FilterTransformation(
                lambda x: x["target"].shape[-1] > predictor.prediction_length)
        ],
    )

    len_filtered = len(test_dataset)

    if len_original > len_filtered:
        logger.warning(
            f"Not all time-series in the test-channel have "
            f"enough data to be used for evaluation. Proceeding with "
            f"{len_filtered}/{len_original} "
            f"(~{int(len_filtered / len_original * 100)}%) items.")

    forecast_it, ts_it = backtest.make_evaluation_predictions(
        dataset=test_dataset, predictor=predictor, num_samples=100)

    agg_metrics, _item_metrics = Evaluator()(
        ts_iterator=ts_it,
        fcst_iterator=forecast_it,
        num_series=len(test_dataset),
    )

    # we only log aggregate metrics for now as item metrics may be very large
    for name, score in agg_metrics.items():
        logger.info(f"#test_score ({env.current_host}, {name}): {score}")
Example #2
    def run_test(self, dataset, estimator, predictor):
        test_dataset = TransformedDataset(
            dataset,
            transformations=[
                FilterTransformation(lambda el: el['target'].shape[-1] >
                                     predictor.prediction_length)
            ],
        )

        len_orig = len(dataset)
        len_filtered = len(test_dataset)
        if len_orig > len_filtered:
            logging.warning(
                'Not all time-series in the test-channel have '
                'enough data to be used for evaluation. Proceeding with '
                f'{len_filtered}/{len_orig} '
                f'(~{int(len_filtered/len_orig*100)}%) items.')

        try:
            log.metric('test_dataset_stats', test_dataset.calc_stats())
        except GluonTSDataError as error:
            logging.error(
                f"Failure whilst calculating stats for test dataset: {error}")
            return

        if isinstance(estimator, GluonEstimator) and isinstance(
                predictor, GluonPredictor):
            inference_data_loader = InferenceDataLoader(
                dataset=test_dataset,
                transform=predictor.input_transform,
                batch_size=estimator.trainer.batch_size,
                ctx=estimator.trainer.ctx,
                float_type=estimator.float_type,
            )

            if estimator.trainer.hybridize:
                predictor.hybridize(batch=next(iter(inference_data_loader)))

            if self.hyperparameters.get('use_symbol_block_predictor'):
                predictor = predictor.as_symbol_block_predictor(
                    batch=next(iter(inference_data_loader)))

        num_eval_samples = self.hyperparameters.get('num_eval_samples', 100)
        quantiles = self.hyperparameters.get(
            'quantiles', (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9))

        # we only log aggregate metrics for now as item metrics may be
        # very large
        predictions, input_timeseries = backtest.make_evaluation_predictions(
            test_dataset, predictor, num_eval_samples)
        agg_metrics, _item_metrics = Evaluator(quantiles=quantiles)(
            input_timeseries, predictions, num_series=len_filtered)
        log.metric("agg_metrics", agg_metrics)
Example #3
def run_test(
    env: TrainEnv, predictor: Predictor, test_dataset: Dataset
) -> None:
    len_original = maybe_len(test_dataset)

    test_dataset = TransformedDataset(
        test_dataset,
        FilterTransformation(
            lambda x: x["target"].shape[-1] > predictor.prediction_length
        ),
    )

    len_filtered = len(test_dataset)

    if len_original is not None and len_original > len_filtered:
        logger.warning(
            f"Not all time-series in the test-channel have "
            f"enough data to be used for evaluation. Proceeding with "
            f"{len_filtered}/{len_original} "
            f"(~{int(len_filtered / len_original * 100)}%) items."
        )

    forecast_it, ts_it = backtest.make_evaluation_predictions(
        dataset=test_dataset, predictor=predictor, num_samples=100
    )

    if isinstance(predictor, RepresentableBlockPredictor) and isinstance(
        predictor.forecast_generator, QuantileForecastGenerator
    ):
        quantiles = predictor.forecast_generator.quantiles
        logger.info(f"Using quantiles `{quantiles}` for evaluation.")
        evaluator = Evaluator(quantiles=quantiles)
    else:
        evaluator = Evaluator()

    agg_metrics, item_metrics = evaluator(
        ts_iterator=ts_it,
        fcst_iterator=forecast_it,
        num_series=len(test_dataset),
    )

    # we only log aggregate metrics for now as item metrics may be very large
    for name, score in agg_metrics.items():
        logger.info(f"#test_score ({env.current_host}, {name}): {score}")

    # store metrics
    with open(env.path.model / "agg_metrics.json", "w") as agg_metric_file:
        json.dump(agg_metrics, agg_metric_file)
    with open(env.path.model / "item_metrics.csv", "w") as item_metrics_file:
        item_metrics.to_csv(item_metrics_file, index=False)
Example #4
def prepare_test_dataset(dataset: Dataset, prediction_length: int) -> Dataset:
    test_dataset = TransformedDataset(
        dataset,
        transformations=[
            FilterTransformation(
                lambda el: el['target'].shape[-1] > prediction_length)
        ],
    )

    len_orig = len(dataset)
    len_filtered = len(test_dataset)
    if len_orig > len_filtered:
        log.logger.warning(
            'Not all time-series in the test-channel have '
            'enough data to be used for evaluation. Proceeding with '
            f'{len_filtered}/{len_orig} '
            f'(~{int(len_filtered / len_orig * 100)}%) items.')
    return test_dataset
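For the warning that `prepare_test_dataset` emits after filtering, here is a minimal sketch (standard library only, no GluonTS) of the same "filter then report the surviving fraction" bookkeeping. The logger name, function name, and counts are illustrative.

import logging

logger = logging.getLogger("prepare_test_dataset_sketch")

def warn_if_filtered(len_orig: int, len_filtered: int) -> None:
    # mirrors the warning in prepare_test_dataset above
    if len_orig > len_filtered:
        logger.warning(
            "Not all time-series in the test-channel have enough data to be "
            "used for evaluation. Proceeding with %d/%d (~%d%%) items.",
            len_filtered,
            len_orig,
            int(len_filtered / len_orig * 100),
        )

logging.basicConfig(level=logging.WARNING)
warn_if_filtered(len_orig=100, len_filtered=87)  # logs "... 87/100 (~87%) items."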
Example #5
    def create_transformation(self) -> Transformation:
        return FilterTransformation(lambda x: True)
Example #6
def run_test(
    env: TrainEnv,
    predictor: Predictor,
    test_dataset: Dataset,
    hyperparameters: dict,
) -> None:
    len_original = maybe_len(test_dataset)

    test_dataset = FilterTransformation(
        lambda x: x["target"].shape[-1] > predictor.prediction_length
    ).apply(test_dataset)

    len_filtered = len(test_dataset)

    if len_original is not None and len_original > len_filtered:
        logger.warning(
            f"Not all time-series in the test-channel have "
            f"enough data to be used for evaluation. Proceeding with "
            f"{len_filtered}/{len_original} "
            f"(~{int(len_filtered / len_original * 100)}%) items."
        )

    forecast_it, ts_it = backtest.make_evaluation_predictions(
        dataset=test_dataset, predictor=predictor, num_samples=100
    )

    test_quantiles = (
        [
            Quantile.parse(quantile).name
            for quantile in hyperparameters["test_quantiles"]
        ]
        if "test_quantiles" in hyperparameters
        else None
    )

    forecast_generator = getattr(predictor, "forecast_generator", None)
    if isinstance(forecast_generator, QuantileForecastGenerator):
        predictor_quantiles = forecast_generator.quantiles
        if test_quantiles is None:
            test_quantiles = predictor_quantiles
        elif not set(test_quantiles).issubset(predictor_quantiles):
            logger.warning(
                f"Some of the evaluation quantiles `{test_quantiles}` are "
                f"not in the computed quantile forecasts `{predictor_quantiles}`."
            )
            test_quantiles = predictor_quantiles

    if test_quantiles is not None:
        logger.info(f"Using quantiles `{test_quantiles}` for evaluation.")
        evaluator = Evaluator(quantiles=test_quantiles)
    else:
        evaluator = Evaluator()

    agg_metrics, item_metrics = evaluator(
        ts_iterator=ts_it,
        fcst_iterator=forecast_it,
        num_series=len(test_dataset),
    )

    # we only log aggregate metrics for now as item metrics may be very large
    for name, score in agg_metrics.items():
        logger.info(f"#test_score ({env.current_host}, {name}): {score}")

    # store metrics
    with open(env.path.model / "agg_metrics.json", "w") as agg_metric_file:
        json.dump(agg_metrics, agg_metric_file)
    with open(env.path.model / "item_metrics.csv", "w") as item_metrics_file:
        item_metrics.to_csv(item_metrics_file, index=False)
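Example #6 reconciles user-requested evaluation quantiles with the quantiles the predictor actually produced, falling back to the predictor's own quantiles when the request cannot be honoured. A minimal sketch of just that decision logic (plain Python, no GluonTS; the function name and values are illustrative):

def resolve_quantiles(requested, predictor_quantiles):
    # no explicit request: evaluate on whatever the predictor produced
    if requested is None:
        return list(predictor_quantiles)
    # requested quantiles the predictor never computed: fall back, as Example #6 does
    if not set(requested).issubset(predictor_quantiles):
        return list(predictor_quantiles)
    return list(requested)

print(resolve_quantiles(None, ["0.1", "0.5", "0.9"]))      # ['0.1', '0.5', '0.9']
print(resolve_quantiles(["0.5"], ["0.1", "0.5", "0.9"]))   # ['0.5']
print(resolve_quantiles(["0.25"], ["0.1", "0.5", "0.9"]))  # fallback: ['0.1', '0.5', '0.9']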