Code example #1
0
    def train_loader(
        dataset: ListDataset,
        prediction_interval_length: float,
        context_interval_length: float,
        is_train: bool = True,
        override_args: dict = None,
    ) -> Iterable[DataBatch]:
        """Construct a batched data loader over ``dataset``.

        In training mode instances are drawn by a
        ``ContinuousTimeUniformSampler`` and the loader is truncated to
        ``NUM_BATCHES`` batches; otherwise an inference loader over the
        dataset is returned. Entries in ``override_args`` replace the
        default loader keyword arguments.
        """
        effective_overrides = {} if override_args is None else override_args

        if is_train:
            instance_sampler = ContinuousTimeUniformSampler(
                num_instances=10,
                min_past=context_interval_length,
                min_future=prediction_interval_length,
            )
        else:
            instance_sampler = ContinuousTimePredictionSampler(
                min_past=context_interval_length
            )

        instance_splitter = ContinuousTimeInstanceSplitter(
            future_interval_length=prediction_interval_length,
            past_interval_length=context_interval_length,
            instance_sampler=instance_sampler,
            freq=dataset.freq,
        )

        # Defaults first, then caller overrides win (same as dict.update).
        loader_kwargs = {
            "dataset": dataset,
            "transform": instance_splitter,
            "batch_size": 10,
            "stack_fn": partial(
                batchify, dtype=np.float32, variable_length=True
            ),
            **effective_overrides,
        }

        if not is_train:
            return InferenceDataLoader(**loader_kwargs)
        return itertools.islice(
            TrainDataLoader(num_workers=None, **loader_kwargs), NUM_BATCHES
        )
Code example #2
0
File: predictor.py — Project: rshyamsundar/gluon-ts
    def predict(self,
                dataset: Dataset,
                num_samples: Optional[int] = None) -> Iterator[Forecast]:
        """Yield one forecast per entry of ``dataset``.

        Batches are produced by an ``InferenceDataLoader`` built from this
        predictor's input transform; the network is put in evaluation mode
        and gradients are disabled while forecasts are generated.
        """
        data_loader = InferenceDataLoader(
            dataset,
            transform=self.input_transform,
            batch_size=self.batch_size,
            stack_fn=lambda data: batchify(data, self.device),
        )

        # No parameter updates happen at prediction time.
        self.prediction_net.eval()

        with torch.no_grad():
            yield from self.forecast_generator(
                inference_data_loader=data_loader,
                prediction_net=self.prediction_net,
                input_names=self.input_names,
                freq=self.freq,
                output_transform=self.output_transform,
                num_samples=num_samples,
            )
Code example #3
0
def test_inference_data_loader(dataset_context):
    """Check that the inference loader visits every item exactly once.

    Also verifies that batches produced at inference time are flagged
    with ``is_train == False`` for every entry.
    """
    # Removed two dead assignments from the original: ``dataset_length``
    # was never read, and ``counter`` was initialized to a defaultdict
    # only to be unconditionally overwritten by ``count_item_ids`` below.
    with dataset_context as dataset:
        dl = InferenceDataLoader(
            dataset=dataset,
            transform=default_transformation(),
            batch_size=4,
            stack_fn=partial(batchify, ctx=current_context()),
        )

        batches = list(dl)

        # Inference batches must never be marked as training data.
        for batch in batches:
            assert all(x is False for x in batch["is_train"])

        counter = count_item_ids(batches)

        # Every series in the dataset must appear exactly once.
        for entry in dataset:
            assert counter[entry[FieldName.ITEM_ID]] == 1
Code example #4
0
 def predict(
     self,
     dataset: Dataset,
     num_samples: Optional[int] = None,
     num_workers: Optional[int] = None,
     num_prefetch: Optional[int] = None,
     **kwargs,
 ) -> Iterator[Forecast]:
     """Yield one forecast per entry of ``dataset``.

     Batches are built by an ``InferenceDataLoader`` using this
     predictor's input transform and stacked onto ``self.ctx`` with
     ``self.dtype``; forecasts are then produced by
     ``self.forecast_generator``.

     NOTE(review): ``num_workers``, ``num_prefetch`` and ``**kwargs`` are
     accepted here but never forwarded to the data loader — confirm
     whether they should be passed through (other versions of this
     function do forward them).
     """
     inference_data_loader = InferenceDataLoader(
         dataset,
         transform=self.input_transform,
         batch_size=self.batch_size,
         stack_fn=partial(batchify, ctx=self.ctx, dtype=self.dtype),
     )
     # Run forecast generation under the predictor's MXNet context.
     with mx.Context(self.ctx):
         yield from self.forecast_generator(
             inference_data_loader=inference_data_loader,
             prediction_net=self.prediction_net,
             input_names=self.input_names,
             freq=self.freq,
             output_transform=self.output_transform,
             num_samples=num_samples,
         )
Code example #5
0
 def predict(self, dataset: Dataset, **kwargs) -> Iterator[Forecast]:
     """Run the prediction network over ``dataset`` and yield one
     forecast object per input series.

     Optional per-series metadata (``item_id``, ``info``) is attached
     when the corresponding keys are present in the batch.
     """
     loader = InferenceDataLoader(
         dataset,
         self.input_transform,
         self.batch_size,
         ctx=self.ctx,
         float_type=self.float_type,
     )
     for batch in loader:
         net_inputs = [batch[name] for name in self.input_names]
         net_outputs = self.prediction_net(*net_inputs).asnumpy()
         if self.output_transform is not None:
             net_outputs = self.output_transform(batch, net_outputs)
         # One network output per forecast start date, by construction.
         assert len(batch['forecast_start']) == len(net_outputs)
         has_item_id = 'item_id' in batch
         has_info = 'info' in batch
         for idx, single_output in enumerate(net_outputs):
             yield self._forecast_cls(
                 single_output,
                 start_date=batch['forecast_start'][idx],
                 freq=self.freq,
                 item_id=batch['item_id'][idx] if has_item_id else None,
                 info=batch['info'][idx] if has_info else None,
                 **self.forecast_kwargs,
             )
Code example #6
0
File: backtest.py — Project: zhupeiru/gluon-ts
def backtest_metrics(
    train_dataset: Optional[Dataset],
    test_dataset: Dataset,
    forecaster: Union[Estimator, Predictor],
    evaluator=Evaluator(
        quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
    ),
    num_eval_samples: int = 100,
    logging_file: Optional[str] = None,
    use_symbol_block_predictor: bool = False,
):
    """
    Train `forecaster` (when it is an estimator) and evaluate it on
    `test_dataset`.

    Parameters
    ----------
    train_dataset
        Dataset to use for training. Must not be None when `forecaster`
        is an `Estimator`.
    test_dataset
        Dataset to use for testing.
    forecaster
        An estimator or a predictor to use for generating predictions.
    evaluator
        Evaluator to use.
    num_eval_samples
        Number of samples to use when generating sample-based forecasts.
    logging_file
        If specified, information of the backtest is redirected to this file.
    use_symbol_block_predictor
        Use a :class:`SymbolBlockPredictor` during testing.

    Returns
    -------
    tuple
        A tuple of aggregate metrics and per-time-series metrics obtained by
        training `forecaster` on `train_dataset` and evaluating the resulting
        `evaluator` provided on the `test_dataset`.
    """

    if logging_file is not None:
        log_formatter = logging.Formatter(
            "[%(asctime)s %(levelname)s %(thread)d] %(message)s",
            datefmt="%m/%d/%Y %H:%M:%S",
        )
        logger = logging.getLogger(__name__)
        handler = logging.FileHandler(logging_file)
        handler.setFormatter(log_formatter)
        logger.addHandler(handler)
    else:
        logger = logging.getLogger(__name__)

    # Log dataset statistics up front so a backtest run can be audited
    # from its log alone.
    if train_dataset is not None:
        train_statistics = calculate_dataset_statistics(train_dataset)
        serialize_message(logger, train_dataset_stats_key, train_statistics)

    test_statistics = calculate_dataset_statistics(test_dataset)
    serialize_message(logger, test_dataset_stats_key, test_statistics)

    if isinstance(forecaster, Estimator):
        serialize_message(logger, estimator_key, forecaster)
        # Training an estimator requires a training set; fail loudly here
        # instead of deep inside `train` (matches the newer variant of
        # this function).
        assert train_dataset is not None
        predictor = forecaster.train(train_dataset)

        if isinstance(forecaster, GluonEstimator) and isinstance(
            predictor, GluonPredictor
        ):
            inference_data_loader = InferenceDataLoader(
                dataset=test_dataset,
                transform=predictor.input_transform,
                batch_size=forecaster.trainer.batch_size,
                ctx=forecaster.trainer.ctx,
                float_type=forecaster.float_type,
            )

            # Hybridization and symbol-block conversion both need one
            # concrete batch to trace the network.
            if forecaster.trainer.hybridize:
                predictor.hybridize(batch=next(iter(inference_data_loader)))

            if use_symbol_block_predictor:
                predictor = predictor.as_symbol_block_predictor(
                    batch=next(iter(inference_data_loader))
                )
    else:
        predictor = forecaster

    forecast_it, ts_it = make_evaluation_predictions(
        test_dataset, predictor=predictor, num_eval_samples=num_eval_samples
    )

    agg_metrics, item_metrics = evaluator(
        ts_it, forecast_it, num_series=len(test_dataset)
    )

    # we only log aggregate metrics for now as item metrics may be very large
    for name, value in agg_metrics.items():
        serialize_message(logger, f"metric-{name}", value)

    if logging_file is not None:
        # Detach AND close the handler so the log file is actually
        # released; removeHandler alone leaves the descriptor open.
        # https://stackoverflow.com/questions/24816456/python-logging-wont-shutdown
        logger.removeHandler(handler)
        handler.close()
        del logger, handler

    return agg_metrics, item_metrics
Code example #7
0
File: backtest.py — Project: steverab/gluon-ts
def backtest_metrics(
    train_dataset: Optional[Dataset],
    test_dataset: Dataset,
    forecaster: Union[Estimator, Predictor],
    evaluator=Evaluator(
        quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
    ),
    num_samples: int = 100,
    logging_file: Optional[str] = None,
    use_symbol_block_predictor: Optional[bool] = False,
    num_workers: Optional[int] = None,
    num_prefetch: Optional[int] = None,
    **kwargs,
):
    """
    Parameters
    ----------
    train_dataset
        Dataset to use for training.
    test_dataset
        Dataset to use for testing.
    forecaster
        An estimator or a predictor to use for generating predictions.
    evaluator
        Evaluator to use.
    num_samples
        Number of samples to use when generating sample-based forecasts.
    logging_file
        If specified, information of the backtest is redirected to this file.
    use_symbol_block_predictor
        Use a :class:`SymbolBlockPredictor` during testing.
    num_workers
        The number of multiprocessing workers to use for data preprocessing.
        By default 0, in which case no multiprocessing will be utilized.
    num_prefetch
        The number of prefetching batches only works if `num_workers` > 0.
        If `prefetch` > 0, it allow worker process to prefetch certain batches before
        acquiring data from iterators.
        Note that using large prefetching batch will provide smoother bootstrapping performance,
        but will consume more shared_memory. Using smaller number may forfeit the purpose of using
        multiple worker processes, try reduce `num_workers` in this case.
        By default it defaults to `num_workers * 2`.

    Returns
    -------
    tuple
        A tuple of aggregate metrics and per-time-series metrics obtained by
        training `forecaster` on `train_dataset` and evaluating the resulting
        `evaluator` provided on the `test_dataset`.
    """

    # When a logging file is given, attach a dedicated FileHandler;
    # otherwise fall back to the module logger's existing handlers.
    if logging_file is not None:
        log_formatter = logging.Formatter(
            "[%(asctime)s %(levelname)s %(thread)d] %(message)s",
            datefmt="%m/%d/%Y %H:%M:%S",
        )
        logger = logging.getLogger(__name__)
        handler = logging.FileHandler(logging_file)
        handler.setFormatter(log_formatter)
        logger.addHandler(handler)
    else:
        logger = logging.getLogger(__name__)

    # Log dataset statistics up front so a backtest run can be audited
    # from its log alone.
    if train_dataset is not None:
        train_statistics = calculate_dataset_statistics(train_dataset)
        serialize_message(logger, train_dataset_stats_key, train_statistics)

    test_statistics = calculate_dataset_statistics(test_dataset)
    serialize_message(logger, test_dataset_stats_key, test_statistics)

    if isinstance(forecaster, Estimator):
        serialize_message(logger, estimator_key, forecaster)
        # Training an estimator requires a training set.
        assert train_dataset is not None
        predictor = forecaster.train(train_dataset)

        if isinstance(forecaster, GluonEstimator) and isinstance(
            predictor, GluonPredictor
        ):
            inference_data_loader = InferenceDataLoader(
                dataset=test_dataset,
                transform=predictor.input_transform,
                batch_size=forecaster.trainer.batch_size,
                ctx=forecaster.trainer.ctx,
                dtype=forecaster.dtype,
                num_workers=num_workers,
                num_prefetch=num_prefetch,
                **kwargs,
            )

            # Hybridization and symbol-block conversion both need one
            # concrete batch to trace the network.
            if forecaster.trainer.hybridize:
                predictor.hybridize(batch=next(iter(inference_data_loader)))

            if use_symbol_block_predictor:
                predictor = predictor.as_symbol_block_predictor(
                    batch=next(iter(inference_data_loader))
                )
    else:
        predictor = forecaster

    forecast_it, ts_it = make_evaluation_predictions(
        test_dataset, predictor=predictor, num_samples=num_samples
    )

    agg_metrics, item_metrics = evaluator(
        ts_it, forecast_it, num_series=maybe_len(test_dataset)
    )

    # we only log aggregate metrics for now as item metrics may be very large
    for name, value in agg_metrics.items():
        serialize_message(logger, f"metric-{name}", value)

    if logging_file is not None:
        # Close the file handler to avoid letting the file open.
        # https://stackoverflow.com/questions/24816456/python-logging-wont-shutdown
        # NOTE(review): the handler is removed but never close()d; the
        # underlying file descriptor may stay open — confirm whether
        # handler.close() should be called here.
        logger.removeHandler(handler)
        del logger, handler

    return agg_metrics, item_metrics
Code example #8
0
def create_loaders(
    dataset,
    batch_sizes,
    past_length,
    prediction_length_full,
    prediction_length_rolling,
    num_batches_per_epoch=50,
    num_workers=0,
    extract_tail_chunks_for_train: bool = False,
    val_full_length=True,
):
    """Build train/val/test data loaders and their input transforms.

    Returns ``(train_loader, val_loader, test_full_loader,
    test_rolling_loader, input_transforms)``.

    Note on gluonTS length semantics (kept from the original author's
    notes, since they are easy to get wrong): the loader does *not*
    provide ``data[-past_length-prediction_length:-prediction_length]``
    -- the train set may therefore include the test range, since
    ``prediction_length`` does not cut it out. Instead the loader
    provides ``data[-past_length:]`` plus ``prediction_length``
    time features, so ``targets[-prediction_length:]`` must be cut out
    manually *after* loading. A previous setup used
    ``past_length=past_length, prediction_length=0`` for train and
    ``past_length=past_length+n_steps_forecast`` for test (cutting out
    ``y[-prediction_length:]`` from the batch for evaluation); this
    version follows the upstream repo and passes
    ``prediction_length=prediction_length`` for both, even though for
    test it still yields ``y[-past_length:]``.
    """
    validation_extra = (
        prediction_length_full if val_full_length else prediction_length_rolling
    )
    # split name -> (prediction_length, is_train, effective past length)
    split_settings = {
        "train": (0, True, past_length),
        "val": (0, False, past_length + validation_extra),
        "test_full": (prediction_length_full, False, past_length),
        "test_rolling": (prediction_length_rolling, False, past_length),
    }

    input_transforms = {}
    for name in ["train", "val", "test_full", "test_rolling"]:
        if name not in split_settings:
            raise Exception(f"unknown dataset: {name}")
        prediction_length, is_train, effective_past_length = split_settings[
            name
        ]
        input_transforms[name] = create_input_transform(
            is_train=is_train,
            prediction_length=prediction_length,
            past_length=effective_past_length,
            use_feat_static_cat=True,
            use_feat_dynamic_real=bool(dataset.metadata.feat_dynamic_real),
            freq=dataset.metadata.freq,
            time_features=None,
            extract_tail_chunks_for_train=extract_tail_chunks_for_train,
        )

    # Keyword arguments shared by every loader below.
    common_loader_kwargs = dict(
        num_workers=num_workers,
        ctx=None,
        dtype=np.float32,
    )

    train_loader = TrainDataLoader(
        dataset=dataset.train,
        transform=input_transforms["train"],
        num_batches_per_epoch=num_batches_per_epoch,
        batch_size=batch_sizes["train"],
        **common_loader_kwargs,
    )
    val_loader = ValidationDataLoader(
        dataset=dataset.train,
        transform=input_transforms["val"],
        batch_size=batch_sizes["val"],
        **common_loader_kwargs,
    )
    test_full_loader = InferenceDataLoader(
        dataset=dataset.test,
        transform=input_transforms["test_full"],
        batch_size=batch_sizes["test_full"],
        **common_loader_kwargs,
    )
    test_rolling_loader = InferenceDataLoader(
        dataset=dataset.test,
        transform=input_transforms["test_rolling"],
        batch_size=batch_sizes["test_rolling"],
        **common_loader_kwargs,
    )

    return (
        train_loader,
        val_loader,
        test_full_loader,
        test_rolling_loader,
        input_transforms,
    )