Example #1
def run_test(env: TrainEnv, predictor: Predictor,
             test_dataset: Dataset) -> None:
    len_original = len(test_dataset)

    test_dataset = TransformedDataset(
        base_dataset=test_dataset,
        transformations=[
            FilterTransformation(
                lambda x: x["target"].shape[-1] > predictor.prediction_length)
        ],
    )

    len_filtered = len(test_dataset)

    if len_original > len_filtered:
        logger.warning(
            f"Not all time-series in the test-channel have "
            f"enough data to be used for evaluation. Proceeding with "
            f"{len_filtered}/{len_original} "
            f"(~{int(len_filtered / len_original * 100)}%) items.")

    forecast_it, ts_it = backtest.make_evaluation_predictions(
        dataset=test_dataset, predictor=predictor, num_samples=100)

    agg_metrics, _item_metrics = Evaluator()(
        ts_iterator=ts_it,
        fcst_iterator=forecast_it,
        num_series=len(test_dataset),
    )

    # we only log aggregate metrics for now as item metrics may be very large
    for name, score in agg_metrics.items():
        logger.info(f"#test_score ({env.current_host}, {name}): {score}")
Example #2
def run_test(
    env: TrainEnv, predictor: Predictor, test_dataset: Dataset
) -> None:
    len_original = maybe_len(test_dataset)

    test_dataset = TransformedDataset(
        test_dataset,
        FilterTransformation(
            lambda x: x["target"].shape[-1] > predictor.prediction_length
        ),
    )

    len_filtered = len(test_dataset)

    if len_original is not None and len_original > len_filtered:
        logger.warning(
            f"Not all time-series in the test-channel have "
            f"enough data to be used for evaluation. Proceeding with "
            f"{len_filtered}/{len_original} "
            f"(~{int(len_filtered / len_original * 100)}%) items."
        )

    forecast_it, ts_it = backtest.make_evaluation_predictions(
        dataset=test_dataset, predictor=predictor, num_samples=100
    )

    if isinstance(predictor, RepresentableBlockPredictor) and isinstance(
        predictor.forecast_generator, QuantileForecastGenerator
    ):
        quantiles = predictor.forecast_generator.quantiles
        logger.info(f"Using quantiles `{quantiles}` for evaluation.")
        evaluator = Evaluator(quantiles=quantiles)
    else:
        evaluator = Evaluator()

    agg_metrics, item_metrics = evaluator(
        ts_iterator=ts_it,
        fcst_iterator=forecast_it,
        num_series=len(test_dataset),
    )

    # we only log aggregate metrics for now as item metrics may be very large
    for name, score in agg_metrics.items():
        logger.info(f"#test_score ({env.current_host}, {name}): {score}")

    # store metrics
    with open(env.path.model / "agg_metrics.json", "w") as agg_metric_file:
        json.dump(agg_metrics, agg_metric_file)
    with open(env.path.model / "item_metrics.csv", "w") as item_metrics_file:
        item_metrics.to_csv(item_metrics_file, index=False)
Example #3
def deep_state(seed=42, data="m4_quarterly", epochs=100, batches=50):

    mx.random.seed(seed)
    np.random.seed(seed)

    dataset = get_dataset(data)

    trainer = Trainer(
        ctx=mx.cpu(0),
        #         ctx=mx.gpu(0),
        epochs=epochs,
        num_batches_per_epoch=batches,
        learning_rate=1e-3,
    )

    cardinality = int(dataset.metadata.feat_static_cat[0].cardinality)
    estimator = DeepStateEstimator(
        trainer=trainer,
        cardinality=[cardinality],
        prediction_length=dataset.metadata.prediction_length,
        freq=dataset.metadata.freq,
        use_feat_static_cat=True,
    )

    predictor = estimator.train(dataset.train)

    #     predictor = estimator.train(training_data=dataset.train,
    #                                 validation_data=dataset.test)

    forecast_it, ts_it = make_evaluation_predictions(dataset.test,
                                                     predictor=predictor,
                                                     num_samples=100)

    agg_metrics, item_metrics = Evaluator()(ts_it,
                                            forecast_it,
                                            num_series=len(dataset.test))
    metrics = [
        "MASE", "sMAPE", "MSIS", "wQuantileLoss[0.5]", "wQuantileLoss[0.9]"
    ]
    output = {
        key: round(value, 8)
        for key, value in agg_metrics.items() if key in metrics
    }
    output["epochs"] = epochs
    output["seed"] = seed

    df = pd.DataFrame([output])

    return df
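A tiny driver for the function above (a hypothetical call; the reduced epoch and batch counts are only for a quick smoke run):

if __name__ == "__main__":
    # train DeepState briefly on m4_quarterly and print the selected metrics
    df = deep_state(seed=42, data="m4_quarterly", epochs=1, batches=10)
    print(df.to_string(index=False))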
Example #4
def test_dynamic_integration(
    train_length: int,
    test_length: int,
    prediction_length: int,
    target_start: str,
    rolling_start: str,
    num_dynamic_feat: int,
):
    """
    Trains an estimator on a rolled dataset with dynamic features.
    Tests https://github.com/awslabs/gluon-ts/issues/1390
    """
    train_ds = create_dynamic_dataset(target_start, train_length,
                                      num_dynamic_feat)
    rolled_ds = generate_rolling_dataset(
        dataset=create_dynamic_dataset(target_start, test_length,
                                       num_dynamic_feat),
        strategy=StepStrategy(prediction_length=prediction_length),
        start_time=pd.Timestamp(rolling_start),
    )
    estimator = DeepAREstimator(
        freq="D",
        prediction_length=prediction_length,
        context_length=2 * prediction_length,
        use_feat_dynamic_real=True,
        trainer=Trainer(epochs=1),
    )
    predictor = estimator.train(training_data=train_ds)
    forecast_it, ts_it = make_evaluation_predictions(rolled_ds,
                                                     predictor=predictor,
                                                     num_samples=100)
    training_agg_metrics, _ = Evaluator(num_workers=0)(ts_it, forecast_it)
    # it should have failed by this point if the dynamic features were wrong
    assert training_agg_metrics
Example #5
def test_training_with_implicit_quantile_output():
    dataset = get_dataset("constant")
    metadata = dataset.metadata

    deepar_estimator = DeepAREstimator(
        distr_output=ImplicitQuantileOutput(output_domain="Real"),
        freq=metadata.freq,
        prediction_length=metadata.prediction_length,
        trainer=Trainer(
            device="cpu",
            epochs=5,
            learning_rate=1e-3,
            num_batches_per_epoch=3,
            batch_size=256,
        ),
        input_size=15,
    )
    deepar_predictor = deepar_estimator.train(dataset.train, num_workers=1)
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=dataset.test,  # test dataset
        predictor=deepar_predictor,  # predictor
        num_samples=100,  # number of sample paths we want for evaluation
    )
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(num_workers=0)
    agg_metrics, item_metrics = evaluator(iter(tss),
                                          iter(forecasts),
                                          num_series=len(dataset.test))

    assert agg_metrics["MSE"] > 0
Example #6
def test_listing_1():
    """
    Test GluonTS paper examples from arxiv paper:
    https://arxiv.org/abs/1906.05264

    Listing 1
    """
    from gluonts.dataset.repository.datasets import get_dataset
    from gluonts.model.deepar import DeepAREstimator
    from gluonts.trainer import Trainer
    from gluonts.evaluation import Evaluator
    from gluonts.evaluation.backtest import backtest_metrics

    # We use electricity in the paper but that would take too long to run in
    # the unit test
    dataset_info, train_ds, test_ds = constant_dataset()

    meta = dataset_info.metadata

    estimator = DeepAREstimator(
        freq=meta.time_granularity,
        prediction_length=1,
        trainer=Trainer(epochs=1, batch_size=32),
    )
    predictor = estimator.train(train_ds)

    evaluator = Evaluator(quantiles=(0.1, 0.5, 0.9))
    agg_metrics, item_metrics = backtest_metrics(
        train_dataset=train_ds,
        test_dataset=test_ds,
        forecaster=predictor,
        evaluator=evaluator,
    )
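As a follow-up to the listing above (not part of the paper example), the two return values can be inspected directly; `agg_metrics` is a plain dict of aggregate scores and `item_metrics` is a pandas DataFrame with one row per series:

# assumes `agg_metrics` and `item_metrics` from the listing above
print(agg_metrics["MASE"], agg_metrics["mean_wQuantileLoss"])  # aggregate scores
print(item_metrics.head())  # per-series scores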
Example #7
def evaluate(dataset_name, estimator):
    dataset = get_dataset(dataset_name)
    estimator = estimator(
        prediction_length=dataset.metadata.prediction_length,
        freq=dataset.metadata.time_granularity,
    )

    print(f"evaluating {estimator} on {dataset}")

    predictor = estimator.train(dataset.train)

    forecast_it, ts_it = make_evaluation_predictions(dataset.test,
                                                     predictor=predictor,
                                                     num_eval_samples=100)

    agg_metrics, item_metrics = Evaluator()(ts_it,
                                            forecast_it,
                                            num_series=len(dataset.test))

    pprint.pprint(agg_metrics)

    eval_dict = agg_metrics
    eval_dict["dataset"] = dataset_name
    eval_dict["estimator"] = type(estimator).__name__
    return eval_dict
Example #8
def evaluate(dataset_name, estimator):
    dataset = get_dataset(dataset_name)
    estimator = estimator(
        prediction_length=dataset.metadata.prediction_length,
        freq=dataset.metadata.freq,
        use_feat_static_cat=True,
        cardinality=[
            feat_static_cat.cardinality
            for feat_static_cat in dataset.metadata.feat_static_cat
        ],
    )

    print(f"evaluating {estimator} on {dataset}")

    predictor = estimator.train(dataset.train)

    forecast_it, ts_it = make_evaluation_predictions(dataset.test,
                                                     predictor=predictor,
                                                     num_samples=100)

    agg_metrics, item_metrics = Evaluator()(ts_it,
                                            forecast_it,
                                            num_series=len(dataset.test))

    pprint.pprint(agg_metrics)

    eval_dict = agg_metrics
    eval_dict["dataset"] = dataset_name
    eval_dict["estimator"] = type(estimator).__name__
    return eval_dict
Example #9
def test_forecasts(method_name):
    if method_name == "mlp":
        # https://stackoverflow.com/questions/56254321/error-in-ifncol-matrix-rep-argument-is-of-length-zero
        # https://cran.r-project.org/web/packages/neuralnet/index.html
        #   published before the bug fix: https://github.com/bips-hb/neuralnet/pull/21
        # The issue is still open on nnfor package: https://github.com/trnnick/nnfor/issues/8
        # TODO: look for a workaround.
        pytest.xfail(
            "MLP currently does not work because "
            "the `neuralnet` package is not yet updated with a known bug fix in ` bips-hb/neuralnet`"
        )

    dataset = datasets.get_dataset("constant")

    (train_dataset, test_dataset, metadata) = (
        dataset.train,
        dataset.test,
        dataset.metadata,
    )

    freq = metadata.freq
    prediction_length = metadata.prediction_length

    params = dict(
        freq=freq, prediction_length=prediction_length, method_name=method_name
    )

    predictor = RForecastPredictor(**params)
    predictions = list(predictor.predict(train_dataset))

    forecast_type = (
        QuantileForecast
        if method_name in QUANTILE_FORECAST_METHODS
        else SampleForecast
    )
    assert all(
        isinstance(prediction, forecast_type) for prediction in predictions
    )

    assert all(prediction.freq == freq for prediction in predictions)

    assert all(
        prediction.prediction_length == prediction_length
        for prediction in predictions
    )

    assert all(
        prediction.start_date == forecast_start(data)
        for data, prediction in zip(train_dataset, predictions)
    )

    evaluator = Evaluator()
    agg_metrics, item_metrics = backtest_metrics(
        test_dataset=test_dataset,
        predictor=predictor,
        evaluator=evaluator,
    )
    assert agg_metrics["mean_wQuantileLoss"] < TOLERANCE
    assert agg_metrics["NRMSE"] < TOLERANCE
    assert agg_metrics["RMSE"] < TOLERANCE
Example #10
def test_MASE_sMAPE_M4(timeseries, res):
    ts_datastructure = pd.Series
    evaluator = Evaluator(quantiles=QUANTILES)
    agg_df, item_df = calculate_metrics(
        timeseries, evaluator, ts_datastructure
    )

    assert abs((agg_df["MASE"] - res["MASE"]) / res["MASE"]) < 0.001, (
        "Scores for the metric MASE do not match: "
        "\nexpected: {} \nobtained: {}".format(res["MASE"], agg_df["MASE"])
    )
    assert abs((agg_df["MAPE"] - res["MAPE"]) / res["MAPE"]) < 0.001, (
        "Scores for the metric MAPE do not match: \nexpected: {} "
        "\nobtained: {}".format(res["MAPE"], agg_df["MAPE"])
    )
    assert abs((agg_df["sMAPE"] - res["sMAPE"]) / res["sMAPE"]) < 0.001, (
        "Scores for the metric sMAPE do not match: \nexpected: {} "
        "\nobtained: {}".format(res["sMAPE"], agg_df["sMAPE"])
    )
    assert (
        sum(abs(item_df["seasonal_error"].values - res["seasonal_error"]))
        < 0.001
    ), (
        "Scores for the metric seasonal_error do not match: \nexpected: {} "
        "\nobtained: {}".format(
            res["seasonal_error"], item_df["seasonal_error"].values
        )
    )
Example #11
def testDeepRenewal(type, hybridize, freq, num_feat_dynamic_real, cardinality):
    prediction_length = 3
    if type == "synthetic":
        train_ds, test_ds = make_dummy_datasets_with_features(
            prediction_length=prediction_length,
            freq=freq,
            num_feat_dynamic_real=num_feat_dynamic_real,
            cardinality=cardinality,
        )
    else:
        train_ds = make_constant_dataset(train_length=15, freq=freq)
        test_ds = train_ds
    trainer = Trainer(ctx="cpu", epochs=1,
                      hybridize=hybridize)  # hybridize false for development
    estimator = DeepRenewalEstimator(
        prediction_length=prediction_length,
        freq=freq,
        trainer=trainer,
    )
    predictor = estimator.train(training_data=train_ds)
    forecast_it, ts_it = make_evaluation_predictions(dataset=test_ds,
                                                     predictor=predictor,
                                                     num_samples=100)
    evaluator = Evaluator(calculate_owa=False, num_workers=0)

    agg_metrics, item_metrics = evaluator(ts_it,
                                          forecast_it,
                                          num_series=len(test_ds))
    if type == "synthetic":
        accuracy = 1.5
    else:
        accuracy = 1.3
    assert agg_metrics["ND"] <= accuracy
Example #12
def simple_main():
    import mxnet as mx
    from pprint import pprint

    dataset = get_dataset("electricity", regenerate=False)

    trainer = Trainer(
        ctx=mx.cpu(0),
        epochs=10,
        num_batches_per_epoch=200,
        learning_rate=1e-3,
        hybridize=False,
    )

    cardinality = int(dataset.metadata.feat_static_cat[0].cardinality)
    estimator = DeepFactorEstimator(
        trainer=trainer,
        context_length=168,
        cardinality=[cardinality],
        prediction_length=dataset.metadata.prediction_length,
        freq=dataset.metadata.freq,
    )

    predictor = estimator.train(dataset.train)

    forecast_it, ts_it = make_evaluation_predictions(dataset.test,
                                                     predictor=predictor,
                                                     num_eval_samples=100)

    agg_metrics, item_metrics = Evaluator()(ts_it,
                                            forecast_it,
                                            num_series=len(dataset.test))

    pprint(agg_metrics)
Example #13
def test_forecast_parser():
    # verify that the logged estimator, dataset statistics, and metrics can be
    # recovered from their string representation

    dataset_info, train_ds, test_ds = constant_dataset()

    estimator = make_estimator(dataset_info.metadata.freq,
                               dataset_info.prediction_length)
    assert repr(estimator) == repr(load_code(repr(estimator)))

    predictor = estimator.train(training_data=train_ds)

    stats = calculate_dataset_statistics(train_ds)
    assert stats == eval(repr(stats), globals(),
                         {"gluonts": gluonts})  # TODO: use load

    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, _ = backtest_metrics(test_ds, predictor, evaluator)

    # reset infinite metrics to 0 (otherwise the assertion below fails)
    for key, val in agg_metrics.items():
        if not math.isfinite(val):
            agg_metrics[key] = 0.0

    assert agg_metrics == load_code(dump_code(agg_metrics))
Example #14
    def _make_evaluation_predictions(self, predictor, test_list_dataset):
        """Evaluate predictor and generate sample forecasts.

        Args:
            predictor (gluonts.model.predictor.Predictor): Trained object used to make forecasts.
            test_list_dataset (gluonts.dataset.common.ListDataset): ListDataset created with the GluonDataset class.

        Returns:
            Dictionary of aggregated metrics over all timeseries.
            DataFrame of metrics for each timeseries (i.e., each target column).
            List of gluonts.model.forecast.Forecast (objects storing the predicted distributions as samples).
        """
        try:
            forecast_it, ts_it = make_evaluation_predictions(
                dataset=test_list_dataset,
                predictor=predictor,
                num_samples=100)
            forecasts = list(forecast_it)
        except Exception as err:
            raise ModelPredictionError(
                f"GluonTS '{self.model_name}' model crashed when making predictions. Full error: {err}"
            )
        evaluator = Evaluator(num_workers=min(2, multiprocessing.cpu_count()))
        agg_metrics, item_metrics = evaluator(
            ts_it, forecasts, num_series=len(test_list_dataset))
        return agg_metrics, item_metrics, forecasts
Example #15
def test_custom_eval_fn(
    timeseries,
    res,
    has_nans,
    input_type,
    eval_name,
    eval_fn,
    agg_str,
    fcst_type,
):
    ts_datastructure = pd.Series
    evaluator = Evaluator(
        quantiles=QUANTILES,
        custom_eval_fn={eval_name: [eval_fn, agg_str, fcst_type]},
    )

    agg_metrics, item_metrics = calculate_metrics(
        timeseries,
        evaluator,
        ts_datastructure,
        forecaster=naive_forecaster,
        has_nans=has_nans,
        input_type=input_type,
    )

    assert eval_name in agg_metrics.keys()
    assert eval_name in item_metrics.keys()

    for metric, score in agg_metrics.items():
        if metric in res.keys():
            assert np.isclose(score, res[metric], equal_nan=True), (
                "Scores for the metric {} do not match: \nexpected: {} "
                "\nobtained: {}".format(metric, res[metric], score))
Example #16
    def gluonts_evaluation(self, tss, preds, load_path, test_ds):

        evaluator = Evaluator(quantiles=[0.1, 0.3, 0.5, 0.7, 0.9])
        agg_metrics, item_metrics = evaluator(iter(tss),
                                              iter(preds),
                                              num_series=len(test_ds))
        item_metrics.to_csv(
            os.path.join(load_path, 'models_evaluation_metric.csv'))
        return item_metrics
Example #17
def test_simple_model():
    dsinfo, training_data, test_data = default_synthetic()

    freq = dsinfo.metadata.freq
    prediction_length = dsinfo.prediction_length
    context_length = 2 * prediction_length
    hidden_dimensions = [10, 10]

    net = LightningFeedForwardNetwork(
        freq=freq,
        prediction_length=prediction_length,
        context_length=context_length,
        hidden_dimensions=hidden_dimensions,
        distr_output=NormalOutput(),
        batch_norm=True,
        scaling=mean_abs_scaling,
    )

    transformation = Chain([
        AddObservedValuesIndicator(
            target_field=FieldName.TARGET,
            output_field=FieldName.OBSERVED_VALUES,
        ),
        InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            train_sampler=ExpectedNumInstanceSampler(num_instances=1),
            past_length=context_length,
            future_length=prediction_length,
            time_series_fields=[FieldName.OBSERVED_VALUES],
        ),
    ])

    data_loader = TrainDataLoader(
        training_data,
        batch_size=8,
        stack_fn=batchify,
        transform=transformation,
        num_batches_per_epoch=5,
    )

    trainer = pl.Trainer(max_epochs=3, callbacks=[], weights_summary=None)
    trainer.fit(net, train_dataloader=data_loader)

    predictor = net.get_predictor(transformation)

    forecast_it, ts_it = make_evaluation_predictions(
        dataset=test_data,
        predictor=predictor,
        num_samples=100,
    )

    evaluator = Evaluator(quantiles=[0.5, 0.9], num_workers=None)

    agg_metrics, _ = evaluator(ts_it, forecast_it)
Example #18
def test_accuracy(predictor_cls, parameters, accuracy):
    predictor = predictor_cls(freq=CONSTANT_DATASET_FREQ, **parameters)
    agg_metrics, item_metrics = backtest_metrics(
        test_dataset=constant_test_ds,
        predictor=predictor,
        evaluator=Evaluator(calculate_owa=True),
    )

    assert agg_metrics["ND"] <= accuracy
Example #19
def test_accuracy(Estimator, hyperparameters, accuracy):
    estimator = Estimator.from_hyperparameters(freq=freq, **hyperparameters)
    agg_metrics, item_metrics = backtest_metrics(
        train_dataset=train_ds,
        test_dataset=test_ds,
        forecaster=estimator,
        evaluator=Evaluator(calculate_owa=True),
    )

    assert agg_metrics["ND"] <= accuracy
Example #20
def forecast_metrics(tss, forecasts, quantiles=[0.1, 0.5, 0.9], show=True, dir_save=None):
    from gluonts.evaluation import Evaluator

    evaluator = Evaluator(quantiles=quantiles)
    agg_metrics, item_metrics = evaluator(
        iter(tss), iter(forecasts), num_series=len(forecasts))

    if show:
        log(json.dumps(agg_metrics, indent=4))
    if dir_save:
        # save the aggregate metrics as JSON in dir_save (file name is illustrative)
        with open(os.path.join(dir_save, "agg_metrics.json"), "w") as f:
            json.dump(agg_metrics, f, indent=4)

    return agg_metrics, item_metrics
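A hypothetical call of the helper above, with `tss` and `forecasts` coming from `make_evaluation_predictions` as in the other examples (the directory name is illustrative and must already exist):

agg, item = forecast_metrics(tss, forecasts, quantiles=[0.1, 0.5, 0.9],
                             show=True, dir_save="./metrics")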
Example #21
def run_test(env: TrainEnv, predictor: Predictor,
             test_dataset: Dataset) -> None:
    forecast_it, ts_it = backtest.make_evaluation_predictions(
        test_dataset, predictor=predictor, num_eval_samples=100)
    agg_metrics, _item_metrics = Evaluator()(ts_it,
                                             forecast_it,
                                             num_series=len(test_dataset))

    # we only log aggregate metrics for now as item metrics may be
    # very large
    log_metrics(env, agg_metrics)
Example #22
def mse(net, test):
    # `estimator` and `transformation` are defined elsewhere in the original script
    predictor = estimator.create_predictor(transformation, net)
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=test,  # test dataset
        predictor=predictor,  # predictor
        num_samples=100,  # number of sample paths we want for evaluation
    )
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(test))
    return agg_metrics['MSE']
Example #23
def train(epochs, prediction_length, num_layers, dropout_rate):

    #create train dataset
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TRAIN'] +
                     "/train.csv",
                     header=0,
                     index_col=0)

    training_data = ListDataset([{
        "start": df.index[0],
        "target": df.value[:]
    }],
                                freq="5min")

    #define DeepAR estimator
    deepar_estimator = DeepAREstimator(freq="5min",
                                       prediction_length=prediction_length,
                                       dropout_rate=dropout_rate,
                                       num_layers=num_layers,
                                       trainer=Trainer(epochs=epochs))

    #train the model
    deepar_predictor = deepar_estimator.train(training_data=training_data)

    #create test dataset
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TEST'] +
                     "/test.csv",
                     header=0,
                     index_col=0)

    test_data = ListDataset([{
        "start": df.index[0],
        "target": df.value[:]
    }],
                            freq="5min")

    #evaluate trained model on test data
    forecast_it, ts_it = make_evaluation_predictions(test_data,
                                                     deepar_predictor,
                                                     num_samples=100)
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss),
                                          iter(forecasts),
                                          num_series=len(test_data))

    print("MSE:", agg_metrics["MSE"])

    #save the model
    deepar_predictor.serialize(pathlib.Path(os.environ['SM_MODEL_DIR']))

    return deepar_predictor
Example #24
    def run_test(self, dataset, estimator, predictor):
        test_dataset = TransformedDataset(
            dataset,
            transformations=[
                FilterTransformation(lambda el: el['target'].shape[-1] >
                                     predictor.prediction_length)
            ],
        )

        len_orig = len(dataset)
        len_filtered = len(test_dataset)
        if len_orig > len_filtered:
            logging.warning(
                'Not all time-series in the test-channel have '
                'enough data to be used for evaluation. Proceeding with '
                f'{len_filtered}/{len_orig} '
                f'(~{int(len_filtered/len_orig*100)}%) items.')

        try:
            log.metric('test_dataset_stats', test_dataset.calc_stats())
        except GluonTSDataError as error:
            logging.error(
                f"Failure whilst calculating stats for test dataset: {error}")
            return

        if isinstance(estimator, GluonEstimator) and isinstance(
                predictor, GluonPredictor):
            inference_data_loader = InferenceDataLoader(
                dataset=test_dataset,
                transform=predictor.input_transform,
                batch_size=estimator.trainer.batch_size,
                ctx=estimator.trainer.ctx,
                float_type=estimator.float_type,
            )

            if estimator.trainer.hybridize:
                predictor.hybridize(batch=next(iter(inference_data_loader)))

            if self.hyperparameters.get('use_symbol_block_predictor'):
                predictor = predictor.as_symbol_block_predictor(
                    batch=next(iter(inference_data_loader)))

        num_eval_samples = self.hyperparameters.get('num_eval_samples', 100)
        quantiles = self.hyperparameters.get(
            'quantiles', (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9))

        # we only log aggregate metrics for now as item metrics may be
        # very large
        predictions, input_timeseries = backtest.make_evaluation_predictions(
            test_dataset, predictor, num_eval_samples)
        agg_metrics, _item_metrics = Evaluator(quantiles=quantiles)(
            input_timeseries, predictions, num_series=len_filtered)
        log.metric("agg_metrics", agg_metrics)
Example #25
def train(arguments):
    """
    Generic train method that trains a specified estimator on a specified
    dataset.
    """

    logger.info("Downloading estimator config.")
    estimator_config = Path(arguments.estimator) / "estimator.json"
    with estimator_config.open() as config_file:
        estimator = serde.load_json(config_file.read())

    logger.info("Downloading dataset.")
    if arguments.s3_dataset is None:
        # load built in dataset
        dataset = datasets.get_dataset(arguments.dataset)
    else:
        # load custom dataset
        s3_dataset_dir = Path(arguments.s3_dataset)
        dataset = common.load_datasets(
            metadata=s3_dataset_dir,
            train=s3_dataset_dir / "train",
            test=s3_dataset_dir / "test",
        )

    logger.info("Starting model training.")
    predictor = estimator.train(dataset.train)
    forecast_it, ts_it = backtest.make_evaluation_predictions(
        dataset=dataset.test,
        predictor=predictor,
        num_samples=int(arguments.num_samples),
    )

    logger.info("Starting model evaluation.")
    evaluator = Evaluator(quantiles=eval(arguments.quantiles))

    agg_metrics, item_metrics = evaluator(ts_it,
                                          forecast_it,
                                          num_series=len(list(dataset.test)))

    # required for metric tracking.
    for name, value in agg_metrics.items():
        logger.info(f"gluonts[metric-{name}]: {value}")

    # save the evaluation results
    metrics_output_dir = Path(arguments.output_data_dir)
    with open(metrics_output_dir / "agg_metrics.json", "w") as f:
        json.dump(agg_metrics, f)
    with open(metrics_output_dir / "item_metrics.csv", "w") as f:
        item_metrics.to_csv(f, index=False)

    # save the model
    model_output_dir = Path(arguments.model_dir)
    predictor.serialize(model_output_dir)
Example #26
def run_test(forecaster, test_dataset):
    agg_metrics, _item_metrics = backtest.backtest_metrics(
        train_dataset=None,
        test_dataset=test_dataset,
        forecaster=forecaster,
        evaluator=Evaluator(quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8,
                                       0.9)),
        num_eval_samples=100,
    )

    # we only log aggregate metrics for now as item metrics may be
    # very large
    log.metric("agg_metrics", agg_metrics)
Example #27
    def test_accuracy(Estimator, hyperparameters, accuracy):
        estimator = from_hyperparameters(Estimator, hyperparameters, dsinfo)
        predictor = estimator.train(training_data=dsinfo.train_ds)
        agg_metrics, item_metrics = backtest_metrics(
            test_dataset=dsinfo.test_ds,
            predictor=predictor,
            evaluator=Evaluator(calculate_owa=statsmodels is not None),
        )

        if dsinfo.name == "synthetic":
            accuracy = 10.0

        assert agg_metrics["ND"] <= accuracy
Example #28
    def test_training_external_features(self):
        prediction_length = 2
        frequency = "3M"
        gluon_dataset = ListDataset(self.timeseries, freq=frequency)
        estimator = AutoARIMAEstimator(prediction_length=prediction_length, freq=frequency, season_length=4, use_feat_dynamic_real=True)
        predictor = estimator.train(gluon_dataset)

        forecast_it, ts_it = make_evaluation_predictions(dataset=gluon_dataset, predictor=predictor, num_samples=100)
        timeseries = list(ts_it)
        forecasts = list(forecast_it)
        assert forecasts[1].samples.shape == (100, 2)
        evaluator = Evaluator()
        agg_metrics, item_metrics = evaluator(iter(timeseries), iter(forecasts), num_series=len(gluon_dataset))
        assert agg_metrics["MAPE"] is not None
Example #29
    def __init__(
        self,
        var_results: List[VARResultsWrapper],
        train_datasets: List[np.ndarray],
        original_datasets: List[np.ndarray],
        initial_log_values: List[np.ndarray],
        horizon: int = 6,
        freq: str = 'M',
        var_diff: bool = False,
    ) -> None:

        self.var_results = var_results
        self.lag_orders = [results.k_ar for results in var_results]
        self.horizon = horizon
        self.train_datasets = train_datasets
        self.original_datasets = [
            np.ma.masked_invalid(original_dataset)
            for original_dataset in original_datasets
        ]
        self.initial_log_values = initial_log_values
        self.evaluator = Evaluator()
        self.freq = freq
        self.var_diff = var_diff
Example #30
def save_item_metrics(dataset, forecasts, tss, model, metric):
    evaluator = Evaluator(quantiles=[0.005, 0.1, 0.5, 0.9, 0.995])
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts),
                                          num_series=len(dataset.test_ds))
    if metric == "Coverage":
        low_coverage = item_metrics[["Coverage[0.005]"]].to_numpy()
        high_coverage = item_metrics[["Coverage[0.995]"]].to_numpy()
        low_score = 0.005 - low_coverage
        high_score = high_coverage - 0.995
        item_metric = high_score + low_score
    else:
        item_metric = item_metrics[[metric]].to_numpy()

    np.save("item_metrics/" + metric + '_' + model + '.npy', item_metric)