def test_forecast_parser():
    # Check that the logged estimator, dataset statistics and metrics can be
    # recovered from their string representations.
    dataset_info, train_ds, test_ds = constant_dataset()
    estimator = make_estimator(
        dataset_info.metadata.freq, dataset_info.prediction_length
    )
    assert repr(estimator) == repr(load_code(repr(estimator)))

    predictor = estimator.train(training_data=train_ds)

    stats = calculate_dataset_statistics(train_ds)
    # TODO: use load
    assert stats == eval(repr(stats), globals(), {"gluonts": gluonts})

    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, _ = backtest_metrics(test_ds, predictor, evaluator)

    # Infinite metric values cannot survive the dump/load round-trip below,
    # so reset them to 0 first (otherwise the assertion fails).
    agg_metrics = {
        name: (value if math.isfinite(value) else 0.0)
        for name, value in agg_metrics.items()
    }
    assert agg_metrics == load_code(dump_code(agg_metrics))
def test_listing_1():
    """
    Test GluonTS paper examples from arxiv paper:
    https://arxiv.org/abs/1906.05264

    Listing 1
    """
    from gluonts.dataset.repository.datasets import get_dataset
    from gluonts.model.deepar import DeepAREstimator
    from gluonts.trainer import Trainer
    from gluonts.evaluation import Evaluator
    from gluonts.evaluation.backtest import backtest_metrics

    # We use electricity in the paper but that would take too long to run in
    # the unit test
    dataset_info, train_ds, test_ds = constant_dataset()
    meta = dataset_info.metadata

    estimator = DeepAREstimator(
        freq=meta.time_granularity,
        prediction_length=1,
        trainer=Trainer(epochs=1, batch_size=32),
    )
    predictor = estimator.train(train_ds)

    evaluator = Evaluator(quantiles=(0.1, 0.5, 0.9))
    agg_metrics, item_metrics = backtest_metrics(
        train_dataset=train_ds,
        test_dataset=test_ds,
        forecaster=predictor,
        evaluator=evaluator,
    )
def test_smoke(
    hybridize: bool, target_dim_sample: int, use_marginal_transformation: bool
):
    # Smoke test: train a tiny GPVAR model for two short epochs and verify
    # that the backtest ND metric is not wildly off.
    num_batches_per_epoch = 1
    estimator = GPVAREstimator(
        distr_output=LowrankGPOutput(rank=2),
        num_cells=1,
        num_layers=1,
        pick_incomplete=True,
        prediction_length=metadata.prediction_length,
        target_dim=target_dim,
        target_dim_sample=target_dim_sample,
        freq=metadata.freq,
        use_marginal_transformation=use_marginal_transformation,
        trainer=Trainer(
            epochs=2,
            batch_size=10,
            learning_rate=1e-4,
            num_batches_per_epoch=num_batches_per_epoch,
            hybridize=hybridize,
        ),
    )

    predictor = estimator.train(training_data=dataset.train)
    agg_metrics, _ = backtest_metrics(
        test_dataset=dataset.test,
        predictor=predictor,
        num_samples=10,
        evaluator=MultivariateEvaluator(
            quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
        ),
    )
    assert agg_metrics["ND"] < 2.5
def test_accuracy(Estimator, hyperparameters, accuracy):
    # Train the estimator on the module-level fixture dataset and check that
    # the normalized deviation stays within the configured tolerance.
    estimator = Estimator.from_hyperparameters(freq=freq, **hyperparameters)
    agg_metrics, item_metrics = backtest_metrics(
        train_dataset=train_ds, test_dataset=test_ds, forecaster=estimator
    )
    assert agg_metrics["ND"] <= accuracy
def test_accuracy(predictor_cls, parameters, accuracy):
    # Instantiate the predictor directly (no training phase) and verify its
    # normalized deviation on the constant dataset is within tolerance.
    predictor = predictor_cls(freq=CONSTANT_DATASET_FREQ, **parameters)
    agg_metrics, item_metrics = backtest_metrics(
        test_dataset=constant_test_ds,
        predictor=predictor,
        evaluator=Evaluator(calculate_owa=True),
    )
    assert agg_metrics["ND"] <= accuracy
def test_accuracy(Estimator, hyperparameters, accuracy):
    # Build the estimator from hyperparameters, backtest it (including OWA
    # computation) and check the ND metric against the expected bound.
    estimator = Estimator.from_hyperparameters(freq=freq, **hyperparameters)
    agg_metrics, item_metrics = backtest_metrics(
        train_dataset=train_ds,
        test_dataset=test_ds,
        forecaster=estimator,
        evaluator=Evaluator(calculate_owa=True),
    )
    assert agg_metrics["ND"] <= accuracy
def test_benchmark(caplog):
    # makes sure that information logged can be reconstructed from previous
    # logs
    with caplog.at_level(logging.DEBUG):
        dataset_info, train_ds, test_ds = constant_dataset()
        estimator = make_estimator(
            dataset_info.metadata.freq, dataset_info.prediction_length
        )
        evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
        backtest_metrics(train_ds, test_ds, estimator, evaluator)
        train_stats = calculate_dataset_statistics(train_ds)
        test_stats = calculate_dataset_statistics(test_ds)

        # Rebuild the backtest information from the captured log text and
        # compare it against what was actually used above.
        log_info = BacktestInformation.make_from_log_contents(caplog.text)
        assert train_stats == log_info.train_dataset_stats
        assert test_stats == log_info.test_dataset_stats
        assert equals(estimator, log_info.estimator)
        print(log_info)
def run_test(forecaster, test_dataset):
    # Backtest the forecaster and record the aggregate metrics.
    evaluator = Evaluator(
        quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
    )
    agg_metrics, _item_metrics = backtest.backtest_metrics(
        train_dataset=None,
        test_dataset=test_dataset,
        forecaster=forecaster,
        evaluator=evaluator,
        num_eval_samples=100,
    )
    # we only log aggregate metrics for now as item metrics may be
    # very large
    log.metric("agg_metrics", agg_metrics)
def test_accuracy(Estimator, hyperparameters, accuracy):
    # Train the estimator on the fixture's training split, backtest it on
    # the test split, and check ND against the expected bound.
    estimator = from_hyperparameters(Estimator, hyperparameters, dsinfo)
    predictor = estimator.train(training_data=dsinfo.train_ds)
    agg_metrics, item_metrics = backtest_metrics(
        test_dataset=dsinfo.test_ds,
        predictor=predictor,
        # OWA requires statsmodels; skip it when the package is unavailable.
        evaluator=Evaluator(calculate_owa=statsmodels is not None),
    )

    # The synthetic dataset is harder, so a looser bound is used for it.
    if dsinfo.name == "synthetic":
        accuracy = 10.0

    assert agg_metrics["ND"] <= accuracy
def test_benchmark(caplog):
    # makes sure that information logged can be reconstructed from previous
    # logs
    caplog.set_level(logging.DEBUG, logger="log.txt")
    dataset_info, train_ds, test_ds = constant_dataset()
    estimator = make_estimator(
        dataset_info.metadata.time_granularity, dataset_info.prediction_length
    )
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    backtest_metrics(train_ds, test_ds, estimator, evaluator)
    train_stats = calculate_dataset_statistics(train_ds)
    test_stats = calculate_dataset_statistics(test_ds)

    # Parse the on-disk log file back into structured backtest information
    # and compare against what was actually used above.
    log_file = str(Path(__file__).parent / "log.txt")
    log_info = BacktestInformation.make_from_log(log_file)
    assert train_stats == log_info.train_dataset_stats
    assert test_stats == log_info.test_dataset_stats
    assert equals(estimator, log_info.estimator)
    print(log_info)
def test_localizer():
    # Wrap a MeanEstimator in a Localizer (trains one model per series) and
    # make sure the backtest runs end-to-end on a small synthetic dataset.
    series = [
        {
            "start": "2012-01-01",
            "target": (np.zeros(20) + i * 0.1 + 0.01),
            "id": f"{i}",
        }
        for i in range(3)
    ]
    dataset = ListDataset(data_iter=series, freq="1H")

    estimator = MeanEstimator(prediction_length=10, freq="1H", num_samples=50)
    local_pred = Localizer(estimator=estimator)
    agg_metrics, _ = backtest_metrics(
        test_dataset=dataset, predictor=local_pred
    )
def test_localizer():
    # Wrap a MeanEstimator in a Localizer (trains one model per series) and
    # make sure the backtest runs end-to-end on a small synthetic dataset.
    series = [
        {
            'start': '2012-01-01',
            'target': (np.zeros(20) + i * 0.1 + 0.01),
            'id': f'{i}',
        }
        for i in range(3)
    ]
    dataset = ListDataset(data_iter=series, freq='1H')

    estimator = MeanEstimator(prediction_length=10, freq='1H', num_samples=50)
    local_pred = Localizer(estimator=estimator)
    agg_metrics, _ = backtest_metrics(
        train_dataset=None, test_dataset=dataset, forecaster=local_pred
    )
def test_general_functionality() -> None:
    # End-to-end sanity check: train a small DeepAR model and verify the
    # backtest produces metrics at all.
    ds_info, train_ds, test_ds = constant_dataset()
    freq = ds_info.metadata.freq
    prediction_length = ds_info.prediction_length

    trainer = Trainer(epochs=3, num_batches_per_epoch=5)
    estimator = DeepAREstimator(
        prediction_length=prediction_length, freq=freq, trainer=trainer
    )
    predictor = estimator.train(training_data=train_ds)

    agg_metrics, item_metrics = backtest_metrics(
        test_dataset=test_ds,
        predictor=predictor,
        evaluator=Evaluator(calculate_owa=False),
    )

    # just some sanity check
    assert (
        agg_metrics is not None and item_metrics is not None
    ), "Metrics should not be None if everything went smooth."
def test_deepvar(
    distr_output,
    num_batches_per_epoch,
    Estimator,
    hybridize,
    use_marginal_transformation,
):
    # Smoke test for DeepVAR-style multivariate estimators: one short epoch
    # with a tiny learning rate, then a loose bound on the backtest ND.
    #
    # Fix: removed a commented-out duplicate of the `target_dim=target_dim`
    # keyword that was left behind in the argument list.
    estimator = Estimator(
        num_cells=20,
        num_layers=1,
        pick_incomplete=True,
        target_dim=target_dim,
        prediction_length=metadata.prediction_length,
        freq=metadata.freq,
        distr_output=distr_output,
        scaling=False,
        use_marginal_transformation=use_marginal_transformation,
        trainer=Trainer(
            epochs=1,
            batch_size=8,
            learning_rate=1e-10,
            num_batches_per_epoch=num_batches_per_epoch,
            hybridize=hybridize,
        ),
    )

    predictor = estimator.train(training_data=dataset.train)
    agg_metrics, _ = backtest_metrics(
        test_dataset=dataset.test,
        predictor=predictor,
        evaluator=MultivariateEvaluator(
            quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
        ),
    )
    assert agg_metrics["ND"] < 1.5
def test_appendix_c():
    """
    Test GluonTS paper examples from arxiv paper:
    https://arxiv.org/abs/1906.05264

    Appendix C
    """
    from typing import List
    from mxnet import gluon
    from gluonts.model.estimator import GluonEstimator
    from gluonts.model.predictor import Predictor, RepresentableBlockPredictor
    from gluonts.trainer import Trainer
    from gluonts.transform import (
        InstanceSplitter,
        FieldName,
        Transformation,
        ExpectedNumInstanceSampler,
    )
    from gluonts.core.component import validated
    from gluonts.support.util import copy_parameters

    class MyTrainNetwork(gluon.HybridBlock):
        def __init__(self, prediction_length, cells, act_type, **kwargs):
            super().__init__(**kwargs)
            self.prediction_length = prediction_length
            with self.name_scope():
                # Set up a network that predicts the target
                self.nn = gluon.nn.HybridSequential()
                for c in cells:
                    self.nn.add(gluon.nn.Dense(units=c, activation=act_type))
                self.nn.add(
                    gluon.nn.Dense(
                        units=self.prediction_length, activation=act_type
                    )
                )

        def hybrid_forward(self, F, past_target, future_target):
            prediction = self.nn(past_target)
            # calculate L1 loss to learn the median
            return (prediction - future_target).abs().mean(axis=-1)

    class MyPredNetwork(MyTrainNetwork):
        # The prediction network only receives
        # past target and returns predictions
        def hybrid_forward(self, F, past_target):
            prediction = self.nn(past_target)
            return prediction.expand_dims(axis=1)

    class MyEstimator(GluonEstimator):
        @validated()
        def __init__(
            self,
            freq: str,
            prediction_length: int,
            act_type: str = "relu",
            context_length: int = 30,
            cells: List[int] = [40, 40, 40],
            trainer: Trainer = Trainer(epochs=10),
        ) -> None:
            super().__init__(trainer=trainer)
            self.freq = freq
            self.prediction_length = prediction_length
            self.act_type = act_type
            self.context_length = context_length
            self.cells = cells

        def create_training_network(self) -> MyTrainNetwork:
            return MyTrainNetwork(
                prediction_length=self.prediction_length,
                cells=self.cells,
                act_type=self.act_type,
            )

        def create_predictor(
            self,
            transformation: Transformation,
            trained_network: gluon.HybridBlock,
        ) -> Predictor:
            prediction_network = MyPredNetwork(
                prediction_length=self.prediction_length,
                cells=self.cells,
                act_type=self.act_type,
            )
            copy_parameters(trained_network, prediction_network)
            return RepresentableBlockPredictor(
                input_transform=transformation,
                prediction_net=prediction_network,
                batch_size=self.trainer.batch_size,
                freq=self.freq,
                prediction_length=self.prediction_length,
                ctx=self.trainer.ctx,
            )

        def create_transformation(self):
            # Model specific input transform
            # Here we use a transformation that randomly
            # selects training samples from all series.
            return InstanceSplitter(
                target_field=FieldName.TARGET,
                is_pad_field=FieldName.IS_PAD,
                start_field=FieldName.START,
                forecast_start_field=FieldName.FORECAST_START,
                train_sampler=ExpectedNumInstanceSampler(num_instances=1),
                past_length=self.context_length,
                future_length=self.prediction_length,
            )

    from gluonts.trainer import Trainer
    from gluonts.evaluation import Evaluator
    from gluonts.evaluation.backtest import backtest_metrics

    dataset_info, train_ds, test_ds = constant_dataset()
    meta = dataset_info.metadata

    estimator = MyEstimator(
        freq=meta.time_granularity,
        prediction_length=1,
        trainer=Trainer(epochs=1, batch_size=32),
    )
    predictor = estimator.train(train_ds)

    evaluator = Evaluator(quantiles=(0.1, 0.5, 0.9))
    agg_metrics, item_metrics = backtest_metrics(
        train_dataset=train_ds,
        test_dataset=test_ds,
        forecaster=predictor,
        evaluator=evaluator,
    )
def test_hybridize(Estimator, hyperparameters):
    # Smoke test: training + backtest must run end-to-end without raising;
    # no accuracy bound is asserted here.
    estimator = Estimator.from_hyperparameters(freq=freq, **hyperparameters)
    backtest_metrics(
        train_dataset=train_ds, test_dataset=test_ds, forecaster=estimator
    )
trainer=Trainer(epochs=10, num_batches_per_epoch=10), ) train1_output = estimator.train_model(dataset.train) # callback to overwrite parameters of the new model with the already trained model def copy_params(net): params1 = train1_output.trained_net.collect_params() params2 = net.collect_params() for p1, p2 in zip(params1.values(), params2.values()): p2.set_data(p1.data()) estimator = SimpleFeedForwardEstimator( prediction_length=dataset.metadata.prediction_length, freq=dataset.metadata.freq, trainer=Trainer(epochs=5, num_batches_per_epoch=10, post_initialize_cb=copy_params), ) new_pred = estimator.train(dataset.train) ev = Evaluator(num_workers=0) agg_metrics1, _ = backtest_metrics(dataset.test, train1_output.predictor, evaluator=ev) agg_metrics2, _ = backtest_metrics(dataset.test, new_pred, evaluator=ev) df = pd.DataFrame([agg_metrics1, agg_metrics2], index=["model1", "model2"]) print(df)
for dataset_name in datasets:
    # The dataset only depends on dataset_name, so load it once per dataset
    # instead of re-loading it for every estimator (regenerate=False makes
    # get_dataset an idempotent cached load).
    dataset = get_dataset(
        dataset_name=dataset_name,
        regenerate=False,
        path="../datasets/",
    )
    for Estimator in Estimators:
        estimator = Estimator(
            prediction_length=dataset.metadata.prediction_length,
            freq=dataset.metadata.time_granularity,
        )
        estimator_name = type(estimator).__name__
        print(f"evaluating {estimator_name} on {dataset_name}")

        agg_metrics, item_metrics = backtest_metrics(
            train_dataset=dataset.train,
            test_dataset=dataset.test,
            forecaster=estimator,
        )

        # Persist only the aggregate metrics for this (estimator, dataset)
        # pair under the configured evaluation directory.
        persist_evaluation(
            estimator_name=estimator_name,
            dataset=dataset_name,
            evaluation=agg_metrics,
            evaluation_path=dir_path,
        )