def test_callbacks():
    n_epochs = 4

    history = TrainingHistory()
    iter_avg = ModelIterationAveraging(avg_strategy=NTA(epochs=2 * n_epochs))

    dataset = get_dataset("m4_hourly")
    prediction_length = dataset.metadata.prediction_length
    freq = dataset.metadata.freq

    estimator = SimpleFeedForwardEstimator(
        prediction_length=prediction_length,
        freq=freq,
        trainer=Trainer(epochs=n_epochs, callbacks=[history, iter_avg]),
    )
    predictor = estimator.train(dataset.train, num_workers=None)

    assert len(history.loss_history) == n_epochs

    # Warm-start a second run from the trained predictor; the shared history
    # keeps accumulating, hence twice as many epochs recorded below.
    ws = WarmStart(predictor=predictor)
    estimator = SimpleFeedForwardEstimator(
        prediction_length=prediction_length,
        freq=freq,
        trainer=Trainer(epochs=n_epochs, callbacks=[history, iter_avg, ws]),
    )
    predictor = estimator.train(dataset.train, num_workers=None)

    assert len(history.loss_history) == n_epochs * 2

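# Usage sketch (not from the original suite): TrainingHistory records one loss
# value per epoch, as the assertions above rely on, so a small helper like
# this could dump the recorded curve after training.
def print_loss_curve(history: TrainingHistory) -> None:
    for epoch_no, loss in enumerate(history.loss_history):
        print(f"epoch {epoch_no}: average training loss {loss}")
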
def train(file_path, P, frac):
    target, df = create_dataset(file_path)

    train_size = int(frac * df.shape[0])
    starts = [pd.Timestamp(df.index[0]) for _ in range(len(target))]

    # The grouper dimension must match the number of series (df.shape[1]),
    # consistent with target_dim below.
    grouper_train = MultivariateGrouper(max_target_dim=df.shape[1])
    grouper_test = MultivariateGrouper(max_target_dim=df.shape[1])

    train_ds = ListDataset(
        [
            {FieldName.TARGET: targets, FieldName.START: start}
            for (targets, start) in zip(target[:, 0 : train_size - P], starts)
        ],
        freq="1B",
    )
    train_ds = grouper_train(train_ds)

    # Build rolling test sets, each extended by one more window of length P.
    rolling_test = []
    i = 0
    delay = 0
    while train_size + delay < df.shape[0]:
        delay = int(P) * i
        test_ds = ListDataset(
            [
                {FieldName.TARGET: targets, FieldName.START: start}
                for (targets, start) in zip(
                    target[:, 0 : train_size + delay], starts
                )
            ],
            freq="1B",
        )
        test_ds = grouper_test(test_ds)
        rolling_test.append(test_ds)
        i += 1

    estimator = GPVAREstimator(
        prediction_length=P,  # P doubles as the rolling window length
        context_length=6,
        freq="1B",
        target_dim=df.shape[1],
        trainer=Trainer(ctx="cpu", epochs=200),
    )

    return train_ds, rolling_test, estimator, train_size

def test_listing_1():
    """
    Test GluonTS paper examples from arxiv paper:
    https://arxiv.org/abs/1906.05264

    Listing 1
    """
    from gluonts.dataset.repository.datasets import get_dataset
    from gluonts.evaluation import backtest_metrics, Evaluator
    from gluonts.model.deepar import DeepAREstimator
    from gluonts.mx.trainer import Trainer

    # We use electricity in the paper but that would take too long to run in
    # the unit test
    dataset_info, train_ds, test_ds = constant_dataset()

    meta = dataset_info.metadata
    estimator = DeepAREstimator(
        freq=meta.freq,
        prediction_length=1,
        trainer=Trainer(epochs=1, batch_size=32),
    )
    predictor = estimator.train(train_ds)
    evaluator = Evaluator(quantiles=(0.1, 0.5, 0.9))
    agg_metrics, item_metrics = backtest_metrics(
        test_dataset=test_ds,
        predictor=predictor,
        evaluator=evaluator,
    )

def assert_valid_param(param_name: str, param_values: List[Any]) -> None:
    try:
        for x in param_values:
            Trainer(**{param_name: x})
    except Exception as e:
        # pytest.fail raises Failed itself, so no re-raise is needed here.
        pytest.fail(f'Unexpected exception when initializing Trainer: "{e}"')

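# Hypothetical usage sketch: the test name and values below are illustrative,
# but all three parameter names are real Trainer arguments, so each call
# should construct a Trainer without raising.
def test_trainer_accepts_valid_params():
    assert_valid_param("learning_rate", [1e-4, 1e-2])
    assert_valid_param("epochs", [1, 100])
    assert_valid_param("num_batches_per_epoch", [10, 50])
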
def __init__(
    self,
    freq: str,
    context_length: int,
    prediction_length: int,
    trainer: Trainer = Trainer(),
    num_layers: int = 1,
    num_cells: int = 50,
    cell_type: str = "lstm",
    num_parallel_samples: int = 100,
    cardinality: List[int] = list([1]),
    embedding_dimension: int = 10,
    distr_output: DistributionOutput = StudentTOutput(),
) -> None:
    model = RNN(mode=cell_type, num_layers=num_layers, num_hidden=num_cells)
    super().__init__(
        model=model,
        is_sequential=True,
        freq=freq,
        context_length=context_length,
        prediction_length=prediction_length,
        trainer=trainer,
        num_parallel_samples=num_parallel_samples,
        cardinality=cardinality,
        embedding_dimension=embedding_dimension,
        distr_output=distr_output,
    )

def test_quantile_levels():
    from gluonts.dataset.common import ListDataset
    from gluonts.model.tft import TemporalFusionTransformerEstimator
    from gluonts.mx.trainer import Trainer

    dataset = ListDataset(
        [{"start": "2020-01-01", "target": [10.0] * 50}], freq="D"
    )
    estimator = TemporalFusionTransformerEstimator(
        freq="D", prediction_length=2, trainer=Trainer(epochs=1)
    )
    predictor = estimator.train(training_data=dataset)
    forecast = next(iter(predictor.predict(dataset)))

    assert isinstance(forecast, QuantileForecast)
    assert isinstance(predictor, GluonPredictor)
    assert isinstance(
        predictor.prediction_net, TemporalFusionTransformerPredictionNetwork
    )
    assert all(
        float(k) == q
        for k, q in zip(
            forecast.forecast_keys,
            predictor.prediction_net.output.quantiles,
        )
    )

def __init__(
    self,
    freq: str,
    context_length: int,
    prediction_length: int,
    trainer: Trainer = Trainer(),
    hidden_dim_sequence=list([50]),
    num_parallel_samples: int = 100,
    cardinality: List[int] = list([1]),
    embedding_dimension: int = 10,
    distr_output: DistributionOutput = StudentTOutput(),
) -> None:
    model = nn.HybridSequential()
    for layer, layer_dim in enumerate(hidden_dim_sequence):
        model.add(
            nn.Dense(
                layer_dim,
                flatten=False,
                activation="relu",
                prefix="mlp_%d_" % layer,
            )
        )
    super().__init__(
        model=model,
        is_sequential=False,
        freq=freq,
        context_length=context_length,
        prediction_length=prediction_length,
        trainer=trainer,
        num_parallel_samples=num_parallel_samples,
        cardinality=cardinality,
        embedding_dimension=embedding_dimension,
        distr_output=distr_output,
    )

def test_smoke(
    hybridize: bool, target_dim_sample: int, use_marginal_transformation: bool
):
    num_batches_per_epoch = 1
    estimator = GPVAREstimator(
        distr_output=LowrankGPOutput(rank=2),
        num_cells=1,
        num_layers=1,
        pick_incomplete=True,
        prediction_length=metadata.prediction_length,
        target_dim=target_dim,
        target_dim_sample=target_dim_sample,
        freq=metadata.freq,
        use_marginal_transformation=use_marginal_transformation,
        trainer=Trainer(
            epochs=2,
            batch_size=10,
            learning_rate=1e-4,
            num_batches_per_epoch=num_batches_per_epoch,
            hybridize=hybridize,
        ),
    )
    predictor = estimator.train(training_data=dataset.train)
    agg_metrics, _ = backtest_metrics(
        test_dataset=dataset.test,
        predictor=predictor,
        num_samples=10,
        evaluator=MultivariateEvaluator(
            quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
        ),
    )
    assert agg_metrics["ND"] < 2.5

def test_symbol_and_array(hybridize: bool):
    # Tests for cases like the one presented in issue 1211, in which the
    # Inflated Beta outputs used a method only available to arrays and not
    # to symbols. We simply go through a short training to ensure no
    # exceptions are raised.
    data = [
        {
            "target": [0, 0.0460043, 0.263906, 0.4103112, 1],
            "start": pd.to_datetime("1999-01-04"),
        },
        {
            "target": [1, 0.65815564, 0.44982578, 0.58875054, 0],
            "start": pd.to_datetime("1999-01-04"),
        },
    ]
    dataset = common.ListDataset(data, freq="W-MON", one_dim_target=True)

    trainer = Trainer(epochs=1, num_batches_per_epoch=2, hybridize=hybridize)
    estimator = deepar.DeepAREstimator(
        freq="W",
        prediction_length=2,
        trainer=trainer,
        distr_output=ZeroAndOneInflatedBetaOutput(),
        context_length=2,
        batch_size=1,
        scaling=False,
    )
    estimator.train(dataset)

def __init__(
    self,
    freq: str,
    prediction_length: int,
    num_hidden_global: int = 50,
    num_layers_global: int = 1,
    num_factors: int = 10,
    num_hidden_local: int = 5,
    num_layers_local: int = 1,
    cell_type: str = "lstm",
    trainer: Trainer = Trainer(),
    context_length: Optional[int] = None,
    num_parallel_samples: int = 100,
    cardinality: List[int] = list([1]),
    embedding_dimension: int = 10,
    distr_output: DistributionOutput = StudentTOutput(),
) -> None:
    super().__init__(trainer=trainer)

    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    assert num_layers_global > 0, "The value of `num_layers` should be > 0"
    assert num_hidden_global > 0, "The value of `num_hidden` should be > 0"
    assert num_factors > 0, "The value of `num_factors` should be > 0"
    assert (
        num_hidden_local > 0
    ), "The value of `num_hidden_local` should be > 0"
    assert (
        num_layers_local > 0
    ), "The value of `num_layers_local` should be > 0"
    assert all(
        c > 0 for c in cardinality
    ), "Elements of `cardinality` should be > 0"
    assert (
        embedding_dimension > 0
    ), "The value of `embedding_dimension` should be > 0"
    assert (
        num_parallel_samples > 0
    ), "The value of `num_parallel_samples` should be > 0"

    self.freq = freq
    self.context_length = (
        context_length if context_length is not None else prediction_length
    )
    self.prediction_length = prediction_length
    self.distr_output = distr_output
    self.num_parallel_samples = num_parallel_samples
    self.cardinality = cardinality
    self.embedding_dimensions = [embedding_dimension for _ in cardinality]

    self.global_model = RNNModel(
        mode=cell_type,
        num_hidden=num_hidden_global,
        num_layers=num_layers_global,
        num_output=num_factors,
    )
    # TODO: Allow the local model to be defined as an arbitrary local model,
    # e.g. DF-GP and DF-LDS
    self.local_model = RNNModel(
        mode=cell_type,
        num_hidden=num_hidden_local,
        num_layers=num_layers_local,
        num_output=1,
    )

def initialize_model() -> nn.HybridBlock:
    # dummy training data
    N = 10  # number of time series
    T = 100  # number of timesteps
    prediction_length = 24
    freq = "1H"
    custom_dataset = np.zeros(shape=(N, T))

    # start can be different for each time series
    start = pd.Timestamp("01-01-2019", freq=freq)
    train_ds = ListDataset(
        [
            {"target": x, "start": start}
            for x in custom_dataset[:, :-prediction_length]
        ],
        freq=freq,
    )

    # create a simple model
    estimator = SimpleFeedForwardEstimator(
        num_hidden_dimensions=[10],
        prediction_length=prediction_length,
        context_length=T,
        freq=freq,
        trainer=Trainer(
            ctx="cpu",
            epochs=1,
            learning_rate=1e-3,
            num_batches_per_epoch=1,
        ),
    )
    # train model
    predictor = estimator.train(train_ds)

    return predictor.prediction_net

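# Hedged usage sketch: the prediction network returned above is an ordinary
# Gluon HybridBlock, so it can be hybridized and its parameters listed like
# any other block.
net = initialize_model()
net.hybridize()
print(net.collect_params())
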
def __init__(
    self,
    freq: str,
    prediction_length: int,
    context_length: Optional[int] = None,
    trainer: Trainer = Trainer(),
    hidden_dim: int = 32,
    variable_dim: Optional[int] = None,
    num_heads: int = 4,
    num_outputs: int = 3,
    num_instance_per_series: int = 100,
    dropout_rate: float = 0.1,
    time_features: List[TimeFeature] = [],
    static_cardinalities: Dict[str, int] = {},
    dynamic_cardinalities: Dict[str, int] = {},
    static_feature_dims: Dict[str, int] = {},
    dynamic_feature_dims: Dict[str, int] = {},
    past_dynamic_features: List[str] = [],
    batch_size: int = 32,
) -> None:
    super().__init__(trainer=trainer, batch_size=batch_size)

    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"

    self.freq = freq
    self.prediction_length = prediction_length
    self.context_length = context_length or prediction_length
    self.dropout_rate = dropout_rate
    self.hidden_dim = hidden_dim
    self.variable_dim = variable_dim or hidden_dim
    self.num_heads = num_heads
    self.num_outputs = num_outputs
    self.num_instance_per_series = num_instance_per_series

    if not time_features:
        self.time_features = time_features_from_frequency_str(self.freq)
    else:
        self.time_features = time_features
    self.static_cardinalities = static_cardinalities
    self.dynamic_cardinalities = dynamic_cardinalities
    self.static_feature_dims = static_feature_dims
    self.dynamic_feature_dims = dynamic_feature_dims
    self.past_dynamic_features = past_dynamic_features

    # Move the specs of past-only dynamic features out of the shared dicts,
    # based on which names are listed in `past_dynamic_features`.
    self.past_dynamic_cardinalities = {}
    self.past_dynamic_feature_dims = {}
    for name in self.past_dynamic_features:
        if name in self.dynamic_cardinalities:
            self.past_dynamic_cardinalities[
                name
            ] = self.dynamic_cardinalities.pop(name)
        elif name in self.dynamic_feature_dims:
            self.past_dynamic_feature_dims[
                name
            ] = self.dynamic_feature_dims.pop(name)
        else:
            raise ValueError(
                f"Feature name {name} is not provided in feature dicts"
            )

def __init__(
    self,
    model: HybridBlock,
    is_sequential: bool,
    freq: str,
    context_length: int,
    prediction_length: int,
    trainer: Trainer = Trainer(),
    num_parallel_samples: int = 100,
    cardinality: List[int] = list([1]),
    embedding_dimension: int = 10,
    distr_output: DistributionOutput = StudentTOutput(),
    batch_size: int = 32,
) -> None:
    super().__init__(trainer=trainer, batch_size=batch_size)

    # TODO: error checking

    self.freq = freq
    self.context_length = context_length
    self.prediction_length = prediction_length
    self.distr_output = distr_output
    self.num_parallel_samples = num_parallel_samples
    self.cardinality = cardinality
    self.embedding_dimensions = [embedding_dimension for _ in cardinality]
    self.model = model
    self.is_sequential = is_sequential

def test_lstnet(
    skip_size,
    ar_window,
    lead_time,
    prediction_length,
    hybridize,
    scaling,
    dtype,
):
    estimator = LSTNetEstimator(
        skip_size=skip_size,
        ar_window=ar_window,
        num_series=NUM_SERIES,
        channels=6,
        kernel_size=2,
        context_length=4,
        freq=freq,
        lead_time=lead_time,
        prediction_length=prediction_length,
        trainer=Trainer(
            epochs=1, batch_size=2, learning_rate=0.01, hybridize=hybridize
        ),
        scaling=scaling,
        dtype=dtype,
    )
    predictor = estimator.train(dataset.train)

    with tempfile.TemporaryDirectory() as directory:
        predictor.serialize(Path(directory))
        predictor_copy = Predictor.deserialize(Path(directory))
        assert predictor == predictor_copy

    forecast_it, ts_it = make_evaluation_predictions(
        dataset=dataset.test, predictor=predictor, num_samples=NUM_SAMPLES
    )
    forecasts = list(forecast_it)
    tss = list(ts_it)
    assert len(forecasts) == len(tss) == len(dataset.test)

    test_ds = dataset.test.list_data[0]
    for fct in forecasts:
        assert fct.freq == freq
        assert fct.samples.shape == (
            NUM_SAMPLES,
            prediction_length,
            NUM_SERIES,
        )
        assert fct.start_date == pd.period_range(
            start=test_ds["start"],
            periods=test_ds["target"].shape[1],  # number of test periods
            freq=freq,
        )[-prediction_length]

    evaluator = MultivariateEvaluator(
        quantiles=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    )
    agg_metrics, item_metrics = evaluator(
        iter(tss), iter(forecasts), num_series=len(dataset.test)
    )
    assert agg_metrics["ND"] < 1.0

def test_dynamic_integration(
    train_length: int,
    test_length: int,
    prediction_length: int,
    target_start: str,
    rolling_start: str,
    num_dynamic_feat: int,
):
    """
    Trains an estimator on a rolled dataset with dynamic features.

    Tests https://github.com/awslabs/gluon-ts/issues/1390
    """
    train_ds = create_dynamic_dataset(
        target_start, train_length, num_dynamic_feat
    )
    rolled_ds = generate_rolling_dataset(
        dataset=create_dynamic_dataset(
            target_start, test_length, num_dynamic_feat
        ),
        strategy=StepStrategy(prediction_length=prediction_length),
        start_time=pd.Timestamp(rolling_start),
    )
    estimator = DeepAREstimator(
        freq="D",
        prediction_length=prediction_length,
        context_length=2 * prediction_length,
        use_feat_dynamic_real=True,
        trainer=Trainer(epochs=1),
    )
    predictor = estimator.train(training_data=train_ds)
    forecast_it, ts_it = make_evaluation_predictions(
        rolled_ds, predictor=predictor, num_samples=100
    )
    training_agg_metrics, _ = Evaluator(num_workers=0)(ts_it, forecast_it)
    # it should have failed by this point if the dynamic features were wrong
    assert training_agg_metrics

def __init__(
    self,
    freq: str,
    prediction_length: int,
    cardinality: List[int],
    embedding_dimension: int,
    encoder_mlp_layer: List[int],
    decoder_mlp_layer: List[int],
    decoder_mlp_static_dim: int,
    scaler: Scaler = NOPScaler(),
    context_length: Optional[int] = None,
    quantiles: Optional[List[float]] = None,
    trainer: Trainer = Trainer(),
    num_parallel_samples: int = 100,
) -> None:
    encoder = MLPEncoder(layer_sizes=encoder_mlp_layer)
    super().__init__(
        freq=freq,
        prediction_length=prediction_length,
        encoder=encoder,
        cardinality=cardinality,
        embedding_dimension=embedding_dimension,
        decoder_mlp_layer=decoder_mlp_layer,
        decoder_mlp_static_dim=decoder_mlp_static_dim,
        context_length=context_length,
        scaler=scaler,
        quantiles=quantiles,
        trainer=trainer,
        num_parallel_samples=num_parallel_samples,
    )

def evaluate(dataset_name, estimator):
    dataset = get_dataset(dataset_name)
    estimator = estimator(
        prediction_length=dataset.metadata.prediction_length,
        freq=dataset.metadata.freq,
        use_feat_static_cat=True,
        cardinality=[
            feat_static_cat.cardinality
            for feat_static_cat in dataset.metadata.feat_static_cat
        ],
        trainer=Trainer(
            epochs=epochs,
            num_batches_per_epoch=num_batches_per_epoch,
        ),
    )
    print(f"evaluating {estimator} on {dataset}")

    predictor = estimator.train(dataset.train)
    forecast_it, ts_it = make_evaluation_predictions(
        dataset.test, predictor=predictor, num_samples=100
    )
    agg_metrics, item_metrics = Evaluator()(
        ts_it, forecast_it, num_series=len(dataset.test)
    )
    pprint.pprint(agg_metrics)

    eval_dict = agg_metrics
    eval_dict["dataset"] = dataset_name
    eval_dict["estimator"] = type(estimator).__name__
    return eval_dict

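# Hedged usage sketch: evaluate() relies on module-level `epochs` and
# `num_batches_per_epoch`, and on an estimator class that accepts the keyword
# arguments used above; the dataset name and values here are illustrative.
from gluonts.model.deepar import DeepAREstimator

epochs = 1
num_batches_per_epoch = 5
results = evaluate("m4_hourly", DeepAREstimator)
print(results["dataset"], results["estimator"], results["MASE"])
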
def __init__(
    self,
    freq: str,
    prediction_length: int,
    cardinality: List[int],
    embedding_dimension: int,
    encoder: Seq2SeqEncoder,
    decoder_mlp_layer: List[int],
    decoder_mlp_static_dim: int,
    scaler: Scaler = NOPScaler(),
    context_length: Optional[int] = None,
    quantiles: Optional[List[float]] = None,
    trainer: Trainer = Trainer(),
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    num_parallel_samples: int = 100,
    batch_size: int = 32,
) -> None:
    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    assert quantiles is None or all(
        0 <= d <= 1 for d in quantiles
    ), "Elements of `quantiles` should be >= 0 and <= 1"

    super().__init__(trainer=trainer, batch_size=batch_size)

    self.context_length = (
        context_length if context_length is not None else prediction_length
    )
    self.prediction_length = prediction_length
    self.freq = freq
    self.quantiles = quantiles if quantiles is not None else [0.1, 0.5, 0.9]
    self.encoder = encoder
    self.decoder_mlp_layer = decoder_mlp_layer
    self.decoder_mlp_static_dim = decoder_mlp_static_dim
    self.scaler = scaler
    self.embedder = FeatureEmbedder(
        cardinalities=cardinality,
        embedding_dims=[embedding_dimension for _ in cardinality],
    )
    self.train_sampler = (
        train_sampler
        if train_sampler is not None
        else ExpectedNumInstanceSampler(
            num_instances=1.0, min_future=prediction_length
        )
    )
    self.validation_sampler = (
        validation_sampler
        if validation_sampler is not None
        else ValidationSplitSampler(min_future=prediction_length)
    )
    self.num_parallel_samples = num_parallel_samples

def prepare(df, P, frac, ep):
    # Dynamic real features taken from the dataframe; the column names
    # (including the misspelled "suprise") are kept exactly as they appear
    # in the source data.
    feature_cols = [
        "fear", "anger", "anticipation", "trust", "suprise", "positive",
        "negative", "sadness", "disgust", "joy", "Volume_of_tweets",
        "Retweet", "Replies", "Likes",
    ]

    train_size = int(frac * df.shape[0])
    train_ds = ListDataset(
        [{
            "start": pd.Timestamp(df.index[0]),
            "target": df.Diff[0 : train_size - P],
            "feat_dynamic_real": [
                df[col][0 : train_size - P] for col in feature_cols
            ],
        }],
        freq="1B",
    )

    # Build rolling test sets, each extended by one more window of length P.
    rolling_test = []
    i = 0
    delay = 0
    while train_size + delay < df.shape[0]:
        delay = int(P) * i
        test_ds = ListDataset(
            [{
                "start": pd.Timestamp(df.index[0]),
                "target": df.Diff[0 : train_size + delay],
                "feat_dynamic_real": [
                    df[col][0 : train_size + delay] for col in feature_cols
                ],
            }],
            freq="1B",
        )
        i += 1
        rolling_test.append(test_ds)

    print(
        "We have 1 training set of", train_size, "days and then",
        len(rolling_test), "testing sets of", delay, "days total",
    )

    estimator = DeepAREstimator(
        prediction_length=P,
        context_length=5,
        freq="1B",
        use_feat_dynamic_real=True,
        trainer=Trainer(ctx="cpu", epochs=ep),
    )
    return train_ds, rolling_test, estimator, train_size, i

def __init__(
    self,
    prediction_length: int,
    freq: str,
    context_length: Optional[int] = None,
    decoder_mlp_dim_seq: List[int] = None,
    trainer: Trainer = Trainer(),
    quantiles: List[float] = None,
    scaling: bool = False,
    scaling_decoder_dynamic_feature: bool = False,
) -> None:
    assert (
        prediction_length > 0
    ), f"Invalid prediction length: {prediction_length}."
    assert decoder_mlp_dim_seq is None or all(
        d > 0 for d in decoder_mlp_dim_seq
    ), "Elements of `decoder_mlp_dim_seq` should be > 0"
    assert quantiles is None or all(
        0 <= d <= 1 for d in quantiles
    ), "Elements of `quantiles` should be >= 0 and <= 1"

    self.decoder_mlp_dim_seq = (
        decoder_mlp_dim_seq if decoder_mlp_dim_seq is not None else [30]
    )
    self.quantiles = (
        quantiles
        if quantiles is not None
        else [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    )

    # `use_static_feat` and `use_dynamic_feat` always True because network
    # always receives input; either from the input data or constants
    encoder = RNNEncoder(
        mode="gru",
        hidden_size=50,
        num_layers=1,
        bidirectional=True,
        prefix="encoder_",
        use_static_feat=True,
        use_dynamic_feat=True,
    )
    decoder = ForkingMLPDecoder(
        dec_len=prediction_length,
        final_dim=self.decoder_mlp_dim_seq[-1],
        hidden_dimension_sequence=self.decoder_mlp_dim_seq[:-1],
        prefix="decoder_",
    )
    quantile_output = QuantileOutput(self.quantiles)

    super().__init__(
        encoder=encoder,
        decoder=decoder,
        quantile_output=quantile_output,
        freq=freq,
        prediction_length=prediction_length,
        context_length=context_length,
        trainer=trainer,
        scaling=scaling,
        scaling_decoder_dynamic_feature=scaling_decoder_dynamic_feature,
    )

def __init__(
    self,
    freq: str,
    prediction_length: int,
    context_length: int,
    num_series: int,
    skip_size: int,
    ar_window: int,
    channels: int,
    lead_time: int = 0,
    kernel_size: int = 6,
    trainer: Trainer = Trainer(),
    dropout_rate: Optional[float] = 0.2,
    output_activation: Optional[str] = None,
    rnn_cell_type: str = "gru",
    rnn_num_cells: int = 100,
    rnn_num_layers: int = 3,
    skip_rnn_cell_type: str = "gru",
    skip_rnn_num_layers: int = 1,
    skip_rnn_num_cells: int = 10,
    scaling: bool = True,
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    batch_size: int = 32,
    dtype: DType = np.float32,
) -> None:
    super().__init__(
        trainer=trainer,
        lead_time=lead_time,
        batch_size=batch_size,
        dtype=dtype,
    )
    self.freq = freq
    self.num_series = num_series
    self.skip_size = skip_size
    self.ar_window = ar_window
    self.prediction_length = prediction_length
    self.context_length = context_length
    self.channels = channels
    self.kernel_size = kernel_size
    self.dropout_rate = dropout_rate
    self.output_activation = output_activation
    self.rnn_cell_type = rnn_cell_type
    self.rnn_num_layers = rnn_num_layers
    self.rnn_num_cells = rnn_num_cells
    self.skip_rnn_cell_type = skip_rnn_cell_type
    self.skip_rnn_num_layers = skip_rnn_num_layers
    self.skip_rnn_num_cells = skip_rnn_num_cells
    self.scaling = scaling
    self.train_sampler = (
        train_sampler
        if train_sampler is not None
        else ExpectedNumInstanceSampler(
            num_instances=1.0, min_future=prediction_length + lead_time
        )
    )
    self.validation_sampler = (
        validation_sampler
        if validation_sampler is not None
        else ValidationSplitSampler(
            min_future=prediction_length + lead_time
        )
    )
    self.dtype = dtype

def __init__(
    self,
    freq: str,
    prediction_length: int,
    meta_context_length: Optional[List[int]] = None,
    meta_loss_function: Optional[List[str]] = None,
    meta_bagging_size: int = 10,
    trainer: Trainer = Trainer(),
    num_stacks: int = 30,
    widths: Optional[List[int]] = None,
    num_blocks: Optional[List[int]] = None,
    num_block_layers: Optional[List[int]] = None,
    expansion_coefficient_lengths: Optional[List[int]] = None,
    sharing: Optional[List[bool]] = None,
    stack_types: Optional[List[str]] = None,
    **kwargs,
) -> None:
    super().__init__()

    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"

    self.freq = freq
    self.prediction_length = prediction_length

    assert meta_loss_function is None or all(
        loss_function in VALID_LOSS_FUNCTIONS
        for loss_function in meta_loss_function
    ), f"Each loss function has to be one of the following: {VALID_LOSS_FUNCTIONS}."
    assert meta_context_length is None or all(
        context_length > 0 for context_length in meta_context_length
    ), "The value of each `context_length` should be > 0"
    assert (
        meta_bagging_size is None or meta_bagging_size > 0
    ), "The value of `meta_bagging_size` should be > 0"

    self.meta_context_length = (
        meta_context_length
        if meta_context_length is not None
        else [multiplier * prediction_length for multiplier in range(2, 8)]
    )
    self.meta_loss_function = (
        meta_loss_function
        if meta_loss_function is not None
        else VALID_LOSS_FUNCTIONS
    )
    self.meta_bagging_size = meta_bagging_size

    # The following arguments are validated in the NBEATSEstimator:
    self.trainer = trainer
    self.num_stacks = num_stacks
    self.widths = widths
    self.num_blocks = num_blocks
    self.num_block_layers = num_block_layers
    self.expansion_coefficient_lengths = expansion_coefficient_lengths
    self.sharing = sharing
    self.stack_types = stack_types

    # Actually instantiate the different models
    self.estimators = self._estimator_factory(**kwargs)

def evaluate_nn(config):
    """Pass a simple neural network to evaluate_gluon."""
    from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator

    model = SimpleFeedForwardEstimator(
        freq=config["freq"],
        prediction_length=config["prediction_length"],
        trainer=Trainer(epochs=config["params"].get("epochs", 10)),
    )
    evaluate_gluon(config, model)

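# Minimal sketch of a config dict evaluate_nn() would accept: 'freq',
# 'prediction_length', and 'params' are the keys the function reads, while
# the values (and anything evaluate_gluon needs beyond them) are assumptions.
config = {
    "freq": "30min",
    "prediction_length": 48,
    "params": {"epochs": 3},
}
evaluate_nn(config)
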
def __init__(
    self,
    encoder: Seq2SeqEncoder,
    decoder: Seq2SeqDecoder,
    quantile_output: QuantileOutput,
    freq: str,
    prediction_length: int,
    context_length: Optional[int] = None,
    use_feat_dynamic_real: bool = False,
    use_feat_static_cat: bool = False,
    cardinality: List[int] = None,
    embedding_dimension: List[int] = None,
    add_time_feature: bool = False,
    add_age_feature: bool = False,
    enable_decoder_dynamic_feature: bool = False,
    trainer: Trainer = Trainer(),
    scaling: bool = False,
    dtype: DType = np.float32,
) -> None:
    super().__init__(trainer=trainer)

    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        use_feat_static_cat or not cardinality
    ), "You should set `cardinality` if and only if `use_feat_static_cat=True`"
    assert cardinality is None or all(
        c > 0 for c in cardinality
    ), "Elements of `cardinality` should be > 0"
    assert embedding_dimension is None or all(
        e > 0 for e in embedding_dimension
    ), "Elements of `embedding_dimension` should be > 0"

    self.encoder = encoder
    self.decoder = decoder
    self.quantile_output = quantile_output
    self.freq = freq
    self.prediction_length = prediction_length
    self.context_length = (
        context_length
        if context_length is not None
        else 4 * self.prediction_length
    )
    self.use_feat_dynamic_real = use_feat_dynamic_real
    self.use_feat_static_cat = use_feat_static_cat
    self.cardinality = (
        cardinality if cardinality and use_feat_static_cat else [1]
    )
    self.embedding_dimension = (
        embedding_dimension
        if embedding_dimension is not None
        else [min(50, (cat + 1) // 2) for cat in self.cardinality]
    )
    self.add_time_feature = add_time_feature
    self.add_age_feature = add_age_feature
    self.use_dynamic_feat = (
        use_feat_dynamic_real or add_age_feature or add_time_feature
    )
    self.enable_decoder_dynamic_feature = enable_decoder_dynamic_feature
    self.scaling = scaling
    self.dtype = dtype

def __init__(
    self,
    freq: str,
    prediction_length: int,
    cardinalities: List[int] = [],
    context_length: Optional[int] = None,
    trainer: Trainer = Trainer(),
    model_dim: int = 64,
    ffn_dim_multiplier: int = 2,
    num_heads: int = 4,
    num_layers: int = 3,
    num_outputs: int = 3,
    kernel_sizes: List[int] = [3, 5, 7, 9],
    distance_encoding: Optional[str] = "dot",
    pre_layer_norm: bool = False,
    dropout: float = 0.1,
    temperature: float = 1.0,
    time_features: Optional[List[TimeFeature]] = None,
    use_feat_dynamic_real: bool = True,
    use_feat_dynamic_cat: bool = False,
    use_feat_static_real: bool = False,
    use_feat_static_cat: bool = True,
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    batch_size: int = 32,
):
    super().__init__(trainer=trainer, batch_size=batch_size)

    self.freq = freq
    self.prediction_length = prediction_length
    self.context_length = context_length or prediction_length
    self.model_dim = model_dim
    self.ffn_dim_multiplier = ffn_dim_multiplier
    self.num_heads = num_heads
    self.num_layers = num_layers
    self.num_outputs = num_outputs
    self.cardinalities = cardinalities
    self.kernel_sizes = kernel_sizes
    self.distance_encoding = distance_encoding
    self.pre_layer_norm = pre_layer_norm
    self.dropout = dropout
    self.temperature = temperature
    self.time_features = (
        time_features or time_features_from_frequency_str(self.freq)
    )
    self.use_feat_dynamic_cat = use_feat_dynamic_cat
    self.use_feat_dynamic_real = use_feat_dynamic_real
    self.use_feat_static_cat = use_feat_static_cat
    self.use_feat_static_real = use_feat_static_real
    self.train_sampler = (
        train_sampler
        if train_sampler is not None
        else ExpectedNumInstanceSampler(
            num_instances=1.0, min_future=prediction_length
        )
    )
    self.validation_sampler = (
        validation_sampler
        if validation_sampler is not None
        else ValidationSplitSampler(min_future=prediction_length)
    )

def test_deepar():
    from gluonts.model.deepar import DeepAREstimator

    config = {"directory": "results/deepar"}
    model = DeepAREstimator(
        freq="30min", prediction_length=48, trainer=Trainer(epochs=3)
    )
    evaluate_model(model, config)

def test_nn():
    from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator

    config = {"directory": "results/nn"}
    model = SimpleFeedForwardEstimator(
        freq="30min", prediction_length=48, trainer=Trainer(epochs=3)
    )
    evaluate_model(model, config)

def __init__(
    self,
    freq: str,
    prediction_length: int,
    sampling: bool = True,
    trainer: Trainer = Trainer(),
    num_hidden_dimensions: Optional[List[int]] = None,
    context_length: Optional[int] = None,
    distr_output: DistributionOutput = StudentTOutput(),
    imputation_method: Optional[MissingValueImputation] = None,
    batch_normalization: bool = False,
    mean_scaling: bool = True,
    num_parallel_samples: int = 100,
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    batch_size: int = 32,
) -> None:
    """
    Defines an estimator. All parameters should be serializable.
    """
    super().__init__(trainer=trainer, batch_size=batch_size)

    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    assert num_hidden_dimensions is None or all(
        d > 0 for d in num_hidden_dimensions
    ), "Elements of `num_hidden_dimensions` should be > 0"
    assert (
        num_parallel_samples > 0
    ), "The value of `num_parallel_samples` should be > 0"

    self.num_hidden_dimensions = (
        num_hidden_dimensions
        if num_hidden_dimensions is not None
        else [40, 40]
    )
    self.prediction_length = prediction_length
    self.context_length = (
        context_length if context_length is not None else prediction_length
    )
    self.freq = freq
    self.distr_output = distr_output
    self.batch_normalization = batch_normalization
    self.mean_scaling = mean_scaling
    self.num_parallel_samples = num_parallel_samples
    self.sampling = sampling
    self.imputation_method = (
        imputation_method
        if imputation_method is not None
        else DummyValueImputation(self.distr_output.value_in_support)
    )
    self.train_sampler = (
        train_sampler
        if train_sampler is not None
        else ExpectedNumInstanceSampler(
            num_instances=1.0, min_future=prediction_length
        )
    )
    self.validation_sampler = (
        validation_sampler
        if validation_sampler is not None
        else ValidationSplitSampler(min_future=prediction_length)
    )

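# Hedged usage sketch, assuming this constructor belongs to
# SimpleFeedForwardEstimator (its parameters match): a short smoke-training
# run on a toy dataset. The dataset values are illustrative.
from gluonts.dataset.common import ListDataset

toy_ds = ListDataset(
    [{"start": "2021-01-01 00:00:00", "target": [float(i % 7) for i in range(100)]}],
    freq="H",
)
estimator = SimpleFeedForwardEstimator(
    freq="H",
    prediction_length=12,
    trainer=Trainer(epochs=1, num_batches_per_epoch=2),
)
predictor = estimator.train(toy_ds)
forecast = next(iter(predictor.predict(toy_ds)))
print(forecast.mean)
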
def __init__(
    self,
    prediction_interval_length: float,
    context_interval_length: float,
    num_marks: int,
    time_distr_output: TPPDistributionOutput = WeibullOutput(),
    embedding_dim: int = 5,
    trainer: Trainer = Trainer(hybridize=False),
    num_hidden_dimensions: int = 10,
    num_parallel_samples: int = 100,
    num_training_instances: int = 100,
    freq: str = "H",
    batch_size: int = 32,
) -> None:
    assert (
        not trainer.hybridize
    ), "DeepTPP currently only supports the non-hybridized training"

    super().__init__(trainer=trainer, batch_size=batch_size)

    assert (
        prediction_interval_length > 0
    ), "The value of `prediction_interval_length` should be > 0"
    assert (
        context_interval_length is None or context_interval_length > 0
    ), "The value of `context_interval_length` should be > 0"
    assert (
        num_hidden_dimensions > 0
    ), "The value of `num_hidden_dimensions` should be > 0"
    assert (
        num_parallel_samples > 0
    ), "The value of `num_parallel_samples` should be > 0"
    assert num_marks > 0, "The value of `num_marks` should be > 0"
    assert (
        num_training_instances > 0
    ), "The value of `num_training_instances` should be > 0"

    self.num_hidden_dimensions = num_hidden_dimensions
    self.prediction_interval_length = prediction_interval_length
    self.context_interval_length = (
        context_interval_length
        if context_interval_length is not None
        else prediction_interval_length
    )
    self.num_marks = num_marks
    self.time_distr_output = time_distr_output
    self.embedding_dim = embedding_dim
    self.num_parallel_samples = num_parallel_samples
    self.num_training_instances = num_training_instances
    self.freq = freq

def __init__(
    self,
    freq: str,
    prediction_length: int,
    cardinality: int,
    trainer: Trainer = Trainer(),
    context_length: Optional[int] = None,
    kernel_output: KernelOutput = RBFKernelOutput(),
    params_scaling: bool = True,
    dtype: DType = np.float64,
    max_iter_jitter: int = 10,
    jitter_method: str = "iter",
    sample_noise: bool = True,
    time_features: Optional[List[TimeFeature]] = None,
    num_parallel_samples: int = 100,
    batch_size: int = 32,
) -> None:
    self.float_type = dtype
    super().__init__(
        trainer=trainer, batch_size=batch_size, dtype=self.float_type
    )

    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert cardinality > 0, "The value of `cardinality` should be > 0"
    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    assert (
        num_parallel_samples > 0
    ), "The value of `num_parallel_samples` should be > 0"

    self.freq = freq
    self.prediction_length = prediction_length
    self.context_length = (
        context_length if context_length is not None else prediction_length
    )
    self.cardinality = cardinality
    self.kernel_output = kernel_output
    self.params_scaling = params_scaling
    self.max_iter_jitter = max_iter_jitter
    self.jitter_method = jitter_method
    self.sample_noise = sample_noise
    self.time_features = (
        time_features
        if time_features is not None
        else time_features_from_frequency_str(self.freq)
    )
    self.num_parallel_samples = num_parallel_samples