Example #1
def train_model_fn(cutoff_week_id, config, hyperparameters):

    train_ds = train_input_fn(os.environ['SM_CHANNEL_TRAIN'] +
                              '/gluonts_ds_cutoff_' + str(cutoff_week_id) +
                              '.pkl')

    nb_ts = len(train_ds)

    # New tuning
    estimator = SimpleFeedForwardEstimator(
        freq=config.get_prediction_freq(),
        prediction_length=config.get_prediction_length(),
        context_length=hyperparameters['context_length'],
        num_hidden_dimensions=[
            hyperparameters['len_hidden_dimensions']
            for i in range(hyperparameters['num_hidden_dimensions'])
        ],
        trainer=Trainer(
            epochs=hyperparameters['epochs'],
            batch_size=hyperparameters['batch_size'],
            num_batches_per_epoch=hyperparameters['num_batches_per_epoch'],
            learning_rate=hyperparameters['learning_rate']))

    predictor = estimator.train(train_ds)

    predictor.serialize(Path(os.environ['SM_MODEL_DIR']))

    return 0
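The entry point above expects the SageMaker environment variables SM_CHANNEL_TRAIN and SM_MODEL_DIR to be set. A minimal sketch for exercising it locally; the paths and every hyperparameter value below are illustrative placeholders, and config is whatever object supplies the frequency and horizon:

import os

os.environ.setdefault('SM_CHANNEL_TRAIN', '/tmp/data')  # placeholder path
os.environ.setdefault('SM_MODEL_DIR', '/tmp/model')     # placeholder path

hyperparameters = {
    'context_length': 28,  # all values here are illustrative
    'len_hidden_dimensions': 40,
    'num_hidden_dimensions': 2,
    'epochs': 5,
    'batch_size': 32,
    'num_batches_per_epoch': 50,
    'learning_rate': 1e-3,
}
# train_model_fn(cutoff_week_id=202001, config=config, hyperparameters=hyperparameters)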
Example #2
def initialize_model() -> nn.HybridBlock:
    # dummy training data
    N = 10  # number of time series
    T = 100  # number of timesteps
    prediction_length = 24
    freq = "1H"
    custom_dataset = np.zeros(shape=(N, T))
    start = pd.Timestamp("01-01-2019",
                         freq=freq)  # can be different for each time series
    train_ds = ListDataset(
        [{
            "target": x,
            "start": start
        } for x in custom_dataset[:, :-prediction_length]],
        freq=freq,
    )
    # create a simple model
    estimator = SimpleFeedForwardEstimator(
        num_hidden_dimensions=[10],
        prediction_length=prediction_length,
        context_length=T,
        freq=freq,
        trainer=Trainer(
            ctx="cpu",
            epochs=1,
            learning_rate=1e-3,
            num_batches_per_epoch=1,
        ),
    )

    # train model
    predictor = estimator.train(train_ds)

    return predictor.prediction_net
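Since initialize_model returns the trained prediction network rather than a Predictor, a quick smoke test is simply (illustrative only):

net = initialize_model()
print(type(net).__name__)  # a Gluon HybridBlock subclass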
Example #3
def evaluate_nn(config):
    """ Pass a simple neural network to evaluate_gluon"""
    from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
    model = SimpleFeedForwardEstimator(
        freq=config['freq'],
        prediction_length=config['prediction_length'],
        trainer=Trainer(epochs=config['params'].get('epochs', 10)))

    evaluate_gluon(config, model)
Example #4
def test_nn():
    from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator

    config = {}
    config['directory'] = 'results/nn'

    model = SimpleFeedForwardEstimator(freq="30min", 
                                prediction_length=48, 
                                trainer=Trainer(epochs=3))

    evaluate_model(model, config)
Example #5
def test_callbacks():
    n_epochs = 4

    history = TrainingHistory()
    iter_avg = ModelIterationAveraging(avg_strategy=NTA(epochs=2 * n_epochs))

    dataset = "m4_hourly"
    dataset = get_dataset(dataset)
    prediction_length = dataset.metadata.prediction_length
    freq = dataset.metadata.freq

    estimator = SimpleFeedForwardEstimator(
        prediction_length=prediction_length,
        freq=freq,
        trainer=Trainer(epochs=n_epochs, callbacks=[history, iter_avg]),
    )

    predictor = estimator.train(dataset.train, num_workers=None)

    assert len(history.loss_history) == n_epochs

    ws = WarmStart(predictor=predictor)

    estimator = SimpleFeedForwardEstimator(
        prediction_length=prediction_length,
        freq=freq,
        trainer=Trainer(epochs=n_epochs, callbacks=[history, iter_avg, ws]),
    )
    predictor = estimator.train(dataset.train, num_workers=None)

    assert len(history.loss_history) == n_epochs * 2
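TrainingHistory accumulates one averaged loss value per epoch, so the loss curve from the run above can be inspected directly; a sketch assuming matplotlib is available:

import matplotlib.pyplot as plt

plt.plot(history.loss_history)  # one entry per training epoch
plt.xlabel("epoch")
plt.ylabel("training loss")
plt.show()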
Example #6
 def create_estimator(
     self,
     freq: str,
     prediction_length: int,
     time_features: bool,
     training_time: float,
     validation_milestones: List[float],
     callbacks: List[Callback],
 ) -> Estimator:
     return SimpleFeedForwardEstimator(
         freq=freq,
         prediction_length=prediction_length,
         num_hidden_dimensions=[self.hidden_dim] * self.num_layers,
         trainer=self._create_trainer(
             training_time,
             validation_milestones,
             callbacks,  # type: ignore
         ),
         context_length=self.context_length_multiple * prediction_length,
     )
Example #7
    def fit(self, df, future_regressor=None):
        """Train algorithm given data supplied.

        Args:
            df (pandas.DataFrame): Datetime Indexed
        """
        df = self.basic_profile(df)

        try:
            from mxnet.random import seed as mxnet_seed

            mxnet_seed(self.random_seed)
        except Exception:
            pass

        gluon_train = df.transpose()
        self.train_index = gluon_train.index

        gluon_freq = str(self.frequency).split('-')[0]
        if gluon_freq in ["MS", "1MS"]:
            gluon_freq = "M"

        if int(self.verbose) > 1:
            print(f"Gluon Frequency is {gluon_freq}")

        if str(self.context_length).replace('.', '').isdigit():
            self.gluon_context_length = int(float(self.context_length))
        elif 'forecastlength' in str(self.context_length).lower():
            len_int = int([x for x in str(self.context_length)
                           if x.isdigit()][0])
            self.gluon_context_length = int(len_int * self.forecast_length)
        else:
            self.gluon_context_length = 2 * self.forecast_length
            self.context_length = '2ForecastLength'
        ts_metadata = {
            'num_series': len(gluon_train.index),
            'freq': gluon_freq,
            'gluon_start': [gluon_train.columns[0]
                            for _ in range(len(gluon_train.index))],
            'context_length': self.gluon_context_length,
            'forecast_length': self.forecast_length,
        }
        self.test_ds = ListDataset(
            [{
                FieldName.TARGET: target,
                FieldName.START: start
            } for (target, start) in zip(gluon_train.values,
                                         ts_metadata['gluon_start'])],
            freq=ts_metadata['freq'],
        )
        if self.gluon_model == 'DeepAR':
            from gluonts.model.deepar import DeepAREstimator

            estimator = DeepAREstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'NPTS':
            from gluonts.model.npts import NPTSEstimator

            estimator = NPTSEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
            )

        elif self.gluon_model == 'MQCNN':
            from gluonts.model.seq2seq import MQCNNEstimator

            estimator = MQCNNEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'SFF':
            from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator

            estimator = SimpleFeedForwardEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                trainer=Trainer(
                    epochs=self.epochs,
                    learning_rate=self.learning_rate,
                    hybridize=False,
                    num_batches_per_epoch=100,
                ),
            )

        elif self.gluon_model == 'Transformer':
            from gluonts.model.transformer import TransformerEstimator

            estimator = TransformerEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'DeepState':
            from gluonts.model.deepstate import DeepStateEstimator

            estimator = DeepStateEstimator(
                prediction_length=ts_metadata['forecast_length'],
                past_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                use_feat_static_cat=False,
                cardinality=[1],
                trainer=Trainer(ctx='cpu',
                                epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'DeepFactor':
            from gluonts.model.deep_factor import DeepFactorEstimator

            estimator = DeepFactorEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'WaveNet':
            # Usually needs more epochs/training iterations than other models do
            from gluonts.model.wavenet import WaveNetEstimator

            estimator = WaveNetEstimator(
                freq=ts_metadata['freq'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        else:
            raise ValueError("'gluon_model' not recognized.")

        self.GluonPredictor = estimator.train(self.test_ds)
        self.ts_metadata = ts_metadata
        self.fit_runtime = datetime.datetime.now() - self.startTime
        return self
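Once fit has run, the trained predictor is kept on the instance; a hedged usage sketch, where model stands for a hypothetical fitted instance of the class above:

# model = GluonTS(...).fit(df)  # hypothetical instantiation
forecasts = list(model.GluonPredictor.predict(model.test_ds))
print(forecasts[0].mean)  # forecast mean for the first series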
Example #8
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.trainer import Trainer

from gluonts.dataset.repository.datasets import get_dataset
import matplotlib.pyplot as plt
from gluonts.evaluation import Evaluator
import json

dataset = get_dataset("m4_hourly", regenerate=True)

# model
estimator = SimpleFeedForwardEstimator(
    num_hidden_dimensions=[10],
    prediction_length=dataset.metadata.prediction_length,
    context_length=100,
    freq=dataset.metadata.freq,
    trainer=Trainer(ctx="cpu",
                    epochs=5,
                    learning_rate=1e-3,
                    num_batches_per_epoch=100),
)

# train
predictor = estimator.train(dataset.train)

# test

from gluonts.evaluation.backtest import make_evaluation_predictions

forecast_it, ts_it = make_evaluation_predictions(
    dataset=dataset.test,  # test dataset
    predictor=predictor,  # predictor
    num_samples=100,  # number of sample paths we want for evaluation
)

forecasts = list(forecast_it)
tss = list(ts_it)

evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts),
                                      num_series=len(dataset.test))
print(json.dumps(agg_metrics, indent=4))
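With forecasts and tss materialized, the walkthrough this listing follows usually plots one series against its forecast; a sketch reusing the matplotlib import at the top of this example (the slice length is arbitrary):

ts_entry = tss[0]
forecast_entry = forecasts[0]

plt.figure(figsize=(10, 5))
ts_entry[-150:].plot()           # last 150 ground-truth observations
forecast_entry.plot(color='g')   # median path and prediction intervals
plt.grid(which="both")
plt.show()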
                        "target": np.random.normal(
                            loc=100, scale=10, size=(100)),
                        "start": "2020-01-01 00:00:00",
                        "info": {
                            "some_key": [4, 5, 6]
                        },
                    },
                ],
                freq="5min",
            ),
            SimpleFeedForwardEstimator(
                freq="5min",
                prediction_length=4,
                context_length=20,
                trainer=Trainer(
                    epochs=2,
                    num_batches_per_epoch=2,
                    batch_size=16,
                    hybridize=False,
                ),
            ),
        ),
    ],
)
def test_item_id_info(dataset: Dataset, estimator: Estimator):
    predictor = estimator.train(dataset)
    forecasts = predictor.predict(dataset)
    for data_entry, forecast in zip(dataset, forecasts):
        assert (not "item_id"
                in data_entry) or data_entry["item_id"] == forecast.item_id
        assert (not "info"
Example #10
def train(args):
    freq = args.freq.replace('"', '')
    prediction_length = args.prediction_length
    context_length = args.context_length
    use_feat_dynamic_real = args.use_feat_dynamic_real
    use_past_feat_dynamic_real = args.use_past_feat_dynamic_real
    use_feat_static_cat = args.use_feat_static_cat
    use_log1p = args.use_log1p
    
    print('freq:', freq)
    print('prediction_length:', prediction_length)
    print('context_length:', context_length)
    print('use_feat_dynamic_real:', use_feat_dynamic_real)
    print('use_past_feat_dynamic_real:', use_past_feat_dynamic_real)
    print('use_feat_static_cat:', use_feat_static_cat)
    print('use_log1p:', use_log1p)
    
    batch_size = args.batch_size
    print('batch_size:', batch_size)

    train = load_json(os.path.join(args.train, 'train_'+freq+'.json'))
    test = load_json(os.path.join(args.test, 'test_'+freq+'.json'))
    
    num_timeseries = len(train)
    print('num_timeseries:', num_timeseries)

    train_ds = ListDataset(parse_data(train, use_log1p=use_log1p), freq=freq)
    test_ds = ListDataset(parse_data(test, use_log1p=use_log1p), freq=freq)
    
    predictor = None
    
    trainer= Trainer(ctx="cpu", 
                    epochs=args.epochs, 
                    num_batches_per_epoch=args.num_batches_per_epoch,
                    learning_rate=args.learning_rate, 
                    learning_rate_decay_factor=args.learning_rate_decay_factor,
                    patience=args.patience,
                    minimum_learning_rate=args.minimum_learning_rate,
                    clip_gradient=args.clip_gradient,
                    weight_decay=args.weight_decay,
                    init=args.init.replace('"', ''),
                    hybridize=args.hybridize)
    print('trainer:', trainer)
    
    cardinality = None
    if args.cardinality != '':
        cardinality = [
            int(c) for c in args.cardinality.replace('"', '').replace(
                ' ', '').replace('[', '').replace(']', '').split(',')
        ]
    print('cardinality:', cardinality)
    
    embedding_dimension = [min(50, (cat+1)//2) for cat in cardinality] if cardinality is not None else None
    print('embedding_dimension:', embedding_dimension)
    
    algo_name = args.algo_name.replace('"', '')
    print('algo_name:', algo_name)
    
    if algo_name == 'CanonicalRNN':
        estimator = CanonicalRNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            num_layers=5, 
            num_cells=50, 
            cell_type='lstm', 
            num_parallel_samples=100,
            cardinality=cardinality,
            embedding_dimension=10,
        )
    elif algo_name == 'DeepFactor':
        estimator = DeepFactorEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
            embedding_dimension=10,
        )
    elif algo_name == 'DeepAR':
        estimator = DeepAREstimator(
            freq=freq,  # – Frequency of the data to train on and predict
            prediction_length=prediction_length,  # – Length of the prediction horizon
            trainer=trainer,  # – Trainer object to be used (default: Trainer())
            context_length=context_length,  # – Number of steps to unroll the RNN for before computing predictions (default: None, in which case context_length = prediction_length)
            num_layers=2,  # – Number of RNN layers (default: 2)
            num_cells=40,  # – Number of RNN cells for each layer (default: 40)
            cell_type='lstm',  # – Type of recurrent cells to use (available: 'lstm' or 'gru'; default: 'lstm')
            dropoutcell_type='ZoneoutCell',  # – Type of dropout cells to use (available: 'ZoneoutCell', 'RNNZoneoutCell', 'VariationalDropoutCell' or 'VariationalZoneoutCell'; default: 'ZoneoutCell')
            dropout_rate=0.1,  # – Dropout regularization parameter (default: 0.1)
            use_feat_dynamic_real=use_feat_dynamic_real,  # – Whether to use the feat_dynamic_real field from the data (default: False)
            use_feat_static_cat=use_feat_static_cat,  # – Whether to use the feat_static_cat field from the data (default: False)
            use_feat_static_real=False,  # – Whether to use the feat_static_real field from the data (default: False)
            cardinality=cardinality,  # – Number of values of each categorical feature. This must be set if use_feat_static_cat == True (default: None)
            embedding_dimension=embedding_dimension,  # – Dimension of the embeddings for categorical features (default: [min(50, (cat+1)//2) for cat in cardinality])
        #     distr_output = StudentTOutput(),  # – Distribution to use to evaluate observations and sample predictions (default: StudentTOutput())
        #     scaling = True,  # – Whether to automatically scale the target values (default: true)
        #     lags_seq = None,  # – Indices of the lagged target values to use as inputs of the RNN (default: None, in which case these are automatically determined based on freq)
        #     time_features = None,  # – Time features to use as inputs of the RNN (default: None, in which case these are automatically determined based on freq)
        #     num_parallel_samples = 100,  # – Number of evaluation samples per time series to increase parallelism during inference. This is a model optimization that does not affect the accuracy (default: 100)
        #     imputation_method = None,  # – One of the methods from ImputationStrategy
        #     train_sampler = None,  # – Controls the sampling of windows during training.
        #     validation_sampler = None,  # – Controls the sampling of windows during validation.
        #     alpha = None,  # – The scaling coefficient of the activation regularization
        #     beta = None,  # – The scaling coefficient of the temporal activation regularization
            batch_size=batch_size,  # – The size of the batches to be used training and prediction.
        #     minimum_scale = None,  # – The minimum scale that is returned by the MeanScaler
        #     default_scale = None,  # – Default scale that is applied if the context length window is completely unobserved. If not set, the scale in this case will be the mean scale in the batch.
        #     impute_missing_values = None,  # – Whether to impute the missing values during training by using the current model parameters. Recommended if the dataset contains many missing values. However, this is a lot slower than the default mode.
        #     num_imputation_samples = None,  # – How many samples to use to impute values when impute_missing_values=True
        )
    elif algo_name == 'DeepState':
        estimator = DeepStateEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            use_feat_dynamic_real=use_feat_dynamic_real,
            use_feat_static_cat=use_feat_static_cat,
            cardinality=cardinality,
        )
    elif algo_name == 'DeepVAR':
        estimator = DeepVAREstimator(  # use multi
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            target_dim=96,
        )
    elif algo_name == 'GaussianProcess':
#         # TODO
#         estimator = GaussianProcessEstimator(
#             freq=freq,
#             prediction_length=prediction_length,
#             context_length=context_length,
#             trainer=trainer,
#             batch_size=batch_size,
#             cardinality=num_timeseries,
#         )
        pass
    elif algo_name == 'GPVAR':
        estimator = GPVAREstimator(  # use multi
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            target_dim=96,
        )
    elif algo_name == 'LSTNet':
        estimator = LSTNetEstimator(  # use multi
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            num_series=96,
            skip_size=4,
            ar_window=4,
            channels=72,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'NBEATS':
        estimator = NBEATSEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'DeepRenewalProcess':
        estimator = DeepRenewalProcessEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            num_cells=40,
            num_layers=2,
        )
    elif algo_name == 'Tree':
        estimator = TreePredictor(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            n_ignore_last=0,
            lead_time=0,
            max_n_datapts=1000000,
            min_bin_size=100,  # Used only for "QRX" method.
            use_feat_static_real=False,
            use_feat_dynamic_cat=False,
            use_feat_dynamic_real=use_feat_dynamic_real,
            cardinality=cardinality,
            one_hot_encode=False,
            model_params={'eta': 0.1, 'max_depth': 6, 'silent': 0,
                          'nthread': -1, 'n_jobs': -1, 'gamma': 1,
                          'subsample': 0.9, 'min_child_weight': 1,
                          'colsample_bytree': 0.9, 'lambda': 1,
                          'booster': 'gbtree'},
            max_workers=4,  # default: None
            method="QRX",  # "QRX", "QuantileRegression", "QRF"
            quantiles=None,  # Used only for "QuantileRegression" method.
            model=None,
            seed=None,
        )
    elif algo_name == 'SelfAttention':
#         # TODO
#         estimator = SelfAttentionEstimator(
#             freq=freq,
#             prediction_length=prediction_length,
#             context_length=context_length,
#             trainer=trainer,
#             batch_size=batch_size,
#         )
        pass
    elif algo_name == 'MQCNN':
        estimator = MQCNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            use_past_feat_dynamic_real=use_past_feat_dynamic_real,
            use_feat_dynamic_real=use_feat_dynamic_real,
            use_feat_static_cat=use_feat_static_cat,
            cardinality=cardinality,
            embedding_dimension=embedding_dimension,
            add_time_feature=True,
            add_age_feature=False,
            enable_encoder_dynamic_feature=True,
            enable_decoder_dynamic_feature=True,
            seed=None,
            decoder_mlp_dim_seq=None,
            channels_seq=None,
            dilation_seq=None,
            kernel_size_seq=None,
            use_residual=True,
            quantiles=None,
            distr_output=None,
            scaling=None,
            scaling_decoder_dynamic_feature=False,
            num_forking=None,
            max_ts_len=None,
        )
    elif algo_name == 'MQRNN':
        estimator = MQRNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'Seq2Seq':
        # # TODO
        # estimator = Seq2SeqEstimator(
        #     freq=freq,
        #     prediction_length=prediction_length,
        #     context_length=context_length,
        #     trainer=trainer,
        #     cardinality=cardinality,
        #     embedding_dimension=4,
        #     encoder=Seq2SeqEncoder(),
        #     decoder_mlp_layer=[4],
        #     decoder_mlp_static_dim=4
        # )
        pass
    elif algo_name == 'SimpleFeedForward':
        estimator = SimpleFeedForwardEstimator(
            num_hidden_dimensions=[40, 40],
            prediction_length=prediction_length,
            context_length=context_length,
            freq=freq,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'TemporalFusionTransformer':
        estimator = TemporalFusionTransformerEstimator(
            prediction_length=prediction_length,
            context_length=context_length,
            freq=freq,
            trainer=trainer,
            batch_size=batch_size,
            hidden_dim=32,
            variable_dim=None,
            num_heads=4,
            num_outputs=3,
            num_instance_per_series=100,
            dropout_rate=0.1,
        #     time_features = [], 
        #     static_cardinalities = {}, 
        #     dynamic_cardinalities = {}, 
        #     static_feature_dims = {}, 
        #     dynamic_feature_dims = {}, 
        #     past_dynamic_features = []
        )
    elif algo_name == 'DeepTPP':
#         # TODO
#         estimator = DeepTPPEstimator(
#             prediction_interval_length=prediction_length,
#             context_interval_length=context_length,
#             freq=freq,
#             trainer=trainer,
#             batch_size=batch_size,
#             num_marks=len(cardinality) if cardinality is not None else 0,
#         )
        pass
    elif algo_name == 'Transformer':
        estimator = TransformerEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
        )
    elif algo_name == 'WaveNet':
        estimator = WaveNetEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
        )
    elif algo_name == 'Naive2':
        # TODO Multiplicative seasonality is not appropriate for zero and negative values
        predictor = Naive2Predictor(freq=freq, prediction_length=prediction_length, season_length=context_length)
    elif algo_name == 'NPTS':
        predictor = NPTSPredictor(freq=freq, prediction_length=prediction_length, context_length=context_length)
    elif algo_name == 'Prophet':
        def configure_model(model):
            model.add_seasonality(
                name='weekly', period=7, fourier_order=3, prior_scale=0.1
            )
            return model
        predictor = ProphetPredictor(freq=freq, prediction_length=prediction_length, init_model=configure_model)
    elif algo_name == 'ARIMA':
        predictor = RForecastPredictor(freq=freq,
                                      prediction_length=prediction_length,
                                      method_name='arima',
                                      period=context_length,
                                      trunc_length=len(train[0]['target']))
    elif algo_name == 'ETS':
        predictor = RForecastPredictor(freq=freq,
                                      prediction_length=prediction_length,
                                      method_name='ets',
                                      period=context_length,
                                      trunc_length=len(train[0]['target']))
    elif algo_name == 'TBATS':
        predictor = RForecastPredictor(freq=freq,
                                      prediction_length=prediction_length,
                                      method_name='tbats',
                                      period=context_length,
                                      trunc_length=len(train[0]['target']))
    elif algo_name == 'CROSTON':
        predictor = RForecastPredictor(freq=freq,
                                      prediction_length=prediction_length,
                                      method_name='croston',
                                      period=context_length,
                                      trunc_length=len(train[0]['target']))
    elif algo_name == 'MLP':
        predictor = RForecastPredictor(freq=freq,
                                      prediction_length=prediction_length,
                                      method_name='mlp',
                                      period=context_length,
                                      trunc_length=len(train[0]['target']))
    elif algo_name == 'SeasonalNaive':
        predictor = SeasonalNaivePredictor(freq=freq, prediction_length=prediction_length)
    else:
        print('[ERROR]:', algo_name, 'not supported')
        return
    
    if predictor is None:
        try:
            predictor = estimator.train(train_ds, test_ds)
        except Exception as e:
            print(e)
            try:
                grouper_train = MultivariateGrouper(max_target_dim=num_timeseries)
                train_ds_multi = grouper_train(train_ds)
                test_ds_multi = grouper_train(test_ds)
                predictor = estimator.train(train_ds_multi, test_ds_multi)
            except Exception as e:
                print(e)

    forecast_it, ts_it = make_evaluation_predictions(
        dataset=test_ds,  # test dataset
        predictor=predictor,  # predictor
        num_samples=100,  # number of sample paths we want for evaluation
    )

    forecasts = list(forecast_it)
    tss = list(ts_it)
#     print(len(forecasts), len(tss))
    
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(test_ds))

    print(json.dumps(agg_metrics, indent=4))
    
    model_dir = os.path.join(args.model_dir, algo_name)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    predictor.serialize(Path(model_dir))
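When use_log1p is enabled, training happens on log1p-transformed targets, so sample paths should be mapped back with expm1 before metrics on the original scale are meaningful; a sketch (exactly how parse_data applies the transform is an assumption):

import numpy as np

if use_log1p:
    for forecast in forecasts:
        # invert the log1p transform on every sample path
        forecast.samples = np.expm1(forecast.samples)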
Example #11
from gluonts.dataset.repository.datasets import dataset_recipes, get_dataset
from gluonts.evaluation import Evaluator
from gluonts.evaluation.backtest import backtest_metrics
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.mx.trainer import Trainer
import pandas as pd

if __name__ == "__main__":
    print(f"datasets available: {dataset_recipes.keys()}")

    # we pick m4_hourly as it only contains a few hundred time series
    dataset = get_dataset("m4_hourly", regenerate=False)

    # First train a model
    estimator = SimpleFeedForwardEstimator(
        prediction_length=dataset.metadata.prediction_length,
        freq=dataset.metadata.freq,
        trainer=Trainer(epochs=10, num_batches_per_epoch=10),
    )

    train1_output = estimator.train_model(dataset.train)

    # callback to overwrite parameters of the new model with the already trained model
    def copy_params(net):
        params1 = train1_output.trained_net.collect_params()
        params2 = net.collect_params()
        for p1, p2 in zip(params1.values(), params2.values()):
            p2.set_data(p1.data())

    estimator = SimpleFeedForwardEstimator(
        prediction_length=dataset.metadata.prediction_length,
        freq=dataset.metadata.freq,
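The listing copies parameters by hand through copy_params; the WarmStart callback used in Example #5 expresses the same warm start declaratively. A sketch, with the import path assumed for gluonts 0.8-era releases:

from gluonts.mx.trainer.callback import WarmStart

predictor1 = train1_output.predictor
estimator2 = SimpleFeedForwardEstimator(
    prediction_length=dataset.metadata.prediction_length,
    freq=dataset.metadata.freq,
    trainer=Trainer(epochs=10, callbacks=[WarmStart(predictor=predictor1)]),
)
predictor2 = estimator2.train(dataset.train)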
Example #12
    def __init__(self):

        self.DARE = {
            "num_layers": 2,
            "num_cells": 40,
            "cell_type": "lstm",
            "dropout_rate": 0.1,
            "context_length": None,
            "scaling": True,
            "distr_output": student_t.StudentTOutput(),
            "embedding_dimension": 20,
            "time_features": None,
            "lags_seq": None,
            "use_feat_dynamic_real": False,
            "use_feat_static_cat": False,
            "cardinality": None,
            "prediction_length": 50,
            "freq": "1H",
            "trainer": tnr,
        }
        self.ModelDARE = {
            "DeepAREstimate": [DeepAREstimator(**self.DARE), self.DARE],
        }

        self.SFF = {
            "num_hidden_dimensions": [40, 40],
            "context_length": None,
            "batch_normalization": False,
            "mean_scaling": True,
            "distr_output": student_t.StudentTOutput(),
            "prediction_length": 50,
            "freq": "1H",
            "trainer": tnr,
        }
        self.ModelSFF = {
            "SimpleFeedForward":
            [SimpleFeedForwardEstimator(**self.SFF), self.SFF],
        }

        self.WN = {"prediction_length": 100, "freq": "1H", "trainer": tnr}
        self.ModelWN = {
            "WaveNet": [WaveNetEstimator(**self.WN), self.WN],
        }

        self.CRNN = {
            "prediction_length": 380,
            "context_length": 200,
            "freq": "1H",
            "trainer": tnr
        }
        self.ModelCRNN = {
            "canonical": [CanonicalRNNEstimator(**self.CRNN), self.CRNN],
        }

        self.GP = {
            "prediction_length": 380,
            "context_length": None,
            "freq": "1H",
            "cardinality": 1,
            "trainer": tnr,
        }
        self.ModelGP = {
            "gaussian": [GaussianProcessEstimator(**self.GP), self.GP],
        }

        # self.PE = {
        #     "prediction_length": 380,
        #     "freq": "1H",

        # }
        # self.ModelPE= {
        #     "prophet": ProphetEstimator(**self.PE)

        # }

        # self.RFE = {
        #     "prediction_length": 100,
        #     "freq": "1H",

        # }
        # self.ModelRFE= {
        #     "R" : RForecastPredictor(**self.RFE),

        # }

        self.SNE = {
            "prediction_length": 380,
            "freq": "1H",
        }
        self.ModelSNE = {
            "seasonalnaive": [SeasonalNaiveEstimator(**self.SNE), self.SNE]
        }

        self.S2Q = {
            "prediction_length": 380,
            "context_length": None,
            "freq": "1H",
            "embedding_dimension": 1,
            "cardinality": [1],
            "encoder": encode,
            "decoder_mlp_layer": [3],
            "decoder_mlp_static_dim": 1,
            "trainer": tnr,
        }
        self.ModelS2Q = {"seq2seq": [Seq2SeqEstimator(**self.S2Q), self.S2Q]}

        self.AppliedEurekaRegressorModels = {
            "teams": [self.ModelSFF, self.ModelDARE],
            "position": [self.ModelSFF, self.ModelDARE]
        }
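Each registry above maps a model name to an [estimator, config] pair, so sweeping every configured model reduces to a loop; a sketch in which Models is a hypothetical name for the class and train_ds is assumed to be in scope:

models = Models()  # hypothetical class name
for registry in (models.ModelSFF, models.ModelDARE):
    for name, (estimator, params) in registry.items():
        predictor = estimator.train(train_ds)
        print("trained", name, "with prediction_length", params["prediction_length"])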
Example #13
def train(bucket, seq, algo, freq, prediction_length, epochs, learning_rate,
          hybridize, num_batches_per_epoch):

    #create train dataset
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TRAIN'] +
                     "/train.csv",
                     header=0,
                     index_col=0)

    training_data = ListDataset([{
        "start": df.index[0],
        "target": df.usage[:],
        "item_id": df.client[:]
    }],
                                freq=freq)

    #create test dataset
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TEST'] +
                     "/test.csv",
                     header=0,
                     index_col=0)

    test_data = ListDataset([{
        "start": df.index[0],
        "target": df.usage[:],
        "item_id": 'client_12'
    }],
                            freq=freq)

    hook = Hook.create_from_json_file()
    #determine estimators##################################
    if algo == "DeepAR":
        estimator = DeepAREstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=1,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        #train the model
        predictor = estimator.train(training_data=training_data)
        print("DeepAR training is complete SUCCESS")
    elif algo == "SFeedFwd":
        estimator = SimpleFeedForwardEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        #train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    elif algo == "lstnet":
        # Needed for LSTNet ONLY
        grouper = MultivariateGrouper(max_target_dim=6)
        training_data = grouper(training_data)
        test_data = grouper(test_data)
        context_length = prediction_length
        num_series = 1
        skip_size = 1
        ar_window = 1
        channels = 4

        estimator = LSTNetEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            num_series=num_series,
            skip_size=skip_size,
            ar_window=ar_window,
            channels=channels,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        #train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    elif algo == "seq2seq":
        estimator = MQCNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        #train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    else:
        estimator = TransformerEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        #train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")

    ###################################################

    #evaluate trained model on test data
    forecast_it, ts_it = make_evaluation_predictions(test_data,
                                                     predictor,
                                                     num_samples=100)
    print("EVALUATION is complete SUCCESS")
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss),
                                          iter(forecasts),
                                          num_series=len(test_data))
    print("METRICS retrieved SUCCESS")
    #bucket = "bwp-sandbox"

    mainpref = "gluonts/blog-models/"
    prefix = mainpref + str(seq) + "/"
    agg_df = pd.DataFrame(agg_metrics, index=[0])
    file = "metrics" + str(seq) + ".csv"
    os.system('mkdir metrics')
    cspath = os.path.join('metrics', file)
    agg_df.to_csv(cspath)
    s3.upload_file(cspath, bucket, mainpref + "metrics/" + file)

    hook.save_scalar("MAPE", agg_metrics["MAPE"], sm_metric=True)
    hook.save_scalar("RMSE", agg_metrics["RMSE"], sm_metric=True)
    hook.save_scalar("MASE", agg_metrics["MASE"], sm_metric=True)
    hook.save_scalar("MSE", agg_metrics["MSE"], sm_metric=True)

    print("MAPE:", agg_metrics["MAPE"])

    #save the model
    predictor.serialize(pathlib.Path(os.environ['SM_MODEL_DIR']))

    uploadDirectory(os.environ['SM_MODEL_DIR'], prefix, bucket)

    return predictor
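uploadDirectory is not defined in this listing; a minimal boto3 sketch of what it presumably does (name, signature, and behavior are assumptions):

import os
import boto3

s3 = boto3.client('s3')

def uploadDirectory(path, prefix, bucket):
    # mirror every file under 'path' to s3://bucket/prefix...
    for root, _, files in os.walk(path):
        for name in files:
            local_path = os.path.join(root, name)
            key = prefix + os.path.relpath(local_path, path)
            s3.upload_file(local_path, bucket, key)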
Example #14
    def fit(self, df, future_regressor=None):
        """Train algorithm given data supplied.

        Args:
            df (pandas.DataFrame): Datetime Indexed
        """
        if not _has_gluonts:
            raise ImportError(
                "GluonTS installation not found or installed version is incompatible with AutoTS."
            )

        df = self.basic_profile(df)

        try:
            from mxnet.random import seed as mxnet_seed

            mxnet_seed(self.random_seed)
        except Exception:
            pass

        gluon_train = df.to_numpy().T
        self.train_index = df.columns
        self.train_columns = df.index

        gluon_freq = str(self.frequency).split('-')[0]
        if self.regression_type == "User":
            if future_regressor is None:
                raise ValueError(
                    "regression_type='User' but no future_regressor supplied")
        if gluon_freq in ["MS", "1MS"]:
            gluon_freq = "M"

        if int(self.verbose) > 1:
            print(f"Gluon Frequency is {gluon_freq}")
        if int(self.verbose) < 1:
            try:
                logging.getLogger().disabled = True
                logging.getLogger("mxnet").addFilter(lambda record: False)
            except Exception:
                pass

        if str(self.context_length).replace('.', '').isdigit():
            self.gluon_context_length = int(float(self.context_length))
        elif 'forecastlength' in str(self.context_length).lower():
            len_int = int([x for x in str(self.context_length)
                           if x.isdigit()][0])
            self.gluon_context_length = int(len_int * self.forecast_length)
        else:
            self.gluon_context_length = 20
            self.context_length = '20'
        ts_metadata = {
            'num_series': len(self.train_index),
            'freq': gluon_freq,
            'start_ts': df.index[0],
            'gluon_start': [self.train_columns[0]
                            for _ in range(len(self.train_index))],
            'context_length': self.gluon_context_length,
            'forecast_length': self.forecast_length,
        }
        if self.gluon_model in self.multivariate_mods:
            if self.regression_type == "User":
                regr = future_regressor.to_numpy().T
                self.regr_train = regr
                self.test_ds = ListDataset(
                    [{
                        "start": df.index[0],
                        "target": gluon_train,
                        "feat_dynamic_real": regr,
                    }],
                    freq=ts_metadata['freq'],
                    one_dim_target=False,
                )
            else:
                self.test_ds = ListDataset(
                    [{
                        "start": df.index[0],
                        "target": gluon_train
                    }],
                    freq=ts_metadata['freq'],
                    one_dim_target=False,
                )
        else:
            if self.regression_type == "User":
                self.gluon_train = gluon_train
                regr = future_regressor.to_numpy().T
                self.regr_train = regr
                self.test_ds = ListDataset(
                    [{
                        FieldName.TARGET: target,
                        FieldName.START: ts_metadata['start_ts'],
                        FieldName.FEAT_DYNAMIC_REAL: regr,
                    } for target in gluon_train],
                    freq=ts_metadata['freq'],
                )
            else:
                # use the actual start date, if NaN given (semi-hidden)
                # ts_metadata['gluon_start'] = df.notna().idxmax().tolist()
                self.test_ds = ListDataset(
                    [{
                        FieldName.TARGET: target,
                        FieldName.START: start
                    } for (target, start
                           ) in zip(gluon_train, ts_metadata['gluon_start'])],
                    freq=ts_metadata['freq'],
                )
        if self.gluon_model == 'DeepAR':
            from gluonts.model.deepar import DeepAREstimator

            estimator = DeepAREstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'NPTS':
            from gluonts.model.npts import NPTSEstimator

            estimator = NPTSEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
            )

        elif self.gluon_model == 'MQCNN':
            from gluonts.model.seq2seq import MQCNNEstimator

            estimator = MQCNNEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'SFF':
            from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator

            estimator = SimpleFeedForwardEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                trainer=Trainer(
                    epochs=self.epochs,
                    learning_rate=self.learning_rate,
                    hybridize=False,
                    num_batches_per_epoch=100,
                ),
            )

        elif self.gluon_model == 'Transformer':
            from gluonts.model.transformer import TransformerEstimator

            estimator = TransformerEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'DeepState':
            from gluonts.model.deepstate import DeepStateEstimator

            estimator = DeepStateEstimator(
                prediction_length=ts_metadata['forecast_length'],
                past_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                use_feat_static_cat=False,
                cardinality=[1],
                trainer=Trainer(ctx='cpu',
                                epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'DeepFactor':
            from gluonts.model.deep_factor import DeepFactorEstimator

            estimator = DeepFactorEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'WaveNet':
            # Usually needs more epochs/training iterations than other models do
            from gluonts.model.wavenet import WaveNetEstimator

            estimator = WaveNetEstimator(
                freq=ts_metadata['freq'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'DeepVAR':
            from gluonts.model.deepvar import DeepVAREstimator

            estimator = DeepVAREstimator(
                target_dim=gluon_train.shape[0],
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'GPVAR':
            from gluonts.model.gpvar import GPVAREstimator

            estimator = GPVAREstimator(
                target_dim=gluon_train.shape[0],
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'LSTNet':
            from gluonts.model.lstnet import LSTNetEstimator

            estimator = LSTNetEstimator(
                freq=ts_metadata['freq'],
                num_series=len(self.train_index),
                skip_size=0,
                ar_window=1,
                channels=2,
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'NBEATS':
            from gluonts.model.n_beats import NBEATSEstimator

            estimator = NBEATSEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'Rotbaum':
            from gluonts.model.rotbaum import TreeEstimator

            estimator = TreeEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                # trainer=Trainer(epochs=self.epochs, learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'DeepRenewalProcess':
            from gluonts.model.renewal import DeepRenewalProcessEstimator

            estimator = DeepRenewalProcessEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                num_layers=1,  # original paper used 1 layer, 10 cells
                num_cells=10,
                freq=ts_metadata['freq'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'SelfAttention':
            from gluonts.model.san import SelfAttentionEstimator

            estimator = SelfAttentionEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                trainer=Trainer(
                    epochs=self.epochs,
                    learning_rate=self.learning_rate,
                ),
            )
        elif self.gluon_model == 'TemporalFusionTransformer':
            from gluonts.model.tft import TemporalFusionTransformerEstimator

            estimator = TemporalFusionTransformerEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'DeepTPP':
            from gluonts.model.tpp.deeptpp import DeepTPPEstimator

            estimator = DeepTPPEstimator(
                prediction_interval_length=ts_metadata['forecast_length'],
                context_interval_length=ts_metadata['context_length'],
                num_marks=1,  # cardinality
                freq=ts_metadata['freq'],
                trainer=Trainer(
                    epochs=self.epochs,
                    learning_rate=self.learning_rate,
                    hybridize=False,
                ),
            )
        else:
            raise ValueError("'gluon_model' not recognized.")

        self.GluonPredictor = estimator.train(self.test_ds)
        self.ts_metadata = ts_metadata
        self.fit_runtime = datetime.datetime.now() - self.startTime
        return self
Example #15
    "target": new_data.Close[:-1]
}],
                                  freq="1min")

trainer = Trainer(epochs=10, ctx="cpu", num_batches_per_epoch=75)
estimator = deepar.DeepAREstimator(freq="1min",
                                   prediction_length=390,
                                   trainer=trainer,
                                   num_layers=2)
#predictor = estimator.train(training_data=data)

trial_estimator = SimpleFeedForwardEstimator(num_hidden_dimensions=[10],
                                             prediction_length=390,
                                             context_length=780,
                                             freq="1min",
                                             trainer=Trainer(
                                                 ctx="cpu",
                                                 epochs=5,
                                                 learning_rate=1e-30,
                                                 hybridize=False,
                                                 num_batches_per_epoch=100))
predictor = estimator.train(lots_of_data)

prediction = next(predictor.predict(lots_of_data))
print(prediction.mean)
#prediction.plot(output_file='graph.png')

forecast_it, ts_it = make_evaluation_predictions(
    dataset=lots_of_data,  # test dataset
    predictor=predictor,  # predictor
    num_samples=500,  # number of sample paths we want for evaluation
)
Example #16
    dataset = get_dataset(ds)
    freq = dataset.metadata.freq
    prediction_length = dataset.metadata.prediction_length
    deepar = DeepAREstimator(
        freq=freq,
        prediction_length=prediction_length,
    )

    mqcnn = MQCNNEstimator(
        freq=freq,
        prediction_length=prediction_length,
    )

    sff = SimpleFeedForwardEstimator(
        num_hidden_dimensions=[10],
        context_length=100,
        freq=freq,
        prediction_length=prediction_length,
    )

    wn = WaveNetEstimator(
        freq=freq,
        prediction_length=prediction_length,
    )

    num_batches_per_epoch = 10
    bin_edges = np.array([-1e20, -1e10, 1, 1e20])
    estimators = [deepar, mqcnn, sff, wn]
    batch = [32]
    for est in estimators:
        for b in batch:
            cProfile.run('data_loader(est, ds, b)', 'restats')
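Each cProfile.run call above writes its profile to the restats file; the standard-library pstats module can then rank the hot spots:

import pstats

stats = pstats.Stats('restats')
stats.sort_stats('cumulative').print_stats(10)  # ten most expensive calls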
Example #17
def extended_forecasting_tutorial():
	mx.random.seed(0)
	np.random.seed(0)

	print(f"Available datasets: {list(dataset_recipes.keys())}")

	dataset = get_dataset("m4_hourly", regenerate=True)

	# Get the first time series in the training set.
	train_entry = next(iter(dataset.train))
	print(train_entry.keys())

	# Get the first time series in the test set.
	test_entry = next(iter(dataset.test))
	print(test_entry.keys())

	test_series = to_pandas(test_entry)
	train_series = to_pandas(train_entry)

	fig, ax = plt.subplots(2, 1, sharex=True, sharey=True, figsize=(10, 7))

	train_series.plot(ax=ax[0])
	ax[0].grid(which="both")
	ax[0].legend(["train series"], loc="upper left")

	test_series.plot(ax=ax[1])
	ax[1].axvline(train_series.index[-1], color="r")  # End of train dataset.
	ax[1].grid(which="both")
	ax[1].legend(["test series", "end of train series"], loc="upper left")

	plt.show()

	print(f"Length of forecasting window in test dataset: {len(test_series) - len(train_series)}")
	print(f"Recommended prediction horizon: {dataset.metadata.prediction_length}")
	print(f"Frequency of the time series: {dataset.metadata.freq}")

	#--------------------
	# Create artificial datasets.

	artificial_dataset = ComplexSeasonalTimeSeries(
		num_series=10,
		prediction_length=21,
		freq_str="H",
		length_low=30,
		length_high=200,
		min_val=-10000,
		max_val=10000,
		is_integer=False,
		proportion_missing_values=0,
		is_noise=True,
		is_scale=True,
		percentage_unique_timestamps=1,
		is_out_of_bounds_date=True,
	)

	print(f"prediction length: {artificial_dataset.metadata.prediction_length}")
	print(f"frequency: {artificial_dataset.metadata.freq}")

	print(f"type of train dataset: {type(artificial_dataset.train)}")
	print(f"train dataset fields: {artificial_dataset.train[0].keys()}")
	print(f"type of test dataset: {type(artificial_dataset.test)}")
	print(f"test dataset fields: {artificial_dataset.test[0].keys()}")

	train_ds = ListDataset(
		artificial_dataset.train,
		freq=artificial_dataset.metadata.freq
	)

	test_ds = ListDataset(
		artificial_dataset.test,
		freq=artificial_dataset.metadata.freq
	)

	train_entry = next(iter(train_ds))
	print(train_entry.keys())

	test_entry = next(iter(test_ds))
	print(test_entry.keys())

	test_series = to_pandas(test_entry)
	train_series = to_pandas(train_entry)

	fig, ax = plt.subplots(2, 1, sharex=True, sharey=True, figsize=(10, 7))

	train_series.plot(ax=ax[0])
	ax[0].grid(which="both")
	ax[0].legend(["train series"], loc="upper left")

	test_series.plot(ax=ax[1])
	ax[1].axvline(train_series.index[-1], color="r")  # End of train dataset.
	ax[1].grid(which="both")
	ax[1].legend(["test series", "end of train series"], loc="upper left")

	plt.show()

	#--------------------
	# Use your time series and features.

	[f"FieldName.{k} = '{v}'" for k, v in FieldName.__dict__.items() if not k.startswith("_")]

	def create_dataset(num_series, num_steps, period=24, mu=1, sigma=0.3):
		# Create target: noise + pattern.
		# Noise.
		noise = np.random.normal(mu, sigma, size=(num_series, num_steps))

		# Pattern - sinusoid with different phase.
		sin_minusPi_Pi = np.sin(np.tile(np.linspace(-np.pi, np.pi, period), int(num_steps / period)))
		sin_Zero_2Pi = np.sin(np.tile(np.linspace(0, 2 * np.pi, period), int(num_steps / period)))

		pattern = np.concatenate(
			(
				np.tile(
					sin_minusPi_Pi.reshape(1, -1),
					(int(np.ceil(num_series / 2)),1)
				),
				np.tile(
					sin_Zero_2Pi.reshape(1, -1),
					(int(np.floor(num_series / 2)), 1)
				)
			),
			axis=0
		)

		target = noise + pattern

		# Create time features: use target one period earlier, append with zeros.
		feat_dynamic_real = np.concatenate(
			(
				np.zeros((num_series, period)),
				target[:, :-period]
			),
			axis=1
		)

		# Create categorical static feats: use the sinusoid type as a categorical feature.
		feat_static_cat = np.concatenate(
			(
				np.zeros(int(np.ceil(num_series / 2))),
				np.ones(int(np.floor(num_series / 2)))
			),
			axis=0
		)

		return target, feat_dynamic_real, feat_static_cat

	# Define the parameters of the dataset.
	custom_ds_metadata = {
		"num_series": 100,
		"num_steps": 24 * 7,
		"prediction_length": 24,
		"freq": "1H",
		"start": [
			pd.Timestamp("01-01-2019", freq="1H")
			for _ in range(100)
		]
	}

	data_out = create_dataset(
		custom_ds_metadata["num_series"],
		custom_ds_metadata["num_steps"],
		custom_ds_metadata["prediction_length"]
	)

	target, feat_dynamic_real, feat_static_cat = data_out

	train_ds = ListDataset(
		[
			{
				FieldName.TARGET: target,
				FieldName.START: start,
				FieldName.FEAT_DYNAMIC_REAL: [fdr],
				FieldName.FEAT_STATIC_CAT: [fsc]
			}
			for (target, start, fdr, fsc) in zip(
				target[:, :-custom_ds_metadata["prediction_length"]],
				custom_ds_metadata["start"],
				feat_dynamic_real[:, :-custom_ds_metadata["prediction_length"]],
				feat_static_cat
			)
		],
		freq=custom_ds_metadata["freq"]
	)

	test_ds = ListDataset(
		[
			{
				FieldName.TARGET: target,
				FieldName.START: start,
				FieldName.FEAT_DYNAMIC_REAL: [fdr],
				FieldName.FEAT_STATIC_CAT: [fsc]
			}
			for (target, start, fdr, fsc) in zip(
				target,
				custom_ds_metadata["start"],
				feat_dynamic_real,
				feat_static_cat)
		],
		freq=custom_ds_metadata["freq"]
	)

	train_entry = next(iter(train_ds))
	print(train_entry.keys())

	test_entry = next(iter(test_ds))
	print(test_entry.keys())

	test_series = to_pandas(test_entry)
	train_series = to_pandas(train_entry)

	fig, ax = plt.subplots(2, 1, sharex=True, sharey=True, figsize=(10, 7))

	train_series.plot(ax=ax[0])
	ax[0].grid(which="both")
	ax[0].legend(["train series"], loc="upper left")

	test_series.plot(ax=ax[1])
	ax[1].axvline(train_series.index[-1], color="r")  # End of train dataset
	ax[1].grid(which="both")
	ax[1].legend(["test series", "end of train series"], loc="upper left")

	plt.show()

	#--------------------
	# Transformations.

	from gluonts.transform import (
		AddAgeFeature,
		AddObservedValuesIndicator,
		Chain,
		ExpectedNumInstanceSampler,
		InstanceSplitter,
		SetFieldIfNotPresent,
	)

	# Define a transformation.
	def create_transformation(freq, context_length, prediction_length):
		return Chain(
			[
				AddObservedValuesIndicator(
					target_field=FieldName.TARGET,
					output_field=FieldName.OBSERVED_VALUES,
				),
				AddAgeFeature(
					target_field=FieldName.TARGET,
					output_field=FieldName.FEAT_AGE,
					pred_length=prediction_length,
					log_scale=True,
				),
				InstanceSplitter(
					target_field=FieldName.TARGET,
					is_pad_field=FieldName.IS_PAD,
					start_field=FieldName.START,
					forecast_start_field=FieldName.FORECAST_START,
					instance_sampler=ExpectedNumInstanceSampler(
						num_instances=1,
						min_future=prediction_length,
					),
					past_length=context_length,
					future_length=prediction_length,
					time_series_fields=[
						FieldName.FEAT_AGE,
						FieldName.FEAT_DYNAMIC_REAL,
						FieldName.OBSERVED_VALUES,
					],
				),
			]
		)

	# Transform a dataset.
	transformation = create_transformation(
		custom_ds_metadata["freq"],
		2 * custom_ds_metadata["prediction_length"],  # Can be any appropriate value.
		custom_ds_metadata["prediction_length"]
	)

	train_tf = transformation(iter(train_ds), is_train=True)
	print(type(train_tf))

	train_tf_entry = next(iter(train_tf))
	print([k for k in train_tf_entry.keys()])

	print(f"past target shape: {train_tf_entry['past_target'].shape}")
	print(f"future target shape: {train_tf_entry['future_target'].shape}")
	print(f"past observed values shape: {train_tf_entry['past_observed_values'].shape}")
	print(f"future observed values shape: {train_tf_entry['future_observed_values'].shape}")
	print(f"past age feature shape: {train_tf_entry['past_feat_dynamic_age'].shape}")
	print(f"future age feature shape: {train_tf_entry['future_feat_dynamic_age'].shape}")
	print(train_tf_entry["feat_static_cat"])

	print([k for k in next(iter(train_ds)).keys()])

	test_tf = transformation(iter(test_ds), is_train=False)

	test_tf_entry = next(iter(test_tf))
	print([k for k in test_tf_entry.keys()])

	print(f"past target shape: {test_tf_entry['past_target'].shape}")
	print(f"future target shape: {test_tf_entry['future_target'].shape}")
	print(f"past observed values shape: {test_tf_entry['past_observed_values'].shape}")
	print(f"future observed values shape: {test_tf_entry['future_observed_values'].shape}")
	print(f"past age feature shape: {test_tf_entry['past_feat_dynamic_age'].shape}")
	print(f"future age feature shape: {test_tf_entry['future_feat_dynamic_age'].shape}")
	print(test_tf_entry["feat_static_cat"])

	#--------------------
	# Training an existing model.

	# Configuring an estimator.
	estimator = SimpleFeedForwardEstimator(
		num_hidden_dimensions=[10],
		prediction_length=custom_ds_metadata["prediction_length"],
		context_length=2*custom_ds_metadata["prediction_length"],
		freq=custom_ds_metadata["freq"],
		trainer=Trainer(
			ctx="cpu",
			epochs=5,
			learning_rate=1e-3,
			hybridize=False,
			num_batches_per_epoch=100
		)
	)

	# Getting a predictor.
	predictor = estimator.train(train_ds)

	#--------------------
	# Saving/Loading an existing model.

	# Save the trained model in tmp/.
	predictor.serialize(Path("/tmp/"))

	# Loads it back.
	predictor_deserialized = Predictor.deserialize(Path("/tmp/"))
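
	# Quick sanity check (a sketch, not in the original): the deserialized
	# predictor should behave exactly like the one trained above.
	sanity_forecast = next(predictor_deserialized.predict(train_ds))
	print(sanity_forecast.mean[:5])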

	#--------------------
	# Evaluation.

	# Getting the forecasts.
	forecast_it, ts_it = make_evaluation_predictions(
		dataset=test_ds,  # Test dataset.
		predictor=predictor,  # Predictor.
		num_samples=100,  # Number of sample paths we want for evaluation.
	)

	forecasts = list(forecast_it)
	tss = list(ts_it)

	# First entry of the time series list
	ts_entry = tss[0]

	# First 5 values of the time series (convert from pandas to numpy)
	print(np.array(ts_entry[:5]).reshape(-1,))

	# First entry of test_ds
	test_ds_entry = next(iter(test_ds))

	# First 5 values
	test_ds_entry["target"][:5]

	# First entry of the forecast list
	forecast_entry = forecasts[0]

	print(f"Number of sample paths: {forecast_entry.num_samples}")
	print(f"Dimension of samples: {forecast_entry.samples.shape}")
	print(f"Start date of the forecast window: {forecast_entry.start_date}")
	print(f"Frequency of the time series: {forecast_entry.freq}")

	print(f"Mean of the future window:\n {forecast_entry.mean}")
	print(f"0.5-quantile (median) of the future window:\n {forecast_entry.quantile(0.5)}")

	def plot_prob_forecasts(ts_entry, forecast_entry):
		plot_length = 150
		prediction_intervals = (50.0, 90.0)
		legend = ["observations", "median prediction"] + [f"{k}% prediction interval" for k in prediction_intervals][::-1]

		fig, ax = plt.subplots(1, 1, figsize=(10, 7))
		ts_entry[-plot_length:].plot(ax=ax)  # plot the time series
		forecast_entry.plot(prediction_intervals=prediction_intervals, color="g")
		plt.grid(which="both")
		plt.legend(legend, loc="upper left")
		plt.show()

	plot_prob_forecasts(ts_entry, forecast_entry)

	# Compute metrics.
	evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
	agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(test_ds))

	print(json.dumps(agg_metrics, indent=4))

	print(item_metrics.head())

	item_metrics.plot(x="MSIS", y="MASE", kind="scatter")
	plt.grid(which="both")
	plt.show()

	#--------------------
	# Create your own model.

	from gluonts.core.component import validated
	from gluonts.dataset.loader import TrainDataLoader
	from gluonts.mx import (
		as_in_context,
		batchify,
		copy_parameters,
		get_hybrid_forward_input_names,
		GluonEstimator,
		RepresentableBlockPredictor,
	)
	from gluonts.transform import (
		ExpectedNumInstanceSampler,
		Transformation,
		InstanceSplitter,
		TestSplitSampler,
		SelectFields,
		Chain
	)

	# Point forecasts with a simple feedforward network.
	class MyNetwork(mx.gluon.HybridBlock):
		def __init__(self, prediction_length, num_cells, **kwargs):
			super().__init__(**kwargs)
			self.prediction_length = prediction_length
			self.num_cells = num_cells

			with self.name_scope():
				# Set up a 3-layer neural network that directly predicts the target values.
				self.nn = mx.gluon.nn.HybridSequential()
				self.nn.add(mx.gluon.nn.Dense(units=self.num_cells, activation="relu"))
				self.nn.add(mx.gluon.nn.Dense(units=self.num_cells, activation="relu"))
				self.nn.add(mx.gluon.nn.Dense(units=self.prediction_length, activation="softrelu"))

	class MyTrainNetwork(MyNetwork):
		def hybrid_forward(self, F, past_target, future_target):
			prediction = self.nn(past_target)
			# Calculate L1 loss with the future_target to learn the median.
			return (prediction - future_target).abs().mean(axis=-1)

	class MyPredNetwork(MyTrainNetwork):
		# The prediction network only receives past_target and returns predictions.
		def hybrid_forward(self, F, past_target):
			prediction = self.nn(past_target)
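			# Add a sample axis: the predictor expects (batch, num_samples, prediction_length); a point forecast has num_samples = 1.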
			return prediction.expand_dims(axis=1)

	class MyEstimator(GluonEstimator):
		@validated()
		def __init__(
			self,
			prediction_length: int,
			context_length: int,
			freq: str,
			num_cells: int,
			batch_size: int = 32,
			trainer: Trainer = Trainer()
		) -> None:
			super().__init__(trainer=trainer, batch_size=batch_size)
			self.prediction_length = prediction_length
			self.context_length = context_length
			self.freq = freq
			self.num_cells = num_cells

		def create_transformation(self):
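			# No feature engineering is needed here; instance splitting happens in the data loaders below.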
			return Chain([])

		def create_training_data_loader(self, dataset, **kwargs):
			instance_splitter = InstanceSplitter(
				target_field=FieldName.TARGET,
				is_pad_field=FieldName.IS_PAD,
				start_field=FieldName.START,
				forecast_start_field=FieldName.FORECAST_START,
				instance_sampler=ExpectedNumInstanceSampler(
					num_instances=1,
					min_future=self.prediction_length
				),
				past_length=self.context_length,
				future_length=self.prediction_length,
			)
			input_names = get_hybrid_forward_input_names(MyTrainNetwork)
			return TrainDataLoader(
				dataset=dataset,
				transform=instance_splitter + SelectFields(input_names),
				batch_size=self.batch_size,
				stack_fn=functools.partial(batchify, ctx=self.trainer.ctx, dtype=self.dtype),
				decode_fn=functools.partial(as_in_context, ctx=self.trainer.ctx),
				**kwargs,
			)

		def create_training_network(self) -> MyTrainNetwork:
			return MyTrainNetwork(
				prediction_length=self.prediction_length,
				num_cells = self.num_cells
			)

		def create_predictor(
			self, transformation: Transformation, trained_network: mx.gluon.HybridBlock
		) -> Predictor:
			prediction_splitter = InstanceSplitter(
				target_field=FieldName.TARGET,
				is_pad_field=FieldName.IS_PAD,
				start_field=FieldName.START,
				forecast_start_field=FieldName.FORECAST_START,
				instance_sampler=TestSplitSampler(),
				past_length=self.context_length,
				future_length=self.prediction_length,
			)

			prediction_network = MyPredNetwork(
				prediction_length=self.prediction_length,
				num_cells=self.num_cells
			)

			copy_parameters(trained_network, prediction_network)

			return RepresentableBlockPredictor(
				input_transform=transformation + prediction_splitter,
				prediction_net=prediction_network,
				batch_size=self.trainer.batch_size,
				freq=self.freq,
				prediction_length=self.prediction_length,
				ctx=self.trainer.ctx,
			)

	estimator = MyEstimator(
		prediction_length=custom_ds_metadata["prediction_length"],
		context_length=2*custom_ds_metadata["prediction_length"],
		freq=custom_ds_metadata["freq"],
		num_cells=40,
		trainer=Trainer(
			ctx="cpu",
			epochs=5,
			learning_rate=1e-3,
			hybridize=False,
			num_batches_per_epoch=100
		)
	)

	predictor = estimator.train(train_ds)

	forecast_it, ts_it = make_evaluation_predictions(
		dataset=test_ds,  # Test dataset.
		predictor=predictor,  # Predictor.
		num_samples=100,  # Number of sample paths we want for evaluation.
	)

	forecasts = list(forecast_it)
	tss = list(ts_it)

	plot_prob_forecasts(tss[0], forecasts[0])

	# Probabilistic forecasting.
	class MyProbNetwork(mx.gluon.HybridBlock):
		def __init__(
			self,
			prediction_length,
			distr_output,
			num_cells,
			num_sample_paths=100,
			**kwargs
		) -> None:
			super().__init__(**kwargs)
			self.prediction_length = prediction_length
			self.distr_output = distr_output
			self.num_cells = num_cells
			self.num_sample_paths = num_sample_paths
			self.proj_distr_args = distr_output.get_args_proj()

			with self.name_scope():
				# Set up a 2-layer neural network whose output will be projected to the distribution parameters.
				self.nn = mx.gluon.nn.HybridSequential()
				self.nn.add(mx.gluon.nn.Dense(units=self.num_cells, activation="relu"))
				self.nn.add(mx.gluon.nn.Dense(units=self.prediction_length * self.num_cells, activation="relu"))

	class MyProbTrainNetwork(MyProbNetwork):
		def hybrid_forward(self, F, past_target, future_target):
			# Compute network output.
			net_output = self.nn(past_target)

			# (batch, prediction_length * nn_features)  ->  (batch, prediction_length, nn_features).
			net_output = net_output.reshape(0, self.prediction_length, -1)

			# Project network output to distribution parameters domain.
			distr_args = self.proj_distr_args(net_output)

			# Compute distribution.
			distr = self.distr_output.distribution(distr_args)

			# Negative log-likelihood.
			loss = distr.loss(future_target)
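			# distr.loss returns the per-entry negative log-likelihood; the Trainer averages it into the training objective.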
			return loss

	class MyProbPredNetwork(MyProbTrainNetwork):
		# The prediction network only receives past_target and returns predictions.
		def hybrid_forward(self, F, past_target):
			# Repeat past target: from (batch_size, past_target_length) to
			# (batch_size * num_sample_paths, past_target_length).
			repeated_past_target = past_target.repeat(
				repeats=self.num_sample_paths, axis=0
			)

			# Compute network output.
			net_output = self.nn(repeated_past_target)

			# (batch * num_sample_paths, prediction_length * nn_features)  ->  (batch * num_sample_paths, prediction_length, nn_features).
			net_output = net_output.reshape(0, self.prediction_length, -1)

			# Project network output to distribution parameters domain.
			distr_args = self.proj_distr_args(net_output)

			# Compute distribution.
			distr = self.distr_output.distribution(distr_args)

			# Get (batch_size * num_sample_paths, prediction_length) samples.
			samples = distr.sample()

			# Reshape from (batch_size * num_sample_paths, prediction_length) to
			# (batch_size, num_sample_paths, prediction_length).
			return samples.reshape(shape=(-1, self.num_sample_paths, self.prediction_length))

	class MyProbEstimator(GluonEstimator):
		@validated()
		def __init__(
			self,
			prediction_length: int,
			context_length: int,
			freq: str,
			distr_output: DistributionOutput,
			num_cells: int,
			num_sample_paths: int = 100,
			batch_size: int = 32,
			trainer: Trainer = Trainer()
		) -> None:
			super().__init__(trainer=trainer, batch_size=batch_size)
			self.prediction_length = prediction_length
			self.context_length = context_length
			self.freq = freq
			self.distr_output = distr_output
			self.num_cells = num_cells
			self.num_sample_paths = num_sample_paths

		def create_transformation(self):
			return Chain([])

		def create_training_data_loader(self, dataset, **kwargs):
			instance_splitter = InstanceSplitter(
				target_field=FieldName.TARGET,
				is_pad_field=FieldName.IS_PAD,
				start_field=FieldName.START,
				forecast_start_field=FieldName.FORECAST_START,
				instance_sampler=ExpectedNumInstanceSampler(
					num_instances=1,
					min_future=self.prediction_length
				),
				past_length=self.context_length,
				future_length=self.prediction_length,
			)
			input_names = get_hybrid_forward_input_names(MyProbTrainNetwork)
			return TrainDataLoader(
				dataset=dataset,
				transform=instance_splitter + SelectFields(input_names),
				batch_size=self.batch_size,
				stack_fn=functools.partial(batchify, ctx=self.trainer.ctx, dtype=self.dtype),
				decode_fn=functools.partial(as_in_context, ctx=self.trainer.ctx),
				**kwargs,
			)

		def create_training_network(self) -> MyProbTrainNetwork:
			return MyProbTrainNetwork(
				prediction_length=self.prediction_length,
				distr_output=self.distr_output,
				num_cells=self.num_cells,
				num_sample_paths=self.num_sample_paths
			)

		def create_predictor(
			self, transformation: Transformation, trained_network: mx.gluon.HybridBlock
		) -> Predictor:
			prediction_splitter = InstanceSplitter(
				target_field=FieldName.TARGET,
				is_pad_field=FieldName.IS_PAD,
				start_field=FieldName.START,
				forecast_start_field=FieldName.FORECAST_START,
				instance_sampler=TestSplitSampler(),
				past_length=self.context_length,
				future_length=self.prediction_length,
			)

			prediction_network = MyProbPredNetwork(
				prediction_length=self.prediction_length,
				distr_output=self.distr_output,
				num_cells=self.num_cells,
				num_sample_paths=self.num_sample_paths
			)

			copy_parameters(trained_network, prediction_network)

			return RepresentableBlockPredictor(
				input_transform=transformation + prediction_splitter,
				prediction_net=prediction_network,
				batch_size=self.trainer.batch_size,
				freq=self.freq,
				prediction_length=self.prediction_length,
				ctx=self.trainer.ctx,
			)

	estimator = MyProbEstimator(
		prediction_length=custom_ds_metadata["prediction_length"],
		context_length=2*custom_ds_metadata["prediction_length"],
		freq=custom_ds_metadata["freq"],
		distr_output=GaussianOutput(),
		num_cells=40,
		trainer=Trainer(
			ctx="cpu",
			epochs=5,
			learning_rate=1e-3,
			hybridize=False,
			num_batches_per_epoch=100
		)
	)

	predictor = estimator.train(train_ds)

	forecast_it, ts_it = make_evaluation_predictions(
		dataset=test_ds,  # Test dataset.
		predictor=predictor,  # Predictor.
		num_samples=100,  # Number of sample paths we want for evaluation.
	)

	forecasts = list(forecast_it)
	tss = list(ts_it)

	plot_prob_forecasts(tss[0], forecasts[0])

	#--------------------
	# Add features and scaling.

	class MyProbNetwork(mx.gluon.HybridBlock):
		def __init__(
			self,
			prediction_length,
			context_length,
			distr_output,
			num_cells,
			num_sample_paths=100,
			scaling=True,
			**kwargs
		) -> None:
			super().__init__(**kwargs)
			self.prediction_length = prediction_length
			self.context_length = context_length
			self.distr_output = distr_output
			self.num_cells = num_cells
			self.num_sample_paths = num_sample_paths
			self.proj_distr_args = distr_output.get_args_proj()
			self.scaling = scaling

			with self.name_scope():
				# Set up a 2-layer neural network whose output will be projected to the distribution parameters.
				self.nn = mx.gluon.nn.HybridSequential()
				self.nn.add(mx.gluon.nn.Dense(units=self.num_cells, activation="relu"))
				self.nn.add(mx.gluon.nn.Dense(units=self.prediction_length * self.num_cells, activation="relu"))

				if scaling:
					self.scaler = MeanScaler(keepdims=True)
				else:
					self.scaler = NOPScaler(keepdims=True)

		def compute_scale(self, past_target, past_observed_values):
			# Scale shape is (batch_size, 1).
			_, scale = self.scaler(
				past_target.slice_axis(
					axis=1, begin=-self.context_length, end=None
				),
				past_observed_values.slice_axis(
					axis=1, begin=-self.context_length, end=None
				),
			)

			return scale

	class MyProbTrainNetwork(MyProbNetwork):
		def hybrid_forward(self, F, past_target, future_target, past_observed_values, past_feat_dynamic_real):
			# Compute scale.
			scale = self.compute_scale(past_target, past_observed_values)

			# Scale target and time features.
			past_target_scale = F.broadcast_div(past_target, scale)
			past_feat_dynamic_real_scale = F.broadcast_div(past_feat_dynamic_real.squeeze(axis=-1), scale)

			# Concatenate target and time features to use them as input to the network.
			net_input = F.concat(past_target_scale, past_feat_dynamic_real_scale, dim=-1)

			# Compute network output.
			net_output = self.nn(net_input)

			# (batch, prediction_length * nn_features)  ->  (batch, prediction_length, nn_features).
			net_output = net_output.reshape(0, self.prediction_length, -1)

			# Project network output to distribution parameters domain.
			distr_args = self.proj_distr_args(net_output)

			# Compute distribution.
			distr = self.distr_output.distribution(distr_args, scale=scale)

			# Negative log-likelihood.
			loss = distr.loss(future_target)
			return loss

	class MyProbPredNetwork(MyProbTrainNetwork):
		# The prediction network only receives past_target and returns predictions.
		def hybrid_forward(self, F, past_target, past_observed_values, past_feat_dynamic_real):
			# Repeat fields: from (batch_size, past_target_length) to
			# (batch_size * num_sample_paths, past_target_length).
			repeated_past_target = past_target.repeat(
				repeats=self.num_sample_paths, axis=0
			)
			repeated_past_observed_values = past_observed_values.repeat(
				repeats=self.num_sample_paths, axis=0
			)
			repeated_past_feat_dynamic_real = past_feat_dynamic_real.repeat(
				repeats=self.num_sample_paths, axis=0
			)

			# Compute scale.
			scale = self.compute_scale(repeated_past_target, repeated_past_observed_values)

			# Scale repeated target and time features.
			repeated_past_target_scale = F.broadcast_div(repeated_past_target, scale)
			repeated_past_feat_dynamic_real_scale = F.broadcast_div(repeated_past_feat_dynamic_real.squeeze(axis=-1), scale)

			# Concatenate target and time features to use them as input to the network.
			net_input = F.concat(repeated_past_target_scale, repeated_past_feat_dynamic_real_scale, dim=-1)

			# Compute network output.
			net_output = self.nn(net_input)

			# (batch * num_sample_paths, prediction_length * nn_features)  ->  (batch * num_sample_paths, prediction_length, nn_features).
			net_output = net_output.reshape(0, self.prediction_length, -1)

			# Project network output to distribution parameters domain.
			distr_args = self.proj_distr_args(net_output)

			# Compute distribution.
			distr = self.distr_output.distribution(distr_args, scale=scale)

			# Get (batch_size * num_sample_paths, prediction_length) samples.
			samples = distr.sample()

			# Reshape from (batch_size * num_sample_paths, prediction_length) to
			# (batch_size, num_sample_paths, prediction_length).
			return samples.reshape(shape=(-1, self.num_sample_paths, self.prediction_length))

	class MyProbEstimator(GluonEstimator):
		@validated()
		def __init__(
			self,
			prediction_length: int,
			context_length: int,
			freq: str,
			distr_output: DistributionOutput,
			num_cells: int,
			num_sample_paths: int = 100,
			scaling: bool = True,
			batch_size: int = 32,
			trainer: Trainer = Trainer()
		) -> None:
			super().__init__(trainer=trainer, batch_size=batch_size)
			self.prediction_length = prediction_length
			self.context_length = context_length
			self.freq = freq
			self.distr_output = distr_output
			self.num_cells = num_cells
			self.num_sample_paths = num_sample_paths
			self.scaling = scaling

		def create_transformation(self):
			# Feature transformation that the model uses for input.
			return AddObservedValuesIndicator(
				target_field=FieldName.TARGET,
				output_field=FieldName.OBSERVED_VALUES,
			)

		def create_training_data_loader(self, dataset, **kwargs):
			instance_splitter = InstanceSplitter(
				target_field=FieldName.TARGET,
				is_pad_field=FieldName.IS_PAD,
				start_field=FieldName.START,
				forecast_start_field=FieldName.FORECAST_START,
				instance_sampler=ExpectedNumInstanceSampler(
					num_instances=1,
					min_future=self.prediction_length
				),
				past_length=self.context_length,
				future_length=self.prediction_length,
				time_series_fields=[
					FieldName.FEAT_DYNAMIC_REAL,
					FieldName.OBSERVED_VALUES,
				],
			)
			input_names = get_hybrid_forward_input_names(MyProbTrainNetwork)
			return TrainDataLoader(
				dataset=dataset,
				transform=instance_splitter + SelectFields(input_names),
				batch_size=self.batch_size,
				stack_fn=functools.partial(batchify, ctx=self.trainer.ctx, dtype=self.dtype),
				decode_fn=functools.partial(as_in_context, ctx=self.trainer.ctx),
				**kwargs,
			)

		def create_training_network(self) -> MyProbTrainNetwork:
			return MyProbTrainNetwork(
				prediction_length=self.prediction_length,
				context_length=self.context_length,
				distr_output=self.distr_output,
				num_cells=self.num_cells,
				num_sample_paths=self.num_sample_paths,
				scaling=self.scaling
			)

		def create_predictor(
			self, transformation: Transformation, trained_network: mx.gluon.HybridBlock
		) -> Predictor:
			prediction_splitter = InstanceSplitter(
				target_field=FieldName.TARGET,
				is_pad_field=FieldName.IS_PAD,
				start_field=FieldName.START,
				forecast_start_field=FieldName.FORECAST_START,
				instance_sampler=TestSplitSampler(),
				past_length=self.context_length,
				future_length=self.prediction_length,
				time_series_fields=[
					FieldName.FEAT_DYNAMIC_REAL,
					FieldName.OBSERVED_VALUES,
				],
			)

			prediction_network = MyProbPredNetwork(
				prediction_length=self.prediction_length,
				context_length=self.context_length,
				distr_output=self.distr_output,
				num_cells=self.num_cells,
				num_sample_paths=self.num_sample_paths,
				scaling=self.scaling
			)

			copy_parameters(trained_network, prediction_network)

			return RepresentableBlockPredictor(
				input_transform=transformation + prediction_splitter,
				prediction_net=prediction_network,
				batch_size=self.trainer.batch_size,
				freq=self.freq,
				prediction_length=self.prediction_length,
				ctx=self.trainer.ctx,
			)

	estimator = MyProbEstimator(
		prediction_length=custom_ds_metadata["prediction_length"],
		context_length=2*custom_ds_metadata["prediction_length"],
		freq=custom_ds_metadata["freq"],
		distr_output=GaussianOutput(),
		num_cells=40,
		trainer=Trainer(
			ctx="cpu",
			epochs=5,
			learning_rate=1e-3,
			hybridize=False,
			num_batches_per_epoch=100
		)
	)

	predictor = estimator.train(train_ds)

	forecast_it, ts_it = make_evaluation_predictions(
		dataset=test_ds,  # Test dataset.
		predictor=predictor,  # Predictor.
		num_samples=100,  # Number of sample paths we want for evaluation.
	)

	forecasts = list(forecast_it)
	tss = list(ts_it)

	plot_prob_forecasts(tss[0], forecasts[0])

	#--------------------
	# From feedforward to RNN.

	class MyProbRNN(mx.gluon.HybridBlock):
		def __init__(self,
			prediction_length,
			context_length,
			distr_output,
			num_cells,
			num_layers,
			num_sample_paths=100,
			scaling=True,
			**kwargs
		) -> None:
			super().__init__(**kwargs)
			self.prediction_length = prediction_length
			self.context_length = context_length
			self.distr_output = distr_output
			self.num_cells = num_cells
			self.num_layers = num_layers
			self.num_sample_paths = num_sample_paths
			self.proj_distr_args = distr_output.get_args_proj()
			self.scaling = scaling

			with self.name_scope():
				self.rnn = mx.gluon.rnn.HybridSequentialRNNCell()
				for k in range(self.num_layers):
					cell = mx.gluon.rnn.LSTMCell(hidden_size=self.num_cells)
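					# Wrap all but the first layer in a residual connection; the first
					# layer's input width differs from the hidden size, so it is left plain.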
					cell = mx.gluon.rnn.ResidualCell(cell) if k > 0 else cell
					self.rnn.add(cell)

				if scaling:
					self.scaler = MeanScaler(keepdims=True)
				else:
					self.scaler = NOPScaler(keepdims=True)

		def compute_scale(self, past_target, past_observed_values):
			# The scale is computed over the last context_length entries of the past target;
			# scale shape is (batch_size, 1, *target_shape).
			_, scale = self.scaler(
				past_target.slice_axis(
					axis=1, begin=-self.context_length, end=None
				),
				past_observed_values.slice_axis(
					axis=1, begin=-self.context_length, end=None
				),
			)

			return scale

		def unroll_encoder(
			self,
			F,
			past_target,
			past_observed_values,
			future_target=None,
			future_observed_values=None
		):
			# Overall target field.
			# Input target from -(context_length + prediction_length + 1) to -1.
			if future_target is not None:  # During training.
				target_in = F.concat(
					past_target, future_target, dim=-1
				).slice_axis(
					axis=1, begin=-(self.context_length + self.prediction_length + 1), end=-1
				)

				# Overall observed_values field.
				# Input observed_values corresponding to target_in.
				observed_values_in = F.concat(
					past_observed_values, future_observed_values, dim=-1
				).slice_axis(
					axis=1, begin=-(self.context_length + self.prediction_length + 1), end=-1
				)

				rnn_length = self.context_length + self.prediction_length
			else:  # During inference.
				target_in = past_target.slice_axis(
					axis=1, begin=-(self.context_length + 1), end=-1
				)

				# Overall observed_values field.
				# Input observed_values corresponding to target_in.
				observed_values_in = past_observed_values.slice_axis(
					axis=1, begin=-(self.context_length + 1), end=-1
				)

				rnn_length = self.context_length

			# Compute scale.
			scale = self.compute_scale(target_in, observed_values_in)

			# Scale target_in.
			target_in_scale = F.broadcast_div(target_in, scale)

			# Compute network output.
			net_output, states = self.rnn.unroll(
				inputs=target_in_scale,
				length=rnn_length,
				layout="NTC",
				merge_outputs=True,
			)

			return net_output, states, scale

	class MyProbTrainRNN(MyProbRNN):
		def hybrid_forward(
			self,
			F,
			past_target,
			future_target,
			past_observed_values,
			future_observed_values
		):
			net_output, _, scale = self.unroll_encoder(
				F, past_target, past_observed_values, future_target, future_observed_values
			)

			# Output target from -(context_length + prediction_length) to end.
			target_out = F.concat(
				past_target, future_target, dim=-1
			).slice_axis(
				axis=1, begin=-(self.context_length + self.prediction_length), end=None
			)

			# Project network output to distribution parameters domain.
			distr_args = self.proj_distr_args(net_output)

			# Compute distribution
			distr = self.distr_output.distribution(distr_args, scale=scale)

			# Negative log-likelihood.
			loss = distr.loss(target_out)
			return loss

	class MyProbPredRNN(MyProbTrainRNN):
		def sample_decoder(self, F, past_target, states, scale):
			# Repeat fields: from (batch_size, past_target_length) to
			# (batch_size * num_sample_paths, past_target_length).
			repeated_states = [
				s.repeat(repeats=self.num_sample_paths, axis=0)
				for s in states
			]
			repeated_scale = scale.repeat(repeats=self.num_sample_paths, axis=0)

			# First decoder input is the last value of the past_target, i.e.,
			# the previous value of the first time step we want to forecast.
			decoder_input = past_target.slice_axis(
				axis=1, begin=-1, end=None
			).repeat(
				repeats=self.num_sample_paths, axis=0
			)

			# List with samples at each time step.
			future_samples = []

			# For each future time step, draw new samples and update the state;
			# the drawn samples are the inputs to the RNN at the next time step.
			for k in range(self.prediction_length):
				rnn_outputs, repeated_states = self.rnn.unroll(
					inputs=decoder_input,
					length=1,
					begin_state=repeated_states,
					layout="NTC",
					merge_outputs=True,
				)

				# Project network output to distribution parameters domain.
				distr_args = self.proj_distr_args(rnn_outputs)

				# Compute distribution.
				distr = self.distr_output.distribution(distr_args, scale=repeated_scale)

				# Draw samples (batch_size * num_samples, 1).
				new_samples = distr.sample()

				# Append the samples of the current time step.
				future_samples.append(new_samples)

				# Update decoder input for the next time step.
				decoder_input = new_samples

			samples = F.concat(*future_samples, dim=1)

			# (batch_size, num_samples, prediction_length).
			return samples.reshape(shape=(-1, self.num_sample_paths, self.prediction_length))

		def hybrid_forward(self, F, past_target, past_observed_values):
			# Unroll encoder over context_length.
			net_output, states, scale = self.unroll_encoder(
				F, past_target, past_observed_values
			)

			samples = self.sample_decoder(F, past_target, states, scale)

			return samples

	class MyProbRNNEstimator(GluonEstimator):
		@validated()
		def __init__(
			self,
			prediction_length: int,
			context_length: int,
			freq: str,
			distr_output: DistributionOutput,
			num_cells: int,
			num_layers: int,
			num_sample_paths: int = 100,
			scaling: bool = True,
			batch_size: int = 32,
			trainer: Trainer = Trainer()
		) -> None:
			super().__init__(trainer=trainer, batch_size=batch_size)
			self.prediction_length = prediction_length
			self.context_length = context_length
			self.freq = freq
			self.distr_output = distr_output
			self.num_cells = num_cells
			self.num_layers = num_layers
			self.num_sample_paths = num_sample_paths
			self.scaling = scaling

		def create_transformation(self):
			# Feature transformation that the model uses for input.
			return AddObservedValuesIndicator(
				target_field=FieldName.TARGET,
				output_field=FieldName.OBSERVED_VALUES,
			)

		def create_training_data_loader(self, dataset, **kwargs):
			instance_splitter = InstanceSplitter(
				target_field=FieldName.TARGET,
				is_pad_field=FieldName.IS_PAD,
				start_field=FieldName.START,
				forecast_start_field=FieldName.FORECAST_START,
				instance_sampler=ExpectedNumInstanceSampler(
					num_instances=1,
					min_future=self.prediction_length,
				),
				past_length=self.context_length + 1,
				future_length=self.prediction_length,
				time_series_fields=[
					FieldName.FEAT_DYNAMIC_REAL,
					FieldName.OBSERVED_VALUES,
				],
			)
			input_names = get_hybrid_forward_input_names(MyProbTrainRNN)
			return TrainDataLoader(
				dataset=dataset,
				transform=instance_splitter + SelectFields(input_names),
				batch_size=self.batch_size,
				stack_fn=functools.partial(batchify, ctx=self.trainer.ctx, dtype=self.dtype),
				decode_fn=functools.partial(as_in_context, ctx=self.trainer.ctx),
				**kwargs,
			)

		def create_training_network(self) -> MyProbTrainRNN:
			return MyProbTrainRNN(
				prediction_length=self.prediction_length,
				context_length=self.context_length,
				distr_output=self.distr_output,
				num_cells=self.num_cells,
				num_layers=self.num_layers,
				num_sample_paths=self.num_sample_paths,
				scaling=self.scaling
			)

		def create_predictor(
			self, transformation: Transformation, trained_network: mx.gluon.HybridBlock
		) -> Predictor:
			prediction_splitter = InstanceSplitter(
				target_field=FieldName.TARGET,
				is_pad_field=FieldName.IS_PAD,
				start_field=FieldName.START,
				forecast_start_field=FieldName.FORECAST_START,
				instance_sampler=TestSplitSampler(),
				past_length=self.context_length + 1,
				future_length=self.prediction_length,
				time_series_fields=[
					FieldName.FEAT_DYNAMIC_REAL,
					FieldName.OBSERVED_VALUES,
				],
			)
			prediction_network = MyProbPredRNN(
				prediction_length=self.prediction_length,
				context_length=self.context_length,
				distr_output=self.distr_output,
				num_cells=self.num_cells,
				num_layers=self.num_layers,
				num_sample_paths=self.num_sample_paths,
				scaling=self.scaling
			)

			copy_parameters(trained_network, prediction_network)

			return RepresentableBlockPredictor(
				input_transform=transformation + prediction_splitter,
				prediction_net=prediction_network,
				batch_size=self.trainer.batch_size,
				freq=self.freq,
				prediction_length=self.prediction_length,
				ctx=self.trainer.ctx,
			)

	estimator = MyProbRNNEstimator(
		prediction_length=24,
		context_length=48,
		freq="1H",
		num_cells=40,
		num_layers=2,
		distr_output=GaussianOutput(),
		trainer=Trainer(
			ctx="cpu",
			epochs=5,
			learning_rate=1e-3,
			hybridize=False,
			num_batches_per_epoch=100
		)
	)

	predictor = estimator.train(train_ds)

	forecast_it, ts_it = make_evaluation_predictions(
		dataset=test_ds,  # Test dataset.
		predictor=predictor,  # Predictor.
		num_samples=100,  # Number of sample paths we want for evaluation.
	)

	forecasts = list(forecast_it)
	tss = list(ts_it)

	plot_prob_forecasts(tss[0], forecasts[0])
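
# Entry-point sketch (an assumption; the scrape does not show how the tutorial
# was invoked).
if __name__ == "__main__":
	extended_forecasting_tutorial()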
Exemple #18
0
test_series.plot()
plt.axvline(train_series.index[-1], color='r')  # end of train dataset
plt.grid(which="both")
plt.legend(["test series", "end of train series"], loc="upper left")
plt.show()

from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.trainer import Trainer

estimator = SimpleFeedForwardEstimator(
    num_hidden_dimensions=[10],
    prediction_length=dataset.metadata.prediction_length,
    context_length=100,
    freq=dataset.metadata.freq,
    trainer=Trainer(
        ctx="cpu",
        epochs=5,
        learning_rate=1E-3,
        hybridize=True,
        num_batches_per_epoch=200,
    ),
)
predictor = estimator.train(dataset.train)

from gluonts.evaluation.backtest import make_evaluation_predictions

forecast_it, ts_it = make_evaluation_predictions(
    dataset=dataset.test,  # test dataset
    predictor=predictor,  # predictor
    num_samples=100,  # number of sample paths we want for evaluation
)
Exemple #19
0
def quick_start_tutorial():
	# Provided datasets.

	print(f"Available datasets: {list(dataset_recipes.keys())}")

	dataset = get_dataset("m4_hourly", regenerate=True)

	entry = next(iter(dataset.train))

	plt.figure()
	train_series = to_pandas(entry)
	train_series.plot()
	plt.grid(which="both")
	plt.legend(["train series"], loc="upper left")

	entry = next(iter(dataset.test))

	plt.figure()
	test_series = to_pandas(entry)
	test_series.plot()
	plt.axvline(train_series.index[-1], color="r")  # End of train dataset.
	plt.grid(which="both")
	plt.legend(["test series", "end of train series"], loc="upper left")

	plt.show()

	#--------------------
	# Custom datasets.

	N = 10  # Number of time series.
	T = 100  # Number of timesteps.
	prediction_length = 24
	freq = "1H"
	custom_dataset = np.random.normal(size=(N, T))
	start = pd.Timestamp("01-01-2019", freq=freq)  # Can be different for each time series.

	# Train dataset: cut the last window of length "prediction_length", add "target" and "start" fields.
	train_ds = ListDataset(
		[{"target": x, "start": start} for x in custom_dataset[:, :-prediction_length]],
		freq=freq
	)
	# Test dataset: use the whole dataset, add "target" and "start" fields.
	test_ds = ListDataset(
		[{"target": x, "start": start} for x in custom_dataset],
		freq=freq
	)

	#--------------------
	# Training an existing model (Estimator).

	estimator = SimpleFeedForwardEstimator(
		num_hidden_dimensions=[10],
		prediction_length=dataset.metadata.prediction_length,
		context_length=100,
		freq=dataset.metadata.freq,
		trainer=Trainer(
			ctx="cpu",
			epochs=5,
			learning_rate=1e-3,
			num_batches_per_epoch=100
		)
	)

	predictor = estimator.train(dataset.train)

	#--------------------
	# Visualize and evaluate forecasts.

	forecast_it, ts_it = make_evaluation_predictions(
		dataset=dataset.test,  # Test dataset.
		predictor=predictor,  # Predictor.
		num_samples=100,  # Number of sample paths we want for evaluation.
	)

	forecasts = list(forecast_it)
	tss = list(ts_it)

	# First entry of the time series list.
	ts_entry = tss[0]

	# First 5 values of the time series (convert from pandas to numpy).
	print(np.array(ts_entry[:5]).reshape(-1,))

	# First entry of dataset.test.
	dataset_test_entry = next(iter(dataset.test))

	# First 5 values.
	print(dataset_test_entry["target"][:5])

	# First entry of the forecast list.
	forecast_entry = forecasts[0]

	print(f"Number of sample paths: {forecast_entry.num_samples}")
	print(f"Dimension of samples: {forecast_entry.samples.shape}")
	print(f"Start date of the forecast window: {forecast_entry.start_date}")
	print(f"Frequency of the time series: {forecast_entry.freq}")

	print(f"Mean of the future window:\n {forecast_entry.mean}")
	print(f"0.5-quantile (median) of the future window:\n {forecast_entry.quantile(0.5)}")

	def plot_prob_forecasts(ts_entry, forecast_entry):
		plot_length = 150
		prediction_intervals = (50.0, 90.0)
		legend = ["observations", "median prediction"] + [f"{k}% prediction interval" for k in prediction_intervals][::-1]

		fig, ax = plt.subplots(1, 1, figsize=(10, 7))
		ts_entry[-plot_length:].plot(ax=ax)  # Plot the time series.
		forecast_entry.plot(prediction_intervals=prediction_intervals, color="g")
		plt.grid(which="both")
		plt.legend(legend, loc="upper left")
		plt.show()

	plot_prob_forecasts(ts_entry, forecast_entry)

	evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
	agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(dataset.test))

	print(json.dumps(agg_metrics, indent=4))
	print(item_metrics.head())

	item_metrics.plot(x="MSIS", y="MASE", kind="scatter")
	plt.grid(which="both")

	plt.show()
Exemple #20
0
def get_deep_nn_forecasts(dataset_name, lag, input_file_name, method, external_forecast_horizon=None, integer_conversion=False):
    print("Started loading " + dataset_name)

    df, frequency, forecast_horizon, contain_missing_values, contain_equal_length = loader.convert_tsf_to_dataframe(BASE_DIR + "/tsf_data/" + input_file_name, 'NaN', VALUE_COL_NAME)

    train_series_list = []
    test_series_list = []
    train_series_full_list = []
    test_series_full_list = []
    final_forecasts = []

    if frequency is not None:
        freq = FREQUENCY_MAP[frequency]
        seasonality = SEASONALITY_MAP[frequency]
    else:
        freq = "1Y"
        seasonality = 1

    if isinstance(seasonality, list):
        seasonality = min(seasonality)  # Used to calculate MASE

    # If the forecast horizon is not given within the .tsf file, then it should be provided as a function input
    if forecast_horizon is None:
        if external_forecast_horizon is None:
            raise Exception("Please provide the required forecast horizon")
        else:
            forecast_horizon = external_forecast_horizon

    start_exec_time = datetime.now()

    for index, row in df.iterrows():
        if TIME_COL_NAME in df.columns:
            train_start_time = row[TIME_COL_NAME]
        else:
            train_start_time = datetime.strptime('1900-01-01 00-00-00', '%Y-%m-%d %H-%M-%S')  # Add a dummy timestamp when the dataset provides none

        series_data = row[VALUE_COL_NAME]

        # Creating training and test series. Test series will be only used during evaluation
        train_series_data = series_data[:len(series_data) - forecast_horizon]
        test_series_data = series_data[(len(series_data) - forecast_horizon) : len(series_data)]

        train_series_list.append(train_series_data)
        test_series_list.append(test_series_data)

        # We use the full-length training series to train the model, as we do not tune hyperparameters
        train_series_full_list.append({
            FieldName.TARGET: train_series_data,
            FieldName.START: pd.Timestamp(train_start_time, freq=freq)
        })

        test_series_full_list.append({
            FieldName.TARGET: series_data,
            FieldName.START: pd.Timestamp(train_start_time, freq=freq)
        })

    train_ds = ListDataset(train_series_full_list, freq=freq)
    test_ds = ListDataset(test_series_full_list, freq=freq)

    if (method == "feed_forward"):
        estimator = SimpleFeedForwardEstimator(freq=freq,
                                               context_length=lag,
                                               prediction_length=forecast_horizon)
    elif(method == "deepar"):
        estimator = DeepAREstimator(freq=freq,
                                    context_length=lag,
                                    prediction_length=forecast_horizon)
    elif(method =="nbeats"):
        estimator = NBEATSEstimator(freq=freq,
                                    context_length=lag,
                                    prediction_length=forecast_horizon)
    elif (method == "wavenet"):
        estimator = WaveNetEstimator(freq=freq,
                                     prediction_length=forecast_horizon)
    elif (method == "transformer"):
        estimator = TransformerEstimator(freq=freq,
                                     context_length=lag,
                                     prediction_length=forecast_horizon)

    predictor = estimator.train(training_data=train_ds)

    forecast_it, ts_it = make_evaluation_predictions(dataset=test_ds, predictor=predictor, num_samples=100)

    # Time series predictions
    forecasts = list(forecast_it)

    # Get median (0.5 quantile) of the 100 sample forecasts as final point forecasts
    for f in forecasts:
        final_forecasts.append(f.median)

    if integer_conversion:
        final_forecasts = np.round(final_forecasts)

    if not os.path.exists(BASE_DIR + "/results/fixed_horizon_forecasts/"):
        os.makedirs(BASE_DIR + "/results/fixed_horizon_forecasts/")

    # write the forecasting results to a file
    file_name = dataset_name + "_" + method + "_lag_" + str(lag)
    forecast_file_path = BASE_DIR + "/results/fixed_horizon_forecasts/" + file_name + ".txt"

    with open(forecast_file_path, "w") as output:
        writer = csv.writer(output, lineterminator='\n')
        writer.writerows(final_forecasts)

    finish_exec_time = datetime.now()

    # Execution time
    exec_time = finish_exec_time - start_exec_time
    print(exec_time)

    if not os.path.exists(BASE_DIR + "/results/fixed_horizon_execution_times/"):
        os.makedirs(BASE_DIR + "/results/fixed_horizon_execution_times/")

    with open(BASE_DIR + "/results/fixed_horizon_execution_times/" + file_name + ".txt", "w") as output_time:
        output_time.write(str(exec_time))

    # Write the training dataset and the actual results into separate files, which are then used for error calculations.
    # We do not use the built-in GluonTS evaluation, as some of the error measures we use are not implemented there.
    temp_dataset_path =  BASE_DIR + "/results/fixed_horizon_forecasts/" + dataset_name + "_dataset.txt"
    temp_results_path = BASE_DIR + "/results/fixed_horizon_forecasts/" + dataset_name + "_results.txt"

    with open(temp_dataset_path, "w") as output_dataset:
        writer = csv.writer(output_dataset, lineterminator='\n')
        writer.writerows(train_series_list)

    with open(temp_results_path, "w") as output_results:
        writer = csv.writer(output_results, lineterminator='\n')
        writer.writerows(test_series_list)

    if not os.path.exists(BASE_DIR + "/results/fixed_horizon_errors/"):
        os.makedirs(BASE_DIR + "/results/fixed_horizon_errors/")

    subprocess.call(["Rscript", "--vanilla", BASE_DIR + "/utils/error_calc_helper.R", BASE_DIR, forecast_file_path, temp_results_path, temp_dataset_path, str(seasonality), file_name ])

    # Remove intermediate files
    os.system("rm " + temp_dataset_path)
    os.system("rm " + temp_results_path)
Exemple #21
0
def forecast_dataset(dataset,
                     epochs=100,
                     learning_rate=1e-3,
                     num_samples=100,
                     model="SimpleFeedForward",
                     r_method="ets",
                     alpha=0,
                     distrib="Gaussian"):
    if distrib == "Gaussian":
        distr_output = GaussianOutput()
    elif distrib == "Laplace":
        distr_output = LaplaceOutput()
    elif distrib == "PiecewiseLinear":
        distr_output = PiecewiseLinearOutput(num_pieces=2)
    elif distrib == "Uniform":
        distr_output = UniformOutput()
    elif distrib == "Student":
        distr_output = StudentTOutput()
    else:
        distr_output = None

    if model != "GaussianProcess":
        ctx = mx.Context("gpu")
    else:
        ctx = mx.Context("cpu")

    # Trainer
    trainer = Trainer(epochs=epochs,
                      learning_rate=learning_rate,
                      num_batches_per_epoch=100,
                      ctx=ctx,
                      hybridize=(model[0] != "c"))  # custom ("c"-prefixed) models are not hybridized

    # Estimator (if machine learning model)
    if model == "SimpleFeedForward":  # 10s / epochs for context 60*24
        estimator = SimpleFeedForwardEstimator(
            num_hidden_dimensions=[10],
            prediction_length=dataset.prediction_length,
            context_length=dataset.context_length,
            freq=dataset.freq,
            trainer=trainer,
            distr_output=distr_output)
    elif model == "cSimpleFeedForward":  # 10s / epochs for context 60*24
        estimator = CustomSimpleFeedForwardEstimator(
            prediction_length=dataset.prediction_length,
            context_length=dataset.context_length,
            freq=dataset.freq,
            trainer=trainer,
            num_cells=40,
            alpha=alpha,
            distr_output=distr_output,
            distr_output_type=distrib)
    elif model == "CanonicalRNN":  # 80s /epochs for context 60*24, idem for 60*1
        estimator = canonical.CanonicalRNNEstimator(
            freq=dataset.freq,
            context_length=dataset.context_length,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            distr_output=distr_output,
        )
    elif model == "DeepAr":
        estimator = deepar.DeepAREstimator(
            freq=dataset.freq,
            context_length=dataset.context_length,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            distr_output=distr_output,
        )
    elif model == "DeepFactor":  # 120 s/epochs if one big time serie, 1.5s if 183 time series
        estimator = deep_factor.DeepFactorEstimator(
            freq=dataset.freq,
            context_length=dataset.context_length,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            distr_output=distr_output,
        )
    elif model == "DeepState":  # Very slow on cpu
        estimator = deepstate.DeepStateEstimator(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            cardinality=[1],
            use_feat_static_cat=False)
    elif model == "GaussianProcess":  # CPU / GPU problem
        estimator = gp_forecaster.GaussianProcessEstimator(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            cardinality=1)
    elif model == "NPTS":
        estimator = npts.NPTSEstimator(
            freq=dataset.freq, prediction_length=dataset.prediction_length)
    elif model == "MQCNN":
        estimator = seq2seq.MQCNNEstimator(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer,
            quantiles=[0.005, 0.05, 0.25, 0.5, 0.75, 0.95, 0.995])
    elif model == "MQRNN":
        estimator = seq2seq.MQRNNEstimator(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer,
            quantiles=[0.005, 0.05, 0.25, 0.5, 0.75, 0.95, 0.995])
    elif model == "RNN2QR":  # Must be investigated
        estimator = seq2seq.RNN2QRForecaster(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer,
            cardinality=dataset.cardinality,
            embedding_dimension=1,
            encoder_rnn_layer=1,
            encoder_rnn_num_hidden=1,
            decoder_mlp_layer=[1],
            decoder_mlp_static_dim=1)
    elif model == "SeqToSeq":  # Must be investigated
        estimator = seq2seq.Seq2SeqEstimator(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer,
            cardinality=[1],
            embedding_dimension=1,
            decoder_mlp_layer=[1],
            decoder_mlp_static_dim=1,
            encoder=Seq2SeqEncoder())
    elif model == "Transformer":  # Make the computer lag the first time
        estimator = transformer.TransformerEstimator(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer)

    else:
        estimator = None

    # Predictor: built directly for non-ML models, or from the trained estimator for ML models.
    if model == "Prophet":
        predictor = prophet.ProphetPredictor(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
        )
    elif model == "R":
        predictor = r_forecast.RForecastPredictor(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
            method_name=r_method)
    elif model == "SeasonalNaive":
        predictor = seasonal_naive.SeasonalNaivePredictor(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
            season_length=24)
    else:
        predictor = estimator.train(dataset.train_ds)
        if model[0] != "c":
            predictor.serialize(Path("temp"))
            predictor = Predictor.deserialize(
                Path("temp"), ctx=mx.cpu(0))  # fix for deepstate

    # Evaluate
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=dataset.test_ds,  # test dataset
        predictor=predictor,  # predictor
        num_samples=num_samples,  # num of sample paths we want for evaluation
    )

    return list(forecast_it), list(ts_it)
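
# Hypothetical usage of forecast_dataset; `my_dataset` stands for any object
# exposing the attributes read above (train_ds, test_ds, freq,
# prediction_length, context_length).
forecasts, tss = forecast_dataset(my_dataset, epochs=10, model="SimpleFeedForward")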
Exemple #22
0
import os

from gluonts.dataset.repository.datasets import get_dataset
from gluonts.evaluation import Evaluator
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.support.util import get_download_path
from gluonts.trainer import Trainer
from gluonts.model.predictor import Predictor

if __name__ == "__main__":

    dataset = get_dataset("exchange_rate")

    estimator = SimpleFeedForwardEstimator(
        prediction_length=dataset.metadata.prediction_length,
        freq=dataset.metadata.freq,
        trainer=Trainer(epochs=5, num_batches_per_epoch=10),
    )

    predictor = estimator.train(dataset.train)

    # save the trained model in a path ~/.mxnet/gluon-ts/feedforward/
    # or $MXNET_HOME/feedforward if MXNET_HOME is defined
    model_path = get_download_path() / "feedforward"
    os.makedirs(model_path, exist_ok=True)

    predictor.serialize(model_path)

    # load it back; prediction accuracy can then be evaluated with the deserialized model
    predictor_deserialized = Predictor.deserialize(model_path)
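
    # A hedged sketch of that evaluation step (not in the original snippet),
    # using the Evaluator and make_evaluation_predictions imported above:
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=dataset.test,
        predictor=predictor_deserialized,
        num_samples=100,
    )
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(
        ts_it, forecast_it, num_series=len(dataset.test))
    print(agg_metrics["MASE"])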
Exemple #23
0
import json
import os
import pathlib

import pandas as pd

from gluonts.dataset.common import ListDataset
from gluonts.dataset.field_names import FieldName
from gluonts.evaluation import Evaluator
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.trainer import Trainer


def train(args):

    # Parse arguments
    epochs = args.epochs
    pred_length = args.pred_length
    batch_size = args.batch_size
    lr = args.lr

    model_dir = args.model_dir
    data_dir = args.data_dir
    num_gpus = args.num_gpus
    output_dir = args.output_dir

    device = "gpu" if num_gpus > 0 else "cpu"
    FREQ = 'H'
    target_col = 'traffic_volume'
    related_cols = [
        'holiday', 'temp', 'rain_1h', 'snow_1h', 'clouds_all', 'weather_main',
        'weather_description'
    ]

    # Get training data
    target_train_df = pd.read_csv(os.path.join(data_dir, 'target_train.csv'),
                                  index_col=0)
    related_train_df = pd.read_csv(os.path.join(data_dir, 'related_train.csv'),
                                   index_col=0)

    num_steps, num_series = target_train_df.shape
    target = target_train_df.values

    start_train_dt = '2017-01-01 00:00:00'
    custom_ds_metadata = {
        'num_series': num_series,
        'num_steps': num_steps,
        'prediction_length': pred_length,
        'freq': FREQ,
        'start': start_train_dt
    }

    # Prepare GluonTS datasets
    related_list = [related_train_df[c].values for c in related_cols]
    train_lst = []

    target_vec = target[:-pred_length].squeeze()
    related_vecs = [
        related[:-pred_length].squeeze() for related in related_list
    ]
    dic = {
        FieldName.TARGET: target_vec,
        FieldName.START: start_train_dt,
        FieldName.FEAT_DYNAMIC_REAL: related_vecs
    }
    train_lst.append(dic)

    test_lst = []

    target_vec = target.squeeze()
    related_vecs = [related.squeeze() for related in related_list]
    dic = {
        FieldName.TARGET: target_vec,
        FieldName.START: start_train_dt,
        FieldName.FEAT_DYNAMIC_REAL: related_vecs
    }
    test_lst.append(dic)

    train_ds = ListDataset(train_lst, freq=FREQ)
    test_ds = ListDataset(test_lst, freq=FREQ)
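
    # Note: train_ds drops the final pred_length steps while test_ds keeps the
    # full series, so make_evaluation_predictions can score the held-out window.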

    # Define Estimator
    trainer = Trainer(ctx=device,
                      epochs=epochs,
                      learning_rate=lr,
                      batch_size=batch_size)

    mlp_estimator = SimpleFeedForwardEstimator(num_hidden_dimensions=[50],
                                               prediction_length=pred_length,
                                               context_length=2 * pred_length,
                                               freq=FREQ,
                                               trainer=trainer)

    # Train the model
    mlp_predictor = mlp_estimator.train(train_ds)

    # Evaluate trained model on test data
    forecast_it, ts_it = make_evaluation_predictions(test_ds,
                                                     mlp_predictor,
                                                     num_samples=100)
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss),
                                          iter(forecasts),
                                          num_series=len(test_ds))

    metrics = [
        'RMSE', 'MAPE', 'wQuantileLoss[0.1]', 'wQuantileLoss[0.5]',
        'wQuantileLoss[0.9]', 'mean_wQuantileLoss'
    ]
    metrics_dic = {key: value
                   for key, value in agg_metrics.items() if key in metrics}
    print(json.dumps(metrics_dic, indent=2))

    # Save the model
    mlp_predictor.serialize(pathlib.Path(model_dir))
    return mlp_predictor
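

# A minimal, hypothetical entry point for train() (the argument names are
# assumptions that mirror the attributes the function reads; the SM_* defaults
# follow SageMaker's environment-variable convention):
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--pred-length", type=int, default=24)
    parser.add_argument("--batch-size", type=int, default=32)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--model-dir", default=os.environ.get("SM_MODEL_DIR", "model"))
    parser.add_argument("--data-dir", default=os.environ.get("SM_CHANNEL_TRAINING", "data"))
    parser.add_argument("--num-gpus", type=int, default=int(os.environ.get("SM_NUM_GPUS", "0")))
    parser.add_argument("--output-dir", default=os.environ.get("SM_OUTPUT_DATA_DIR", "output"))

    train(parser.parse_args())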