Example #1
def GlounTS():
    #from pts.dataset import ListDataset
    #from pts.model.deepar import DeepAREstimator
    #from pts import Trainer
    #from pts.dataset import to_pandas
    # gluonts crashes on my system.
    from gluonts.dataset.common import ListDataset
    from gluonts.model.deepar import DeepAREstimator
    from gluonts.trainer import Trainer
    training_data = ListDataset([{
        "start": df.index[0],
        "target": df.value[:"2015-03-08 23:22:53"]
    }],
                                freq="5min")
    #estimator = DeepAREstimator(freq="5min",input_size = 43, prediction_length=forecast_size, trainer=Trainer(epochs=20))
    estimator = DeepAREstimator(freq="5min",
                                prediction_length=forecast_size,
                                trainer=Trainer(epochs=20))
    predictor = estimator.train(training_data=training_data)
    test_data = ListDataset([{
        "start": df.index[0],
        "target": df.value[:"2015-03-08 23:22:53"]
    }],
                            freq="5min")
    GluonTS_prediction = next(predictor.predict(test_data))
    GluonTS_mean_yhat = GluonTS_prediction.mean
    GluonTS_median_yhat = GluonTS_prediction.median
    return GluonTS_mean_yhat.tolist(), GluonTS_median_yhat.tolist(), GluonTS_prediction
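The snippet above reads two module-level names, df and forecast_size, that are not shown. A minimal setup sketch, assuming the same NAB Twitter-volume CSV used in Example #12 (the 5-minute frequency and timestamp range match); the horizon value is only illustrative:

import pandas as pd

url = "https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_AMZN.csv"
df = pd.read_csv(url, header=0, index_col=0)   # 5-minute tweet counts, timestamp index
forecast_size = 12                             # hypothetical forecast horizon (5-minute steps)

mean_yhat, median_yhat, prediction = GlounTS()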
Example #2
def run_model(data_train,
              data_meta,
              save_path,
              num_epochs=50,
              lr=1e-3,
              batch_size=64,
              scaling=False,
              context_length=3,
              num_layers=3,
              embedding_dimension=16,
              context='gpu'):
    estimator = DeepAREstimator(freq=data_meta['freq'],
                                prediction_length=82,
                                scaling=scaling,
                                context_length=context_length,
                                num_layers=num_layers,
                                embedding_dimension=embedding_dimension,
                                trainer=Trainer(batch_size=batch_size,
                                                epochs=num_epochs,
                                                learning_rate=lr,
                                                ctx=context,
                                                hybridize=False))
    predictor = estimator.train(data_train)
    predictor.serialize(Path(save_path))
    return predictor
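A minimal usage sketch for run_model, assuming data_meta is a small dict that carries the dataset frequency (the function only reads data_meta['freq']); the dataset name and save path are illustrative:

from pathlib import Path
from gluonts.dataset.repository.datasets import get_dataset

dataset = get_dataset("m4_hourly")
data_meta = {"freq": dataset.metadata.freq}
Path("model").mkdir(exist_ok=True)
predictor = run_model(dataset.train, data_meta, "model",
                      num_epochs=1, context="cpu")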
Example #3
def train_predictor(df_dict, end_train_date, regions_list, target_col, feat_dynamic_cols=None):
    estimator = DeepAREstimator(freq=data_freq, 
                                prediction_length=nb_hours_pred,
                                trainer=Trainer(epochs=max_epochs, learning_rate = learning_rate,
                                                learning_rate_decay_factor=0.01, patience=patience),
                                use_feat_dynamic_real=feat_dynamic_cols is not None)
    if feat_dynamic_cols is not None:
        
        training_data = ListDataset(
            [{"item_id": region,
                "start": df_dict[region].index[0],
              "target": df_dict[region][target_col][:end_train_date],
             "feat_dynamic_real": [df_dict[region][feat_dynamic_col][:end_train_date]
                                   for feat_dynamic_col in feat_dynamic_cols] 
             }
            for region in regions_list],
            freq = data_freq
        )
    else:
        training_data = ListDataset(
            [{"item_id": region,
                "start": df_dict[region].index[0],
              "target": df_dict[region][target_col][:end_train_date]
             }
            for region in regions_list],
            freq = data_freq
        )

    predictor = estimator.train(training_data=training_data)
    
    return predictor
Example #4
def test_dynamic_integration(
    train_length: int,
    test_length: int,
    prediction_length: int,
    target_start: str,
    rolling_start: str,
    num_dynamic_feat: int,
):
    """
    Trains an estimator on a rolled dataset with dynamic features.
    Tests https://github.com/awslabs/gluon-ts/issues/1390
    """
    train_ds = create_dynamic_dataset(target_start, train_length,
                                      num_dynamic_feat)
    rolled_ds = generate_rolling_dataset(
        dataset=create_dynamic_dataset(target_start, test_length,
                                       num_dynamic_feat),
        strategy=StepStrategy(prediction_length=prediction_length),
        start_time=pd.Timestamp(rolling_start),
    )
    estimator = DeepAREstimator(
        freq="D",
        prediction_length=prediction_length,
        context_length=2 * prediction_length,
        use_feat_dynamic_real=True,
        trainer=Trainer(epochs=1),
    )
    predictor = estimator.train(training_data=train_ds)
    forecast_it, ts_it = make_evaluation_predictions(rolled_ds,
                                                     predictor=predictor,
                                                     num_samples=100)
    training_agg_metrics, _ = Evaluator(num_workers=0)(ts_it, forecast_it)
    # it should have failed by this point if the dynamic features were wrong
    assert training_agg_metrics
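The test relies on a create_dynamic_dataset helper that is not shown. A plausible sketch under the assumption that it builds one daily series of the given length with num_dynamic_feat dynamic real features:

import numpy as np
import pandas as pd
from gluonts.dataset.common import ListDataset

def create_dynamic_dataset(start, length, num_dynamic_feat):
    # single series starting at `start`, with matching-length dynamic features
    return ListDataset(
        [{
            "start": pd.Timestamp(start),
            "target": np.random.rand(length),
            "feat_dynamic_real": np.random.rand(num_dynamic_feat, length),
        }],
        freq="D",
    )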
Example #5
def test_listing_1():
    """
    Test GluonTS paper examples from arxiv paper:
    https://arxiv.org/abs/1906.05264

    Listing 1
    """
    from gluonts.dataset.repository.datasets import get_dataset
    from gluonts.model.deepar import DeepAREstimator
    from gluonts.trainer import Trainer
    from gluonts.evaluation import Evaluator
    from gluonts.evaluation.backtest import backtest_metrics

    # We use electricity in the paper but that would take too long to run in
    # the unit test
    dataset_info, train_ds, test_ds = constant_dataset()

    meta = dataset_info.metadata

    estimator = DeepAREstimator(
        freq=meta.time_granularity,
        prediction_length=1,
        trainer=Trainer(epochs=1, batch_size=32),
    )
    predictor = estimator.train(train_ds)

    evaluator = Evaluator(quantiles=(0.1, 0.5, 0.9))
    agg_metrics, item_metrics = backtest_metrics(
        train_dataset=train_ds,
        test_dataset=test_ds,
        forecaster=predictor,
        evaluator=evaluator,
    )
Example #6
def train(epochs, prediction_length, num_layers, dropout_rate):

    #create train dataset
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TRAIN'] +
                     "/train.csv",
                     header=0,
                     index_col=0)

    training_data = ListDataset([{
        "start": df.index[0],
        "target": df.value[:]
    }],
                                freq="5min")

    #define DeepAR estimator
    deepar_estimator = DeepAREstimator(freq="5min",
                                       prediction_length=prediction_length,
                                       dropout_rate=dropout_rate,
                                       num_layers=num_layers,
                                       trainer=Trainer(epochs=epochs))

    #train the model
    deepar_predictor = deepar_estimator.train(training_data=training_data)

    #create test dataset
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TEST'] +
                     "/test.csv",
                     header=0,
                     index_col=0)

    test_data = ListDataset([{
        "start": df.index[0],
        "target": df.value[:]
    }],
                            freq="5min")

    #evaluate trained model on test data
    forecast_it, ts_it = make_evaluation_predictions(test_data,
                                                     deepar_predictor,
                                                     num_samples=100)
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss),
                                          iter(forecasts),
                                          num_series=len(test_data))

    print("MSE:", agg_metrics["MSE"])

    #save the model
    deepar_predictor.serialize(pathlib.Path(os.environ['SM_MODEL_DIR']))

    return deepar_predictor
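This entry point reads its inputs from SageMaker channel environment variables. To exercise it outside SageMaker, those variables can be pointed at local folders (an assumption: train.csv and test.csv have a timestamp index and a value column, as in the NAB-style examples above):

import os

os.environ.setdefault("SM_CHANNEL_TRAIN", "./data/train")
os.environ.setdefault("SM_CHANNEL_TEST", "./data/test")
os.environ.setdefault("SM_MODEL_DIR", "./model")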
Example #7
    def fit(self,
            *,
            timeout: float = None,
            iterations: int = None) -> CallResult[None]:
        """ Fits DeepAR model using training data from set_training_data and hyperparameters
            
            Keyword Arguments:
                timeout {float} -- timeout, considered (default: {None})
                iterations {int} -- iterations, considered (default: {None})
            
            Returns:
                CallResult[None]
        """

        if iterations is None:
            iterations = self.hyperparams["epochs"]
            has_finished = True
        else:
            has_finished = False

        estimator = DeepAREstimator(
            freq=self._freq,
            prediction_length=self.hyperparams['prediction_length'],
            context_length=self.hyperparams['context_length'],
            use_feat_static_cat=self._deepar_dataset.has_cat_cols()
            or self._deepar_dataset.has_group_cols(),
            use_feat_dynamic_real=self._deepar_dataset.has_real_cols(),
            cardinality=self._deepar_dataset.get_cardinality(),
            distr_output=self._deepar_dataset.get_distribution_type(),
            dropout_rate=self.hyperparams['dropout_rate'],
            trainer=Trainer(
                epochs=iterations,
                learning_rate=self.hyperparams['learning_rate'],
                batch_size=self.hyperparams['training_batch_size'],
                num_batches_per_epoch=self.hyperparams['steps_per_epoch']))

        logger.info(f"Fitting for {iterations} iterations")
        start_time = time.time()
        predictor = estimator.train(self._train_data)
        predictor.batch_size = self.hyperparams['inference_batch_size']
        self._is_fit = True
        logger.info(
            f"Fit for {iterations} epochs, took {time.time() - start_time}s")

        if not os.path.isdir(self.hyperparams['weights_dir']):
            os.mkdir(self.hyperparams['weights_dir'])
        predictor.serialize(Path(self.hyperparams['weights_dir']))

        return CallResult(None, has_finished=has_finished)
Example #8
def model_eval(estimator=None,
               TD=None,
               cardinalities=None,
               istrain=True,
               ismetric=True,
               isplot=True,
               pars=None):
    from gluonts.model.deepar import DeepAREstimator
    from gluonts.trainer import Trainer
    p = pars

    if estimator is None:
        estimator = DeepAREstimator(
            prediction_length=p.get("single_pred_length", 28),
            freq="D",
            distr_output=p.get("distr_output", None),
            use_feat_static_cat=True,
            use_feat_dynamic_real=True,
            cardinality=p.get("cardinality", None),
            trainer=Trainer(
                learning_rate=p.get("lr", 1e-4),  # 1e-4,  #1e-3,
                epochs=p.get("epoch", None),
                num_batches_per_epoch=p.get("num_batches_per_epoch", 10),
                batch_size=p.get("batch_size", 8),
            ))
    if istrain: estimator = estimator.train(TD.train)

    #### Evaluate  ########################################################################
    from gluonts.evaluation.backtest import make_evaluation_predictions
    forecast_it, ts_it = make_evaluation_predictions(dataset=TD.test,
                                                     predictor=estimator,
                                                     num_samples=p.get(
                                                         "num_samples", 5))
    forecasts, tss = list(forecast_it), list(ts_it)

    if isplot:
        forecast_graph(forecasts, tss, p.get("ii_series", 0))

    ####### Metrics ######################################################################
    agg_metrics, item_metrics = None, None
    if ismetric:
        agg_metrics, item_metrics = forecast_metrics(tss,
                                                     forecasts,
                                                     TD,
                                                     quantiles=[0.1, 0.5, 0.9],
                                                     show=True,
                                                     dir_save=None)

    return estimator, forecasts, tss, agg_metrics, item_metrics
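model_eval assumes a container TD holding GluonTS train/test datasets, plus forecast_graph and forecast_metrics helpers that are not shown. A minimal sketch of the assumed container:

from collections import namedtuple

TrainTestData = namedtuple("TrainTestData", ["train", "test"])
# TD = TrainTestData(train=train_ds, test=test_ds)  # both GluonTS ListDatasets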
Example #9
def easy_train():
    import pandas as pd
    df = pd.read_csv("optiver_hacktheburgh/sp.csv",
                     header=0,
                     index_col=0,
                     usecols=[0, 2],
                     skiprows=lambda x: x % 5 != 0)
    # df[:100].plot(linewidth=2)
    print("Showing")
    # plt.show()
    from gluonts.dataset.common import ListDataset
    training_data = ListDataset([{
        "start": df.index[0],
        "target": df.values.flatten()
    }],
                                freq="1s")
    #from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
    from gluonts.model.deepar import DeepAREstimator
    from gluonts.trainer import Trainer
    estimator = DeepAREstimator(freq="1min",
                                prediction_length=100,
                                trainer=Trainer(epochs=20))
    predictor = estimator.train(training_data=training_data)
    test_data = ListDataset([{
        "start": df.index[0],
        "target": df.values.flatten()[:1000]
    }],
                            freq="10s")
    full_test_data = ListDataset([{
        "start": df.index[0],
        "target": df.values.flatten()
    }],
                                 freq="10s")

    means = []
    for i, (test_entry, forecast) in enumerate(
            zip(full_test_data, predictor.predict(test_data))):
        # if i > 0:
        #  break
        print(forecast.dim())
        plt.plot(test_entry["target"])
        #forecast.plot(color='g', prediction_intervals=[], output_file="test.png")
        means.extend(list(forecast.mean))
        print(forecast.mean)
    l = len(test_entry["target"])
    plt.axhline(y=means[0], xmin=0, xmax=l, linewidth=2, color='r')
    plt.axvline(x=5000, color='b')
    plt.grid(which='both')
    plt.show()
Example #10
def run_model(data_train,
              data_meta,
              save_path,
              num_epochs=50,
              lr=1e-3,
              batch_size=64):
    estimator = DeepAREstimator(
        freq=data_meta['freq'],
        prediction_length=data_meta['prediction_length'],
        trainer=Trainer(batch_size=batch_size,
                        epochs=num_epochs,
                        learning_rate=lr,
                        ctx='cpu',
                        hybridize=False))
    predictor = estimator.train(data_train)
    predictor.serialize(Path(save_path))
    return predictor
Example #11
def deepar(data="m4_quarterly", seed=42, epochs=100, batches=50):

    dataset = get_dataset(data, regenerate=False)
    mx.random.seed(seed)
    np.random.seed(seed)

    trainer = Trainer(
        ctx=mx.cpu(0),
        epochs=epochs,
        num_batches_per_epoch=batches,
        learning_rate=1e-3,
    )

    cardinality = int(dataset.metadata.feat_static_cat[0].cardinality)
    estimator = DeepAREstimator(
        trainer=trainer,
        cardinality=[cardinality],
        context_length=dataset.metadata.prediction_length,
        prediction_length=dataset.metadata.prediction_length,
        freq=dataset.metadata.freq,
        use_feat_static_cat=True
    )

    # predictor = estimator.train(training_data=dataset.train,
    #                             validation_data=dataset.test)
    predictor = estimator.train(training_data=dataset.train)

    forecast_it, ts_it = make_evaluation_predictions(
        dataset.test, predictor=predictor, num_samples=100
    )

    agg_metrics, item_metrics = Evaluator()(
        ts_it, forecast_it, num_series=len(dataset.test)
    )

    metrics = ["MASE", "sMAPE", "MSIS", "wQuantileLoss[0.5]", "wQuantileLoss[0.9]"]
    output = {key: round(value, 8) for key, value in agg_metrics.items() if key in metrics}

    output["seed"] = seed
    output["epochs"] = epochs
    output["num_batches"] = batches

    df = pd.DataFrame([output])

    return df
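A quick smoke-test call for the function above (a tiny run just to exercise the pipeline, not to reproduce paper-level metrics):

df_metrics = deepar(data="m4_quarterly", seed=0, epochs=1, batches=2)
print(df_metrics)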
Example #12
def deepar_test():
    import pandas as pd
    from gluonts.dataset.common import ListDataset
    from gluonts.model.deepar import DeepAREstimator
    from gluonts.trainer import Trainer

    url = "https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_AMZN.csv"
    df = pd.read_csv(url, header=0, index_col=0)
    data = ListDataset(
        [{
            "start": df.index[0],
            "target": df.value[:"2015-04-05 00:00:00"]
        }],
        freq="5min"
    )

    trainer = Trainer(epochs=10)
    estimator = DeepAREstimator(freq="5min", prediction_length=12, trainer=trainer)
    predictor = estimator.train(training_data=data)

    prediction = next(predictor.predict(data))
    print(prediction.mean)
    prediction.plot(output_file="./graph.png")
Example #13
    def train(self, **kwargs):

        epochs = kwargs.get("epochs", 10)

        # Adjust class freq.
        self.freq = pd.infer_freq(self.train_df.index)
        if self.freq == "MS":
            self.freq = "M"

        estimator = DeepAREstimator(
            freq=self.freq,
            prediction_length=self.forecast_len,
            trainer=Trainer(epochs=epochs,
                            batch_size=64,
                            ctx="gpu" if self.GPU else "cpu"),
        )

        self.model = estimator.train(
            training_data=self.format_input(self.train_df, self.freq))
Example #14
def train(dataset: ListDataset, frequency: str, horizon: int, model_name: str,
          num_layers: int, num_cells: int, epochs: int, patience: int,
          weight_decay: float, dropout_rate: float, batch_size: int,
          snapshot_dir: str, overwrite: bool):
    """
    Train a model.

    :param dataset:
    :param model_name:
    :param horizon:
    :param frequency:
    :param snapshot_dir:
    :param epochs:
    :param patience:
    :param weight_decay:
    :param batch_size:
    :param dropout_rate:
    :param num_layers:
    :param num_cells:
    :param overwrite:
    :return:
    """
    model_dir = Path(snapshot_dir)
    if not overwrite and os.path.isdir(snapshot_dir):
        return Predictor.deserialize(model_dir)
    trainer = Trainer(epochs=epochs,
                      patience=patience,
                      weight_decay=weight_decay,
                      batch_size=batch_size)
    if model_name == 'deepar':
        estimator = DeepAREstimator(freq=frequency,
                                    scaling=False,
                                    dropout_rate=dropout_rate,
                                    num_layers=num_layers,
                                    num_cells=num_cells,
                                    prediction_length=horizon,
                                    trainer=trainer)
    else:
        raise Exception(f'Unknown model {model_name}')
    predictor = estimator.train(training_data=dataset)
    model_dir.mkdir(parents=True, exist_ok=overwrite)
    predictor.serialize(model_dir)
    return predictor
Example #15
    def train_deepar(train_ds,
                     context_length=10,
                     prediction_length=20,
                     period=4320,
                     epochs=2):
        freq = "{}H".format(period / 3600)
        estimator = DeepAREstimator(prediction_length=prediction_length,
                                    context_length=context_length,
                                    freq=freq,
                                    num_cells=50,
                                    trainer=Trainer(ctx="gpu",
                                                    epochs=epochs,
                                                    learning_rate=1e-3,
                                                    hybridize=False,
                                                    num_batches_per_epoch=100,
                                                    batch_size=64),
                                    num_parallel_samples=500)
        predictor = estimator.train(train_ds)

        return predictor
Example #16
def train_predictor(region_df_dict, end_train_date, regions_list, max_epochs, learning_rate, target_col,
                    feat_dynamic_cols=None):

    estimator = DeepAREstimator(freq=md.FREQ,
                                prediction_length=md.NB_HOURS_PRED,
                                trainer=Trainer(epochs=max_epochs, learning_rate=learning_rate,
                                                learning_rate_decay_factor=md.LR_DECAY_FACTOR),
                                use_feat_dynamic_real=feat_dynamic_cols is not None)
    if feat_dynamic_cols is not None:

        training_data = ListDataset(
            [{"item_id": region,
              "start": region_df_dict[region].index[0],
              "target": region_df_dict[region][target_col][:end_train_date],
              "feat_dynamic_real": [region_df_dict[region][feat_dynamic_col][:end_train_date]
                                    for feat_dynamic_col in feat_dynamic_cols]
              }
             for region in regions_list],
            freq=md.FREQ
        )
    else:
        training_data = ListDataset(
            [{"item_id": region,
              "start": region_df_dict[region].index[0],
              "target": region_df_dict[region][target_col][:end_train_date]
              }
             for region in regions_list],
            freq=md.FREQ
        )
    model_path = predictor_path(region_df_dict, regions_list, max_epochs, learning_rate, feat_dynamic_cols)
    model_dir, model_name = os.path.split(model_path)
    logging.info("Training deepar model {}".format(model_name))
    logging.getLogger().setLevel(logging.WARNING)
    predictor = estimator.train(training_data=training_data)
    logging.getLogger().setLevel(logging.INFO)

    logging.info("Saving model with {} epochs and learning rate of {}".format(max_epochs, learning_rate))
    with open(model_path, "wb") as file:
        pickle.dump(predictor, file)

    return predictor
Example #17
def test_general_functionality() -> None:
    ds_info, train_ds, test_ds = constant_dataset()
    freq = ds_info.metadata.freq
    prediction_length = ds_info.prediction_length

    trainer = Trainer(epochs=3, num_batches_per_epoch=5)

    estimator = DeepAREstimator(prediction_length=prediction_length,
                                freq=freq,
                                trainer=trainer)

    predictor = estimator.train(training_data=train_ds)

    agg_metrics, item_metrics = backtest_metrics(
        test_dataset=test_ds,
        predictor=predictor,
        evaluator=Evaluator(calculate_owa=False),
    )

    # just some sanity check
    assert (agg_metrics is not None and item_metrics is not None
            ), "Metrics should not be None if everything went smooth."
Example #18
def init_model():
    epochs = None
    context = 'cpu'
    if args.epochs is not None:
        epochs = args.epochs
    if args.gpu:
        context = 'gpu'

    predictor = None
    if args.train:
        my_trainer = Trainer(
            ctx=context
        )  # TODO: pass epochs=epochs to the Trainer only when args.epochs is set
        estimator = DeepAREstimator(freq="5min",
                                    prediction_length=args.prediction,
                                    trainer=my_trainer)

        predictor = estimator.train(training_data=training_data)
        predictor.serialize(Path("models/"))
    else:
        # predictor = Predictor.deserialize(Path("models/"))
        predictor = RepresentableBlockPredictor.deserialize(Path("models/"))
        predictor.ctx = mx.Context('cpu')
    return predictor
Example #19
def train(args):

    # Parse arguments
    epochs = args.epochs
    pred_length = args.pred_length
    num_layers = args.num_layers
    num_cells = args.num_cells
    dropout_rate = args.dropout_rate
    batch_size = args.batch_size
    lr = args.lr
    model_dir = args.model_dir
    data_dir = args.data_dir
    num_gpus = args.num_gpus
    output_dir = args.output_dir
    device = "gpu" if num_gpus > 0 else "cpu"
    FREQ = 'D'

    # Get training data
    target_df = pd.read_csv(os.path.join(data_dir, 'target_train.csv'))
    target_df.set_index(target_df.columns[0], inplace=True)
    target = target_df.values
    num_steps, num_series = target_df.shape
    start_dt = target_df.index[0]

    custom_ds_metadata = {
        'num_series': num_series,
        'num_steps': num_steps,
        'prediction_length': pred_length,
        'freq': FREQ,
        'start': [start_dt for _ in range(num_series)]
    }

    # Prepare GluonTS dataset
    train_lst = []
    for i in range(0, num_series):
        target_vec = target[:-pred_length, i]
        dic = {FieldName.TARGET: target_vec, FieldName.START: start_dt}

        train_lst.append(dic)

    test_lst = []
    for i in range(0, num_series):
        target_vec = target[:, i]
        dic = {FieldName.TARGET: target_vec, FieldName.START: start_dt}
        test_lst.append(dic)

    train_ds = ListDataset(train_lst, freq=FREQ)
    test_ds = ListDataset(test_lst, freq=FREQ)
    train_entry = next(iter(train_ds))
    train_entry.keys()

    # Define Estimator
    trainer = Trainer(ctx=device,
                      epochs=epochs,
                      learning_rate=lr,
                      batch_size=batch_size)

    deepar_estimator = DeepAREstimator(freq=FREQ,
                                       prediction_length=pred_length,
                                       num_cells=num_cells,
                                       dropout_rate=dropout_rate,
                                       num_layers=num_layers,
                                       distr_output=StudentTOutput(),
                                       trainer=trainer)

    # Train the model
    deepar_predictor = deepar_estimator.train(train_ds)

    # Evaluate trained model on test data
    forecast_it, ts_it = make_evaluation_predictions(test_ds,
                                                     deepar_predictor,
                                                     num_samples=100)
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss),
                                          iter(forecasts),
                                          num_series=len(test_ds))

    metrics = [
        'RMSE', 'MAPE', 'wQuantileLoss[0.1]', 'wQuantileLoss[0.5]',
        'wQuantileLoss[0.9]', 'mean_wQuantileLoss'
    ]
    metrics_dic = dict(
        (key, value) for key, value in agg_metrics.items() if key in metrics)
    print(json.dumps(metrics_dic, indent=2))

    # Save the model
    deepar_predictor.serialize(pathlib.Path(model_dir))
    return deepar_predictor
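The CSV layout assumed by this script (an assumption, not shown in the snippet): the first column of target_train.csv is the date index and every remaining column is one target series, for example:

# target_train.csv
# date,series_0,series_1
# 2020-01-01,12.0,3.0
# 2020-01-02,15.0,4.0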
Example #20
    def fit(self, df, future_regressor=[]):
        """Train algorithm given data supplied.

        Args:
            df (pandas.DataFrame): Datetime Indexed
        """
        df = self.basic_profile(df)

        try:
            from mxnet.random import seed as mxnet_seed

            mxnet_seed(self.random_seed)
        except Exception:
            pass

        gluon_train = df.transpose()
        self.train_index = gluon_train.index

        gluon_freq = str(self.frequency).split('-')[0]
        if gluon_freq in ["MS", "1MS"]:
            gluon_freq = "M"

        if int(self.verbose) > 1:
            print(f"Gluon Frequency is {gluon_freq}")

        if str(self.context_length).replace('.', '').isdigit():
            self.gluon_context_length = int(float(self.context_length))
        elif 'forecastlength' in str(self.context_length).lower():
            len_int = int([x for x in str(self.context_length)
                           if x.isdigit()][0])
            self.gluon_context_length = int(len_int * self.forecast_length)
        else:
            self.gluon_context_length = 2 * self.forecast_length
            self.context_length = '2ForecastLength'
        ts_metadata = {
            'num_series':
            len(gluon_train.index),
            'freq':
            gluon_freq,
            'gluon_start':
            [gluon_train.columns[0] for _ in range(len(gluon_train.index))],
            'context_length':
            self.gluon_context_length,
            'forecast_length':
            self.forecast_length,
        }
        self.test_ds = ListDataset(
            [{
                FieldName.TARGET: target,
                FieldName.START: start
            }
             for (target,
                  start) in zip(gluon_train.values, ts_metadata['gluon_start'])
             ],
            freq=ts_metadata['freq'],
        )
        if self.gluon_model == 'DeepAR':
            from gluonts.model.deepar import DeepAREstimator

            estimator = DeepAREstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'NPTS':
            from gluonts.model.npts import NPTSEstimator

            estimator = NPTSEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
            )

        elif self.gluon_model == 'MQCNN':
            from gluonts.model.seq2seq import MQCNNEstimator

            estimator = MQCNNEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'SFF':
            from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator

            estimator = SimpleFeedForwardEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                trainer=Trainer(
                    epochs=self.epochs,
                    learning_rate=self.learning_rate,
                    hybridize=False,
                    num_batches_per_epoch=100,
                ),
            )

        elif self.gluon_model == 'Transformer':
            from gluonts.model.transformer import TransformerEstimator

            estimator = TransformerEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'DeepState':
            from gluonts.model.deepstate import DeepStateEstimator

            estimator = DeepStateEstimator(
                prediction_length=ts_metadata['forecast_length'],
                past_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                use_feat_static_cat=False,
                cardinality=[1],
                trainer=Trainer(ctx='cpu',
                                epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'DeepFactor':
            from gluonts.model.deep_factor import DeepFactorEstimator

            estimator = DeepFactorEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'WaveNet':
            # Usually needs more epochs/training iterations than other models do
            from gluonts.model.wavenet import WaveNetEstimator

            estimator = WaveNetEstimator(
                freq=ts_metadata['freq'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        else:
            raise ValueError("'gluon_model' not recognized.")

        self.GluonPredictor = estimator.train(self.test_ds)
        self.ts_metadata = ts_metadata
        self.fit_runtime = datetime.datetime.now() - self.startTime
        return self
Example #21
    FieldName.FEAT_STATIC_CAT: fsc
} for (target, fsc) in zip(df_train[0:number_of_products],
                           ts_code[0:number_of_products].reshape(-1, 1))],
                       freq=freq)

# reshape the test data to keep only number_of_products series
test_ds = ListDataset([{
    FieldName.TARGET: target,
    FieldName.START: start_test,
    FieldName.FEAT_STATIC_CAT: fsc
} for (target, fsc) in zip(df_test[0:number_of_products],
                           ts_code[0:number_of_products].reshape(-1, 1))],
                      freq=freq)

# train the predictor
predictor = estimator.train(training_data=train_ds)

# evaluate the predictions on the test set
forecast_it, ts_it = make_evaluation_predictions(dataset=test_ds,
                                                 predictor=predictor,
                                                 num_samples=100)

print("Obtención de valores de acondicionamiento de series de tiempo ...")
tss = list(tqdm(ts_it, total=len(df_test)))
print("Obtención de valores de acondicionamiento de series de tiempo ...")
forecasts = list(tqdm(forecast_it, total=len(df_test)))

# plot the predictions with their confidence intervals
for i in tqdm(range(number_of_products - 1)):
    ts_entry = tss[i]
    ts_entry.columns = [list(df.columns)[i]]
Example #22
plt.plot(custom_datasetx[0])
plt.show()

start = pd.Timestamp("01-01-2019", freq=freq)

train_ds = [{
    'target': x,
    'start': start
} for x in custom_datasetx[:, :, :-prediction_length]]
test_ds = [{'target': x, 'start': start} for x in custom_datasetx[:, :, :]]

# Trainer parameters
epochs = 1
learning_rate = 1E-3
batch_size = 1
num_batches_per_epoch = 2

# create estimator
estimator = DeepAREstimator(
    prediction_length=prediction_length,
    context_length=prediction_length,
    freq=freq,

    #     trainer=Trainer(ctx="gpu", epochs=epochs, learning_rate=learning_rate, hybridize=True,
    #                     batch_size=batch_size, num_batches_per_epoch=num_batches_per_epoch,),
    distr_output=MultivariateGaussianOutput(dim=2),
)

predictor = estimator.train(train_ds)
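The snippet above starts from a pre-built custom_datasetx array and a few globals. A minimal setup sketch, assuming a two-dimensional multivariate target to match distr_output=MultivariateGaussianOutput(dim=2); the shapes and freq are illustrative:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from gluonts.model.deepar import DeepAREstimator
# import path as in the older gluonts versions used elsewhere in this listing
from gluonts.distribution.multivariate_gaussian import MultivariateGaussianOutput

freq = "D"
prediction_length = 24
# shape: (num_series, target_dim, length)
custom_datasetx = np.random.normal(size=(5, 2, 200))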
Example #23
def train(args):
    
    # Parse arguments
    epochs = args.epochs
    pred_length = args.pred_length
    num_layers = args.num_layers
    num_cells = args.num_cells
    dropout_rate = args.dropout_rate
    batch_size = args.batch_size
    lr = args.lr
    
    model_dir = args.model_dir
    data_dir = args.data_dir
    num_gpus = args.num_gpus
    output_dir = args.output_dir
    
    device = "gpu" if num_gpus > 0 else "cpu"
    FREQ = 'D'    
    
    target_col = 'Weekly_Sales_sum'
    related_cols = ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment']
       
    # Get training data
    target_train_df = pd.read_csv(os.path.join(data_dir, 'target_train.csv'), index_col=0, header=[0,1])
    related_train_df = pd.read_csv(os.path.join(data_dir, 'related_train.csv'), index_col=0, header=[0,1])
    store_df = pd.read_csv(os.path.join(data_dir, 'item.csv'), index_col=0)
    
    num_steps, num_series = target_train_df.shape
    target = target_train_df.values

    start_train_dt = target_train_df.index[0]
    custom_ds_metadata = {'num_series': num_series, 
                          'num_steps': num_steps,
                          'prediction_length': pred_length,
                          'freq': FREQ,
                          'start': [start_train_dt for _ in range(num_series)] 
                         }

    # Prepare GluonTS dataset
    related_list = [related_train_df[c].values for c in related_cols]

    train_lst = []
    for i in range(0, num_series):
        target_vec = target[:-pred_length, i]
        related_vecs = [related[:-pred_length, i] for related in related_list]
        item = store_df.loc[i+1]
        dic = {FieldName.TARGET: target_vec, 
               FieldName.START: start_train_dt,
               FieldName.FEAT_DYNAMIC_REAL: related_vecs,
               FieldName.FEAT_STATIC_CAT: [item[0]],
               FieldName.FEAT_STATIC_REAL: [item[1]]
              } 
        train_lst.append(dic)

    test_lst = []
    for i in range(0, num_series):
        target_vec = target[:, i]
        related_vecs = [related[:, i] for related in related_list]
        item = store_df.loc[i+1]    
        dic = {FieldName.TARGET: target_vec, 
               FieldName.START: start_train_dt,
               FieldName.FEAT_DYNAMIC_REAL: related_vecs,
               FieldName.FEAT_STATIC_CAT: [item[0]],
               FieldName.FEAT_STATIC_REAL: [item[1]]
              } 
        test_lst.append(dic)

    train_ds = ListDataset(train_lst, freq=FREQ)
    test_ds = ListDataset(test_lst, freq=FREQ)   

    # Define Estimator    
    trainer = Trainer(
        ctx=device,
        epochs=epochs,
        learning_rate=lr,
        batch_size=batch_size
    )
    
    deepar_estimator = DeepAREstimator(freq=FREQ, 
                                       prediction_length=pred_length,
                                       use_feat_dynamic_real=True,
                                       use_feat_static_cat=True,
                                       use_feat_static_real=True,
                                       cardinality=[3],
                                       num_cells=30,
                                       distr_output=StudentTOutput(),
                                       trainer=trainer)
    # Train the model
    deepar_predictor = deepar_estimator.train(train_ds)
    
    # Evaluate trained model on test data
    forecast_it, ts_it = make_evaluation_predictions(test_ds, deepar_predictor, num_samples=100)
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(test_ds))

    metrics = ['RMSE', 'MAPE', 'wQuantileLoss[0.1]', 'wQuantileLoss[0.5]', 'wQuantileLoss[0.9]', 'mean_wQuantileLoss']
    metrics_dic = dict((key,value) for key, value in agg_metrics.items() if key in metrics)
    print(json.dumps(metrics_dic, indent=2))

    # Save the model
    deepar_predictor.serialize(pathlib.Path(model_dir))
    return deepar_predictor
Example #24
def train(bucket, seq, algo, freq, prediction_length, epochs, learning_rate,
          hybridize, num_batches_per_epoch):

    #create train dataset
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TRAIN'] +
                     "/train.csv",
                     header=0,
                     index_col=0)

    training_data = ListDataset([{
        "start": df.index[0],
        "target": df.usage[:],
        "item_id": df.client[:]
    }],
                                freq=freq)

    #create test dataset
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TEST'] +
                     "/test.csv",
                     header=0,
                     index_col=0)

    test_data = ListDataset([{
        "start": df.index[0],
        "target": df.usage[:],
        "item_id": 'client_12'
    }],
                            freq=freq)

    hook = Hook.create_from_json_file()
    #determine estimators##################################
    if algo == "DeepAR":
        estimator = DeepAREstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=1,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        #train the model
        predictor = estimator.train(training_data=training_data)
        print("DeepAR training is complete SUCCESS")
    elif algo == "SFeedFwd":
        estimator = SimpleFeedForwardEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        #train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    elif algo == "lstnet":
        # Needed for LSTNet ONLY
        grouper = MultivariateGrouper(max_target_dim=6)
        training_data = grouper(training_data)
        test_data = grouper(test_data)
        context_length = prediction_length
        num_series = 1
        skip_size = 1
        ar_window = 1
        channels = 4

        estimator = LSTNetEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            num_series=num_series,
            skip_size=skip_size,
            ar_window=ar_window,
            channels=channels,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        #train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    elif algo == "seq2seq":
        estimator = MQCNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        #train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    else:
        estimator = TransformerEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        #train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")

    ###################################################

    #evaluate trained model on test data
    forecast_it, ts_it = make_evaluation_predictions(test_data,
                                                     predictor,
                                                     num_samples=100)
    print("EVALUATION is complete SUCCESS")
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss),
                                          iter(forecasts),
                                          num_series=len(test_data))
    print("METRICS retrieved SUCCESS")
    #bucket = "bwp-sandbox"

    mainpref = "gluonts/blog-models/"
    prefix = mainpref + str(seq) + "/"
    agg_df = pd.DataFrame(agg_metrics, index=[0])
    file = "metrics" + str(seq) + ".csv"
    os.system('mkdir metrics')
    cspath = os.path.join('metrics', file)
    agg_df.to_csv(cspath)
    s3.upload_file(cspath, bucket, mainpref + "metrics/" + file)

    hook.save_scalar("MAPE", agg_metrics["MAPE"], sm_metric=True)
    hook.save_scalar("RMSE", agg_metrics["RMSE"], sm_metric=True)
    hook.save_scalar("MASE", agg_metrics["MASE"], sm_metric=True)
    hook.save_scalar("MSE", agg_metrics["MSE"], sm_metric=True)

    print("MAPE:", agg_metrics["MAPE"])

    #save the model
    predictor.serialize(pathlib.Path(os.environ['SM_MODEL_DIR']))

    uploadDirectory(os.environ['SM_MODEL_DIR'], prefix, bucket)

    return predictor
Example #25
    context_length=1,
    use_feat_dynamic_real=False,
    use_feat_static_cat=False,
    cardinality=None,
    scaling=False,
    trainer=Trainer(
        learning_rate=5e-3,
        epochs=10,
        num_batches_per_epoch=10,
        batch_size=30,
    ),
)

predictor = estimator.train(gluonts_ds)



from gluonts.evaluation.backtest import make_evaluation_predictions 
import json
def plot_prob_forecasts(ts_entry, forecast_entry):
    plot_length = 150
    prediction_intervals = (50.0, 90.0)
    legend = ["observations", "median prediction"] + [f"{k}% prediction interval" for k in prediction_intervals][::-1]

    fig, ax = plt.subplots(1, 1, figsize=(10, 7))
    ts_entry[-plot_length:].plot(ax=ax)  # plot the time series
    forecast_entry.plot(prediction_intervals=prediction_intervals, color='g')
    plt.grid(which="both")
    plt.legend(legend, loc="upper left")
Example #26
estimator = DeepAREstimator(
    prediction_length=12,
    freq="D",
    distr_output=NegativeBinomialOutput(),
    use_feat_static_cat=True,
    use_feat_dynamic_real=True,
    cardinality=[3049, 7, 3, 10, 3],
    trainer=Trainer(
        learning_rate=1e-3,
        epochs=1,
        num_batches_per_epoch=10,
        batch_size=10,
    ),
)

predictor = estimator.train(TD.train)

from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.evaluation import Evaluator

forecast_it, ts_it = make_evaluation_predictions(
    dataset=TD.test,
    predictor=predictor,
    num_samples=100
)



from gluonts.evaluation import Evaluator
agg_metrics, item_metrics = Evaluator()(ts_it, forecast_it, num_series=len(TD.test) )
print(agg_metrics)
Example #27
def train_models(
    train,
    models,
    forecast_len,
    full_df=None,
    seasonality="infer_from_data",
    in_sample=None,
    freq=None,
    GPU=None,
):

    seasons = select_seasonality(train, seasonality)

    periods = select_seasonality(train, "periodocity")

    models_dict = {}
    for m in models:
        if in_sample:
            print(
                "Model {} is being trained for in sample prediction".format(m))
        else:
            print("Model {} is being trained for out of sample prediction".
                  format(m))
        if m == "ARIMA":
            models_dict[m] = pm.auto_arima(train, seasonal=True, m=seasons)
        if m == "Prophet":
            if freq == "D":
                model = Prophet(daily_seasonality=True)
            else:
                model = Prophet()
            models_dict[m] = model.fit(prophet_dataframe(train))
        if m == "HWAAS":
            try:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend="add",
                    seasonal="add",
                    damped=True,
                ).fit(use_boxcox=True)
            except:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend="add",
                    seasonal="add",
                    damped=True,
                ).fit(use_boxcox=False)
        if m == "HWAMS":
            try:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend="add",
                    seasonal="mul",
                    damped=True,
                ).fit(use_boxcox=True)
            except:
                try:
                    models_dict[m] = ExponentialSmoothing(
                        train,
                        seasonal_periods=seasons,
                        trend="add",
                        seasonal="mul",
                        damped=True,
                    ).fit(use_boxcox=False)
                except:
                    models_dict[m] = ExponentialSmoothing(
                        train,
                        seasonal_periods=seasons,
                        trend=None,
                        seasonal="add").fit(use_boxcox=False)

        # if m=="HOLT":
        #   models_dict["HOLT"] = Holt(train,exponential=True).fit()
        if m == "PYAF":
            model = autof()
            model.train(
                iInputDS=train.reset_index(),
                iTime="Date",
                iSignal="Target",
                iHorizon=len(train),
            )  # bad coding to have horizon here
            models_dict[m] = model.forecast(iInputDS=train.reset_index(),
                                            iHorizon=forecast_len)
        if m == "Gluonts":
            freqed = pd.infer_freq(train.index)
            if freqed == "MS":
                freq = "M"
            else:
                freq = freqed
            estimator = DeepAREstimator(
                freq=freq,
                prediction_length=forecast_len,
                trainer=Trainer(epochs=6, ctx="gpu"),
            )  # use_feat_dynamic_real=True
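            # NOTE: ctx is hard-coded to "gpu" in the Trainer above and the two
            # branches below are identical, so the GPU flag has no effect here.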
            if GPU:
                models_dict[m] = estimator.train(
                    training_data=gluonts_dataframe(train))
            else:
                models_dict[m] = estimator.train(
                    training_data=gluonts_dataframe(train))
        if m == "NBEATS":

            if GPU:
                device = torch.device("cuda")
            else:
                device = torch.device("cpu")

            if os.path.isfile(CHECKPOINT_NAME):
                os.remove(CHECKPOINT_NAME)
            stepped = 35
            batch_size = 10
            if in_sample:
                x_train, y_train, x_test, y_test, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=True, device=device)
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                # test_losses = []
                for r in range(stepped):

                    train_100_grad_steps(data, device, net,
                                         optimiser)  # test_losses
                models_dict[m] = {}
                models_dict[m]["model"] = net
                models_dict[m]["x_test"] = x_test
                models_dict[m]["y_test"] = y_test
                models_dict[m]["constant"] = norm_constant

            else:  # if out_sample train is df

                x_train, y_train, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=False, device=device)

                batch_size = 10  # greater than 4 for viz
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                stepped = 5
                # test_losses = []
                for r in range(stepped):
                    # _, forecast = net(torch.tensor(x_train, dtype=torch.float)) ### Not Used
                    # if GPU:
                    #   p = forecast.detach().numpy()                               ### Not Used
                    # else:
                    #   p = forecast.detach().numpy()                               ### Not Used
                    train_100_grad_steps(data, device, net,
                                         optimiser)  # test_losses
                models_dict[m] = {}
                models_dict[m]["model"] = net
                models_dict[m]["tuple"] = (x_train, y_train, net,
                                           norm_constant)

        # if m=="TBA":
        #   bat = TBATS(use_arma_errors=False,use_box_cox=True)
        #   models_dict[m] = bat.fit(train)
        if m == "TATS":
            bat = TBATS(
                seasonal_periods=list(get_unique_N(season_list(train), 1)),
                use_arma_errors=False,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)
        if m == "TBAT":
            bat = TBATS(use_arma_errors=False,
                        use_box_cox=True,
                        use_trend=True)
            models_dict[m] = bat.fit(train)
        if m == "TBATS1":
            bat = TBATS(
                seasonal_periods=[seasons],
                use_arma_errors=False,
                use_box_cox=True,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)
        if m == "TBATP1":
            bat = TBATS(
                seasonal_periods=[periods],
                use_arma_errors=False,
                use_box_cox=True,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)
        if m == "TBATS2":
            bat = TBATS(
                seasonal_periods=list(get_unique_N(season_list(train), 2)),
                use_arma_errors=False,
                use_box_cox=True,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)

        # if m=="ProphetGluonts":
        #   freqed = pd.infer_freq(train.index)
        #   if freqed=="MS":
        #     freq= "M"
        #   else:
        #     freq= freqed
        #   models_dict["ProphetGluonts"] = ProphetPredictor(freq=freq, prediction_length=forecast_len) #use_feat_dynamic_real=True
        #   models_dict["ProphetGluonts"] = list(models_dict["ProphetGluonts"])

    return models_dict, seasons
Example #28
    prediction_length=prediction_length,
    context_length=prediction_length*2,
    num_layers=2,
    num_cells=128,
    cell_type='gru',
    dropout_rate=0.1,
    scaling=True,
    lags_seq=np.arange(1, 1+1).tolist(),
    freq=freq,
    use_feat_dynamic_real=False,
    use_feat_static_cat=False,
    use_feat_static_real=False,
    distr_output=distr,
    cardinality=None,
    trainer=deep_ar_trainer)
deep_ar_predictor = deep_ar_estimator.train(train_ds, test_ds)


print("Generating Deep AR forecasts.......")
deep_ar_forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds, predictor=deep_ar_predictor, num_samples=100)
tss = list(tqdm(ts_it, total=len(test_ds)))
deep_ar_forecasts = list(tqdm(deep_ar_forecast_it, total=len(test_ds)))


# deep renewal estimator
trainer = Trainer(
    ctx=mx.context.gpu() if is_gpu & args.use_cuda else mx.context.cpu(),
    batch_size=args.batch_size,
    learning_rate=args.learning_rate,
    epochs=20,
Example #29
# Create the model object

estimator = DeepAREstimator(
    freq="D",
    context_length=14,  # How many past events do I look at to make prediction
    prediction_length=prediction_length,
    num_layers=num_layers,
    num_cells=num_cells,
    #num_parallel_samples=8, # Added 12/22/2020 -- Doesn't seem to be working in parallel
    dropout_rate=0.1,  # Added 12/22/2020
    cell_type='lstm',
    trainer=Trainer(epochs=21))  # modify as needed

# Train the model on the json version (created in the step above)
# of the training portion of the data set
predictor = estimator.train(training_data=training_data)

# Set up the data results of the model
forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_data,
    predictor=predictor,
    num_samples=num_samples,  # number of sample paths drawn from the probabilistic model
)
forecasts = list(forecast_it)
tss = list(ts_it)

# This function is taken from a tutorial. Still, with some tweaking and citations, it should be added to Steinbeck.py


def plot_prob_forecasts(ts_entry, forecast_entry):
Example #30
def train_models(train,
                 models,
                 forecast_len,
                 full_df=None,
                 seasonality="infer_from_data",
                 in_sample=None):

    seasons = select_seasonality(train, seasonality)

    models_dict = {}
    for m in models:
        if m == "ARIMA":
            models_dict["ARIMA"] = pm.auto_arima(train,
                                                 seasonal=True,
                                                 m=seasons)
        if m == "Prophet":
            model = Prophet()
            models_dict["Prophet"] = model.fit(prophet_dataframe(train))
        if m == "HWAAS":
            models_dict["HWAAS"] = ExponentialSmoothing(
                train,
                seasonal_periods=seasons,
                trend='add',
                seasonal='add',
                damped=True).fit(use_boxcox=True)
        if m == "HWAMS":
            models_dict["HWAMS"] = ExponentialSmoothing(
                train,
                seasonal_periods=seasons,
                trend='add',
                seasonal='mul',
                damped=True).fit(use_boxcox=True)
        # if m=="HOLT":
        #   models_dict["HOLT"] = Holt(train,exponential=True).fit()
        if m == "PYAF":
            model = autof.cForecastEngine()
            model.train(iInputDS=train.reset_index(),
                        iTime='Date',
                        iSignal='Target',
                        iHorizon=len(train))  # bad coding to have horizon here
            models_dict["PYAF"] = model.forecast(iInputDS=train.reset_index(),
                                                 iHorizon=forecast_len)
        if m == "Gluonts":
            freqed = pd.infer_freq(train.index)
            if freqed == "MS":
                freq = "M"
            else:
                freq = freqed
            estimator = DeepAREstimator(
                freq=freq,
                prediction_length=forecast_len,
                trainer=Trainer(epochs=2))  #use_feat_dynamic_real=True
            print(train)
            print(type(train))
            print(gluonts_dataframe(train))
            models_dict["Gluonts"] = estimator.train(
                training_data=gluonts_dataframe(train))
        if m == "NBEATS":

            device = torch.device('cpu')
            seasons = select_seasonality(train, seasonality)

            if os.path.isfile(CHECKPOINT_NAME):
                os.remove(CHECKPOINT_NAME)
            stepped = 5
            batch_size = 10
            if in_sample:
                x_train, y_train, x_test, y_test, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=True)
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                #test_losses = []
                for r in range(stepped):

                    train_100_grad_steps(data, device, net,
                                         optimiser)  #test_losses
                models_dict["NBEATS"] = {}
                models_dict["NBEATS"]["model"] = net
                models_dict["NBEATS"]["x_test"] = x_test
                models_dict["NBEATS"]["y_test"] = y_test
                models_dict["NBEATS"]["constant"] = norm_constant

            else:  # if out_sample train is df

                x_train, y_train, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=False)

                batch_size = 10  # greater than 4 for viz
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                stepped = 5
                #test_losses = []
                for r in range(stepped):
                    _, forecast = net(torch.tensor(
                        x_train, dtype=torch.float))  ### Not Used
                    p = forecast.detach().numpy()  ### Not Used
                    train_100_grad_steps(data, device, net,
                                         optimiser)  #test_losses
                models_dict["NBEATS"] = {}
                models_dict["NBEATS"]["model"] = net
                models_dict["NBEATS"]["tuple"] = (x_train, y_train, net,
                                                  norm_constant)

        # if m=="ProphetGluonts":
        #   freqed = pd.infer_freq(train.index)
        #   if freqed=="MS":
        #     freq= "M"
        #   else:
        #     freq= freqed
        #   models_dict["ProphetGluonts"] = ProphetPredictor(freq=freq, prediction_length=forecast_len) #use_feat_dynamic_real=True
        #   models_dict["ProphetGluonts"] = list(models_dict["ProphetGluonts"])


    # create a forecast engine. This is the main object handling all the operations
    # We use the test-dataset as the last step of our training to generate the evaluation-metrics and do not use the test-dataset during prediction.
    # get the best time series model for predicting one week

    return models_dict