Example #1
def test_related_time_series_fail():
    params = dict(freq="1D", prediction_length=3, prophet={})

    dataset = ListDataset(
        data_iter=[
            {
                'start': '2017-01-01',
                'target': np.array([1.0, 2.0, 3.0, 4.0]),
                'feat_dynamic_real': np.array(
                    [
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                    ]
                ),
            }
        ],
        freq=params['freq'],
    )

    with pytest.raises(AssertionError) as excinfo:
        predictor = ProphetPredictor(**params)
        list(predictor.predict(dataset))

    assert str(excinfo.value) == (
        'Length mismatch for dynamic real-valued feature #0: '
        'expected 7, got 6'
    )
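
These snippets are collected from different GluonTS versions: the older ones pass the prophet= keyword and read Prophet-style yhat/yhat_lower/yhat_upper fields off the forecast, while the newer ones pass prophet_params= and assert on forecast quantiles. As a minimal sketch, the test snippets assume imports along the following lines (module paths as in recent GluonTS releases; treat them as assumptions for the older snippets):

import numpy as np
import pytest

from gluonts.core import serde
from gluonts.dataset.common import ListDataset
from gluonts.model.prophet import ProphetPredictor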
Example #2
def test_feat_dynamic_real_success():
    params = dict(
        freq="1D", prediction_length=3, prophet_params=dict(n_changepoints=20)
    )

    dataset = ListDataset(
        data_iter=[
            {
                "start": "2017-01-01",
                "target": np.array([1.0, 2.0, 3.0, 4.0]),
                "feat_dynamic_real": np.array(
                    [
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
                    ]
                ),
            }
        ],
        freq=params["freq"],
    )

    predictor = ProphetPredictor(**params)

    act_fcst = next(predictor.predict(dataset))
    exp_fcst = np.arange(5.0, 5.0 + params["prediction_length"])

    assert np.all(np.isclose(act_fcst.quantile(0.1), exp_fcst, atol=0.02))
    assert np.all(np.isclose(act_fcst.quantile(0.5), exp_fcst, atol=0.02))
    assert np.all(np.isclose(act_fcst.quantile(0.9), exp_fcst, atol=0.02))
Example #3
def test_feat_dynamic_real_bad_size():
    params = dict(freq="1D", prediction_length=3, prophet_params={})

    dataset = ListDataset(
        data_iter=[
            {
                "start": "2017-01-01",
                "target": np.array([1.0, 2.0, 3.0, 4.0]),
                "feat_dynamic_real": np.array(
                    [
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                    ]
                ),
            }
        ],
        freq=params["freq"],
    )

    with pytest.raises(AssertionError) as excinfo:
        predictor = ProphetPredictor(**params)
        list(predictor.predict(dataset))

    assert str(excinfo.value) == (
        "Length mismatch for dynamic real-valued feature #0: "
        "expected 7, got 6"
    )
Example #4
def gluonts_prophet(dataset, freq, pred_length, prophet_params=None):
    # Avoid a mutable default argument; fall back to an empty dict.
    params = dict(
        freq=freq,
        prediction_length=pred_length,
        prophet_params=prophet_params if prophet_params is not None else {},
    )
    predictor = ProphetPredictor(**params)
    # predict() returns a generator; materialize it as a list of forecasts.
    return list(predictor.predict(dataset))
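
A hypothetical call to the helper above, reusing the dataset shape from the earlier tests (names and values are illustrative only):

dataset = ListDataset(
    data_iter=[{"start": "2017-01-01", "target": np.array([1.0, 2.0, 3.0, 4.0])}],
    freq="1D",
)
forecasts = gluonts_prophet(dataset, "1D", 3, prophet_params={"n_changepoints": 20})
print(forecasts[0].mean)  # per-step mean of the sampled forecast paths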
Example #5
def test_min_obs():
    params = dict(freq="1D", prediction_length=10, prophet={})

    dataset = ListDataset(
        data_iter=[{'start': '2017-01-01', 'target': np.array([1.0])}],
        freq=params['freq'],
    )

    predictor = ProphetPredictor(**params)

    act_forecast = next(predictor.predict(dataset))
    exp_forecast = np.ones(params["prediction_length"])

    assert np.array_equal(act_forecast.yhat, exp_forecast)
    assert np.array_equal(act_forecast.yhat_lower, exp_forecast)
    assert np.array_equal(act_forecast.yhat_upper, exp_forecast)
Example #6
def test_min_obs_error():
    params = dict(freq="1D", prediction_length=10, prophet_params={})

    dataset = ListDataset(
        data_iter=[{"start": "2017-01-01", "target": np.array([1.0])}],
        freq=params["freq"],
    )

    with pytest.raises(ValueError) as excinfo:
        predictor = ProphetPredictor(**params)
        list(predictor.predict(dataset))

    act_error_msg = str(excinfo.value)
    exp_error_msg = "Dataframe has less than 2 non-NaN rows."

    assert act_error_msg == exp_error_msg
Example #7
    def define_DeepAR_predictor(self, freq, prediction_length, epochs,
                                num_layers, batch_size):
        self.predictor = DeepAREstimator(freq=freq,
                                         prediction_length=prediction_length,
                                         context_length=prediction_length,
                                         trainer=Trainer(
                                             ctx="cpu",
                                             epochs=epochs,
                                             batch_size=batch_size,
                                             num_batches_per_epoch=100),
                                         num_layers=num_layers,
                                         use_feat_dynamic_real=True)
Example #8
def test_related_time_series_success():
    params = dict(
        freq="1D", prediction_length=3, prophet=dict(n_changepoints=20)
    )

    dataset = ListDataset(
        data_iter=[
            {
                'start': '2017-01-01',
                'target': np.array([1.0, 2.0, 3.0, 4.0]),
                'feat_dynamic_real': np.array(
                    [
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
                    ]
                ),
            }
        ],
        freq=params['freq'],
    )

    predictor = ProphetPredictor(**params)
    list(predictor.predict(dataset))
Example #9
def test_mean_forecast():
    params = dict(
        freq="1D",
        prediction_length=10,
        min_nonnan_obs=3,
        prophet=dict(n_changepoints=20),
    )

    dataset = ListDataset(
        data_iter=[
            {'start': '2017-01-01', 'target': [2.0, 3.0, np.nan, np.nan]}
        ],
        freq=params['freq'],
    )

    predictor = ProphetPredictor(**params)

    act_forecast = next(predictor.predict(dataset))
    exp_forecast = 2.5 * np.ones(params["prediction_length"])

    assert np.array_equal(act_forecast.yhat, exp_forecast)
    assert np.array_equal(act_forecast.yhat_lower, exp_forecast)
    assert np.array_equal(act_forecast.yhat_upper, exp_forecast)
Example #10
    def __init__(self,
                 freq="D",
                 prediction_length=30,
                 epochs=50,
                 batch_size=16,
                 num_batches_per_epoch=100,
                 num_layers=4,
                 list_products=list_products):  # module-level global default
        self.predictor = DeepAREstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            batch_size=batch_size,
                            num_batches_per_epoch=num_batches_per_epoch),
            num_layers=num_layers)
        self.algorithm = algorithm  # 'algorithm' is a module-level global
        self.list_products_names = TransactionsData.get_list_names(
            list_products)
Example #11
def train(args):
    freq = args.freq.replace('"', '')
    prediction_length = args.prediction_length
    context_length = args.context_length
    use_feat_dynamic_real = args.use_feat_dynamic_real
    use_past_feat_dynamic_real = args.use_past_feat_dynamic_real
    use_feat_static_cat = args.use_feat_static_cat
    use_log1p = args.use_log1p
    
    print('freq:', freq)
    print('prediction_length:', prediction_length)
    print('context_length:', context_length)
    print('use_feat_dynamic_real:', use_feat_dynamic_real)
    print('use_past_feat_dynamic_real:', use_past_feat_dynamic_real)
    print('use_feat_static_cat:', use_feat_static_cat)
    print('use_log1p:', use_log1p)
    
    batch_size = args.batch_size
    print('batch_size:', batch_size)

    train = load_json(os.path.join(args.train, 'train_'+freq+'.json'))
    test = load_json(os.path.join(args.test, 'test_'+freq+'.json'))
    
    num_timeseries = len(train)
    print('num_timeseries:', num_timeseries)

    train_ds = ListDataset(parse_data(train, use_log1p=use_log1p), freq=freq)
    test_ds = ListDataset(parse_data(test, use_log1p=use_log1p), freq=freq)
    
    predictor = None
    
    trainer = Trainer(ctx="cpu",
                      epochs=args.epochs,
                      num_batches_per_epoch=args.num_batches_per_epoch,
                      learning_rate=args.learning_rate,
                      learning_rate_decay_factor=args.learning_rate_decay_factor,
                      patience=args.patience,
                      minimum_learning_rate=args.minimum_learning_rate,
                      clip_gradient=args.clip_gradient,
                      weight_decay=args.weight_decay,
                      init=args.init.replace('"', ''),
                      hybridize=args.hybridize)
    print('trainer:', trainer)
    
    cardinality = None
    if args.cardinality != '':
        # Parse a string such as '"[3, 5]"' into a list of ints.
        cardinality = [
            int(c)
            for c in args.cardinality.replace('"', '').replace(' ', '')
            .replace('[', '').replace(']', '').split(',')
        ]
    print('cardinality:', cardinality)
    
    embedding_dimension = [min(50, (cat+1)//2) for cat in cardinality] if cardinality is not None else None
    print('embedding_dimension:', embedding_dimension)
    
    algo_name = args.algo_name.replace('"', '')
    print('algo_name:', algo_name)
    
    if algo_name == 'CanonicalRNN':
        estimator = CanonicalRNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            num_layers=5, 
            num_cells=50, 
            cell_type='lstm', 
            num_parallel_samples=100,
            cardinality=cardinality,
            embedding_dimension=10,
        )
    elif algo_name == 'DeepFactor':
        estimator = DeepFactorEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
            embedding_dimension=10,
        )
    elif algo_name == 'DeepAR':
        estimator = DeepAREstimator(
            freq = freq,  # – Frequency of the data to train on and predict
            prediction_length = prediction_length,  # – Length of the prediction horizon
            trainer = trainer,  # – Trainer object to be used (default: Trainer())
            context_length = context_length,  # – Number of steps to unroll the RNN for before computing predictions (default: None, in which case context_length = prediction_length)
            num_layers = 2,  # – Number of RNN layers (default: 2)
            num_cells = 40,  # – Number of RNN cells for each layer (default: 40)
            cell_type = 'lstm',  # – Type of recurrent cells to use (available: ‘lstm’ or ‘gru’; default: ‘lstm’)
            dropoutcell_type = 'ZoneoutCell',  # – Type of dropout cells to use (available: ‘ZoneoutCell’, ‘RNNZoneoutCell’, ‘VariationalDropoutCell’ or ‘VariationalZoneoutCell’; default: ‘ZoneoutCell’)
            dropout_rate = 0.1,  # – Dropout regularization parameter (default: 0.1)
            use_feat_dynamic_real = use_feat_dynamic_real,  # – Whether to use the feat_dynamic_real field from the data (default: False)
            use_feat_static_cat = use_feat_static_cat,  # – Whether to use the feat_static_cat field from the data (default: False)
            use_feat_static_real = False,  # – Whether to use the feat_static_real field from the data (default: False)
            cardinality = cardinality,  # – Number of values of each categorical feature. This must be set if use_feat_static_cat == True (default: None)
            embedding_dimension = embedding_dimension,  # – Dimension of the embeddings for categorical features (default: [min(50, (cat+1)//2) for cat in cardinality])
        #     distr_output = StudentTOutput(),  # – Distribution to use to evaluate observations and sample predictions (default: StudentTOutput())
        #     scaling = True,  # – Whether to automatically scale the target values (default: true)
        #     lags_seq = None,  # – Indices of the lagged target values to use as inputs of the RNN (default: None, in which case these are automatically determined based on freq)
        #     time_features = None,  # – Time features to use as inputs of the RNN (default: None, in which case these are automatically determined based on freq)
        #     num_parallel_samples = 100,  # – Number of evaluation samples per time series to increase parallelism during inference. This is a model optimization that does not affect the accuracy (default: 100)
        #     imputation_method = None,  # – One of the methods from ImputationStrategy
        #     train_sampler = None,  # – Controls the sampling of windows during training.
        #     validation_sampler = None,  # – Controls the sampling of windows during validation.
        #     alpha = None,  # – The scaling coefficient of the activation regularization
        #     beta = None,  # – The scaling coefficient of the temporal activation regularization
            batch_size = batch_size,  # – The size of the batches to be used training and prediction.
        #     minimum_scale = None,  # – The minimum scale that is returned by the MeanScaler
        #     default_scale = None,  # – Default scale that is applied if the context length window is completely unobserved. If not set, the scale in this case will be the mean scale in the batch.
        #     impute_missing_values = None,  # – Whether to impute the missing values during training by using the current model parameters. Recommended if the dataset contains many missing values. However, this is a lot slower than the default mode.
        #     num_imputation_samples = None,  # – How many samples to use to impute values when impute_missing_values=True
        )
    elif algo_name == 'DeepState':
        estimator = DeepStateEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            use_feat_dynamic_real=use_feat_dynamic_real,
            use_feat_static_cat=use_feat_static_cat,
            cardinality=cardinality,
        )
    elif algo_name == 'DeepVAR':
        estimator = DeepVAREstimator(  # use multi
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            target_dim=96,
        )
    elif algo_name == 'GaussianProcess':
#         # TODO
#         estimator = GaussianProcessEstimator(
#             freq=freq,
#             prediction_length=prediction_length,
#             context_length=context_length,
#             trainer=trainer,
#             batch_size=batch_size,
#             cardinality=num_timeseries,
#         )
        pass
    elif algo_name == 'GPVAR':
        estimator = GPVAREstimator(  # use multi
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            target_dim=96,
        )
    elif algo_name == 'LSTNet':
        estimator = LSTNetEstimator(  # use multi
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            num_series=96,
            skip_size=4,
            ar_window=4,
            channels=72,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'NBEATS':
        estimator = NBEATSEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'DeepRenewalProcess':
        estimator = DeepRenewalProcessEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            num_cells=40,
            num_layers=2,
        )
    elif algo_name == 'Tree':
        estimator = TreePredictor(
            freq = freq,
            prediction_length = prediction_length,
            context_length = context_length,
            n_ignore_last = 0,
            lead_time = 0,
            max_n_datapts = 1000000,
            min_bin_size = 100,  # Used only for "QRX" method.
            use_feat_static_real = False,
            use_feat_dynamic_cat = False,
            use_feat_dynamic_real = use_feat_dynamic_real,
            cardinality = cardinality,
            one_hot_encode = False,
            model_params = {'eta': 0.1, 'max_depth': 6, 'silent': 0, 'nthread': -1, 'n_jobs': -1, 'gamma': 1, 'subsample': 0.9, 'min_child_weight': 1, 'colsample_bytree': 0.9, 'lambda': 1, 'booster': 'gbtree'},
            max_workers = 4,  # default: None
            method = "QRX",  # "QRX",  "QuantileRegression", "QRF"
            quantiles=None,  # Used only for "QuantileRegression" method.
            model=None,
            seed=None,
        )
    elif algo_name == 'SelfAttention':
#         # TODO
#         estimator = SelfAttentionEstimator(
#             freq=freq,
#             prediction_length=prediction_length,
#             context_length=context_length,
#             trainer=trainer,
#             batch_size=batch_size,
#         )
        pass
    elif algo_name == 'MQCNN':
        estimator = MQCNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            use_past_feat_dynamic_real=use_past_feat_dynamic_real,
            use_feat_dynamic_real=use_feat_dynamic_real,
            use_feat_static_cat=use_feat_static_cat,
            cardinality=cardinality,
            embedding_dimension=embedding_dimension,
            add_time_feature=True,
            add_age_feature=False,
            enable_encoder_dynamic_feature=True,
            enable_decoder_dynamic_feature=True,
            seed=None,
            decoder_mlp_dim_seq=None,
            channels_seq=None,
            dilation_seq=None,
            kernel_size_seq=None,
            use_residual=True,
            quantiles=None,
            distr_output=None,
            scaling=None,
            scaling_decoder_dynamic_feature=False,
            num_forking=None,
            max_ts_len=None,
        )
    elif algo_name == 'MQRNN':
        estimator = MQRNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'Seq2Seq':
        # # TODO
        # estimator = Seq2SeqEstimator(
        #     freq=freq,
        #     prediction_length=prediction_length,
        #     context_length=context_length,
        #     trainer=trainer,
        #     cardinality=cardinality,
        #     embedding_dimension=4,
        #     encoder=Seq2SeqEncoder(),
        #     decoder_mlp_layer=[4],
        #     decoder_mlp_static_dim=4
        # )
        pass
    elif algo_name == 'SimpleFeedForward':
        estimator = SimpleFeedForwardEstimator(
            num_hidden_dimensions=[40, 40],
            prediction_length=prediction_length,
            context_length=context_length,
            freq=freq,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'TemporalFusionTransformer':
        estimator = TemporalFusionTransformerEstimator(
            prediction_length=prediction_length,
            context_length=context_length,
            freq=freq,
            trainer=trainer,
            batch_size=batch_size,
            hidden_dim = 32, 
            variable_dim = None, 
            num_heads = 4, 
            num_outputs = 3, 
            num_instance_per_series = 100, 
            dropout_rate = 0.1, 
        #     time_features = [], 
        #     static_cardinalities = {}, 
        #     dynamic_cardinalities = {}, 
        #     static_feature_dims = {}, 
        #     dynamic_feature_dims = {}, 
        #     past_dynamic_features = []
        )
    elif algo_name == 'DeepTPP':
#         # TODO
#         estimator = DeepTPPEstimator(
#             prediction_interval_length=prediction_length,
#             context_interval_length=context_length,
#             freq=freq,
#             trainer=trainer,
#             batch_size=batch_size,
#             num_marks=len(cardinality) if cardinality is not None else 0,
#         )
        pass
    elif algo_name == 'Transformer':
        estimator = TransformerEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
        )
    elif algo_name == 'WaveNet':
        estimator = WaveNetEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
        )
    elif algo_name == 'Naive2':
        # TODO Multiplicative seasonality is not appropriate for zero and negative values
        predictor = Naive2Predictor(freq=freq, prediction_length=prediction_length, season_length=context_length)
    elif algo_name == 'NPTS':
        predictor = NPTSPredictor(freq=freq, prediction_length=prediction_length, context_length=context_length)
    elif algo_name == 'Prophet':
        def configure_model(model):
            model.add_seasonality(
                name='weekly', period=7, fourier_order=3, prior_scale=0.1
            )
            return model
        predictor = ProphetPredictor(freq=freq, prediction_length=prediction_length, init_model=configure_model)
    elif algo_name == 'ARIMA':
        predictor = RForecastPredictor(freq=freq,
                                      prediction_length=prediction_length,
                                      method_name='arima',
                                      period=context_length,
                                      trunc_length=len(train[0]['target']))
    elif algo_name == 'ETS':
        predictor = RForecastPredictor(freq=freq,
                                      prediction_length=prediction_length,
                                      method_name='ets',
                                      period=context_length,
                                      trunc_length=len(train[0]['target']))
    elif algo_name == 'TBATS':
        predictor = RForecastPredictor(freq=freq,
                                      prediction_length=prediction_length,
                                      method_name='tbats',
                                      period=context_length,
                                      trunc_length=len(train[0]['target']))
    elif algo_name == 'CROSTON':
        predictor = RForecastPredictor(freq=freq,
                                      prediction_length=prediction_length,
                                      method_name='croston',
                                      period=context_length,
                                      trunc_length=len(train[0]['target']))
    elif algo_name == 'MLP':
        predictor = RForecastPredictor(freq=freq,
                                      prediction_length=prediction_length,
                                      method_name='mlp',
                                      period=context_length,
                                      trunc_length=len(train[0]['target']))
    elif algo_name == 'SeasonalNaive':
        predictor = SeasonalNaivePredictor(freq=freq, prediction_length=prediction_length)
    else:
        print('[ERROR]:', algo_name, 'not supported')
        return
    
    if predictor is None:
        try:
            predictor = estimator.train(train_ds, test_ds)
        except Exception as e:
            print(e)
            # The multivariate models above (DeepVAR, GPVAR, LSTNet) expect
            # grouped input, so retry after grouping the univariate series.
            try:
                grouper_train = MultivariateGrouper(max_target_dim=num_timeseries)
                train_ds_multi = grouper_train(train_ds)
                test_ds_multi = grouper_train(test_ds)
                predictor = estimator.train(train_ds_multi, test_ds_multi)
            except Exception as e:
                print(e)

    forecast_it, ts_it = make_evaluation_predictions(
        dataset=test_ds,  # test dataset
        predictor=predictor,  # predictor
        num_samples=100,  # number of sample paths we want for evaluation
    )

    forecasts = list(forecast_it)
    tss = list(ts_it)
#     print(len(forecasts), len(tss))
    
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(test_ds))

    print(json.dumps(agg_metrics, indent=4))
    
    model_dir = os.path.join(args.model_dir, algo_name)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    predictor.serialize(Path(model_dir))
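
train() reads a long list of attributes from args. A minimal argparse sketch that supplies them is shown below; the argument names mirror the attribute accesses above, while the defaults (and the use of store_true flags for the booleans) are assumptions, not values from the source project.

# Hypothetical argument parser for train(); defaults are assumptions.
import argparse

def parse_args():
    p = argparse.ArgumentParser()
    p.add_argument('--algo_name', type=str, default='DeepAR')
    p.add_argument('--freq', type=str, default='D')
    p.add_argument('--prediction_length', type=int, default=30)
    p.add_argument('--context_length', type=int, default=30)
    p.add_argument('--use_feat_dynamic_real', action='store_true')
    p.add_argument('--use_past_feat_dynamic_real', action='store_true')
    p.add_argument('--use_feat_static_cat', action='store_true')
    p.add_argument('--use_log1p', action='store_true')
    p.add_argument('--hybridize', action='store_true')
    p.add_argument('--batch_size', type=int, default=32)
    p.add_argument('--epochs', type=int, default=10)
    p.add_argument('--num_batches_per_epoch', type=int, default=100)
    p.add_argument('--learning_rate', type=float, default=1e-3)
    p.add_argument('--learning_rate_decay_factor', type=float, default=0.5)
    p.add_argument('--patience', type=int, default=10)
    p.add_argument('--minimum_learning_rate', type=float, default=5e-5)
    p.add_argument('--clip_gradient', type=float, default=10.0)
    p.add_argument('--weight_decay', type=float, default=1e-8)
    p.add_argument('--init', type=str, default='xavier')
    p.add_argument('--cardinality', type=str, default='')
    p.add_argument('--train', type=str, default='data/train')
    p.add_argument('--test', type=str, default='data/test')
    p.add_argument('--model_dir', type=str, default='model')
    return p.parse_args()

if __name__ == '__main__':
    train(parse_args())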
Example #12
    def load_predictor(self, path: Path) -> Predictor:
        # Despite the ".pickle" suffix, the payload is parsed as JSON.
        file = path / "metadata.pickle"
        with file.open("r") as f:
            meta = json.load(f)
        return ProphetPredictor(freq=meta["freq"],
                                prediction_length=meta["prediction_length"])
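
The loader above implies a matching save step. A hypothetical counterpart (not from the source project) that writes the same metadata, keeping the ".pickle" filename the loader expects even though the payload is JSON:

    # Hypothetical counterpart to load_predictor(); assumes the predictor
    # exposes freq and prediction_length attributes.
    def save_predictor(self, predictor: ProphetPredictor, path: Path) -> None:
        file = path / "metadata.pickle"  # JSON payload despite the suffix
        with file.open("w") as f:
            json.dump({"freq": predictor.freq,
                       "prediction_length": predictor.prediction_length}, f)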
Example #13
    def define_Prophet_predictor(self, freq, prediction_length,
                                 prophet_params):
        self.predictor = ProphetPredictor(freq=freq,
                                          prediction_length=prediction_length,
                                          prophet_params=prophet_params)
Example #14
class Predictor_sales(object):
    def __init__(self,
                 freq="D",
                 prediction_length=30,
                 epochs=50,
                 batch_size=16,
                 num_batches_per_epoch=100,
                 num_layers=4,
                 list_products=list_products):  # module-level global default
        self.predictor = DeepAREstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            batch_size=batch_size,
                            num_batches_per_epoch=num_batches_per_epoch),
            num_layers=num_layers)
        self.algorithm = algorithm  # 'algorithm' is a module-level global
        self.list_products_names = TransactionsData.get_list_names(
            list_products)

    # DeepAR instance to be explicitly trained before predicting
    def define_DeepAR_predictor(self, freq, prediction_length, epochs,
                                num_layers, batch_size):
        self.predictor = DeepAREstimator(freq=freq,
                                         prediction_length=prediction_length,
                                         context_length=prediction_length,
                                         trainer=Trainer(
                                             ctx="cpu",
                                             epochs=epochs,
                                             batch_size=batch_size,
                                             num_batches_per_epoch=100),
                                         num_layers=num_layers,
                                         use_feat_dynamic_real=True)

    # Prophet instance to be implicitly trained during definition
    def define_Prophet_predictor(self, freq, prediction_length,
                                 prophet_params):
        self.predictor = ProphetPredictor(freq=freq,
                                          prediction_length=prediction_length,
                                          prophet_params=prophet_params)

    # ARIMA model fitted on demand (per product) via auto_arima
    def train_ARIMA_predictor(self, eval_ds, p):
        # 'prediction_length' is a module-level global here
        return auto_arima(eval_ds.list_data[p]['target'][:-prediction_length],
                          error_action='ignore',
                          suppress_warnings=True,
                          n_jobs=-1)

    def train_predictor(self, train_ds):
        self.predictor = self.predictor.train(training_data=train_ds)
        return self.predictor

    # Making predictions depends on the algorithm in use
    def make_predictions(self, eval_ds):
        if self.algorithm == 'DeepAR':
            forecast_it, ts_it = make_evaluation_predictions(
                eval_ds, predictor=self.predictor, num_samples=100)

        elif self.algorithm == 'Prophet':
            train_ds = copy.deepcopy(eval_ds)
            for p in range(len(list_products)):
                train_ds.list_data[p]['target'] = train_ds.list_data[p][
                    'target'][:-prediction_length]
            forecast_it = self.predictor.predict(train_ds)
            ts_it = []
            for p in range(len(list_products)):
                ts_it.append(
                    pd.DataFrame({0: eval_ds.list_data[p]['target']},
                                 index=pd.date_range(
                                     min_date,
                                     periods=len(
                                         eval_ds.list_data[p]['target']),
                                     freq=freq,
                                     tz=None)))
        elif self.algorithm == 'ARIMA':
            ts_it = []
            period_list_pred = pd.date_range(
                min_date,
                periods=len(eval_ds.list_data[0]['target']),
                freq=freq,
                tz=None)[-prediction_length:]
            for p in range(len(list_products)):
                ARIMA_predictor = self.train_ARIMA_predictor(eval_ds, p)
                pred = ARIMA_predictor.predict(n_periods=prediction_length)
                if p == 0:
                    forecast_it = pd.DataFrame({
                        'OrderDate': period_list_pred,
                        'Product': pred
                    })
                else:
                    temp = pd.DataFrame({
                        'OrderDate': period_list_pred,
                        'Product': pred
                    })
                    forecast_it = forecast_it.merge(temp,
                                                    on='OrderDate',
                                                    how='left')
                forecast_it = forecast_it.rename(
                    columns={'Product': self.list_products_names[p]})
                ts_it.append(
                    pd.DataFrame({0: eval_ds.list_data[p]['target']},
                                 index=pd.date_range(
                                     min_date,
                                     periods=len(
                                         eval_ds.list_data[p]['target']),
                                     freq=freq,
                                     tz=None)))
            return forecast_it, ts_it
        return list(forecast_it), list(ts_it)

    # Plotting depends on the prediction output structure
    def plot_prob_forecasts(self, forecast_plot, ts_plot):
        if len(list_products) != 1:
            print('Which product no?')
            p = int(
                input({
                    key: value
                    for (key, value) in enumerate(self.list_products_names)
                }))
        else:
            p = 0
        if self.algorithm not in ['ARIMA']:
            # plot only the selected time series to forecast
            ts_entry = ts_plot[p]
            forecast_entry = forecast_plot[p]
            plot_length = 70
            prediction_intervals = (50.0, 90.0)
            legend = ["observations", "median prediction"] + [
                f"{k}% prediction interval" for k in prediction_intervals
            ][::-1]
            _, ax = plt.subplots(1, 1, figsize=(10, 7))
            pd.plotting.register_matplotlib_converters()
            ts_entry[-plot_length:].plot(ax=ax)  # plot the time series
            forecast_entry.plot(prediction_intervals=prediction_intervals,
                                color='b')
            plt.grid(which="both")
            plt.legend(legend, loc="upper left")
            plt.show()

        else:
            history_plot_length = min(prediction_length * 5, len(ts_plot[0]))
            ts_plot = ts_plot[p][-history_plot_length:].set_index(
                pd.DatetimeIndex(ts_plot[p][-history_plot_length:].index))
            forecast_plot = forecast_plot.set_index(
                pd.DatetimeIndex(forecast_plot['OrderDate'])).drop(
                    columns=['OrderDate']).iloc[:, p]
            plt.figure(figsize=(10, 6))
            plt.plot(ts_plot, color='C0', label='Observations')
            plt.plot(forecast_plot, color='b', label='Predictions')
            plt.legend()
            plt.show()

    # Run the saving function before plotting anything
    def save_csv(self, name, forecast_it, ts_it, scaler):
        ts_name = "ts " + name + ".csv"
        forecast_name = "forecast " + name + ".csv"
        #ts_name = "ts" +"_"+ str(data)+ "_"+ str(min_date) +"_"+ str(max_date) +"_"+ str(algorithm) +"_"+ str(freq) +"_"+ name +"_"+str(list_products[0])+ ".csv"
        #forecast_name = "forecast" +"_"+ str(data)+"_"+ str(min_date) +"_"+ str(max_date) +"_"+ str(algorithm) +"_"+ str(freq) +"_"+ name +"_"+str(list_products[0])+".csv"

        if self.algorithm not in ['ARIMA']:
            if len(list_products) != 1:
                forecast_entry = []
                for p in range(len(list_products)):
                    forecast_entry.append(forecast_it[p].mean)
                start_dt = pd.date_range(min_date,
                                         periods=len(ts_it[0]),
                                         freq=freq,
                                         tz=None)[-prediction_length]
                #print(start_dt)
                forecast_csv = pd.DataFrame(data=scaler.inverse_transform(
                    np.array(forecast_entry).transpose()),
                                            columns=self.list_products_names,
                                            index=pd.date_range(
                                                start_dt,
                                                periods=prediction_length,
                                                freq=freq))
                forecast_csv = forecast_csv.rename_axis(
                    'OrderDate').reset_index()
                forecast_csv.to_csv(os.path.join(OUTPUT_FOLDER, forecast_name),
                                    index=False)
                for p in range(len(list_products)):
                    if p == 0:
                        ts_csv = ts_it[0]
                    else:
                        ts_csv = ts_csv.join(ts_it[p], rsuffix=p)
                idx_ts = ts_csv.index
                ts_csv = scaler.inverse_transform(ts_csv)
                ts_csv = pd.DataFrame(ts_csv,
                                      columns=self.list_products_names,
                                      index=idx_ts)
                ts_csv = ts_csv.rename_axis('OrderDate').reset_index()
                ts_csv.to_csv(os.path.join(OUTPUT_FOLDER, ts_name),
                              index=False)

            else:
                forecast_entry = forecast_it[0]
                ts_entry = ts_it[0]
                forecast_csv = pd.Series(scaler.inverse_transform(
                    np.array(forecast_entry.mean).reshape(-1, 1)).reshape(-1),
                                         index=pd.date_range(
                                             forecast_entry.start_date,
                                             periods=prediction_length,
                                             freq=freq),
                                         name=self.list_products_names[0])
                forecast_csv = forecast_csv.rename_axis(
                    'OrderDate').reset_index()
                forecast_csv.to_csv(os.path.join(OUTPUT_FOLDER, forecast_name),
                                    index=False)
                idx_ts = ts_entry.index
                ts_csv = scaler.inverse_transform(
                    np.array(ts_entry).reshape(-1, 1)).reshape(-1)
                ts_csv = pd.DataFrame(ts_csv,
                                      columns=self.list_products_names,
                                      index=idx_ts)
                ts_csv = ts_csv.rename_axis('OrderDate').reset_index()
                ts_csv.to_csv(os.path.join(OUTPUT_FOLDER, ts_name),
                              index=False)

        else:  # For ARIMA
            idx_fs = forecast_it.set_index('OrderDate').index
            forecast_csv = pd.DataFrame(data=scaler.inverse_transform(
                np.array(forecast_it.set_index('OrderDate'))),
                                        columns=self.list_products_names,
                                        index=idx_fs)
            forecast_csv.rename_axis('OrderDate').reset_index().to_csv(
                os.path.join(OUTPUT_FOLDER, forecast_name), index=False)
            if len(list_products) != 1:
                for p in range(len(list_products)):
                    if p == 0:
                        ts_csv = ts_it[0]
                    else:
                        ts_csv = ts_csv.join(ts_it[p], rsuffix=p)
                idx_ts = ts_csv.index
                ts_csv = scaler.inverse_transform(ts_csv)
                ts_csv = pd.DataFrame(ts_csv,
                                      columns=self.list_products_names,
                                      index=idx_ts)
                ts_csv = ts_csv.rename_axis('OrderDate').reset_index()
                ts_csv.to_csv(os.path.join(OUTPUT_FOLDER, ts_name),
                              index=False)

            else:
                ts_entry = ts_it[0]
                idx_ts = ts_entry.index
                ts_csv = pd.DataFrame(scaler.inverse_transform(
                    np.array(ts_entry).reshape(-1, 1)).reshape(-1),
                                      columns=self.list_products_names,
                                      index=idx_ts)
                ts_csv = ts_csv.rename_axis('OrderDate').reset_index()
                ts_csv.to_csv(os.path.join(OUTPUT_FOLDER, ts_name),
                              index=False)

    # MSE computation on test data
    def mse_compute(self, forecast_txt, ts_txt, scaler=None):
        ts_csv = ts_txt.copy()
        forecast_csv = forecast_txt.copy()
        ts_csv = ts_csv.loc[ts_csv['OrderDate'].isin(
            forecast_csv['OrderDate'])]
        ts_csv.set_index('OrderDate', inplace=True)
        forecast_csv.set_index('OrderDate', inplace=True)
        if scaler is not None:
            # Transform once, outside the loop: calling scaler.transform on
            # every iteration would rescale already-scaled data.
            ts_csv = scaler.transform(ts_csv)
            forecast_csv = scaler.transform(forecast_csv)
        mse_products = []
        for p in range(len(list_products)):
            if scaler is not None:
                mse_products.append(
                    mean_squared_error(ts_csv[:, p], forecast_csv[:, p]))
            else:
                mse_products.append(
                    mean_squared_error(ts_csv.iloc[:, p],
                                       forecast_csv.iloc[:, p]))
        mse_df = pd.DataFrame({
            'Granulcolname': self.list_products_names,
            'MSE': mse_products
        })
        if scaler is not None:
            print(">> Rescaled MSE:")
        else:
            print(">> Actual MSE, no rescaling:")
        print(mse_df)
        return mse_df

    def dtw_compute(self, forecast_txt, ts_txt, scaler=None):
        import dtw

        ts_csv = ts_txt.copy()
        forecast_csv = forecast_txt.copy()
        ts_csv = ts_csv.loc[ts_csv['OrderDate'].isin(
            forecast_csv['OrderDate'])]
        ts_csv.set_index('OrderDate', inplace=True)
        forecast_csv.set_index('OrderDate', inplace=True)
        if scaler is not None:
            # As in mse_compute: transform once, not per product.
            ts_csv = scaler.transform(ts_csv)
            forecast_csv = scaler.transform(forecast_csv)
        dtw_products = []
        for p in range(len(list_products)):
            if scaler is not None:
                distance = dtw.dtw(ts_csv[:, p],
                                   forecast_csv[:, p],
                                   distance_only=True).distance
            else:
                distance = dtw.dtw(np.array(ts_csv.iloc[:, p]),
                                   np.array(forecast_csv.iloc[:, p]),
                                   distance_only=True).distance
            dtw_products.append(distance)
        dtw_df = pd.DataFrame({
            'Granulcolname': self.list_products_names,
            'DTW': dtw_products
        })
        if scaler is not None:
            print(">> Rescaled DTW:")
        else:
            print(">> Actual DTW, no rescaling:")
        print(dtw_df)
        return dtw_df
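
Predictor_sales leans on several module-level names (algorithm, freq, min_date, prediction_length, list_products, OUTPUT_FOLDER, plus the TransactionsData helper and the scaler passed around). A hypothetical module-level setup, with illustrative values only:

# Hypothetical globals assumed by Predictor_sales; values are illustrative.
algorithm = 'DeepAR'                 # 'DeepAR', 'Prophet', or 'ARIMA'
freq = 'D'                           # pandas frequency string
prediction_length = 30               # forecast horizon in periods
min_date = '2019-01-01'              # first date of the observed history
list_products = ['product_a', 'product_b']
OUTPUT_FOLDER = './output'           # destination for the CSV exports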
Example #15
def test_prophet_serialization():
    predictor = ProphetPredictor(freq="1D", prediction_length=3)
    assert predictor == serde.decode(serde.encode(predictor))
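
Beyond the in-memory serde round trip above, a predictor can also be round-tripped through the filesystem. A small sketch (Predictor.deserialize is the standard GluonTS entry point; the temporary-directory handling is illustrative):

from pathlib import Path
from tempfile import TemporaryDirectory

from gluonts.model.predictor import Predictor

# Sketch: serialize a ProphetPredictor to disk and restore it.
predictor = ProphetPredictor(freq="1D", prediction_length=3)
with TemporaryDirectory() as tmp:
    predictor.serialize(Path(tmp))
    restored = Predictor.deserialize(Path(tmp))
assert restored == predictor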