예제 #1
0
def test_related_time_series_fail():
    """Dynamic features that are too short must be rejected.

    The target holds 4 observations and the prediction length is 3, while
    every ``feat_dynamic_real`` row holds only 6 values.  The test expects
    an ``AssertionError`` whose message reports "expected 7, got 6".
    """
    freq = "1D"
    horizon = 3
    short_row = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]

    entry = {
        'start': '2017-01-01',
        'target': np.array([1.0, 2.0, 3.0, 4.0]),
        'feat_dynamic_real': np.array([short_row, short_row]),
    }
    dataset = ListDataset(data_iter=[entry], freq=freq)

    # Construction stays inside the context manager so that an assertion
    # raised by either the constructor or the prediction is captured.
    with pytest.raises(AssertionError) as excinfo:
        predictor = ProphetPredictor(
            freq=freq, prediction_length=horizon, prophet={}
        )
        list(predictor.predict(dataset))

    expected_message = (
        'Length mismatch for dynamic real-valued feature #0: '
        'expected 7, got 6'
    )
    assert str(excinfo.value) == expected_message
예제 #2
0
def test_feat_dynamic_real_bad_size():
    """A ``feat_dynamic_real`` array of the wrong length raises an assertion.

    Each feature row has 6 entries for a 4-point target with a 3-step
    prediction length; the expected error message states that 7 entries
    were required.
    """
    predictor_kwargs = {
        "freq": "1D",
        "prediction_length": 3,
        "prophet_params": {},
    }
    truncated_feature = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
    series = {
        "start": "2017-01-01",
        "target": np.array([1.0, 2.0, 3.0, 4.0]),
        "feat_dynamic_real": np.array([truncated_feature, truncated_feature]),
    }
    dataset = ListDataset(data_iter=[series], freq=predictor_kwargs["freq"])

    # Both construction and prediction run under the raises-context, as the
    # assertion may come from either step.
    with pytest.raises(AssertionError) as excinfo:
        predictor = ProphetPredictor(**predictor_kwargs)
        list(predictor.predict(dataset))

    raised_message = str(excinfo.value)
    assert raised_message == (
        "Length mismatch for dynamic real-valued feature #0: "
        "expected 7, got 6"
    )
예제 #3
0
def test_feat_dynamic_real_success():
    """Correctly sized dynamic features produce the expected linear forecast.

    The target is 1..4 and both feature rows are 1..7 (target length plus
    prediction length).  The 0.1, 0.5 and 0.9 quantiles of the first
    forecast are each compared against 5, 6, 7 with an absolute tolerance
    of 0.02.
    """
    freq = "1D"
    horizon = 3
    full_feature = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]

    series = {
        "start": "2017-01-01",
        "target": np.array([1.0, 2.0, 3.0, 4.0]),
        "feat_dynamic_real": np.array([full_feature, full_feature]),
    }
    dataset = ListDataset(data_iter=[series], freq=freq)

    predictor = ProphetPredictor(
        freq=freq,
        prediction_length=horizon,
        prophet_params=dict(n_changepoints=20),
    )

    act_fcst = next(predictor.predict(dataset))
    exp_fcst = np.arange(5.0, 5.0 + horizon)

    # Same three checks as before, in the same order: 0.1, 0.5, 0.9.
    for level in (0.1, 0.5, 0.9):
        assert np.allclose(act_fcst.quantile(level), exp_fcst, atol=0.02)
예제 #4
0
def gluonts_prophet(dataset, freq, pred_length, prophet_params=None):
    """Run a ``ProphetPredictor`` over *dataset* and collect its forecasts.

    Args:
        dataset: Dataset passed straight to ``ProphetPredictor.predict``.
        freq: Frequency string forwarded to the predictor.
        pred_length: Forwarded to the predictor as ``prediction_length``.
        prophet_params: Optional dict forwarded as ``prophet_params``.
            ``None`` (the default) is replaced by a fresh empty dict so that
            no dict instance is shared between calls.

    Returns:
        A list holding every forecast produced by ``predict``.
    """
    if prophet_params is None:
        prophet_params = {}

    predictor = ProphetPredictor(
        freq=freq,
        prediction_length=pred_length,
        prophet_params=prophet_params,
    )
    # Materialise the forecast iterator so callers can index and re-iterate it.
    return list(predictor.predict(dataset))
예제 #5
0
def test_min_obs():
    """A series with a single observation yields a constant forecast.

    The only observed value is 1.0, and the test expects ``yhat``,
    ``yhat_lower`` and ``yhat_upper`` to all equal a vector of ones of the
    prediction length.
    """
    freq = "1D"
    horizon = 10

    single_point = {'start': '2017-01-01', 'target': np.array([1.0])}
    dataset = ListDataset(data_iter=[single_point], freq=freq)

    predictor = ProphetPredictor(
        freq=freq, prediction_length=horizon, prophet={}
    )

    act_forecast = next(predictor.predict(dataset))
    exp_forecast = np.ones(horizon)

    # Point forecast first, then lower and upper bounds.
    for field in ("yhat", "yhat_lower", "yhat_upper"):
        assert np.array_equal(getattr(act_forecast, field), exp_forecast)
예제 #6
0
def test_min_obs_error():
    """A single-observation series is expected to fail with a ``ValueError``.

    The message checked is "Dataframe has less than 2 non-NaN rows.".
    """
    predictor_kwargs = {
        "freq": "1D",
        "prediction_length": 10,
        "prophet_params": {},
    }
    lone_observation = {"start": "2017-01-01", "target": np.array([1.0])}
    dataset = ListDataset(
        data_iter=[lone_observation], freq=predictor_kwargs["freq"]
    )

    # Construction and prediction are both covered by the raises-context.
    with pytest.raises(ValueError) as excinfo:
        predictor = ProphetPredictor(**predictor_kwargs)
        list(predictor.predict(dataset))

    assert str(excinfo.value) == "Dataframe has less than 2 non-NaN rows."
예제 #7
0
def test_related_time_series_success():
    """Prediction runs to completion with correctly sized dynamic features.

    Both feature rows hold 7 values for a 4-point target and a prediction
    length of 3.  The test only checks that consuming the forecasts does
    not raise.
    """
    freq = "1D"
    feature_row = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]

    entry = {
        'start': '2017-01-01',
        'target': np.array([1.0, 2.0, 3.0, 4.0]),
        'feat_dynamic_real': np.array([feature_row, feature_row]),
    }
    dataset = ListDataset(data_iter=[entry], freq=freq)

    predictor = ProphetPredictor(
        freq=freq,
        prediction_length=3,
        prophet=dict(n_changepoints=20),
    )
    # Exhaust the iterator so every forecast is actually computed.
    list(predictor.predict(dataset))
예제 #8
0
def test_mean_forecast():
    """Too few non-NaN observations are expected to give a constant forecast.

    The target has two numeric values (2.0 and 3.0) followed by two 'nan'
    entries while ``min_nonnan_obs`` is 3.  The test expects ``yhat``,
    ``yhat_lower`` and ``yhat_upper`` to all equal 2.5 at every step.
    """
    freq = "1D"
    horizon = 10

    sparse_series = {'start': '2017-01-01', 'target': [2.0, 3.0, 'nan', 'nan']}
    dataset = ListDataset(data_iter=[sparse_series], freq=freq)

    predictor = ProphetPredictor(
        freq=freq,
        prediction_length=horizon,
        min_nonnan_obs=3,
        prophet=dict(n_changepoints=20),
    )

    act_forecast = next(predictor.predict(dataset))
    exp_forecast = np.full(horizon, 2.5)

    # Point forecast first, then lower and upper bounds.
    for field in ("yhat", "yhat_lower", "yhat_upper"):
        assert np.array_equal(getattr(act_forecast, field), exp_forecast)
예제 #9
0
class Predictor_sales(object):
    """Train, predict, plot, export and score per-product sales forecasts.

    The behaviour of most methods depends on ``self.algorithm``, which is
    compared against ``'DeepAR'``, ``'Prophet'`` and ``'ARIMA'``.

    NOTE(review): several methods read names from the enclosing module that
    are not defined in this class: ``algorithm``, ``list_products``,
    ``prediction_length``, ``freq``, ``min_date`` and ``OUTPUT_FOLDER``.
    They must exist at module level before the class is defined or used;
    confirm against the rest of the file.
    """

    def __init__(self,
                 freq="D",
                 prediction_length=30,
                 epochs=50,
                 batch_size=16,
                 num_batches_per_epoch=100,
                 num_layers=4,
                 list_products=list_products):
        """Create a default DeepAR estimator and resolve product names.

        Args:
            freq: Frequency string passed to ``DeepAREstimator``.
            prediction_length: Forecast horizon passed to the estimator.
            epochs: Training epochs for the ``Trainer``.
            batch_size: Batch size for the ``Trainer``.
            num_batches_per_epoch: Batches per epoch for the ``Trainer``.
            num_layers: ``num_layers`` of the estimator.
            list_products: Products whose names are looked up through
                ``TransactionsData.get_list_names``.  The default is the
                module-level ``list_products`` as bound when the class body
                is executed.
        """
        self.predictor = DeepAREstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            batch_size=batch_size,
                            num_batches_per_epoch=num_batches_per_epoch),
            num_layers=num_layers)
        # NOTE(review): `algorithm` is not a constructor parameter; it is
        # taken from module scope.
        self.algorithm = algorithm
        self.list_products_names = TransactionsData.get_list_names(
            list_products)

    def define_DeepAR_predictor(self, freq, prediction_length, epochs,
                                num_layers, batch_size):
        """Replace ``self.predictor`` with a DeepAR estimator.

        The estimator uses ``prediction_length`` as its context length,
        enables ``use_feat_dynamic_real`` and always trains with 100 batches
        per epoch.  It still has to be trained (see ``train_predictor``)
        before it can predict.
        """
        self.predictor = DeepAREstimator(freq=freq,
                                         prediction_length=prediction_length,
                                         context_length=prediction_length,
                                         trainer=Trainer(
                                             ctx="cpu",
                                             epochs=epochs,
                                             batch_size=batch_size,
                                             num_batches_per_epoch=100),
                                         num_layers=num_layers,
                                         use_feat_dynamic_real=True)

    def define_Prophet_predictor(self, freq, prediction_length,
                                 prophet_params):
        """Replace ``self.predictor`` with a ``ProphetPredictor``.

        No separate training call is made for this predictor in this class;
        ``make_predictions`` calls ``predict`` on it directly.
        """
        self.predictor = ProphetPredictor(freq=freq,
                                          prediction_length=prediction_length,
                                          prophet_params=prophet_params)

    def train_ARIMA_predictor(self, eval_ds, p):
        """Fit ``auto_arima`` on product *p* and return the fitted model.

        The last ``prediction_length`` (module-level) points of the target
        are held out from fitting.
        """
        return auto_arima(eval_ds.list_data[p]['target'][:-prediction_length],
                          error_action='ignore',
                          suppress_warnings=True,
                          n_jobs=-1)

    def train_predictor(self, train_ds):
        """Train the current estimator and store/return the trained predictor."""
        self.predictor = self.predictor.train(training_data=train_ds)
        return self.predictor

    def _observed_frame(self, eval_ds, p):
        """Return product *p*'s full target as a one-column DataFrame.

        The column is labelled ``0`` and the index is a date range starting
        at the module-level ``min_date`` with the module-level ``freq``.
        """
        target = eval_ds.list_data[p]['target']
        return pd.DataFrame({0: target},
                            index=pd.date_range(min_date,
                                                periods=len(target),
                                                freq=freq,
                                                tz=None))

    def make_predictions(self, eval_ds):
        """Produce forecasts and the matching observed series.

        Returns:
            ``(forecasts, series)``.  For ``'DeepAR'`` and ``'Prophet'``
            both are lists; ``series`` holds one entry per product.  For
            ``'ARIMA'`` ``forecasts`` is a single DataFrame with an
            ``'OrderDate'`` column plus one column per product name, and
            ``series`` is a list of DataFrames.

        Raises:
            ValueError: If ``self.algorithm`` is not one of the three
                supported names.
        """
        if self.algorithm == 'DeepAR':
            forecast_it, ts_it = make_evaluation_predictions(
                eval_ds, predictor=self.predictor, num_samples=100)
            return list(forecast_it), list(ts_it)

        if self.algorithm == 'Prophet':
            # Predict from a copy whose targets have the evaluation window
            # cut off, so the forecast covers the held-out period.
            train_ds = copy.deepcopy(eval_ds)
            for p in range(len(list_products)):
                train_ds.list_data[p]['target'] = train_ds.list_data[p][
                    'target'][:-prediction_length]
            forecast_it = self.predictor.predict(train_ds)
            ts_it = [
                self._observed_frame(eval_ds, p)
                for p in range(len(list_products))
            ]
            return list(forecast_it), list(ts_it)

        if self.algorithm == 'ARIMA':
            ts_it = []
            # Dates of the held-out window, taken from the first product.
            period_list_pred = pd.date_range(
                min_date,
                periods=len(eval_ds.list_data[0]['target']),
                freq=freq,
                tz=None)[-prediction_length:]
            for p in range(len(list_products)):
                arima_predictor = self.train_ARIMA_predictor(eval_ds, p)
                pred = arima_predictor.predict(n_periods=prediction_length)
                product_frame = pd.DataFrame({
                    'OrderDate': period_list_pred,
                    'Product': pred
                })
                if p == 0:
                    forecast_it = product_frame
                else:
                    forecast_it = forecast_it.merge(product_frame,
                                                    on='OrderDate',
                                                    how='left')
                # The temporary 'Product' column becomes the product's name.
                forecast_it = forecast_it.rename(
                    columns={'Product': self.list_products_names[p]})
                ts_it.append(self._observed_frame(eval_ds, p))
            return forecast_it, ts_it

        raise ValueError(f"Unsupported algorithm: {self.algorithm!r}")

    def plot_prob_forecasts(self, forecast_plot, ts_plot):
        """Plot observations and forecast for one product.

        When more than one product is configured the product index is read
        interactively from standard input.  The plot layout depends on the
        structure returned by ``make_predictions`` for the current
        algorithm.
        """
        if len(list_products) != 1:
            print('Which product no?')
            # The prompt shows the index -> product-name mapping.
            p = int(input(dict(enumerate(self.list_products_names))))
        else:
            p = 0

        if self.algorithm not in ['ARIMA']:
            ts_entry = ts_plot[p]  # observed series of the selected product
            forecast_entry = forecast_plot[p]
            plot_length = 70
            prediction_intervals = (50.0, 90.0)
            legend = ["observations", "median prediction"] + [
                f"{k}% prediction interval" for k in prediction_intervals
            ][::-1]
            _, ax = plt.subplots(1, 1, figsize=(10, 7))
            pd.plotting.register_matplotlib_converters()
            ts_entry[-plot_length:].plot(ax=ax)  # plot the time series
            forecast_entry.plot(prediction_intervals=prediction_intervals,
                                color='b')
            plt.grid(which="both")
            plt.legend(legend, loc="upper left")
            plt.show()

        else:
            # Show at most five forecast horizons of history.
            history_plot_length = min(prediction_length * 5, len(ts_plot[0]))
            history = ts_plot[p][-history_plot_length:]
            ts_plot = history.set_index(pd.DatetimeIndex(history.index))
            forecast_plot = forecast_plot.set_index(
                pd.DatetimeIndex(forecast_plot['OrderDate'])).drop(
                    columns=['OrderDate']).iloc[:, p]
            plt.figure(figsize=(10, 6))
            plt.plot(ts_plot, color='C0', label='Observations')
            plt.plot(forecast_plot, color='b', label='Predictions')
            plt.legend()
            plt.show()

    def _write_ts_csv(self, ts_name, ts_it, scaler):
        """Inverse-scale the observed series and write them to *ts_name*.

        The file is written under ``OUTPUT_FOLDER`` with an ``'OrderDate'``
        column followed by one column per product name.
        """
        if len(list_products) != 1:
            # Join every product's one-column frame side by side.
            ts_csv = ts_it[0]
            for p in range(1, len(list_products)):
                ts_csv = ts_csv.join(ts_it[p], rsuffix=p)
            idx_ts = ts_csv.index
            ts_values = scaler.inverse_transform(ts_csv)
        else:
            ts_entry = ts_it[0]
            idx_ts = ts_entry.index
            # The scaler is given a single-column 2-D array; flatten after.
            ts_values = scaler.inverse_transform(
                np.array(ts_entry).reshape(-1, 1)).reshape(-1)

        ts_csv = pd.DataFrame(ts_values,
                              columns=self.list_products_names,
                              index=idx_ts)
        ts_csv = ts_csv.rename_axis('OrderDate').reset_index()
        ts_csv.to_csv(os.path.join(OUTPUT_FOLDER, ts_name), index=False)

    def save_csv(self, name, forecast_it, ts_it, scaler):
        """Write inverse-scaled forecasts and observations to CSV files.

        Two files are created in ``OUTPUT_FOLDER``: ``"forecast <name>.csv"``
        and ``"ts <name>.csv"``.  Intended to be run before plotting.

        Args:
            name: Suffix used in both file names.
            forecast_it: Forecasts as returned by ``make_predictions``.
            ts_it: Observed series as returned by ``make_predictions``.
            scaler: Object whose ``inverse_transform`` maps values back to
                the original scale.
        """
        ts_name = "ts " + name + ".csv"
        forecast_name = "forecast " + name + ".csv"
        forecast_path = os.path.join(OUTPUT_FOLDER, forecast_name)

        if self.algorithm not in ['ARIMA']:
            if len(list_products) != 1:
                forecast_entry = [
                    forecast_it[p].mean for p in range(len(list_products))
                ]
                # First date of the forecast window.
                start_dt = pd.date_range(min_date,
                                         periods=len(ts_it[0]),
                                         freq=freq,
                                         tz=None)[-prediction_length]
                forecast_csv = pd.DataFrame(
                    data=scaler.inverse_transform(
                        np.array(forecast_entry).transpose()),
                    columns=self.list_products_names,
                    index=pd.date_range(start_dt,
                                        periods=prediction_length,
                                        freq=freq))
            else:
                forecast_entry = forecast_it[0]
                forecast_csv = pd.Series(
                    scaler.inverse_transform(
                        np.array(forecast_entry.mean).reshape(-1,
                                                              1)).reshape(-1),
                    index=pd.date_range(forecast_entry.start_date,
                                        periods=prediction_length,
                                        freq=freq),
                    name=self.list_products_names[0])
            forecast_csv = forecast_csv.rename_axis('OrderDate').reset_index()
            forecast_csv.to_csv(forecast_path, index=False)

        else:  # For ARIMA: forecast_it is already a DataFrame
            indexed_forecast = forecast_it.set_index('OrderDate')
            forecast_csv = pd.DataFrame(
                data=scaler.inverse_transform(np.array(indexed_forecast)),
                columns=self.list_products_names,
                index=indexed_forecast.index)
            forecast_csv.rename_axis('OrderDate').reset_index().to_csv(
                forecast_path, index=False)

        self._write_ts_csv(ts_name, ts_it, scaler)

    def _aligned_product_columns(self, forecast_txt, ts_txt, scaler):
        """Return one ``(actual, forecast)`` array pair per product.

        Observations are restricted to the ``'OrderDate'`` values present in
        the forecast, and both frames are indexed by ``'OrderDate'``.  When
        *scaler* is given, each frame is transformed exactly once before the
        columns are extracted.  The inputs are not modified.
        """
        forecast_csv = forecast_txt.set_index('OrderDate')
        ts_csv = ts_txt.loc[ts_txt['OrderDate'].isin(
            forecast_txt['OrderDate'])].set_index('OrderDate')

        if scaler is not None:
            # Transforming inside the per-product loop would rescale the
            # already-scaled data again on every iteration.
            ts_values = np.asarray(scaler.transform(ts_csv))
            forecast_values = np.asarray(scaler.transform(forecast_csv))
            return [(ts_values[:, p], forecast_values[:, p])
                    for p in range(len(list_products))]

        return [(np.array(ts_csv.iloc[:, p]),
                 np.array(forecast_csv.iloc[:, p]))
                for p in range(len(list_products))]

    def mse_compute(self, forecast_txt, ts_txt, scaler=None):
        """Compute the per-product mean squared error on the forecast dates.

        Args:
            forecast_txt: DataFrame with an ``'OrderDate'`` column followed
                by one forecast column per product.
            ts_txt: DataFrame of observations with the same layout.
            scaler: Optional object with ``transform``; when given, the MSE
                is computed on the transformed values.

        Returns:
            DataFrame with columns ``'Granulcolname'`` and ``'MSE'``.  The
            table is also printed.
        """
        mse_products = [
            mean_squared_error(actual, predicted)
            for actual, predicted in self._aligned_product_columns(
                forecast_txt, ts_txt, scaler)
        ]
        mse_df = pd.DataFrame({
            'Granulcolname': self.list_products_names,
            'MSE': mse_products
        })
        if scaler is not None:
            print(">> Rescaled MSE:")
        else:
            print(">> Actual MSE, no rescaling:")
        print(mse_df)
        return mse_df

    def dtw_compute(self, forecast_txt, ts_txt, scaler=None):
        """Compute the per-product DTW distance on the forecast dates.

        Args:
            forecast_txt: DataFrame with an ``'OrderDate'`` column followed
                by one forecast column per product.
            ts_txt: DataFrame of observations with the same layout.
            scaler: Optional object with ``transform``; when given, the
                distance is computed on the transformed values.

        Returns:
            DataFrame with columns ``'Granulcolname'`` and ``'DTW'``.  The
            table is also printed.
        """
        import dtw

        dtw_products = [
            dtw.dtw(actual, predicted, distance_only=True).distance
            for actual, predicted in self._aligned_product_columns(
                forecast_txt, ts_txt, scaler)
        ]
        dtw_df = pd.DataFrame({
            'Granulcolname': self.list_products_names,
            'DTW': dtw_products
        })
        if scaler is not None:
            print(">> Rescaled DTW:")
        else:
            print(">> Actual DTW, no rescaling:")
        print(dtw_df)
        return dtw_df