Example #1
0
    def test_future_reg(self):
        """Fit a model with two future regressors and forecast 50 held-out periods."""
        log.info("testing: Future Regressors")
        data = pd.read_csv(PEYTON_FILE, nrows=NROWS + 50)
        model = NeuralProphet(epochs=EPOCHS, batch_size=BATCH_SIZE)

        # Derive two smoothed regressor series from the target itself.
        data["A"] = data["y"].rolling(7, min_periods=1).mean()
        data["B"] = data["y"].rolling(30, min_periods=1).mean()
        # The last 50 rows act as the "known future" regressor values.
        future_regressors = pd.DataFrame(
            data={"A": data["A"][-50:], "B": data["B"][-50:]}
        )
        data = data[:-50]

        model = model.add_future_regressor(name="A")
        model = model.add_future_regressor(name="B", mode="multiplicative")
        metrics_df = model.fit(data, freq="D")
        future = model.make_future_dataframe(
            df=data,
            regressors_df=future_regressors,
            n_historic_predictions=10,
            periods=50,
        )
        forecast = model.predict(df=future)

        if self.plot:
            model.plot(forecast)
            model.plot_components(forecast)
            model.plot_parameters()
            plt.show()
def test_loader():
    """Build a two-dataframe dataset and check one DataLoader batch sees both names."""
    df = pd.read_csv(PEYTON_FILE, nrows=100)
    df["A"] = np.arange(len(df))
    df["B"] = np.arange(len(df)) * 0.1
    first_half = df[:50]
    second_half = df[50:]
    m = NeuralProphet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=True,
        n_lags=3,
        n_forecasts=2,
    )
    m.add_future_regressor("A")
    m.add_lagged_regressor("B")
    config_normalization = configure.Normalization("auto", False, True, False)
    df_dict = {"df1": first_half.copy(), "df2": second_half.copy()}
    config_normalization.init_data_params(
        df_dict, m.config_covar, m.regressors_config, m.events_config
    )
    m.config_normalization = config_normalization
    df_dict = m._normalize(df_dict)
    dataset = m._create_dataset(df_dict, predict_mode=False)
    loader = DataLoader(
        dataset,
        batch_size=min(1024, len(df)),
        shuffle=True,
        drop_last=False,
    )
    # A single batch covers all rows here, so both source names must appear.
    for inputs, targets, meta in loader:
        assert set(meta["df_name"]) == set(df_dict.keys())
        break
Example #3
0
    def test_future_reg(self):
        """Fit with regularized future regressors and forecast 50 periods ahead."""
        log.info("testing: Future Regressors")
        data = pd.read_csv(PEYTON_FILE)
        model = NeuralProphet(
            n_forecasts=1,
            n_lags=0,
            epochs=EPOCHS,
        )

        # Two smoothed versions of the target serve as regressor series.
        data["A"] = data["y"].rolling(7, min_periods=1).mean()
        data["B"] = data["y"].rolling(30, min_periods=1).mean()

        model = model.add_future_regressor(name="A", regularization=0.5)
        model = model.add_future_regressor(
            name="B", mode="multiplicative", regularization=0.3
        )

        metrics_df = model.fit(data, freq="D")
        future_regressors = pd.DataFrame(
            data={"A": data["A"][:50], "B": data["B"][:50]}
        )
        future = model.make_future_dataframe(
            df=data,
            regressors_df=future_regressors,
            n_historic_predictions=10,
            periods=50,
        )
        forecast = model.predict(df=future)

        if self.plot:
            model.plot(forecast)
            model.plot_components(forecast)
            model.plot_parameters()
            plt.show()
def test_newer_sample_weight():
    """Check that newer-sample weighting lets the recent regime dominate.

    Training data: first half y = -a, second half y = a. With a strong
    newer-samples bias the fitted regressor effect should come out positive.
    """
    dates = pd.date_range(start="2020-01-01", periods=100, freq="D")
    a = [0, 1] * 50
    y = np.concatenate([-1 * np.array(a[:50]), np.array(a[50:])])
    df = pd.DataFrame({"ds": dates, "y": y, "a": a})

    newer_bias = 5
    m = NeuralProphet(
        epochs=10,
        batch_size=10,
        learning_rate=1.0,
        newer_samples_weight=newer_bias,
        newer_samples_start=0.0,
        n_changepoints=0,
        daily_seasonality=False,
        weekly_seasonality=False,
        yearly_seasonality=False,
    )
    m.add_future_regressor("a")
    metrics_df = m.fit(df)

    # Predict on a constant regressor (a = 1 everywhere): a positive
    # contribution of "a" shows the second (y = a) half dominated training.
    dates = pd.date_range(start="2020-01-01", periods=100, freq="D")
    df = pd.DataFrame({"ds": dates, "y": [None] * 100, "a": [1] * 100})
    forecast1 = m.predict(df[:10])
    forecast2 = m.predict(df[-10:])
    avg_a1 = np.mean(forecast1["future_regressor_a"])
    avg_a2 = np.mean(forecast2["future_regressor_a"])
    log.info("avg regressor a contribution first samples: {}".format(avg_a1))
    log.info("avg regressor a contribution last samples: {}".format(avg_a2))
    # must hold
    assert avg_a1 > 0.1
    assert avg_a2 > 0.1

    # Less strict, since the trend also contributes, but should still hold.
    avg_y1 = np.mean(forecast1["yhat1"])
    avg_y2 = np.mean(forecast2["yhat1"])
    log.info("avg yhat first samples: {}".format(avg_y1))
    log.info("avg yhat last samples: {}".format(avg_y2))
    assert avg_y1 > -0.9
    assert avg_y2 > 0.1
def test_globaltimedataset():
    """Smoke-test dataset creation across several model configurations."""
    df = pd.read_csv(PEYTON_FILE, nrows=100)
    df1 = df[:50]
    df2 = df[50:]
    m1 = NeuralProphet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=True,
    )
    m2 = NeuralProphet(n_lags=3, n_forecasts=2)
    m3 = NeuralProphet()
    # TODO m3.add_country_holidays("US")
    config_normalization = configure.Normalization("auto", False, True, False)
    for model in (m1, m2, m3):
        df_dict = {"df1": df1.copy(), "df2": df2.copy()}
        config_normalization.init_data_params(
            df_dict, model.config_covar, model.regressors_config, model.events_config
        )
        model.config_normalization = config_normalization
        df_dict = model._normalize(df_dict)
        # Both training- and predict-mode datasets must build without error.
        dataset = model._create_dataset(df_dict, predict_mode=False)
        dataset = model._create_dataset(df_dict, predict_mode=True)

    # lagged_regressors, future_regressors
    df4 = df.copy()
    df4["A"] = np.arange(len(df4))
    df4["B"] = np.arange(len(df4)) * 0.1
    m4 = NeuralProphet(n_lags=2)
    m4.add_future_regressor("A")
    m4.add_lagged_regressor("B")
    config_normalization = configure.Normalization("auto", False, True, False)
    for model in (m4,):
        df_dict = {"df4": df4.copy()}
        config_normalization.init_data_params(
            df_dict, model.config_covar, model.regressors_config, model.events_config
        )
        model.config_normalization = config_normalization
        df_dict = model._normalize(df_dict)
        dataset = model._create_dataset(df_dict, predict_mode=False)
        dataset = model._create_dataset(df_dict, predict_mode=True)
def test_newer_sample_weight():
    """Newer samples (y = 2a regime) should outweigh the older y = -2a regime."""
    dates = pd.date_range(start="2020-01-01", periods=1000, freq="D")
    a = [0, 1] * 500
    y = np.concatenate([-2 * np.array(a[:500]), 2 * np.array(a[500:])])
    df = pd.DataFrame({"ds": dates, "y": y, "a": a})
    m = NeuralProphet(
        epochs=10,
        batch_size=128,
        newer_samples_weight=10,
        newer_samples_start=0.0,
        learning_rate=0.1,
        daily_seasonality=False,
        weekly_seasonality=False,
        yearly_seasonality=False,
    )
    m.add_future_regressor("a")
    metrics_df = m.fit(df)

    # Predict on a constant regressor (a = 1 everywhere): a positive
    # contribution of "a" shows the second (y = 2a) half dominated training.
    dates = pd.date_range(start="2020-01-01", periods=1000, freq="D")
    df = pd.DataFrame({"ds": dates, "y": [None] * 1000, "a": [1] * 1000})
    forecast1 = m.predict(df[:10])
    forecast2 = m.predict(df[-10:])
    avg_a1 = np.mean(forecast1["future_regressor_a"])
    avg_a2 = np.mean(forecast2["future_regressor_a"])
    # must hold
    assert avg_a1 > 0.5
    assert avg_a2 > 0.5

    # Less strict, since the trend also contributes, but should still hold.
    avg_y1 = np.mean(forecast1["yhat1"])
    avg_y2 = np.mean(forecast2["yhat1"])
    assert avg_y1 > -1.5
    assert avg_y2 > 0.5
Example #7
0
    np.linspace(start=0, stop=freq * 2 * np.math.pi, num=df.shape[0]))
# Second synthetic component: a faster sine wave added onto x1.
# NOTE(review): the statement building df["x1"] is cut off above this
# fragment — presumably a sine of lower frequency; confirm in the full file.
freq = 3
df["x2"] = np.sin(
    np.linspace(start=0, stop=freq * 2 * np.math.pi, num=df.shape[0]))
# Target is the plain sum of the two known components.
df["y"] = df["x1"] + df["x2"]

df.set_index("ds")["y"].plot()

# Split the series in half: train on the first half, forecast the second.
df_train = df.iloc[:int(df.shape[0] / 2)]
df_test = df.iloc[int(df.shape[0] / 2):]

# %%
# Time the fit. Note: the first NeuralProphet() is immediately overwritten
# by the configured instance on the next line.
t1 = process_time()
model_nprophet = NeuralProphet()
model_nprophet = NeuralProphet(n_lags=100, n_forecasts=10)
model_nprophet.add_future_regressor("x1")
model_nprophet.add_future_regressor("x2")
model_nprophet.fit(df_train, freq="D")
t2 = process_time() - t1

# Time the future-frame construction plus prediction separately.
t3 = process_time()
future_nprophet = model_nprophet.make_future_dataframe(
    df=df_train,  #.iloc[[-1]],
    regressors_df=df_test[["x1", "x2"]],
    periods=df_test.shape[0],
)
df_pred_nprophet = model_nprophet.predict(future_nprophet)
t4 = process_time() - t3
# Report fit time and predict time (CPU seconds).
print(t2, t4)

# df_pred_nprophet.set_index('ds')['yhat1'].plot()
Example #8
0
 def seek_the_oracle(current_series, args, series, forecast_length,
                     future_regressor):
     """Fit a NeuralProphet model to one series and return its forecasts.

     Intended for use in a per-series loop or parallel map. Returns a tuple
     ``(forecast, lower_forecast, upper_forecast)`` of pandas Series, each
     named after *series*.

     NOTE(review): this function reads ``self.*`` attributes, so it is
     presumably defined inside a method and captures ``self`` from the
     enclosing scope — confirm against the full file.
     """
     # Rename the target column to 'y' and expose the index as 'ds',
     # the column names NeuralProphet expects.
     current_series = current_series.rename(columns={series: 'y'})
     current_series['ds'] = current_series.index
     try:
         # Symmetric quantiles around the median for the prediction interval.
         quant_range = (1 - args['prediction_interval']) / 2
         quantiles = [quant_range, 0.5, (1 - quant_range)]
         m = NeuralProphet(
             quantiles=quantiles,
             growth=self.growth,
             n_changepoints=self.n_changepoints,
             changepoints_range=self.changepoints_range,
             trend_reg=self.trend_reg,
             trend_reg_threshold=self.trend_reg_threshold,
             ar_sparsity=self.ar_sparsity,
             yearly_seasonality=self.yearly_seasonality,
             weekly_seasonality=self.weekly_seasonality,
             daily_seasonality=self.daily_seasonality,
             seasonality_mode=self.seasonality_mode,
             seasonality_reg=self.seasonality_reg,
             n_lags=self.n_lags,
             n_forecasts=forecast_length,
             num_hidden_layers=self.num_hidden_layers,
             d_hidden=self.d_hidden,
             learning_rate=self.learning_rate,
             loss_func=self.loss_func,
             train_speed=self.train_speed,
             normalize=self.normalize,
             collect_metrics=False,
         )
     except Exception:
         # Fallback: retry with the identical configuration minus the
         # `quantiles` argument, which the constructor may have rejected.
         m = NeuralProphet(
             growth=self.growth,
             n_changepoints=self.n_changepoints,
             changepoints_range=self.changepoints_range,
             trend_reg=self.trend_reg,
             trend_reg_threshold=self.trend_reg_threshold,
             ar_sparsity=self.ar_sparsity,
             yearly_seasonality=self.yearly_seasonality,
             weekly_seasonality=self.weekly_seasonality,
             daily_seasonality=self.daily_seasonality,
             seasonality_mode=self.seasonality_mode,
             seasonality_reg=self.seasonality_reg,
             n_lags=self.n_lags,
             n_forecasts=forecast_length,
             num_hidden_layers=self.num_hidden_layers,
             d_hidden=self.d_hidden,
             learning_rate=self.learning_rate,
             loss_func=self.loss_func,
             train_speed=self.train_speed,
             normalize=self.normalize,
             collect_metrics=False,
         )
     if args['holiday']:
         m.add_country_holidays(country_name=args['holiday_country'])
     # User-supplied regression: append the training regressors as columns
     # and register each one as a future regressor.
     if args['regression_type'] == 'User':
         current_series = pd.concat(
             [current_series, args['regressor_train']], axis=1)
         for nme in args['regressor_name']:
             m.add_future_regressor(nme)
     m.fit(current_series,
           freq=args['freq'],
           progress_print=False,
           minimal=True)
     if args['regression_type'] == 'User':
         if future_regressor.ndim > 1:
             if future_regressor.shape[1] > 1:
                 # Collapse a wide regressor frame to two summary columns
                 # (row-wise mean and std) so it matches the training shape.
                 ft_regr = (future_regressor.mean(
                     axis=1).to_frame().merge(
                         future_regressor.std(axis=1).to_frame(),
                         left_index=True,
                         right_index=True,
                     ))
             else:
                 ft_regr = future_regressor.copy()
             ft_regr.columns = args['regressor_train'].columns
             # Stack train + future regressor rows into one frame.
             regr = pd.concat([args['regressor_train'], ft_regr])
             regr.columns = args['regressor_train'].columns
             # regr.index.name = 'ds'
             # regr.reset_index(drop=False, inplace=True)
             # future = future.merge(regr, on="ds", how='left')
         else:
             # a = np.append(args['regressor_train'], future_regressor.values)
             regr = future_regressor
         future = m.make_future_dataframe(current_series,
                                          periods=forecast_length,
                                          regressors_df=regr)
     else:
         future = m.make_future_dataframe(current_series,
                                          periods=forecast_length)
     fcst = m.predict(future, decompose=False)
     fcst = fcst.tail(forecast_length)  # remove the backcast
     # predicting that someday they will change back to fbprophet format
     if "yhat2" in fcst.columns:
         # Multi-step output: fold all numeric columns into a single yhat1.
         fcst['yhat1'] = fcst.fillna(0).sum(axis=1, numeric_only=True)
     try:
         forecast = fcst['yhat1']
     except Exception:
         forecast = fcst['yhat']
     forecast.name = series
     # not yet supported, so fill with the NaN column for now if missing
     try:
         lower_forecast = fcst['yhat_lower']
         upper_forecast = fcst['yhat_upper']
     except Exception:
         lower_forecast = fcst['y']
         upper_forecast = fcst['y']
     lower_forecast.name = series
     upper_forecast.name = series
     return (forecast, lower_forecast, upper_forecast)