def test_future_reg(self):
    """Smoke test: fit and predict with additive and multiplicative future regressors.

    Holds back the last 50 rows of the derived regressor columns so they can
    be supplied as known future values to ``make_future_dataframe``.
    """
    log.info("testing: Future Regressors")
    df = pd.read_csv(PEYTON_FILE, nrows=NROWS + 50)
    m = NeuralProphet(
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
    )
    # Derive two smoothed regressor series from y itself (7- and 30-day rolling means).
    df["A"] = df["y"].rolling(7, min_periods=1).mean()
    df["B"] = df["y"].rolling(30, min_periods=1).mean()
    # Keep the final 50 regressor rows as "future" inputs; train on the rest.
    regressors_df_future = pd.DataFrame(data={"A": df["A"][-50:], "B": df["B"][-50:]})
    df = df[:-50]
    m = m.add_future_regressor(name="A")
    m = m.add_future_regressor(name="B", mode="multiplicative")
    m.fit(df, freq="D")  # returned metrics are not inspected in this smoke test
    future = m.make_future_dataframe(
        df=df, regressors_df=regressors_df_future, n_historic_predictions=10, periods=50
    )
    forecast = m.predict(df=future)
    if self.plot:
        m.plot(forecast)
        m.plot_components(forecast)
        m.plot_parameters()
        plt.show()
def test_loader():
    """Check that a single DataLoader batch over a two-dataframe dataset carries both df names in its meta."""
    raw = pd.read_csv(PEYTON_FILE, nrows=100)
    raw["A"] = np.arange(len(raw))
    raw["B"] = np.arange(len(raw)) * 0.1
    first_half, second_half = raw[:50], raw[50:]
    model = NeuralProphet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=True,
        n_lags=3,
        n_forecasts=2,
    )
    model.add_future_regressor("A")
    model.add_lagged_regressor("B")
    norm = configure.Normalization("auto", False, True, False)
    named_dfs = {"df1": first_half.copy(), "df2": second_half.copy()}
    norm.init_data_params(named_dfs, model.config_covar, model.regressors_config, model.events_config)
    model.config_normalization = norm
    named_dfs = model._normalize(named_dfs)
    dataset = model._create_dataset(named_dfs, predict_mode=False)
    loader = DataLoader(dataset, batch_size=min(1024, len(raw)), shuffle=True, drop_last=False)
    # Inspect only the first batch; with batch_size covering the full data it
    # should contain samples from every source dataframe.
    for _inputs, _targets, meta in loader:
        assert set(meta["df_name"]) == set(named_dfs.keys())
        break
def test_future_reg(self):
    """Fit/predict smoke test for regularized future regressors (additive and multiplicative)."""
    log.info("testing: Future Regressors")
    df = pd.read_csv(PEYTON_FILE)
    m = NeuralProphet(
        n_forecasts=1,
        n_lags=0,
        epochs=EPOCHS,
    )
    # Smoothed copies of y serve as the regressor signals.
    df["A"] = df["y"].rolling(7, min_periods=1).mean()
    df["B"] = df["y"].rolling(30, min_periods=1).mean()
    m = m.add_future_regressor(name="A", regularization=0.5)
    m = m.add_future_regressor(name="B", mode="multiplicative", regularization=0.3)
    m.fit(df, freq="D")  # returned metrics are not checked in this smoke test
    # Reuse the first 50 historical regressor rows as stand-in future values.
    regressors_df = pd.DataFrame(data={"A": df["A"][:50], "B": df["B"][:50]})
    future = m.make_future_dataframe(
        df=df, regressors_df=regressors_df, n_historic_predictions=10, periods=50
    )
    forecast = m.predict(df=future)
    if self.plot:
        m.plot(forecast)
        m.plot_components(forecast)
        m.plot_parameters()
        plt.show()
def test_newer_sample_weight():
    """Verify that newer_samples_weight makes the later half of training data dominate.

    The series flips its relationship with regressor ``a`` halfway through
    (y = -a, then y = a); with a strong recency bias the learned effect of
    ``a`` should come out positive.
    """
    dates = pd.date_range(start="2020-01-01", periods=100, freq="D")
    a = [0, 1] * 50
    # first half: y = -a; second half: y = a
    y = np.concatenate([-1 * np.array(a[:50]), np.array(a[50:])])
    df = pd.DataFrame({"ds": dates, "y": y, "a": a})
    newer_bias = 5
    m = NeuralProphet(
        epochs=10,
        batch_size=10,
        learning_rate=1.0,
        newer_samples_weight=newer_bias,
        newer_samples_start=0.0,
        # growth='off',
        n_changepoints=0,
        daily_seasonality=False,
        weekly_seasonality=False,
        yearly_seasonality=False,
    )
    m.add_future_regressor("a")
    m.fit(df)
    # test that second half dominates
    # -> positive relationship of a and y
    probe = pd.DataFrame(
        {
            "ds": pd.date_range(start="2020-01-01", periods=100, freq="D"),
            "y": [None] * 100,
            "a": [1] * 100,
        }
    )
    forecast1 = m.predict(probe[:10])
    forecast2 = m.predict(probe[-10:])
    avg_a1 = np.mean(forecast1["future_regressor_a"])
    avg_a2 = np.mean(forecast2["future_regressor_a"])
    log.info("avg regressor a contribution first samples: {}".format(avg_a1))
    log.info("avg regressor a contribution last samples: {}".format(avg_a2))
    # must hold
    assert avg_a1 > 0.1
    assert avg_a2 > 0.1
    # this is less strict, as it also depends on trend, but should still hold
    avg_y1 = np.mean(forecast1["yhat1"])
    avg_y2 = np.mean(forecast2["yhat1"])
    log.info("avg yhat first samples: {}".format(avg_y1))
    log.info("avg yhat last samples: {}".format(avg_y2))
    assert avg_y1 > -0.9
    assert avg_y2 > 0.1
def test_globaltimedataset():
    """Smoke test: dataset creation succeeds across several model configs and multiple dataframes."""
    df = pd.read_csv(PEYTON_FILE, nrows=100)
    part_a, part_b = df[:50], df[50:]
    m1 = NeuralProphet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=True,
    )
    m2 = NeuralProphet(
        n_lags=3,
        n_forecasts=2,
    )
    m3 = NeuralProphet()
    # TODO m3.add_country_holidays("US")
    config_normalization = configure.Normalization("auto", False, True, False)
    for model in (m1, m2, m3):
        named = {"df1": part_a.copy(), "df2": part_b.copy()}
        config_normalization.init_data_params(named, model.config_covar, model.regressors_config, model.events_config)
        model.config_normalization = config_normalization
        named = model._normalize(named)
        # Both train- and predict-mode datasets must build without error.
        dataset = model._create_dataset(named, predict_mode=False)
        dataset = model._create_dataset(named, predict_mode=True)
    # lagged_regressors, future_regressors
    df4 = df.copy()
    df4["A"] = np.arange(len(df4))
    df4["B"] = np.arange(len(df4)) * 0.1
    m4 = NeuralProphet(n_lags=2)
    m4.add_future_regressor("A")
    m4.add_lagged_regressor("B")
    config_normalization = configure.Normalization("auto", False, True, False)
    for model in (m4,):
        named = {"df4": df4.copy()}
        config_normalization.init_data_params(named, model.config_covar, model.regressors_config, model.events_config)
        model.config_normalization = config_normalization
        named = model._normalize(named)
        dataset = model._create_dataset(named, predict_mode=False)
        dataset = model._create_dataset(named, predict_mode=True)
def test_newer_sample_weight():
    """Recency weighting should let the second half (y = 2a) outweigh the first (y = -2a)."""
    dates = pd.date_range(start="2020-01-01", periods=1000, freq="D")
    a = [0, 1] * 500
    # first half: y = -2a; second half: y = 2a
    y = np.concatenate([-2 * np.array(a[:500]), 2 * np.array(a[500:])])
    df = pd.DataFrame({"ds": dates, "y": y, "a": a})
    m = NeuralProphet(
        epochs=10,
        batch_size=128,
        newer_samples_weight=10,
        newer_samples_start=0.0,
        learning_rate=0.1,
        daily_seasonality=False,
        weekly_seasonality=False,
        yearly_seasonality=False,
    )
    m.add_future_regressor("a")
    m.fit(df)
    # test that second half dominates
    # -> positive relationship of a and y
    probe = pd.DataFrame(
        {
            "ds": pd.date_range(start="2020-01-01", periods=1000, freq="D"),
            "y": [None] * 1000,
            "a": [1] * 1000,
        }
    )
    head_fc = m.predict(probe[:10])
    tail_fc = m.predict(probe[-10:])
    avg_a1 = np.mean(head_fc["future_regressor_a"])
    avg_a2 = np.mean(tail_fc["future_regressor_a"])
    # must hold
    assert avg_a1 > 0.5
    assert avg_a2 > 0.5
    # this is less strict, as it also depends on trend, but should still hold
    avg_y1 = np.mean(head_fc["yhat1"])
    avg_y2 = np.mean(tail_fc["yhat1"])
    assert avg_y1 > -1.5
    assert avg_y2 > 0.5
np.linspace(start=0, stop=freq * 2 * np.math.pi, num=df.shape[0])) freq = 3 df["x2"] = np.sin( np.linspace(start=0, stop=freq * 2 * np.math.pi, num=df.shape[0])) df["y"] = df["x1"] + df["x2"] df.set_index("ds")["y"].plot() df_train = df.iloc[:int(df.shape[0] / 2)] df_test = df.iloc[int(df.shape[0] / 2):] # %% t1 = process_time() model_nprophet = NeuralProphet() model_nprophet = NeuralProphet(n_lags=100, n_forecasts=10) model_nprophet.add_future_regressor("x1") model_nprophet.add_future_regressor("x2") model_nprophet.fit(df_train, freq="D") t2 = process_time() - t1 t3 = process_time() future_nprophet = model_nprophet.make_future_dataframe( df=df_train, #.iloc[[-1]], regressors_df=df_test[["x1", "x2"]], periods=df_test.shape[0], ) df_pred_nprophet = model_nprophet.predict(future_nprophet) t4 = process_time() - t3 print(t2, t4) # df_pred_nprophet.set_index('ds')['yhat1'].plot()
def seek_the_oracle(current_series, args, series, forecast_length, future_regressor):
    """Fit NeuralProphet on one series and forecast it (used in a for loop or in parallel).

    NOTE(review): this function references ``self`` without taking it as a
    parameter, so it can only work as a closure nested inside a method —
    confirm the enclosing scope provides ``self``.

    Returns:
        tuple: ``(forecast, lower_forecast, upper_forecast)`` — three Series,
        each renamed to ``series``. Lower/upper fall back to the ``y`` column
        when quantile columns are absent from the prediction.
    """
    # Rename the target column to 'y' and expose the index as 'ds',
    # the column layout NeuralProphet expects.
    current_series = current_series.rename(columns={series: 'y'})
    current_series['ds'] = current_series.index
    try:
        # Symmetric quantiles around the median, derived from the requested
        # prediction interval.
        quant_range = (1 - args['prediction_interval']) / 2
        quantiles = [quant_range, 0.5, (1 - quant_range)]
        m = NeuralProphet(
            quantiles=quantiles,
            growth=self.growth,
            n_changepoints=self.n_changepoints,
            changepoints_range=self.changepoints_range,
            trend_reg=self.trend_reg,
            trend_reg_threshold=self.trend_reg_threshold,
            ar_sparsity=self.ar_sparsity,
            yearly_seasonality=self.yearly_seasonality,
            weekly_seasonality=self.weekly_seasonality,
            daily_seasonality=self.daily_seasonality,
            seasonality_mode=self.seasonality_mode,
            seasonality_reg=self.seasonality_reg,
            n_lags=self.n_lags,
            n_forecasts=forecast_length,
            num_hidden_layers=self.num_hidden_layers,
            d_hidden=self.d_hidden,
            learning_rate=self.learning_rate,
            loss_func=self.loss_func,
            train_speed=self.train_speed,
            normalize=self.normalize,
            collect_metrics=False,
        )
    except Exception:
        # Fallback: retry with the identical configuration minus `quantiles`
        # — presumably for NeuralProphet versions that reject that argument
        # (TODO confirm).
        m = NeuralProphet(
            growth=self.growth,
            n_changepoints=self.n_changepoints,
            changepoints_range=self.changepoints_range,
            trend_reg=self.trend_reg,
            trend_reg_threshold=self.trend_reg_threshold,
            ar_sparsity=self.ar_sparsity,
            yearly_seasonality=self.yearly_seasonality,
            weekly_seasonality=self.weekly_seasonality,
            daily_seasonality=self.daily_seasonality,
            seasonality_mode=self.seasonality_mode,
            seasonality_reg=self.seasonality_reg,
            n_lags=self.n_lags,
            n_forecasts=forecast_length,
            num_hidden_layers=self.num_hidden_layers,
            d_hidden=self.d_hidden,
            learning_rate=self.learning_rate,
            loss_func=self.loss_func,
            train_speed=self.train_speed,
            normalize=self.normalize,
            collect_metrics=False,
        )
    if args['holiday']:
        m.add_country_holidays(country_name=args['holiday_country'])
    if args['regression_type'] == 'User':
        # Attach the training-period regressors as extra columns and register
        # each one by name with the model.
        current_series = pd.concat(
            [current_series, args['regressor_train']], axis=1)
        for nme in args['regressor_name']:
            m.add_future_regressor(nme)
    m.fit(current_series, freq=args['freq'],
          progress_print=False, minimal=True)
    if args['regression_type'] == 'User':
        if future_regressor.ndim > 1:
            if future_regressor.shape[1] > 1:
                # Reduce multiple future-regressor columns to two summary
                # columns (per-row mean and std), then relabel them with the
                # training regressors' column names.
                ft_regr = (future_regressor.mean(
                    axis=1).to_frame().merge(
                        future_regressor.std(axis=1).to_frame(),
                        left_index=True,
                        right_index=True,
                    ))
            else:
                ft_regr = future_regressor.copy()
            ft_regr.columns = args['regressor_train'].columns
            # Stack train-period and future regressor rows into one frame.
            regr = pd.concat([args['regressor_train'], ft_regr])
            regr.columns = args['regressor_train'].columns
            # regr.index.name = 'ds'
            # regr.reset_index(drop=False, inplace=True)
            # future = future.merge(regr, on="ds", how='left')
        else:
            # a = np.append(args['regressor_train'], future_regressor.values)
            regr = future_regressor
        future = m.make_future_dataframe(current_series,
                                         periods=forecast_length,
                                         regressors_df=regr)
    else:
        future = m.make_future_dataframe(current_series,
                                         periods=forecast_length)
    fcst = m.predict(future, decompose=False)
    fcst = fcst.tail(forecast_length)  # remove the backcast
    # predicting that someday they will change back to fbprophet format
    if "yhat2" in fcst.columns:
        # NOTE(review): sums ALL numeric columns (NaNs as 0) into yhat1 when
        # multi-step columns are present — verify this aggregation is intended.
        fcst['yhat1'] = fcst.fillna(0).sum(axis=1, numeric_only=True)
    try:
        forecast = fcst['yhat1']
    except Exception:
        forecast = fcst['yhat']
    forecast.name = series
    # not yet supported, so fill with the NaN column for now if missing
    try:
        lower_forecast = fcst['yhat_lower']
        upper_forecast = fcst['yhat_upper']
    except Exception:
        lower_forecast = fcst['y']
        upper_forecast = fcst['y']
    lower_forecast.name = series
    upper_forecast.name = series
    return (forecast, lower_forecast, upper_forecast)