def test_global_modeling_no_exogenous_variable():
    """Exercise global modeling (several series, one model) without exogenous inputs.

    Three train/test input combinations are fitted and predicted: a single
    dataframe for both, a two-entry dict for training with a one-entry dict
    for testing, and dicts for both. Afterwards the test checks that passing
    a dict whose name was not part of the training dict raises ``ValueError``
    from ``predict``, ``test``, ``predict_trend`` and
    ``predict_seasonal_components``, and that the same calls succeed once
    ``config_normalization.unknown_data_normalization`` is set to ``True``.
    """
    ### GLOBAL MODELLING - NO EXOGENOUS VARIABLE
    log.info("Global Modeling - No exogenous variables")
    df = pd.read_csv(PEYTON_FILE, nrows=512)
    # Four consecutive, non-overlapping 128-row windows of the same file.
    df1_0 = df.iloc[:128, :].copy(deep=True)
    df2_0 = df.iloc[128:256, :].copy(deep=True)
    df3_0 = df.iloc[256:384, :].copy(deep=True)
    df4_0 = df.iloc[384:, :].copy(deep=True)
    train_input = {
        0: df1_0,
        1: {"df1": df1_0, "df2": df2_0},
        2: {"df1": df1_0, "df2": df2_0},
    }
    test_input = {
        0: df3_0,
        1: {"df1": df3_0},
        2: {"df1": df3_0, "df2": df4_0},
    }
    info_input = {
        0: "Testing df train / df test - no events, no regressors",
        1: "Testing dict df train / df test - no events, no regressors",
        2: "Testing dict df train / dict df test - no events, no regressors",
    }
    for i in range(0, 3):
        log.info(info_input[i])
        m = NeuralProphet(n_forecasts=2, n_lags=10, epochs=EPOCHS, batch_size=BATCH_SIZE)
        m.fit(train_input[i], freq="D")
        forecast = m.predict(df=test_input[i])
        m.predict_trend(df=test_input[i])
        m.predict_seasonal_components(df=test_input[i])
        if PLOT:
            # Normalise the prediction to a flat list of frames before plotting.
            # The previous code iterated a list and then indexed that list with
            # its own elements, which raises TypeError.
            # NOTE(review): dict input presumably yields a dict of frames keyed
            # by series name - confirm against the installed NeuralProphet.
            if isinstance(forecast, dict):
                frames = list(forecast.values())
            elif isinstance(forecast, list):
                frames = forecast
            else:
                frames = [forecast]
            for frame in frames:
                fig1 = m.plot(frame)
                fig2 = m.plot(frame)

    # `m` is the model from the last loop iteration (trained on df1/df2).
    with pytest.raises(ValueError):
        m.predict({"df4": df4_0})
    log.info("Error - dict with names not provided in the train dict (not in the data params dict)")
    with pytest.raises(ValueError):
        m.test({"df4": df4_0})
    log.info("Error - dict with names not provided in the train dict (not in the data params dict)")

    m = NeuralProphet(
        n_forecasts=2,
        n_lags=10,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
    )
    m.fit({"df1": df1_0, "df2": df2_0}, freq="D")
    with pytest.raises(ValueError):
        m.predict({"df4": df4_0})
    # log.info("unknown_data_normalization was not set to True")
    with pytest.raises(ValueError):
        m.test({"df4": df4_0})
    # log.info("unknown_data_normalization was not set to True")
    with pytest.raises(ValueError):
        m.predict_trend({"df4": df4_0})
    # log.info("unknown_data_normalization was not set to True")
    with pytest.raises(ValueError):
        m.predict_seasonal_components({"df4": df4_0})
    # log.info("unknown_data_normalization was not set to True")

    # Set unknown_data_normalization to True - now there should be no errors
    m.config_normalization.unknown_data_normalization = True
    m.predict({"df4": df4_0})
    m.test({"df4": df4_0})
    m.predict_trend({"df4": df4_0})
    m.predict_seasonal_components({"df4": df4_0})
    m.plot_parameters(df_name="df1")
    m.plot_parameters()
# Model: 2 lags in, 60 steps out, one hidden layer, standardized inputs.
m = NeuralProphet(
    normalize='standardize',
    num_hidden_layers=1,
    n_forecasts=60,
    n_lags=2,
    # seasonality_mode="multiplicative",
    epochs=10,
)
m.fit(Product_1766, freq='D')

# Predict over the whole history plus 60 future periods and plot the result.
future = m.make_future_dataframe(
    Product_1766,
    periods=60,
    n_historic_predictions=len(Product_1766),
)
forecast = m.predict(future)
m.plot(forecast)
plt.show()

# print(df.index)
# print(Product_1766.index)

# Alternative kept for reference: explicit train/validation split.
# df_train, df_val = m.split_df(Product_1766, valid_p=0.2, freq='D')
# train_metrics = m.fit(df_train, freq='D')
# val_metrics = m.test(df_val)
# print(train_metrics)
# print(val_metrics)

# The same model instance is fitted a second time here, now holding out 20%
# of the data for per-epoch validation.
metrics = m.fit(
    Product_1766,
    validate_each_epoch=True,
    valid_p=0.2,
    freq='D',
)
print(metrics)
def test_events():
    """Fit and forecast with user-defined events plus several country holidays.

    Uses the last ``NROWS`` rows of the Peyton file, registers "playoff" and
    "superbowl" events with a one-day window on each side, adds holidays for
    six countries, fits, and predicts 30 periods ahead with 90 historic
    predictions. Plots are produced only when ``PLOT`` is truthy.
    """
    log.info("testing: Events")
    df = pd.read_csv(PEYTON_FILE)[-NROWS:]

    playoff_dates = [
        "2008-01-13",
        "2009-01-03",
        "2010-01-16",
        "2010-01-24",
        "2010-02-07",
        "2011-01-08",
        "2013-01-12",
        "2014-01-12",
        "2014-01-19",
        "2014-02-02",
        "2015-01-11",
        "2016-01-17",
        "2016-01-24",
        "2016-02-07",
    ]
    superbowl_dates = ["2010-02-07", "2014-02-02", "2016-02-07"]
    playoffs = pd.DataFrame({"event": "playoff", "ds": pd.to_datetime(playoff_dates)})
    superbowls = pd.DataFrame({"event": "superbowl", "ds": pd.to_datetime(superbowl_dates)})
    events_df = pd.concat([playoffs, superbowls])

    m = NeuralProphet(
        n_lags=2,
        n_forecasts=30,
        daily_seasonality=False,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
    )
    # set event windows
    m = m.add_events(
        ["superbowl", "playoff"],
        lower_window=-1,
        upper_window=1,
        mode="multiplicative",
        regularization=0.5,
    )
    # add the country specific holidays
    m = m.add_country_holidays("US", mode="additive", regularization=0.5)
    # Further countries are added with default settings.
    for country in ("Indonesia", "Thailand", "Philippines", "Pakistan", "Belarus"):
        m.add_country_holidays(country)

    history_df = m.create_df_with_events(df, events_df)
    metrics_df = m.fit(history_df, freq="D")
    future = m.make_future_dataframe(
        df=history_df,
        events_df=events_df,
        periods=30,
        n_historic_predictions=90,
    )
    forecast = m.predict(df=future)
    log.debug("Event Parameters:: {}".format(m.model.event_params))

    if not PLOT:
        return
    m.plot_components(forecast)
    m.plot(forecast)
    m.plot_parameters()
    plt.show()
def test_events(self):
    """Fit with events and US holidays on the full file, then predict on a slice.

    All seasonalities are disabled. The model is trained on the complete
    Peyton data; the prediction input is rebuilt from rows 100-499 and asks
    for 30 future periods with 3 historic predictions. Plots are produced
    only when ``self.plot`` is truthy.
    """
    log.info("testing: Events")
    df = pd.read_csv(PEYTON_FILE)

    playoff_dates = [
        "2008-01-13",
        "2009-01-03",
        "2010-01-16",
        "2010-01-24",
        "2010-02-07",
        "2011-01-08",
        "2013-01-12",
        "2014-01-12",
        "2014-01-19",
        "2014-02-02",
        "2015-01-11",
        "2016-01-17",
        "2016-01-24",
        "2016-02-07",
    ]
    superbowl_dates = ["2010-02-07", "2014-02-02", "2016-02-07"]
    playoffs = pd.DataFrame({"event": "playoff", "ds": pd.to_datetime(playoff_dates)})
    superbowls = pd.DataFrame({"event": "superbowl", "ds": pd.to_datetime(superbowl_dates)})
    events_df = pd.concat([playoffs, superbowls])

    m = NeuralProphet(
        n_lags=5,
        n_forecasts=30,
        yearly_seasonality=False,
        weekly_seasonality=False,
        daily_seasonality=False,
        epochs=EPOCHS,
    )
    # set event windows
    m = m.add_events(
        ["superbowl", "playoff"],
        lower_window=-1,
        upper_window=1,
        mode="multiplicative",
        regularization=0.5,
    )
    # add the country specific holidays
    m = m.add_country_holidays("US", mode="additive", regularization=0.5)

    history_df = m.create_df_with_events(df, events_df)
    metrics_df = m.fit(history_df, freq="D")

    # create the test data
    test_slice = df.iloc[100:500, :].reset_index(drop=True)
    history_df = m.create_df_with_events(test_slice, events_df)
    future = m.make_future_dataframe(
        df=history_df,
        events_df=events_df,
        periods=30,
        n_historic_predictions=3,
    )
    forecast = m.predict(df=future)
    log.debug("Event Parameters:: {}".format(m.model.event_params))

    if not self.plot:
        return
    m.plot_components(forecast)
    m.plot(forecast)
    m.plot_parameters()
    plt.show()
# %% t1 = process_time() model_nprophet = NeuralProphet() model_nprophet = NeuralProphet(n_lags=100, n_forecasts=10) model_nprophet.add_future_regressor("x1") model_nprophet.add_future_regressor("x2") model_nprophet.fit(df_train, freq="D") t2 = process_time() - t1 t3 = process_time() future_nprophet = model_nprophet.make_future_dataframe( df=df_train, #.iloc[[-1]], regressors_df=df_test[["x1", "x2"]], periods=df_test.shape[0], ) df_pred_nprophet = model_nprophet.predict(future_nprophet) t4 = process_time() - t3 print(t2, t4) # df_pred_nprophet.set_index('ds')['yhat1'].plot() # fig1 = model_nprophet.plot(df_pred_nprophet) # %% t1 = process_time() model_nprophet = NeuralProphet(n_lags=100, n_forecasts=10) model_nprophet.fit(df_train[["ds", "y"]], freq="D") t2 = process_time() - t1 t3 = process_time() future_nprophet = model_nprophet.make_future_dataframe(
def seek_the_oracle(current_series, args, series, forecast_length, future_regressor):
    """Fit NeuralProphet on one series and return point, lower and upper forecasts.

    Written to be called once per series, either in a plain loop or in
    parallel. Model hyperparameters are read from ``self``, which is not a
    parameter here and must be available from the enclosing scope.

    Args:
        current_series: Frame holding the column named ``series``; its index
            is copied into a ``ds`` column.
        args: Mapping read for ``prediction_interval``, ``holiday``,
            ``holiday_country``, ``regression_type``, ``regressor_train``,
            ``regressor_name`` and ``freq``.
        series: Name of the column to forecast; also used to name the outputs.
        forecast_length: Number of periods to forecast.
        future_regressor: Future regressor values, used only when
            ``args['regression_type'] == 'User'``.

    Returns:
        Tuple ``(forecast, lower_forecast, upper_forecast)``, each named
        ``series``.
    """
    current_series = current_series.rename(columns={series: 'y'})
    current_series['ds'] = current_series.index

    # Keyword arguments common to both construction attempts below.
    shared_kwargs = dict(
        growth=self.growth,
        n_changepoints=self.n_changepoints,
        changepoints_range=self.changepoints_range,
        trend_reg=self.trend_reg,
        trend_reg_threshold=self.trend_reg_threshold,
        ar_sparsity=self.ar_sparsity,
        yearly_seasonality=self.yearly_seasonality,
        weekly_seasonality=self.weekly_seasonality,
        daily_seasonality=self.daily_seasonality,
        seasonality_mode=self.seasonality_mode,
        seasonality_reg=self.seasonality_reg,
        n_lags=self.n_lags,
        n_forecasts=forecast_length,
        num_hidden_layers=self.num_hidden_layers,
        d_hidden=self.d_hidden,
        learning_rate=self.learning_rate,
        loss_func=self.loss_func,
        train_speed=self.train_speed,
        normalize=self.normalize,
        collect_metrics=False,
    )
    try:
        # First attempt: request quantiles matching the prediction interval.
        quant_range = (1 - args['prediction_interval']) / 2
        quantiles = [quant_range, 0.5, (1 - quant_range)]
        m = NeuralProphet(quantiles=quantiles, **shared_kwargs)
    except Exception:
        # Any failure above falls back to a model built without quantiles.
        m = NeuralProphet(**shared_kwargs)

    if args['holiday']:
        m.add_country_holidays(country_name=args['holiday_country'])
    if args['regression_type'] == 'User':
        current_series = pd.concat(
            [current_series, args['regressor_train']], axis=1
        )
        for nme in args['regressor_name']:
            m.add_future_regressor(nme)
    m.fit(current_series, freq=args['freq'], progress_print=False, minimal=True)

    if args['regression_type'] != 'User':
        future = m.make_future_dataframe(current_series, periods=forecast_length)
    else:
        if future_regressor.ndim > 1:
            if future_regressor.shape[1] > 1:
                # Several regressor columns: reduce to row-wise mean and std.
                row_mean = future_regressor.mean(axis=1).to_frame()
                row_std = future_regressor.std(axis=1).to_frame()
                ft_regr = row_mean.merge(
                    row_std,
                    left_index=True,
                    right_index=True,
                )
            else:
                ft_regr = future_regressor.copy()
            ft_regr.columns = args['regressor_train'].columns
            regr = pd.concat([args['regressor_train'], ft_regr])
            regr.columns = args['regressor_train'].columns
            # regr.index.name = 'ds'
            # regr.reset_index(drop=False, inplace=True)
            # future = future.merge(regr, on="ds", how='left')
        else:
            # a = np.append(args['regressor_train'], future_regressor.values)
            regr = future_regressor
        future = m.make_future_dataframe(
            current_series, periods=forecast_length, regressors_df=regr
        )

    fcst = m.predict(future, decompose=False)
    fcst = fcst.tail(forecast_length)  # remove the backcast
    # predicting that someday they will change back to fbprophet format
    if "yhat2" in fcst.columns:
        # Collapse all numeric columns of each row into yhat1 (NaN counted as 0).
        fcst['yhat1'] = fcst.fillna(0).sum(axis=1, numeric_only=True)
    try:
        forecast = fcst['yhat1']
    except Exception:
        forecast = fcst['yhat']
    forecast.name = series

    # not yet supported, so fill with the NaN column for now if missing
    try:
        lower_forecast = fcst['yhat_lower']
        upper_forecast = fcst['yhat_upper']
    except Exception:
        lower_forecast = fcst['y']
        upper_forecast = fcst['y']
    lower_forecast.name = series
    upper_forecast.name = series
    return (forecast, lower_forecast, upper_forecast)
d_hidden=None, # Dimension of hidden layers of AR-Net ar_sparsity=None, # Sparcity in the AR coefficients learning_rate=None, epochs=40, loss_func="Huber", normalize= "auto", # Type of normalization ('minmax', 'standardize', 'soft', 'off') impute_missing=True, log_level=None, # Determines the logging level of the logger object ) metrics = model.fit(df, validate_each_epoch=True, freq="D") future = model.make_future_dataframe(df, periods=365, n_historic_predictions=len(df)) forecast = model.predict(future) fig, ax = plt.subplots(figsize=(14, 10)) model.plot(forecast, xlabel="Date", ylabel="Gold Price", ax=ax) ax.set_title("Gold Price Predictions", fontsize=28, fontweight="bold") plt.show() forecast[(forecast.ds > '2021-01-07') & (forecast.ds < '2021-01-12')] # ### SPLITTING THE TRAIN AND THE TEST #### # filter_date = str(int(datetime.today().strftime('%Y'))) + '-' + datetime.today().strftime('%m') + '-' + '01' # train = gold_prices[gold_prices['date'] < filter_date] # test = gold_prices[gold_prices['date'] >= filter_date] # train = train.reset_index().rename(columns={"date":"ds", "adj_close":"y"}) # test = test.reset_index().rename(columns={"date":"ds", "adj_close":"y"})
ax[0].set_ylabel('Coefficient')
# Plot the NeuralProphet AR weights, reversed with np.flip and flattened to 1-D.
ax[1].plot(np.flip(model_nprophet_ar.model.ar_weights.detach().numpy()).flatten())
ax[1].set_title('np')
ax[1].set_xlabel('AR Lag')
plt.show()

# %% [markdown]
# Their coefficients are nearly identical. As such predictions from each model are nearly the same:

# %% Show final predictions
last_train_date = df_train['ds'].iloc[-1]
pred_arima = model_arima.predict(
    start=last_train_date,
    end=last_train_date + pd.Timedelta('100D'),
)

# Roll the NeuralProphet model forward one step at a time: each predicted
# value is appended to the history and used as input for the next step.
pred_nprophet = df_train.copy()
for idx in range(100):
    future_nprophet = model_nprophet_ar.make_future_dataframe(
        df=pred_nprophet,
    )
    temp = model_nprophet_ar.predict(future_nprophet)
    # Fill `y` with `yhat1` where `y` is NaN (NaN treated as 0 in the sum).
    temp['y'] = temp[['y', 'yhat1']].fillna(0).sum(axis=1)
    temp = temp[['ds', 'y']]
    # DataFrame.append was removed in pandas 2.0; concatenate the last row
    # as a one-row frame instead (double brackets keep it a DataFrame).
    pred_nprophet = pd.concat([pred_nprophet, temp.iloc[[-1]]])
pred_nprophet = pred_nprophet.iloc[-101:].reset_index(drop=True)

fig, ax = plt.subplots(figsize=(10, 6))
pred_arima.plot(ax=ax, label='ARIMA')
pred_nprophet.set_index('ds')['y'].plot(ax=ax, label='np')
df_train.set_index('ds')['y'].iloc[-200:].plot(ax=ax, label='actual')
ax.set_ylabel("Temp (°C)")
fig.legend()
plt.show()

# %% [markdown]
# ## Long lags
# Due to the significantly faster fitting time we can train models with significantly longer lags.
# Train one default NeuralProphet model per column in `energy_list` and write
# the predictions for two date windows into the matching `submission` column.
for name in energy_list:
    print(name)
    column = name
    df = pd.DataFrame({'ds': train_data['time'], 'y': train_data[column]})

    # Model setup
    model = NeuralProphet()
    # Training
    loss = model.fit(df, freq="H")
    # Build the dataframe used for prediction
    df_pred = model.make_future_dataframe(df, periods=18000)
    # Predict
    predict = model.predict(df_pred)
    print(predict)

    # 2021-02-01 ~ 2021-03-01
    predict_1 = (
        predict.copy()
        .query('ds >= "2021-02-01 00:00:00"')
        .query('ds < "2021-03-01 00:00:00"')
    )
    # 2021-06-09 ~ 2021-07-09
    predict_2 = (
        predict.copy()
        .query('ds >= "2021-06-09 00:00:00"')
        .query('ds < "2021-07-09 00:00:00"')
    )

    # Update the submission file: first window followed by the second
    submission[column] = [*predict_1['yhat1'], *predict_2['yhat1']]
def time_pattern():
    """Fit NeuralProphet on the global ``df2`` and show diagnostics in Streamlit.

    The number of epochs comes from a sidebar slider (100 to 1000). After
    fitting with per-epoch validation, two loss panels (MAE and SmoothL1Loss,
    training versus validation) are rendered under "Loss Check", followed by
    the model's parameter plot under "Time Pattern".

    Figures are handed to ``st.pyplot`` explicitly rather than relying on
    matplotlib's global current figure, which Streamlit has deprecated.
    """
    global target, daypara, df, df2, df_4pycaret, df_temp
    EPOCH = st.sidebar.slider("Epochs", 100, 1000)
    model = NeuralProphet(
        growth="linear",
        changepoints=None,
        n_changepoints=30,
        changepoints_range=0.95,
        trend_reg=0,
        trend_reg_threshold=False,
        yearly_seasonality="auto",
        weekly_seasonality=True,
        daily_seasonality="auto",
        seasonality_mode="additive",
        seasonality_reg=0,
        n_forecasts=30,
        n_lags=60,  ##determines autoregression
        num_hidden_layers=0,
        d_hidden=None,
        ar_sparsity=None,
        learning_rate=None,
        epochs=EPOCH,
        loss_func="Huber",
        normalize="auto",
        impute_missing=True,
    )
    metrics = model.fit(df2, validate_each_epoch=True, freq="D")
    future = model.make_future_dataframe(df2, periods=252, n_historic_predictions=len(df2))
    with st.spinner("Training..."):
        forecast = model.predict(future)

    # One panel per metric; the validation series uses the "<metric>_val" key.
    fig, ax = plt.subplots(1, 2, figsize=(17, 7))
    for axis, metric_name in zip(ax, ("MAE", "SmoothL1Loss")):
        axis.plot(metrics[metric_name], 'ob', linewidth=6, label="Training Loss")
        axis.plot(metrics[metric_name + "_val"], '-r', linewidth=2, label="Validation Loss")
        axis.legend(loc='center right')
        axis.tick_params(axis='both', which='major')
        axis.set_xlabel("Epoch")
        axis.set_ylabel("Loss")
        axis.set_title("Model Loss (" + metric_name + ")")
    st.subheader("Loss Check")
    st.pyplot(fig)

    with st.spinner("Recognizing Time Pattern"):
        st.subheader("Time Pattern")
        # NOTE(review): plot_parameters() is expected to return a matplotlib
        # figure; if it returns None, st.pyplot falls back to the global
        # figure exactly as the previous argument-less call did.
        parameters_fig = model.plot_parameters()
        # Kept for callers that still rely on the global option being disabled.
        st.set_option('deprecation.showPyplotGlobalUse', False)
        st.pyplot(parameters_fig)