def predict_product(self, product_id):
    """Fit a seasonal ARIMA model to one product's series and forecast it.

    Args:
        product_id: Identifier handed to the private time-series lookup.

    Returns:
        tuple: ``(history, predicted_mean, conf_int)``, each passed through
        ``np.exp``. ``history`` holds the observations whose index is
        strictly between "2015" and "2016" (missing values are replaced by
        0 before exponentiation); the other two are the forecast mean and
        its confidence bounds for ``PREDICTION_TIME * 4`` steps.
    """
    series = self.__get_product_ts(product_id)

    fitted = SARIMAX(
        series,
        order=(0, 1, 2),
        time_varying_regression=True,
        mle_regression=False,
        trend='n',
        seasonal_order=(1, 1, 1, 11),
    ).fit()

    horizon = PREDICTION_TIME * 4
    outlook = fitted.get_forecast(steps=horizon, dynamic=True)

    # Keep only the observations that fall inside the 2015 window.
    after_start = series.index > "2015"
    before_end = series.index < "2016"
    window = series[after_start & before_end].fillna(0)

    # NOTE(review): exponentiating every output presumably undoes a log
    # transform applied when the series was built -- confirm upstream.
    mean_part = outlook.predicted_mean
    bounds_part = outlook.conf_int()
    return np.exp(window), np.exp(mean_part), np.exp(bounds_part)
def sarimax_forecast(df):
    """Forecast the electricity price for the next time point with SARIMAX.

    Rows of ``df`` with a non-null ``price`` are the training history. The
    first row with a null ``price`` is the point to forecast; its remaining
    columns are used as exogenous regressors.

    Args:
        df: DataFrame with a ``price`` column, a ``temp`` column and any
            further exogenous feature columns.

    Returns:
        tuple: ``(forecast, past)``. ``forecast`` is a one-row DataFrame
        with columns ``price``, ``lower_interval`` and ``upper_interval``
        (all NaN when ``temp`` is missing for the forecast row) and an
        index named ``date_time``. ``past`` is a one-element Series holding
        the first column (expected to be ``price``) of the last historical
        row.
    """
    # split past and future on the availability of the price
    has_price = df.price.notnull()
    past = df[has_price]
    future = df[~has_price].drop('price', axis=1)

    # forecast for the next time point only
    future = future.iloc[:1, :]

    # .iloc makes the positional lookup explicit; plain ``[0]`` on a
    # label-indexed Series is deprecated positional fallback in pandas.
    if future.temp.isnull().iloc[0]:
        # weather forecast data is not available for that time point
        forecast = lower = upper = np.nan
        print('weather data is not available')
    else:
        past.index = pd.DatetimeIndex(past.index.values,
                                      freq=past.index.inferred_freq)

        # Build Model
        sarima = SARIMAX(past.price,
                         exog=past.drop('price', axis=1),
                         order=(1, 1, 1),
                         seasonal_order=(1, 0, 2, 7)).fit(maxiter=300)

        # One forecast call supplies both the mean and the interval; the
        # confidence level belongs to conf_int(), not to get_forecast().
        results = sarima.get_forecast(1, exog=future)
        forecast = results.predicted_mean
        bounds = results.conf_int(alpha=0.05)
        lower = bounds['lower price'].iloc[0]
        upper = bounds['upper price'].iloc[0]

    # create forecast df with DatetimeIndex
    forecast = pd.DataFrame(dict(price=forecast,
                                 lower_interval=lower,
                                 upper_interval=upper),
                            index=future.index)
    forecast.index.name = 'date_time'

    past = past.iloc[-1:, 0]
    return forecast, past
def sarimax_forecast(hour=11):
    """Forecast the price for the next time point of one hour-of-day series.

    The data comes from ``get_data(hour=hour)``. Rows with a non-null
    ``price`` are the training history; the first row with a null ``price``
    is the point to forecast, and its remaining columns are used as
    exogenous regressors.

    Args:
        hour: Hour of the day forwarded to ``get_data``.

    Returns:
        tuple: ``(forecast, past)``. ``forecast`` is a one-row DataFrame
        with columns ``price``, ``lower`` and ``upper`` (all NaN when
        ``temp`` is missing for the forecast row). ``past`` is a
        one-element Series holding the first column (expected to be
        ``price``) of the last historical row.
    """
    df_all = get_data(hour=hour)

    # split past and future on the availability of the price
    has_price = df_all.price.notnull()
    past = df_all[has_price]
    future = df_all[~has_price].drop('price', axis=1)
    future = future.iloc[:1, :]

    # .iloc makes the positional lookup explicit; plain ``[0]`` on a
    # label-indexed Series is deprecated positional fallback in pandas.
    if future.temp.isnull().iloc[0]:
        forecast = np.array([np.nan])
        lower = upper = np.nan
    else:
        past.index = pd.DatetimeIndex(past.index.values,
                                      freq=past.index.inferred_freq)

        # Build Model
        sarima = SARIMAX(past.price,
                         past.drop('price', axis=1),
                         order=(1, 1, 1),
                         seasonal_order=(1, 0, 2, 7)).fit(maxiter=300)

        # One forecast call supplies both the mean and the interval; the
        # confidence level belongs to conf_int(), not to get_forecast().
        results = sarima.get_forecast(1, exog=future)
        forecast = results.predicted_mean
        confidence_int = results.conf_int(alpha=0.05)
        lower = confidence_int['lower price'].iloc[0]
        upper = confidence_int['upper price'].iloc[0]

    # create forecast df with DatetimeIndex
    forecast = pd.DataFrame(dict(price=forecast, lower=lower, upper=upper),
                            index=future.index)

    past = past.iloc[-1:, 0]
    return forecast, past
def train_sarima(data=False,
                 hour=11,
                 split_date='2019-10-22 11:00:00',
                 n=30,
                 exog=False):
    """Walk-forward one-step SARIMA/SARIMAX forecasts for ``n`` days.

    For each of the ``n`` days after ``split_date`` the model is refitted
    on every observation before that day, and a one-step forecast with a
    95% interval is collected.

    Args:
        data: DataFrame to use. When left as a bool, the data is loaded
            with ``get_daily`` (no exog) or ``get_all`` (with exog).
        hour: Hour of the day forwarded to the loader.
        split_date: Last timestamp of the training period.
        n: Number of days to forecast.
        exog: ``False`` for a plain SARIMA model; otherwise a sequence
            ``(exog feature names, order, seasonal_order)``.

    Returns:
        tuple: ``(forecast, lower, upper, mape, mase, train, test)``.
        ``forecast`` is a DataFrame with a ``price`` column indexed like
        ``test``; ``lower`` and ``upper`` are lists of interval bounds.
    """
    if isinstance(data, bool):
        if isinstance(exog, bool):
            df = get_daily(hour=hour)
        else:
            df = get_all(hour=hour)
    else:
        df = data

    # formatting split_date
    split_date = pd.DatetimeIndex(np.array([split_date]))
    split_point = split_date[0]

    # get train and test for plotting and scoring only
    train = df[df.index <= split_point]
    test = df[(df.index > split_point)
              & (df.index <= split_point + pd.Timedelta(days=n))]

    # will collect following information from forecast
    forecasts = []
    upper = []
    lower = []

    # loop over to get walk forward forecast for n days
    for i in range(1, n + 1):
        # Walk one day forward. Computed directly from the split point so
        # it does not have to be an exact label of df.index.
        predict_date = split_point + pd.Timedelta(days=i)
        train_set = df[df.index < predict_date]
        train_set.index = pd.DatetimeIndex(
            train_set.index.values, freq=train_set.index.inferred_freq)

        if isinstance(exog, bool):
            # Build Model without exogenous features
            sarima = SARIMAX(train_set,
                             order=(1, 1, 1),
                             seasonal_order=(1, 0, 2, 7)).fit(maxiter=200)
            results = sarima.get_forecast(1)
        else:
            # Build Model with exogenous features
            sarima = SARIMAX(train_set.price,
                             exog=train_set[exog[0]],
                             order=exog[1],
                             seasonal_order=exog[2]).fit(maxiter=200)
            # get features for forecast
            exog_fore = test[test.index == predict_date][exog[0]]
            results = sarima.get_forecast(1, exog=exog_fore)

        # The confidence level belongs to conf_int(), not to get_forecast().
        confidence_int = results.conf_int(alpha=0.05)

        # .iloc makes the positional lookup explicit; plain ``[0]`` on a
        # label-indexed Series is deprecated positional fallback in pandas.
        lower.append(confidence_int['lower price'].iloc[0])
        upper.append(confidence_int['upper price'].iloc[0])
        forecasts.append(results.predicted_mean.iloc[0])

    # calculate the error metrics
    mape = get_mape(test.price, forecasts)
    mase = get_mase(test.price, forecasts, train.price)

    # create forecast df with DatetimeIndex
    forecast = pd.DataFrame(forecasts, index=test.index, columns=['price'])
    return forecast, lower, upper, mape, mase, train, test
plt.show()

# %%
# Fit a seasonal ARIMA (3,1,1)x(0,1,0,12) on the training series.
sarimax = SARIMAX(
    airpassengers_train,
    order=(3, 1, 1),
    seasonal_order=(0, 1, 0, 12),
).fit()
sarimax.summary()

# %%
# Residual diagnostics of the fitted model.
sarimax.plot_diagnostics(figsize=(16, 8))
plt.show()

# %%
# Forecast 24 steps past the end of the training sample.
sarimax_forecast = sarimax.get_forecast(24)
sarimax_forecast_conf_int = sarimax_forecast.conf_int()

# %%
# Overlay train, test and forecast, then shade the confidence band.
for _curve, _curve_label in (
        (airpassengers_train, 'train'),
        (airpassengers_test, 'test'),
        (sarimax_forecast.predicted_mean, 'forecast'),
):
    plt.plot(_curve, label=_curve_label)

_band_lower = sarimax_forecast_conf_int.iloc[:, 0]
_band_upper = sarimax_forecast_conf_int.iloc[:, 1]
plt.fill_between(sarimax_forecast_conf_int.index,
                 _band_lower,
                 _band_upper,
                 color='k',
                 alpha=.2)
plt.legend()
# Refit the best (p, q, P, Q) combination on the smoothed counts.
# ``disp`` is the fit() keyword that controls optimizer output; the previous
# spelling ``dis`` was not a recognised option and had no effect.
best_model = SARIMAX(dft_f['cnt_smooth'],
                     order=(p_best, 1, q_best),
                     seasonal_order=(P_best, 1, Q_best, m)).fit(disp=False)
print(best_model.summary())

# Save the residual diagnostics for this station.
best_model.plot_diagnostics(figsize=(12, 8))
plt.suptitle('Diagnostic Best model')
plt.savefig(f'{baseSave}/diagnostic_plot_station_{s}.png', dpi=150)
plt.clf()
''' END: Compute Sarima optimization '''

# Observed data to compare against runs up to the day before the forecast end.
end_endto = args.forecast_upto.date() + timedelta(days=-1)
dft_all = pd.read_csv(f'{baseSave}/smoothed_to_compare_s_{s}.csv',
                      header=[0],
                      index_col=[0],
                      sep=';',
                      parse_dates=True)
dft_all_upto = dft_all.loc[selection[s]['stop']:end_endto.strftime('%Y-%m-%d')]
dft_f_from = dft_f.loc[selection[s]['start']:]

# ``steps`` is given as a timestamp, i.e. forecast up to that date.
pred_uc = best_model.get_forecast(
    steps=pd.to_datetime(args.forecast_upto.date().strftime('%Y-%m-%d')))
pred_ci = pred_uc.conf_int()

# Plot the fitted window, the held-back observations and the forecast on
# the same axes.
ax = dft_f_from['cnt_smooth'].plot(label='Observed Smoothed Data',
                                   c='r',
                                   figsize=(12, 8))
dft_all_upto['cnt_smooth'].plot(ax=ax,
                                label='Observed "Forecasted" Data',
                                c='b',
                                figsize=(12, 8))
pred_uc.predicted_mean.plot(ax=ax,
                            label='Forecasted Data',
                            c='k',
                            linestyle='--')
ax.set_xlabel('Date')
ax.set_ylabel('Counter')
ax.grid(which='major', linestyle='-')
ax.grid(which='minor', linestyle='--')
plt.legend()
plt.title(f'Forecasted Prediction\n{map_station[s]} {args.forecast_upto.date()}')
plt.savefig(f'{baseSave}/forecasted_prediction_station_{s}_to_{args.forecast_upto.date()}.png', dpi=150)
plt.clf()

# The comparison CSV is no longer needed once the figure is written.
os.remove(f'{baseSave}/smoothed_to_compare_s_{s}.csv')
# Split the BTC series at 2020-11-01: earlier rows train, the rest test.
cutoff = pd.to_datetime("2020-11-01", format='%Y-%m-%d')
train = btc[btc.index < cutoff]
test = btc[btc.index >= pd.to_datetime("2020-11-01", format='%Y-%m-%d')]
print(test)

# Show both partitions on one chart.
plt.plot(train, color="black", label='Training')
plt.plot(test, color="red", label='Testing')
plt.ylabel('BTC Price')
plt.xlabel('Date')
plt.xticks(rotation=45)
plt.title("Train/Test split for BTC Data")

# Fit an ARMA(1, 1) model (SARIMAX without differencing or seasonality).
y = train['BTC-USD']
ARMAmodel = SARIMAX(y, order=(1, 0, 1))
ARMAmodel = ARMAmodel.fit()

# Forecast one step per test row and collect the 95% interval.
horizon = len(test.index)
y_pred = ARMAmodel.get_forecast(horizon)
y_pred_df = y_pred.conf_int(alpha=0.05)

# Point predictions over the same span, then re-label rows with the test
# index so they line up with the observations.
first_label = y_pred_df.index[0]
last_label = y_pred_df.index[-1]
y_pred_df["Predictions"] = ARMAmodel.predict(start=first_label,
                                             end=last_label)
y_pred_df.index = test.index
y_pred_out = y_pred_df["Predictions"]

plt.plot(y_pred_out, color='green', label='ARMA Predictions')
plt.legend()

import numpy as np
from sklearn.metrics import mean_squared_error

# Root-mean-squared error of the predictions against the held-out prices.
mse = mean_squared_error(test["BTC-USD"].values, y_pred_df["Predictions"])
arma_rmse = np.sqrt(mse)
print("ARMA RMSE: ", arma_rmse)
pred_dynamic = sarima_model.get_prediction(start=pd.to_datetime('1998-01-01'), dynamic=True, full_results=True) pred_dynamic_ci = pred_dynamic.conf_int() ax = y['1990':].plot(label='observed', figsize=(15,12)) pred_dynamic.predicted_mean.plot(label='Dynamic Forecast', ax=ax) ax.fill_between(pred_dynamic_ci.index, pred_dynamic_ci.iloc[:, 0], pred_dynamic_ci.iloc[:, 1], color='k', alpha=0.25) ax.fill_betweenx(ax.get_ylim(), pd.to_datetime('1998-01-01'), y.index[-1], alpha=0.1, zorder=-1) ax.set_xlabel('Date') ax.set_ylabel('CO2 Levels') plt.legend() plt.show() # Visualize forecasts 500 steps in the future pred_uc = sarima_model.get_forecast(steps=500) pred_ci = pred_uc.conf_int() ax = y.plot(label='observed', figsize=(15,12)) pred_uc.predicted_mean.plot(ax=ax, label='forecast') ax.fill_between(pred_ci.index, pred_ci.iloc[:,0], pred_ci.iloc[:,1], color='k', alpha=0.25) ax.set_xlabel('Date') ax.set_ylabel('CO2 Levels') plt.legend() plt.show() #### Seasonal ARIMA w/ Exogenous Regressors (SARIMAX) #### """
def fit_model_sarima(train, val, pdq, seasonal_pdq):
    """Fit a SARIMA model on ``train`` and score it against ``val``.

    Args:
        train: Training series passed to ``SARIMAX``.
        val: Validation observations that immediately follow ``train``.
        pdq: Non-seasonal ``(p, d, q)`` order.
        seasonal_pdq: Seasonal ``(P, D, Q, s)`` order.

    Returns:
        The mean absolute error between ``val`` and the forecast made for
        the same number of steps.
    """
    sarima_model = SARIMAX(train,
                           order=pdq,
                           seasonal_order=seasonal_pdq).fit(disp=0)

    # Forecast exactly as many steps as there are validation points, so the
    # error metric always compares sequences of equal length (the horizon
    # used to be fixed at 48).
    y_pred = sarima_model.get_forecast(steps=len(val)).predicted_mean
    return mean_absolute_error(val, y_pred)
def train_sarimax(data=False,
                  exog=False,
                  hour=11,
                  split_date='2019-10-22 11:00:00',
                  n=30):
    """Walk-forward one-step SARIMA/SARIMAX forecasts for ``n`` days.

    For each of the ``n`` days after ``split_date`` the model is refitted
    on every observation before that day, and the one-step forecast with
    its 95% interval is written into the test frame.

    Args:
        data: DataFrame to use. When left as a bool, the data is loaded
            with ``get_daily_price`` (no exog) or ``get_daily_data`` (with
            exog).
        exog: ``False`` for a plain SARIMA model; otherwise a sequence
            ``([exog features], order, seasonal_order)``.
        hour: Hour of the day forwarded to the loader.
        split_date: Last timestamp of the training period.
        n: Number of days to forecast.

    Returns:
        tuple: ``(train, test, mape, mase)``. ``train`` has a ``price``
        column; ``test`` has ``price``, ``forecast_price``,
        ``upper_interval`` and ``lower_interval``.
    """
    if isinstance(data, bool):
        if isinstance(exog, bool):
            df = get_daily_price(hour=hour)
        else:
            df = get_daily_data(hour=hour)
    else:
        df = data

    # formatting split_date
    split_date = pd.DatetimeIndex(np.array([split_date]))[0]

    # get train and test for plotting and scoring only
    train = df[df.index <= split_date]
    test = df[(df.index > split_date)
              & (df.index <= (split_date + timedelta(days=n)))].copy()

    # loop over to get walk forward forecast for n days
    for i in range(1, n + 1):
        print(
            colored(
                f'################# forecasting for day {i} ##################',
                'blue'))

        # walk one day forward to define train_set
        predict_date = split_date + timedelta(days=i)
        train_set = df[df.index < predict_date]
        train_set.index = pd.DatetimeIndex(
            train_set.index.values, freq=train_set.index.inferred_freq)

        if isinstance(exog, bool):
            # Build Model without exogenous features
            sarima = SARIMAX(train_set,
                             order=(1, 1, 1),
                             seasonal_order=(1, 0, 2, 7)).fit(maxiter=200)
            results = sarima.get_forecast(1)
        else:
            # Build Model with exogenous features
            sarima = SARIMAX(train_set.price,
                             exog=train_set[exog[0]],
                             order=exog[1],
                             seasonal_order=exog[2]).fit(maxiter=200)
            # get features for forecast
            exog_fore = test[test.index == predict_date][exog[0]]
            results = sarima.get_forecast(1, exog=exog_fore)

        # The confidence level belongs to conf_int(), not to get_forecast().
        confidence_int = results.conf_int(alpha=0.05)

        # .iloc makes the positional lookup explicit; plain ``[0]`` on a
        # label-indexed Series is deprecated positional fallback in pandas.
        test.loc[predict_date, 'forecast_price'] = \
            results.predicted_mean.iloc[0]
        test.loc[predict_date, 'lower_interval'] = \
            confidence_int['lower price'].iloc[0]
        test.loc[predict_date, 'upper_interval'] = \
            confidence_int['upper price'].iloc[0]

    train = train[['price']]
    test = test[[
        'price', 'forecast_price', 'upper_interval', 'lower_interval'
    ]]

    # calculate the error metrics
    mape = get_mape(test.price, test.forecast_price)
    mase = get_mase(test.price, test.forecast_price, train.price)
    return train, test, mape, mase
# ARIMA
# %%
# ARIMA parameters search
# ``start_q`` is the starting MA order for the search; the previous spelling
# ``start_1`` was not a search parameter, so the search started from the
# library default instead of 1.
results_arima = pm.auto_arima(
    train['diff'], d=0, start_p=1, start_q=1, max_p=3, max_q=3)
print(results_arima.summary())

#%%
# using the ARIMA model
model_arima = SARIMAX(train['diff'], order=(3, 0, 2)).fit()

# prediction: dynamic in-sample prediction over the last 50 observations
prediction_arima = model_arima.get_prediction(
    start=-50, dynamic=True).predicted_mean

# forecasting: 20 steps past the end of the sample
forecast_arima = model_arima.get_forecast(steps=20).predicted_mean

#%%
# model diagnostics: mean absolute in-sample residual
arima_residual = model_arima.resid
arima_mae = np.mean(np.abs(arima_residual))
print(arima_mae)

#%%
# pmdarima results
results_arima.plot_diagnostics()
plt.show()

# %%
# plot ARIMAX
plt.plot(df['diff'], label='True values')