def predict_ind(data_ind, end_date):
    """Fit a SARIMA model on ``data_ind['amount']`` and forecast up to ``end_date``.

    The model is a SARIMAX with order (0, 1, 1) and seasonal order
    (1, 1, 0, 52), fitted on the full series. The forecast starts one day
    after the last index label of ``data_ind``.

    :param data_ind: DataFrame with an ``amount`` column. Its last index
        label may be a ``'%Y-%m-%d'`` string (the CSV case) or a
        datetime-like object (the Excel case).
    :param end_date: end of the forecast horizon, passed unchanged to
        ``get_prediction(end=...)``.
    :return: tuple ``(forecast_df, conf_int)``: a DataFrame with the
        forecast values indexed by the prediction row labels, and the
        confidence intervals computed with ``alpha=0.05``.
    """
    # Build and train model
    fitted_model = SARIMAX(endog=data_ind['amount'],
                           order=(0, 1, 1),
                           seasonal_order=(1, 1, 0, 52)).fit()

    # String labels are parsed to a date; datetime-like labels already
    # support date arithmetic and are used as they are.
    last_label = data_ind.index[-1]
    if isinstance(last_label, str):
        last_label = datetime.strptime(last_label, '%Y-%m-%d').date()
    forecast_start = last_label + timedelta(days=1)

    # Predict
    prediction = fitted_model.get_prediction(start=forecast_start,
                                             end=end_date,
                                             dynamic=True,
                                             full_results=True)

    # Create results and confidence intervals
    forecast_values = prediction.prediction_results.forecasts[0]
    forecast_df = pd.DataFrame(forecast_values, index=prediction.row_labels)
    conf_int = prediction.conf_int(alpha=0.05)

    # Returning prediction and CI
    return forecast_df, conf_int
def future_forecast(request):
    """Render ``charts_forecast.html`` with a 2020 forecast of ``IPN31152N``.

    Reads ``sales/data/IPN31152N.csv``, replaces its index with a monthly
    date range, fits a SARIMAX (order (3, 1, 3), seasonal order
    (0, 1, 1, 12)) on the rows up to 2017-12-31 and predicts
    2020-01-31 .. 2020-12-31.

    :param request: request object forwarded to ``render``.
    :return: the response produced by ``render``. The template context has
        two JSON strings: ``data`` (``[time_unix(date), forecast]`` pairs
        under key ``'2020'``) and ``result_changes`` (the rounded forecast
        and its difference to the 2019 values, each value wrapped in a
        one-element list).
    """
    series_name = 'IPN31152N'

    df = pd.read_csv('sales/data/IPN31152N.csv', index_col=0)
    df.index = pd.date_range(start='1972-01-01', end='2020-01-01', freq='M')

    train_df = df[df.index <= '2017-12-31']
    model = SARIMAX(train_df[series_name],
                    order=(3, 1, 3),
                    seasonal_order=(0, 1, 1, 12)).fit()
    pred = model.get_prediction(start='2020-01-31', end='2020-12-31')

    df_pred = pd.DataFrame(pred.predicted_mean)
    df_pred.columns = [series_name]
    results = {
        '2020': [[time_unix(stamp), value]
                 for stamp, value in zip(df_pred.index, df_pred[series_name])]
    }

    # Forecast rounded to 2 decimals, and its change against the 2019 values.
    rounded_forecast = [round(value, 2) for value in pred.predicted_mean]
    actual_2019 = df.loc[df.index.year == 2019, series_name].to_numpy()
    yearly_change = np.array(rounded_forecast) - actual_2019
    result_changes = {
        '2020': [
            [[value] for value in rounded_forecast],
            [[round(delta, 2)] for delta in yearly_change],
        ]
    }

    context = {
        "data": json.dumps(results),
        "result_changes": json.dumps(result_changes),
    }
    return render(request, 'charts_forecast.html', context=context)
def mod_sarima(train, test, dependent_var_col, trend, p, d, q, P, D, Q, S,
               is_log, outpath, name, xreg, plot_regressors,
               mle_regression=True, time_varying_regression=False,
               periodicity='daily'):
    """
    Train a SARIMAX model on ``train``, forecast over ``test`` and report.

    trend, pdq and PDQS are the statsmodels SARIMAX parameters.

    Side effects: creates ``outpath + name`` if it is not listed in
    ``outpath``, writes a diagnostics PNG, a forecast PNG and a plotly HTML
    file there, shows the matplotlib figure, prints the error metrics and
    the model summary, and adds a ``Forecast`` column to both ``train`` and
    ``test`` in place.

    :param train (Pandas Dataframe): train data
    :param test (Pandas Dataframe): test data
    :param dependent_var_col (str): name of the column with the objective variable
    :param trend (str): Parameter controlling the deterministic trend polynomial A(t)
    :param p (int): Autoregressive parameter
    :param d (int): Differencing parameter
    :param q (int): Moving Average parameter
    :param P (int): Seasonal Autoregressive parameter
    :param D (int): Seasonal Differencing parameter
    :param Q (int): Seasonal Moving Average parameter
    :param S (int): Lags for the seasonal
    :param is_log (bool): True if the series is in logarithm; forecast,
        actuals and interval bounds are then exponentiated for the metrics and plots.
    :param outpath (str): path where the results will be stored (it is
        concatenated directly with ``name``)
    :param name (str): name to use when saving the files returned by the model
    :param xreg (list): list of strings with names of columns in the
        test/train datasets to be used as regressors; empty for none
    :param plot_regressors (bool): whether the regressors should be plotted
    :param mle_regression (bool): forwarded to SARIMAX
    :param time_varying_regression (bool): forwarded to SARIMAX
    :param periodicity (str): when equal to 'daily' (case-insensitive) the
        plotted frame is reindexed to a gap-free daily range
    :return:
        mae_error (float): Mean Absolute Error
        rmse_error (float): Root Mean Squared Error
        mape (float): Mean Absolute Percentage Error
        name (str): the ``name`` argument, returned unchanged
        preds: the predictions (exponentiated when ``is_log`` is True)
        conf_intervals (Pandas Dataframe): confidence intervals of the
            forecast, indexed by date strings
    """
    order_label = '({},{},{})S({},{},{}){}'.format(p, d, q, P, D, Q, S)
    print('Modelling \n', name, ' Forecast - SARIMAX ' + order_label)

    # path definition
    if name not in os.listdir(outpath):
        os.mkdir(outpath + name)
        print('creating output folder in: \n', outpath + name)
    report_output_path = str(outpath) + str(name) + '/'

    # fit the model; regressors add exog and relax the stationarity constraint
    has_regressors = len(xreg) != 0
    model_kwargs = dict(trend=trend,
                        order=(p, d, q),
                        seasonal_order=(P, D, Q, S),
                        time_varying_regression=time_varying_regression,
                        mle_regression=mle_regression)
    if has_regressors:
        model_kwargs.update(exog=train[xreg], enforce_stationarity=False)
    mod = SARIMAX(train[dependent_var_col], **model_kwargs).fit()

    # plot diagnostics
    plt.figure()
    plt.title('Plot diagnostics for' + dependent_var_col +
              ' Forecast - SARIMA ' + order_label)
    mod.plot_diagnostics(figsize=(15, 9), lags=40)
    plt.savefig(report_output_path + 'diagnostics_' + name + '.png')

    # predict with the model
    # the indexes are reset so the forecast can be addressed by position,
    # which supports broken (gappy) time series
    test_aux = test.copy(deep=True)
    if has_regressors:
        # TODO: remove this parameter
        # NOTE(review): the test regressors are scaled by 0.9 in level
        # terms (exp -> * 0.9 -> log) before forecasting - confirm this is
        # still wanted.
        test_aux[xreg] = np.log(np.exp(test_aux[xreg]) * 0.9)
    test_aux.reset_index(drop=True, inplace=True)
    train_aux = train.copy(deep=True)
    train_aux.reset_index(drop=True, inplace=True)

    # get the predictions with the model
    forecast_start = train_aux.index.max() + 1
    forecast_end = forecast_start + test_aux.index.max()
    forecast_kwargs = {'exog': test_aux[xreg]} if has_regressors else {}
    predictions = mod.predict(forecast_start, end=forecast_end,
                              **forecast_kwargs)
    # NOTE(review): alpha=0.5 gives a 50% interval - confirm 0.05 was not intended.
    conf_intervals = mod.get_prediction(
        forecast_start, end=forecast_end,
        **forecast_kwargs).conf_int(alpha=0.5)
    predictions.index = test.index
    conf_intervals.index = test.index

    # the confidence interval index is converted to date strings
    conf_intervals = pd.DataFrame(conf_intervals)
    conf_intervals.index = conf_intervals.index.date
    conf_intervals.index = conf_intervals.index.map(str)

    # assign the predictions to the dataframes to be used later in the plotting
    test['Forecast'] = predictions
    train['Forecast'] = mod.fittedvalues

    # keep the objective, the forecast and the regressor columns for plotting
    columns = [dependent_var_col, 'Forecast']
    columns.append(xreg)
    columns = list(flatten(columns))
    train_aux = train[columns]
    test_aux = test[columns]
    test_aux = pd.merge(test_aux, conf_intervals,
                        left_index=True, right_index=True)

    # transform the data back from logarithm if the series is in that scale
    res_df = pd.concat([train_aux, test_aux])
    if is_log is True:
        res_df['Forecast'] = np.exp(res_df['Forecast'])
        res_df[dependent_var_col] = np.exp(res_df[dependent_var_col])
        actuals = np.exp(test[dependent_var_col])
        preds = np.exp(predictions)
    else:
        actuals = test[dependent_var_col]
        preds = predictions
    mae_error = mean_absolute_error(actuals, preds)
    rmse_error = np.sqrt(mean_squared_error(actuals, preds))
    mape = mean_absolute_percentage_error(actuals, preds)

    # Create a text box for the iteration results
    textstr = 'MAE:' + str(round(mae_error, 0)) + '\n' + 'MAPE:' + str(
        round(mape, 2))

    aux_res_df = res_df.tail(365)  # only plot the last 365 rows
    aux_res_df.index = pd.to_datetime(aux_res_df.index)
    # The original test `str(periodicity).upper() is 'daily'` could never be
    # true (upper-cased text, identity comparison); compare by value instead.
    if str(periodicity).lower() == 'daily':
        aux_res_df = aux_res_df.reindex(
            pd.date_range(aux_res_df.index.min(), aux_res_df.index.max()),
            fill_value=np.nan)

    # Upper and lower confidence intervals
    lower = aux_res_df[str('lower ' + str(dependent_var_col))]
    upper = aux_res_df[str('upper ' + str(dependent_var_col))]
    if is_log is True:
        lower = np.exp(lower)
        upper = np.exp(upper)

    # plot the figure with the prediction
    fig, ax = plt.subplots(figsize=(15, 10))
    plt.subplots_adjust(right=0.85, left=0.05, bottom=0.1)
    ax2 = ax.twinx()
    ax.plot(aux_res_df["Forecast"], color='darkred', label='Forecast')
    ax.plot(aux_res_df[dependent_var_col], color='darkblue', label='Real')
    if plot_regressors is True:
        for regressor in xreg:
            ax2.plot(aux_res_df[regressor], color='grey', alpha=0.4,
                     label=str(regressor))
    ax.plot(lower, color='darkgreen', label='Lower', alpha=0.5)
    ax.plot(upper, color='darkgreen', label='Upper', alpha=0.5)
    ax.fill_between(upper.dropna().index, upper.dropna(), lower.dropna(),
                    facecolor='darkgreen', alpha=0.2, interpolate=False)
    # vertical marker where the test period starts
    ax.axvline(x=pd.to_datetime(test.index.min(), format='%Y-%m-%d'),
               color='grey', linestyle='--')
    ax.xaxis.set_major_locator(mticker.MultipleLocator(30))
    plt.gcf().autofmt_xdate()

    # generate a text box
    props = dict(boxstyle='round', facecolor='white')
    # place a text box in upper left in axes coords
    ax.text(0.05, 0.95, textstr, transform=ax.transAxes, fontsize=14,
            verticalalignment='top', bbox=props)
    ax.legend(title='Forecast Legend', bbox_to_anchor=(1.05, 1),
              loc='upper left')
    ax2.legend(title='Regressors', bbox_to_anchor=(1.05, 0.7),
               loc='center left')
    plt.savefig(report_output_path + 'Forecast_' + name + '_' + str(
        datetime.strftime(pd.to_datetime(test.index.min()),
                          format='%Y-%m-%d')) + '.png')
    # NOTE(review): the title is set after savefig, so the saved PNG has no
    # title - confirm whether that is intended.
    plt.title('SARIMAX Forecast of ' + name)
    plt.show()
    plt.close('all')

    # plotting the results in plotly
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(x=res_df.index, y=res_df[dependent_var_col],
                   mode='lines', name='Real'))
    fig.add_trace(
        go.Scatter(x=res_df.index, y=res_df['Forecast'],
                   mode='lines+markers', name='Fitted - Forecasted'))
    fig.add_shape(
        dict(type="line",
             x0=test.index.min(), y0=res_df[dependent_var_col].min(),
             x1=test.index.min(), y1=res_df[dependent_var_col].max(),
             line=dict(color="grey", width=1)))
    fig.update_xaxes(rangeslider_visible=True)
    # x holds the index (dates) and y the series values; the original had
    # the two axis titles swapped.
    fig.update_layout(
        title=dependent_var_col + ' Forecast - SARIMA ' + order_label,
        xaxis_title='Date',
        yaxis_title=dependent_var_col,
        font=dict(family="Century gothic", size=18, color="darkgrey"))
    fig.write_html(report_output_path + name + '_forecast_SARIMA.html')
    plt.close('all')

    print('MAE', mae_error)
    print('RMSE', rmse_error)
    print('MAPE', mape)
    print(mod.summary())
    return mae_error, rmse_error, mape, name, preds, conf_intervals
continue # plug in results with lowest AIC score sarima_model = SARIMAX(y, order=(1,1,1), seasonal_order=(0,1,1,12)) sarima_model = sarima_model.fit(disp=False) # summary table of SARIMA print("SARIMA summary table:") print(sarima_model.summary().tables[1]) # show plot diagnostics sarima_model.plot_diagnostics(figsize=(15,12)) plt.show() # Show predictions using one-step forecast pred = sarima_model.get_prediction(start=pd.to_datetime('1998-01-01'), dynamic=False) pred_ci = pred.conf_int() ax = y['1990':].plot(label='observed') pred.predicted_mean.plot(ax=ax, label='One step ahead forecast', alpha=0.7) ax.fill_between(pred_ci.index, pred_ci.iloc[:, 0], pred_ci.iloc[:, 1], color='k', alpha=0.2) ax.set_xlabel('Date') ax.set_ylabel('CO2 Levels') plt.legend() plt.show() # Show predictions using dynamic forecast pred_dynamic = sarima_model.get_prediction(start=pd.to_datetime('1998-01-01'), dynamic=True, full_results=True) pred_dynamic_ci = pred_dynamic.conf_int()
# Hold-out set: every row from 2017 onwards.
test = df.loc[df.year >= 2017]
print(test.shape)

# ARIMA
# %%
# ARIMA parameters search
# `start_q` was misspelled `start_1`, so the intended starting MA order was
# never passed to the search.
results_arima = pm.auto_arima(
    train['diff'], d=0, start_p=1, start_q=1, max_p=3, max_q=3)
print(results_arima.summary())

#%%
# using the ARIMA model (the order is hard-coded here, not read from the search result)
model_arima = SARIMAX(train['diff'], order=(3, 0, 2)).fit()
# prediction: dynamic in-sample prediction, start=-50 counts from the end of the sample
prediction_arima = model_arima.get_prediction(
    start=-50, dynamic=True).predicted_mean
# forecasting: 20 steps past the end of the training data
forecast_arima = model_arima.get_forecast(steps=20).predicted_mean

#%%
# model diagnostics: mean absolute value of the model residuals
arima_residual = model_arima.resid
arima_mae = np.mean(np.abs(arima_residual))
print(arima_mae)

#%%
# pmdarima results
results_arima.plot_diagnostics()
plt.show()

# %%