from statsmodels.tsa.statespace.sarimax import SARIMAX
import matplotlib.pyplot as plt


def mod(df, p, d, q, P, D, Q, lag):
    # print(df)
    model = SARIMAX(endog=df['Y'].values,
                    order=(p, d, q),
                    seasonal_order=(P, D, Q, lag)).fit()
    print(model.summary())

    plt.plot(df.index, model.resid)
    plt.show()
    # plot_acf_pacf is a project helper defined elsewhere in the original code
    plot_acf_pacf(model.resid[7:], 21)
Example #2
from pandas.plotting import autocorrelation_plot
from statsmodels.tsa.statespace.sarimax import SARIMAX
import matplotlib.pyplot as plt


class Sarimax:
    def __init__(self, df, cfg):
        self.series = df[cfg['target_feature']]
        self.model = SARIMAX(self.series,
                             order=(3, 1, 0),
                             seasonal_order=(0, 0, 0, 12))

    def fit_model(self):
        # Fit model
        self.model = self.model.fit(disp=0)
        print(self.model.summary())

    def plot_autocorrelation(self):
        # Plot auto correlation
        autocorrelation_plot(self.series)
        plt.show()

    def predict_arima(self, series):
        # SARIMAXResults.predict expects start/end positions, not a series,
        # so forecast as many steps ahead as there are rows in `series`
        return self.model.forecast(steps=len(series))
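# A minimal usage sketch for the Sarimax wrapper above; the DataFrame, the
# cfg dictionary and the forecast horizon are illustrative assumptions, not
# part of the original example.
import pandas as pd

df = pd.DataFrame({'sales': range(48)},
                  index=pd.date_range('2018-01-01', periods=48, freq='MS'))
cfg = {'target_feature': 'sales'}

wrapper = Sarimax(df, cfg)
wrapper.fit_model()
print(wrapper.predict_arima(df.tail(6)))  # 6-step-ahead forecast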
Example #3
def Auto_Arima(df, dirloc, filename):
    import itertools
    import os
    import datetime as dt
    import pandas as pd
    from statsmodels.tsa.statespace.sarimax import SARIMAX
    
    p=d=q=range(0,3)
    pdq = list(itertools.product(p,d,q))
    seas_decomp=[]
    for x in pdq:
        x1=(x[0],x[1],x[2],12)
        seas_decomp.append(x1)
    print("Computating AIC of Different Sesonal ARIMA.....\n")
    arima_order=[]
    seas_order=[]
    aic_val=[]
    
    for params in pdq:
        for seas_par in seas_decomp:
            mod = SARIMAX(df,order=params,seasonal_order=seas_par,enforce_stationarity=False, enforce_invertibility=False,freq="MS").fit()
            arima_order.append(params)
            seas_order.append(seas_par)
            aic_val.append(round(mod.aic,2))
            print("SARIMA: {} X {} | AIC = {}".format(params,seas_par,round(mod.aic,2)))
            
    results = pd.DataFrame({"ARIMA Order":arima_order,"Seasonal Order":seas_order,"AIC Value":aic_val}) 
    results_sorted = results.sort_values(by="AIC Value",ascending=True)
    results_sorted=results_sorted.reset_index(drop=True)
    print("Selected SARIMA Order:",results_sorted.head(2))
    
    final_model = SARIMAX(df, order=results_sorted["ARIMA Order"][0],
                          seasonal_order=results_sorted["Seasonal Order"][0],
                          enforce_stationarity=False, enforce_invertibility=False,
                          freq="MS").fit()
    print("Final Model Result Summary {}".format(final_model.summary()))
    print(results_sorted["ARIMA Order"][0])
    print(results_sorted["Seasonal Order"][0])
    predictions = final_model.predict(start=dt.datetime.strptime("2020-06-01","%Y-%m-%d"),end=dt.datetime.strptime("2020-12-01","%Y-%m-%d"))
    print("Average Monthly WTI Crude Oil Spot Price from June to Dec 2020:")
    print(predictions)
    with open(os.path.join(dirloc[:-5], filename), "a") as f:
        f.write("Simulation Result of SARIMA....\n")
        f.write(str(results_sorted))
        f.write("\n")
        f.write(str(predictions))
    return results_sorted
Example #4

# Prepare the data
X = timeseriesgenerator  # feature matrix, defined elsewhere in the original notebook
y =   # target series, left unspecified in the original

from sklearn.model_selection import train_test_split
# keep chronological order: do not shuffle a time series split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)


# ========================= SARIMAX =========================
from statsmodels.tsa.statespace.sarimax import SARIMAX

# SARIMAX is fit on the series passed to the constructor, so fit() takes no data
mod = SARIMAX(data['ln_wpi'], trend='c', order=(1, 1, (1, 0, 0, 1)))
res = mod.fit(disp=False)
print(res.summary())

# predict over the test index positions rather than passing the test set itself
pred = res.predict(start=len(X_train), end=len(X_train) + len(X_test) - 1)
plt.plot(X, y)
plt.plot(X_test, pred)




# ========================= XGBoost =========================
from xgboost import XGBRegressor 

xgb = XGBRegressor()
xgb.fit(X_train_scaled, y_train)
pred = xgb.predict(X_test)
Example #5
df_comp=df_comp.fillna(method='ffill')
# -- redefine column names and add a new column on returns - we will be working on returns
df_comp['market_value']=df_comp.ftse
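# The comment above mentions a returns column that the original snippet never
# creates; a plausible sketch, assuming simple percentage returns were intended:
df_comp['returns'] = df_comp['market_value'].pct_change(1).mul(100)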




# split dataset (on straight data = prices)
# ----------
size = int(len(df_comp) * 0.8)
df = df_comp.iloc[:size]
df_test = df_comp.iloc[size:]




# review ACF and PACF (in reality is more functional to run auto_arima vs checking ACF/PACF manually, but this is for sake of example)
# ----------
# not done here
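# A quick manual check, sketched here since the original skips it
# (plot_acf/plot_pacf come from statsmodels.graphics.tsaplots):
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

plot_acf(df.market_value, lags=40, zero=False)
plot_pacf(df.market_value, lags=40, zero=False, method='ywm')
plt.show()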



# run SARIMAX model using S&P500 values as exogenous factor to explain FTSE values
# ----------
model_sarimax = SARIMAX(df.market_value, order=(1,0,1), seasonal_order=(2,0,1,5), exog=df.spx).fit()
print(model_sarimax.summary())
print('----------')



# %%
fig, (ax7, ax8) = plt.subplots(1,2, figsize=(16, 4))

plot_acf(airpassengers_train, ax7)
ax7.set_title('ACF of seasonal series')

plot_pacf(airpassengers_train, ax8)
ax8.set_title('PACF of seasonal series')

plt.show()


# %%
sarimax = SARIMAX(airpassengers_train, order=(3,1,1), seasonal_order=(0,1,0,12)).fit()
sarimax.summary()


# %%
sarimax.plot_diagnostics(figsize=(16, 8))
plt.show()


# %%
sarimax_forecast = sarimax.get_forecast(24)
sarimax_forecast_conf_int = sarimax_forecast.conf_int()


# %%
plt.plot(airpassengers_train, label='train')
plt.plot(airpassengers_test, label='test')
Example #7
#___________________________
#Training the model
#___________________________

# SARIMAX(1, 1, 1) with seasonal order (1, 1, 1, 52)
model = SARIMAX(series,
                order=(1, 1, 1),
                seasonal_order=(1, 1, 1, 52),
                trend='n',
                enforce_stationarity=False,
                enforce_invertibility=False).fit()

print("________________________")
print("MODEL SUMMARY")
print(model.summary().tables[1])

# Nice way to check residuals follow a Gaussian distribution
model.plot_diagnostics(figsize=(15, 12))
plt.show()

train_pred = model.predict()
train_pred_cpy = train_pred.copy()
print(train_pred_cpy)
print(type(train_pred_cpy))
print(type(series))

cdf_index = a_organic[0:train_size].index
#print(cdf_index)
#print(type(cdf_index))
Example #8
def mod_sarima(train,
               test,
               dependent_var_col,
               trend,
               p,
               d,
               q,
               P,
               D,
               Q,
               S,
               is_log,
               outpath,
               name,
               xreg,
               plot_regressors,
               mle_regression=True,
               time_varying_regression=False,
               periodicity='daily'):
    """
This function trains and tests the SARIMA model. for this two dataframes must be given, train and test.
trend, pdq and PDQS, are the statsmodels.SARIMAX variables.
    :param train (Pandas Dataframe): train data
    :param test (Pandas Dataframe): test data
    :param ts_col (int): column of the objective variable
    :param trend (str): Parameter controlling the deterministic trend polynomial A(t)
    :param p (int): Autorregresive parameter
    :param d (int): Differencing parameter
    :param q (int): Differencing Moving Average parameter
    :param P (int): Seasonal Autorregresive parameter
    :param D (int): Seasonal Differencing parameter
    :param Q (int): Seasonal Differencing Moving Average parameter
    :param S (int): Lags for the seasonal
    :param is_log (bool): true if the series is in logarithm. defaults to False.
    :param outpath (str): path where the results will be stored
    :param name (str): name to use when saving the files returned by the model
    :xreg(list): list of strings with names of columns in the test/train datasets to be used as regressors
    :plot_regressors: whether the regressors should be plotted in the function
    :return: mae_error (float): Mean Absolute Error
    rmse_error (float): root mean squared error
     res_df (Pandas Dataframe): Dataframe with all data and the prediction in the Forecast column.
      mod (statsmodel object): Model object.
    """
    print(
        'Modelling \n', name,
        ' Forecast - SARIMAX ' + '(' + str(p) + ',' + str(d) + ',' + str(q) +
        ')' + 'S' + '(' + str(P) + ',' + str(D) + ',' + str(Q) + ')' + str(S))

    # path definition
    if name not in os.listdir(outpath):
        os.mkdir(outpath + name)
        print('creating output folder in: \n', outpath + name)
    report_output_path = str(outpath) + str(name) + '/'

    # fit the model
    if len(xreg) == 0:
        mod = SARIMAX(train[dependent_var_col],
                      trend=trend,
                      order=(p, d, q),
                      seasonal_order=(P, D, Q, S),
                      time_varying_regression=time_varying_regression,
                      mle_regression=mle_regression).fit()
    else:
        mod = SARIMAX(train[dependent_var_col],
                      trend=trend,
                      order=(p, d, q),
                      seasonal_order=(P, D, Q, S),
                      exog=train[xreg],
                      enforce_stationarity=False,
                      time_varying_regression=time_varying_regression,
                      mle_regression=mle_regression).fit()

    # plot diagnostics
    plt.figure()
    plt.title('Plot diagnostics for ' + dependent_var_col +
              ' Forecast - SARIMA ' + '(' + str(p) + ',' + str(d) + ',' +
              str(q) + ')' + 'S' + '(' + str(P) + ',' + str(D) + ',' + str(Q) +
              ')' + str(S))
    mod.plot_diagnostics(figsize=(15, 9), lags=40)
    plt.savefig(report_output_path + 'diagnostics_' + name + '.png')

    # predict with the model
    # I know this seems like a lot, but to be able to support broken time series in the forecast you need to reset the indexes

    test_aux = test.copy(deep=True)

    # TODO: remove this parameter
    test_aux[xreg] = np.exp(test_aux[xreg])
    test_aux[xreg] = test_aux[xreg] * 0.9
    test_aux[xreg] = np.log(test_aux[xreg])

    test_aux.reset_index(drop=True, inplace=True)
    train_aux = train.copy(deep=True)
    train_aux.reset_index(drop=True, inplace=True)

    # get the predictions with the model
    if len(xreg) == 0:
        predictions = mod.predict(train_aux.index.max() + 1,
                                  end=train_aux.index.max() + 1 +
                                  test_aux.index.max())
        conf_intervals = mod.get_prediction(
            train_aux.index.max() + 1,
            end=train_aux.index.max() + 1 +
            test_aux.index.max()).conf_int(alpha=0.5)
    else:
        predictions = mod.predict(train_aux.index.max() + 1,
                                  end=train_aux.index.max() + 1 +
                                  test_aux.index.max(),
                                  exog=test_aux[xreg])
        conf_intervals = mod.get_prediction(
            train_aux.index.max() + 1,
            end=train_aux.index.max() + 1 + test_aux.index.max(),
            exog=test_aux[xreg]).conf_int(alpha=0.5)

    predictions.index = test.index
    conf_intervals.index = test.index

    # the confidence interval is trimmed for extreme values so it does not blow up after missing dates when the inverse log transform (exp) is applied
    conf_intervals = pd.DataFrame(conf_intervals)
    # conf_intervals[(conf_intervals['lower log_revenue_emi'] < conf_intervals['lower log_revenue_emi'].quantile(q=0.01)) | (
    #         conf_intervals['upper log_revenue_emi'] > conf_intervals['upper log_revenue_emi'].quantile(q=0.99))] = np.nan

    conf_intervals.index = conf_intervals.index.date
    conf_intervals.index = conf_intervals.index.map(str)

    # assign the predictions to the test dataframe to be used later in the plotting
    test['Forecast'] = predictions
    train['Forecast'] = mod.fittedvalues

    # add the columns that are in the regressors to the dataframe that will be used and get a dataframe to plot (train aux)
    columns = [dependent_var_col, 'Forecast']
    columns.append(xreg)
    columns = list(flatten(columns))
    train_aux = train[columns]
    test_aux = test[columns]
    test_aux = pd.merge(test_aux,
                        conf_intervals,
                        left_index=True,
                        right_index=True)

    # transform the data back from logarithm if the series is in that scale
    if is_log is True:
        res_df = pd.concat([train_aux, test_aux])
        res_df['Forecast'] = np.exp(res_df['Forecast'])
        res_df[dependent_var_col] = np.exp(res_df[dependent_var_col])

        mae_error = mean_absolute_error(np.exp(test[dependent_var_col]),
                                        np.exp(predictions))
        rmse_error = np.sqrt(
            mean_squared_error(np.exp(test[dependent_var_col]),
                               np.exp(predictions)))
        mape = mean_absolute_percentage_error(np.exp(test[dependent_var_col]),
                                              np.exp(predictions))

        preds = np.exp(predictions)

    else:
        res_df = pd.concat([train_aux, test_aux])
        mae_error = mean_absolute_error(test[dependent_var_col], predictions)
        rmse_error = np.sqrt(
            mean_squared_error(test[dependent_var_col], predictions))
        mape = mean_absolute_percentage_error(test[dependent_var_col],
                                              predictions)
        preds = predictions

    # Create a text box for the iteration results
    textstr = 'MAE:' + str(round(mae_error, 0)) + '\n' + 'MAPE:' + str(
        round(mape, 2))

    aux_res_df = res_df.tail(365)  # only plot the last 365 rows
    aux_res_df.index = pd.to_datetime(aux_res_df.index)
    if str(periodicity).lower() == 'daily':
        aux_res_df = aux_res_df.reindex(pd.date_range(aux_res_df.index.min(),
                                                      aux_res_df.index.max()),
                                        fill_value=np.nan)

    # Upper and lower confidence intervals
    lower = aux_res_df[str('lower ' + str(dependent_var_col))]
    upper = aux_res_df[str('upper ' + str(dependent_var_col))]
    if is_log is True:
        lower = np.exp(lower)
        upper = np.exp(upper)

    # plot the figure with the prediction
    fig, ax = plt.subplots(figsize=(15, 10))
    plt.subplots_adjust(right=0.85, left=0.05, bottom=0.1)
    ax2 = ax.twinx()
    ax.plot(aux_res_df["Forecast"], color='darkred', label='Forecast')
    ax.plot(aux_res_df[dependent_var_col], color='darkblue', label='Real')
    if plot_regressors is True:
        for i in xreg:
            ax2.plot(aux_res_df[i], color='grey', alpha=0.4, label=str(i))
    ax.plot(lower, color='darkgreen', label='Lower', alpha=0.5)
    ax.plot(upper, color='darkgreen', label='Upper', alpha=0.5)
    ax.fill_between(upper.dropna().index,
                    upper.dropna(),
                    lower.dropna(),
                    facecolor='darkgreen',
                    alpha=0.2,
                    interpolate=False)
    ax.axvline(x=pd.to_datetime(test.index.min(), format='%Y-%m-%d'),
               color='grey',
               linestyle='--')
    ax.xaxis.set_major_locator(mticker.MultipleLocator(30))
    plt.gcf().autofmt_xdate()
    # generate a text box
    props = dict(boxstyle='round', facecolor='white')
    # place a text box in upper left in axes coords
    ax.text(0.05,
            0.95,
            textstr,
            transform=ax.transAxes,
            fontsize=14,
            verticalalignment='top',
            bbox=props)

    ax.legend(title='Forecast Legend',
              bbox_to_anchor=(1.05, 1),
              loc='upper left')
    ax2.legend(title='Regressors',
               bbox_to_anchor=(1.05, 0.7),
               loc='center left')
    plt.title('SARIMAX Forecast of ' + name)
    plt.savefig(report_output_path + 'Forecast_' + name + '_' + str(
        datetime.strftime(pd.to_datetime(test.index.min()), format='%Y-%m-%d'))
                + '.png')
    plt.show()

    plt.close('all')

    # plotting the results in plotly
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(x=res_df.index,
                   y=res_df[dependent_var_col],
                   mode='lines',
                   name='Real'))
    fig.add_trace(
        go.Scatter(x=res_df.index,
                   y=res_df['Forecast'],
                   mode='lines+markers',
                   name='Fitted - Forecasted'))

    fig.add_shape(
        dict(type="line",
             x0=test.index.min(),
             y0=res_df[dependent_var_col].min(),
             x1=test.index.min(),
             y1=res_df[dependent_var_col].max(),
             line=dict(color="grey", width=1)))
    fig.update_xaxes(rangeslider_visible=True)
    fig.update_layout(title=dependent_var_col + ' Forecast - SARIMA ' + '(' +
                      str(p) + ',' + str(d) + ',' + str(q) + ')' + 'S' + '(' +
                      str(P) + ',' + str(D) + ',' + str(Q) + ')' + str(S),
                      xaxis_title='Date',
                      yaxis_title=dependent_var_col,
                      font=dict(family="Century gothic",
                                size=18,
                                color="darkgrey"))
    fig.write_html(report_output_path + name + '_forecast_SARIMA.html')
    plt.close('all')

    print('MAE', mae_error)
    print('RMSE', rmse_error)
    print('MAPE', mape)
    print(mod.summary())

    return mae_error, rmse_error, mape, name, preds, conf_intervals
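# A hedged usage sketch for mod_sarima; the dataframes, column names, regressor
# list and output path below are illustrative assumptions, not taken from the
# original code.
mae, rmse, mape, model_name, preds, ci = mod_sarima(
    train=train_df,
    test=test_df,
    dependent_var_col='log_revenue',
    trend='c',
    p=1, d=1, q=1,
    P=1, D=0, Q=1, S=7,
    is_log=True,
    outpath='./output/',
    name='revenue_forecast',
    xreg=['log_marketing_spend'],
    plot_regressors=True)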
Example #9
generator = TimeseriesGenerator(scaled_train,
                                scaled_train,
                                length=n_input,
                                batch_size=30)

# In[158]:

model = Sequential()
model.add(LSTM(100, activation='relu', input_shape=(n_input, n_features)))
model.add(Dense(50))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# In[159]:

model.summary()

# In[160]:

model.fit_generator(generator, epochs=30)

# In[161]:

model.history.history.keys()

# In[162]:

loss_per_epoch = model.history.history['loss']
plt.plot(range(len(loss_per_epoch)), loss_per_epoch)

# In[163]:
Example #10
    Q = range(0, 2, 1)
    m = 120
    parameters = product(p, q, P, Q)
    parameters_list = list(parameters)
    print(len(parameters_list))
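    # optimize_SARIMA is not defined in this snippet; a minimal sketch of what it
    # might look like (a grid search over (p,q)x(P,Q) ranked by AIC), matching the
    # call signature used below, is:
    def optimize_SARIMA(parameters_list, d, D, s, series):
        rows = []
        for p_, q_, P_, Q_ in parameters_list:
            try:
                fitted = SARIMAX(series,
                                 order=(p_, d, q_),
                                 seasonal_order=(P_, D, Q_, s)).fit(disp=-1)
            except Exception:
                continue
            rows.append([(p_, q_, P_, Q_), round(fitted.aic, 2)])
        out = pd.DataFrame(rows, columns=['(p,q)x(P,Q)', 'aic'])
        return out.sort_values('aic').reset_index(drop=True)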

    ## Find the optimization parameters:
    result_df = optimize_SARIMA(parameters_list, 1, 1, m, dft_f['cnt_smooth'])
    print('\nSarima Optimization\n', result_df)
    best_param = result_df['(p,q)x(P,Q)'][0]
    (p_best, q_best, P_best, Q_best) = best_param

    # best_model = SARIMAX(dft_f['cnt_smooth'], order=(1, 1, 1), seasonal_order=(0, 1, 1, m)).fit(disp=-1)

    best_model = SARIMAX(dft_f['cnt_smooth'], order=(p_best, 1, q_best), seasonal_order=(P_best, 1, Q_best, m)).fit(disp=-1)
    print(best_model.summary())

    best_model.plot_diagnostics(figsize=(12,8))
    plt.suptitle('Diagnostics of the best model')
    plt.savefig(f'{baseSave}/diagnostic_plot_station_{s}.png', dpi=150)
    plt.clf()
    '''
    END: Compute Sarima optimization
    '''
    end_endto = args.forecast_upto.date() + timedelta(days=-1)
    dft_all = pd.read_csv(f'{baseSave}/smoothed_to_compare_s_{s}.csv', header=[0], index_col=[0], sep=';', parse_dates=True)
    dft_all_upto = dft_all.loc[selection[s]['stop']:end_endto.strftime('%Y-%m-%d')]

    dft_f_from = dft_f.loc[selection[s]['start']:]
    pred_uc = best_model.get_forecast(steps=pd.to_datetime(args.forecast_upto.date().strftime('%Y-%m-%d')))
    pred_ci = pred_uc.conf_int()
Example #11
                                    order=param, 
                                    seasonal_order=param_seasonal,
                                    enforce_stationarity=False, 
                                    enforce_invertibility=False)
            results = sarima_model.fit()
            print("ARIMA{}x{} - AIC:{:.2f}".format(param, param_seasonal, results.aic))
        except Exception:
            continue

# plug in results with lowest AIC score
sarima_model = SARIMAX(y, order=(1,1,1), seasonal_order=(0,1,1,12))
sarima_model = sarima_model.fit(disp=False)

# summary table of SARIMA
print("SARIMA summary table:")
print(sarima_model.summary().tables[1])

# show plot diagnostics
sarima_model.plot_diagnostics(figsize=(15,12))
plt.show()

# Show predictions using one-step forecast
pred = sarima_model.get_prediction(start=pd.to_datetime('1998-01-01'), dynamic=False)
pred_ci = pred.conf_int()

ax = y['1990':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One step ahead forecast', alpha=0.7)
ax.fill_between(pred_ci.index, 
                pred_ci.iloc[:, 0], 
                pred_ci.iloc[:, 1], color='k', alpha=0.2)
ax.set_xlabel('Date')
Example #12
if st.sidebar.checkbox("Show forecasting for time entries", True):
    st.markdown("#### Time series prediction using SARIMAX")
    st.plotly_chart(fig)

# Print the SARIMA MAE and model summary
st.write('SARIMA MAE = ', mean_absolute_error(sarimax_prediction, test))


@st.cache(allow_output_mutation=True)
def load_model():
    model = pm.auto_arima(train,
                          start_p=0,
                          start_q=0,
                          test='adf',
                          max_p=7,
                          max_q=7,
                          m=1,
                          d=None,
                          seasonal=False,
                          start_P=0,
                          D=0,
                          trace=True,
                          error_action='ignore',
                          suppress_warnings=True,
                          stepwise=True)
    return model


model = load_model()
st.text(model.summary())
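# `test`, `sarimax_prediction` and `fig` are created elsewhere in the original
# app; a minimal sketch of how they could be produced from the fitted pmdarima
# model (names and plot styling are assumptions) is:
import pandas as pd
import plotly.express as px

sarimax_prediction = pd.Series(model.predict(n_periods=len(test)),
                               index=test.index)
fig = px.line(pd.DataFrame({'actual': test, 'predicted': sarimax_prediction}),
              title='Forecast vs. actuals')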
Example #13
def projectexample_modelling(series, model_name, parameters):
    """ Fit a time series model to a univariate series and report its diagnostics and metrics.

            :param series: univariate time series
            :type series: dataframe
            :param model_name: name of the model to fit (currently only 'SARIMAX' is handled)
            :param parameters: list [p, d, q, P, D, Q, S, t] of SARIMAX parameters
            :return:
                - model: fitted model object (empty list on failure)
                - error (int): variable with error code
        """

    # modelling
    error = 0
    try:
        print("{} time series modelling".format('-' * 20))
        print("{} {} model".format('-' * 20, model_name))

        if model_name=='SARIMAX':

            p = parameters[0]
            d = parameters[1]
            q = parameters[2]
            P = parameters[3]
            D = parameters[4]
            Q = parameters[5]
            S = parameters[6]
            t = parameters[7]

            print("{} fitting model".format('-' * 20))
            # fit the model
            model = SARIMAX(series.values,
                             trend = t,
                             order = (p, d, q),
                             seasonal_order = (P, D, Q, S),
                             enforce_stationarity = False,
                             enforce_invertibility = False).fit()


            # Model summary
            print("{} Model summary".format('-' * 20))
            print(model.summary().tables[1])


            # Model diagnostic
            print("{} Model diagnostic".format('-' * 20))
            fig = model.plot_diagnostics(figsize=(20, 12))
            fig.savefig(os.path.join(os.getcwd(), 'figures', 'diagnostic_{}.png'.format(model_name)))
            fig.show()

    except Exception as exception_msg:
        print('{} (!) Error in projectexample_modelling: '.format('-' * 20) + str(exception_msg))
        error = 1
        model = []
        return model, error

    # Metrics
    print("{} Metrics".format('-' * 20))
    try:
        # Regression metrics
        y_fitted = model.predict()
        R2 = round(r2_score(series, y_fitted), 3)
        MAE = round(mean_absolute_error(series, y_fitted), 3)
        RMSE = round(np.sqrt(mean_squared_error(series, y_fitted)), 3)

        print("{} R2: {}".format('-' * 20, R2))
        print("{} MAE: {}".format('-' * 20, MAE))
        print("{} RMSE: {}".format('-' * 20, RMSE))

    except Exception as exception_msg:
        print('{} (!) Error in projectexample_modelling (metrics): '.format('-' * 20) + str(exception_msg))
        error = 2
        return model, error




    return model, error
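# A hedged example call; `my_series` and the parameter values are illustrative
# assumptions showing the expected [p, d, q, P, D, Q, S, t] ordering.
params = [1, 1, 1, 1, 1, 0, 12, 'c']
model, error = projectexample_modelling(series=my_series,
                                        model_name='SARIMAX',
                                        parameters=params)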
Example #14
test_last_date = pd.to_datetime(test_data.index[-1])
# seasonally_diffed_data = log_transformed_train_data.diff()[10:]
# test_stationarity(seasonally_diffed_data)
# shapiro_normaly_test(seasonally_diffed_data)
# plot_data_properties(seasonally_diffed_data, 'Log transformed, diff=52 and seasonally differenced data')
decomposition = plot_seasonal_decompose(df['Last'], 'multiplicative')

# best_model, models = best_sarima_model(train_data=log_transformed_train_data,p=range(3),q=range(3),P=range(3),Q=range(3))
# preds_best = np.exp(best_model.predict(start='2019-01-01', dynamic=True, typ='levels'))
# print(f'MAPE{np.round(mean_abs_pct_error(log_transformed_test_data,preds_best),2)}')

agile_model = SARIMAX(endog=log_transformed_train_data,
                      order=(1, 1, 2),
                      seasonal_order=(1, 1, 2, 52),
                      enforce_invertibility=False).fit()
agile_model.summary()

# just to deactivate warnings from PyCharm and NumPy
# noinspection PyTypeChecker
agile_model_pred = np.exp(
    agile_model.predict(start=test_first_date,
                        end=test_last_date,
                        dynamic=True,
                        typ='levels'))

print(f'MAPE {np.round(mean_abs_pct_error(test_data,agile_model_pred),2)}%')
# print(f'MAE:{np.round(mean_absolute_error(test_data,agile_model_pred),2)}')

# noinspection PyTypeChecker
agile_model_forecast = np.exp(agile_model.forecast(steps=2))
print(agile_model_forecast)
Example #15
# summary = auto_arima(df1['total'], exogenous=df1[['holiday']], seasonal=True, m=7, error_action="ignore").summary()
# print(summary)
# => we find that the best model is SARIMAX(0,0,1) x (2,0,0,7)

# fourth, split the data set (478 rows in total)
# ----------
train = df1.iloc[:436]
test = df1.iloc[436:]

# fifth, run the SARIMA train_model with the order determined by auto_arima
# ----------
# the exogenous holiday regressor is included so that the exog passed to
# predict() below matches the fitted model
train_model = SARIMAX(train['total'],
                      exog=train[['holiday']],
                      order=(0, 0, 1),
                      seasonal_order=(2, 0, 0, 7),
                      enforce_invertibility=False).fit()
print(train_model.summary())
# enforce_invertibility=False relaxes the requirement that MA coefficients stay
# invertible (below 1 in modulus), which avoids a ValueError during fitting

#sixth, test predictions vs test set
# ----------
start = len(train)
end = len(train) + len(test) - 1

predictions = train_model.predict(
    start, end, exog=test[['holiday']]).rename('SARIMAX predictions vs test')

test['total'].plot(legend=True)
predictions.plot(legend=True)
plt.show()

#seventh, evaluate the model on rmse error
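# a minimal sketch of the RMSE evaluation this step refers to, assuming
# scikit-learn is available
from sklearn.metrics import mean_squared_error
import numpy as np

rmse = np.sqrt(mean_squared_error(test['total'], predictions))
print(f'RMSE on the test set: {rmse:.2f}')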
Example #16
    def fit_sarimax(self):

        # sarimax= auto_arima(y=self.data_lag[["fallecimientos"]],
        #                    exogenous=self.data_lag[["casos_total"]],
        #                    start_p=1, start_q=1,
        #                    test='adf',
        #                    max_p=2, max_q=2, m=7,
        #                    start_P=0, seasonal=True,
        #                    d=None, D=1, trace=False,
        #                    error_action='ignore',
        #                    suppress_warnings=True,
        #                    stepwise=True)

        sarimax = SARIMAX(endog=self.data_lag.iloc[:-1, ][["fallecimientos"]],
                          exog=self.data_lag.iloc[:-1, ][["casos_total"]],
                          order=(0, 0, 3),
                          seasonal_order=(0, 0, 0, 0)).fit()

        sum = sarimax.summary()
        predictions = pd.DataFrame(
            sarimax.forecast(steps=5, exog=self.forecast[["casos_total"]]))

        e = pd.DataFrame({
            "Modelo":
            "SARIMAX",
            "Predicción de hoy": [predictions.iloc[0, 0]],
            "Error de hoy": [
                abs(predictions.iloc[0, 0] -
                    self.dt.loc[len(self.dt) - 1, "fallecimientos"])
            ]
        })

        predictions["fecha"] = self.dt.loc[len(self.dt) - 1, "fecha"]
        predictions.columns = ["fallecimientos", "fecha"]
        predictions.reset_index(drop=True, inplace=True)
        for i in range(len(self.forecast)):
            # shift each forecast row forward by i days from the last known date
            predictions.loc[i, "fecha"] = predictions.fecha[i] + timedelta(days=i)

        new = pd.concat(
            (self.dt[["fallecimientos", "fecha"]], predictions.iloc[1:, :]),
            axis=0)

        new["Predicciones"] = np.where(
            new.fecha <= self.dt.loc[len(self.dt) - 1, "fecha"], "Real",
            "Pred")

        fig = px.bar(
            new,
            x="fecha",
            y="fallecimientos",
            color="Predicciones",
        )

        # predictions.columns =["Predicciones_Fallecimientos", "fecha"]
        #
        # load = str(self.dt.loc[len(self.dt)-1, "fecha"] - timedelta(days=1))
        # load = load[0:10] + "_.pkl"
        #
        # with open(load, "rb") as file:
        #     historic = pickle.load(file)
        # predictions["Error"] = 0
        # p=pd.concat([predictions.reset_index(drop=True), historic], ignore_index=True)
        # p = p.loc[p.fecha <= self.dt.loc[len(self.dt)-1, "fecha"],:]
        # p.reset_index(drop=True, inplace=True)
        # for i in range(0,len(p)):
        #     if self.dt.loc[len(self.dt)-1,"fecha"] == p.loc[i,"fecha"]:
        #         p.loc[i,"Error"] = np.sqrt((self.dt.loc[len(self.dt)-1,"fallecimientos"] - p.loc[i,"Predicciones_Fallecimientos"])**2)
        #
        # save = str(self.dt.loc[len(self.dt)-1, "fecha"])
        # save = save[0:10] + "_.pkl"
        #
        # with open(save, "wb") as file:
        #     pickle.dump(p, file)

        return e, fig, sum
Example #17
df_test = df[df.index > '2017-12-31']
lags = ar_select_order(df_train, maxlag=13, ic='bic', old_names=False).ar_lags
print('(BIC) lags= ', len(lags), ':', lags)

# AR and SARIMAX
## AR(p) is the simplest time-series model; it nests inside SARIMAX(p,d,q,s), which adds
## a moving average MA(q), integration order I(d), seasonality S(s) and exogenous X
from statsmodels.tsa.statespace.sarimax import SARIMAX
adf = alf(s, log=1, freq='Q').loc[19591201:20171231]
adf.index = pd.DatetimeIndex(adf.index.astype(str), freq='infer')
arima = SARIMAX(adf, order=(2, 1, 0), trend='c').fit()
fig = arima.plot_diagnostics(figsize=(10, 6))
plt.tight_layout(pad=2)
plt.savefig(os.path.join(imgdir, 'ar.jpg'))
plt.show()
arima.summary()

# Forecasting
## One-step ahead predictions
model = AutoReg(df_train, lags=lags, old_names=False).fit()
print(model.summary())

# Observations to predict are from the test split
from sklearn.metrics import mean_squared_error
all_dates = AutoReg(df, lags=lags, old_names=False)
df_pred = all_dates.predict(model.params,
                            start=df_train.index[-1]).shift(1).iloc[1:]
mse = mean_squared_error(df_test, df_pred)
var = np.mean(np.square(df_test - df_train.mean()))
print(f"Short-term Forecasts:  rmse={np.sqrt(mse):.6f} r2={1-mse/var:.4f}")
fig, ax = plt.subplots(clear=True, num=1, figsize=(4, 6))
Example #18
class SARIMAXModel(ModelStrategy):
    '''
    A class for a Seasonal Autoregressive Integrated Moving Average Model and the standard operations on it
    '''
    def __init__(self, hparams, log_dir=None):
        univariate = True
        model = None
        name = 'SARIMAX'
        self.auto_params = hparams.get('AUTO_PARAMS', False)
        self.trend_p = int(hparams.get('TREND_P', 10))
        self.trend_d = int(hparams.get('TREND_D', 2))
        self.trend_q = int(hparams.get('TREND_Q', 0))
        self.seasonal_p = int(hparams.get('SEASONAL_P', 5))
        self.seasonal_d = int(hparams.get('SEASONAL_D', 2))
        self.seasonal_q = int(hparams.get('SEASONAL_Q', 0))
        self.m = int(hparams.get('M', 12))
        super(SARIMAXModel, self).__init__(model,
                                           univariate,
                                           name,
                                           log_dir=log_dir)

    def fit(self, dataset):
        '''
        Fits a SARIMAX forecasting model
        :param dataset: A Pandas DataFrame with 2 columns: Date and Consumption
        '''
        if dataset.shape[1] != 2:
            raise Exception(
                'Univariate models cannot fit with datasets with more than 1 feature.'
            )
        dataset.rename(columns={
            'Date': 'ds',
            'Consumption': 'y'
        },
                       inplace=True)
        series = dataset.set_index('ds')
        if self.auto_params:
            best_model = pmdarima.auto_arima(
                series,
                seasonal=True,
                stationary=False,
                m=self.m,
                information_criterion='aic',
                max_order=2 * (self.trend_p + self.trend_q),
                max_p=2 * self.trend_p,
                max_d=2 * self.trend_d,
                max_q=2 * self.trend_q,
                max_P=2 * self.seasonal_p,
                max_D=2 * self.seasonal_d,
                max_Q=2 * self.seasonal_q,
                error_action='ignore'
            )  # Automatically determine model parameters
            order = best_model.order
            seasonal_order = best_model.seasonal_order
            print("Best SARIMAX params: (p, d, q):", best_model.order,
                  " and  (P, D, Q, s):", best_model.seasonal_order)
        else:
            order = (self.trend_p, self.trend_d, self.trend_q)
            seasonal_order = (self.seasonal_p, self.seasonal_d,
                              self.seasonal_q, self.m)
        self.model = SARIMAX(series,
                             order=order,
                             seasonal_order=seasonal_order,
                             enforce_stationarity=True,
                             enforce_invertibility=True).fit()
        print(self.model.summary())
        return

    def evaluate(self, train_set, test_set, save_dir=None, plot=False):
        '''
        Evaluates performance of SARIMAX model on test set
        :param train_set: A Pandas DataFrame with 2 columns: Date and Consumption
        :param test_set: A Pandas DataFrame with 2 columns: Date and Consumption
        :param save_dir: Directory in which to save forecast metrics
        :param plot: Flag indicating whether to plot the forecast evaluation
        '''
        train_set.rename(columns={
            'Date': 'ds',
            'Consumption': 'y'
        },
                         inplace=True)
        test_set.rename(columns={
            'Date': 'ds',
            'Consumption': 'y'
        },
                        inplace=True)
        train_set = train_set.set_index('ds')
        test_set = test_set.set_index('ds')
        train_set["model"] = self.model.fittedvalues
        test_set["forecast"] = self.forecast(
            test_set.shape[0])['Consumption'].tolist()

        df_forecast = pd.concat([train_set, test_set]).rename(columns={'y': 'gt'})
        test_metrics = self.evaluate_forecast(df_forecast,
                                              save_dir=save_dir,
                                              plot=plot)
        return test_metrics

    def forecast(self, days, recent_data=None):
        '''
        Create a forecast for the test set. Note that this is different than obtaining predictions for the test set.
        The model makes a prediction for the provided example, then uses the result for the next prediction.
        Repeat this process for a specified number of days.
        :param days: Number of days into the future to produce a forecast for
        :param recent_data: A factual example for the first prediction
        :return: An array of predictions
        '''
        forecast_df = self.model.forecast(steps=days).reset_index(level=0)
        forecast_df.columns = ['Date', 'Consumption']
        return forecast_df

    def save(self, save_dir, scaler_dir=None):
        '''
        Saves the model to disk
        :param save_dir: Directory in which to save the model
        '''
        if self.model:
            model_path = os.path.join(save_dir,
                                      self.name + self.train_date + '.pkl')
            self.model.save(model_path)  # Serialize and save the model object

    def load(self, model_path, scaler_path=None):
        '''
        Loads the model from disk
        :param model_path: Path to saved model
        '''
        if os.path.splitext(model_path)[1] != '.pkl':
            raise Exception('Model file path for ' + self.name +
                            ' must have ".pkl" extension.')
        self.model = SARIMAXResults.load(model_path)
        return
# ### Building and Training the model

# In[90]:

#marking start time for model training
start_time = time.time()

# fit model
# seasonal effects and integration are set to zero, which reduces SARIMAX to ARMAX
ARMAX = SARIMAX(Y_train, exog=X_train_fs, order=(0, 0, 0)).fit(disp=False)

#printing time taken to train the model
print("--- %s seconds ---" % (time.time() - start_time))

#print model summary stats
print(ARMAX.summary())

# make out-of-sample predictions for the validation period
Y_predict_ARMAX = ARMAX.predict(start=len(Y_train), end=len(Y_train) + len(Y_test) - 1, exog=X_test_fs)

# ### Testing the model using validation data

# In[91]:

# Generating a plot for actual and predicted values
plt.figure(figsize=[10, 4])
plt.plot(Y_test, "b-", label="Actual Value of Y")
plt.plot(Y_predict_ARMAX, "g-", label="Predicted Value of Y")
plt.title("Predicition using ARIMAX model")
plt.grid(True)
plt.legend()
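# A short hedged addition: quantify the validation fit with RMSE, since the
# plot above only shows it visually (assumes scikit-learn is available)
from sklearn.metrics import mean_squared_error
import numpy as np

rmse_armax = np.sqrt(mean_squared_error(Y_test, Y_predict_ARMAX))
print("ARMAX validation RMSE: %.3f" % rmse_armax)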