# Sweep candidate lag orders and print the information criteria for each, so
# the best VAR order p can be chosen by inspection (lowest AIC/BIC/FPE/HQIC).
# NOTE(review): assumes df_train is a stationary multivariate DataFrame -- confirm upstream.
model = VAR(df_train)
for i in range(1, 10):  # candidate lag orders 1..9
    result = model.fit(i)
    print('Lag Order =', i)
    print('AIC : ', result.aic)
    print('BIC : ', result.bic)
    print('FPE : ', result.fpe)
    print('HQIC: ', result.hqic, '\n')

# In[53]:

# An alternate method to choose the order p of the VAR model is
# model.select_order(maxlags): it reports the order that gives the lowest
# AIC, BIC, FPE and HQIC scores in one table.
x = model.select_order(maxlags=12)
x.summary()

# ### Train the VAR Model

# In[54]:

# Fit the final model at the chosen order (p = 3).
model_fitted = model.fit(3)
model_fitted.summary()

# ### Check for Serial Correlation of Residuals

# In[56]:

# If any correlation is left in the residuals, there is still some pattern in
# the time series that the model has not explained.
# Confirm stationarity of the differenced series before modelling.
# NOTE(review): only 'y_538' is tested here -- presumably the other columns
# were checked elsewhere; verify.
adfuller(df_train_diff['y_538'])  # looks good

# The model.
model = VAR(df_train_diff)

# Choose the best p (order) of the VAR model by comparing information criteria.
for i in range(1, 8):  # candidate lag orders 1..7
    result = model.fit(i)
    print('Lag Order =', i)
    print('AIC : ', result.aic)
    print('BIC : ', result.bic)
    print('FPE : ', result.fpe)
    print('HQIC: ', result.hqic, '\n')

# 4 looks best here; let's try the automatic way.
best = model.select_order(maxlags=7)
best.summary()  # hmm: two criteria pick 4, two pick 7 -- try both and compare?

# Start with 4.
fitted = model.fit(4)
fitted.summary()

# Durbin-Watson statistic for serial correlation of the residuals:
# ~2 means no correlation, 0 = positive correlation, 4 = negative correlation.
from statsmodels.stats.stattools import durbin_watson
dw = durbin_watson(fitted.resid)
dw  # looks great!

# The forecast: seed it with the last lag_order observations of the
# (differenced) training data, as required by VARResults.forecast.
lag_order = fitted.k_ar  # to verify
for_forecast = df_train_diff.values[-lag_order:]