def test_acorr_breusch_godfrey(self):
    """Check ``smsdia.acorr_breusch_godfrey`` against stored reference values.

    The expected numbers are the ones recorded next to the ``bgtest`` calls
    quoted in the comments below (presumably R's ``bgtest`` -- confirm).
    """
    fitted = self.res

    # bgf = bgtest(fm, order = 4, type="F")
    expected_f = {
        'statistic': 1.179280833676792,
        'pvalue': 0.321197487261203,
        'parameters': (4, 195),
        'distr': 'f',
    }

    # > bgc = bgtest(fm, order = 4, type="Chisq")
    # > mkhtest(bgc, "breuschpagan_c", "chi2")
    expected_chi2 = {
        'statistic': 4.771042651230007,
        'pvalue': 0.3116067133066697,
        'parameters': (4,),
        'distr': 'chi2',
    }

    observed = smsdia.acorr_breusch_godfrey(fitted, nlags=4)

    # Order compared: chi2 statistic, chi2 p-value, F statistic, F p-value.
    expected = [expected_chi2[key] for key in ('statistic', 'pvalue')]
    expected += [expected_f[key] for key in ('statistic', 'pvalue')]
    assert_almost_equal(observed, expected, decimal=13)

    # check that lag choice works: leaving nlags unset must match an
    # explicit request for 14 lags on this fixture.
    default_lags = smsdia.acorr_breusch_godfrey(fitted, nlags=None)
    explicit_lags = smsdia.acorr_breusch_godfrey(fitted, nlags=14)
    assert_almost_equal(default_lags, explicit_lags, decimal=13)
def acorr_breusch_godfrey(resid, nlags=None):
    """
    Breusch-Godfrey Lagrange Multiplier test for residual autocorrelation.

    Thin wrapper around ``diagnostic.acorr_breusch_godfrey`` that keeps only
    the first two entries of its result (the LM statistic and its p-value).

    Documentation of the wrapped function:
    https://www.statsmodels.org/stable/generated/statsmodels.stats.diagnostic.acorr_breusch_godfrey.html

    This test looks for serial correlation in a timeseries.

    Definition: serial correlation := Serial or auto correlation is a
    correlation of a signal with a delayed copy of itself. The metric of
    correlation is the Pearson correlation and indicates a relationship with
    previous measurements in the series. The presence of serial correlation
    can be used to understand periodicity. See this for more details:
    https://www.mathworks.com/help/signal/ug/find-periodicity-using-autocorrelation.html

    Null hypothesis: There is no serial correlation up to nlags.
    Alternative hypothesis: There is serial correlation.

    Parameters
    ----------
    resid
        Estimation results for which the residuals are tested for serial
        correlation. Passed unchanged as the first argument of the wrapped
        function.
        NOTE(review): despite the name, the wrapped function is documented
        as taking a fitted results instance, not a bare residual series --
        confirm what callers pass.
    nlags : int, default None
        Number of lags to include in the auxiliary regression (nlags is the
        highest lag). ``None`` lets the wrapped function pick its own
        default; which rule it uses depends on the installed statsmodels
        version (an earlier note here quoted
        ``int(np.trunc(12. * np.power(nobs / 100., 1 / 4.)))``) -- confirm.

    Returns
    -------
    BreuschGodfreyResult
        Named tuple with two fields:

        statistic : Lagrange multiplier test statistic.
        pvalue : p-value for the Lagrange multiplier test.
    """
    # Forward the caller's lag choice; previously it was dropped, so the
    # wrapped function always ran with its own default lag count.
    result = diagnostic.acorr_breusch_godfrey(resid, nlags=nlags)
    AcorrBreuschGodfreyResult = namedtuple('BreuschGodfreyResult', 'statistic pvalue')
    return AcorrBreuschGodfreyResult(result[0], result[1])
def test_acorr_breusch_godfrey(self):
    """Verify the Breusch-Godfrey statistics and the default lag selection.

    Reference numbers are taken from the ``bgtest`` calls quoted in the
    comments (presumably R output -- confirm).
    """
    res = self.res
    breusch_godfrey = smsdia.acorr_breusch_godfrey

    # bgf = bgtest(fm, order = 4, type="F")
    # recorded alongside: parameters=(4, 195), distr='f'
    f_stat, f_pvalue = 1.179280833676792, 0.321197487261203

    # > bgc = bgtest(fm, order = 4, type="Chisq")
    # > mkhtest(bgc, "breuschpagan_c", "chi2")
    # recorded alongside: parameters=(4,), distr='chi2'
    lm_stat, lm_pvalue = 4.771042651230007, 0.3116067133066697

    with_four_lags = breusch_godfrey(res, nlags=4)
    reference = [lm_stat, lm_pvalue, f_stat, f_pvalue]
    assert_almost_equal(with_four_lags, reference, decimal=13)

    # check that lag choice works
    with_default_lags = breusch_godfrey(res, nlags=None)
    with_fourteen_lags = breusch_godfrey(res, nlags=14)
    assert_almost_equal(with_default_lags, with_fourteen_lags, decimal=13)
def get_bgod(model: pd.DataFrame, lags: int) -> tuple:
    """Run the Breusch-Godfrey autocorrelation test and return its four values.

    Parameters
    ----------
    model : OLS Model
        Model containing residual values; handed straight to
        ``acorr_breusch_godfrey``.
        NOTE(review): the annotation says ``pd.DataFrame`` while this
        description says a fitted model -- confirm which one callers pass.
    lags : int
        The amount of lags, forwarded as ``nlags``.

    Returns
    -------
    tuple
        ``(lm_stat, p_value, f_stat, fp_value)`` in the order produced by
        ``acorr_breusch_godfrey``.
    """
    outcome = acorr_breusch_godfrey(model, nlags=lags)
    # Unpacking insists on exactly four values before they are re-packed.
    lm_statistic, lm_pvalue, f_statistic, f_pvalue = outcome
    return (lm_statistic, lm_pvalue, f_statistic, f_pvalue)
def test_acorr_breusch_godfrey_multidim(self):
    """A two-column ``resid`` must be rejected with a ``ValueError``."""
    # Only the shape matters, so the array contents are left uninitialised.
    two_column_resid = np.empty((100, 2))
    fake_results = Bunch(resid=two_column_resid)
    expected_message = 'Model resid must be a 1d array'

    with pytest.raises(ValueError, match=expected_message):
        smsdia.acorr_breusch_godfrey(fake_results)
def error_analisis(result, plot=False):
    """Print a set of diagnostics on the errors of a fitted regression.

    Sections are printed in this order: Durbin-Watson, Breusch-Godfrey
    autocorrelation (12 lags), White heteroskedasticity, ADF (through
    ``DFtest``) and Shapiro normality.

    Inputs:
        result: Results from Stats after model.fit(). ``result.resid`` and
            ``result.model.exog`` are read, and ``result`` itself is passed
            to ``acorr_breusch_godfrey``.
        plot: True if we want plots (a Q-Q plot after the Durbin-Watson
            section and a KDE plot of the residuals at the end).

    Returns:
        None. The analysis is reported with ``print`` only.
    """
    # Autocorrelation
    print('----------Durbin Watson-------------')
    out = durbin_watson(result.resid)
    print('Durbin Watson is: ' + str(out))
    if plot:
        qqplot(result.resid, line='s')
        pyplot.show()

    print('--------Breusch Autocorr-----------')
    try:
        bre = acorr_breusch_godfrey(result, nlags=12)
        print('lm: ' + str(bre[0]))
        print('lmpval: ' + str(bre[1]))
        print('fval: ' + str(bre[2]))
        print('fpval: ' + str(bre[3]))
        if bre[1] < 0.05:
            print('Evidence for autocorrelation')
        else:
            print('Not Evidence for autocorrelation')
    except Exception:
        # Best-effort section: report and carry on, but let
        # KeyboardInterrupt / SystemExit propagate (a bare except hid them).
        print('Cant calculate statistic')

    print('-----White Heteroskedasticity------')
    white_test = het_white(result.resid, result.model.exog)
    labels = [
        'LM Statistic', 'LM-Test p-value', 'F-Statistic', 'F-Test p-value'
    ]
    print(dict(zip(labels, white_test)))
    if white_test[1] < 0.05:
        print('Evidence for heteroskedasticity')
    else:
        print('Not Evidence for heteroskedasticity')

    print('----------ADF Test-----------------')
    try:
        DFtest(result.resid)
    except Exception:
        # Same best-effort policy as the Breusch-Godfrey section.
        print("Can't calculate ADF test")

    print('----------Shapiro Normality--------')
    stat, p = shapiro(result.resid)
    print('Statistics=%.3f, p=%.3f' % (stat, p))
    # interpret
    alpha = 0.05
    if p > alpha:
        print('Sample looks Gaussian (fail to reject H0)')
    else:
        print('Sample does not look Gaussian (reject H0)')

    if plot:
        residuals = pd.DataFrame(result.resid)
        # Kept from the original: show() is called both before and after
        # the KDE plot is drawn.
        plt.show()
        residuals.plot(kind='kde')
        plt.show()
#8. granger causality test granger_result = grangercausalitytests(dataframe, maxlag=2) print(granger_result) #The Null hypothesis for grangercausalitytests is that the time series #in the second column, x2, does NOT Granger cause the time series in #the first column, x1. #----------------------------------------------------------------- #9. Breusch Godfrey Lagrange Multiplier tests for residual autocorrelation #resid resid = mod.resid() print(resid) acorr_result = acorr_breusch_godfrey(resid, nlags=2) print(acorr_result) '''Returns: lm (float) – Lagrange multiplier test statistic lmpval (float) – p-value for Lagrange multiplier test fval (float) – fstatistic for F test, fval (float) – pvalue for F test''' #----------------------------------------------------------------- ''' Take another model into consideration #9. VECM model #built data newdata = {'goldPrice': gold_data,'stockIndex': stock_data}
def acorr_breusch_godfrey(self, timeseries):
    """Run the Breusch-Godfrey test on a model generated from ``timeseries``.

    ``self.generate_model(timeseries)`` supplies a pair; its second item is
    passed to ``diagnostic.acorr_breusch_godfrey`` and the first two entries
    of that result are returned.

    Returns
    -------
    BreuschGodfreyResult
        Named tuple with the fields ``statistic`` and ``pvalue``.
    """
    # The first item of the pair is not needed here.
    _, fitted = self.generate_model(timeseries)
    outcome = diagnostic.acorr_breusch_godfrey(fitted)
    result_type = namedtuple('BreuschGodfreyResult', ['statistic', 'pvalue'])
    lm_statistic = outcome[0]
    lm_pvalue = outcome[1]
    return result_type(statistic=lm_statistic, pvalue=lm_pvalue)
def ols_diag(df, X, model, nlag=1, remove_outliers=False):
    """Print a numbered series of diagnostic checks for a fitted OLS model.

    Each check prints a heading and, except for the outlier and
    multicollinearity steps, reports through ``hypo_out(p, h0, h1)``.

    Parameters
    ----------
    df
        Dataset; its length is printed and it is passed to ``outliers``.
    X
        Regressors; their length is printed and they are passed to
        ``heatmap``.
    model
        Fitted results object. ``model.resid`` and ``model.model.exog`` are
        read, and ``model`` itself is passed to the rainbow and
        Breusch-Godfrey tests and to ``outliers``.
    nlag : int, default 1
        Number of lags for the Breusch-Godfrey test.
    remove_outliers : bool, default False
        Forwarded to ``outliers``.

    Returns
    -------
    None
    """
    ### Small Info
    print("Dataset:", "\t", len(df))
    print("X:", "\t", len(X))

    ## Residual normality test
    print("1. Normality Test: ", "Jarque-Bera", "Test")
    jb_h0 = "Residual Normally distributed"
    jb_h1 = "Residual Not Normally distributed"
    jb_p = smt.jarque_bera(model.resid)[1]
    hypo_out(jb_p, jb_h0, jb_h1)

    ## Data linearity test
    print("2. Linearity Test: ", "Rainbow", "Test")
    r_h0 = "Data have linear relationship"
    r_h1 = "Data do not have linear relationship"
    # Only the second value (used as the p-value) is needed.
    _, r_p = smd.linear_rainbow(model)
    hypo_out(r_p, r_h0, r_h1)

    ## Heteroscedasticity test
    print("3. Heteroscedasticity Test: ", "Breusch-Pagan", "Test")
    bp_h0 = "Data have same variance accross"
    bp_h1 = "Data do not have have same variance accross"
    bp_p = smd.het_breuschpagan(model.resid, model.model.exog)[1]
    hypo_out(bp_p, bp_h0, bp_h1)

    ## Autocorrelation test
    print("4. Autocorrelation Test: ", "Breusch Godfrey", "Test")
    bg_h0 = "Data are not related to themself:" + str(nlag) + " lag"
    bg_h1 = "Data are related to themself by:" + str(nlag) + " lag"
    bg_p = smd.acorr_breusch_godfrey(model, nlag)[1]
    hypo_out(bg_p, bg_h0, bg_h1)

    ## Sum of residuals == 0
    print("5. Sum of residuals == 0")
    sr_h0 = "Sum of residuals = 0"
    sr_h1 = "Sum of residual != 0"
    # Not a real p-value: 1 or 0 is fed to hypo_out as a pass/fail flag.
    sr_p = 1 if round(sum(model.resid), 1) == 0 else 0
    hypo_out(sr_p, sr_h0, sr_h1)

    ## List of outliers
    print("6. Checking outliers:")
    # Honour the caller's flag; it used to be hard-coded to False here, so
    # remove_outliers=True had no effect.
    outliers(df, model, remove_outliers=remove_outliers)

    ## Endogeneity check: not implemented (was commented out).

    ## Multicollinearity test
    print("7. Checking multicolinearity")
    try:
        heatmap(X)
    except Exception:
        # Best-effort plot; do not swallow KeyboardInterrupt / SystemExit.
        print("Cannot perrform this test")
Si el estadistico, esta entre 1,038 y 2,962 entonces podemos concluir que los errores no estan autocorrelacionados. En efecto como el estadistico esta en ese intervalo, podemos decir que los errores no estan autocorrelacionados ''' #Pero que sucede si los errores estan correlacionados en otros errores anteriores al inmediato error anterior?, la prueba de durbin-watson no responde este problema #Lo cual una prueba mas general es utilizar la prueba de Breusch Godfrey #Entonces tendriamos un modelo autorregresivo de la siguiente forma. # ei = X * B + p1*ei-1 + p2*ei-2 + p3*ei-3 + ... + pm*ei-m + Vi #Las hipotesis planteadas son las siguientes # Ho: p1=0, p2=0, p3=0... pm=0 # H1: p1!=0, p2!=0, p3!=0...pm!=0 from statsmodels.stats.diagnostic import acorr_breusch_godfrey acorr_breusch_godfrey( modelo ) #Obtenemos un p valor mayor al 5% por lo tanto podemos concluir que los errores no estan #Autocorrelacionados ''' Aun asi puede que el modelo no se encuentre bien especificado. Puede que omitimos una variable e el modelo, puede que no sea una funcion lineal, por lo tanto, el test que nos permitiria saber eso, es el test de ramsey: Vamos a crear un modelo auxiliar, en donde tenemos en cuenta que el modelo puede ser especificado en forma polinomica y = B0 + B1*X1 + B2*y_estimado^2 + u por ejemplo H0: el modelo esta bien especificado H1: el modelo no esta bien especificado ''' from statsmodels.stats.diagnostic import linear_reset linear_reset(
def check_error_term_autocorrelation(self) -> bool:
    """
    Checks correlation between the observations of error term by:
    - Durbin-Watson's statistical test,
    - Breusch-Godfrey's statistical test.

    If:
    - silent_mode = True, method returns:
        a) True (which means that the assumption is fulfilled) if the
           percentage of statistical tests for which the assumption is
           fulfilled is higher than or equal to set min_fulfill_ratio
        b) False (which means that the assumption is not fulfilled) if the
           percentage of statistical tests for which the assumption is
           fulfilled is lower than set min_fulfill_ratio
    - silent_mode = False, method returns True/False as above and shows
      additional statistics, descriptions which are helpful in assessing
      the fulfilment of assumption

    Reads ``self.residuals``, ``self.results``, ``self.alpha``,
    ``self.silent_mode`` and ``self.min_fulfill_ratio``.
    """
    durbin_watson_statistic = durbin_watson(self.residuals)
    # Only the first two entries of the Breusch-Godfrey result are kept.
    bg_test = pd.DataFrame(
        stats_diag.acorr_breusch_godfrey(self.results)[:2],
        columns=["value"],
        index=["Lagrange multiplier statistic", "p-value"])

    # First pass: count the fulfilled tests without printing anything.
    true_counts = 0
    lower_threshold_dw_stat = 1.5
    upper_threshold_dw_stat = 2.5
    if lower_threshold_dw_stat < durbin_watson_statistic < upper_threshold_dw_stat:
        true_counts = true_counts + 1
    true_counts = true_counts + test_hypothesis(
        significance_level=self.alpha,
        p_value=bg_test.iloc[1].value,
        print_outcome=False)
    # Two tests are run, hence the division by 2.
    true_ratio = true_counts / 2

    if not self.silent_mode:
        print(
            Color.BOLD +
            "Assumption 4. Observations of the error term are uncorrelated with "
            "each other." + Color.END, "\n")
        print("This assumption affects on: \n", "- prediction \n",
              "- interpretation.", "\n")
        # NOTE(review): in the reviewed text the third literal below held a
        # raw line-break character right after "information. "; it is
        # written here as the \u2028 escape -- confirm against the file.
        print(
            "One observation of the error term should not predict the next observation. To "
            "resolve this issue, you might need to add an independent variable to the model "
            "that captures this information. \u2028Analysts commonly use distributed lag models, "
            "which use both current values of the dependent variable and past values of "
            "independent variables.\n")
        print(
            Color.BOLD + "Durbin-Watson " + Color.END + "statistical test: \n",
            "If the value of the statistics equals 2 => no serial correlation. \n",
            "If the value of the statistics equals 0 => strong positive correlation. \n",
            "If the value of the statistics equals 4 => strong negative correlation. \n"
        )
        print("The value of Durbin-Watson statistic is " +
              f"{np.round(durbin_watson(self.residuals), 4)}\n")
        # Second pass: the count is rebuilt from zero, this time printing
        # the outcome of each test.
        # NOTE(review): this branch treats a statistic exactly equal to a
        # threshold as satisfied, while the first pass uses strict
        # inequalities -- the two modes can disagree at 1.5 / 2.5; confirm
        # which bound is intended.
        true_counts = 0
        if durbin_watson_statistic < lower_threshold_dw_stat:
            print("Signs of positive autocorrelation =>" + Color.RED +
                  " Assumption not satisfied" + Color.END + "\n")
        elif durbin_watson_statistic > upper_threshold_dw_stat:
            print("Signs of negative autocorrelation =>" + Color.RED +
                  " Assumption not satisfied" + Color.END + "\n")
        else:
            print("Little to no autocorrelation =>" + Color.GREEN +
                  " Assumption satisfied" + Color.END + "\n")
            true_counts = true_counts + 1
        print(Color.BOLD + "Breusch-Godfrey " + Color.END +
              "Lagrange Multiplier statistical tests: \n")
        print(bg_test, "\n")
        true_counts = true_counts + test_hypothesis(
            significance_level=self.alpha,
            p_value=bg_test.iloc[1].value,
            null_hypothesis="there doesn't exist "
            "autocorrelation in the "
            "error term.")
        true_ratio = true_counts / 2
        # Called without print_outcome here; the return value is discarded.
        check_fulfill_ratio(true_fulfill_ratio=true_ratio,
                            min_fulfill_ratio=self.min_fulfill_ratio)
    # The returned verdict comes from a call with print_outcome=False.
    return check_fulfill_ratio(true_fulfill_ratio=true_ratio,
                               min_fulfill_ratio=self.min_fulfill_ratio,
                               print_outcome=False)
# Os p-values estão no segundo grupo de valores from statsmodels.stats.diagnostic import acorr_ljungbox lb = acorr_ljungbox(resid, lags=10) print( "\n", 'Teste de Ljung-Box de independência dos resíduos:', lb[1] ) # A partir do 7o lag, p-value < 0.05 => esses lags apresentam auto-correlação, o que viola # o pressuposto de independência dos resíduos ## Teste de Durbin-Watson para autocorrelação dos resíduos. H_0: resíduos não têm autocorrelação com o seu 1o lag #from statsmodels.stats.stattools import durbin_watson #DW = durbin_watson(resid) #print("\n", 'Teste de Durbin-Watson de independência dos resíduos:', DW[0]) # Teste de Breush-Godfrey para autocorrelação dos resíduos. H_0: resíduos não têm correlação com os seus "n" lags (neste caso definimos n = 10) from statsmodels.stats.diagnostic import acorr_breusch_godfrey bg = acorr_breusch_godfrey(model_output, nlags=10) print( "\n", 'Teste de Breush-Godfrey de independência dos resíduos:', bg ) # P-value (2o valor dos 4 apresentados) < 0.05 => existe autocorrelação dos resíduos # Teste de heterocedasticidade ARCH. H_0: variância é constante. O segundo valor é o p-value from statsmodels.stats.diagnostic import het_arch archTest = het_arch(resid[0], maxlag=5, autolag=None) print("\n", 'Teste ARCH de heterocedasticidade:', archTest[1] ) # P-value < 0.05 => rejeita-se H_0 => variância não é constante ### GJR-GARCH Model ### AR(2) + GJR-GARCH(1,1) from arch import arch_model gjrGarch = arch_model(tsReturns, mean="ARX", lags=2, o=1) # importa as 3 equações ao mesmo tempo
def breusch_godfrey():
    """Print the Breusch-Godfrey test output for ``res`` as a labelled dict.

    ``res`` is not a parameter; it is looked up from the enclosing scope.
    The values returned by ``acorr_breusch_godfrey(res)`` are paired, in
    order, with the four labels below.
    """
    labels = (
        "Lagrange multiplier statistic",
        "p-value",
        "f-value",
        "f p-value",
    )
    outcome = acorr_breusch_godfrey(res)
    labelled = {label: value for label, value in zip(labels, outcome)}
    print(labelled)
#Money Supply meanMS = np.mean(DataUse.MS) sdMS = np.std(DataUse.MS) varMS = np.var(DataUse.MS) #Create Model Model = sma.wls('work.inflation ~ foreign + MS', work).fit() print(Model.summary()) #Weighted Least Squares used to fix Heteroscedastisity #Test The Model Heteroscedastisity = ds.het_white(Model.resid, exog = work) print('F-statistic %r' % Heteroscedastisity[2]) print('Prob,F %f' % Heteroscedastisity[3]) print('Chi-Square %s' % Heteroscedastisity[0]) print('Prob,Chi-Square %g' % Heteroscedastisity[1]) Autocorrelation = ds.acorr_breusch_godfrey(Model, nlags=(2)) print('F-statistic %r' % Autocorrelation[2]) print('Prob,F %f' % Autocorrelation[3]) print('Chi-Square %s' % Autocorrelation[0]) print('Prob,Chi-Square %g' % Autocorrelation[1])