def ols_model_diagnostics(model):
    """Plot and print a series of diagnostics for a fitted regression model.

    The steps, in order:

    1. QQ-plot of ``model.resid`` (shown immediately).
    2. Standardized (Pearson) residuals plotted per observation, with a
       horizontal reference line at zero.
    3. Leverage vs. normalized squared residuals
       (``plot_leverage_resid2``).
    4. Influence plot (``influence_plot``).
    5. Harvey-Collier test for linearity (``linear_harvey_collier``).

    Parameters
    ----------
    model
        Object exposing ``resid`` and ``resid_pearson`` and accepted by
        ``plot_leverage_resid2``, ``influence_plot`` and
        ``linear_harvey_collier`` -- presumably a fitted statsmodels OLS
        results instance; confirm against callers.

    Returns
    -------
    None
        Everything is reported through plots and printed output.
    """
    # Residuals should look normal - this plot should be more or less a
    # straight line.
    # NOTE(review): the reference distribution passed here is
    # scipy.stats.t (with fit=True), not the normal distribution the note
    # above talks about -- confirm this is intended.
    print('Residuals QQ-Plot')
    statsmodels.api.qqplot(model.resid, scipy.stats.t, fit=True, line='r')
    matplotlib.pyplot.show()

    # Standardized residuals by observation (the original note described
    # this as "residuals vs. fitted values") - this should look random,
    # with no pattern.
    stdres = pandas.DataFrame(model.resid_pearson)
    fig = matplotlib.pyplot.plot(stdres, 'o', ls='none')
    matplotlib.pyplot.axhline(y=0, color='r')
    matplotlib.pyplot.ylabel('Standardized Residual')
    matplotlib.pyplot.xlabel('Observation')
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(fig)

    # Leverage statistics vs. normalized residuals squared.
    plot_leverage_resid2(model)

    # Influence plot: studentized residuals vs. leverage. The combination
    # of large residuals and a high leverage (influence on estimation of
    # the model coefficients) indicates an influence point. In both of
    # these plots, mostly checking to see that most points have low
    # leverage.
    influence_plot(model)

    # Test for nonlinearity: looking for the p-value here to be > 0.05 to
    # meet the linearity assumption.
    try:
        # Keep only the call that can raise inside the try body.
        harvey_collier_result = linear_harvey_collier(model)
    except linalg.LinAlgError:
        print("Error: Singular covariance matrix, Harvey Collier nonlinearity test not executed")
    else:
        print(harvey_collier_result)
def test_harvey_collier(self):
    """Compare ``smsdia.linear_harvey_collier(self.res)`` to stored values.

    The expected numbers are hard-coded; the commented commands next to
    them (apparently from an R session) record how they were produced.
    """
    # > hc = harvtest(fm, order.by = NULL, data = list())
    # > mkhtest_f(hc, 'harvey_collier', 't')
    expected = {
        "statistic": 0.494432160939874,
        "pvalue": 0.6215491310408242,
        "parameters": 198,
        "distr": "t",
    }

    # Second set of reference values; nothing below reads it.
    # > hc2 = harvtest(fm, order.by=ggdp , data = list())
    # > mkhtest_f(hc2, 'harvey_collier_2', 't')
    expected_ordered = {
        "statistic": 1.42104628340473,
        "pvalue": 0.1568762892441689,
        "parameters": 198,
        "distr": "t",
    }

    observed = smsdia.linear_harvey_collier(self.res)
    compare_t_est(observed, expected, decimal=(12, 12))
def test_harvey_collier(self):
    """Compare ``smsdia.linear_harvey_collier(self.res)`` to stored values.

    The expected numbers are hard-coded; the commented commands next to
    them (apparently from an R session) record how they were produced.
    """
    #> hc = harvtest(fm, order.by = NULL, data = list())
    #> mkhtest_f(hc, 'harvey_collier', 't')
    reference = {
        'statistic': 0.494432160939874,
        'pvalue': 0.6215491310408242,
        'parameters': 198,
        'distr': 't',
    }

    # Second set of reference values; nothing below reads it.
    #> hc2 = harvtest(fm, order.by=ggdp , data = list())
    #> mkhtest_f(hc2, 'harvey_collier_2', 't')
    reference_ordered = {
        'statistic': 1.42104628340473,
        'pvalue': 0.1568762892441689,
        'parameters': 198,
        'distr': 't',
    }

    computed = smsdia.linear_harvey_collier(self.res)
    compare_t_est(computed, reference, decimal=(12, 12))