def ols_model_diagnostics(model):
    """Print and plot a series of diagnostics for a fitted regression model.

    Runs, in order: a QQ-plot of the residuals, a scatter of the Pearson
    residuals by observation, a leverage vs. squared normalized residual
    plot, an influence plot, and the Harvey-Collier linearity test.

    Parameters
    ----------
    model : fitted regression results object
        Must expose ``resid`` and ``resid_pearson`` and be accepted by
        ``plot_leverage_resid2``, ``influence_plot`` and
        ``linear_harvey_collier`` (presumably a statsmodels OLS results
        instance -- confirm against callers).

    Returns
    -------
    None
        All output goes to stdout and to matplotlib figures.
    """
    #residuals should look normal - this plot should be more or less a
    #straight line
    print('Residuals QQ-Plot')
    statsmodels.api.qqplot(model.resid,
                           scipy.stats.t,
                           fit=True,
                           line='r')
    matplotlib.pyplot.show()

    #standardized (Pearson) residuals by observation - this should look
    #random/no pattern around the red zero line
    stdres = pandas.DataFrame(model.resid_pearson)
    residual_lines = matplotlib.pyplot.plot(stdres, 'o', ls='none')
    matplotlib.pyplot.axhline(y=0, color='r')
    matplotlib.pyplot.ylabel('Standardized Residual')
    matplotlib.pyplot.xlabel('Observation')
    #single-argument print(...) gives the same output on Python 2 and 3
    print(residual_lines)

    #leverage statistics vs. normalized residuals squared
    plot_leverage_resid2(model)
    #influence plot: studentized resids vs. leverage. The combination of
    #large residuals and a high leverage (influence on estimation of the
    #model coefficients) indicates an influence point. In both of these
    #plots, mostly checking to see that most points have low leverage
    influence_plot(model)

    #test for nonlinearity: looking for the p-value here to be > 0.05 to
    #meet the linearity assumption
    try:
        #only the test itself is guarded; printing happens on success
        harvey_collier_result = linear_harvey_collier(model)
    except linalg.LinAlgError:
        print("Error: Singular covariance matrix, "
              "Harvey Collier nonlinearity test not executed")
    else:
        print(harvey_collier_result)
Example #2
0
    def test_harvey_collier(self):
        # Compare linear_harvey_collier on self.res with stored reference
        # values. The "> ..." comment lines record the commands that
        # produced each reference dict.

        # > hc = harvtest(fm, order.by = NULL, data = list())
        # > mkhtest_f(hc, 'harvey_collier', 't')
        expected = {
            "statistic": 0.494432160939874,
            "pvalue": 0.6215491310408242,
            "parameters": 198,
            "distr": "t",
        }

        # > hc2 = harvtest(fm, order.by=ggdp , data = list())
        # > mkhtest_f(hc2, 'harvey_collier_2', 't')
        # Reference for the order.by variant; recorded here but not
        # compared against anything below.
        expected_ordered = {
            "statistic": 1.42104628340473,
            "pvalue": 0.1568762892441689,
            "parameters": 198,
            "distr": "t",
        }

        observed = smsdia.linear_harvey_collier(self.res)
        compare_t_est(observed, expected, decimal=(12, 12))
    def test_harvey_collier(self):
        # Checks the Harvey-Collier result for self.res against a stored
        # reference; each reference dict is preceded by the commands
        # ("#> ...") it was generated with.

        #> hc = harvtest(fm, order.by = NULL, data = list())
        #> mkhtest_f(hc, 'harvey_collier', 't')
        reference = {'statistic': 0.494432160939874,
                     'pvalue': 0.6215491310408242,
                     'parameters': 198,
                     'distr': 't'}

        #> hc2 = harvtest(fm, order.by=ggdp , data = list())
        #> mkhtest_f(hc2, 'harvey_collier_2', 't')
        # Second reference (order.by variant): defined but never compared
        # in this test.
        reference_2 = {'statistic': 1.42104628340473,
                       'pvalue': 0.1568762892441689,
                       'parameters': 198,
                       'distr': 't'}

        result = smsdia.linear_harvey_collier(self.res)
        compare_t_est(result, reference, decimal=(12, 12))