    def test_hac(self):
        res = self.res
        #> nw = NeweyWest(fm, lag = 4, prewhite = FALSE, verbose=TRUE)
        #> nw2 = NeweyWest(fm, lag=10, prewhite = FALSE, verbose=TRUE)

        #> mkarray(nw, "cov_hac_4")
        cov_hac_4 = np.array([
            1.385551290884014, -0.3133096102522685, -0.0597207976835705,
            -0.3133096102522685, 0.1081011690351306, 0.000389440793564336,
            -0.0597207976835705, 0.000389440793564339, 0.0862118527405036
        ]).reshape(3, 3, order='F')

        #> mkarray(nw2, "cov_hac_10")
        cov_hac_10 = np.array([
            1.257386180080192, -0.2871560199899846, -0.03958300024627573,
            -0.2871560199899845, 0.1049107028987101, 0.0003896205316866944,
            -0.03958300024627578, 0.0003896205316866961, 0.0985539340694839
        ]).reshape(3, 3, order='F')

        cov = sw.cov_hac_simple(res, nlags=4, use_correction=False)
        bse_hac = sw.se_cov(cov)
        assert_almost_equal(cov, cov_hac_4, decimal=14)
        assert_almost_equal(bse_hac, np.sqrt(np.diag(cov)), decimal=14)

        cov = sw.cov_hac_simple(res, nlags=10, use_correction=False)
        bse_hac = sw.se_cov(cov)
        assert_almost_equal(cov, cov_hac_10, decimal=14)
        assert_almost_equal(bse_hac, np.sqrt(np.diag(cov)), decimal=14)
    def test_hac(self):
        res = self.res
        #> nw = NeweyWest(fm, lag = 4, prewhite = FALSE, verbose=TRUE)
        #> nw2 = NeweyWest(fm, lag=10, prewhite = FALSE, verbose=TRUE)

        #> mkarray(nw, "cov_hac_4")
        cov_hac_4 = np.array([1.385551290884014, -0.3133096102522685,
            -0.0597207976835705, -0.3133096102522685, 0.1081011690351306,
            0.000389440793564336, -0.0597207976835705, 0.000389440793564339,
            0.0862118527405036]).reshape(3,3, order='F')

        #> mkarray(nw2, "cov_hac_10")
        cov_hac_10 = np.array([1.257386180080192, -0.2871560199899846,
            -0.03958300024627573, -0.2871560199899845, 0.1049107028987101,
            0.0003896205316866944, -0.03958300024627578, 0.0003896205316866961,
            0.0985539340694839]).reshape(3,3, order='F')

        cov = sw.cov_hac_simple(res, nlags=4, use_correction=False)
        bse_hac = sw.se_cov(cov)
        assert_almost_equal(cov, cov_hac_4, decimal=14)
        assert_almost_equal(bse_hac, np.sqrt(np.diag(cov)), decimal=14)

        cov = sw.cov_hac_simple(res, nlags=10, use_correction=False)
        bse_hac = sw.se_cov(cov)
        assert_almost_equal(cov, cov_hac_10, decimal=14)
        assert_almost_equal(bse_hac, np.sqrt(np.diag(cov)), decimal=14)
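The flat vectors above come from the R helper mkarray, which dumps a matrix column by column (R is column-major), hence the reshape with order='F'. A minimal illustration of that convention, independent of the test data:

import numpy as np

# R flattens a 3x3 matrix column by column, so a Fortran-order reshape
# restores the original layout; C order would transpose it.
flat = np.arange(1, 10)                  # column-major dump of [[1, 4, 7], [2, 5, 8], [3, 6, 9]]
m = flat.reshape(3, 3, order='F')
assert m[1, 0] == 2 and m[0, 1] == 4     # entries fill columns, not rows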
Example #3
def test_hac_simple():

    from statsmodels.datasets import macrodata
    d2 = macrodata.load().data
    g_gdp = 400*np.diff(np.log(d2['realgdp']))
    g_inv = 400*np.diff(np.log(d2['realinv']))
    exogg = add_constant(np.c_[g_gdp, d2['realint'][:-1]])
    res_olsg = OLS(g_inv, exogg).fit()



    #> NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE, verbose=TRUE, adjust=TRUE)
    #Lag truncation parameter chosen: 4
    #                     (Intercept)                   ggdp                  lint
    cov1_r = [[  1.40643899878678802, -0.3180328707083329709, -0.060621111216488610],
             [ -0.31803287070833292,  0.1097308348999818661,  0.000395311760301478],
             [ -0.06062111121648865,  0.0003953117603014895,  0.087511528912470993]]

    #> NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE, verbose=TRUE, adjust=FALSE)
    #Lag truncation parameter chosen: 4
    #                    (Intercept)                  ggdp                  lint
    cov2_r = [[ 1.3855512908840137, -0.313309610252268500, -0.059720797683570477],
             [ -0.3133096102522685,  0.108101169035130618,  0.000389440793564339],
             [ -0.0597207976835705,  0.000389440793564336,  0.086211852740503622]]

    cov1 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=True)
    se1 =  sw.se_cov(cov1)
    cov2 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=False)
    se2 =  sw.se_cov(cov2)
    assert_almost_equal(cov1, cov1_r, decimal=14)
    assert_almost_equal(cov2, cov2_r, decimal=14)
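The adjust=TRUE and adjust=FALSE reference matrices differ only by a small-sample factor of T/(T-k); with T = 202 observations and k = 3 parameters that factor is 202/199, which appears to be what use_correction=True applies. A quick check on the (1, 1) entry:

T, k = 202, 3                            # sample size and number of regressors above
factor = T / (T - k)                     # ~ 1.01508
print(1.3855512908840137 * factor)       # ~ 1.40643899878679, the adjusted (1, 1) entry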
Example #4
def test_hac_simple():

    from statsmodels.datasets import macrodata
    d2 = macrodata.load().data
    g_gdp = 400 * np.diff(np.log(d2['realgdp']))
    g_inv = 400 * np.diff(np.log(d2['realinv']))
    exogg = add_constant(np.c_[g_gdp, d2['realint'][:-1]], prepend=True)
    res_olsg = OLS(g_inv, exogg).fit()

    #> NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE, verbose=TRUE, adjust=TRUE)
    #Lag truncation parameter chosen: 4
    #                     (Intercept)                   ggdp                  lint
    cov1_r = [
        [1.40643899878678802, -0.3180328707083329709, -0.060621111216488610],
        [-0.31803287070833292, 0.1097308348999818661, 0.000395311760301478],
        [-0.06062111121648865, 0.0003953117603014895, 0.087511528912470993]
    ]

    #> NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE, verbose=TRUE, adjust=FALSE)
    #Lag truncation parameter chosen: 4
    #                    (Intercept)                  ggdp                  lint
    cov2_r = [
        [1.3855512908840137, -0.313309610252268500, -0.059720797683570477],
        [-0.3133096102522685, 0.108101169035130618, 0.000389440793564339],
        [-0.0597207976835705, 0.000389440793564336, 0.086211852740503622]
    ]

    cov1 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=True)
    se1 = sw.se_cov(cov1)
    cov2 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=False)
    se2 = sw.se_cov(cov2)
    assert_almost_equal(cov1, cov1_r, decimal=14)
    assert_almost_equal(cov2, cov2_r, decimal=14)
Example #5
    def setup(self):
        res_ols = self.res1
        cov1 = sw.cov_hac_simple(res_ols, nlags=4, use_correction=False)
        se1 =  sw.se_cov(cov1)
        self.bse_robust = se1
        self.cov_robust = cov1
        self.small = False

        self.res2 = res.results_ivhac4_large
def test_hac_simple():
    from statsmodels.datasets import macrodata
    d2 = macrodata.load_pandas().data
    g_gdp = 400 * np.diff(np.log(d2['realgdp'].values))
    g_inv = 400 * np.diff(np.log(d2['realinv'].values))
    exogg = add_constant(np.c_[g_gdp, d2['realint'][:-1].values])
    res_olsg = OLS(g_inv, exogg).fit()

    # > NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE,
    #             verbose=TRUE, adjust=TRUE)
    # Lag truncation parameter chosen: 4
    #             (Intercept)                   ggdp                  lint
    cov1_r = [
        [+1.40643899878678802, -0.3180328707083329709, -0.060621111216488610],
        [-0.31803287070833292, 0.1097308348999818661, +0.000395311760301478],
        [-0.06062111121648865, 0.0003953117603014895, +0.087511528912470993]
    ]

    # > NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE,
    #             verbose=TRUE, adjust=FALSE)
    # Lag truncation parameter chosen: 4
    #         (Intercept)                  ggdp                  lint
    cov2_r = [
        [+1.3855512908840137, -0.313309610252268500, -0.059720797683570477],
        [-0.3133096102522685, +0.108101169035130618, +0.000389440793564339],
        [-0.0597207976835705, +0.000389440793564336, +0.086211852740503622]
    ]

    cov1 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=True)
    se1 = sw.se_cov(cov1)
    cov2 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=False)
    se2 = sw.se_cov(cov2)
    # Relax precision requirements for this test due to failure in NumPy 1.23
    assert_allclose(cov1, cov1_r)
    assert_allclose(cov2, cov2_r)
    assert_allclose(np.sqrt(np.diag(cov1_r)), se1)
    assert_allclose(np.sqrt(np.diag(cov2_r)), se2)

    # compare default for nlags
    cov3 = sw.cov_hac_simple(res_olsg, use_correction=False)
    cov4 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=False)
    assert_allclose(cov3, cov4)
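The default-lag comparison at the end passes because the automatic choice coincides with 4 for this sample. Assuming cov_hac_simple falls back to the common Newey-West rule of thumb floor(4 * (T/100)**(2/9)) when nlags is omitted (an assumption about the default, not stated in the test), T = 202 gives exactly 4:

import numpy as np

nobs = 202                                            # length of the differenced macro series
nlags = int(np.floor(4 * (nobs / 100.0) ** (2.0 / 9.0)))
print(nlags)                                          # 4, matching the explicit nlags=4 call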
Example #7
    def setup(self):
        res_ols = self.res1.get_robustcov_results("HAC", maxlags=4,
                                                  use_correction=True,
                                                  use_t=True)
        self.res3 = self.res1
        self.res1 = res_ols
        self.bse_robust = res_ols.bse
        self.cov_robust = res_ols.cov_params()
        cov1 = sw.cov_hac_simple(res_ols, nlags=4, use_correction=True)
        se1 = sw.se_cov(cov1)
        self.bse_robust2 = se1
        self.cov_robust2 = cov1
        self.small = True
        self.res2 = res.results_ivhac4_small
Example #8
    def setup(self):
        res_ols = self.res1.get_robustcov_results('HAC', maxlags=4,
                                                  use_correction=True,
                                                  use_t=True)
        self.res3 = self.res1
        self.res1 = res_ols
        self.bse_robust = res_ols.bse
        self.cov_robust = res_ols.cov_params()
        cov1 = sw.cov_hac_simple(res_ols, nlags=4, use_correction=True)
        se1 = sw.se_cov(cov1)
        self.bse_robust2 = se1
        self.cov_robust2 = cov1
        self.small = True
        self.res2 = res.results_ivhac4_small
    def test_all(self):

        d = macrodata.load().data
        #import datasetswsm.greene as g
        #d = g.load('5-1')

        #growth rates
        gs_l_realinv = 400 * np.diff(np.log(d['realinv']))
        gs_l_realgdp = 400 * np.diff(np.log(d['realgdp']))

        #simple diff, not growthrate, I want heteroscedasticity later for testing
        endogd = np.diff(d['realinv'])
        exogd = add_constant(np.c_[np.diff(d['realgdp']), d['realint'][:-1]])

        endogg = gs_l_realinv
        exogg = add_constant(np.c_[gs_l_realgdp, d['realint'][:-1]])

        res_ols = OLS(endogg, exogg).fit()
        #print res_ols.params

        mod_g1 = GLSAR(endogg, exogg, rho=-0.108136)
        res_g1 = mod_g1.fit()
        #print res_g1.params

        mod_g2 = GLSAR(endogg, exogg, rho=-0.108136)   #-0.1335859) from R
        res_g2 = mod_g2.iterative_fit(maxiter=5)
        #print res_g2.params


        rho = -0.108136

        #                 coefficient   std. error   t-ratio    p-value 95% CONFIDENCE INTERVAL
        partable = np.array([
                        [-9.50990,  0.990456, -9.602, 3.65e-018, -11.4631, -7.55670], # ***
                        [ 4.37040,  0.208146, 21.00,  2.93e-052,  3.95993, 4.78086], # ***
                        [-0.579253, 0.268009, -2.161, 0.0319, -1.10777, -0.0507346]]) #    **

        #Statistics based on the rho-differenced data:

        result_gretl_g1 = dict(
        endog_mean = ("Mean dependent var",   3.113973),
        endog_std = ("S.D. dependent var",   18.67447),
        ssr = ("Sum squared resid",    22530.90),
        mse_resid_sqrt = ("S.E. of regression",   10.66735),
        rsquared = ("R-squared",            0.676973),
        rsquared_adj = ("Adjusted R-squared",   0.673710),
        fvalue = ("F(2, 198)",            221.0475),
        f_pvalue = ("P-value(F)",           3.56e-51),
        resid_acf1 = ("rho",                 -0.003481),
        dw = ("Durbin-Watson",        1.993858))


        #fstatistic, p-value, df1, df2
        reset_2_3 = [5.219019, 0.00619, 2, 197, "f"]
        reset_2 = [7.268492, 0.00762, 1, 198, "f"]
        reset_3 = [5.248951, 0.023, 1, 198, "f"]
        #LM-statistic, p-value, df
        arch_4 = [7.30776, 0.120491, 4, "chi2"]

        #multicollinearity
        vif = [1.002, 1.002]
        cond_1norm = 6862.0664
        determinant = 1.0296049e+009
        reciprocal_condition_number = 0.013819244

        #Chi-square(2): test-statistic, pvalue, df
        normality = [20.2792, 3.94837e-005, 2]

        #tests
        res = res_g1  #with rho from Gretl

        #basic

        assert_almost_equal(res.params, partable[:,0], 4)
        assert_almost_equal(res.bse, partable[:,1], 6)
        assert_almost_equal(res.tvalues, partable[:,2], 2)

        assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2)
        #assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=7) #not in gretl
        #assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=7) #FAIL
        #assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=7) #FAIL
        assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5)
        assert_almost_equal(res.fvalue, result_gretl_g1['fvalue'][1], decimal=4)
        assert_approx_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], significant=2)
        #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO

        #arch
        #sm_arch = smsdia.acorr_lm(res.wresid**2, maxlag=4, autolag=None)
        sm_arch = smsdia.het_arch(res.wresid, maxlag=4)
        assert_almost_equal(sm_arch[0], arch_4[0], decimal=4)
        assert_almost_equal(sm_arch[1], arch_4[1], decimal=6)

        #tests
        res = res_g2 #with estimated rho

        #estimated lag coefficient
        assert_almost_equal(res.model.rho, rho, decimal=3)

        #basic
        assert_almost_equal(res.params, partable[:,0], 4)
        assert_almost_equal(res.bse, partable[:,1], 3)
        assert_almost_equal(res.tvalues, partable[:,2], 2)

        assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2)
        #assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=7) #not in gretl
        #assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=7) #FAIL
        #assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=7) #FAIL
        assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5)
        assert_almost_equal(res.fvalue, result_gretl_g1['fvalue'][1], decimal=0)
        assert_almost_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], decimal=6)
        #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO



        c = oi.reset_ramsey(res, degree=2)
        compare_ftest(c, reset_2, decimal=(2,4))
        c = oi.reset_ramsey(res, degree=3)
        compare_ftest(c, reset_2_3, decimal=(2,4))

        #arch
        #sm_arch = smsdia.acorr_lm(res.wresid**2, maxlag=4, autolag=None)
        sm_arch = smsdia.het_arch(res.wresid, maxlag=4)
        assert_almost_equal(sm_arch[0], arch_4[0], decimal=1)
        assert_almost_equal(sm_arch[1], arch_4[1], decimal=2)



        '''
        Performing iterative calculation of rho...

                         ITER       RHO        ESS
                           1     -0.10734   22530.9
                           2     -0.10814   22530.9

        Model 4: Cochrane-Orcutt, using observations 1959:3-2009:3 (T = 201)
        Dependent variable: ds_l_realinv
        rho = -0.108136

                         coefficient   std. error   t-ratio    p-value
          -------------------------------------------------------------
          const           -9.50990      0.990456    -9.602    3.65e-018 ***
          ds_l_realgdp     4.37040      0.208146    21.00     2.93e-052 ***
          realint_1       -0.579253     0.268009    -2.161    0.0319    **

        Statistics based on the rho-differenced data:

        Mean dependent var   3.113973   S.D. dependent var   18.67447
        Sum squared resid    22530.90   S.E. of regression   10.66735
        R-squared            0.676973   Adjusted R-squared   0.673710
        F(2, 198)            221.0475   P-value(F)           3.56e-51
        rho                 -0.003481   Durbin-Watson        1.993858
        '''

        '''
        RESET test for specification (squares and cubes)
        Test statistic: F = 5.219019,
        with p-value = P(F(2,197) > 5.21902) = 0.00619

        RESET test for specification (squares only)
        Test statistic: F = 7.268492,
        with p-value = P(F(1,198) > 7.26849) = 0.00762

        RESET test for specification (cubes only)
        Test statistic: F = 5.248951,
        with p-value = P(F(1,198) > 5.24895) = 0.023:
        '''

        '''
        Test for ARCH of order 4

                     coefficient   std. error   t-ratio   p-value
          --------------------------------------------------------
          alpha(0)   97.0386       20.3234       4.775    3.56e-06 ***
          alpha(1)    0.176114      0.0714698    2.464    0.0146   **
          alpha(2)   -0.0488339     0.0724981   -0.6736   0.5014
          alpha(3)   -0.0705413     0.0737058   -0.9571   0.3397
          alpha(4)    0.0384531     0.0725763    0.5298   0.5968

          Null hypothesis: no ARCH effect is present
          Test statistic: LM = 7.30776
          with p-value = P(Chi-square(4) > 7.30776) = 0.120491:
        '''

        '''
        Variance Inflation Factors

        Minimum possible value = 1.0
        Values > 10.0 may indicate a collinearity problem

           ds_l_realgdp    1.002
              realint_1    1.002

        VIF(j) = 1/(1 - R(j)^2), where R(j) is the multiple correlation coefficient
        between variable j and the other independent variables

        Properties of matrix X'X:

         1-norm = 6862.0664
         Determinant = 1.0296049e+009
         Reciprocal condition number = 0.013819244
        '''
        '''
        Test for ARCH of order 4 -
          Null hypothesis: no ARCH effect is present
          Test statistic: LM = 7.30776
          with p-value = P(Chi-square(4) > 7.30776) = 0.120491

        Test of common factor restriction -
          Null hypothesis: restriction is acceptable
          Test statistic: F(2, 195) = 0.426391
          with p-value = P(F(2, 195) > 0.426391) = 0.653468

        Test for normality of residual -
          Null hypothesis: error is normally distributed
          Test statistic: Chi-square(2) = 20.2792
          with p-value = 3.94837e-005:
        '''

        #no idea what this is
        '''
        Augmented regression for common factor test
        OLS, using observations 1959:3-2009:3 (T = 201)
        Dependent variable: ds_l_realinv

                           coefficient   std. error   t-ratio    p-value
          ---------------------------------------------------------------
          const            -10.9481      1.35807      -8.062    7.44e-014 ***
          ds_l_realgdp       4.28893     0.229459     18.69     2.40e-045 ***
          realint_1         -0.662644    0.334872     -1.979    0.0492    **
          ds_l_realinv_1    -0.108892    0.0715042    -1.523    0.1294
          ds_l_realgdp_1     0.660443    0.390372      1.692    0.0923    *
          realint_2          0.0769695   0.341527      0.2254   0.8219

          Sum of squared residuals = 22432.8

        Test of common factor restriction

          Test statistic: F(2, 195) = 0.426391, with p-value = 0.653468
        '''


        ################ with OLS, HAC errors

        #Model 5: OLS, using observations 1959:2-2009:3 (T = 202)
        #Dependent variable: ds_l_realinv
        #HAC standard errors, bandwidth 4 (Bartlett kernel)

        #coefficient   std. error   t-ratio    p-value 95% CONFIDENCE INTERVAL
        #for confidence interval t(199, 0.025) = 1.972

        partable = np.array([
        [-9.48167,      1.17709,     -8.055,    7.17e-014, -11.8029, -7.16049], # ***
        [4.37422,      0.328787,    13.30,     2.62e-029, 3.72587, 5.02258], #***
        [-0.613997,     0.293619,    -2.091,    0.0378, -1.19300, -0.0349939]]) # **

        result_gretl_g1 = dict(
                    endog_mean = ("Mean dependent var",   3.257395),
                    endog_std = ("S.D. dependent var",   18.73915),
                    ssr = ("Sum squared resid",    22799.68),
                    mse_resid_sqrt = ("S.E. of regression",   10.70380),
                    rsquared = ("R-squared",            0.676978),
                    rsquared_adj = ("Adjusted R-squared",   0.673731),
                    fvalue = ("F(2, 199)",            90.79971),
                    f_pvalue = ("P-value(F)",           9.53e-29),
                    llf = ("Log-likelihood",      -763.9752),
                    aic = ("Akaike criterion",     1533.950),
                    bic = ("Schwarz criterion",    1543.875),
                    hqic = ("Hannan-Quinn",         1537.966),
                    resid_acf1 = ("rho",                 -0.107341),
                    dw = ("Durbin-Watson",        2.213805))

        linear_logs = [1.68351, 0.430953, 2, "chi2"]
        #for logs: dropping 70 nan or incomplete observations, T=133
        #(res_ols.model.exog <=0).any(1).sum() = 69  ?not 70
        linear_squares = [7.52477, 0.0232283, 2, "chi2"]

        #Autocorrelation, Breusch-Godfrey test for autocorrelation up to order 4
        lm_acorr4 = [1.17928, 0.321197, 4, 195, "F"]
        lm2_acorr4 = [4.771043, 0.312, 4, "chi2"]
        acorr_ljungbox4 = [5.23587, 0.264, 4, "chi2"]

        #break
        cusum_Harvey_Collier  = [0.494432, 0.621549, 198, "t"] #stats.t.sf(0.494432, 198)*2
        #see cusum results in files
        break_qlr = [3.01985, 0.1, 3, 196, "maxF"]  #TODO check this, max at 2001:4
        break_chow = [13.1897, 0.00424384, 3, "chi2"] # break at 1984:1

        arch_4 = [3.43473, 0.487871, 4, "chi2"]

        normality = [23.962, 0.00001, 2, "chi2"]

        het_white = [33.503723, 0.000003, 5, "chi2"]
        het_breusch_pagan = [1.302014, 0.521520, 2, "chi2"]  #TODO: not available
        het_breusch_pagan_konker = [0.709924, 0.701200, 2, "chi2"]


        reset_2_3 = [5.219019, 0.00619, 2, 197, "f"]
        reset_2 = [7.268492, 0.00762, 1, 198, "f"]
        reset_3 = [5.248951, 0.023, 1, 198, "f"]  #not available

        cond_1norm = 5984.0525
        determinant = 7.1087467e+008
        reciprocal_condition_number = 0.013826504
        vif = [1.001, 1.001]

        names = 'date   residual        leverage       influence        DFFITS'.split()
        cur_dir = os.path.abspath(os.path.dirname(__file__))
        fpath = os.path.join(cur_dir, 'results/leverage_influence_ols_nostars.txt')
        lev = np.genfromtxt(fpath, skip_header=3, skip_footer=1,
                            converters={0:lambda s: s})
        #either numpy 1.6 or python 3.2 changed behavior
        if np.isnan(lev[-1]['f1']):
            lev = np.genfromtxt(fpath, skip_header=3, skip_footer=2,
                                converters={0:lambda s: s})

        lev.dtype.names = names

        res = res_ols #for easier copying

        cov_hac = sw.cov_hac_simple(res, nlags=4, use_correction=False)
        bse_hac =  sw.se_cov(cov_hac)

        assert_almost_equal(res.params, partable[:,0], 5)
        assert_almost_equal(bse_hac, partable[:,1], 5)
        #TODO

        assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2)
        assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=4) #not in gretl
        assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=6) #FAIL
        assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=6) #FAIL
        assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5)
        #f-value is based on cov_hac I guess
        #res2 = res.get_robustcov_results(cov_type='HC1')
        # TODO: fvalue differs from Gretl, trying any of the HCx
        #assert_almost_equal(res2.fvalue, result_gretl_g1['fvalue'][1], decimal=0) #FAIL
        #assert_approx_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], significant=1) #FAIL
        #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO


        c = oi.reset_ramsey(res, degree=2)
        compare_ftest(c, reset_2, decimal=(6,5))
        c = oi.reset_ramsey(res, degree=3)
        compare_ftest(c, reset_2_3, decimal=(6,5))

        linear_sq = smsdia.linear_lm(res.resid, res.model.exog)
        assert_almost_equal(linear_sq[0], linear_squares[0], decimal=6)
        assert_almost_equal(linear_sq[1], linear_squares[1], decimal=7)

        hbpk = smsdia.het_breuschpagan(res.resid, res.model.exog)
        assert_almost_equal(hbpk[0], het_breusch_pagan_konker[0], decimal=6)
        assert_almost_equal(hbpk[1], het_breusch_pagan_konker[1], decimal=6)

        hw = smsdia.het_white(res.resid, res.model.exog)
        assert_almost_equal(hw[:2], het_white[:2], 6)

        #arch
        #sm_arch = smsdia.acorr_lm(res.resid**2, maxlag=4, autolag=None)
        sm_arch = smsdia.het_arch(res.resid, maxlag=4)
        assert_almost_equal(sm_arch[0], arch_4[0], decimal=5)
        assert_almost_equal(sm_arch[1], arch_4[1], decimal=6)

        vif2 = [oi.variance_inflation_factor(res.model.exog, k) for k in [1,2]]

        infl = oi.OLSInfluence(res_ols)
        #print np.max(np.abs(lev['DFFITS'] - infl.dffits[0]))
        #print np.max(np.abs(lev['leverage'] - infl.hat_matrix_diag))
        #print np.max(np.abs(lev['influence'] - infl.influence))  #just added this based on Gretl

        #just rough test, low decimal in Gretl output,
        assert_almost_equal(lev['residual'], res.resid, decimal=3)
        assert_almost_equal(lev['DFFITS'], infl.dffits[0], decimal=3)
        assert_almost_equal(lev['leverage'], infl.hat_matrix_diag, decimal=3)
        assert_almost_equal(lev['influence'], infl.influence, decimal=4)
Example #10
params_true = np.ones(kvars)
y_true = np.dot(exog, params_true)
sigma = 0.1 + np.exp(exog[:,-1])
endog = y_true + sigma * np.random.randn(nobs)

self = sm.OLS(endog, exog).fit()

print(self.HC3_se)
print(sw.se_cov(sw.cov_hc3(self)))
#test standalone refactoring
assert_almost_equal(sw.se_cov(sw.cov_hc0(self)), self.HC0_se, 15)
assert_almost_equal(sw.se_cov(sw.cov_hc1(self)), self.HC1_se, 15)
assert_almost_equal(sw.se_cov(sw.cov_hc2(self)), self.HC2_se, 15)
assert_almost_equal(sw.se_cov(sw.cov_hc3(self)), self.HC3_se, 15)
print(self.HC0_se)
print(sw.se_cov(sw.cov_hac_simple(self, nlags=0, use_correction=False)))
#test White as HAC with nlags=0, same as nlags=1 ?
bse_hac0 = sw.se_cov(sw.cov_hac_simple(self, nlags=0, use_correction=False))
assert_almost_equal(bse_hac0, self.HC0_se, 15)
print(bse_hac0)
#test White as HAC with nlags=0, same as nlags=1 ?
bse_hac0c = sw.se_cov(sw.cov_hac_simple(self, nlags=0, use_correction=True))
assert_almost_equal(bse_hac0c, self.HC1_se, 15)

bse_w = sw.se_cov(sw.cov_white_simple(self, use_correction=False))
print(bse_w)
#test White
assert_almost_equal(bse_w, self.HC0_se, 15)

bse_wc = sw.se_cov(sw.cov_white_simple(self, use_correction=True))
print(bse_wc)
Example #11
def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
    """create new results instance with robust covariance as default

    Parameters
    ----------
    cov_type : string
        the type of robust sandwich estimator to use. see Notes below
    use_t : bool
        If true, then the t distribution is used for inference.
        If false, then the normal distribution is used.
    kwds : depends on cov_type
        Required or optional arguments for robust covariance calculation.
        see Notes below

    Returns
    -------
    results : results instance
        This method creates a new results instance with the requested
        robust covariance as the default covariance of the parameters.
        Inferential statistics like p-values and hypothesis tests will be
        based on this covariance matrix.

    Notes
    -----
    Warning: Some of the options and defaults in cov_kwds may be changed in a
    future version.

    The covariance keywords provide an option 'scaling_factor' to adjust the
    scaling of the covariance matrix: if it is given and not `None`, the
    covariance is multiplied by this factor. This allows the user to adjust
    the scaling of the covariance matrix to match other statistical packages.
    For example, `scaling_factor=(nobs - 1.) / (nobs - k_params)` provides a
    correction so that the robust covariance matrices match those of Stata in
    some models such as GLM and the discrete models.

    The following covariance types and required or optional arguments are
    currently available:

    - 'HC0', 'HC1', 'HC2', 'HC3' and no keyword arguments:
        heteroscedasticity robust covariance
    - 'HAC' and keywords

        - `maxlag` integer (required) : number of lags to use
        - `kernel` string (optional) : kernel, default is Bartlett
        - `use_correction` bool (optional) : If true, use small sample
              correction

    - 'cluster' and required keyword `groups`, integer group indicator

        - `groups` array_like, integer (required) :
              index of clusters or groups
        - `use_correction` bool (optional) :
              If True the sandwich covariance is calculated with a small
              sample correction.
              If False the sandwich covariance is calculated without a
              small sample correction.
        - `df_correction` bool (optional)
              If True (default), then the degrees of freedom for the
              inferential statistics and hypothesis tests, such as
              pvalues, f_pvalue, conf_int, and t_test and f_test, are
              based on the number of groups minus one instead of the
              total number of observations minus the number of explanatory
              variables. `df_resid` of the results instance is adjusted.
              If False, then `df_resid` of the results instance is not
              adjusted.

    - 'hac-groupsum' Driscoll and Kraay, heteroscedasticity and
        autocorrelation robust standard errors in panel data
        keywords

        - `time` array_like (required) : index of time periods
        - `maxlag` integer (required) : number of lags to use
        - `kernel` string (optional) : kernel, default is Bartlett
        - `use_correction` False or string in ['hac', 'cluster'] (optional) :
              If False the sandwich covariance is calculated without a
              small sample correction.
              If `use_correction='cluster'` (default), then the same
              small sample correction as in the case of `cov_type='cluster'`
              is used.
        - `df_correction` bool (optional)
              adjustment to df_resid, see cov_type 'cluster' above
              #TODO: we need more options here

    - 'hac-panel' heteroscedasticity and autocorrelation robust standard
        errors in panel data.
        The data needs to be sorted in this case, the time series for
        each panel unit or cluster need to be stacked. The membership to
        a timeseries of an individual or group can be either specified by
        group indicators or by increasing time periods.

        keywords

        - either `groups` or `time` : array_like (required)
          `groups` : indicator for groups
          `time` : index of time periods
        - `maxlag` integer (required) : number of lags to use
        - `kernel` string (optional) : kernel, default is Bartlett
        - `use_correction` False or string in ['hac', 'cluster'] (optional) :
              If False the sandwich covariance is calculated without a
              small sample correction.
        - `df_correction` bool (optional)
              adjustment to df_resid, see cov_type 'cluster' above
              #TODO: we need more options here

    Reminder:
    `use_correction` in "hac-groupsum" and "hac-panel" is not a bool;
    it needs to be one of [False, 'hac', 'cluster'].

    TODO: Currently there is no check for extra or misspelled keywords,
    except in the case of cov_type `HCx`

    """

    import statsmodels.stats.sandwich_covariance as sw

    #normalize names
    if cov_type == 'nw-panel':
        cov_type = 'hac-panel'
    if cov_type == 'nw-groupsum':
        cov_type = 'hac-groupsum'
    if 'kernel' in kwds:
        kwds['weights_func'] = kwds.pop('kernel')

    # pop because HCx raises if any kwds
    sc_factor = kwds.pop('scaling_factor', None)

    # TODO: make separate function that returns a robust cov plus info
    use_self = kwds.pop('use_self', False)
    if use_self:
        res = self
    else:
        # this doesn't work for most models, use raw instance instead from fit
        res = self.__class__(self.model, self.params,
                   normalized_cov_params=self.normalized_cov_params,
                   scale=self.scale)

    res.cov_type = cov_type
    # use_t might already be defined by the class, and already set
    if use_t is None:
        use_t = self.use_t
    res.cov_kwds = {'use_t':use_t}  # store for information
    res.use_t = use_t

    adjust_df = False
    if cov_type in ['cluster', 'hac-panel', 'hac-groupsum']:
        df_correction = kwds.get('df_correction', None)
        # TODO: check also use_correction, do I need all combinations?
        if df_correction is not False:  # i.e. in [None, True]
            # user didn't explicitly set it to False
            adjust_df = True

    res.cov_kwds['adjust_df'] = adjust_df

    # verify and set kwds, and calculate cov
    # TODO: this should be outsourced in a function so we can reuse it in
    #       other models
    # TODO: make it DRYer   repeated code for checking kwds
    if cov_type.upper() in ('HC0', 'HC1', 'HC2', 'HC3'):
        if kwds:
            raise ValueError('heteroscedasticity robust covariance ' +
                             'does not use keywords')
        res.cov_kwds['description'] = ('Standard Errors are heteroscedasticity ' +
                                       'robust ' + '(' + cov_type + ')')

        res.cov_params_default = getattr(self, 'cov_' + cov_type.upper(), None)
        if res.cov_params_default is None:
            # results classes that don't have cov_HCx attribute
            res.cov_params_default = sw.cov_white_simple(self,
                                                         use_correction=False)
    elif cov_type.lower() == 'hac':
        maxlags = kwds['maxlags']   # required?, default in cov_hac_simple
        res.cov_kwds['maxlags'] = maxlags
        weights_func = kwds.get('weights_func', sw.weights_bartlett)
        res.cov_kwds['weights_func'] = weights_func
        use_correction = kwds.get('use_correction', False)
        res.cov_kwds['use_correction'] = use_correction
        res.cov_kwds['description'] = ('Standard Errors are heteroscedasticity ' +
             'and autocorrelation robust (HAC) using %d lags and %s small ' +
             'sample correction') % (maxlags, ['without', 'with'][use_correction])

        res.cov_params_default = sw.cov_hac_simple(self, nlags=maxlags,
                                             weights_func=weights_func,
                                             use_correction=use_correction)
    elif cov_type.lower() == 'cluster':
        #cluster robust standard errors, one- or two-way
        groups = kwds['groups']
        if not hasattr(groups, 'shape'):
            groups = np.asarray(groups).T

        if groups.ndim >= 2:
            groups = groups.squeeze()

        res.cov_kwds['groups'] = groups
        use_correction = kwds.get('use_correction', True)
        res.cov_kwds['use_correction'] = use_correction
        if groups.ndim == 1:
            if adjust_df:
                # need to find number of groups
                # duplicate work
                self.n_groups = n_groups = len(np.unique(groups))
            res.cov_params_default = sw.cov_cluster(self, groups,
                                             use_correction=use_correction)

        elif groups.ndim == 2:
            if hasattr(groups, 'values'):
                groups = groups.values

            if adjust_df:
                # need to find number of groups
                # duplicate work
                n_groups0 = len(np.unique(groups[:,0]))
                n_groups1 = len(np.unique(groups[:, 1]))
                self.n_groups = (n_groups0, n_groups1)
                n_groups = min(n_groups0, n_groups1) # use for adjust_df

            # Note: sw.cov_cluster_2groups has 3 returns
            res.cov_params_default = sw.cov_cluster_2groups(self, groups,
                                         use_correction=use_correction)[0]
        else:
            raise ValueError('only two groups are supported')
        res.cov_kwds['description'] = ('Standard Errors are robust to ' +
                            'cluster correlation ' + '(' + cov_type + ')')

    elif cov_type.lower() == 'hac-panel':
        #cluster robust standard errors
        res.cov_kwds['time'] = time = kwds.get('time', None)
        res.cov_kwds['groups'] = groups = kwds.get('groups', None)
        #TODO: nlags is currently required
        #nlags = kwds.get('nlags', True)
        #res.cov_kwds['nlags'] = nlags
        #TODO: `nlags` or `maxlags`
        res.cov_kwds['maxlags'] = maxlags = kwds['maxlags']
        use_correction = kwds.get('use_correction', 'hac')
        res.cov_kwds['use_correction'] = use_correction
        weights_func = kwds.get('weights_func', sw.weights_bartlett)
        res.cov_kwds['weights_func'] = weights_func
        # TODO: clumsy time index in cov_nw_panel
        if groups is not None:
            groups = np.asarray(groups)
            tt = (np.nonzero(groups[:-1] != groups[1:])[0] + 1).tolist()
            nobs_ = len(groups)
        elif time is not None:
            # TODO: clumsy time index in cov_nw_panel
            time = np.asarray(time)
            tt = (np.nonzero(time[1:] < time[:-1])[0] + 1).tolist()
            nobs_ = len(time)
        else:
            raise ValueError('either time or groups needs to be given')
        groupidx = lzip([0] + tt, tt + [nobs_])
        self.n_groups = n_groups = len(groupidx)
        res.cov_params_default = sw.cov_nw_panel(self, maxlags, groupidx,
                                            weights_func=weights_func,
                                            use_correction=use_correction)
        res.cov_kwds['description'] = ('Standard Errors are robust to ' +
                            'cluster correlation ' + '(' + cov_type + ')')
    elif cov_type.lower() == 'hac-groupsum':
        # Driscoll-Kraay standard errors
        res.cov_kwds['time'] = time = kwds['time']
        #TODO: nlags is currently required
        #nlags = kwds.get('nlags', True)
        #res.cov_kwds['nlags'] = nlags
        #TODO: `nlags` or `maxlags`
        res.cov_kwds['maxlags'] = maxlags = kwds['maxlags']
        use_correction = kwds.get('use_correction', 'cluster')
        res.cov_kwds['use_correction'] = use_correction
        weights_func = kwds.get('weights_func', sw.weights_bartlett)
        res.cov_kwds['weights_func'] = weights_func
        if adjust_df:
            # need to find number of groups
            tt = (np.nonzero(time[1:] < time[:-1])[0] + 1)
            self.n_groups = n_groups = len(tt) + 1
        res.cov_params_default = sw.cov_nw_groupsum(self, maxlags, time,
                                        weights_func=weights_func,
                                        use_correction=use_correction)
        res.cov_kwds['description'] = (
                    'Driscoll and Kraay Standard Errors are robust to ' +
                    'cluster correlation ' + '(' + cov_type + ')')
    else:
        raise ValueError('cov_type not recognized. See docstring for ' +
                         'available options and spelling')

    # generic optional factor to scale covariance

    res.cov_kwds['scaling_factor'] = sc_factor
    if sc_factor is not None:
        res.cov_params_default *= sc_factor

    if adjust_df:
        # Note: df_resid is used for scale and others, add new attribute
        res.df_resid_inference = n_groups - 1

    return res
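A minimal usage sketch of the cov_type options documented in the docstring above, run on a synthetic OLS fit; the data, seed, and group assignment here are made up for illustration:

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(12345)
nobs = 200
exog = sm.add_constant(rng.standard_normal((nobs, 2)))
endog = exog @ np.array([1.0, 0.5, -0.3]) + rng.standard_normal(nobs)
res = sm.OLS(endog, exog).fit()

# HAC (Newey-West) covariance with 4 lags and the small-sample correction
res_hac = res.get_robustcov_results('HAC', maxlags=4, use_correction=True)

# one-way cluster-robust covariance; with adjust_df the inferential degrees
# of freedom become n_groups - 1 (see df_resid_inference in the code above)
groups = np.repeat(np.arange(20), nobs // 20)
res_clu = res.get_robustcov_results('cluster', groups=groups)

print(res_hac.bse)
print(res_clu.bse)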
    def test_all(self):

        d = macrodata.load().data
        #import datasetswsm.greene as g
        #d = g.load('5-1')

        #growth rates
        gs_l_realinv = 400 * np.diff(np.log(d['realinv']))
        gs_l_realgdp = 400 * np.diff(np.log(d['realgdp']))

        #simple diff, not growthrate, I want heteroscedasticity later for testing
        endogd = np.diff(d['realinv'])
        exogd = add_constant(np.c_[np.diff(d['realgdp']), d['realint'][:-1]],
                            prepend=True)

        endogg = gs_l_realinv
        exogg = add_constant(np.c_[gs_l_realgdp, d['realint'][:-1]],prepend=True)

        res_ols = OLS(endogg, exogg).fit()
        #print res_ols.params

        mod_g1 = GLSAR(endogg, exogg, rho=-0.108136)
        res_g1 = mod_g1.fit()
        #print res_g1.params

        mod_g2 = GLSAR(endogg, exogg, rho=-0.108136)   #-0.1335859) from R
        res_g2 = mod_g2.iterative_fit(maxiter=5)
        #print res_g2.params


        rho = -0.108136

        #                 coefficient   std. error   t-ratio    p-value 95% CONFIDENCE INTERVAL
        partable = np.array([
                        [-9.50990,  0.990456, -9.602, 3.65e-018, -11.4631, -7.55670], # ***
                        [ 4.37040,  0.208146, 21.00,  2.93e-052,  3.95993, 4.78086], # ***
                        [-0.579253, 0.268009, -2.161, 0.0319, -1.10777, -0.0507346]]) #    **

        #Statistics based on the rho-differenced data:

        result_gretl_g1 = dict(
        endog_mean = ("Mean dependent var",   3.113973),
        endog_std = ("S.D. dependent var",   18.67447),
        ssr = ("Sum squared resid",    22530.90),
        mse_resid_sqrt = ("S.E. of regression",   10.66735),
        rsquared = ("R-squared",            0.676973),
        rsquared_adj = ("Adjusted R-squared",   0.673710),
        fvalue = ("F(2, 198)",            221.0475),
        f_pvalue = ("P-value(F)",           3.56e-51),
        resid_acf1 = ("rho",                 -0.003481),
        dw = ("Durbin-Watson",        1.993858))


        #fstatistic, p-value, df1, df2
        reset_2_3 = [5.219019, 0.00619, 2, 197, "f"]
        reset_2 = [7.268492, 0.00762, 1, 198, "f"]
        reset_3 = [5.248951, 0.023, 1, 198, "f"]
        #LM-statistic, p-value, df
        arch_4 = [7.30776, 0.120491, 4, "chi2"]

        #multicollinearity
        vif = [1.002, 1.002]
        cond_1norm = 6862.0664
        determinant = 1.0296049e+009
        reciprocal_condition_number = 0.013819244

        #Chi-square(2): test-statistic, pvalue, df
        normality = [20.2792, 3.94837e-005, 2]

        #tests
        res = res_g1  #with rho from Gretl

        #basic

        assert_almost_equal(res.params, partable[:,0], 4)
        assert_almost_equal(res.bse, partable[:,1], 6)
        assert_almost_equal(res.tvalues, partable[:,2], 2)

        assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2)
        #assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=7) #not in gretl
        #assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=7) #FAIL
        #assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=7) #FAIL
        assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5)
        assert_almost_equal(res.fvalue, result_gretl_g1['fvalue'][1], decimal=4)
        assert_approx_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], significant=2)
        #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO

        #arch
        #sm_arch = smsdia.acorr_lm(res.wresid**2, maxlag=4, autolag=None)
        sm_arch = smsdia.het_arch(res.wresid, maxlag=4)
        assert_almost_equal(sm_arch[0], arch_4[0], decimal=4)
        assert_almost_equal(sm_arch[1], arch_4[1], decimal=6)

        #tests
        res = res_g2 #with estimated rho

        #estimated lag coefficient
        assert_almost_equal(res.model.rho, rho, decimal=3)

        #basic
        assert_almost_equal(res.params, partable[:,0], 4)
        assert_almost_equal(res.bse, partable[:,1], 3)
        assert_almost_equal(res.tvalues, partable[:,2], 2)

        assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2)
        #assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=7) #not in gretl
        #assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=7) #FAIL
        #assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=7) #FAIL
        assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5)
        assert_almost_equal(res.fvalue, result_gretl_g1['fvalue'][1], decimal=0)
        assert_almost_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], decimal=6)
        #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO



        c = oi.reset_ramsey(res, degree=2)
        compare_ftest(c, reset_2, decimal=(2,4))
        c = oi.reset_ramsey(res, degree=3)
        compare_ftest(c, reset_2_3, decimal=(2,4))

        #arch
        #sm_arch = smsdia.acorr_lm(res.wresid**2, maxlag=4, autolag=None)
        sm_arch = smsdia.het_arch(res.wresid, maxlag=4)
        assert_almost_equal(sm_arch[0], arch_4[0], decimal=1)
        assert_almost_equal(sm_arch[1], arch_4[1], decimal=2)



        '''
        Performing iterative calculation of rho...

                         ITER       RHO        ESS
                           1     -0.10734   22530.9
                           2     -0.10814   22530.9

        Model 4: Cochrane-Orcutt, using observations 1959:3-2009:3 (T = 201)
        Dependent variable: ds_l_realinv
        rho = -0.108136

                         coefficient   std. error   t-ratio    p-value
          -------------------------------------------------------------
          const           -9.50990      0.990456    -9.602    3.65e-018 ***
          ds_l_realgdp     4.37040      0.208146    21.00     2.93e-052 ***
          realint_1       -0.579253     0.268009    -2.161    0.0319    **

        Statistics based on the rho-differenced data:

        Mean dependent var   3.113973   S.D. dependent var   18.67447
        Sum squared resid    22530.90   S.E. of regression   10.66735
        R-squared            0.676973   Adjusted R-squared   0.673710
        F(2, 198)            221.0475   P-value(F)           3.56e-51
        rho                 -0.003481   Durbin-Watson        1.993858
        '''

        '''
        RESET test for specification (squares and cubes)
        Test statistic: F = 5.219019,
        with p-value = P(F(2,197) > 5.21902) = 0.00619

        RESET test for specification (squares only)
        Test statistic: F = 7.268492,
        with p-value = P(F(1,198) > 7.26849) = 0.00762

        RESET test for specification (cubes only)
        Test statistic: F = 5.248951,
        with p-value = P(F(1,198) > 5.24895) = 0.023:
        '''

        '''
        Test for ARCH of order 4

                     coefficient   std. error   t-ratio   p-value
          --------------------------------------------------------
          alpha(0)   97.0386       20.3234       4.775    3.56e-06 ***
          alpha(1)    0.176114      0.0714698    2.464    0.0146   **
          alpha(2)   -0.0488339     0.0724981   -0.6736   0.5014
          alpha(3)   -0.0705413     0.0737058   -0.9571   0.3397
          alpha(4)    0.0384531     0.0725763    0.5298   0.5968

          Null hypothesis: no ARCH effect is present
          Test statistic: LM = 7.30776
          with p-value = P(Chi-square(4) > 7.30776) = 0.120491:
        '''

        '''
        Variance Inflation Factors

        Minimum possible value = 1.0
        Values > 10.0 may indicate a collinearity problem

           ds_l_realgdp    1.002
              realint_1    1.002

        VIF(j) = 1/(1 - R(j)^2), where R(j) is the multiple correlation coefficient
        between variable j and the other independent variables

        Properties of matrix X'X:

         1-norm = 6862.0664
         Determinant = 1.0296049e+009
         Reciprocal condition number = 0.013819244
        '''
        '''
        Test for ARCH of order 4 -
          Null hypothesis: no ARCH effect is present
          Test statistic: LM = 7.30776
          with p-value = P(Chi-square(4) > 7.30776) = 0.120491

        Test of common factor restriction -
          Null hypothesis: restriction is acceptable
          Test statistic: F(2, 195) = 0.426391
          with p-value = P(F(2, 195) > 0.426391) = 0.653468

        Test for normality of residual -
          Null hypothesis: error is normally distributed
          Test statistic: Chi-square(2) = 20.2792
          with p-value = 3.94837e-005:
        '''

        #no idea what this is
        '''
        Augmented regression for common factor test
        OLS, using observations 1959:3-2009:3 (T = 201)
        Dependent variable: ds_l_realinv

                           coefficient   std. error   t-ratio    p-value
          ---------------------------------------------------------------
          const            -10.9481      1.35807      -8.062    7.44e-014 ***
          ds_l_realgdp       4.28893     0.229459     18.69     2.40e-045 ***
          realint_1         -0.662644    0.334872     -1.979    0.0492    **
          ds_l_realinv_1    -0.108892    0.0715042    -1.523    0.1294
          ds_l_realgdp_1     0.660443    0.390372      1.692    0.0923    *
          realint_2          0.0769695   0.341527      0.2254   0.8219

          Sum of squared residuals = 22432.8

        Test of common factor restriction

          Test statistic: F(2, 195) = 0.426391, with p-value = 0.653468
        '''


        ################ with OLS, HAC errors

        #Model 5: OLS, using observations 1959:2-2009:3 (T = 202)
        #Dependent variable: ds_l_realinv
        #HAC standard errors, bandwidth 4 (Bartlett kernel)

        #coefficient   std. error   t-ratio    p-value 95% CONFIDENCE INTERVAL
        #for confidence interval t(199, 0.025) = 1.972

        partable = np.array([
        [-9.48167,      1.17709,     -8.055,    7.17e-014, -11.8029, -7.16049], # ***
        [4.37422,      0.328787,    13.30,     2.62e-029, 3.72587, 5.02258], #***
        [-0.613997,     0.293619,    -2.091,    0.0378, -1.19300, -0.0349939]]) # **

        result_gretl_g1 = dict(
                    endog_mean = ("Mean dependent var",   3.257395),
                    endog_std = ("S.D. dependent var",   18.73915),
                    ssr = ("Sum squared resid",    22799.68),
                    mse_resid_sqrt = ("S.E. of regression",   10.70380),
                    rsquared = ("R-squared",            0.676978),
                    rsquared_adj = ("Adjusted R-squared",   0.673731),
                    fvalue = ("F(2, 199)",            90.79971),
                    f_pvalue = ("P-value(F)",           9.53e-29),
                    llf = ("Log-likelihood",      -763.9752),
                    aic = ("Akaike criterion",     1533.950),
                    bic = ("Schwarz criterion",    1543.875),
                    hqic = ("Hannan-Quinn",         1537.966),
                    resid_acf1 = ("rho",                 -0.107341),
                    dw = ("Durbin-Watson",        2.213805))

        linear_logs = [1.68351, 0.430953, 2, "chi2"]
        #for logs: dropping 70 nan or incomplete observations, T=133
        #(res_ols.model.exog <=0).any(1).sum() = 69  ?not 70
        linear_squares = [7.52477, 0.0232283, 2, "chi2"]

        #Autocorrelation, Breusch-Godfrey test for autocorrelation up to order 4
        lm_acorr4 = [1.17928, 0.321197, 4, 195, "F"]
        lm2_acorr4 = [4.771043, 0.312, 4, "chi2"]
        acorr_ljungbox4 = [5.23587, 0.264, 4, "chi2"]

        #break
        cusum_Harvey_Collier  = [0.494432, 0.621549, 198, "t"] #stats.t.sf(0.494432, 198)*2
        #see cusum results in files
        break_qlr = [3.01985, 0.1, 3, 196, "maxF"]  #TODO check this, max at 2001:4
        break_chow = [13.1897, 0.00424384, 3, "chi2"] # break at 1984:1

        arch_4 = [3.43473, 0.487871, 4, "chi2"]

        normality = [23.962, 0.00001, 2, "chi2"]

        het_white = [33.503723, 0.000003, 5, "chi2"]
        het_breush_pagan = [1.302014, 0.521520, 2, "chi2"]  #TODO: not available
        het_breush_pagan_konker = [0.709924, 0.701200, 2, "chi2"]


        reset_2_3 = [5.219019, 0.00619, 2, 197, "f"]
        reset_2 = [7.268492, 0.00762, 1, 198, "f"]
        reset_3 = [5.248951, 0.023, 1, 198, "f"]  #not available

        cond_1norm = 5984.0525
        determinant = 7.1087467e+008
        reciprocal_condition_number = 0.013826504
        vif = [1.001, 1.001]

        names = 'date   residual        leverage       influence        DFFITS'.split()
        cur_dir = os.path.abspath(os.path.dirname(__file__))
        fpath = os.path.join(cur_dir, 'results/leverage_influence_ols_nostars.txt')
        lev = np.genfromtxt(fpath, skip_header=3, skip_footer=1,
                            converters={0:lambda s: s})
        #either numpy 1.6 or python 3.2 changed behavior
        if np.isnan(lev[-1]['f1']):
            lev = np.genfromtxt(fpath, skip_header=3, skip_footer=2,
                                converters={0:lambda s: s})

        lev.dtype.names = names

        res = res_ols #for easier copying

        cov_hac = sw.cov_hac_simple(res, nlags=4, use_correction=False)
        bse_hac =  sw.se_cov(cov_hac)

        assert_almost_equal(res.params, partable[:,0], 5)
        assert_almost_equal(bse_hac, partable[:,1], 5)
        #TODO

        assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2)
        #assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=7) #not in gretl
        assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=6) #FAIL
        assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=6) #FAIL
        assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5)
        #f-value is based on cov_hac I guess
        #assert_almost_equal(res.fvalue, result_gretl_g1['fvalue'][1], decimal=0) #FAIL
        #assert_approx_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], significant=1) #FAIL
        #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO


        c = oi.reset_ramsey(res, degree=2)
        compare_ftest(c, reset_2, decimal=(6,5))
        c = oi.reset_ramsey(res, degree=3)
        compare_ftest(c, reset_2_3, decimal=(6,5))

        linear_sq = smsdia.linear_lm(res.resid, res.model.exog)
        assert_almost_equal(linear_sq[0], linear_squares[0], decimal=6)
        assert_almost_equal(linear_sq[1], linear_squares[1], decimal=7)

        hbpk = smsdia.het_breushpagan(res.resid, res.model.exog)
        assert_almost_equal(hbpk[0], het_breush_pagan_konker[0], decimal=6)
        assert_almost_equal(hbpk[1], het_breush_pagan_konker[1], decimal=6)

        hw = smsdia.het_white(res.resid, res.model.exog)
        assert_almost_equal(hw[:2], het_white[:2], 6)

        #arch
        #sm_arch = smsdia.acorr_lm(res.resid**2, maxlag=4, autolag=None)
        sm_arch = smsdia.het_arch(res.resid, maxlag=4)
        assert_almost_equal(sm_arch[0], arch_4[0], decimal=5)
        assert_almost_equal(sm_arch[1], arch_4[1], decimal=6)

        vif2 = [oi.variance_inflation_factor(res.model.exog, k) for k in [1,2]]

        infl = oi.OLSInfluence(res_ols)
        #print np.max(np.abs(lev['DFFITS'] - infl.dffits[0]))
        #print np.max(np.abs(lev['leverage'] - infl.hat_matrix_diag))
        #print np.max(np.abs(lev['influence'] - infl.influence))  #just added this based on Gretl

        #just rough test, low decimal in Gretl output,
        assert_almost_equal(lev['residual'], res.resid, decimal=3)
        assert_almost_equal(lev['DFFITS'], infl.dffits[0], decimal=3)
        assert_almost_equal(lev['leverage'], infl.hat_matrix_diag, decimal=3)
        assert_almost_equal(lev['influence'], infl.influence, decimal=4)