Exemple #1
0
    def setup_class(cls):
        #example 3
        nobs = 300
        nobs_i = 5
        n_groups = nobs // nobs_i
        k_vars = 3

        from statsmodels.sandbox.panel.random_panel import PanelSample
        dgp = PanelSample(nobs, k_vars, n_groups)
        dgp.group_means = 2 + np.random.randn(n_groups) #add random intercept
        print('seed', dgp.seed)
        y = dgp.generate_panel()
        x = np.column_stack((dgp.exog[:,1:],
                             dgp.groups[:,None] == np.arange(n_groups)))
        cls.dgp = dgp
        cls.endog = y
        cls.exog = x
        cls.res_ols = OLS(y, x).fit()
Exemple #2
0
    def setup_class(cls):
        #example 3
        nobs = 300
        nobs_i = 5
        n_groups = nobs // nobs_i
        k_vars = 3

        from statsmodels.sandbox.panel.random_panel import PanelSample
        dgp = PanelSample(nobs, k_vars, n_groups)
        dgp.group_means = 2 + np.random.randn(n_groups)  #add random intercept
        print('seed', dgp.seed)
        y = dgp.generate_panel()
        x = np.column_stack(
            (dgp.exog[:, 1:], dgp.groups[:, None] == np.arange(n_groups)))
        cls.dgp = dgp
        cls.endog = y
        cls.exog = x
        cls.res_ols = OLS(y, x).fit()
if 'ex1' in examples:
    nobs = 100
    nobs_i = 5
    n_groups = nobs // nobs_i
    k_vars = 3

    #    dgp = PanelSample(nobs, k_vars, n_groups, corr_structure=cs.corr_equi,
    #                      corr_args=(0.6,))
    #    dgp = PanelSample(nobs, k_vars, n_groups, corr_structure=cs.corr_ar,
    #                      corr_args=([1, -0.95],))
    dgp = PanelSample(nobs,
                      k_vars,
                      n_groups,
                      corr_structure=cs.corr_arma,
                      corr_args=(
                          [1],
                          [1., -0.9],
                      ),
                      seed=377769)
    print('seed', dgp.seed)
    y = dgp.generate_panel()
    noise = y - dgp.y_true
    print(np.corrcoef(y.reshape(-1, n_groups, order='F')))
    print(np.corrcoef(noise.reshape(-1, n_groups, order='F')))

    mod = ShortPanelGLS2(y, dgp.exog, dgp.groups)
    res = mod.fit()
    print(res.params)
    print(res.bse)
    #Now what?
Exemple #4
0
def test_short_panel():
    #this checks that some basic statistical properties are satisfied by the
    ##1lab_results, not verified #1lab_results against other packages
    #Note: the ranking of robust bse is different if within=True
    #I added within keyword to PanelSample to be able to use old example
    #if within is False, then there is no within group variation in exog.
    nobs = 100
    nobs_i = 5
    n_groups = nobs // nobs_i
    k_vars = 3

    dgp = PanelSample(nobs, k_vars, n_groups, corr_structure=cs.corr_arma,
                      corr_args=([1], [1., -0.9],), seed=377769, within=False)
    #print 'seed', dgp.seed
    y = dgp.generate_panel()
    noise = y - dgp.y_true

    #test dgp

    dgp_cov_e = np.array(
              [[ 1.    ,  0.9   ,  0.81  ,  0.729 ,  0.6561],
               [ 0.9   ,  1.    ,  0.9   ,  0.81  ,  0.729 ],
               [ 0.81  ,  0.9   ,  1.    ,  0.9   ,  0.81  ],
               [ 0.729 ,  0.81  ,  0.9   ,  1.    ,  0.9   ],
               [ 0.6561,  0.729 ,  0.81  ,  0.9   ,  1.    ]])

    npt.assert_almost_equal(dgp.cov, dgp_cov_e, 13)

    cov_noise = np.cov(noise.reshape(-1,n_groups, order='F'))
    corr_noise = cov2corr(cov_noise)
    npt.assert_almost_equal(corr_noise, dgp.cov, 1)

    #estimate panel model
    mod2 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res2 = mod2.fit_iterative(2)


    #whitened residual should be uncorrelated
    corr_wresid = np.corrcoef(res2.wresid.reshape(-1,n_groups, order='F'))
    assert_maxabs(corr_wresid, np.eye(5), 0.1)

    #residual should have same correlation as dgp
    corr_resid = np.corrcoef(res2.resid.reshape(-1,n_groups, order='F'))
    assert_maxabs(corr_resid, dgp.cov, 0.1)

    assert_almost_equal(res2.resid.std(),1, decimal=0)

    y_pred = np.dot(mod2.exog, res2.params)
    assert_almost_equal(res2.fittedvalues, y_pred, 13)


    #compare with OLS

    res2_ols = mod2._fit_ols()
    npt.assert_(mod2.res_pooled is res2_ols)

    res2_ols = mod2.res_pooled  #TODO: BUG: requires call to _fit_ols

    #fitting once is the same as OLS
    #note: I need to create new instance, otherwise it continuous fitting
    mod1 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res1 = mod1.fit_iterative(1)

    assert_almost_equal(res1.params, res2_ols.params, decimal=13)
    assert_almost_equal(res1.bse, res2_ols.bse, decimal=13)

    res_ols = OLS(y, dgp.exog).fit()
    assert_almost_equal(res1.params, res_ols.params, decimal=13)
    assert_almost_equal(res1.bse, res_ols.bse, decimal=13)


    #compare with old version
    mod_old = ShortPanelGLS2(y, dgp.exog, dgp.groups)
    res_old = mod_old.fit()

    assert_almost_equal(res2.params, res_old.params, decimal=13)
    assert_almost_equal(res2.bse, res_old.bse, decimal=13)


    mod5 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res5 = mod5.fit_iterative(5)

    #make sure it's different
    #npt.assert_array_less(0.009, em.maxabs(res5.bse, res2.bse))

    cov_clu = sw.cov_cluster(mod2.res_pooled, dgp.groups.astype(int))
    clubse = se_cov(cov_clu)
    pnwbse = se_cov(sw.cov_nw_panel(mod2.res_pooled, 4, mod2.group.groupidx))
    bser = np.vstack((res2.bse, res5.bse, clubse, pnwbse))
    bser_mean = np.mean(bser, axis=0)

    #cov_cluster close to robust and PanelGLS
    #is up to 24% larger than mean of bser
    #npt.assert_array_less(0, clubse / bser_mean - 1)
    npt.assert_array_less(clubse / bser_mean - 1, 0.25)
    #cov_nw_panel close to robust and PanelGLS
    npt.assert_array_less(pnwbse / bser_mean - 1, 0.1)
    #OLS underestimates bse, robust at least 60% larger
    npt.assert_array_less(0.6, bser_mean / res_ols.bse  - 1)

    #cov_hac_panel with uniform_kernel is the same as cov_cluster for balanced
    #panel with full length kernel
    #I fixe default correction to be equal
    cov_uni = sw.cov_nw_panel(mod2.res_pooled, 4, mod2.group.groupidx,
                              weights_func=sw.weights_uniform,
                              use_correction='c')
    assert_almost_equal(cov_uni, cov_clu, decimal=13)

    #without correction
    cov_clu2 = sw.cov_cluster(mod2.res_pooled, dgp.groups.astype(int),
                              use_correction=False)
    cov_uni2 = sw.cov_nw_panel(mod2.res_pooled, 4, mod2.group.groupidx,
                              weights_func=sw.weights_uniform,
                              use_correction=False)
    assert_almost_equal(cov_uni2, cov_clu2, decimal=13)

    cov_white = sw.cov_white_simple(mod2.res_pooled)
    cov_pnw0 = sw.cov_nw_panel(mod2.res_pooled, 0, mod2.group.groupidx,
                              use_correction='hac')
    assert_almost_equal(cov_pnw0, cov_white, decimal=13)
    plt.plot(y_true, 'k-', label='true')
    plt.plot(res_ols.fittedvalues, '-', label='ols')
    plt.plot(res.fittedvalues, '-', label='theil')
    plt.legend()
    plt.title('Polynomial fitting: fitted values')
    #plt.show()

    if 3 in examples:
        #example 3
        nobs = 600
        nobs_i = 20
        n_groups = nobs // nobs_i
        k_vars = 3

        from statsmodels.sandbox.panel.random_panel import PanelSample
        dgp = PanelSample(nobs, k_vars, n_groups)
        dgp.group_means = 2 + np.random.randn(n_groups)  #add random intercept
        print 'seed', dgp.seed
        y = dgp.generate_panel()
        X = np.column_stack(
            (dgp.exog[:, 1:], dgp.groups[:, None] == np.arange(n_groups)))
        res_ols = sm.OLS(y, X).fit()
        R = np.c_[np.zeros((n_groups, k_vars - 1)), np.eye(n_groups)]
        r = np.zeros(n_groups)
        R = np.c_[np.zeros((n_groups - 1, k_vars)),
                  np.eye(n_groups - 1) - 1. / n_groups * np.ones(
                      (n_groups - 1, n_groups - 1))]
        r = np.zeros(n_groups - 1)
        R[:, k_vars - 1] = -1

        lambd = 1  #1e-4
Exemple #6
0

examples = ["ex1"]


if "ex1" in examples:
    nobs = 100
    nobs_i = 5
    n_groups = nobs // nobs_i
    k_vars = 3

    #    dgp = PanelSample(nobs, k_vars, n_groups, corr_structure=cs.corr_equi,
    #                      corr_args=(0.6,))
    #    dgp = PanelSample(nobs, k_vars, n_groups, corr_structure=cs.corr_ar,
    #                      corr_args=([1, -0.95],))
    dgp = PanelSample(nobs, k_vars, n_groups, corr_structure=cs.corr_arma, corr_args=([1], [1.0, -0.9]), seed=377769)
    print "seed", dgp.seed
    y = dgp.generate_panel()
    noise = y - dgp.y_true
    print np.corrcoef(y.reshape(-1, n_groups, order="F"))
    print np.corrcoef(noise.reshape(-1, n_groups, order="F"))

    mod = ShortPanelGLS2(y, dgp.exog, dgp.groups)
    res = mod.fit()
    print res.params
    print res.bse
    # Now what?
    # res.resid is of transformed model
    # np.corrcoef(res.resid.reshape(-1,n_groups, order='F'))
    y_pred = np.dot(mod.exog, res.params)
    resid = y - y_pred
Exemple #7
0
    plt.plot(y_true, 'k-', label='true')
    plt.plot(res_ols.fittedvalues, '-', label='ols')
    plt.plot(res.fittedvalues, '-', label='theil')
    plt.legend()
    plt.title('Polynomial fitting: fitted values')
    #plt.show()

    if 3 in examples:
        #example 3
        nobs = 600
        nobs_i = 20
        n_groups = nobs // nobs_i
        k_vars = 3

        from statsmodels.sandbox.panel.random_panel import PanelSample
        dgp = PanelSample(nobs, k_vars, n_groups)
        dgp.group_means = 2 + np.random.randn(n_groups) #add random intercept
        print 'seed', dgp.seed
        y = dgp.generate_panel()
        X = np.column_stack((dgp.exog[:,1:],
                               dgp.groups[:,None] == np.arange(n_groups)))
        res_ols = sm.OLS(y, X).fit()
        R = np.c_[np.zeros((n_groups, k_vars-1)), np.eye(n_groups)]
        r = np.zeros(n_groups)
        R = np.c_[np.zeros((n_groups-1, k_vars)),
                  np.eye(n_groups-1)-1./n_groups * np.ones((n_groups-1, n_groups-1))]
        r = np.zeros(n_groups-1)
        R[:, k_vars-1] = -1

        lambd = 1 #1e-4
        mod = TheilGLS(y, X, r_matrix=R, q_matrix=r, sigma_prior=lambd)
def test_short_panel():
    #this checks that some basic statistical properties are satisfied by the
    #results, not verified results against other packages
    #Note: the ranking of robust bse is different if within=True
    #I added within keyword to PanelSample to be able to use old example
    #if within is False, then there is no within group variation in exog.
    nobs = 100
    nobs_i = 5
    n_groups = nobs // nobs_i
    k_vars = 3

    dgp = PanelSample(nobs, k_vars, n_groups, corr_structure=cs.corr_arma,
                      corr_args=([1], [1., -0.9],), seed=377769, within=False)
    #print 'seed', dgp.seed
    y = dgp.generate_panel()
    noise = y - dgp.y_true

    #test dgp

    dgp_cov_e = np.array(
              [[ 1.    ,  0.9   ,  0.81  ,  0.729 ,  0.6561],
               [ 0.9   ,  1.    ,  0.9   ,  0.81  ,  0.729 ],
               [ 0.81  ,  0.9   ,  1.    ,  0.9   ,  0.81  ],
               [ 0.729 ,  0.81  ,  0.9   ,  1.    ,  0.9   ],
               [ 0.6561,  0.729 ,  0.81  ,  0.9   ,  1.    ]])

    npt.assert_almost_equal(dgp.cov, dgp_cov_e, 13)

    cov_noise = np.cov(noise.reshape(-1,n_groups, order='F'))
    corr_noise = cov2corr(cov_noise)
    npt.assert_almost_equal(corr_noise, dgp.cov, 1)

    #estimate panel model
    mod2 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res2 = mod2.fit_iterative(2)


    #whitened residual should be uncorrelated
    corr_wresid = np.corrcoef(res2.wresid.reshape(-1,n_groups, order='F'))
    assert_maxabs(corr_wresid, np.eye(5), 0.1)

    #residual should have same correlation as dgp
    corr_resid = np.corrcoef(res2.resid.reshape(-1,n_groups, order='F'))
    assert_maxabs(corr_resid, dgp.cov, 0.1)

    assert_almost_equal(res2.resid.std(),1, decimal=0)

    y_pred = np.dot(mod2.exog, res2.params)
    assert_almost_equal(res2.fittedvalues, y_pred, 13)


    #compare with OLS

    res2_ols = mod2._fit_ols()
    npt.assert_(mod2.res_pooled is res2_ols)

    res2_ols = mod2.res_pooled  #TODO: BUG: requires call to _fit_ols

    #fitting once is the same as OLS
    #note: I need to create new instance, otherwise it continuous fitting
    mod1 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res1 = mod1.fit_iterative(1)

    assert_almost_equal(res1.params, res2_ols.params, decimal=13)
    assert_almost_equal(res1.bse, res2_ols.bse, decimal=13)

    res_ols = OLS(y, dgp.exog).fit()
    assert_almost_equal(res1.params, res_ols.params, decimal=13)
    assert_almost_equal(res1.bse, res_ols.bse, decimal=13)


    #compare with old version
    mod_old = ShortPanelGLS2(y, dgp.exog, dgp.groups)
    res_old = mod_old.fit()

    assert_almost_equal(res2.params, res_old.params, decimal=13)
    assert_almost_equal(res2.bse, res_old.bse, decimal=13)


    mod5 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res5 = mod5.fit_iterative(5)

    #make sure it's different
    #npt.assert_array_less(0.009, em.maxabs(res5.bse, res2.bse))

    cov_clu = sw.cov_cluster(mod2.res_pooled, dgp.groups.astype(int))
    clubse = se_cov(cov_clu)
    pnwbse = se_cov(sw.cov_nw_panel(mod2.res_pooled, 4, mod2.group.groupidx))
    bser = np.vstack((res2.bse, res5.bse, clubse, pnwbse))
    bser_mean = np.mean(bser, axis=0)

    #cov_cluster close to robust and PanelGLS
    #is up to 24% larger than mean of bser
    #npt.assert_array_less(0, clubse / bser_mean - 1)
    npt.assert_array_less(clubse / bser_mean - 1, 0.25)
    #cov_nw_panel close to robust and PanelGLS
    npt.assert_array_less(pnwbse / bser_mean - 1, 0.1)
    #OLS underestimates bse, robust at least 60% larger
    npt.assert_array_less(0.6, bser_mean / res_ols.bse  - 1)

    #cov_hac_panel with uniform_kernel is the same as cov_cluster for balanced
    #panel with full length kernel
    #I fixe default correction to be equal
    cov_uni = sw.cov_nw_panel(mod2.res_pooled, 4, mod2.group.groupidx,
                              weights_func=sw.weights_uniform,
                              use_correction='c')
    assert_almost_equal(cov_uni, cov_clu, decimal=13)

    #without correction
    cov_clu2 = sw.cov_cluster(mod2.res_pooled, dgp.groups.astype(int),
                              use_correction=False)
    cov_uni2 = sw.cov_nw_panel(mod2.res_pooled, 4, mod2.group.groupidx,
                              weights_func=sw.weights_uniform,
                              use_correction=False)
    assert_almost_equal(cov_uni2, cov_clu2, decimal=13)

    cov_white = sw.cov_white_simple(mod2.res_pooled)
    cov_pnw0 = sw.cov_nw_panel(mod2.res_pooled, 0, mod2.group.groupidx,
                              use_correction='hac')
    assert_almost_equal(cov_pnw0, cov_white, decimal=13)