def test_cov_cluster_2groups():
    """Compare cluster robust standard errors with Petersen's reference values.

    Fits an OLS regression on Petersen's test data and checks, to 4 decimals,
    the HC0 standard errors, the two one-way cluster standard errors and the
    two-way cluster standard errors against the reference numbers listed
    below.

    Requires Petersen's ``test_data.txt`` in the same directory as this
    module:
    http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.txt
    """
    import os

    # locate the data file relative to this module, not the working directory
    cur_dir = os.path.abspath(os.path.dirname(__file__))
    fpath = os.path.join(cur_dir, "test_data.txt")
    pet = np.genfromtxt(fpath)

    # columns as used here: 0 and 1 are the two cluster ids, 2 is the single
    # regressor, the last column is the dependent variable
    endog = pet[:, -1]
    group = pet[:, 0].astype(int)
    time = pet[:, 1].astype(int)
    exog = add_constant(pet[:, 2], prepend=True)
    res = OLS(endog, exog).fit()

    cov01, covg, covt = sw.cov_cluster_2groups(res, group, group2=time)

    # Reference numbers from Petersen
    # http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.htm
    bse_petw = [0.0284, 0.0284]   # compared with HC0 below
    bse_pet0 = [0.0670, 0.0506]   # compared with clustering on `group`
    bse_pet1 = [0.0234, 0.0334]   # year
    bse_pet01 = [0.0651, 0.0536]  # firm and year

    bse_0 = sw.se_cov(covg)
    bse_1 = sw.se_cov(covt)
    bse_01 = sw.se_cov(cov01)

    # assert_almost_equal takes (actual, desired): computed value first, so a
    # failure message labels the two sides correctly
    assert_almost_equal(res.HC0_se, bse_petw, decimal=4)
    assert_almost_equal(bse_0, bse_pet0, decimal=4)
    assert_almost_equal(bse_1, bse_pet1, decimal=4)
    assert_almost_equal(bse_01, bse_pet01, decimal=4)
def test_cov_cluster_2groups():
    """Check one-way and two-way cluster robust standard errors.

    An OLS model is estimated on Petersen's test data; its HC0, one-way
    cluster (by ``group`` and by ``time``) and two-way cluster standard
    errors are compared with Petersen's reference numbers to 4 decimals.

    Requires Petersen's ``test_data.txt`` next to this module:
    http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.txt
    """
    import os

    # the data file is looked up in the directory that contains this module
    cur_dir = os.path.abspath(os.path.dirname(__file__))
    fpath = os.path.join(cur_dir, "test_data.txt")
    pet = np.genfromtxt(fpath)

    endog = pet[:, -1]               # last column: dependent variable
    group = pet[:, 0].astype(int)    # first cluster id
    time = pet[:, 1].astype(int)     # second cluster id
    exog = add_constant(pet[:, 2], prepend=True)
    res = OLS(endog, exog).fit()

    cov01, covg, covt = sw.cov_cluster_2groups(res, group, group2=time)

    # Reference numbers from Petersen
    # http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.htm
    bse_petw = [0.0284, 0.0284]
    bse_pet0 = [0.0670, 0.0506]
    bse_pet1 = [0.0234, 0.0334]   # year
    bse_pet01 = [0.0651, 0.0536]  # firm and year

    bse_0 = sw.se_cov(covg)
    bse_1 = sw.se_cov(covt)
    bse_01 = sw.se_cov(cov01)

    # computed value first, reference second, matching the
    # (actual, desired) signature used by the other three checks
    assert_almost_equal(res.HC0_se, bse_petw, decimal=4)
    assert_almost_equal(bse_0, bse_pet0, decimal=4)
    assert_almost_equal(bse_1, bse_pet1, decimal=4)
    assert_almost_equal(bse_01, bse_pet01, decimal=4)
import statsmodels.sandbox.panel.sandwich_covariance_generic as swg

# Simulate a linear model whose error scale depends on the last regressor,
# then check the standalone covariance functions in ``sw`` against the
# HC*_se attributes of the fitted OLS results.
nobs = 100
kvars = 4  # including constant
x = np.random.randn(nobs, kvars - 1)
exog = sm.add_constant(x, prepend=True)
params_true = np.ones(kvars)
y_true = np.dot(exog, params_true)
sigma = 0.1 + np.exp(exog[:, -1])
endog = y_true + sigma * np.random.randn(nobs)

# the results instance is bound to the module-level name ``self``; the name
# is kept because code outside this fragment may refer to it
self = sm.OLS(endog, exog).fit()

# print(x) with a single argument behaves the same on Python 2 and Python 3
print(self.HC3_se)
print(sw.se_cov(sw.cov_hc3(self)))

# test standalone refactoring
assert_almost_equal(sw.se_cov(sw.cov_hc0(self)), self.HC0_se, 15)
assert_almost_equal(sw.se_cov(sw.cov_hc1(self)), self.HC1_se, 15)
assert_almost_equal(sw.se_cov(sw.cov_hc2(self)), self.HC2_se, 15)
assert_almost_equal(sw.se_cov(sw.cov_hc3(self)), self.HC3_se, 15)
print(self.HC0_se)

# test White as HAC with nlags=0, same as nlags=1 ?
# computed once and reused; it is still printed both before and after the
# assertion so the output is unchanged
bse_hac0 = sw.cov_hac_simple(self, nlags=0, use_correction=False)[1]
print(bse_hac0)
assert_almost_equal(bse_hac0, self.HC0_se, 15)
print(bse_hac0)

# same check with use_correction=True, which is compared with HC1
bse_hac0c = sw.cov_hac_simple(self, nlags=0, use_correction=True)[1]
assert_almost_equal(bse_hac0c, self.HC1_se, 15)
# Compare HC0, one-way cluster and two-way cluster standard errors with
# Petersen's reference numbers.
# requires Petersen's test_data
# http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.txt
pet = np.genfromtxt("test_data.txt")  # relative path: read from the working directory
endog = pet[:, -1]
group = pet[:, 0].astype(int)
time = pet[:, 1].astype(int)
exog = sm.add_constant(pet[:, 2], prepend=True)
res = sm.OLS(endog, exog).fit()

cov01, covg, covt = sw.cov_cluster_2groups(res, group, group2=time)

# Reference numbers from Petersen
# http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.htm
bse_petw = [0.0284, 0.0284]
bse_pet0 = [0.0670, 0.0506]
bse_pet1 = [0.0234, 0.0334]   # year
bse_pet01 = [0.0651, 0.0536]  # firm and year

bse_0 = sw.se_cov(covg)
bse_1 = sw.se_cov(covt)
bse_01 = sw.se_cov(cov01)

# Show each estimate next to its deviation from the reference value.
# A single pre-formatted string gives the same "a b" output on Python 2 and
# Python 3 (print(a, b) would print a tuple on Python 2).
print("%s %s" % (res.HC0_se, bse_petw - res.HC0_se))
print("%s %s" % (bse_0, bse_0 - bse_pet0))
print("%s %s" % (bse_1, bse_1 - bse_pet1))
print("%s %s" % (bse_01, bse_01 - bse_pet01))

# computed value first, reference second: (actual, desired)
assert_almost_equal(res.HC0_se, bse_petw, decimal=4)
assert_almost_equal(bse_0, bse_pet0, decimal=4)
assert_almost_equal(bse_1, bse_pet1, decimal=4)
assert_almost_equal(bse_01, bse_pet01, decimal=4)
# Inspect the fit: residual correlation, residual spread, and the spread of
# the prediction error relative to the true mean.
resid = y - y_pred
print(np.corrcoef(resid.reshape(-1, n_groups, order='F')))
print(resid.std())
err = y_pred - dgp.y_true
print(err.std())

# NOTE: the next three lines are bare attribute accesses; they produce no
# output when this runs as a script.
#OLS standard errors are too small
mod.res_pooled.params
mod.res_pooled.bse
#heteroscedasticity robust doesn't help
mod.res_pooled.HC1_se

#compare with cluster robust se
import statsmodels.sandbox.panel.sandwich_covariance as sw
print(sw.cov_cluster(mod.res_pooled, dgp.groups.astype(int))[1])
#not bad, pretty close to panel estimator

#and with Newey-West Hac
print(sw.se_cov(sw.cov_nw_panel(mod.res_pooled, 5, mod.group.groupidx)))
#too small, assuming no bugs,
#see Petersen assuming it refers to same kind of model

print(dgp.cov)

mod2 = ShortPanelGLS(y, dgp.exog, dgp.groups)
res2 = mod2.fit_iterative(2)
print(res2.params)
print(res2.bse)

#both implementations produce the same results:
from numpy.testing import assert_almost_equal
assert_almost_equal(res.params, res2.params, decimal=14)
assert_almost_equal(res.bse, res2.bse, decimal=14)

mod5 = ShortPanelGLS(y, dgp.exog, dgp.groups)
res5 = mod5.fit_iterative(5)
# show the estimate that was just computed; the original printed
# res2.params a second time here, which looks like a copy-paste slip
print(res5.params)
# Diagnostics for the fitted panel model, followed by a comparison of
# several standard error estimators and of two GLS implementations.
resid = y - y_pred
print(np.corrcoef(resid.reshape(-1, n_groups, order='F')))
print(resid.std())
err = y_pred - dgp.y_true
print(err.std())

# NOTE: bare attribute accesses below print nothing in a script run.
#OLS standard errors are too small
mod.res_pooled.params
mod.res_pooled.bse
#heteroscedasticity robust doesn't help
mod.res_pooled.HC1_se

#compare with cluster robust se
import statsmodels.sandbox.panel.sandwich_covariance as sw
print(sw.cov_cluster(mod.res_pooled, dgp.groups.astype(int))[1])
#not bad, pretty close to panel estimator

#and with Newey-West Hac
print(sw.se_cov(sw.cov_nw_panel(mod.res_pooled, 5, mod.group.groupidx)))
#too small, assuming no bugs,
#see Petersen assuming it refers to same kind of model

print(dgp.cov)

mod2 = ShortPanelGLS(y, dgp.exog, dgp.groups)
res2 = mod2.fit_iterative(2)
print(res2.params)
print(res2.bse)

#both implementations produce the same results:
from numpy.testing import assert_almost_equal
assert_almost_equal(res.params, res2.params, decimal=14)
assert_almost_equal(res.bse, res2.bse, decimal=14)

mod5 = ShortPanelGLS(y, dgp.exog, dgp.groups)
res5 = mod5.fit_iterative(5)
# print the 5-iteration estimate; the original repeated res2.params here,
# which had already been printed above
print(res5.params)
# Check HC0 and cluster robust standard errors against Petersen's reference
# numbers.
# requires Petersen's test_data
# http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.txt
pet = np.genfromtxt("test_data.txt")  # relative path: read from the working directory
endog = pet[:, -1]
group = pet[:, 0].astype(int)
time = pet[:, 1].astype(int)
exog = sm.add_constant(pet[:, 2], prepend=True)
res = sm.OLS(endog, exog).fit()

cov01, covg, covt = sw.cov_cluster_2groups(res, group, group2=time)

# Reference numbers from Petersen
# http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.htm
bse_petw = [0.0284, 0.0284]
bse_pet0 = [0.0670, 0.0506]
bse_pet1 = [0.0234, 0.0334]   # year
bse_pet01 = [0.0651, 0.0536]  # firm and year

bse_0 = sw.se_cov(covg)
bse_1 = sw.se_cov(covt)
bse_01 = sw.se_cov(cov01)

# Estimate followed by its difference from the reference. Formatting into
# one string keeps the "a b" output identical on Python 2 and Python 3.
print("%s %s" % (res.HC0_se, bse_petw - res.HC0_se))
print("%s %s" % (bse_0, bse_0 - bse_pet0))
print("%s %s" % (bse_1, bse_1 - bse_pet1))
print("%s %s" % (bse_01, bse_01 - bse_pet01))

# (actual, desired) order, consistent across all four checks
assert_almost_equal(res.HC0_se, bse_petw, decimal=4)
assert_almost_equal(bse_0, bse_pet0, decimal=4)
assert_almost_equal(bse_1, bse_pet1, decimal=4)
assert_almost_equal(bse_01, bse_pet01, decimal=4)
import statsmodels.sandbox.panel.sandwich_covariance as sw
import statsmodels.sandbox.panel.sandwich_covariance_generic as swg

# Generate data with an error scale that depends on the last regressor and
# verify that the standalone covariance functions reproduce the HC*_se
# attributes of the OLS results.
nobs = 100
kvars = 4  # including constant
x = np.random.randn(nobs, kvars - 1)
exog = sm.add_constant(x, prepend=True)
params_true = np.ones(kvars)
y_true = np.dot(exog, params_true)
sigma = 0.1 + np.exp(exog[:, -1])
endog = y_true + sigma * np.random.randn(nobs)

# the module-level name ``self`` is kept as is, since code outside this
# fragment may refer to it
self = sm.OLS(endog, exog).fit()

# single-argument print(...) gives the same output on Python 2 and Python 3
print(self.HC3_se)
print(sw.se_cov(sw.cov_hc3(self)))

# test standalone refactoring
assert_almost_equal(sw.se_cov(sw.cov_hc0(self)), self.HC0_se, 15)
assert_almost_equal(sw.se_cov(sw.cov_hc1(self)), self.HC1_se, 15)
assert_almost_equal(sw.se_cov(sw.cov_hc2(self)), self.HC2_se, 15)
assert_almost_equal(sw.se_cov(sw.cov_hc3(self)), self.HC3_se, 15)
print(self.HC0_se)

# test White as HAC with nlags=0, same as nlags=1 ?
# evaluated once; printed before and after the assertion as before
bse_hac0 = sw.cov_hac_simple(self, nlags=0, use_correction=False)[1]
print(bse_hac0)
assert_almost_equal(bse_hac0, self.HC0_se, 15)
print(bse_hac0)

# with use_correction=True the result is compared with HC1 instead of HC0
bse_hac0c = sw.cov_hac_simple(self, nlags=0, use_correction=True)[1]
assert_almost_equal(bse_hac0c, self.HC1_se, 15)