def test_cov_cluster_2groups(): #comparing cluster robust standard errors to Peterson #requires Petersen's test_data #http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.txt import os cur_dir = os.path.abspath(os.path.dirname(__file__)) fpath = os.path.join(cur_dir,"test_data.txt") pet = np.genfromtxt(fpath) endog = pet[:,-1] group = pet[:,0].astype(int) time = pet[:,1].astype(int) exog = add_constant(pet[:,2]) res = OLS(endog, exog).fit() cov01, covg, covt = sw.cov_cluster_2groups(res, group, group2=time) #Reference number from Petersen #http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.htm bse_petw = [0.0284, 0.0284] bse_pet0 = [0.0670, 0.0506] bse_pet1 = [0.0234, 0.0334] #year bse_pet01 = [0.0651, 0.0536] #firm and year bse_0 = sw.se_cov(covg) bse_1 = sw.se_cov(covt) bse_01 = sw.se_cov(cov01) #print res.HC0_se, bse_petw - res.HC0_se #print bse_0, bse_0 - bse_pet0 #print bse_1, bse_1 - bse_pet1 #print bse_01, bse_01 - bse_pet01 assert_almost_equal(bse_petw, res.HC0_se, decimal=4) assert_almost_equal(bse_0, bse_pet0, decimal=4) assert_almost_equal(bse_1, bse_pet1, decimal=4) assert_almost_equal(bse_01, bse_pet01, decimal=4)
def test_hac(self): res = self.res #> nw = NeweyWest(fm, lag = 4, prewhite = FALSE, verbose=TRUE) #> nw2 = NeweyWest(fm, lag=10, prewhite = FALSE, verbose=TRUE) #> mkarray(nw, "cov_hac_4") cov_hac_4 = np.array([1.385551290884014, -0.3133096102522685, -0.0597207976835705, -0.3133096102522685, 0.1081011690351306, 0.000389440793564336, -0.0597207976835705, 0.000389440793564339, 0.0862118527405036]).reshape(3,3, order='F') #> mkarray(nw2, "cov_hac_10") cov_hac_10 = np.array([1.257386180080192, -0.2871560199899846, -0.03958300024627573, -0.2871560199899845, 0.1049107028987101, 0.0003896205316866944, -0.03958300024627578, 0.0003896205316866961, 0.0985539340694839]).reshape(3,3, order='F') cov = sw.cov_hac_simple(res, nlags=4, use_correction=False) bse_hac = sw.se_cov(cov) assert_almost_equal(cov, cov_hac_4, decimal=14) assert_almost_equal(bse_hac, np.sqrt(np.diag(cov)), decimal=14) cov = sw.cov_hac_simple(res, nlags=10, use_correction=False) bse_hac = sw.se_cov(cov) assert_almost_equal(cov, cov_hac_10, decimal=14) assert_almost_equal(bse_hac, np.sqrt(np.diag(cov)), decimal=14)
def test_hac_simple(): from statsmodels.datasets import macrodata d2 = macrodata.load().data g_gdp = 400 * np.diff(np.log(d2['realgdp'])) g_inv = 400 * np.diff(np.log(d2['realinv'])) exogg = add_constant(np.c_[g_gdp, d2['realint'][:-1]], prepend=True) res_olsg = OLS(g_inv, exogg).fit() #> NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE, verbose=TRUE, adjust=TRUE) #Lag truncation parameter chosen: 4 # (Intercept) ggdp lint cov1_r = [ [1.40643899878678802, -0.3180328707083329709, -0.060621111216488610], [-0.31803287070833292, 0.1097308348999818661, 0.000395311760301478], [-0.06062111121648865, 0.0003953117603014895, 0.087511528912470993] ] #> NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE, verbose=TRUE, adjust=FALSE) #Lag truncation parameter chosen: 4 # (Intercept) ggdp lint cov2_r = [ [1.3855512908840137, -0.313309610252268500, -0.059720797683570477], [-0.3133096102522685, 0.108101169035130618, 0.000389440793564339], [-0.0597207976835705, 0.000389440793564336, 0.086211852740503622] ] cov1 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=True) se1 = sw.se_cov(cov1) cov2 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=False) se2 = sw.se_cov(cov2) assert_almost_equal(cov1, cov1_r, decimal=14) assert_almost_equal(cov2, cov2_r, decimal=14)
def test_hac(self): res = self.res #> nw = NeweyWest(fm, lag = 4, prewhite = FALSE, verbose=TRUE) #> nw2 = NeweyWest(fm, lag=10, prewhite = FALSE, verbose=TRUE) #> mkarray(nw, "cov_hac_4") cov_hac_4 = np.array([ 1.385551290884014, -0.3133096102522685, -0.0597207976835705, -0.3133096102522685, 0.1081011690351306, 0.000389440793564336, -0.0597207976835705, 0.000389440793564339, 0.0862118527405036 ]).reshape(3, 3, order='F') #> mkarray(nw2, "cov_hac_10") cov_hac_10 = np.array([ 1.257386180080192, -0.2871560199899846, -0.03958300024627573, -0.2871560199899845, 0.1049107028987101, 0.0003896205316866944, -0.03958300024627578, 0.0003896205316866961, 0.0985539340694839 ]).reshape(3, 3, order='F') cov = sw.cov_hac_simple(res, nlags=4, use_correction=False) bse_hac = sw.se_cov(cov) assert_almost_equal(cov, cov_hac_4, decimal=14) assert_almost_equal(bse_hac, np.sqrt(np.diag(cov)), decimal=14) cov = sw.cov_hac_simple(res, nlags=10, use_correction=False) bse_hac = sw.se_cov(cov) assert_almost_equal(cov, cov_hac_10, decimal=14) assert_almost_equal(bse_hac, np.sqrt(np.diag(cov)), decimal=14)
def test_hac_simple(): from statsmodels.datasets import macrodata d2 = macrodata.load().data g_gdp = 400*np.diff(np.log(d2['realgdp'])) g_inv = 400*np.diff(np.log(d2['realinv'])) exogg = add_constant(np.c_[g_gdp, d2['realint'][:-1]]) res_olsg = OLS(g_inv, exogg).fit() #> NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE, verbose=TRUE, adjust=TRUE) #Lag truncation parameter chosen: 4 # (Intercept) ggdp lint cov1_r = [[ 1.40643899878678802, -0.3180328707083329709, -0.060621111216488610], [ -0.31803287070833292, 0.1097308348999818661, 0.000395311760301478], [ -0.06062111121648865, 0.0003953117603014895, 0.087511528912470993]] #> NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE, verbose=TRUE, adjust=FALSE) #Lag truncation parameter chosen: 4 # (Intercept) ggdp lint cov2_r = [[ 1.3855512908840137, -0.313309610252268500, -0.059720797683570477], [ -0.3133096102522685, 0.108101169035130618, 0.000389440793564339], [ -0.0597207976835705, 0.000389440793564336, 0.086211852740503622]] cov1 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=True) se1 = sw.se_cov(cov1) cov2 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=False) se2 = sw.se_cov(cov2) assert_almost_equal(cov1, cov1_r, decimal=14) assert_almost_equal(cov2, cov2_r, decimal=14)
def setup(self): res_ols = self.res1.get_robustcov_results( "cluster", groups=(self.groups, self.time), use_correction=True, use_t=True, ) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_cluster_2groups(self.res1, self.groups, group2=self.time, use_correction=True)[0] se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res2.results_cluster_2groups_small self.rtol = ( 0.35 # only f_pvalue and confint for constant differ >rtol=0.05 ) self.rtolh = 1e-10
def setup(self): res_ols = self.res1.model.fit( cov_type="nw-panel", cov_kwds=dict( groups=self.groups, maxlags=4, use_correction="hac", use_t=True, df_correction=False, ), ) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_nw_panel(self.res1, 4, self.tidx) se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res2.results_nw_panel4 self.skip_f = True self.rtol = 1e-6 self.rtolh = 1e-10
def setup(self): res_ols = self.res1.model.fit( cov_type="nw-groupsum", cov_kwds=dict(time=self.time, maxlags=4, use_correction=False, use_t=True), ) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_nw_groupsum(self.res1, 4, self.time, use_correction=False) se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res2.results_nw_groupsum4 self.skip_f = True self.rtol = 1e-6 self.rtolh = 1e-10
def setup(self): model = OLS(self.res1.model.endog, self.res1.model.exog) # res_ols = self.res1.model.fit(cov_type='cluster', res_ols = model.fit( cov_type="cluster", cov_kwds=dict( groups=self.groups, use_correction=False, use_t=False, df_correction=True, ), ) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=False) se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = False self.res2 = res2.results_cluster_large self.skip_f = True self.rtol = 1e-6 self.rtolh = 1e-10
def cov_hac_simple(results, nlags=None, weights_func=weights_bartlett, use_correction=True): c = cov_hac(results, nlags=nlags, weights_func=weights_func, use_correction=use_correction) return c, se_cov(c)
def setup(self): res_ols = self.res1 cov1 = sw.cov_hac_simple(res_ols, nlags=4, use_correction=False) se1 = sw.se_cov(cov1) self.bse_robust = se1 self.cov_robust = cov1 self.small = False self.res2 = res.results_ivhac4_large
def test_hac_simple(): from statsmodels.datasets import macrodata d2 = macrodata.load_pandas().data g_gdp = 400 * np.diff(np.log(d2['realgdp'].values)) g_inv = 400 * np.diff(np.log(d2['realinv'].values)) exogg = add_constant(np.c_[g_gdp, d2['realint'][:-1].values]) res_olsg = OLS(g_inv, exogg).fit() # > NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE, # verbose=TRUE, adjust=TRUE) # Lag truncation parameter chosen: 4 # (Intercept) ggdp lint cov1_r = [ [+1.40643899878678802, -0.3180328707083329709, -0.060621111216488610], [-0.31803287070833292, 0.1097308348999818661, +0.000395311760301478], [-0.06062111121648865, 0.0003953117603014895, +0.087511528912470993] ] # > NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE, # verbose=TRUE, adjust=FALSE) # Lag truncation parameter chosen: 4 # (Intercept) ggdp lint cov2_r = [ [+1.3855512908840137, -0.313309610252268500, -0.059720797683570477], [-0.3133096102522685, +0.108101169035130618, +0.000389440793564339], [-0.0597207976835705, +0.000389440793564336, +0.086211852740503622] ] cov1 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=True) se1 = sw.se_cov(cov1) cov2 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=False) se2 = sw.se_cov(cov2) # Relax precision requirements for this test due to failure in NumPy 1.23 assert_allclose(cov1, cov1_r) assert_allclose(cov2, cov2_r) assert_allclose(np.sqrt(np.diag(cov1_r)), se1) assert_allclose(np.sqrt(np.diag(cov2_r)), se2) # compare default for nlags cov3 = sw.cov_hac_simple(res_olsg, use_correction=False) cov4 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=False) assert_allclose(cov3, cov4)
def get_robust_clu(cls): res1 = cls.res1 cov_clu = sc.cov_cluster(res1, group) cls.bse_rob = sc.se_cov(cov_clu) nobs, k_vars = res1.model.exog.shape k_params = len(res1.params) #n_groups = len(np.unique(group)) corr_fact = (nobs - 1.) / float(nobs - k_params) # for bse we need sqrt of correction factor cls.corr_fact = np.sqrt(corr_fact)
def get_robust_clu(cls): res1 = cls.res1 cov_clu = sw.cov_cluster(res1, group) cls.bse_rob = sw.se_cov(cov_clu) nobs, k_vars = res1.model.exog.shape k_params = len(res1.params) #n_groups = len(np.unique(group)) corr_fact = (nobs-1.) / float(nobs - k_params) # for bse we need sqrt of correction factor cls.corr_fact = np.sqrt(corr_fact)
def setup(self): res_ols = self.res1.get_robustcov_results("HAC", maxlags=4, use_correction=True, use_t=True) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_hac_simple(res_ols, nlags=4, use_correction=True) se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res.results_ivhac4_small
def setup(self): res_ols = self.res1.get_robustcov_results('HAC', maxlags=4, use_correction=True, use_t=True) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_hac_simple(res_ols, nlags=4, use_correction=True) se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res.results_ivhac4_small
def setup(self): res_ols = self.res1.get_robustcov_results("cluster", groups=self.groups, use_correction=True, use_t=True) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=True) se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res2.results_cluster_wls_small self.rtol = 1e-6 self.rtolh = 1e-10
def setup(self): res_ols = self.res1.model.fit( cov_type="cluster", cov_kwds=dict(groups=self.groups, use_correction=True, use_t=True) ) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=True) se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res2.results_cluster self.rtol = 1e-6 self.rtolh = 1e-10
def setup(self): res_ols = self.res1.get_robustcov_results( "cluster", groups=(self.groups, self.time), use_correction=True, use_t=True ) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_cluster_2groups(self.res1, self.groups, group2=self.time, use_correction=True)[0] se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res2.results_cluster_2groups_small self.rtol = 0.35 # only f_pvalue and confint for constant differ >rtol=0.05 self.rtolh = 1e-10
def setup(self): res_ols = self.res1.get_robustcov_results( "cluster", groups=self.groups, use_correction=True, use_t=True ) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=True) se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res2.results_cluster_wls_small self.rtol = 1e-6 self.rtolh = 1e-10
def setup(self): res_ols = self.res1.get_robustcov_results("HC1", use_t=True) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() # TODO: check standalone function # cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=False) cov1 = res_ols.cov_HC1 se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res2.results_hc1_wls_small self.skip_f = True self.rtol = 1e-6 self.rtolh = 1e-10
def setup(self): res_ols = self.res1.get_robustcov_results( "cluster", groups=(self.groups, self.time), use_correction=False, use_t=False # True, ) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_cluster_2groups(self.res1, self.groups, group2=self.time, use_correction=False)[0] se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = False self.res2 = res2.results_cluster_2groups_large self.skip_f = True self.rtol = 1e-7 self.rtolh = 1e-10
def setup(self): res_ols = self.res1.model.fit(cov_type='cluster', cov_kwds=dict(groups=self.groups, use_correction=True, use_t=True)) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=True) se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res2.results_cluster self.rtol = 1e-6 self.rtolh = 1e-10
def setup(self): res_ols = self.res1.model.fit( cov_type="nw-groupsum", cov_kwds=dict(time=self.time, maxlags=4, use_correction=False, use_t=True) ) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_nw_groupsum(self.res1, 4, self.time, use_correction=False) se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res2.results_nw_groupsum4 self.skip_f = True self.rtol = 1e-6 self.rtolh = 1e-10
def setup(self): res_ols = self.res1.get_robustcov_results( "nw-panel", time=self.time, maxlags=4, use_correction="hac", use_t=True, df_correction=False ) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_nw_panel(self.res1, 4, self.tidx) se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res2.results_nw_panel4 self.skip_f = True self.rtol = 1e-6 self.rtolh = 1e-10
def setup(self): res_ols = self.res1.get_robustcov_results('HC1', use_t=True) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() #TODO: check standalone function #cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=False) cov1 = res_ols.cov_HC1 se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res2.results_hc1_wls_small self.skip_f = True self.rtol = 1e-6 self.rtolh = 1e-10
def setup(self): res_ols = self.res1.get_robustcov_results('cluster', groups=(self.groups, self.time), use_correction=False, #True, use_t=False) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_cluster_2groups(self.res1, self.groups, group2=self.time, use_correction=False)[0] se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = False self.res2 = res2.results_cluster_2groups_large self.skip_f = True self.rtol = 1e-7 self.rtolh = 1e-10
def setup(self): res_ols = self.res1.get_robustcov_results('nw-groupsum', time=self.time, maxlags=4, use_correction=False, use_t=True) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_nw_groupsum(self.res1, 4, self.time, use_correction=False) se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res2.results_nw_groupsum4 self.skip_f = True self.rtol = 1e-6 self.rtolh = 1e-10
def setup(self): res_ols = self.res1.get_robustcov_results('cluster', groups=self.groups, use_correction=False, use_t=False, df_correction=True) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=False) se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = False self.res2 = res2.results_cluster_large self.skip_f = True self.rtol = 1e-6 self.rtolh = 1e-10
def setup(self): model = OLS(self.res1.model.endog, self.res1.model.exog) # res_ols = self.res1.model.fit(cov_type='cluster', res_ols = model.fit( cov_type="cluster", cov_kwds=dict(groups=self.groups, use_correction=False, use_t=False, df_correction=True) ) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=False) se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = False self.res2 = res2.results_cluster_large self.skip_f = True self.rtol = 1e-6 self.rtolh = 1e-10
def setup(self): res_ols = self.res1.model.fit(cov_type='nw-panel', cov_kwds = dict(groups=self.groups, maxlags=4, use_correction='hac', use_t=True, df_correction=False)) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_nw_panel(self.res1, 4, self.tidx) se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res2.results_nw_panel4 self.skip_f = True self.rtol = 1e-6 self.rtolh = 1e-10
def setup(self): import pandas as pd fat_array = self.groups.reshape(-1, 1) fat_groups = pd.DataFrame(fat_array) res_ols = self.res1.get_robustcov_results('cluster', groups=fat_groups, use_correction=True, use_t=True) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=True) se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res2.results_cluster self.rtol = 1e-6 self.rtolh = 1e-10
def setup(self): res_ols = self.res1.get_robustcov_results( "nw-panel", time=self.time, maxlags=4, use_correction="hac", use_t=True, df_correction=False, ) self.res3 = self.res1 self.res1 = res_ols self.bse_robust = res_ols.bse self.cov_robust = res_ols.cov_params() cov1 = sw.cov_nw_panel(self.res1, 4, self.tidx) se1 = sw.se_cov(cov1) self.bse_robust2 = se1 self.cov_robust2 = cov1 self.small = True self.res2 = res2.results_nw_panel4 self.skip_f = True self.rtol = 1e-6 self.rtolh = 1e-10
mask = (xx!=-999.0).all(1) #nan code in dta file mask.shape y = y[mask] xx = xx[mask] group = group[mask] #run OLS res_srs = sm.OLS(y, xx).fit() print('params ', res_srs.params) print('bse_OLS ', res_srs.bse) #get cluster robust standard errors and compare with STATA cov_cr = sw.cov_cluster(res_srs, group.astype(int)) bse_cr = sw.se_cov(cov_cr) print('bse_rob ', bse_cr) res_stata = np.rec.array( [('growth', '|', -0.1027121, 0.22917029999999999, -0.45000000000000001, 0.65500000000000003, -0.55483519999999997, 0.34941109999999997), ('emer', '|', -5.4449319999999997, 0.72939690000000001, -7.46, 0.0, -6.8839379999999997, -4.0059269999999998), ('yr_rnd', '|', -51.075690000000002, 22.83615, -2.2400000000000002, 0.027, -96.128439999999998, -6.0229350000000004), ('_cons', '|', 740.3981, 13.460760000000001, 55.0, 0.0, 713.84180000000003, 766.95439999999996)], dtype=[('exogname', '|S6'), ('del', '|S1'), ('params', 'float'), ('bse', 'float'), ('tvalues', 'float'), ('pvalues', 'float'), ('cilow', 'float'), ('ciupp', 'float')]) print('diff Stata', bse_cr - res_stata.bse) assert_almost_equal(bse_cr, res_stata.bse, decimal=6) #We see that in this case the robust standard errors of the parameter estimates
def test_all(self): d = macrodata.load().data #import datasetswsm.greene as g #d = g.load('5-1') #growth rates gs_l_realinv = 400 * np.diff(np.log(d['realinv'])) gs_l_realgdp = 400 * np.diff(np.log(d['realgdp'])) #simple diff, not growthrate, I want heteroscedasticity later for testing endogd = np.diff(d['realinv']) exogd = add_constant(np.c_[np.diff(d['realgdp']), d['realint'][:-1]], prepend=True) endogg = gs_l_realinv exogg = add_constant(np.c_[gs_l_realgdp, d['realint'][:-1]],prepend=True) res_ols = OLS(endogg, exogg).fit() #print res_ols.params mod_g1 = GLSAR(endogg, exogg, rho=-0.108136) res_g1 = mod_g1.fit() #print res_g1.params mod_g2 = GLSAR(endogg, exogg, rho=-0.108136) #-0.1335859) from R res_g2 = mod_g2.iterative_fit(maxiter=5) #print res_g2.params rho = -0.108136 # coefficient std. error t-ratio p-value 95% CONFIDENCE INTERVAL partable = np.array([ [-9.50990, 0.990456, -9.602, 3.65e-018, -11.4631, -7.55670], # *** [ 4.37040, 0.208146, 21.00, 2.93e-052, 3.95993, 4.78086], # *** [-0.579253, 0.268009, -2.161, 0.0319, -1.10777, -0.0507346]]) # ** #Statistics based on the rho-differenced data: result_gretl_g1 = dict( endog_mean = ("Mean dependent var", 3.113973), endog_std = ("S.D. dependent var", 18.67447), ssr = ("Sum squared resid", 22530.90), mse_resid_sqrt = ("S.E. of regression", 10.66735), rsquared = ("R-squared", 0.676973), rsquared_adj = ("Adjusted R-squared", 0.673710), fvalue = ("F(2, 198)", 221.0475), f_pvalue = ("P-value(F)", 3.56e-51), resid_acf1 = ("rho", -0.003481), dw = ("Durbin-Watson", 1.993858)) #fstatistic, p-value, df1, df2 reset_2_3 = [5.219019, 0.00619, 2, 197, "f"] reset_2 = [7.268492, 0.00762, 1, 198, "f"] reset_3 = [5.248951, 0.023, 1, 198, "f"] #LM-statistic, p-value, df arch_4 = [7.30776, 0.120491, 4, "chi2"] #multicollinearity vif = [1.002, 1.002] cond_1norm = 6862.0664 determinant = 1.0296049e+009 reciprocal_condition_number = 0.013819244 #Chi-square(2): test-statistic, pvalue, df normality = [20.2792, 3.94837e-005, 2] #tests res = res_g1 #with rho from Gretl #basic assert_almost_equal(res.params, partable[:,0], 4) assert_almost_equal(res.bse, partable[:,1], 6) assert_almost_equal(res.tvalues, partable[:,2], 2) assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2) #assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=7) #not in gretl #assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=7) #FAIL #assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=7) #FAIL assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5) assert_almost_equal(res.fvalue, result_gretl_g1['fvalue'][1], decimal=4) assert_approx_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], significant=2) #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO #arch #sm_arch = smsdia.acorr_lm(res.wresid**2, maxlag=4, autolag=None) sm_arch = smsdia.het_arch(res.wresid, maxlag=4) assert_almost_equal(sm_arch[0], arch_4[0], decimal=4) assert_almost_equal(sm_arch[1], arch_4[1], decimal=6) #tests res = res_g2 #with estimated rho #estimated lag coefficient assert_almost_equal(res.model.rho, rho, decimal=3) #basic assert_almost_equal(res.params, partable[:,0], 4) assert_almost_equal(res.bse, partable[:,1], 3) assert_almost_equal(res.tvalues, partable[:,2], 2) assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2) #assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=7) #not in gretl #assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=7) #FAIL #assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=7) #FAIL assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5) assert_almost_equal(res.fvalue, result_gretl_g1['fvalue'][1], decimal=0) assert_almost_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], decimal=6) #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO c = oi.reset_ramsey(res, degree=2) compare_ftest(c, reset_2, decimal=(2,4)) c = oi.reset_ramsey(res, degree=3) compare_ftest(c, reset_2_3, decimal=(2,4)) #arch #sm_arch = smsdia.acorr_lm(res.wresid**2, maxlag=4, autolag=None) sm_arch = smsdia.het_arch(res.wresid, maxlag=4) assert_almost_equal(sm_arch[0], arch_4[0], decimal=1) assert_almost_equal(sm_arch[1], arch_4[1], decimal=2) ''' Performing iterative calculation of rho... ITER RHO ESS 1 -0.10734 22530.9 2 -0.10814 22530.9 Model 4: Cochrane-Orcutt, using observations 1959:3-2009:3 (T = 201) Dependent variable: ds_l_realinv rho = -0.108136 coefficient std. error t-ratio p-value ------------------------------------------------------------- const -9.50990 0.990456 -9.602 3.65e-018 *** ds_l_realgdp 4.37040 0.208146 21.00 2.93e-052 *** realint_1 -0.579253 0.268009 -2.161 0.0319 ** Statistics based on the rho-differenced data: Mean dependent var 3.113973 S.D. dependent var 18.67447 Sum squared resid 22530.90 S.E. of regression 10.66735 R-squared 0.676973 Adjusted R-squared 0.673710 F(2, 198) 221.0475 P-value(F) 3.56e-51 rho -0.003481 Durbin-Watson 1.993858 ''' ''' RESET test for specification (squares and cubes) Test statistic: F = 5.219019, with p-value = P(F(2,197) > 5.21902) = 0.00619 RESET test for specification (squares only) Test statistic: F = 7.268492, with p-value = P(F(1,198) > 7.26849) = 0.00762 RESET test for specification (cubes only) Test statistic: F = 5.248951, with p-value = P(F(1,198) > 5.24895) = 0.023: ''' ''' Test for ARCH of order 4 coefficient std. error t-ratio p-value -------------------------------------------------------- alpha(0) 97.0386 20.3234 4.775 3.56e-06 *** alpha(1) 0.176114 0.0714698 2.464 0.0146 ** alpha(2) -0.0488339 0.0724981 -0.6736 0.5014 alpha(3) -0.0705413 0.0737058 -0.9571 0.3397 alpha(4) 0.0384531 0.0725763 0.5298 0.5968 Null hypothesis: no ARCH effect is present Test statistic: LM = 7.30776 with p-value = P(Chi-square(4) > 7.30776) = 0.120491: ''' ''' Variance Inflation Factors Minimum possible value = 1.0 Values > 10.0 may indicate a collinearity problem ds_l_realgdp 1.002 realint_1 1.002 VIF(j) = 1/(1 - R(j)^2), where R(j) is the multiple correlation coefficient between variable j and the other independent variables Properties of matrix X'X: 1-norm = 6862.0664 Determinant = 1.0296049e+009 Reciprocal condition number = 0.013819244 ''' ''' Test for ARCH of order 4 - Null hypothesis: no ARCH effect is present Test statistic: LM = 7.30776 with p-value = P(Chi-square(4) > 7.30776) = 0.120491 Test of common factor restriction - Null hypothesis: restriction is acceptable Test statistic: F(2, 195) = 0.426391 with p-value = P(F(2, 195) > 0.426391) = 0.653468 Test for normality of residual - Null hypothesis: error is normally distributed Test statistic: Chi-square(2) = 20.2792 with p-value = 3.94837e-005: ''' #no idea what this is ''' Augmented regression for common factor test OLS, using observations 1959:3-2009:3 (T = 201) Dependent variable: ds_l_realinv coefficient std. error t-ratio p-value --------------------------------------------------------------- const -10.9481 1.35807 -8.062 7.44e-014 *** ds_l_realgdp 4.28893 0.229459 18.69 2.40e-045 *** realint_1 -0.662644 0.334872 -1.979 0.0492 ** ds_l_realinv_1 -0.108892 0.0715042 -1.523 0.1294 ds_l_realgdp_1 0.660443 0.390372 1.692 0.0923 * realint_2 0.0769695 0.341527 0.2254 0.8219 Sum of squared residuals = 22432.8 Test of common factor restriction Test statistic: F(2, 195) = 0.426391, with p-value = 0.653468 ''' ################ with OLS, HAC errors #Model 5: OLS, using observations 1959:2-2009:3 (T = 202) #Dependent variable: ds_l_realinv #HAC standard errors, bandwidth 4 (Bartlett kernel) #coefficient std. error t-ratio p-value 95% CONFIDENCE INTERVAL #for confidence interval t(199, 0.025) = 1.972 partable = np.array([ [-9.48167, 1.17709, -8.055, 7.17e-014, -11.8029, -7.16049], # *** [4.37422, 0.328787, 13.30, 2.62e-029, 3.72587, 5.02258], #*** [-0.613997, 0.293619, -2.091, 0.0378, -1.19300, -0.0349939]]) # ** result_gretl_g1 = dict( endog_mean = ("Mean dependent var", 3.257395), endog_std = ("S.D. dependent var", 18.73915), ssr = ("Sum squared resid", 22799.68), mse_resid_sqrt = ("S.E. of regression", 10.70380), rsquared = ("R-squared", 0.676978), rsquared_adj = ("Adjusted R-squared", 0.673731), fvalue = ("F(2, 199)", 90.79971), f_pvalue = ("P-value(F)", 9.53e-29), llf = ("Log-likelihood", -763.9752), aic = ("Akaike criterion", 1533.950), bic = ("Schwarz criterion", 1543.875), hqic = ("Hannan-Quinn", 1537.966), resid_acf1 = ("rho", -0.107341), dw = ("Durbin-Watson", 2.213805)) linear_logs = [1.68351, 0.430953, 2, "chi2"] #for logs: dropping 70 nan or incomplete observations, T=133 #(res_ols.model.exog <=0).any(1).sum() = 69 ?not 70 linear_squares = [7.52477, 0.0232283, 2, "chi2"] #Autocorrelation, Breusch-Godfrey test for autocorrelation up to order 4 lm_acorr4 = [1.17928, 0.321197, 4, 195, "F"] lm2_acorr4 = [4.771043, 0.312, 4, "chi2"] acorr_ljungbox4 = [5.23587, 0.264, 4, "chi2"] #break cusum_Harvey_Collier = [0.494432, 0.621549, 198, "t"] #stats.t.sf(0.494432, 198)*2 #see cusum results in files break_qlr = [3.01985, 0.1, 3, 196, "maxF"] #TODO check this, max at 2001:4 break_chow = [13.1897, 0.00424384, 3, "chi2"] # break at 1984:1 arch_4 = [3.43473, 0.487871, 4, "chi2"] normality = [23.962, 0.00001, 2, "chi2"] het_white = [33.503723, 0.000003, 5, "chi2"] het_breush_pagan = [1.302014, 0.521520, 2, "chi2"] #TODO: not available het_breush_pagan_konker = [0.709924, 0.701200, 2, "chi2"] reset_2_3 = [5.219019, 0.00619, 2, 197, "f"] reset_2 = [7.268492, 0.00762, 1, 198, "f"] reset_3 = [5.248951, 0.023, 1, 198, "f"] #not available cond_1norm = 5984.0525 determinant = 7.1087467e+008 reciprocal_condition_number = 0.013826504 vif = [1.001, 1.001] names = 'date residual leverage influence DFFITS'.split() cur_dir = os.path.abspath(os.path.dirname(__file__)) fpath = os.path.join(cur_dir, 'results/leverage_influence_ols_nostars.txt') lev = np.genfromtxt(fpath, skip_header=3, skip_footer=1, converters={0:lambda s: s}) #either numpy 1.6 or python 3.2 changed behavior if np.isnan(lev[-1]['f1']): lev = np.genfromtxt(fpath, skip_header=3, skip_footer=2, converters={0:lambda s: s}) lev.dtype.names = names res = res_ols #for easier copying cov_hac = sw.cov_hac_simple(res, nlags=4, use_correction=False) bse_hac = sw.se_cov(cov_hac) assert_almost_equal(res.params, partable[:,0], 5) assert_almost_equal(bse_hac, partable[:,1], 5) #TODO assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2) #assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=7) #not in gretl assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=6) #FAIL assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=6) #FAIL assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5) #f-value is based on cov_hac I guess #assert_almost_equal(res.fvalue, result_gretl_g1['fvalue'][1], decimal=0) #FAIL #assert_approx_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], significant=1) #FAIL #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO c = oi.reset_ramsey(res, degree=2) compare_ftest(c, reset_2, decimal=(6,5)) c = oi.reset_ramsey(res, degree=3) compare_ftest(c, reset_2_3, decimal=(6,5)) linear_sq = smsdia.linear_lm(res.resid, res.model.exog) assert_almost_equal(linear_sq[0], linear_squares[0], decimal=6) assert_almost_equal(linear_sq[1], linear_squares[1], decimal=7) hbpk = smsdia.het_breushpagan(res.resid, res.model.exog) assert_almost_equal(hbpk[0], het_breush_pagan_konker[0], decimal=6) assert_almost_equal(hbpk[1], het_breush_pagan_konker[1], decimal=6) hw = smsdia.het_white(res.resid, res.model.exog) assert_almost_equal(hw[:2], het_white[:2], 6) #arch #sm_arch = smsdia.acorr_lm(res.resid**2, maxlag=4, autolag=None) sm_arch = smsdia.het_arch(res.resid, maxlag=4) assert_almost_equal(sm_arch[0], arch_4[0], decimal=5) assert_almost_equal(sm_arch[1], arch_4[1], decimal=6) vif2 = [oi.variance_inflation_factor(res.model.exog, k) for k in [1,2]] infl = oi.OLSInfluence(res_ols) #print np.max(np.abs(lev['DFFITS'] - infl.dffits[0])) #print np.max(np.abs(lev['leverage'] - infl.hat_matrix_diag)) #print np.max(np.abs(lev['influence'] - infl.influence)) #just added this based on Gretl #just rough test, low decimal in Gretl output, assert_almost_equal(lev['residual'], res.resid, decimal=3) assert_almost_equal(lev['DFFITS'], infl.dffits[0], decimal=3) assert_almost_equal(lev['leverage'], infl.hat_matrix_diag, decimal=3) assert_almost_equal(lev['influence'], infl.influence, decimal=4)
group = pet[:,0].astype(int) time = pet[:,1].astype(int) exog = sm.add_constant(pet[:,2]) res = sm.OLS(endog, exog).fit() cov01, covg, covt = sw.cov_cluster_2groups(res, group, group2=time) #Reference number from Petersen #http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.htm bse_petw = [0.0284, 0.0284] bse_pet0 = [0.0670, 0.0506] bse_pet1 = [0.0234, 0.0334] #year bse_pet01 = [0.0651, 0.0536] #firm and year bse_0 = sw.se_cov(covg) bse_1 = sw.se_cov(covt) bse_01 = sw.se_cov(cov01) print('OLS ', res.bse) print('het HC0 ', res.HC0_se, bse_petw - res.HC0_se) print('het firm ', bse_0, bse_0 - bse_pet0) print('het year ', bse_1, bse_1 - bse_pet1) print('het firm & year', bse_01, bse_01 - bse_pet01) print('relative difference standard error het firm & year to OLS') print(' ', bse_01 / res.bse) #From the last line we see that the cluster and year robust standard errors #are approximately twice those of OLS
def test_all(self): d = macrodata.load().data #import datasetswsm.greene as g #d = g.load('5-1') #growth rates gs_l_realinv = 400 * np.diff(np.log(d['realinv'])) gs_l_realgdp = 400 * np.diff(np.log(d['realgdp'])) #simple diff, not growthrate, I want heteroscedasticity later for testing endogd = np.diff(d['realinv']) exogd = add_constant(np.c_[np.diff(d['realgdp']), d['realint'][:-1]]) endogg = gs_l_realinv exogg = add_constant(np.c_[gs_l_realgdp, d['realint'][:-1]]) res_ols = OLS(endogg, exogg).fit() #print res_ols.params mod_g1 = GLSAR(endogg, exogg, rho=-0.108136) res_g1 = mod_g1.fit() #print res_g1.params mod_g2 = GLSAR(endogg, exogg, rho=-0.108136) #-0.1335859) from R res_g2 = mod_g2.iterative_fit(maxiter=5) #print res_g2.params rho = -0.108136 # coefficient std. error t-ratio p-value 95% CONFIDENCE INTERVAL partable = np.array([ [-9.50990, 0.990456, -9.602, 3.65e-018, -11.4631, -7.55670], # *** [ 4.37040, 0.208146, 21.00, 2.93e-052, 3.95993, 4.78086], # *** [-0.579253, 0.268009, -2.161, 0.0319, -1.10777, -0.0507346]]) # ** #Statistics based on the rho-differenced data: result_gretl_g1 = dict( endog_mean = ("Mean dependent var", 3.113973), endog_std = ("S.D. dependent var", 18.67447), ssr = ("Sum squared resid", 22530.90), mse_resid_sqrt = ("S.E. of regression", 10.66735), rsquared = ("R-squared", 0.676973), rsquared_adj = ("Adjusted R-squared", 0.673710), fvalue = ("F(2, 198)", 221.0475), f_pvalue = ("P-value(F)", 3.56e-51), resid_acf1 = ("rho", -0.003481), dw = ("Durbin-Watson", 1.993858)) #fstatistic, p-value, df1, df2 reset_2_3 = [5.219019, 0.00619, 2, 197, "f"] reset_2 = [7.268492, 0.00762, 1, 198, "f"] reset_3 = [5.248951, 0.023, 1, 198, "f"] #LM-statistic, p-value, df arch_4 = [7.30776, 0.120491, 4, "chi2"] #multicollinearity vif = [1.002, 1.002] cond_1norm = 6862.0664 determinant = 1.0296049e+009 reciprocal_condition_number = 0.013819244 #Chi-square(2): test-statistic, pvalue, df normality = [20.2792, 3.94837e-005, 2] #tests res = res_g1 #with rho from Gretl #basic assert_almost_equal(res.params, partable[:,0], 4) assert_almost_equal(res.bse, partable[:,1], 6) assert_almost_equal(res.tvalues, partable[:,2], 2) assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2) #assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=7) #not in gretl #assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=7) #FAIL #assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=7) #FAIL assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5) assert_almost_equal(res.fvalue, result_gretl_g1['fvalue'][1], decimal=4) assert_approx_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], significant=2) #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO #arch #sm_arch = smsdia.acorr_lm(res.wresid**2, maxlag=4, autolag=None) sm_arch = smsdia.het_arch(res.wresid, maxlag=4) assert_almost_equal(sm_arch[0], arch_4[0], decimal=4) assert_almost_equal(sm_arch[1], arch_4[1], decimal=6) #tests res = res_g2 #with estimated rho #estimated lag coefficient assert_almost_equal(res.model.rho, rho, decimal=3) #basic assert_almost_equal(res.params, partable[:,0], 4) assert_almost_equal(res.bse, partable[:,1], 3) assert_almost_equal(res.tvalues, partable[:,2], 2) assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2) #assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=7) #not in gretl #assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=7) #FAIL #assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=7) #FAIL assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5) assert_almost_equal(res.fvalue, result_gretl_g1['fvalue'][1], decimal=0) assert_almost_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], decimal=6) #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO c = oi.reset_ramsey(res, degree=2) compare_ftest(c, reset_2, decimal=(2,4)) c = oi.reset_ramsey(res, degree=3) compare_ftest(c, reset_2_3, decimal=(2,4)) #arch #sm_arch = smsdia.acorr_lm(res.wresid**2, maxlag=4, autolag=None) sm_arch = smsdia.het_arch(res.wresid, maxlag=4) assert_almost_equal(sm_arch[0], arch_4[0], decimal=1) assert_almost_equal(sm_arch[1], arch_4[1], decimal=2) ''' Performing iterative calculation of rho... ITER RHO ESS 1 -0.10734 22530.9 2 -0.10814 22530.9 Model 4: Cochrane-Orcutt, using observations 1959:3-2009:3 (T = 201) Dependent variable: ds_l_realinv rho = -0.108136 coefficient std. error t-ratio p-value ------------------------------------------------------------- const -9.50990 0.990456 -9.602 3.65e-018 *** ds_l_realgdp 4.37040 0.208146 21.00 2.93e-052 *** realint_1 -0.579253 0.268009 -2.161 0.0319 ** Statistics based on the rho-differenced data: Mean dependent var 3.113973 S.D. dependent var 18.67447 Sum squared resid 22530.90 S.E. of regression 10.66735 R-squared 0.676973 Adjusted R-squared 0.673710 F(2, 198) 221.0475 P-value(F) 3.56e-51 rho -0.003481 Durbin-Watson 1.993858 ''' ''' RESET test for specification (squares and cubes) Test statistic: F = 5.219019, with p-value = P(F(2,197) > 5.21902) = 0.00619 RESET test for specification (squares only) Test statistic: F = 7.268492, with p-value = P(F(1,198) > 7.26849) = 0.00762 RESET test for specification (cubes only) Test statistic: F = 5.248951, with p-value = P(F(1,198) > 5.24895) = 0.023: ''' ''' Test for ARCH of order 4 coefficient std. error t-ratio p-value -------------------------------------------------------- alpha(0) 97.0386 20.3234 4.775 3.56e-06 *** alpha(1) 0.176114 0.0714698 2.464 0.0146 ** alpha(2) -0.0488339 0.0724981 -0.6736 0.5014 alpha(3) -0.0705413 0.0737058 -0.9571 0.3397 alpha(4) 0.0384531 0.0725763 0.5298 0.5968 Null hypothesis: no ARCH effect is present Test statistic: LM = 7.30776 with p-value = P(Chi-square(4) > 7.30776) = 0.120491: ''' ''' Variance Inflation Factors Minimum possible value = 1.0 Values > 10.0 may indicate a collinearity problem ds_l_realgdp 1.002 realint_1 1.002 VIF(j) = 1/(1 - R(j)^2), where R(j) is the multiple correlation coefficient between variable j and the other independent variables Properties of matrix X'X: 1-norm = 6862.0664 Determinant = 1.0296049e+009 Reciprocal condition number = 0.013819244 ''' ''' Test for ARCH of order 4 - Null hypothesis: no ARCH effect is present Test statistic: LM = 7.30776 with p-value = P(Chi-square(4) > 7.30776) = 0.120491 Test of common factor restriction - Null hypothesis: restriction is acceptable Test statistic: F(2, 195) = 0.426391 with p-value = P(F(2, 195) > 0.426391) = 0.653468 Test for normality of residual - Null hypothesis: error is normally distributed Test statistic: Chi-square(2) = 20.2792 with p-value = 3.94837e-005: ''' #no idea what this is ''' Augmented regression for common factor test OLS, using observations 1959:3-2009:3 (T = 201) Dependent variable: ds_l_realinv coefficient std. error t-ratio p-value --------------------------------------------------------------- const -10.9481 1.35807 -8.062 7.44e-014 *** ds_l_realgdp 4.28893 0.229459 18.69 2.40e-045 *** realint_1 -0.662644 0.334872 -1.979 0.0492 ** ds_l_realinv_1 -0.108892 0.0715042 -1.523 0.1294 ds_l_realgdp_1 0.660443 0.390372 1.692 0.0923 * realint_2 0.0769695 0.341527 0.2254 0.8219 Sum of squared residuals = 22432.8 Test of common factor restriction Test statistic: F(2, 195) = 0.426391, with p-value = 0.653468 ''' ################ with OLS, HAC errors #Model 5: OLS, using observations 1959:2-2009:3 (T = 202) #Dependent variable: ds_l_realinv #HAC standard errors, bandwidth 4 (Bartlett kernel) #coefficient std. error t-ratio p-value 95% CONFIDENCE INTERVAL #for confidence interval t(199, 0.025) = 1.972 partable = np.array([ [-9.48167, 1.17709, -8.055, 7.17e-014, -11.8029, -7.16049], # *** [4.37422, 0.328787, 13.30, 2.62e-029, 3.72587, 5.02258], #*** [-0.613997, 0.293619, -2.091, 0.0378, -1.19300, -0.0349939]]) # ** result_gretl_g1 = dict( endog_mean = ("Mean dependent var", 3.257395), endog_std = ("S.D. dependent var", 18.73915), ssr = ("Sum squared resid", 22799.68), mse_resid_sqrt = ("S.E. of regression", 10.70380), rsquared = ("R-squared", 0.676978), rsquared_adj = ("Adjusted R-squared", 0.673731), fvalue = ("F(2, 199)", 90.79971), f_pvalue = ("P-value(F)", 9.53e-29), llf = ("Log-likelihood", -763.9752), aic = ("Akaike criterion", 1533.950), bic = ("Schwarz criterion", 1543.875), hqic = ("Hannan-Quinn", 1537.966), resid_acf1 = ("rho", -0.107341), dw = ("Durbin-Watson", 2.213805)) linear_logs = [1.68351, 0.430953, 2, "chi2"] #for logs: dropping 70 nan or incomplete observations, T=133 #(res_ols.model.exog <=0).any(1).sum() = 69 ?not 70 linear_squares = [7.52477, 0.0232283, 2, "chi2"] #Autocorrelation, Breusch-Godfrey test for autocorrelation up to order 4 lm_acorr4 = [1.17928, 0.321197, 4, 195, "F"] lm2_acorr4 = [4.771043, 0.312, 4, "chi2"] acorr_ljungbox4 = [5.23587, 0.264, 4, "chi2"] #break cusum_Harvey_Collier = [0.494432, 0.621549, 198, "t"] #stats.t.sf(0.494432, 198)*2 #see cusum results in files break_qlr = [3.01985, 0.1, 3, 196, "maxF"] #TODO check this, max at 2001:4 break_chow = [13.1897, 0.00424384, 3, "chi2"] # break at 1984:1 arch_4 = [3.43473, 0.487871, 4, "chi2"] normality = [23.962, 0.00001, 2, "chi2"] het_white = [33.503723, 0.000003, 5, "chi2"] het_breusch_pagan = [1.302014, 0.521520, 2, "chi2"] #TODO: not available het_breusch_pagan_konker = [0.709924, 0.701200, 2, "chi2"] reset_2_3 = [5.219019, 0.00619, 2, 197, "f"] reset_2 = [7.268492, 0.00762, 1, 198, "f"] reset_3 = [5.248951, 0.023, 1, 198, "f"] #not available cond_1norm = 5984.0525 determinant = 7.1087467e+008 reciprocal_condition_number = 0.013826504 vif = [1.001, 1.001] names = 'date residual leverage influence DFFITS'.split() cur_dir = os.path.abspath(os.path.dirname(__file__)) fpath = os.path.join(cur_dir, 'results/leverage_influence_ols_nostars.txt') lev = np.genfromtxt(fpath, skip_header=3, skip_footer=1, converters={0:lambda s: s}) #either numpy 1.6 or python 3.2 changed behavior if np.isnan(lev[-1]['f1']): lev = np.genfromtxt(fpath, skip_header=3, skip_footer=2, converters={0:lambda s: s}) lev.dtype.names = names res = res_ols #for easier copying cov_hac = sw.cov_hac_simple(res, nlags=4, use_correction=False) bse_hac = sw.se_cov(cov_hac) assert_almost_equal(res.params, partable[:,0], 5) assert_almost_equal(bse_hac, partable[:,1], 5) #TODO assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2) assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=4) #not in gretl assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=6) #FAIL assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=6) #FAIL assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5) #f-value is based on cov_hac I guess #res2 = res.get_robustcov_results(cov_type='HC1') # TODO: fvalue differs from Gretl, trying any of the HCx #assert_almost_equal(res2.fvalue, result_gretl_g1['fvalue'][1], decimal=0) #FAIL #assert_approx_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], significant=1) #FAIL #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO c = oi.reset_ramsey(res, degree=2) compare_ftest(c, reset_2, decimal=(6,5)) c = oi.reset_ramsey(res, degree=3) compare_ftest(c, reset_2_3, decimal=(6,5)) linear_sq = smsdia.linear_lm(res.resid, res.model.exog) assert_almost_equal(linear_sq[0], linear_squares[0], decimal=6) assert_almost_equal(linear_sq[1], linear_squares[1], decimal=7) hbpk = smsdia.het_breuschpagan(res.resid, res.model.exog) assert_almost_equal(hbpk[0], het_breusch_pagan_konker[0], decimal=6) assert_almost_equal(hbpk[1], het_breusch_pagan_konker[1], decimal=6) hw = smsdia.het_white(res.resid, res.model.exog) assert_almost_equal(hw[:2], het_white[:2], 6) #arch #sm_arch = smsdia.acorr_lm(res.resid**2, maxlag=4, autolag=None) sm_arch = smsdia.het_arch(res.resid, maxlag=4) assert_almost_equal(sm_arch[0], arch_4[0], decimal=5) assert_almost_equal(sm_arch[1], arch_4[1], decimal=6) vif2 = [oi.variance_inflation_factor(res.model.exog, k) for k in [1,2]] infl = oi.OLSInfluence(res_ols) #print np.max(np.abs(lev['DFFITS'] - infl.dffits[0])) #print np.max(np.abs(lev['leverage'] - infl.hat_matrix_diag)) #print np.max(np.abs(lev['influence'] - infl.influence)) #just added this based on Gretl #just rough test, low decimal in Gretl output, assert_almost_equal(lev['residual'], res.resid, decimal=3) assert_almost_equal(lev['DFFITS'], infl.dffits[0], decimal=3) assert_almost_equal(lev['leverage'], infl.hat_matrix_diag, decimal=3) assert_almost_equal(lev['influence'], infl.influence, decimal=4)
mask = (xx!=-999.0).all(1) #nan code in dta file mask.shape y = y[mask] xx = xx[mask] group = group[mask] #run OLS res_srs = sm.OLS(y, xx).fit() print 'params ', res_srs.params print 'bse_OLS ', res_srs.bse #get cluster robust standard errors and compare with STATA cov_cr = sw.cov_cluster(res_srs, group.astype(int)) bse_cr = sw.se_cov(cov_cr) print 'bse_rob ', bse_cr res_stata = np.rec.array( [ ('growth', '|', -0.1027121, 0.22917029999999999, -0.45000000000000001, 0.65500000000000003, -0.55483519999999997, 0.34941109999999997), ('emer', '|', -5.4449319999999997, 0.72939690000000001, -7.46, 0.0, -6.8839379999999997, -4.0059269999999998), ('yr_rnd', '|', -51.075690000000002, 22.83615, -2.2400000000000002, 0.027, -96.128439999999998, -6.0229350000000004), ('_cons', '|', 740.3981, 13.460760000000001, 55.0, 0.0, 713.84180000000003, 766.95439999999996)], dtype=[('exogname', '|S6'), ('del', '|S1'), ('params', '<f8'), ('bse', '<f8'), ('tvalues', '<f8'), ('pvalues', '<f8'), ('cilow', '<f8'), ('ciupp', '<f8')]) print 'diff Stata', bse_cr - res_stata.bse assert_almost_equal(bse_cr, res_stata.bse, decimal=6) #We see that in this case the robust standard errors of the parameter estimates
#import statsmodels.sandbox.panel.sandwich_covariance_generic as swg nobs = 100 kvars = 4 #including constant x = np.random.randn(nobs, kvars-1) exog = sm.add_constant(x, prepend=True) params_true = np.ones(kvars) y_true = np.dot(exog, params_true) sigma = 0.1 + np.exp(exog[:,-1]) endog = y_true + sigma * np.random.randn(nobs) self = sm.OLS(endog, exog).fit() print self.HC3_se print sw.se_cov(sw.cov_hc3(self)) #test standalone refactoring assert_almost_equal(sw.se_cov(sw.cov_hc0(self)), self.HC0_se, 15) assert_almost_equal(sw.se_cov(sw.cov_hc1(self)), self.HC1_se, 15) assert_almost_equal(sw.se_cov(sw.cov_hc2(self)), self.HC2_se, 15) assert_almost_equal(sw.se_cov(sw.cov_hc3(self)), self.HC3_se, 15) print self.HC0_se print sw.se_cov(sw.cov_hac_simple(self, nlags=0, use_correction=False)) #test White as HAC with nlags=0, same as nlags=1 ? bse_hac0 = sw.se_cov(sw.cov_hac_simple(self, nlags=0, use_correction=False)) assert_almost_equal(bse_hac0, self.HC0_se, 15) print bse_hac0 #test White as HAC with nlags=0, same as nlags=1 ? bse_hac0c = sw.se_cov(sw.cov_hac_simple(self, nlags=0, use_correction=True)) assert_almost_equal(bse_hac0c, self.HC1_se, 15)
# res.resid is of transformed model # np.corrcoef(res.resid.reshape(-1,n_groups, order='F')) y_pred = np.dot(mod.exog, res.params) resid = y - y_pred print np.corrcoef(resid.reshape(-1, n_groups, order="F")) print resid.std() err = y_pred - dgp.y_true print err.std() # OLS standard errors are too small mod.res_pooled.params mod.res_pooled.bse # heteroscedasticity robust doesn't help mod.res_pooled.HC1_se # compare with cluster robust se print sw.se_cov(sw.cov_cluster(mod.res_pooled, dgp.groups.astype(int))) # not bad, pretty close to panel estimator # and with Newey-West Hac print sw.se_cov(sw.cov_nw_panel(mod.res_pooled, 4, mod.group.groupidx)) # too small, assuming no bugs, # see Peterson assuming it refers to same kind of model print dgp.cov mod2 = ShortPanelGLS(y, dgp.exog, dgp.groups) res2 = mod2.fit_iterative(2) print res2.params print res2.bse # both implementations produce the same results: from numpy.testing import assert_almost_equal assert_almost_equal(res.params, res2.params, decimal=12)
#res.resid is of transformed model #np.corrcoef(res.resid.reshape(-1,n_groups, order='F')) y_pred = np.dot(mod.exog, res.params) resid = y - y_pred print(np.corrcoef(resid.reshape(-1, n_groups, order='F'))) print(resid.std()) err = y_pred - dgp.y_true print(err.std()) #OLS standard errors are too small mod.res_pooled.params mod.res_pooled.bse #heteroscedasticity robust doesn't help mod.res_pooled.HC1_se #compare with cluster robust se print(sw.se_cov(sw.cov_cluster(mod.res_pooled, dgp.groups.astype(int)))) #not bad, pretty close to panel estimator #and with Newey-West Hac print(sw.se_cov(sw.cov_nw_panel(mod.res_pooled, 4, mod.group.groupidx))) #too small, assuming no bugs, #see Peterson assuming it refers to same kind of model print(dgp.cov) mod2 = ShortPanelGLS(y, dgp.exog, dgp.groups) res2 = mod2.fit_iterative(2) print(res2.params) print(res2.bse) #both implementations produce the same results: from numpy.testing import assert_almost_equal assert_almost_equal(res.params, res2.params, decimal=12) assert_almost_equal(res.bse, res2.bse, decimal=13)
def get_robust_clu(cls): res1 = cls.res1 cov_clu = sw.cov_cluster(res1, group) cls.bse_rob = sw.se_cov(cov_clu) cls.corr_fact = cls.get_correction_factor(res1)