def BootstrapIVGMM(B, sec):
    """Fit the IV-GMM model on the full sample and on ``B`` resamples.

    The data frame ``df_fd`` and the column selections ``y``, ``x_exo``,
    ``x_endo`` and ``instruments`` are read from module scope.

    Parameters
    ----------
    B : int
        Number of bootstrap replications.
    sec : label
        Column that is placed in front of ``x_exo`` in the list of
        exogenous regressors.

    Returns
    -------
    tuple
        ``(results_original, params_b, std_b)``: the fit on ``df_fd``
        itself, followed by the lists of ``params`` and ``std_errors``
        collected from each resampled fit.
    """

    def _fit(frame):
        # One specification shared by the original and every resampled frame
        model = IVGMM(
            frame[y],
            frame[[sec] + x_exo],
            frame[x_endo],
            frame[instruments],
        )
        return model.fit()

    # Original model
    results_original = _fit(df_fd)

    # Bootstrap preliminaries: seed NumPy's global RNG before resampling
    params_b, std_b = [], []
    np.random.seed(seed=1)

    for _ in range(B):
        # Resample rows with replacement (frac=1 keeps the sample size)
        resampled = df_fd.sample(frac=1, replace=True)

        # Run model, then save params and std
        fitted = _fit(resampled)
        params_b.append(fitted.params)
        std_b.append(fitted.std_errors)

    return results_original, params_b, std_b
def test_gmm_cue(data):
    """Check that IVGMMCUE attains a J-statistic no larger than IVGMM's."""
    series = (data.dep, data.exog, data.endog, data.instr)

    # Default configuration; the CUE fit must take more than two iterations
    cue_fit = IVGMMCUE(*series).fit(display=False)
    assert cue_fit.iterations > 2
    gmm_fit = IVGMM(*series).fit()
    assert cue_fit.j_stat.stat <= gmm_fit.j_stat.stat

    # Same J-statistic comparison with center=False on both models
    cue_fit = IVGMMCUE(*series, center=False).fit(display=False)
    gmm_fit = IVGMM(*series, center=False).fit()
    assert cue_fit.j_stat.stat <= gmm_fit.j_stat.stat
def test_compare(data):
    """Smoke test of ``compare`` with list, dict and OrderedDict inputs."""
    labels = ('Model A', 'Model B', 'Model C', 'Model D')
    fits = [
        IV2SLS(data.dep, data.exog, data.endog, data.instr).fit(),
        IV2SLS(data.dep, data.exog, data.endog, data.instr[:, :-1]).fit(),
        IVGMM(data.dep, data.exog[:, :2], data.endog, data.instr).fit(),
        IV2SLS(data.dep, data.exog, data.endog, data.instr).fit(),
    ]

    # Plain list of results
    comparison = compare(fits)
    assert len(comparison.rsquared) == 4
    comparison.summary

    # Labelled results in a regular dict
    comparison = compare(dict(zip(labels, fits)))
    comparison.summary

    # Labelled results in an OrderedDict
    ordered = OrderedDict(zip(labels, fits))
    comparison = compare(ordered)
    comparison.summary
    comparison.pvalues

    # Models fitted without endogenous regressors or instruments
    one_regressor = IV2SLS(data.dep, data.exog[:, :1], None, None).fit()
    two_regressors = IV2SLS(data.dep, data.exog[:, :2], None, None).fit()
    comparison = compare({'Model A': one_regressor, 'Model B': two_regressors})
    comparison.summary
def test_compare(data, stars):
    """Check ``compare`` output, including the star count when ``stars`` is set."""
    labels = ("Model A", "Model B", "Model C", "Model D")
    fits = [
        IV2SLS(data.dep, data.exog, data.endog, data.instr).fit(),
        IV2SLS(data.dep, data.exog, data.endog, data.instr[:, :-1]).fit(),
        IVGMM(data.dep, data.exog[:, :2], data.endog, data.instr).fit(),
        IV2SLS(data.dep, data.exog, data.endog, data.instr).fit(),
    ]

    comparison = compare(fits, stars=stars)
    assert len(comparison.rsquared) == 4
    assert isinstance(str(comparison.summary), str)

    if stars:
        # One star for each significance threshold a p-value falls below
        below_10 = comparison.pvalues < 0.10
        below_05 = comparison.pvalues < 0.05
        below_01 = comparison.pvalues < 0.01
        total = 1 * below_10 + below_05 + below_01
        total_stars = np.asarray(total).sum()
        count = str(comparison.summary).count("*")
        print(comparison.pvalues)
        print(total)
        print(comparison.summary)
        assert count == total_stars

    # Labelled results, default stars setting
    comparison = compare(dict(zip(labels, fits)))
    assert isinstance(str(comparison.summary), str)

    # Labelled results with the requested stars setting
    labelled = dict(zip(labels, fits))
    comparison = compare(labelled, stars=stars)
    assert isinstance(str(comparison.summary), str)
    assert isinstance(comparison.pvalues, pd.DataFrame)

    # Models fitted without endogenous regressors or instruments
    one_regressor = IV2SLS(data.dep, data.exog[:, :1], None, None).fit()
    two_regressors = IV2SLS(data.dep, data.exog[:, :2], None, None).fit()
    comparison = compare(
        {"Model A": one_regressor, "Model B": two_regressors}, stars=stars
    )
    assert isinstance(str(comparison.summary), str)
def test_gmm_cue_starting_vals(data):
    """IVGMMCUE accepts IVGMM params as starting values; a short vector fails."""
    series = (data.dep, data.exog, data.endog, data.instr)

    # Starting values come from an IVGMM fit of the same specification
    start = IVGMM(*series).fit().params

    cue = IVGMMCUE(*series)
    cue.fit(starting=start, display=False)

    # Dropping the last element makes the starting vector the wrong length
    with pytest.raises(ValueError):
        cue.fit(starting=start[:-1], display=True)
def test_initial_weight(data):
    """A user-supplied initial weight must change the one-iteration estimates."""
    model = IVGMM(data.dep, data.exog, data.endog, data.instr)
    baseline = model.fit(iter_limit=1)

    # Build a weight matrix from the exog/instrument columns plus random noise
    stacked = np.concatenate([data.exog, data.instr], 1)
    noisy = stacked + np.random.standard_normal(size=stacked.shape)
    nobs = noisy.shape[0]
    initial = (noisy.T @ noisy) / nobs

    weighted = model.fit(initial_weight=initial, iter_limit=1)
    assert np.any(weighted.params != baseline.params)
def test_c_stat_exception(data):
    """``c_stat`` must raise TypeError for an int or a tuple of names."""
    model = IVGMM(data.dep, data.exog, data.endog, data.instr)
    fitted = model.fit(cov_type="robust")
    match = "variables must be a str or a list of str"

    # Neither an integer nor a tuple is an accepted ``variables`` value
    for invalid in (1, ("x1", "x2")):
        with pytest.raises(TypeError, match=match):
            fitted.c_stat(variables=invalid)
def test_initial_weight_error(data):
    """Initial weight matrices of the wrong shape must raise ValueError."""
    model = IVGMM(data.dep, data.exog, data.endog, data.instr)

    # Weight matrix built from the exog/instrument columns plus random noise
    stacked = np.concatenate([data.exog, data.instr], 1)
    noisy = stacked + np.random.standard_normal(size=stacked.shape)
    weight = (noisy.T @ noisy) / noisy.shape[0]

    # First a matrix missing a row and a column, then one missing only a row
    for wrong_shape in (weight[:-1, :-1], weight[:-1]):
        with pytest.raises(ValueError, match="initial_weight must"):
            model.fit(initial_weight=wrong_shape)
def test_c_stat_smoke(data):
    """Check C-statistic values for the joint test and for ``x1`` alone."""
    model = IVGMM(data.dep, data.exog, data.endog, data.instr)
    fitted = model.fit(cov_type="robust")

    # Joint test (no variables given)
    joint = fitted.c_stat()
    assert_allclose(joint.stat, 22.684, rtol=1e-4)
    assert_allclose(joint.pval, 0.00, atol=1e-3)

    # Single variable passed as a one-element list
    from_list = fitted.c_stat(["x1"])
    assert_allclose(from_list.stat, 0.158525, rtol=1e-3)
    assert_allclose(from_list.pval, 0.6905, rtol=1e-3)

    # Final test: a bare string must give the same statistic as the list
    from_str = fitted.c_stat("x1")
    assert_allclose(from_list.stat, from_str.stat)
def test_gmm_cue_optimization_options(data):
    """Fit IVGMMCUE with default, BFGS and L-BFGS-B optimizer settings."""
    series = (data.dep, data.exog, data.endog, data.instr)
    cue = IVGMMCUE(*series)

    default_fit = cue.fit(display=False)
    bfgs_fit = cue.fit(
        display=False,
        opt_options={"method": "BFGS", "options": {"disp": False}},
    )
    lbfgsb_fit = cue.fit(
        display=False,
        opt_options={"method": "L-BFGS-B", "options": {"disp": False}},
    )

    assert default_fit.iterations > 2
    assert bfgs_fit.iterations > 2
    assert lbfgsb_fit.iterations >= 1

    # No CUE fit may have a larger J-statistic than the IVGMM fit
    gmm_fit = IVGMM(*series).fit()
    for cue_fit in (default_fit, bfgs_fit, lbfgsb_fit):
        assert cue_fit.j_stat.stat <= gmm_fit.j_stat.stat
def test_compare(data):
    """Smoke test of ``compare`` with list and dict inputs."""
    labels = ("Model A", "Model B", "Model C", "Model D")
    fits = [
        IV2SLS(data.dep, data.exog, data.endog, data.instr).fit(),
        IV2SLS(data.dep, data.exog, data.endog, data.instr[:, :-1]).fit(),
        IVGMM(data.dep, data.exog[:, :2], data.endog, data.instr).fit(),
        IV2SLS(data.dep, data.exog, data.endog, data.instr).fit(),
    ]

    # Plain list of results
    comparison = compare(fits)
    assert len(comparison.rsquared) == 4
    comparison.summary

    # Labelled results passed as a dict built inline
    comparison = compare(dict(zip(labels, fits)))
    comparison.summary

    # Labelled results passed through a named dict; also touch pvalues
    labelled = dict(zip(labels, fits))
    comparison = compare(labelled)
    comparison.summary
    comparison.pvalues

    # Models fitted without endogenous regressors or instruments
    one_regressor = IV2SLS(data.dep, data.exog[:, :1], None, None).fit()
    two_regressors = IV2SLS(data.dep, data.exog[:, :2], None, None).fit()
    comparison = compare({"Model A": one_regressor, "Model B": two_regressors})
    comparison.summary
def test_gmm_str(data):
    """Smoke test: results convert to ``str`` for four ``cov_type`` settings."""
    model = IVGMM(data.dep, data.exog, data.endog, data.instr)
    fit_options = (
        {'cov_type': 'unadjusted'},
        {'cov_type': 'robust'},
        {'cov_type': 'clustered', 'clusters': data.clusters},
        {'cov_type': 'kernel'},
    )
    for options in fit_options:
        # Only the conversion itself is exercised; the text is discarded
        str(model.fit(**options))
def test_gmm_iter(data):
    """With ``iter_limit=100`` the fit must take more than two iterations."""
    model = IVGMM(data.dep, data.exog, data.endog, data.instr)
    fitted = model.fit(iter_limit=100)
    assert fitted.iterations > 2

    # This is just a quick smoke check of results
    get_all(fitted)
def test_gmm_str(data):
    """Smoke test: fitted results render with ``str`` for each ``cov_type``."""
    model = IVGMM(data.dep, data.exog, data.endog, data.instr)

    # Covariance estimators that need no extra fit arguments
    for cov_type in ("unadjusted", "robust"):
        str(model.fit(cov_type=cov_type))

    # The clustered estimator additionally takes the cluster labels
    clustered = model.fit(cov_type="clustered", clusters=data.clusters)
    str(clustered)

    kernel = model.fit(cov_type="kernel")
    str(kernel)
engine='xlsxwriter') for sheet, table in zip(['main', 'secondary', 'j_test'], [main_table, second_table, j_test]): table.to_excel(writer, sheet_name=sheet) writer.save() # Full Sample #-------------------------------------------- # All variables separate for sec in sec_vars: # Run model model = IVGMM(df_fd[y],\ df_fd[[sec] + x_exo],\ df_fd[x_endo],\ df_fd[instruments]) results = model.fit() # Save to excel results2Excel(results, 'gmmiv_full_sep_{}'.format(sec)) # Model with gross cds for sec in sec_vars[:6] + sec_vars[8:-2]: # Run model model = IVGMM(df_fd[y],\ df_fd[[sec] + ['cr_cd_gross'] + x_exo],\ df_fd[x_endo],\ df_fd[instruments]) results = model.fit()
def c_stat(self, vars=None):
    r"""
    C-test of endogeneity

    Parameters
    ----------
    vars : {str, list(str)}, optional
        Name, or list of names, of the endogenous variables to test for
        exogeneity.  If None, all endogenous variables are jointly tested.

    Returns
    -------
    t : WaldTestStatistic
        Object containing test statistic, p-value, distribution and null

    Raises
    ------
    TypeError
        If ``vars`` is not None, a str or a list.

    Notes
    -----
    The C statistic is the difference between the J-statistics of two
    models: one estimated by assuming one or more of the endogenous
    variables is actually exogenous, and the original model.

    The test is implemented as the difference between the J-statistics of
    two GMM estimations where both use the same weighting matrix.  The use
    of a common weighting matrix is required for the C statistic to be
    positive.

    The first model is estimated using GMM where one or more of the
    endogenous variables are assumed to be exogenous.  The model would be
    relatively efficient if the assumption were true, and two quantities
    are computed, the J statistic, :math:`J_e`, and the moment weighting
    matrix, :math:`W_e`.

    WLOG assume the q variables tested are in the final q positions so that
    the first :math:`n_{exog} + n_{instr}` rows and columns correspond to
    the moment conditions in the original model. The second J statistic is
    computed using parameters estimated using the original moment
    conditions along with the upper left block of :math:`W_e`.  Denote this
    value as :math:`J_c` where the c is used to indicate consistent.

    The test statistic is then

    .. math ::

        J_e - J_c \sim \chi^2_{m}

    where :math:`m` is the number of variables whose exogeneity is being
    tested.
    """
    # Normalise ``vars`` up front so invalid input fails with a clear error
    # instead of an obscure lookup or join failure further down.
    if vars is None:
        tested = None
    elif isinstance(vars, str):
        tested = [vars]
    elif isinstance(vars, list):
        tested = vars
    else:
        raise TypeError('vars must be a str or a list of str')

    dependent, instruments = self.model.dependent, self.model.instruments
    exog, endog = self.model.exog, self.model.endog
    if tested is None:
        # Treat every endogenous regressor as exogenous
        exog_e = c_[exog.ndarray, endog.ndarray]
        nobs = exog_e.shape[0]
        endog_e = empty((nobs, 0))
        null = 'All endogenous variables are exogenous'
    else:
        # Move only the tested columns from the endogenous to the exogenous set
        exog_e = c_[exog.ndarray, endog.pandas[tested].values]
        ex = [c for c in endog.pandas if c not in tested]
        endog_e = endog.pandas[ex].values
        null = 'Variables {0} are exogenous'.format(', '.join(tested))

    from linearmodels.iv import IVGMM
    mod = IVGMM(dependent, exog_e, endog_e, instruments)
    res_e = mod.fit(cov_type=self.cov_type, **self.cov_config)
    j_e = res_e.j_stat.stat

    x = self.model._x
    y = self.model._y
    z = self.model._z
    nz = z.shape[1]
    # Upper-left block of the weight matrix, sized to the original moments
    weight_mat_c = res_e.weight_matrix.values[:nz, :nz]
    params_c = mod.estimate_parameters(x, y, z, weight_mat_c)
    j_c = self.model._j_statistic(params_c, weight_mat_c).stat

    stat = j_e - j_c
    # Degrees of freedom: number of columns added to the exogenous set
    df = exog_e.shape[1] - exog.shape[1]
    return WaldTestStatistic(stat, null, df, name='C-statistic')