def get_table3(df):
    """Build the Stargazer object titled "Table 3 - Effects on standardized grades".

    Two regressions of ``stdgrade`` are fitted on ``df``. Both are weighted by
    the ``kwgt`` column and use standard errors clustered on ``studentid``:

    * a baseline with ``treat``, ``pol1`` and ``pol1t``;
    * an extended specification that additionally contains ``treatmentvol``,
      ``treatmentfor``, ``volcourse``, ``forcourse`` and the
      ``pol1vol``/``pol1tvol``/``pol1for``/``pol1tfor`` terms.

    Args:
        df: DataFrame containing every column named in the two formulas as
            well as ``kwgt`` and ``studentid``.

    Returns:
        The configured ``Stargazer`` instance. Only ``treat``,
        ``treatmentvol`` and ``treatmentfor`` are displayed.
    """
    baseline_formula = "stdgrade ~ treat + pol1+ pol1t"
    extended_formula = "stdgrade ~ treat + treatmentvol + treatmentfor + volcourse + forcourse + pol1 + pol1t + pol1vol + pol1tvol + pol1for + pol1tfor"

    def fit_clustered(formula):
        # OLS does not support ``weights`` (statsmodels ignores the argument
        # and only emits a warning), so the weighted fit must use WLS.
        model = smf.wls(formula=formula, data=df, weights=df["kwgt"])
        return model.fit(cov_type='cluster',
                         cov_kwds={'groups': df["studentid"]})

    ### regressions:
    models = [fit_clustered(baseline_formula), fit_clustered(extended_formula)]

    ### Table stargazer:
    stargazer = Stargazer(models)
    stargazer.custom_columns(["column 1", "column 4"], [1, 1])
    stargazer.title("Table 3 - Effects on standardized grades")
    stargazer.show_model_numbers(False)
    stargazer.significant_digits(2)
    stargazer.covariate_order(["treat", "treatmentvol", "treatmentfor"])
    stargazer.rename_covariates({
        "treat": "1st-year GPA is below 7",
        "treatmentvol": "Attendance is voluntary x treatment",
        "treatmentfor": "Absence is penalized x treatment"
    })
    stargazer.show_degrees_of_freedom(False)
    stargazer.add_line('Fixed Effects', ['No', 'No'])
    return stargazer
class StargazerTestCase(unittest.TestCase):
    """Unit tests for ``Stargazer.add_line`` and ``Stargazer.render_latex``."""

    def setUp(self):
        """Fit two small OLS models on a toy frame and wrap them in a Stargazer."""
        rows = list(zip(range(9), range(0, 18, 2)))
        self.df = pd.DataFrame(rows, columns=['a', 'b'])
        # One model without intercept, one with.
        self.est1 = smf.ols('a ~ 0 + b', self.df).fit()
        self.est2 = smf.ols('a ~ 1 + b', self.df).fit()
        self.stargazer = Stargazer([self.est1, self.est2])

    def test_add_line(self):
        """add_line rejects bad input and renders a line at every location."""
        table = self.stargazer

        # too few arguments
        with self.assertRaises(AssertionError):
            table.add_line('', [0])

        # wrong location
        with self.assertRaises(ValueError):
            table.add_line('', [0, 0], 'wrong')

        # correct usage
        for location in LineLocation:
            table.add_line(f'test {location.value}', ['N/A', 'N/A'], location)

        # Render once after all lines were added, then look for each of them.
        rendered = table.render_latex()
        for location in LineLocation:
            expected = f' test {location.value} & N/A & N/A \\'
            self.assertIn(expected, rendered)

    def test_render_latex(self):
        """Underscores are escaped only when ``escape=True`` is requested."""
        # test escaping
        self.stargazer.rename_covariates({'b': 'b_'})

        plain = self.stargazer.render_latex()
        self.assertIn(' b_ ', plain)

        escaped = self.stargazer.render_latex(escape=True)
        self.assertIn(r' b\_ ', escaped)
def get_table_4and7(dependent_var, data):
    """Fit five OLS specifications of ``dependent_var`` and tabulate them.

    The specifications are, in column order:

    1. ``state`` only
    2. ``state`` plus ``bk``, ``kfc``, ``roys``, ``co_owned``
    3. ``GAP`` only
    4. ``GAP`` plus ``bk``, ``kfc``, ``roys``, ``co_owned``
    5. specification 4 plus ``southj``, ``centralj``, ``pa1``, ``pa2``

    A constant is added to every specification. As a side effect the mean and
    standard deviation of the dependent variable are printed.

    Args:
        dependent_var: name of the outcome column in ``data``.
        data: DataFrame holding the outcome and all regressors listed above.

    Returns:
        A ``Stargazer`` table (table 4 or table 7, depending on the dataset
        passed in). The "Probability value for controls" line is only added
        when ``dependent_var`` is ``"change_in_FTE"``.
    """
    outcome = data[dependent_var]
    chain_controls = ["bk", "kfc", "roys", "co_owned"]
    region_controls = ["southj", "centralj", "pa1", "pa2"]

    def fit(regressors):
        # ``regressors`` is a single column name or a list of column names.
        return sm_api.OLS(outcome,
                          sm_api.add_constant(data[regressors])).fit()

    specifications = [
        "state",
        ["state"] + chain_controls,
        "GAP",
        ["GAP"] + chain_controls,
        ["GAP"] + chain_controls + region_controls,
    ]
    models = [fit(regressors) for regressors in specifications]

    Table = Stargazer(models)
    Table.rename_covariates({
        'state': 'New Jersey dummy',
        'GAP': 'Initial wage GAP'
    })
    Table.add_line('Controls for chain and ownership',
                   ['No', 'Yes', 'No', 'Yes', 'Yes'])
    Table.add_line('Controls for region', ['No', 'No', 'No', 'No', 'Yes'])

    # Joint F-tests for the specifications that contain controls.
    # NOTE(review): every restriction set also covers the main regressor and
    # the constant, not only the controls -- confirm this matches the
    # "Probability value for controls" label.
    pvalue_spec2 = models[1].f_test(
        '(state = 0), (bk = 0), (kfc = 0), (roys =0),(co_owned= 0),(const=0)'
    ).pvalue.round(3)
    pvalue_spec4 = models[3].f_test(
        '(GAP = 0), (bk = 0), (kfc = 0), (roys =0),(co_owned= 0),(const=0)'
    ).pvalue.round(3)
    pvalue_spec5 = models[4].f_test(
        '(GAP = 0), (bk = 0), (kfc = 0), (roys =0),(co_owned= 0),(const=0), (southj=0),(centralj=0),(pa1=0),(pa2=0)'
    ).pvalue.round(3)

    if dependent_var == "change_in_FTE":
        Table.add_line('Probability value for controls',
                       ['-', pvalue_spec2, '-', pvalue_spec4, pvalue_spec5])

    Table.title("Models for " + dependent_var)
    Table.covariate_order(['state', 'GAP'])

    print("The mean and standard deviation of the dependent variable are",
          data[dependent_var].mean(), "and", data[dependent_var].std(),
          ",respectively.")
    return Table
def df_table12(df, name):
    """Build the six-column OLS/2SLS table of ``trust`` on segregation.

    Rows with a missing value in any used column are dropped first. Three
    specifications are then estimated twice, once by OLS (HC1 covariance)
    and once by ``IV2SLS``, where ``{name}_instrument_C2_thresh`` takes the
    place of ``{name}_C2`` in the instrument matrix:

    1. ``{name}_C2`` only, full sample
    2. ``{name}_C2``, ``{name}_I`` and all controls, full sample
    3. same as 2, restricted to rows with ``democ > 1``

    Args:
        df: DataFrame with ``trust``, the ``{name}_*`` columns and the
            control columns.
        name: prefix of the segregation columns. ``'ethnicity'`` yields the
            title "Panel A. Ethnicity"; any other value yields
            "Panel B. Language".

    Returns:
        The configured ``Stargazer`` instance (columns: 3 x OLS, 3 x 2SLS).
    """
    segregation = f'{name}_C2'
    fractionalization = f'{name}_I'
    instrument = f'{name}_instrument_C2_thresh'
    controls = [
        'lnpopulation', 'lnArea', 'lnGDP_pc', 'protestants', 'muslims',
        'catholics', 'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist',
        'LOScandin', 'democ', 'mtnall'
    ]

    full_sample = df[[segregation, instrument, fractionalization, 'trust'] +
                     controls].dropna(axis=0)
    # NOTE(review): table10_11 and table6 select democracies with
    # ``democ >= 1``; confirm the strict inequality here is intended.
    demo_sample = full_sample[full_sample.democ > 1]

    def design(frame, key_column, with_controls):
        # Regressor (or instrument) matrix including a constant.
        if with_controls:
            columns = [key_column, fractionalization] + controls
        else:
            columns = key_column
        return sm.add_constant(frame[columns])

    specifications = [
        (full_sample, False),
        (full_sample, True),
        (demo_sample, True),
    ]
    ols_models = []
    iv_models = []
    for frame, with_controls in specifications:
        endog = frame['trust']
        exog = design(frame, segregation, with_controls)
        instruments = design(frame, instrument, with_controls)
        ols_models.append(sm.OLS(endog, exog).fit(cov_type='HC1'))
        iv_models.append(IV2SLS(endog, exog, instruments).fit())

    stargazer = Stargazer(ols_models + iv_models)
    stargazer.covariate_order([segregation, fractionalization])
    stargazer.rename_covariates({
        # Raw string: ``\h`` is not a valid escape sequence in a normal
        # literal and triggers a SyntaxWarning on recent Python versions.
        segregation: r'Segregation $\hat{S}$ (' f'{name}' ')',
        fractionalization: 'Fractionalization $F$ (' f'{name}' ')'
    })
    stargazer.custom_columns(['OLS'] * 3 + ['2SLS'] * 3, [1] * 6)
    stargazer.add_line('Controls', ['No', 'Yes', 'Yes', 'No', 'Yes', 'Yes'])
    stargazer.add_line('Sample',
                       ['Full', 'Full', 'Democ', 'Full', 'Full', 'Democ'])

    if name == 'ethnicity':
        stargazer.title('Panel A. Ethnicity')
    else:
        stargazer.title('Panel B. Language')
    return stargazer
def table10_11(df, name, democ):
    """Build the ten-column OLS/2SLS table for five outcome variables.

    For each outcome (``icrg_qog``, ``ef_corruption``, ``ef_property_rights``,
    ``ef_regul``, ``taxevas``) an OLS fit with HC1 covariance and an
    ``IV2SLS`` fit are produced, in that order. In the 2SLS fits
    ``{name}_instrument_C2_thresh`` takes the place of ``{name}_C2`` in the
    instrument matrix.

    Three estimation samples are used, each obtained by dropping rows with
    missing values in the regressors, controls and the outcomes of that
    group: one for ``icrg_qog``, one shared by the three ``ef_*`` outcomes,
    and one for ``taxevas``.

    Args:
        df: DataFrame with the ``{name}_*`` columns, the controls and the
            outcome columns.
        name: prefix of the segregation columns.
        democ: when equal to ``'democracy'`` the samples are restricted to
            ``democ >= 1`` and all controls enter the regressions (panel B);
            for any other value only ``{name}_I`` and ``{name}_C2`` are used
            on the full sample (panel A).

    Returns:
        The configured ``Stargazer`` instance.
    """
    segregation = f'{name}_C2'
    fractionalization = f'{name}_I'
    instrument = f'{name}_instrument_C2_thresh'
    controls = [
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'LOScandin',
        'democ', 'mtnall'
    ]

    democracies_only = democ == 'democracy'
    extra_regressors = controls if democracies_only else []
    exog_columns = [fractionalization, segregation] + extra_regressors
    instrument_columns = [fractionalization, instrument] + extra_regressors

    def build_sample(outcomes):
        # Complete cases over regressors, controls and this group's outcomes.
        frame = df[[segregation, fractionalization, instrument] + controls +
                   outcomes].dropna(axis=0)
        if democracies_only:
            frame = frame[frame.democ >= 1]
        return frame

    # Outcomes grouped by the sample they share, listed in table column order.
    outcome_groups = [
        ['icrg_qog'],
        ['ef_corruption', 'ef_property_rights', 'ef_regul'],
        ['taxevas'],
    ]

    models = []
    for outcomes in outcome_groups:
        sample = build_sample(outcomes)
        exog = sm.add_constant(sample[exog_columns])
        instruments = sm.add_constant(sample[instrument_columns])
        for outcome in outcomes:
            endog = sample[outcome]
            models.append(sm.OLS(endog, exog).fit(cov_type='HC1'))
            models.append(IV2SLS(endog, exog, instruments).fit())

    stargazer = Stargazer(models)
    stargazer.custom_columns([
        'ICRG quality of gov', 'EF Corruption', 'EF Property rights',
        'EF Regulation', 'Tax eva'
    ], [2, 2, 2, 2, 2])
    stargazer.show_model_numbers(False)
    stargazer.covariate_order([segregation, fractionalization])
    stargazer.rename_covariates({
        # Raw string: ``\h`` is not a valid escape sequence in a normal
        # literal and triggers a SyntaxWarning on recent Python versions.
        segregation: r'Segregation $\hat{S}$ (' f'{name}' ')',
        fractionalization: 'Fractionalization $F$ (' f'{name}' ')'
    })
    stargazer.add_line('Method', ['OLS', '2SLS'] * 5)

    if democracies_only:
        stargazer.title('Panel B. Democracies sample, all controls')
    else:
        stargazer.title('Panel A. Full sample, no additional controls')
    return stargazer
def table6(df, alternative=True):
    """Build the three panels of OLS regressions of segregation on its instrument.

    For each of ``ethnicity``, ``language`` and ``religion`` the segregation
    index is regressed (OLS, HC1 covariance) on the matching instrument,
    ``{group}_I`` and the controls, once on the full sample and once on the
    rows with ``democ >= 1``. Samples are complete cases over the group's
    columns, all controls, ``RulLaw`` and ``country``.

    * Panel A uses ``{group}_C2`` and ``{group}_instrument_C2_thresh``.
    * Panel B uses ``{group}_C`` and ``{group}_instrument_C_thresh``.
    * Panel C repeats the panel B language regressions after dropping the
      row whose ``country`` is ``'usa'``.

    Args:
        df: DataFrame with the ``{group}_*`` columns, the controls,
            ``RulLaw`` and ``country``.
        alternative: not used by this function; kept so existing callers
            keep working.

    Returns:
        ``[panel_a, panel_b, panel_c]`` as configured ``Stargazer`` objects.
    """
    groups = ['ethnicity', 'language', 'religion']
    controls = [
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'LOScandin',
        'democ', 'mtnall'
    ]
    # The religion regressions leave out 'LOScandin' (it is still part of
    # the complete-case filter below).
    # NOTE(review): confirm this omission is intentional.
    regression_controls = {
        'ethnicity': controls,
        'language': controls,
        'religion': [column for column in controls if column != 'LOScandin'],
    }

    def complete_cases(group):
        # Rows without missing values in any column relevant for ``group``.
        columns = [
            f'{group}_C2', f'{group}_I', f'{group}_C',
            f'{group}_instrument_C_thresh', f'{group}_instrument_C2_thresh'
        ] + controls + ['RulLaw', 'country']
        return df[columns].dropna(axis=0)

    def first_stage(frame, group, index):
        # Regress ``{group}_{index}`` on its instrument, ``{group}_I`` and
        # the controls; ``index`` is 'C2' or 'C'.
        regressors = [f'{group}_instrument_{index}_thresh', f'{group}_I'
                      ] + regression_controls[group]
        exog = sm.add_constant(frame[regressors])
        return sm.OLS(frame[f'{group}_{index}'], exog).fit(cov_type='HC1')

    def configure(table, index, shown_groups, column_labels, sample_labels,
                  title):
        # Apply covariate order, display names, headers and title to a table.
        order = []
        display_names = {}
        for group in shown_groups:
            instrument = f'{group}_instrument_{index}_thresh'
            order += [instrument, f'{group}_I']
            display_names[instrument] = f'Instrument {group[0].upper()}'
            display_names[f'{group}_I'] = f'$F$ ({group})'
        table.covariate_order(order)
        table.rename_covariates(display_names)
        table.custom_columns(column_labels, [1] * len(column_labels))
        table.show_model_numbers(False)
        table.add_line('Sample', sample_labels)
        table.title(title)

    full_samples = {group: complete_cases(group) for group in groups}
    demo_samples = {
        group: frame[frame.democ >= 1]
        for group, frame in full_samples.items()
    }
    sample_sets = (full_samples, demo_samples)

    # Column order within panels A and B: full E, L, R then democracy E, L, R.
    panel_a_models = [
        first_stage(samples[group], group, 'C2')
        for samples in sample_sets for group in groups
    ]
    panel_b_models = [
        first_stage(samples[group], group, 'C')
        for samples in sample_sets for group in groups
    ]
    # Panel C: language only, with the US removed from both samples.
    panel_c_models = [
        first_stage(
            samples['language'].set_index('country').drop(index='usa'),
            'language', 'C') for samples in sample_sets
    ]

    stargazer = Stargazer(panel_a_models)
    stargazer_a = Stargazer(panel_b_models)
    stargazer_b = Stargazer(panel_c_models)

    six_sample_labels = [
        'Full', 'Full', 'Full', 'Democracy', 'Democracy', 'Democracy'
    ]

    # Raw strings keep the LaTeX backslashes literal; '\h' in a normal string
    # literal is an invalid escape sequence and raises a SyntaxWarning on
    # recent Python versions.
    configure(stargazer, 'C2', groups,
              [r'E$\hat{S}$', r'L$\hat{S}$', r'R$\hat{S}$'] * 2,
              six_sample_labels,
              r'Panel A. Segregation index $\hat{S}$')
    configure(stargazer_a, 'C', groups,
              [r'E$\tilde{S}$', r'L$\tilde{S}$', r'R$\tilde{S}$'] * 2,
              six_sample_labels,
              r'Panel B. Segregation index $\tilde{S}$')
    configure(
        stargazer_b, 'C', ['language'],
        [r'L$\tilde{S}$', r'L$\tilde{S}$'], ['Full', 'Democracy'],
        r'Panel C. Segregation index $\tilde{S}$ for language with sample excluding the US'
    )

    return [stargazer, stargazer_a, stargazer_b]