def get_table3(df):
    """Build the Stargazer table "Table 3 - Effects on standardized grades".

    Two regressions of ``stdgrade`` are estimated, both weighted by ``kwgt``
    and with standard errors clustered on ``studentid``:

    * a baseline on ``treat`` plus the ``pol1``/``pol1t`` terms, and
    * an extended model that adds the voluntary/forced course dummies and
      their interactions with the treatment and the polynomial terms.

    Args:
        df: DataFrame holding every column named in the two formulas as well
            as ``kwgt`` (weights) and ``studentid`` (cluster identifier).

    Returns:
        Stargazer: the configured (not yet rendered) table with both models.
    """
    # smf.ols ignores a ``weights`` argument (OLS does not support weights),
    # so the weighted fit has to go through WLS for ``kwgt`` to take effect.
    fit_options = {
        "cov_type": "cluster",
        "cov_kwds": {"groups": df["studentid"]},
    }

    baseline_formula = "stdgrade ~ treat + pol1+ pol1t"
    extended_formula = (
        "stdgrade ~ treat + treatmentvol + treatmentfor + volcourse + "
        "forcourse + pol1 + pol1t + pol1vol + pol1tvol + pol1for + pol1tfor"
    )

    baseline = smf.wls(
        formula=baseline_formula, data=df, weights=df["kwgt"]
    ).fit(**fit_options)
    extended = smf.wls(
        formula=extended_formula, data=df, weights=df["kwgt"]
    ).fit(**fit_options)

    # Table layout: only the treatment coefficients are displayed.
    stargazer = Stargazer([baseline, extended])
    stargazer.custom_columns(["column 1", "column 4"], [1, 1])
    stargazer.title("Table 3 - Effects on standardized grades")
    stargazer.show_model_numbers(False)
    stargazer.significant_digits(2)
    stargazer.covariate_order(["treat", "treatmentvol", "treatmentfor"])
    stargazer.rename_covariates({
        "treat": "1st-year GPA is below 7",
        "treatmentvol": "Attendance is voluntary x treatment",
        "treatmentfor": "Absence is penalized x treatment",
    })
    stargazer.show_degrees_of_freedom(False)
    stargazer.add_line('Fixed Effects', ['No', 'No'])
    return stargazer
def create_table(models, out):
    """Render pickled regression results as a LaTeX table and save it.

    Args:
        models: iterable of paths to pickle files. Each unpickled object is
            handed to Stargazer and must expose ``model_name`` (column
            header) and ``var_names`` (mapping used to relabel variables).
        out: path of the file the LaTeX source is written to.

    Returns:
        None
    """
    fitted = []
    column_titles = []
    labels = {}

    for path in models:
        # NOTE: unpickling runs arbitrary code; only pass trusted files.
        with open(path, 'rb') as handle:
            loaded = pickle.load(handle)
        fitted.append(loaded)
        column_titles.append(loaded.model_name)
        labels.update(loaded.var_names)

    table = Stargazer(fitted)
    # The dependent variable of the first model labels the whole table.
    dependent = fitted[0].model.endog_names
    table.dependent_variable_name(labels[dependent])
    table.custom_columns(column_titles, [1] * len(column_titles))
    table.rename_covariates(labels)

    # Ugly hack: stargazer generates an invalid LaTeX table, so one extra
    # centred column is added to the column specification.
    rendered = re.sub(r"l(c+)\}", r"lc\1}", table.render_latex())

    with open(out, 'w') as handle:
        handle.write(rendered)
class StargazerTestCase(unittest.TestCase):
    """Tests for ``Stargazer.add_line`` and ``Stargazer.render_latex``."""

    def setUp(self):
        """Fit two small OLS models (without/with intercept) and wrap them."""
        rows = list(zip(range(9), range(0, 18, 2)))
        self.df = pd.DataFrame(rows, columns=['a', 'b'])
        self.est1 = smf.ols('a ~ 0 + b', self.df).fit()
        self.est2 = smf.ols('a ~ 1 + b', self.df).fit()
        self.stargazer = Stargazer([self.est1, self.est2])

    def test_add_line(self):
        """add_line rejects bad input and renders a line at every location."""
        # Too few values for the two models.
        with self.assertRaises(AssertionError):
            self.stargazer.add_line('', [0])

        # Unknown location.
        with self.assertRaises(ValueError):
            self.stargazer.add_line('', [0, 0], 'wrong')

        # Correct usage: one line per available location.
        locations = list(LineLocation)
        for location in locations:
            self.stargazer.add_line(
                f'test {location.value}', ['N/A', 'N/A'], location)

        rendered = self.stargazer.render_latex()
        for location in locations:
            expected = f' test {location.value} & N/A & N/A \\'
            self.assertIn(expected, rendered)

    def test_render_latex(self):
        """Underscores are escaped only when ``escape=True`` is requested."""
        self.stargazer.rename_covariates({'b': 'b_'})

        plain = self.stargazer.render_latex()
        self.assertIn(' b_ ', plain)

        escaped = self.stargazer.render_latex(escape=True)
        self.assertIn(r' b\_ ', escaped)
def get_table_4and7(dependent_var, data):
    """Build the five-model regression table used for Table 4 / Table 7.

    Five OLS models of ``data[dependent_var]`` are fitted: on ``state``
    (without and with chain/ownership controls) and on ``GAP`` (without
    controls, with chain/ownership controls, and additionally with region
    controls). The mean and standard deviation of the dependent variable
    are printed as a side effect.

    Args:
        dependent_var: name of the dependent-variable column in ``data``.
        data: DataFrame with the dependent variable and all regressors;
            which of the two tables results depends on the dataset passed.

    Returns:
        Stargazer: the configured table.
    """
    chain_controls = ["bk", "kfc", "roys", "co_owned"]
    region_controls = ["southj", "centralj", "pa1", "pa2"]
    outcome = data[dependent_var]

    def fit_ols(regressors):
        # Every specification includes an explicit constant.
        design = sm_api.add_constant(data[regressors])
        return sm_api.OLS(outcome, design).fit()

    model_1 = fit_ols("state")
    model_2 = fit_ols(["state"] + chain_controls)
    model_3 = fit_ols("GAP")
    model_4 = fit_ols(["GAP"] + chain_controls)
    model_5 = fit_ols(["GAP"] + chain_controls + region_controls)

    Table = Stargazer([model_1, model_2, model_3, model_4, model_5])
    Table.rename_covariates({
        'state': 'New Jersey dummy',
        'GAP': 'Initial wage GAP',
    })
    Table.add_line('Controls for chain and ownership',
                   ['No', 'Yes', 'No', 'Yes', 'Yes'])
    Table.add_line('Controls for region', ['No', 'No', 'No', 'No', 'Yes'])

    # Joint F-test p-values for the models that contain controls.
    joint_tests = (
        (model_2,
         '(state = 0), (bk = 0), (kfc = 0), (roys =0),(co_owned= 0),(const=0)'),
        (model_4,
         '(GAP = 0), (bk = 0), (kfc = 0), (roys =0),(co_owned= 0),(const=0)'),
        (model_5,
         '(GAP = 0), (bk = 0), (kfc = 0), (roys =0),(co_owned= 0),(const=0), (southj=0),(centralj=0),(pa1=0),(pa2=0)'),
    )
    F2, F4, F5 = [
        fitted.f_test(hypothesis).pvalue.round(3)
        for fitted, hypothesis in joint_tests
    ]

    # The p-value row is only shown for the change-in-FTE table.
    if dependent_var == "change_in_FTE":
        Table.add_line('Probability value for controls',
                       ['-', F2, '-', F4, F5])

    Table.title("Models for " + dependent_var)
    Table.covariate_order(['state', 'GAP'])

    print("The mean and standard deviation of the dependent variable are",
          outcome.mean(), "and", outcome.std(), ",respectively.")
    return Table
def df_table12(df, name):
    """Build one panel of Table 12: ``trust`` regressed on segregation.

    Six models are estimated on the rows of ``df`` that are complete for all
    variables used: three OLS fits with HC1 standard errors and three
    IV2SLS fits that use ``{name}_instrument_C2_thresh`` in place of
    ``{name}_C2`` in the instrument matrix. Within each trio the models are
    (1) segregation only, (2) all controls on the full sample and (3) all
    controls on the ``democ > 1`` subsample.

    Args:
        df: DataFrame containing ``trust``, the ``{name}_*`` columns and the
            control variables.
        name: prefix of the segregation/fractionalization columns. The
            panel title is "Panel A. Ethnicity" for ``'ethnicity'`` and
            "Panel B. Language" for any other value.

    Returns:
        Stargazer: the configured table showing only the segregation and
        fractionalization coefficients.
    """
    segregation = f'{name}_C2'
    instrument = f'{name}_instrument_C2_thresh'
    fractionalization = f'{name}_I'
    controls = [
        'lnpopulation', 'lnArea', 'lnGDP_pc', 'protestants', 'muslims',
        'catholics', 'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist',
        'LOScandin', 'democ', 'mtnall'
    ]

    # One common complete-case sample for all six models.
    sample = df[[
        segregation, instrument, fractionalization, 'trust', 'democ',
        'lnpopulation', 'lnArea', 'lnGDP_pc', 'protestants', 'muslims',
        'catholics', 'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist',
        'LOScandin', 'mtnall'
    ]].dropna(axis=0)
    # NOTE(review): the cut-off is strictly greater than 1; confirm that
    # excluding democ == 1 from the democracy subsample is intended.
    democracies = sample[sample.democ > 1]

    exog_columns = [segregation, fractionalization] + controls
    instrument_columns = [instrument, fractionalization] + controls

    trust_full = sample['trust']
    trust_democ = democracies['trust']

    exo1 = sm.add_constant(sample[segregation])
    exo2 = sm.add_constant(sample[exog_columns])
    exo3 = sm.add_constant(democracies[exog_columns])
    ins1 = sm.add_constant(sample[instrument])
    ins2 = sm.add_constant(sample[instrument_columns])
    ins3 = sm.add_constant(democracies[instrument_columns])

    models = [
        sm.OLS(trust_full, exo1).fit(cov_type='HC1'),
        sm.OLS(trust_full, exo2).fit(cov_type='HC1'),
        sm.OLS(trust_democ, exo3).fit(cov_type='HC1'),
        IV2SLS(trust_full, exo1, ins1).fit(),
        IV2SLS(trust_full, exo2, ins2).fit(),
        IV2SLS(trust_democ, exo3, ins3).fit(),
    ]

    stargazer = Stargazer(models)
    stargazer.covariate_order([segregation, fractionalization])
    # Raw strings keep the LaTeX backslash without an invalid "\h" escape.
    stargazer.rename_covariates({
        segregation: r'Segregation $\hat{S}$ (' + f'{name})',
        fractionalization: 'Fractionalization $F$ (' + f'{name})',
    })
    stargazer.custom_columns(['OLS', 'OLS', 'OLS', '2SLS', '2SLS', '2SLS'],
                             [1, 1, 1, 1, 1, 1])
    stargazer.add_line('Controls', ['No', 'Yes', 'Yes', 'No', 'Yes', 'Yes'])
    stargazer.add_line('Sample',
                       ['Full', 'Full', 'Democ', 'Full', 'Full', 'Democ'])

    if name == 'ethnicity':
        stargazer.title('Panel A. Ethnicity')
    else:
        stargazer.title('Panel B. Language')
    return stargazer
def table10_11(df, name, democ):
    """Build one panel of Tables 10/11: governance outcomes on segregation.

    For each of five outcomes (``icrg_qog``, ``ef_corruption``,
    ``ef_property_rights``, ``ef_regul``, ``taxevas``) an OLS model with HC1
    standard errors and an IV2SLS model are estimated, giving ten columns.
    Each outcome group uses the rows of ``df`` that are complete for its own
    variables.

    Args:
        df: DataFrame containing the outcomes, the ``{name}_*`` columns and
            the control variables.
        name: prefix of the segregation/fractionalization columns.
        democ: ``'democracy'`` restricts every sample to ``democ >= 1`` and
            includes all controls (Panel B); any other value uses the full
            sample with only fractionalization and segregation (Panel A).

    Returns:
        Stargazer: the configured table showing only the segregation and
        fractionalization coefficients.
    """
    segregation = f'{name}_C2'
    fractionalization = f'{name}_I'
    instrument = f'{name}_instrument_C2_thresh'
    controls = [
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'LOScandin',
        'democ', 'mtnall'
    ]
    base_columns = [segregation, fractionalization, instrument] + controls

    # Complete-case samples, one per group of outcome variables.
    df_10_11_1 = df[base_columns + ['icrg_qog']].dropna(axis=0)
    df_10_11_2 = df[base_columns + [
        'ef_regul', 'ef_corruption', 'ef_property_rights'
    ]].dropna(axis=0)
    df_10_11_3 = df[base_columns + ['taxevas']].dropna(axis=0)

    if democ == 'democracy':
        df_10_11_1 = df_10_11_1[df_10_11_1.democ >= 1]
        df_10_11_2 = df_10_11_2[df_10_11_2.democ >= 1]
        df_10_11_3 = df_10_11_3[df_10_11_3.democ >= 1]
        exog_columns = [fractionalization, segregation] + controls
        instrument_columns = [fractionalization, instrument] + controls
    else:
        exog_columns = [fractionalization, segregation]
        instrument_columns = [fractionalization, instrument]

    x1 = sm.add_constant(df_10_11_1[exog_columns])
    x2 = sm.add_constant(df_10_11_2[exog_columns])
    x3 = sm.add_constant(df_10_11_3[exog_columns])
    ins1 = sm.add_constant(df_10_11_1[instrument_columns])
    ins2 = sm.add_constant(df_10_11_2[instrument_columns])
    ins3 = sm.add_constant(df_10_11_3[instrument_columns])

    # (outcome, regressors, instruments) in the column order of the table.
    specifications = [
        (df_10_11_1['icrg_qog'], x1, ins1),
        (df_10_11_2['ef_corruption'], x2, ins2),
        (df_10_11_2['ef_property_rights'], x2, ins2),
        (df_10_11_2['ef_regul'], x2, ins2),
        (df_10_11_3['taxevas'], x3, ins3),
    ]
    estimates = []
    for outcome, exog, instruments in specifications:
        estimates.append(sm.OLS(outcome, exog).fit(cov_type='HC1'))
        estimates.append(IV2SLS(outcome, exog, instruments).fit())

    stargazer = Stargazer(estimates)
    stargazer.custom_columns([
        'ICRG quality of gov', 'EF Corruption', 'EF Property rights',
        'EF Regulation', 'Tax eva'
    ], [2, 2, 2, 2, 2])
    stargazer.show_model_numbers(False)
    stargazer.covariate_order([segregation, fractionalization])
    # Raw strings keep the LaTeX backslash without an invalid "\h" escape.
    stargazer.rename_covariates({
        segregation: r'Segregation $\hat{S}$ (' + f'{name})',
        fractionalization: 'Fractionalization $F$ (' + f'{name})',
    })
    stargazer.add_line('Method', [
        'OLS', '2SLS', 'OLS', '2SLS', 'OLS', '2SLS', 'OLS', '2SLS', 'OLS',
        '2SLS'
    ])

    if democ == 'democracy':
        stargazer.title('Panel B. Democracies sample, all controls')
    else:
        stargazer.title('Panel A. Full sample, no additional controls')
    return stargazer
def table6(df, alternative=True):
    """Build the three first-stage panels of Table 6.

    Each model regresses a segregation index on its instrument, the matching
    fractionalization index and the controls, using OLS with HC1 standard
    errors:

    * Panel A: ``{group}_C2`` on ``{group}_instrument_C2_thresh``,
    * Panel B: ``{group}_C`` on ``{group}_instrument_C_thresh``,
    * Panel C: the Panel B language models re-estimated without the row
      whose ``country`` is ``'usa'``.

    Panels A and B hold six models each (ethnicity, language, religion on
    the full sample, then on the ``democ >= 1`` subsample). Samples are the
    rows of ``df`` that are complete for all variables of a group.

    Args:
        df: DataFrame with the ``{group}_*`` columns, the controls,
            ``RulLaw`` and ``country``.
        alternative: unused; kept for backward compatibility with callers.

    Returns:
        list: ``[panel_a, panel_b, panel_c]`` as Stargazer objects.

    Raises:
        KeyError: if no ``'usa'`` row is present in a language sample
            (raised by ``DataFrame.drop``).
    """
    groups = ('ethnicity', 'language', 'religion')
    controls = [
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'LOScandin',
        'democ', 'mtnall'
    ]
    # The religion regressions leave out the LOScandin dummy.
    religion_controls = [c for c in controls if c != 'LOScandin']

    full = {
        group: df[[
            f'{group}_C2', f'{group}_I', f'{group}_C',
            f'{group}_instrument_C_thresh', f'{group}_instrument_C2_thresh'
        ] + controls + ['RulLaw', 'country']].dropna(axis=0)
        for group in groups
    }
    demo = {group: frame[frame.democ >= 1] for group, frame in full.items()}

    def first_stage(sample, group, index):
        # index is 'C2' or 'C'; it picks both the dependent variable and
        # the matching instrument column.
        chosen = religion_controls if group == 'religion' else controls
        regressors = [f'{group}_instrument_{index}_thresh', f'{group}_I']
        exog = sm.add_constant(sample[regressors + chosen])
        return sm.OLS(sample[f'{group}_{index}'], exog).fit(cov_type='HC1')

    def six_models(index):
        # Full-sample models first, then the democracy subsample.
        return [
            first_stage(samples[group], group, index)
            for samples in (full, demo)
            for group in groups
        ]

    language_no_us = full['language'].set_index('country').drop(index='usa')
    language_demo_no_us = (
        demo['language'].set_index('country').drop(index='usa'))

    stargazer = Stargazer(six_models('C2'))
    stargazer_a = Stargazer(six_models('C'))
    stargazer_b = Stargazer([
        first_stage(language_no_us, 'language', 'C'),
        first_stage(language_demo_no_us, 'language', 'C'),
    ])

    sample_row = ['Full', 'Full', 'Full',
                  'Democracy', 'Democracy', 'Democracy']

    # LaTeX labels are raw strings so that "\h" is not an invalid escape.
    # Panel A
    stargazer.covariate_order([
        'ethnicity_instrument_C2_thresh', 'ethnicity_I',
        'language_instrument_C2_thresh', 'language_I',
        'religion_instrument_C2_thresh', 'religion_I'
    ])
    stargazer.rename_covariates({
        'ethnicity_instrument_C2_thresh': 'Instrument E',
        'ethnicity_I': '$F$ (ethnicity)',
        'language_instrument_C2_thresh': 'Instrument L',
        'language_I': '$F$ (language)',
        'religion_instrument_C2_thresh': 'Instrument R',
        'religion_I': '$F$ (religion)'
    })
    stargazer.custom_columns([
        r'E$\hat{S}$', r'L$\hat{S}$', r'R$\hat{S}$', r'E$\hat{S}$',
        r'L$\hat{S}$', r'R$\hat{S}$'
    ], [1, 1, 1, 1, 1, 1])
    stargazer.show_model_numbers(False)
    stargazer.add_line('Sample', list(sample_row))
    stargazer.title(r'Panel A. Segregation index $\hat{S}$')

    # Panel B
    stargazer_a.covariate_order([
        'ethnicity_instrument_C_thresh', 'ethnicity_I',
        'language_instrument_C_thresh', 'language_I',
        'religion_instrument_C_thresh', 'religion_I'
    ])
    stargazer_a.rename_covariates({
        'ethnicity_instrument_C_thresh': 'Instrument E',
        'ethnicity_I': '$F$ (ethnicity)',
        'language_instrument_C_thresh': 'Instrument L',
        'language_I': '$F$ (language)',
        'religion_instrument_C_thresh': 'Instrument R',
        'religion_I': '$F$ (religion)'
    })
    stargazer_a.custom_columns([
        r'E$\tilde{S}$', r'L$\tilde{S}$', r'R$\tilde{S}$', r'E$\tilde{S}$',
        r'L$\tilde{S}$', r'R$\tilde{S}$'
    ], [1, 1, 1, 1, 1, 1])
    stargazer_a.show_model_numbers(False)
    stargazer_a.add_line('Sample', list(sample_row))
    stargazer_a.title(r'Panel B. Segregation index $\tilde{S}$')

    # Panel C
    stargazer_b.covariate_order(
        ['language_instrument_C_thresh', 'language_I'])
    stargazer_b.rename_covariates({
        'language_instrument_C_thresh': 'Instrument L',
        'language_I': '$F$ (language)'
    })
    stargazer_b.custom_columns([r'L$\tilde{S}$', r'L$\tilde{S}$'], [1, 1])
    stargazer_b.show_model_numbers(False)
    stargazer_b.add_line('Sample', ['Full', 'Democracy'])
    stargazer_b.title(
        r'Panel C. Segregation index $\tilde{S}$ for language with sample excluding the US'
    )

    return [stargazer, stargazer_a, stargazer_b]
def table3_7(df, regression_type):
    """Render Table 3 (OLS) or Table 7 (2SLS): ``RulLaw`` on segregation.

    For each of ethnicity, language and religion two models of ``RulLaw``
    are estimated on the rows of ``df`` that are complete for that group's
    variables: a short one (segregation and fractionalization only)
    followed by one with all controls. The religion models leave out
    ``LOScandin``.

    Args:
        df: DataFrame with ``RulLaw``, the ``{group}_*`` columns and the
            control variables.
        regression_type: ``'OLS'`` (HC1 standard errors) or ``'IV2SLS'``
            (``{group}_instrument_C2_thresh`` replaces ``{group}_C2`` in
            the instrument matrix).

    Returns:
        HTML: the rendered Stargazer table.

    Raises:
        ValueError: if ``regression_type`` is neither ``'OLS'`` nor
            ``'IV2SLS'``.
    """
    if regression_type not in ('IV2SLS', 'OLS'):
        raise ValueError(
            "regression_type must be 'IV2SLS' or 'OLS', got "
            f"{regression_type!r}")

    # Control columns per group, in the column order used for the fits.
    exog_controls = {
        'ethnicity': [
            'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims',
            'catholics', 'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist',
            'LOScandin', 'lnArea', 'democ', 'mtnall'
        ],
        'language': [
            'lnpopulation', 'lnGDP_pc', 'protestants', 'lnArea', 'muslims',
            'catholics', 'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist',
            'LOScandin', 'democ', 'mtnall'
        ],
        'religion': [
            'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims',
            'catholics', 'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist',
            'lnArea', 'democ', 'mtnall'
        ],
    }
    instrument_controls_all = [
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'LOScandin',
        'democ', 'mtnall', 'lnArea'
    ]
    instrument_controls = {
        'ethnicity': instrument_controls_all,
        'language': instrument_controls_all,
        'religion': [
            c for c in instrument_controls_all if c != 'LOScandin'
        ],
    }

    models = []
    for group in ('ethnicity', 'language', 'religion'):
        sample = df[[
            f'{group}_C2', f'{group}_instrument_C2_thresh', f'{group}_I',
            'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims',
            'catholics', 'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist',
            'lnArea', 'LOScandin', 'democ', 'mtnall', 'RulLaw'
        ]].dropna(axis=0)
        outcome = sample['RulLaw']
        regressors = [f'{group}_C2', f'{group}_I']
        instruments = [f'{group}_instrument_C2_thresh', f'{group}_I']

        # Short specification first, then the one with all controls.
        for extra_exog, extra_instruments in (
                ([], []),
                (exog_controls[group], instrument_controls[group])):
            exog = sm.add_constant(sample[regressors + extra_exog])
            if regression_type == 'IV2SLS':
                instrument_matrix = sm.add_constant(
                    sample[instruments + extra_instruments])
                fitted = IV2SLS(outcome, exog, instrument_matrix).fit()
            else:
                fitted = sm.OLS(outcome, exog).fit(cov_type='HC1')
            models.append(fitted)

    stargazer = Stargazer(models)
    stargazer.covariate_order([
        'ethnicity_C2', 'ethnicity_I', 'language_C2', 'language_I',
        'religion_C2', 'religion_I', 'lnpopulation', 'lnGDP_pc', 'lnArea',
        'protestants', 'muslims', 'catholics', 'latitude', 'LOEnglish',
        'LOGerman', 'LOSocialist', 'LOScandin', 'democ', 'mtnall', 'const'
    ])
    # Raw strings keep the LaTeX backslash without an invalid "\h" escape.
    stargazer.rename_covariates({
        'ethnicity_C2': r'Segregation $\hat{S}$ (ethnicity)',
        'ethnicity_I': 'Fractionalization $F$ (ethnicity)',
        'language_C2': r'Segregation $\hat{S}$ (language)',
        'language_I': 'Fractionalization $F$ (language)',
        'religion_C2': r'Segregation $\hat{S}$ (religion)',
        'religion_I': 'Fractionalization $F$ (religion)',
        'lnpopulation': 'ln (population)',
        'lnGDP_pc': 'ln (GDP per capita)',
        'lnArea': 'ln (average size of region)',
        'protestants': 'Protestants share',
        'muslims': 'Muslims Share',
        'catholics': 'Catholics share',
        'latitude': 'Latitude',
        'LOEnglish': 'English legal origin',
        'LOGerman': 'German legal origin',
        'LOSocialist': 'Socialist legal origin',
        'LOScandin': 'Scandinavian legal origin',
        'democ': 'Democratic tradition',
        'mtnall': 'Mountains',
        'const': 'Constant'
    })
    return HTML(stargazer.render_html())
# Fit the base model and the specification that uses ``x_withfemale``.
model1 = sm.OLS(y, x).fit()
model2 = sm.OLS(y, x_withfemale).fit()

# The summaries are only displayed when run interactively; their return
# values are discarded here.
model1.summary()
model2.summary()

# =============================================================================
# STARGAZER MODEL OUTPUTS
# =============================================================================

from stargazer.stargazer import Stargazer

stargazer = Stargazer([model1, model2])
stargazer.custom_columns(['Base Model', 'Specified Model'], [1, 1])
stargazer.significant_digits(2)
stargazer.covariate_order([
    'const', 'propwomen', 'oppospower', 'gdpcap', 'sepowerdist', 'youthunemp'
])
stargazer.rename_covariates({
    'const': 'Constant',
    'oppospower': 'Opposition Power',
    'gdpcap': 'GDP($)',
    'sepowerdist': 'Class Political Power',
    'youthunemp': 'Unemployed Youth %',
    'propwomen': 'Female Property Rights'
})
stargazer.cov_spacing = 3

# Emit the LaTeX source of the comparison table on stdout.
print(stargazer.render_latex())
def ols_regression_formatted(data,
                             specifications,
                             as_latex=False,
                             covariates_names=None,
                             covariates_order=None):
    """Create one formatted regression table per dependent variable.

    For every dependent variable, each specification is estimated by OLS
    via the formula interface and all resulting models are combined into a
    single Stargazer table.

    Args:
        data (DataFrame): data containing all variables used in the
            formulas.
        specifications (dict): dependent variable names as keys and lists
            of right-hand-side formula strings as values.
        as_latex (bool): if true, store LaTeX code (with the surrounding
            ``table`` environment removed) instead of Stargazer objects.
        covariates_names (dict): optional mapping from covariate names as
            they appear in the fitted models to display names.
        covariates_order (list): optional preferred covariate order.
            Covariates not listed follow in order of first appearance and
            the intercept, if present, is always placed last.

    Returns:
        dict: dependent variable names mapped to the formatted tables
        (Stargazer objects, or LaTeX strings if ``as_latex`` is set).
    """
    # Connects dependent variables with their formatted tables.
    dict_regression_tables = {}

    for depvar, specification_list in specifications.items():
        regression_list = []
        # A dict serves as an insertion-ordered set so that the order of
        # covariates is deterministic from run to run.
        all_covariates = {}

        for specification in specification_list:
            estimation_equation = depvar + " ~ " + specification
            regression = smf.ols(data=data,
                                 formula=estimation_equation).fit()
            regression_list.append(regression)
            all_covariates.update(dict.fromkeys(regression.params.index))

        formatted_table = Stargazer(regression_list)

        # No degrees of freedom and a blank dependent variable name.
        formatted_table.show_degrees_of_freedom(False)
        formatted_table.dependent_variable_name("")

        # Optional: change order of covariates.
        if covariates_order is not None:
            preferred = [
                name for name in dict.fromkeys(covariates_order)
                if name in all_covariates
            ]
            preferred_lookup = set(preferred)
            remaining = [
                name for name in all_covariates
                if name not in preferred_lookup
            ]
            covariates_sorted = [
                name for name in preferred + remaining
                if name != "Intercept"
            ]
            # Formulas without an intercept have nothing to move last.
            if "Intercept" in all_covariates:
                covariates_sorted.append("Intercept")
            formatted_table.covariate_order(covariates_sorted)

        # Optional: change names of covariates.
        if covariates_names is not None:
            formatted_table.rename_covariates(covariates_names)

        if as_latex:
            latex_code = formatted_table.render_latex()
            # Strip the surrounding table environment.
            latex_code = latex_code.replace(
                "\\begin{table}[!htbp] \\centering\n", "")
            latex_code = latex_code.replace("\\end{table}", "")
            dict_regression_tables[depvar] = latex_code
        else:
            dict_regression_tables[depvar] = formatted_table

    return dict_regression_tables