def create_table(models, out):
    """Create a LaTeX table of regression results and write it to ``out``.

    Every entry of ``models`` is opened as a pickle file. From each loaded
    result the function reads ``model_name`` (used as the column header) and
    ``var_names`` (a mapping merged into the covariate labels).

    Args:
        models: paths of the pickled regression results
        out: the generated table is saved here

    Returns:
        None
    """

    results = []
    model_names = []
    covariate_names = {}

    for model_path in models:
        # NOTE: unpickling can execute arbitrary code; only load result files
        # that come from a trusted source.
        with open(model_path, 'rb') as file:
            result = pickle.load(file)
        results.append(result)
        model_names.append(result.model_name)
        covariate_names.update(result.var_names)

    table = Stargazer(results)
    # Label the dependent variable of the first model; fall back to its raw
    # name when no label was supplied instead of failing with a KeyError.
    endog_name = results[0].model.endog_names
    table.dependent_variable_name(covariate_names.get(endog_name, endog_name))
    table.custom_columns(model_names, [1] * len(model_names))
    table.rename_covariates(covariate_names)

    latex_table = table.render_latex()
    # Ugly hack because stargazer generates an invalid latex table: insert one
    # additional ``c`` column into the ``l c...c}`` column specification.
    latex_table = re.sub(r"l(c+)\}", r"lc\1}", latex_table)

    # Explicit encoding so labels with non-ASCII characters are written the
    # same way on every platform.
    with open(out, 'w', encoding='utf-8') as file:
        file.write(latex_table)
Esempio n. 2
0
def create_table(models, path):
    """Render ``models`` with Stargazer and write the table to ``path``.

    The output format is chosen from the file suffix: ``.tex`` produces
    LaTeX, ``.html`` produces HTML.

    Args:
        models: fitted models passed straight to ``Stargazer``
        path: destination file; its suffix selects the format

    Raises:
        NotImplementedError: if the suffix is neither ``.tex`` nor ``.html``.
    """
    path = Path(path)
    suffix = path.suffix

    # Reject unsupported formats before doing any rendering work.
    if suffix not in (".tex", ".html"):
        raise NotImplementedError(
            f"Unsupported table format {suffix!r}; expected '.tex' or '.html'."
        )

    stargazer = Stargazer(models)
    if suffix == ".tex":
        table = stargazer.render_latex()
    else:
        table = stargazer.render_html()

    with open(path, "w", encoding="utf-8") as file:
        file.write(table)
Esempio n. 3
0
class StargazerTestCase(unittest.TestCase):
    """Tests for ``Stargazer.add_line`` and ``Stargazer.render_latex``."""

    def setUp(self):
        """Fit two OLS models on a tiny frame and wrap them in a Stargazer."""
        rows = list(zip(range(9), range(0, 18, 2)))
        self.df = pd.DataFrame(rows, columns=['a', 'b'])
        self.est1 = smf.ols('a ~ 0 + b', self.df).fit()
        self.est2 = smf.ols('a ~ 1 + b', self.df).fit()
        self.stargazer = Stargazer([self.est1, self.est2])

    def test_add_line(self):
        """add_line rejects bad input and renders a line at every location."""
        # One value for two models: too few arguments.
        with self.assertRaises(AssertionError):
            self.stargazer.add_line('', [0])

        # A location that is not a valid LineLocation.
        with self.assertRaises(ValueError):
            self.stargazer.add_line('', [0, 0], 'wrong')

        # Correct usage: add one line per location, then render once.
        locations = list(LineLocation)
        for location in locations:
            self.stargazer.add_line(f'test {location.value}',
                                    ['N/A', 'N/A'], location)
        rendered = self.stargazer.render_latex()
        for location in locations:
            self.assertIn(f' test {location.value} & N/A & N/A \\', rendered)

    def test_render_latex(self):
        """Underscores are escaped only when ``escape=True`` is requested."""
        self.stargazer.rename_covariates({'b': 'b_'})
        plain = self.stargazer.render_latex()
        self.assertIn(' b_ ', plain)
        escaped = self.stargazer.render_latex(escape=True)
        self.assertIn(r' b\_ ', escaped)
Esempio n. 4
0
def _short_run_sample(df, news_flag):
    """Return the treat-group-4 rows with the given ``dummynews_goodbad`` value.

    Only the two columns used by the short-run regression are kept.
    """
    mask = (df["dummynews_goodbad"] == news_flag) & (df["treatgroup"] == 4)
    return df.loc[
        mask, ["beliefadjustment_normalized", "beliefadjustment_bayes_norm"]]


def Appendix_Table_3(df):
    """Build the 'Belief Adjustment in the Short-Run' Stargazer table.

    Three OLS models with HC1 standard errors are fitted on treat group 4:
    one on rows where ``dummynews_goodbad == 0``, one on rows where it is 1,
    and one pooled model with an interaction term.

    Side effect: the column ``interact_negative_bayes`` is added to the
    caller's ``df`` (kept from the original implementation).

    Args:
        df: DataFrame with the columns ``dummynews_goodbad``, ``treatgroup``,
            ``beliefadjustment_normalized`` and ``beliefadjustment_bayes_norm``.

    Returns:
        The configured ``Stargazer`` object.
    """
    formula = "beliefadjustment_normalized ~ beliefadjustment_bayes_norm"

    # Column 1 uses dummynews_goodbad == 0, column 2 uses == 1.
    reg_s_1 = smf.ols(formula=formula,
                      data=_short_run_sample(df, 0)).fit(cov_type='HC1')
    reg_s_2 = smf.ols(formula=formula,
                      data=_short_run_sample(df, 1)).fit(cov_type='HC1')

    # Interaction of the Bayesian benchmark with the news dummy; written to
    # the input frame before the treat-group subset is taken.
    df["interact_negative_bayes"] = df["beliefadjustment_bayes_norm"] * df[
        "dummynews_goodbad"]

    reg_s_3 = smf.ols(
        formula=
        "beliefadjustment_normalized ~ beliefadjustment_bayes_norm + dummynews_goodbad + interact_negative_bayes",
        data=df[df['treatgroup'] == 4]).fit(cov_type='HC1')

    table = Stargazer([reg_s_1, reg_s_2, reg_s_3])
    table.title('Table 8: Belief Adjustment in the Short-Run')
    table.dependent_variable_name('Belief Adjustment')
    table.custom_columns([
        'Positive Information', 'Negative Information',
        'Difference-in-difference'
    ], [1, 1, 1])

    return table
def table3_7(df, regression_type):
    """Regress ``RulLaw`` on segregation measures and render an HTML table.

    Six models are estimated: for ethnicity, language and religion, one
    model with only the segregation and fractionalization indices and one
    with the additional controls. With ``'IV2SLS'`` the ``*_C2`` index is
    instrumented by ``*_instrument_C2_thresh``; with ``'OLS'`` the models
    are fitted by OLS with HC1 standard errors.

    Args:
        df: DataFrame containing the columns selected below.
        regression_type: ``'IV2SLS'`` or ``'OLS'``.

    Returns:
        ``HTML`` object wrapping the rendered stargazer table.

    Raises:
        ValueError: if ``regression_type`` is not one of the two supported
            values (previously this surfaced as an UnboundLocalError).
    """
    if regression_type not in ('IV2SLS', 'OLS'):
        raise ValueError(
            f"regression_type must be 'IV2SLS' or 'OLS', got {regression_type!r}"
        )

    # One complete-case sample per dimension.
    df_3_7E = df[[
        'ethnicity_C2', 'ethnicity_instrument_C2_thresh', 'ethnicity_I',
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'lnArea',
        'LOScandin', 'democ', 'mtnall', 'RulLaw'
    ]].dropna(axis=0)
    df_3_7L = df[[
        'language_C2', 'language_instrument_C2_thresh', 'language_I',
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'lnArea',
        'LOScandin', 'democ', 'mtnall', 'RulLaw'
    ]].dropna(axis=0)
    df_3_7R = df[[
        'religion_C2', 'religion_instrument_C2_thresh', 'religion_I',
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'lnArea',
        'LOScandin', 'democ', 'mtnall', 'RulLaw'
    ]].dropna(axis=0)

    # Regressor matrices: odd-numbered names carry the controls, the
    # following one holds only the two indices.
    exo = sm.add_constant(df_3_7E[[
        'ethnicity_C2', 'ethnicity_I', 'lnpopulation', 'lnGDP_pc',
        'protestants', 'muslims', 'catholics', 'latitude', 'LOEnglish',
        'LOGerman', 'LOSocialist', 'LOScandin', 'lnArea', 'democ', 'mtnall'
    ]])
    exo2 = sm.add_constant(df_3_7E[['ethnicity_C2', 'ethnicity_I']])
    exo3 = sm.add_constant(df_3_7L[[
        'language_C2', 'language_I', 'lnpopulation', 'lnGDP_pc', 'protestants',
        'lnArea', 'muslims', 'catholics', 'latitude', 'LOEnglish', 'LOGerman',
        'LOSocialist', 'LOScandin', 'democ', 'mtnall'
    ]])
    exo4 = sm.add_constant(df_3_7L[['language_C2', 'language_I']])
    # NOTE(review): unlike the ethnicity and language models, the religion
    # model with controls omits 'LOScandin' (regressors and instruments
    # alike) -- confirm this is intentional.
    exo5 = sm.add_constant(df_3_7R[[
        'religion_C2', 'religion_I', 'lnpopulation', 'lnGDP_pc', 'protestants',
        'muslims', 'catholics', 'latitude', 'LOEnglish', 'LOGerman',
        'LOSocialist', 'lnArea', 'democ', 'mtnall'
    ]])
    exo6 = sm.add_constant(df_3_7R[['religion_C2', 'religion_I']])

    if regression_type == 'IV2SLS':

        reg = IV2SLS(
            df_3_7E['RulLaw'], exo,
            sm.add_constant(df_3_7E[[
                'ethnicity_instrument_C2_thresh', 'ethnicity_I',
                'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims',
                'catholics', 'latitude', 'LOEnglish', 'LOGerman',
                'LOSocialist', 'LOScandin', 'democ', 'mtnall', 'lnArea'
            ]])).fit()
        reg2 = IV2SLS(
            df_3_7E['RulLaw'], exo2,
            sm.add_constant(
                df_3_7E[['ethnicity_instrument_C2_thresh',
                         'ethnicity_I']])).fit()
        reg3 = IV2SLS(
            df_3_7L['RulLaw'], exo3,
            sm.add_constant(df_3_7L[[
                'language_instrument_C2_thresh', 'language_I', 'lnpopulation',
                'lnGDP_pc', 'protestants', 'muslims', 'catholics', 'latitude',
                'LOEnglish', 'LOGerman', 'LOSocialist', 'LOScandin', 'democ',
                'mtnall', 'lnArea'
            ]])).fit()
        reg4 = IV2SLS(
            df_3_7L['RulLaw'], exo4,
            sm.add_constant(
                df_3_7L[['language_instrument_C2_thresh',
                         'language_I']])).fit()
        reg5 = IV2SLS(
            df_3_7R['RulLaw'], exo5,
            sm.add_constant(df_3_7R[[
                'religion_instrument_C2_thresh', 'religion_I', 'lnpopulation',
                'lnGDP_pc', 'protestants', 'muslims', 'catholics', 'latitude',
                'LOEnglish', 'LOGerman', 'LOSocialist', 'democ', 'mtnall',
                'lnArea'
            ]])).fit()
        reg6 = IV2SLS(
            df_3_7R['RulLaw'], exo6,
            sm.add_constant(
                df_3_7R[['religion_instrument_C2_thresh',
                         'religion_I']])).fit()
    else:  # regression_type == 'OLS', validated above
        reg2 = sm.OLS(df_3_7E['RulLaw'], exo2).fit(cov_type='HC1')
        reg = sm.OLS(df_3_7E['RulLaw'], exo).fit(cov_type='HC1')
        reg4 = sm.OLS(df_3_7L['RulLaw'], exo4).fit(cov_type='HC1')
        reg3 = sm.OLS(df_3_7L['RulLaw'], exo3).fit(cov_type='HC1')
        reg6 = sm.OLS(df_3_7R['RulLaw'], exo6).fit(cov_type='HC1')
        reg5 = sm.OLS(df_3_7R['RulLaw'], exo5).fit(cov_type='HC1')

    # Columns alternate: indices only, then indices plus controls.
    stargazer = Stargazer([reg2, reg, reg4, reg3, reg6, reg5])
    stargazer.covariate_order([
        'ethnicity_C2', 'ethnicity_I', 'language_C2', 'language_I',
        'religion_C2', 'religion_I', 'lnpopulation', 'lnGDP_pc', 'lnArea',
        'protestants', 'muslims', 'catholics', 'latitude', 'LOEnglish',
        'LOGerman', 'LOSocialist', 'LOScandin', 'democ', 'mtnall', 'const'
    ])
    # Raw strings keep the LaTeX backslash without an invalid-escape warning.
    stargazer.rename_covariates({
        'ethnicity_C2': r'Segregation $\hat{S}$ (ethnicity)',
        'ethnicity_I': 'Fractionalization $F$ (ethnicity)',
        'language_C2': r'Segregation $\hat{S}$ (language)',
        'language_I': 'Fractionalization $F$ (language)',
        'religion_C2': r'Segregation $\hat{S}$ (religion)',
        'religion_I': 'Fractionalization $F$ (religion)',
        'lnpopulation': 'ln (population)',
        'lnGDP_pc': 'ln (GDP per capita)',
        'lnArea': 'ln (average size of region)',
        'protestants': 'Protestants share',
        'muslims': 'Muslims Share',
        'catholics': 'Catholics share',
        'latitude': 'Latitude',
        'LOEnglish': 'English legal origin',
        'LOGerman': 'German legal origin',
        'LOSocialist': 'Socialist legal origin',
        'LOScandin': 'Scandinavian legal origin',
        'democ': 'Democratic tradition',
        'mtnall': 'Mountains',
        'const': 'Constant'
    })
    return HTML(stargazer.render_html())
Esempio n. 6
0
           interpolation='nearest',
           data=True)
plt.colorbar()
# Label both axes with the column names of the regression data.
tick_marks = list(range(len(DataReg1.columns)))
plt.xticks(tick_marks, DataReg1.columns, rotation=45)
plt.yticks(tick_marks, DataReg1.columns, rotation=45)
plt.title('NASDAQ Variable Correlations')

# Compute the correlation matrix once instead of once per annotated cell.
correlations = DataReg1.corr()
for i in tick_marks:
    for j in tick_marks:
        text = '%.2f' % (correlations.iloc[i, j])
        plt.text(i - 0.2, j - 0.1, text)

# In[54]:

print(model2.summary())
# F-statistic is significantly large
# Good R-squared value

# In[57]:

# Side-by-side comparison table; the bare name displays it in a notebook.
stargazer = Stargazer([model2, model3])
stargazer

# In[61]:

sns.pairplot(DataReg)
plt.suptitle('Pair Plot of Economic Indicators and Price', size=15)

# In[ ]:
Esempio n. 7
0
def _stat_and_pvalue(test_result):
    """Format the first two entries of a test result as ``first (second)``."""
    return f"{test_result[0]:.3f} ({test_result[1]:.3f})"


# Extra rows for summary_col: row label -> function of a fitted result.
# Each diagnostic is evaluated once per model instead of twice.
reg_info = {
    "Observações": lambda x: x.nobs,
    "R^2": lambda x: x.rsquared,
    "R^2 Ajustado": lambda x: x.rsquared_adj,
    "Estatística F": lambda x: f"{x.fvalue:.3f} ({x.f_pvalue:.3f})",
    "Jarque-Bera": lambda x: _stat_and_pvalue(jarque_bera(x.resid)),
    "Dickey-Fuller": lambda x: _stat_and_pvalue(
        adfuller(x.resid, maxlag=1, autolag=None)),
    "Durbin-Watson": lambda x: f"{durbin_watson(x.resid):.3f}"
}

print(summary_col([reg], stars=True, info_dict=reg_info).as_latex())

print(Stargazer([reg]).render_latex())

# Lagged residual of the first regression, used as a regressor below.
reg_resid = reg.resid.shift(1).dropna()
reg_resid.name = "equilibrio"

# A stray trailing comma previously turned ``y`` into a one-element tuple,
# and neither ``y`` nor ``X`` was used by the model below.
y = d_series.spread
X = pd.concat([reg_resid, d_series.selic, d_series.inad, d_series.ibc],
              axis="columns")

ecm = sm.OLS(endog=y, exog=X).fit()

print(summary_col([ecm], stars=True, info_dict=reg_info).as_latex())
Esempio n. 8
0
# Attach the country columns, drop unused columns and rows without test
# counts, and parse the dates.
df = (
    pd.concat([country, df], axis=1)
    .drop(["Thailand", "location"], axis=1)
    .dropna(subset=["new_tests_per_thousand"])
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
)

# Merge each country's rows with its own mobility data, then stack them.
is_malaysia = df["Malaysia"] == 1
df_my = df[is_malaysia].merge(mob_my, on="date")
df_th = df[df["Malaysia"] == 0].merge(mob_th, on="date")
df = pd.concat([df_my, df_th])

# 0 up to and including 2020-09-26, 1 for every other row.
election_cutoff = pd.to_datetime("2020-09-26")
df["post_election"] = (~(df["date"] <= election_cutoff)).astype("int64")

mdl_joined_el = ols(
    "new_cases_per_million ~ Malaysia * post_election + new_tests_per_thousand + retail + grocery + parks + transit",
    data=df)
results_joined_el = mdl_joined_el.fit()
results_joined_el.summary()

stargazer = Stargazer([results_joined_el])

stargazer.render_latex()
Esempio n. 9
0
def main():
    """Explore how dekadal NDVI changes in Arsi relate to P, LST and ET.

    Loads the regional sheets of ``oromia.xlsx``, builds interaction
    regressors from the Arsi sheet, fits single- and multi-variable
    regressions of the NDVI change on them, draws scatter and time-series
    plots and builds a stargazer table. Nothing is returned; results are
    printed, plotted or discarded.

    NOTE(review): the ``#%%`` markers suggest this was written as interactive
    cells. Several statements below look inconsistent when run top to bottom;
    they are flagged with NOTE(review) comments rather than changed.
    """
#%%

#Load data
    # NOTE(review): hard-coded, machine-specific working directory.
    os.chdir('/Users/rgreen/Documents/Github/NDVI_Projection/')

    oromia = pd.ExcelFile('oromia.xlsx')
    arsi = pd.read_excel(oromia, 'arsi')
    bale = pd.read_excel(oromia, 'bale')
    borena = pd.read_excel(oromia, 'borena')
    guji = pd.read_excel(oromia, 'guji')
    westarsi = pd.read_excel(oromia, 'westarsi')

    # Add a 1..585 time index as the first column of every sheet; this
    # requires each sheet to have exactly 585 rows. Only ``arsi`` is used in
    # the rest of this function.
    arsi.insert(0, 'Time', np.linspace(1,585,585))
    bale.insert(0, 'Time', np.linspace(1,585,585))
    borena.insert(0, 'Time', np.linspace(1,585,585))
    guji.insert(0, 'Time', np.linspace(1,585,585))
    westarsi.insert(0, 'Time', np.linspace(1,585,585))

#%%
    #dekadal data (P, LST, ET)

    # NOTE(review): this first version of ``ddf`` is discarded -- ``ddf`` is
    # rebound to a fresh DataFrame immediately below.
    ddf = pd.DataFrame()
    ddf['D_NDVI'] = arsi.NDVI.diff()[1:]
    ddf = ddf.reset_index(drop=True)
    ddf['N_P'] = ((max(arsi.NDVI) - (arsi.NDVI[:-1]))* (arsi.P[:-1]))
    ddf['N_LST'] = (((arsi.NDVI[:-1]) - min(arsi.NDVI))* (arsi.LST[:-1])).shift(4) #need to shift back lags, use shift not index
    ddf['N_ET'] = (((arsi.NDVI[:-1]) - min(arsi.NDVI))* (arsi.ET[:-1])).shift(4)


    # Version actually used: D_NDVI is the next-step NDVI change, aligned
    # with the current row via shift(-1); the regressors are NDVI-weighted
    # P, LST and ET, with LST and ET additionally shifted by 4 rows.
    ddf = pd.DataFrame()
    ddf['D_NDVI'] = arsi.NDVI.diff().shift(-1)
    ddf = ddf.reset_index(drop=True)
    #L_NDVI = arsi.NDVI.shift(-1) #lag
    ddf['N_P'] = ((max(arsi.NDVI) - (arsi.NDVI))* (arsi.P))
    ddf['N_LST'] = (((arsi.NDVI) - min(arsi.NDVI))* (arsi.LST)).shift(4) #need to shift back lags, use shift not index
    ddf['N_ET'] = (((arsi.NDVI) - min(arsi.NDVI))* (arsi.ET)).shift(4)



    #shift is (n-1) dekads, ex. shift(4) is 5 dekads lag

    #mask
    # Rows where both the regressor and the response are present.
    mask_lst = ~np.isnan(ddf.N_LST) & ~np.isnan(ddf.D_NDVI)
    mask_et = ~np.isnan(ddf.N_ET) & ~np.isnan(ddf.D_NDVI)

    #stats.linregress (X,Y)
    # NOTE(review): no mask is applied to the N_P fit, although D_NDVI
    # presumably ends with a NaN after shift(-1) -- confirm the result is
    # not NaN.
    slope1, intercept1, r1, p1, std1 = stats.linregress(ddf.N_P, ddf.D_NDVI)
    line1 = slope1*ddf.N_P+intercept1
    print("r-squared: %f" % r1**2)
    slope2, intercept2, r2, p2, std2 = stats.linregress(ddf.N_LST[mask_lst], ddf.D_NDVI[mask_lst])
    line2 = slope2*ddf.N_LST+intercept2
    print("r-squared: %f" % r2**2)
    slope3, intercept3, r3, p3, std3 = stats.linregress(ddf.N_ET[mask_et], ddf.D_NDVI[mask_et])
    line3 = slope3*ddf.N_ET+intercept3
    print("r-squared: %f" % r3**2)

    # One scatter panel per regressor with its fitted line.
    # NOTE(review): the equation and r^2 annotations are hard-coded text,
    # not taken from the fits above.
    fig, (ax1, ax2, ax3) = plt.subplots(3, sharey =True)
    ax1.scatter(ddf.N_P, ddf.D_NDVI, color = 'darkcyan')
    ax1.plot(ddf.N_P, line1, color = 'k')
    ax1.set(xlabel = r'$(NDVI_{max} - NDVI_{t-1})*P_{t-1}$', ylabel = '')
    ax1.text(16, -0.04, r'y = 0.005x + 0.02', fontsize=8)
    ax1.text(16, -0.05, r'$r^2$ = 0.528', fontsize=8)
    ax2.scatter(ddf.N_LST, ddf.D_NDVI, color = 'forestgreen')
    ax2.set(xlabel = r'$(NDVI_{t-5} - NDVI_{min})*LST_{t-5}$', ylabel = '')
    ax2.plot(ddf.N_LST, line2, color = 'k')
    ax2.text(9, 0.07, r'y = -0.005x + 0.038', fontsize=8)
    ax2.text(9, 0.06, r'$r^2$ = 0.386', fontsize=8)
    ax3.scatter(ddf.N_ET, ddf.D_NDVI, color = 'cornflowerblue')
    # NOTE(review): this label says NDVI_max - NDVI, but N_ET above is built
    # from NDVI - min(NDVI).
    ax3.set(xlabel = r'$(NDVI_{max} - NDVI_{t-5})*ET_{t-5}$', ylabel = '')
    ax3.plot(ddf.N_ET, line3, color = 'k')
    ax3.text(15, 0.07, r'y = -0.003x + 0.031', fontsize=8)
    ax3.text(15, 0.06, r'$r^2$ = 0.414', fontsize=8)
    fig.text(0.06, 0.5, r'$\Delta NDVI$', ha='center', va='center', rotation='vertical') #common ylabel
# =============================================================================
#
#     sns.set(style="ticks", color_codes=True)
#
#     fig = plt.figure()
#     sns.regplot(x=ddf.N_P, y=ddf.D_NDVI)
#
#     g = sns.PairGrid(ddf, y_vars=["D_NDVI"], x_vars=["N_P", "N_LST", "N_ET"], height=4)
#     g.map(sns.regplot, color=".3")
#
#     replacements = {'D_NDVI': r'$\Delta NDVI$', 'N_P': '(maxNDVI - NDVIt-1)*Pt-1',
#                 'N_LST': '(NDVIt-5 - minNDVI)*LSTt-5', 'N_ET': '(NDVIt-5 - minNDVI)*N_ETt-5'}
#
#     for i in range(4):
#         for j in range(4):
#             xlabel = g.axes[i][j].get_xlabel()
#             ylabel = g.axes[i][j].get_ylabel()
#             if xlabel in replacements.keys():
#                 g.axes[i][j].set_xlabel(replacements[xlabel])
#             if ylabel in replacements.keys():
#                 g.axes[i][j].set_ylabel(replacements[ylabel])
#
# =============================================================================

# =============================================================================
#
#     r, p = stats.pearsonr(ddf.D_NDVI, ddf.N_P)
#     r, p = stats.pearsonr(ddf.D_NDVI, ddf.N_LST)
#     r, p = stats.pearsonr(ddf.D_NDVI, ddf.N_LST)
#
# =============================================================================

 #%%

    # OLS of the NDVI change on N_P only; rows with missing values dropped.
    X = ddf[['N_P']]
    Y = ddf['D_NDVI']
    X = sm.add_constant(X)
    est = sm.OLS(Y,X, missing = 'drop').fit()
    est.summary()  # result discarded (only displayed when run interactively)
    Y_pred = est.predict()

    # Convert predicted changes back to NDVI levels.
    # NOTE(review): ``Y_pred`` holds one value per row kept by the fit, while
    # ``arsi.NDVI[:-2]`` drops two rows -- confirm the lengths agree.
    original = Y + arsi.NDVI
    predicted = Y_pred + arsi.NDVI[:-2]
    result = pd.concat([original, predicted], axis=1)
    result.columns= ['Original','Predicted']
    rms1 = sqrt(mean_squared_error(result.Original[:-2], result.Predicted[:-2])) #0.0186

    # Same procedure with all three regressors.
    X = ddf[['N_P', 'N_LST', 'N_ET']]
    Y = ddf['D_NDVI']
    X = sm.add_constant(X)
    est = sm.OLS(Y,X, missing = 'drop').fit()
    est.summary()
    Y_pred = est.predict()

    # NOTE(review): same length caveat as above, here with ``[:-6]``.
    original = Y + arsi.NDVI
    predicted = Y_pred + arsi.NDVI[:-6]
    result = pd.concat([original, predicted], axis=1)
    result.columns= ['Original','Predicted']
    # ``rms1`` is reassigned here and again further down; it is never read.
    rms1 = sqrt(mean_squared_error(result.Original[:-6], result.Predicted[:-6])) #0.0186

    fig = plt.figure()
    plt.plot(arsi.DT, result.Original, 'darkblue', label = 'Original')
    plt.plot(arsi.DT, result.Predicted, 'dodgerblue', label = 'Projected')
    plt.legend(loc='upper right')
    plt.title('Arsi NDVI')




    # Refit on N_P computed directly from ``arsi``.
    X = ((max(arsi.NDVI) - arsi.NDVI)* arsi.P)  #for multivariate regression ddf[['N_P', 'N_LST']]
    Y = arsi.NDVI.diff().shift(-1)
    X = sm.add_constant(X) #only use when doing first run OLS then remove when fitting prediction
    est = sm.OLS(Y,X, missing = 'drop').fit()
    est.summary()
    Y_pred = est.predict()
    # NOTE(review): hard-coded coefficients, and ``X`` still contains the
    # constant column added above, so this is applied to both columns.
    Y_pred2 = 0.0047*X - 0.0201


    #take delta predictions and convert to forecast
    original = Y + arsi.NDVI
    # NOTE(review): ``result`` has columns 'Original'/'Predicted', so the
    # lowercase ``result.original`` looks like a typo; np.append also returns
    # an array that is then passed to pd.concat -- confirm this runs.
    original = np.append(result.original, np.nan)
    predicted = Y_pred2 + arsi.NDVI
    result = pd.concat([original, predicted], axis=1)
    result.columns= ['Original','Predicted']

    fig = plt.figure()
    plt.plot(arsi.DT, result.Original, 'darkblue', label = 'Original')
    plt.plot(arsi.DT, result.Predicted, 'dodgerblue', label = 'Projected')
    plt.legend(loc='upper right')
    plt.title('Arsi NDVI')

    rms1 = sqrt(mean_squared_error(result.Original[:-2], result.Predicted[:-2])) #0.0186



    #create stargazer model table
    stargazer = Stargazer([est])
    HTML(stargazer.render_html())  # rendered object is not returned or stored



    # Fit N_P against D_NDVI once more; note that no ``missing='drop'`` is
    # passed here.
    model = sm.OLS(ddf.D_NDVI, sm.add_constant(ddf.N_P)).fit()
    #predict values of Y
    Y_pred = model.predict()
    #summary table
    model.summary()
def table8_9_ext5(df, name, GDP):
    """Build 2SLS tables of six governance outcomes on segregation.

    For every panel, each of the outcomes ``voice``, ``PolStab``,
    ``GovEffec``, ``RegQual``, ``RulLaw`` and ``ConCorr`` is regressed by
    IV2SLS on ``{name}_C2`` (instrumented by ``{name}_instrument_C2_thresh``)
    and ``{name}_I``, optionally with the control variables.

    Args:
        df: DataFrame containing the columns selected below.
        name: prefix of the segregation columns, e.g. ``'ethnicity'``.
        GDP: ``'democ'`` restricts panel C to ``democ >= 1`` and returns
            panels A, B and C; ``'GDP'`` restricts panel C to
            ``lnGDP_pc >= 7`` and returns only that panel.

    Returns:
        A list of three ``Stargazer`` objects for ``GDP == 'democ'``, or a
        single ``Stargazer`` object for ``GDP == 'GDP'``.

    Raises:
        ValueError: if ``GDP`` is neither ``'democ'`` nor ``'GDP'``
            (previously this surfaced as an UnboundLocalError).
    """
    if GDP not in ('democ', 'GDP'):
        raise ValueError(f"GDP must be 'democ' or 'GDP', got {GDP!r}")

    segregation = f'{name}_C2'
    fractionalization = f'{name}_I'
    instrument = f'{name}_instrument_C2_thresh'
    controls = [
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'LOScandin',
        'democ', 'mtnall'
    ]
    # Order matches the column headers set in build_panel.
    outcomes = ['voice', 'PolStab', 'GovEffec', 'RegQual', 'RulLaw', 'ConCorr']

    # Complete cases over every column used by any panel.
    full_sample = df[[
        segregation, fractionalization, instrument, *controls, *outcomes
    ]].dropna(axis=0)

    if GDP == 'democ':
        restricted_sample = full_sample[full_sample.democ >= 1]
    else:
        restricted_sample = full_sample[full_sample.lnGDP_pc >= 7]

    def build_panel(sample, with_controls):
        """Fit the six outcome models on ``sample`` and wrap them in a table."""
        extra = controls if with_controls else []
        exog = sm.add_constant(
            sample[[segregation, fractionalization, *extra]])
        instruments = sm.add_constant(
            sample[[instrument, fractionalization, *extra]])
        fits = [
            IV2SLS(sample[outcome], exog, instruments).fit()
            for outcome in outcomes
        ]
        panel = Stargazer(fits)
        panel.covariate_order([segregation, fractionalization])
        # Raw string keeps the LaTeX backslash without an escape warning.
        panel.rename_covariates({
            segregation: r'Segregation $\hat{S}$ (' + f'{name}' + ')',
            fractionalization: 'Fractionalization $F$ (' + f'{name}' + ')'
        })
        panel.show_model_numbers(False)
        panel.custom_columns([
            'Voice', 'Political stability', 'Govern-t effectiv.',
            'Regul. quality', 'Rule of law', 'Control of corr'
        ], [1, 1, 1, 1, 1, 1])
        return panel

    if GDP == 'GDP':
        # Only the restricted-sample panel is returned in this mode, so the
        # full-sample panels are not estimated.
        panel_c = build_panel(restricted_sample, True)
        if name == 'ethnicity':
            panel_c.title(
                'Panel A. Ethnicity: All controls; sample excludes poorest countries'
            )
        elif name == 'language':
            panel_c.title(
                'Panel B. Language: All controls; sample excludes poorest countries'
            )
        return panel_c

    panel_a = build_panel(full_sample, True)
    panel_b = build_panel(full_sample, False)
    panel_c = build_panel(restricted_sample, True)

    n_models = len(outcomes)
    panel_a.add_line('Controls', ['Yes'] * n_models)
    panel_a.add_line('Sample', ['Full'] * n_models)
    panel_b.add_line('Controls', ['No'] * n_models)
    panel_b.add_line('Sample', ['Full'] * n_models)
    panel_c.add_line('Controls', ['Yes'] * n_models)
    panel_c.add_line('Sample', ['Democ'] * n_models)

    panel_a.title('Panel A. Baseline : All controls and full sample')
    panel_b.title('Panel B. No controls and full sample')
    panel_c.title('Panel C. All controls; sample excludes dictatorship')

    return [panel_a, panel_b, panel_c]
def df_table12(df, name):
    """Build the OLS / 2SLS table of ``trust`` on segregation.

    Six models are fitted: OLS (HC1 standard errors) and IV2SLS, each
    without controls on the full sample, with controls on the full sample,
    and with controls on the rows where ``democ > 1``.

    Args:
        df: DataFrame containing the columns selected below.
        name: prefix of the segregation columns, e.g. ``'ethnicity'``.

    Returns:
        The configured ``Stargazer`` object, titled 'Panel A. Ethnicity'
        when ``name == 'ethnicity'`` and 'Panel B. Language' otherwise.
    """
    segregation = f'{name}_C2'
    fractionalization = f'{name}_I'
    instrument = f'{name}_instrument_C2_thresh'
    controls = [
        'lnpopulation', 'lnArea', 'lnGDP_pc', 'protestants', 'muslims',
        'catholics', 'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist',
        'LOScandin', 'democ', 'mtnall'
    ]

    # Complete cases over every column used by any model.
    sample = df[[
        segregation, instrument, fractionalization, 'trust', 'democ',
        'lnpopulation', 'lnArea', 'lnGDP_pc', 'protestants', 'muslims',
        'catholics', 'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist',
        'LOScandin', 'mtnall'
    ]].dropna(axis=0)
    df_demo = sample[sample.democ > 1]

    dep1 = sample['trust']
    dep2 = df_demo['trust']

    exog_columns = [segregation, fractionalization, *controls]
    instrument_columns = [instrument, fractionalization, *controls]

    exo1 = sm.add_constant(sample[segregation])
    exo2 = sm.add_constant(sample[exog_columns])
    exo3 = sm.add_constant(df_demo[exog_columns])

    ins1 = sm.add_constant(sample[instrument])
    ins2 = sm.add_constant(sample[instrument_columns])
    ins3 = sm.add_constant(df_demo[instrument_columns])

    reg1 = sm.OLS(dep1, exo1).fit(cov_type='HC1')
    reg2 = sm.OLS(dep1, exo2).fit(cov_type='HC1')
    reg3 = sm.OLS(dep2, exo3).fit(cov_type='HC1')
    reg4 = IV2SLS(dep1, exo1, ins1).fit()
    reg5 = IV2SLS(dep1, exo2, ins2).fit()
    reg6 = IV2SLS(dep2, exo3, ins3).fit()

    stargazer = Stargazer([reg1, reg2, reg3, reg4, reg5, reg6])
    stargazer.covariate_order([segregation, fractionalization])
    # Raw string keeps the LaTeX backslash without an escape warning.
    stargazer.rename_covariates({
        segregation: r'Segregation $\hat{S}$ (' + f'{name}' + ')',
        fractionalization: 'Fractionalization $F$ (' + f'{name}' + ')'
    })

    stargazer.custom_columns(['OLS', 'OLS', 'OLS', '2SLS', '2SLS', '2SLS'],
                             [1, 1, 1, 1, 1, 1])
    stargazer.add_line('Controls', ['No', 'Yes', 'Yes', 'No', 'Yes', 'Yes'])
    stargazer.add_line('Sample',
                       ['Full', 'Full', 'Democ', 'Full', 'Full', 'Democ'])

    if name == 'ethnicity':
        stargazer.title('Panel A. Ethnicity')
    else:
        stargazer.title('Panel B. Language')
    return stargazer
Esempio n. 12
0
# Model 3: temperature, wind speed and humidity.
weather_spec = ols('cnt ~ temp_celsius + windspeed_kh + hum', data=wbr)
model3 = weather_spec.fit()
print(model3.summary2())

# Model 4: model 3 plus the working-day indicator.
weather_workday_spec = ols(
    'cnt ~ temp_celsius + windspeed_kh + hum + workingday', data=wbr)
model4 = weather_workday_spec.fit()
print(model4.summary2())

# Intercept coefficient: number of bikes I sell when all other variables are 0.
# temp_celsius coefficient: each 1-unit increase in temp_celsius raises my sales by 161.
# workingday coefficient: 4009 on non-working days, but without the others I sell 125 more.

#!pip install stargazer
from stargazer.stargazer import Stargazer
# Generates HTML code
Stargazer([model1, model2, model3, model4]).render_html()

# When a relationship goes up and then down, compute the ^2 term and subtract it.
# When a nominal variable has more than 2 categories we have to build dummies.

# Create the four season columns first, then flag the matching rows.
for season_code in (1, 2, 3, 4):
    wbr[f"S{season_code}"] = 0

for season_code in (1, 2, 3, 4):
    wbr.loc[(wbr['season'] == season_code), f"S{season_code}"] = 1

# When building dummies, always leave one variable out to act as the reference
# for the others (the most frequent one).
Esempio n. 13
0
os.getcwd()
# NOTE(review): hard-coded, machine-specific working directory.
os.chdir(
    "/Users/manunavjeevan/Desktop/UCLA/Second Year/Winter 2020/IO/Problem Set 1"
)
data = pd.read_csv('dataCleaned.csv')
data.head()
data
# Part 1: Logit
## Regress the logged share differences on price and promotion.
## No constant is added (the add_constant call is left disabled).
y = data['shareDiff']
x = data[['price', 'prom']]
#x = sm.add_constant(x)
model1 = sm.OLS(y, x).fit()
print(model1.summary())
print(Stargazer([model1]).render_latex())
# Print the LaTeX summary, as is done for model2 below; the value was
# previously computed and discarded.
print(summary_col([model1]).as_latex())

## Price, promotion, and a dummy for each brand
brandDummies = pd.get_dummies(data['brand'], prefix='brand')
x = data[['price', 'prom']].join(brandDummies)
#x = sm.add_constant(x)
model2 = sm.OLS(y, x).fit()
print(model2.summary())
print(Stargazer([model2]).render_latex())
print(summary_col([model2]).as_latex())

## Price, promotion and store*brand
# Combine store and brand into one numeric key: store + brand / 100.
data['storeBrand'] = data.store + data['brand'] / 100
storeBrandDummies = pd.get_dummies(data['storeBrand'])
storeBrandDummies
Esempio n. 14
0
# Design matrices with an intercept: base regressors, and the extended set.
x = sm.add_constant(x1)
x_withfemale = sm.add_constant(x2)

model1 = sm.OLS(y, x).fit()
model2 = sm.OLS(y, x_withfemale).fit()

model1.summary()
model2.summary()

# =============================================================================
# STARGAZER MODEL OUTPUTS
# =============================================================================
from stargazer.stargazer import Stargazer

stargazer = Stargazer([model1, model2])
# Column header typo fixed ('Spesified' -> 'Specified').
stargazer.custom_columns(['Base Model', 'Specified Model'], [1, 1])
stargazer.significant_digits(2)
stargazer.covariate_order([
    'const', 'propwomen', 'oppospower', 'gdpcap', 'sepowerdist', 'youthunemp'
])

# Human-readable labels for the covariates listed above.
stargazer.rename_covariates({
    'const': 'Constant',
    'oppospower': 'Opposition Power',
    'gdpcap': 'GDP($)',
    'sepowerdist': 'Class Political Power',
    'youthunemp': 'Unemployed Youth %',
    'propwomen': 'Female Property Rights'
})
Esempio n. 15
0
    tbl.show_f_statistic = False
    tbl.show_notes = True

    return tbl


###

# Table specification: model file names, their directory and restrictions.
fname = "src/tablespecs/table_01.json"

with open(fname) as json_file:
    json_dict = json.load(json_file)

# Prefix every model file name with the model directory.
models = json_dict['MODELS']
models = [json_dict["MODEL_PATH"] + model_file for model_file in models]

# Load every stored regression result.
reg = list(map(sm.load, models))

stargazer = Stargazer(reg)
stargazer2 = publish_table(stargazer, json_dict)

separator = '---'
print(separator)
print(json_dict["RESTRICTIONS"])
print(separator)

print(stargazer2.render_latex(only_tabular=True))
Esempio n. 16
0
"""
# Model based on wind speed only.
wind_spec = ols('cnt ~ windspeed_kh', data=wbr)
model2 = wind_spec.fit()
model2.summary2()
# English: next model, based on temperature and windspeed_kh. As the model
# shows, including new variables changes the influence of the variables.
"""
Siguiente modelo basado en la temperatura y el windspeed_kh
Como podemos ver en el modelo, al incluir nuevas variables cambia la influencia de
las variables.
"""
temp_wind_spec = ols('cnt ~ temp_celsius + windspeed_kh', data=wbr)
model3 = temp_wind_spec.fit()
model3.summary2()

wbr.hum.hist()
# English: next model, based on the humidity variable.
"""
Siguiende modelo basado con la variable humedad
"""
humidity_spec = ols('cnt ~ hum', data=wbr)
model4 = humidity_spec.fit()
model4.summary2()
# English: next model, based on temperature, windspeed_kh and hum.
"""
Siguiente modelo basado en la temperatura, el windspeed_kh y hum
"""
full_weather_spec = ols('cnt ~ temp_celsius + windspeed_kh + hum', data=wbr)
model5 = full_weather_spec.fit()
model5.summary2()
# English: stargazer helps to present all the models together.
"""
stargazer ayuda a representar todos los modelos.
"""
#!pip install stargazer
from stargazer.stargazer import Stargazer

all_models = [model1, model2, model3, model4, model5]
stargazer = Stargazer(all_models)
stargazer.render_html()
# Independent variables = temp_celsius + windspeed_kh
# (dependent variable in every formula of this section is cnt)
model2 = ols("cnt ~ temp_celsius + windspeed_kh", data=wbr).fit()
model2.summary2()

# Independent variable = hum
# Histogram of hum first, then the single-regressor fit.
wbr.hum.hist()
model1_h = ols("cnt ~ hum", data=wbr).fit()
model1_h.summary2()

# Independent variables = temp_celsius + windspeed_kh + hum
model3 = ols("cnt ~ temp_celsius + windspeed_kh + hum", data=wbr).fit()
model3.summary2()

# Compare all models
# NOTE(review): model1_t is not defined in this section; presumably fitted
# earlier in the script -- confirm.
stargazer = Stargazer([model1_t, model2, model3])
# A bare name is a no-op when run as a script; presumably kept so that an
# interactive/notebook session displays the table -- confirm.
stargazer
stargazer.title("Table 1. A model of bicycle demand in Washington D.C.")
stargazer

#####################
# REGRESSION WITH DUMMIES

# Independent variable = workingday
model1_wd = ols("cnt ~ workingday", data=wbr).fit()
model1_wd.summary2()

# Independent variables = temp_celsius + windspeed_kh + hum + workingday
model4 = ols("cnt ~ temp_celsius + windspeed_kh + hum + workingday",
             data=wbr).fit()
model4.summary2()
Esempio n. 18
0
def get_table_4and7(dependent_var, data):
    """Fit five OLS specifications of one outcome and return them as a table.

    Specifications (each with an added constant):
      1. state
      2. state + chain/ownership dummies
      3. GAP
      4. GAP + chain/ownership dummies
      5. GAP + chain/ownership dummies + region dummies

    Args:
        dependent_var: name of the outcome column in ``data``.
        data: dataset holding the outcome and all regressors.

    Returns:
        A Stargazer table of the five fits. When ``dependent_var`` is
        "change_in_FTE" it also carries a row with the joint-test p-values of
        models 2, 4 and 5.

    Side effects:
        Prints the mean and standard deviation of the outcome.
    """
    outcome = data[dependent_var]
    chain_dummies = ["bk", "kfc", "roys", "co_owned"]
    region_dummies = ["southj", "centralj", "pa1", "pa2"]

    def fit_on(regressors):
        # OLS of the outcome on a constant plus the selected column(s).
        design = sm_api.add_constant(data[regressors])
        return sm_api.OLS(outcome, design).fit()

    model_1 = fit_on("state")
    model_2 = fit_on(["state"] + chain_dummies)
    model_3 = fit_on("GAP")
    model_4 = fit_on(["GAP"] + chain_dummies)
    model_5 = fit_on(["GAP"] + chain_dummies + region_dummies)

    table = Stargazer([model_1, model_2, model_3, model_4, model_5])
    table.rename_covariates({
        'state': 'New Jersey dummy',
        'GAP': 'Initial wage GAP'
    })
    table.add_line('Controls for chain and ownership',
                   ['No', 'Yes', 'No', 'Yes', 'Yes'])
    table.add_line('Controls for region', ['No', 'No', 'No', 'No', 'Yes'])

    # Joint tests that every coefficient (constant included) is zero. They are
    # always evaluated, but only reported for the change_in_FTE outcome.
    hypothesis_2 = '(state = 0), (bk = 0), (kfc = 0), (roys =0),(co_owned= 0),(const=0)'
    hypothesis_4 = '(GAP = 0), (bk = 0), (kfc = 0), (roys =0),(co_owned= 0),(const=0)'
    hypothesis_5 = '(GAP = 0), (bk = 0), (kfc = 0), (roys =0),(co_owned= 0),(const=0), (southj=0),(centralj=0),(pa1=0),(pa2=0)'
    p_model_2 = model_2.f_test(hypothesis_2).pvalue.round(3)
    p_model_4 = model_4.f_test(hypothesis_4).pvalue.round(3)
    p_model_5 = model_5.f_test(hypothesis_5).pvalue.round(3)
    if dependent_var == "change_in_FTE":
        table.add_line('Probability value for controls',
                       ['-', p_model_2, '-', p_model_4, p_model_5])

    table.title("Models for " + dependent_var)
    # Only the two key regressors are listed in the covariate order.
    table.covariate_order(['state', 'GAP'])

    print("The mean and standard deviation of the dependent variable are",
          outcome.mean(), "and", outcome.std(), ",respectively.")

    return table
def table4_5(df, name):
    """Build panels A, B and C of governance regressions for one dimension.

    Six governance outcomes (voice, PolStab, GovEffec, RegQual, RulLaw,
    ConCorr) are each regressed by OLS with HC1 standard errors on
    ``{name}_C2`` and ``{name}_I``:

      * Panel A: all controls, every complete-case row.
      * Panel B: no controls, the same rows as panel A.
      * Panel C: all controls, rows with ``democ > 1`` only.

    Args:
        df: dataset containing ``{name}_C2``, ``{name}_I``, the control
            columns and the six outcome columns.
        name: prefix of the segregation/fractionalization columns; it is also
            interpolated into the covariate labels.

    Returns:
        A list ``[panel_a, panel_b, panel_c]`` of Stargazer tables.
    """
    segregation = f'{name}_C2'
    fractionalization = f'{name}_I'
    controls = [
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'LOScandin',
        'democ', 'mtnall'
    ]
    # Outcome order matches the column labels below, one model per label.
    outcomes = ['voice', 'PolStab', 'GovEffec', 'RegQual', 'RulLaw', 'ConCorr']
    column_labels = [
        'Voice', 'Political stability', 'Govern-t effectiv.',
        'Regul. quality', 'Rule of law', 'Control of corr'
    ]

    key_regressors = [segregation, fractionalization]
    all_regressors = key_regressors + controls

    # A single complete-case sample shared by every regression, so all panels
    # are estimated on comparable rows.
    full_sample = df[all_regressors + outcomes].dropna(axis=0)
    # NOTE(review): the threshold here is strictly greater than 1 -- confirm
    # that this is the intended definition of the non-dictatorship sample.
    democ_sample = full_sample[full_sample.democ > 1]

    # (sample, regressors, 'Controls' row, 'Sample' row, title) per panel.
    panels = [
        (full_sample, all_regressors, 'Yes', 'Full',
         'Panel A. Baseline : All controls and full sample'),
        (full_sample, key_regressors, 'No', 'Full',
         'Panel B. No controls and full sample'),
        (democ_sample, all_regressors, 'Yes', 'Democ',
         'Panel C. All controls; sample excludes dictatorship'),
    ]

    # Raw string keeps the LaTeX backslash literal; '\h' in a normal string
    # literal is an invalid escape sequence.
    covariate_labels = {
        segregation: r'Segregation $\hat{S}$ (' + f'{name})',
        fractionalization: 'Fractionalization $F$ (' + f'{name})',
    }

    tables = []
    for sample, regressors, controls_flag, sample_flag, title in panels:
        exog = sm.add_constant(sample[regressors])
        fits = [
            sm.OLS(sample[outcome], exog).fit(cov_type='HC1')
            for outcome in outcomes
        ]

        table = Stargazer(fits)
        table.covariate_order(key_regressors)
        table.rename_covariates(dict(covariate_labels))
        table.show_model_numbers(False)
        table.custom_columns(column_labels, [1] * len(column_labels))
        table.add_line('Controls', [controls_flag] * len(outcomes))
        table.add_line('Sample', [sample_flag] * len(outcomes))
        table.title(title)
        tables.append(table)

    return tables
Esempio n. 20
0
def Appendix_Table_1(df):
    """Build Appendix Table 1: belief adjustment, direct vs. one month later.

    Six OLS regressions of ``beliefadjustment_normalized`` with HC1 standard
    errors are estimated: two on the rows with ``dummynews_goodbad_h == 0``,
    two on the rows with ``dummynews_goodbad_h == 1``, and two
    difference-in-difference specifications on the whole dataset.

    Args:
        df: dataset with the belief-adjustment, treatment, news, rank and
            Bayesian-benchmark columns.

    Returns:
        A Stargazer table with the six regressions grouped in pairs.

    Side effects:
        Adds (or overwrites) the column ``interact_direct1month`` on ``df``.
    """
    subset_columns = [
        "beliefadjustment_normalized", "dummytreat_direct1month", "rank",
        "beliefadjustment_bayes_norm"
    ]
    baseline_formula = "beliefadjustment_normalized ~ dummytreat_direct1month"
    controls_formula = "beliefadjustment_normalized ~ dummytreat_direct1month + rank + beliefadjustment_bayes_norm"

    def news_subset(news_value):
        # Rows with the given news indicator, restricted to the model columns.
        rows = df[df["dummynews_goodbad_h"] == news_value]
        return pd.DataFrame({column: rows[column] for column in subset_columns})

    def robust_fit(formula, data):
        # OLS with heteroskedasticity-robust (HC1) standard errors.
        return smf.ols(formula=formula, data=data).fit(cov_type='HC1')

    df_good = news_subset(0)
    reg_1 = robust_fit(baseline_formula, df_good)
    reg_2 = robust_fit(controls_formula, df_good)

    df_bad = news_subset(1)
    reg_3 = robust_fit(baseline_formula, df_bad)
    reg_4 = robust_fit(controls_formula, df_bad)

    # Interaction term, written back onto the caller's dataframe.
    # NOTE(review): built from "dummynews_goodbad" although every other step
    # in this function uses "dummynews_goodbad_h" -- confirm this is intended.
    df["interact_direct1month"] = (df["dummytreat_direct1month"] *
                                   df["dummynews_goodbad"])

    reg_5 = robust_fit(
        "beliefadjustment_normalized ~ dummytreat_direct1month + dummynews_goodbad_h + interact_direct1month",
        df)
    reg_6 = robust_fit(
        "beliefadjustment_normalized ~ dummytreat_direct1month + dummynews_goodbad_h + rank + interact_direct1month + beliefadjustment_bayes_norm",
        df)

    table = Stargazer([reg_1, reg_2, reg_3, reg_4, reg_5, reg_6])
    table.title(
        'Appendix Table 1 - Belief Adjustment: Direct versus One Month Later')
    table.dependent_variable_name('Normalized Belief Adjustment - ')
    # Each header spans two adjacent models.
    table.custom_columns([
        'Positive Information', 'Negative Information',
        'Difference-in-difference'
    ], [2, 2, 2])

    return table
def table6(df, alternative=True):
    """Build the three panels regressing segregation indices on instruments.

    For each dimension (ethnicity, language, religion) a complete-case sample
    is drawn from ``df``. Every regression is OLS with HC1 standard errors of
    a segregation index on its instrument, the fractionalization index
    ``{dim}_I`` and the control variables:

      * Panel A: ``{dim}_C2`` on ``{dim}_instrument_C2_thresh``; three models
        on the full samples followed by three on the ``democ >= 1`` samples.
      * Panel B: same layout with ``{dim}_C`` and
        ``{dim}_instrument_C_thresh``.
      * Panel C: the two language models of panel B re-estimated after
        dropping the row whose ``country`` is 'usa'.

    The religion regressions do not include the 'LOScandin' control.

    Args:
        df: dataset with the index, instrument, control, 'RulLaw' and
            'country' columns for all three dimensions.
        alternative: unused; kept for backward compatibility with callers.

    Returns:
        A list ``[panel_a, panel_b, panel_c]`` of Stargazer tables.

    Raises:
        KeyError: if no row has ``country == 'usa'`` in the language samples
            (raised by ``drop``).
    """
    dimensions = ['ethnicity', 'language', 'religion']
    controls = [
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'LOScandin',
        'democ', 'mtnall'
    ]

    def complete_cases(dim):
        # Rows with no missing value in any column used for this dimension.
        columns = [
            f'{dim}_C2', f'{dim}_I', f'{dim}_C',
            f'{dim}_instrument_C_thresh', f'{dim}_instrument_C2_thresh'
        ] + controls + ['RulLaw', 'country']
        return df[columns].dropna(axis=0)

    def regressors(dim, index):
        # Instrument and fractionalization first, then the controls
        # ('LOScandin' is left out of the religion specifications).
        if dim == 'religion':
            dim_controls = [c for c in controls if c != 'LOScandin']
        else:
            dim_controls = controls
        return [f'{dim}_instrument_{index}_thresh', f'{dim}_I'] + dim_controls

    def robust_ols(sample, dim, index):
        # OLS of the {dim}_{index} segregation index with HC1 errors.
        exog = sm.add_constant(sample[regressors(dim, index)])
        return sm.OLS(sample[f'{dim}_{index}'], exog).fit(cov_type='HC1')

    def style(table, index, dims, column_labels, sample_labels, title):
        # Show only instrument/fractionalization rows, with readable labels.
        order = []
        labels = {}
        for dim in dims:
            instrument = f'{dim}_instrument_{index}_thresh'
            fractionalization = f'{dim}_I'
            order += [instrument, fractionalization]
            labels[instrument] = f'Instrument {dim[0].upper()}'
            labels[fractionalization] = f'$F$ ({dim})'
        table.covariate_order(order)
        table.rename_covariates(labels)
        table.custom_columns(column_labels, [1] * len(column_labels))
        table.show_model_numbers(False)
        table.add_line('Sample', sample_labels)
        table.title(title)

    full_samples = [complete_cases(dim) for dim in dimensions]
    democ_samples = [sample[sample.democ >= 1] for sample in full_samples]
    # Column order of panels A and B: E, L, R on full samples, then E, L, R
    # on the democracy samples.
    specs = list(zip(dimensions * 2, full_samples + democ_samples))

    stargazer = Stargazer(
        [robust_ols(sample, dim, 'C2') for dim, sample in specs])
    stargazer_a = Stargazer(
        [robust_ols(sample, dim, 'C') for dim, sample in specs])

    # Panel C: language samples (full, democracy) without the US row.
    language_without_usa = [
        sample.set_index('country').drop(index='usa')
        for sample in (full_samples[1], democ_samples[1])
    ]
    stargazer_b = Stargazer([
        robust_ols(sample, 'language', 'C') for sample in language_without_usa
    ])

    six_samples = ['Full', 'Full', 'Full', 'Democracy', 'Democracy',
                   'Democracy']

    # Raw strings keep the LaTeX backslashes literal; '\h' in a normal string
    # literal is an invalid escape sequence.
    style(stargazer, 'C2', dimensions,
          [r'E$\hat{S}$', r'L$\hat{S}$', r'R$\hat{S}$'] * 2, six_samples,
          r'Panel A. Segregation index $\hat{S}$')
    style(stargazer_a, 'C', dimensions,
          [r'E$\tilde{S}$', r'L$\tilde{S}$', r'R$\tilde{S}$'] * 2,
          six_samples, r'Panel B. Segregation index $\tilde{S}$')
    style(
        stargazer_b, 'C', ['language'], [r'L$\tilde{S}$', r'L$\tilde{S}$'],
        ['Full', 'Democracy'],
        r'Panel C. Segregation index $\tilde{S}$ for language with sample excluding the US'
    )

    return [stargazer, stargazer_a, stargazer_b]
Esempio n. 22
0
 def setUp(self):
     """Create the fixture: a 9-row frame, two OLS fits and their table."""
     # Column 'a' is 0..8 and column 'b' is 0, 2, ..., 16.
     rows = list(zip(range(9), range(0, 18, 2)))
     self.df = pd.DataFrame(rows, columns=['a', 'b'])
     # est1 has no intercept ('0 +'), est2 has an explicit one ('1 +').
     no_intercept, with_intercept = 'a ~ 0 + b', 'a ~ 1 + b'
     self.est1 = smf.ols(no_intercept, self.df).fit()
     self.est2 = smf.ols(with_intercept, self.df).fit()
     self.stargazer = Stargazer([self.est1, self.est2])
def table10_11(df, name, democ):
    """Build one panel of OLS / 2SLS regressions of five governance outcomes.

    For each outcome (icrg_qog, ef_corruption, ef_property_rights, ef_regul,
    taxevas) two models are estimated on that outcome's complete-case sample:
    OLS with HC1 standard errors, then IV2SLS in which ``{name}_C2`` is
    replaced by ``{name}_instrument_C2_thresh`` in the instrument matrix.

    Args:
        df: dataset with the index, instrument, control and outcome columns.
        name: prefix of the segregation/fractionalization columns; it is also
            interpolated into the covariate labels.
        democ: ``'democracy'`` restricts every sample to ``democ >= 1`` and
            includes all controls (panel B). Any other value uses the full
            samples with no additional controls (panel A).

    Returns:
        A Stargazer table with ten models (OLS and 2SLS for each outcome).
    """
    segregation = f'{name}_C2'
    fractionalization = f'{name}_I'
    instrument = f'{name}_instrument_C2_thresh'
    controls = [
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'LOScandin',
        'democ', 'mtnall'
    ]
    base_columns = [segregation, fractionalization, instrument] + controls

    def complete_cases(outcome_columns):
        # Rows with no missing value among the regressors and these outcomes.
        return df[base_columns + outcome_columns].dropna(axis=0)

    # The three 'ef_*' outcomes share one sample; the others have their own.
    samples = {
        'icrg': complete_cases(['icrg_qog']),
        'ef': complete_cases(
            ['ef_regul', 'ef_corruption', 'ef_property_rights']),
        'tax': complete_cases(['taxevas']),
    }

    democracies_only = democ == 'democracy'
    exog_columns = [fractionalization, segregation]
    instrument_columns = [fractionalization, instrument]
    if democracies_only:
        samples = {
            key: sample[sample.democ >= 1]
            for key, sample in samples.items()
        }
        exog_columns = exog_columns + controls
        instrument_columns = instrument_columns + controls

    # (sample key, outcome column) in table column order.
    outcomes = [
        ('icrg', 'icrg_qog'),
        ('ef', 'ef_corruption'),
        ('ef', 'ef_property_rights'),
        ('ef', 'ef_regul'),
        ('tax', 'taxevas'),
    ]

    estimates = []
    for sample_key, outcome in outcomes:
        sample = samples[sample_key]
        endog = sample[outcome]
        exog = sm.add_constant(sample[exog_columns])
        instruments = sm.add_constant(sample[instrument_columns])
        estimates.append(sm.OLS(endog, exog).fit(cov_type='HC1'))
        estimates.append(IV2SLS(endog, exog, instruments).fit())

    stargazer = Stargazer(estimates)
    # Each header spans the OLS/2SLS pair of one outcome.
    stargazer.custom_columns([
        'ICRG quality of gov', 'EF Corruption', 'EF Property rights',
        'EF Regulation', 'Tax eva'
    ], [2, 2, 2, 2, 2])
    stargazer.show_model_numbers(False)
    stargazer.covariate_order([segregation, fractionalization])
    # Raw string keeps the LaTeX backslash literal; '\h' in a normal string
    # literal is an invalid escape sequence.
    stargazer.rename_covariates({
        segregation: r'Segregation $\hat{S}$ (' + f'{name})',
        fractionalization: 'Fractionalization $F$ (' + f'{name})',
    })
    stargazer.add_line('Method', ['OLS', '2SLS'] * len(outcomes))

    if democracies_only:
        stargazer.title('Panel B. Democracies sample, all controls')
    else:
        stargazer.title('Panel A. Full sample, no additional controls')
    return stargazer
Esempio n. 24
0
def Main_Table_1(df):
    """Build Table 1: belief adjustment, direct vs. one month later.

    Eight OLS regressions of ``beliefadjustment_normalized`` with HC1
    standard errors are estimated: two on the rows with
    ``dummynews_goodbad == 0``, two on the rows with
    ``dummynews_goodbad == 1``, two difference-in-difference specifications
    on the whole dataset, and two more that add the rank dummies and their
    interaction columns.

    Args:
        df: dataset with the belief-adjustment, treatment, news, rank,
            Bayesian-benchmark and rank-dummy columns.

    Returns:
        A Stargazer table with the eight regressions grouped in pairs.

    Side effects:
        Adds (or overwrites) the column ``interact_direct1month`` on ``df``.
    """
    subset_columns = [
        "beliefadjustment_normalized", "dummytreat_direct1month", "rank",
        "beliefadjustment_bayes_norm"
    ]
    baseline_formula = "beliefadjustment_normalized ~ dummytreat_direct1month"
    controls_formula = "beliefadjustment_normalized ~ dummytreat_direct1month + rank + beliefadjustment_bayes_norm"

    def news_subset(news_value):
        # Rows with the given news indicator, restricted to the model columns.
        rows = df[df["dummynews_goodbad"] == news_value]
        return pd.DataFrame({column: rows[column] for column in subset_columns})

    def robust_fit(formula, data):
        # OLS with heteroskedasticity-robust (HC1) standard errors.
        return smf.ols(formula=formula, data=data).fit(cov_type='HC1')

    df_good = news_subset(0)
    reg_1 = robust_fit(baseline_formula, df_good)
    reg_2 = robust_fit(controls_formula, df_good)

    df_bad = news_subset(1)
    reg_3 = robust_fit(baseline_formula, df_bad)
    reg_4 = robust_fit(controls_formula, df_bad)

    # Interaction term, written back onto the caller's dataframe.
    df["interact_direct1month"] = (df["dummytreat_direct1month"] *
                                   df["dummynews_goodbad"])

    did_terms = "beliefadjustment_normalized ~ dummytreat_direct1month + dummynews_goodbad + interact_direct1month"
    reg_5 = robust_fit(did_terms, df)
    reg_6 = robust_fit(
        "beliefadjustment_normalized ~ dummytreat_direct1month + dummynews_goodbad + rank + interact_direct1month + beliefadjustment_bayes_norm",
        df)

    # rankdummy1..9 followed by rankdummy1_interact..9_interact.
    rank_terms = " + ".join(
        [f"rankdummy{k}" for k in range(1, 10)] +
        [f"rankdummy{k}_interact" for k in range(1, 10)])
    reg_7 = robust_fit(did_terms + " + " + rank_terms, df)
    reg_8 = robust_fit(
        did_terms + " + beliefadjustment_bayes_norm + " + rank_terms, df)

    table = Stargazer(
        [reg_1, reg_2, reg_3, reg_4, reg_5, reg_6, reg_7, reg_8])
    table.title('Table 1 - Belief Adjustment: Direct versus One Month Later')
    table.dependent_variable_name('Normalized Belief Adjustment - ')
    # Each header spans two adjacent models.
    table.custom_columns([
        'Positive Information', 'Negative Information',
        'Difference-in-difference',
        'Difference-in-difference with rank fixed effects'
    ], [2, 2, 2, 2])

    return table
def get_table3(df):
    """Build Table 3: effects on standardized grades.

    Two regressions of ``stdgrade`` are estimated, weighted by ``df["kwgt"]``
    and with standard errors clustered on ``df["studentid"]``:

      * column 1: treat + pol1 + pol1t
      * column 4: adds the voluntary/forced course terms and their
        interactions with the treatment and the polynomial terms.

    The models use ``smf.wls``: ``smf.ols`` does not support a ``weights``
    argument and would silently estimate the regressions unweighted.

    Args:
        df: dataset containing the outcome, regressors, the ``kwgt`` weight
            column and the ``studentid`` cluster identifier.

    Returns:
        A Stargazer table with the two regressions, showing only the three
        treatment-related covariates.
    """
    formula1 = "stdgrade ~ treat + pol1+ pol1t"
    formula2 = "stdgrade ~ treat + treatmentvol + treatmentfor + volcourse + forcourse + pol1 + pol1t + pol1vol + pol1tvol + pol1for + pol1tfor"

    def weighted_clustered_fit(formula):
        # Weighted least squares with cluster-robust standard errors.
        model = smf.wls(formula=formula, data=df, weights=df["kwgt"])
        return model.fit(cov_type='cluster',
                         cov_kwds={'groups': df["studentid"]})

    ### regressions:
    rslt1 = weighted_clustered_fit(formula1)
    rslt2 = weighted_clustered_fit(formula2)

    ### Table stargazer:
    stargazer = Stargazer([rslt1, rslt2])
    stargazer.custom_columns(["column 1", "column 4"], [1, 1])
    stargazer.title("Table 3 - Effects on standardized grades")
    stargazer.show_model_numbers(False)
    stargazer.significant_digits(2)
    stargazer.covariate_order(["treat", "treatmentvol", "treatmentfor"])
    stargazer.rename_covariates({"treat": "1st-year GPA is below 7",
                                 "treatmentvol": "Attendance is voluntary x treatment",
                                 "treatmentfor": "Absence is penalized x treatment"})
    stargazer.show_degrees_of_freedom(False)
    stargazer.add_line('Fixed Effects', ['No', 'No'])

    return stargazer
Esempio n. 26
0
# Single-regressor model of cnt on temperature.
model1a = ols('cnt ~ temp_celsius', data=wbr).fit()  # y comes first, then x here
model1a.summary2()

# Single-regressor model of cnt on wind speed.
model1b = ols('cnt ~ windspeed_kh', data=wbr).fit()
model1b.summary2(
)  # Also significant, and negative, but R2 is much lower: only 6% depends on the variability in wind speed

###

model2 = ols('cnt ~ temp_celsius + windspeed_kh',
             data=wbr).fit()  # Two predictors now
model2.summary2()

###

wbr.hum.hist()  # ALWAYS describe the variable first
model1c = ols('cnt ~ hum', data=wbr).fit()
model1c.summary2()

model3 = ols('cnt ~ temp_celsius + windspeed_kh + hum', data=wbr).fit()
model3.summary2()  # R2 increases relative to model2 and the coefficients change

### For reporting

#!pip install stargazer
from stargazer.stargazer import Stargazer

# Comparison table of the one-, two- and three-predictor models.
stargazer = Stargazer([model1a, model2, model3])
stargazer.render_html()
Esempio n. 27
0
# 5. Estimating regression of the return on each strategy on FF 5 factors:
# Join strategy returns with the factor data on their shared index.
reg_df = strategy_ret_df.copy()
reg_df = pd.merge(reg_df, ff_df, left_index=True, right_index=True)

strategy_name_list = list(strategy_ret_df.columns)
results_list = []
for name in strategy_name_list:
    # to have the same name for all variables
    reg_df_tmp = reg_df.rename({name: "ret"}, axis=1)
    # Every column is scaled by 12 before fitting
    # (presumably annualizing monthly data -- confirm).
    results_list.append(
        smf.ols(formula="ret ~ MKT + SMB + HML + CMA + RMW",
                data=reg_df_tmp * 12).fit())

# Outputting short regression results:
stargazer = Stargazer([results_list[0], results_list[3], results_list[6]])
stargazer.custom_columns(['D 30', 'prob 20', 'prob 40'], [1, 1, 1])
stargazer.covariate_order(['Intercept', 'MKT', 'SMB', 'HML', 'RMW', 'CMA'])
stargazer.show_degrees_of_freedom(False)
# The context manager closes the file even if rendering or writing raises.
with open(
        "/Users/rsigalov/Dropbox/2019_Revision/Writing/Predictive Regressions/tables/disaster_sort_reg_on_ff.tex",
        "w") as f:
    f.write(stargazer.render_latex())

# Doing extended regression table where I do regressions of strategy return on
# (1) just the market, (2) FF 3 factors and (3) FF 5 factors.
results_list = []
for name in ["D_30", "p_20_30"]:
    # to have the same name for all variables
    # NOTE(review): nothing is appended to results_list in this loop; the
    # body looks truncated -- confirm against the full script.
    reg_df_tmp = reg_df.rename({name: "ret"}, axis=1)
Esempio n. 28
0
def _order_covariates(all_covariates, covariates_order):
    """Arrange ``all_covariates`` according to ``covariates_order``.

    Covariates named in ``covariates_order`` come first, in that order;
    names in ``covariates_order`` that are not in ``all_covariates`` are
    dropped. The remaining covariates follow in the order they appear in
    ``all_covariates``. "Intercept", when present, is always placed last.
    """
    available = set(all_covariates)
    # dict.fromkeys removes duplicates while keeping first-seen order.
    requested = [
        name for name in dict.fromkeys(covariates_order) if name in available
    ]
    placed = set(requested)
    remaining = [
        name for name in dict.fromkeys(all_covariates) if name not in placed
    ]
    ordered = [name for name in requested + remaining if name != "Intercept"]
    # Models fitted without a constant have no "Intercept" row to move.
    if "Intercept" in available:
        ordered.append("Intercept")
    return ordered


def ols_regression_formatted(data,
                             specifications,
                             as_latex=False,
                             covariates_names=None,
                             covariates_order=None):
    """Fit OLS regressions and build one formatted table per dependent variable.

    Args:
        data: DataFrame containing all variables used in the regressions.
        specifications (dict): maps each dependent variable name to a list
            of right-hand-side formula strings; one regression is fitted per
            formula, and all regressions for a dependent variable share a
            table.
        as_latex (bool): if True, store the rendered LaTeX code with the
            surrounding ``table`` float environment stripped; otherwise store
            the Stargazer object itself.
        covariates_names (dict): optional mapping from covariate names as
            they appear in ``data`` to the names shown in the table.
        covariates_order (list): optional preferred order of covariates.
            Covariates not listed follow in order of first appearance, and
            "Intercept" (when present) is always placed last.

    Returns:
        dict: maps each dependent variable to its formatted table (a
        Stargazer object, or a LaTeX string when ``as_latex`` is True).
    """
    regression_tables = {}

    for depvar, specification_list in specifications.items():
        regressions = []
        # Dict keys keep insertion order and drop duplicates, so covariates
        # are collected deterministically in order of first appearance.
        all_covariates = {}

        for specification in specification_list:
            estimation_equation = depvar + " ~ " + specification
            regression = smf.ols(data=data, formula=estimation_equation).fit()
            regressions.append(regression)
            all_covariates.update(
                dict.fromkeys(regression.params.index.values.tolist()))

        formatted_table = Stargazer(regressions)

        # Hide degrees of freedom and leave the dependent variable name blank.
        formatted_table.show_degrees_of_freedom(False)
        formatted_table.dependent_variable_name("")

        # Optional: change the order of covariates.
        if covariates_order is not None:
            formatted_table.covariate_order(
                _order_covariates(list(all_covariates), covariates_order))

        # Optional: change the displayed names of covariates.
        if covariates_names is not None:
            formatted_table.rename_covariates(covariates_names)

        if as_latex is True:
            # Strip the surrounding table float environment so the caller
            # can embed the remaining code in its own wrapper.
            regression_tables[depvar] = (
                formatted_table.render_latex()
                .replace("\\begin{table}[!htbp] \\centering\n", "")
                .replace("\\end{table}", ""))
        else:
            regression_tables[depvar] = formatted_table

    return regression_tables