예제 #1
0
def create_table(models, path):
    """Render a Stargazer table for ``models`` and write it to ``path``.

    The output format is selected from the file extension of ``path``:
    ``.tex`` writes the LaTeX rendering, ``.html`` writes the HTML rendering.

    Parameters
    ----------
    models
        Fitted model results; passed unchanged to ``Stargazer``.
    path : str or os.PathLike
        Destination file. It is opened in write mode, so an existing file is
        overwritten.

    Raises
    ------
    NotImplementedError
        If the extension of ``path`` is neither ``.tex`` nor ``.html``.
    """
    path = Path(path)

    # Resolve the renderer first so an unsupported extension fails
    # immediately, before any table is built.
    renderers = {".tex": "render_latex", ".html": "render_html"}
    try:
        renderer_name = renderers[path.suffix]
    except KeyError:
        raise NotImplementedError(
            "Unsupported table format {!r}; expected one of: {}".format(
                path.suffix, ", ".join(sorted(renderers))
            )
        ) from None

    table = getattr(Stargazer(models), renderer_name)()

    # Explicit encoding: covariate names may contain non-ASCII characters and
    # the platform default encoding is not guaranteed to represent them.
    with open(path, "w", encoding="utf-8") as file:
        file.write(table)
def table3_7(df, regression_type):
    """Build the 'RulLaw' regression table for three segregation measures.

    For each of ethnicity, language and religion, ``RulLaw`` is regressed on
    the segregation index ``<dim>_C2`` and the fractionalization index
    ``<dim>_I``: once with only those two regressors ("short") and once with
    the control variables added ("full"). The six fits are passed to
    Stargazer in the order short/full for ethnicity, language, religion.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every column selected below.
    regression_type : {'IV2SLS', 'OLS'}
        ``'OLS'`` fits ``sm.OLS`` with ``cov_type='HC1'``. ``'IV2SLS'`` fits
        ``IV2SLS`` whose third argument holds the same columns as the
        regressors, except that ``<dim>_C2`` is replaced by
        ``<dim>_instrument_C2_thresh``.

    Returns
    -------
    The ``HTML`` object wrapping ``stargazer.render_html()``.

    Raises
    ------
    ValueError
        If ``regression_type`` is not one of the two supported values.
    """
    # Fail early: otherwise no reg* name is bound and the function crashes
    # much later with an unrelated unbound-local error.
    if regression_type not in ('IV2SLS', 'OLS'):
        raise ValueError(
            "regression_type must be 'IV2SLS' or 'OLS', got {!r}".format(
                regression_type))

    # Columns shared by all three estimation samples (controls + outcome).
    sample_columns = [
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'lnArea',
        'LOScandin', 'democ', 'mtnall', 'RulLaw'
    ]

    # One complete-case sample per dimension. Rows are dropped on the full
    # column set (instrument included), so the short and full specifications
    # and both estimators use the same rows within a dimension.
    df_3_7E = df[[
        'ethnicity_C2', 'ethnicity_instrument_C2_thresh', 'ethnicity_I'
    ] + sample_columns].dropna(axis=0)
    df_3_7L = df[[
        'language_C2', 'language_instrument_C2_thresh', 'language_I'
    ] + sample_columns].dropna(axis=0)
    df_3_7R = df[[
        'religion_C2', 'religion_instrument_C2_thresh', 'religion_I'
    ] + sample_columns].dropna(axis=0)

    # Regressor matrices: exo/exo3/exo5 are the full specifications,
    # exo2/exo4/exo6 the short ones.
    exo = sm.add_constant(df_3_7E[[
        'ethnicity_C2', 'ethnicity_I', 'lnpopulation', 'lnGDP_pc',
        'protestants', 'muslims', 'catholics', 'latitude', 'LOEnglish',
        'LOGerman', 'LOSocialist', 'LOScandin', 'lnArea', 'democ', 'mtnall'
    ]])
    exo2 = sm.add_constant(df_3_7E[['ethnicity_C2', 'ethnicity_I']])
    exo3 = sm.add_constant(df_3_7L[[
        'language_C2', 'language_I', 'lnpopulation', 'lnGDP_pc', 'protestants',
        'lnArea', 'muslims', 'catholics', 'latitude', 'LOEnglish', 'LOGerman',
        'LOSocialist', 'LOScandin', 'democ', 'mtnall'
    ]])
    exo4 = sm.add_constant(df_3_7L[['language_C2', 'language_I']])
    # NOTE(review): unlike ethnicity and language, the religion specification
    # (and its instrument matrix below) omits 'LOScandin'. Left as found --
    # confirm whether this is deliberate.
    exo5 = sm.add_constant(df_3_7R[[
        'religion_C2', 'religion_I', 'lnpopulation', 'lnGDP_pc', 'protestants',
        'muslims', 'catholics', 'latitude', 'LOEnglish', 'LOGerman',
        'LOSocialist', 'lnArea', 'democ', 'mtnall'
    ]])
    exo6 = sm.add_constant(df_3_7R[['religion_C2', 'religion_I']])

    if regression_type == 'IV2SLS':
        reg = IV2SLS(
            df_3_7E['RulLaw'], exo,
            sm.add_constant(df_3_7E[[
                'ethnicity_instrument_C2_thresh', 'ethnicity_I',
                'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims',
                'catholics', 'latitude', 'LOEnglish', 'LOGerman',
                'LOSocialist', 'LOScandin', 'democ', 'mtnall', 'lnArea'
            ]])).fit()
        reg2 = IV2SLS(
            df_3_7E['RulLaw'], exo2,
            sm.add_constant(
                df_3_7E[['ethnicity_instrument_C2_thresh',
                         'ethnicity_I']])).fit()
        reg3 = IV2SLS(
            df_3_7L['RulLaw'], exo3,
            sm.add_constant(df_3_7L[[
                'language_instrument_C2_thresh', 'language_I', 'lnpopulation',
                'lnGDP_pc', 'protestants', 'muslims', 'catholics', 'latitude',
                'LOEnglish', 'LOGerman', 'LOSocialist', 'LOScandin', 'democ',
                'mtnall', 'lnArea'
            ]])).fit()
        reg4 = IV2SLS(
            df_3_7L['RulLaw'], exo4,
            sm.add_constant(
                df_3_7L[['language_instrument_C2_thresh',
                         'language_I']])).fit()
        reg5 = IV2SLS(
            df_3_7R['RulLaw'], exo5,
            sm.add_constant(df_3_7R[[
                'religion_instrument_C2_thresh', 'religion_I', 'lnpopulation',
                'lnGDP_pc', 'protestants', 'muslims', 'catholics', 'latitude',
                'LOEnglish', 'LOGerman', 'LOSocialist', 'democ', 'mtnall',
                'lnArea'
            ]])).fit()
        reg6 = IV2SLS(
            df_3_7R['RulLaw'], exo6,
            sm.add_constant(
                df_3_7R[['religion_instrument_C2_thresh',
                         'religion_I']])).fit()
    else:  # 'OLS' -- the only other value accepted by the check above
        reg2 = sm.OLS(df_3_7E['RulLaw'], exo2).fit(cov_type='HC1')
        reg = sm.OLS(df_3_7E['RulLaw'], exo).fit(cov_type='HC1')
        reg4 = sm.OLS(df_3_7L['RulLaw'], exo4).fit(cov_type='HC1')
        reg3 = sm.OLS(df_3_7L['RulLaw'], exo3).fit(cov_type='HC1')
        reg6 = sm.OLS(df_3_7R['RulLaw'], exo6).fit(cov_type='HC1')
        reg5 = sm.OLS(df_3_7R['RulLaw'], exo5).fit(cov_type='HC1')

    # Column order in the table: short then full, per dimension.
    stargazer = Stargazer([reg2, reg, reg4, reg3, reg6, reg5])
    stargazer.covariate_order([
        'ethnicity_C2', 'ethnicity_I', 'language_C2', 'language_I',
        'religion_C2', 'religion_I', 'lnpopulation', 'lnGDP_pc', 'lnArea',
        'protestants', 'muslims', 'catholics', 'latitude', 'LOEnglish',
        'LOGerman', 'LOSocialist', 'LOScandin', 'democ', 'mtnall', 'const'
    ])
    # Raw strings keep the LaTeX backslash literal without relying on the
    # deprecated pass-through of the invalid escape sequence "\h".
    stargazer.rename_covariates({
        'ethnicity_C2': r'Segregation $\hat{S}$ (ethnicity)',
        'ethnicity_I': 'Fractionalization $F$ (ethnicity)',
        'language_C2': r'Segregation $\hat{S}$ (language)',
        'language_I': 'Fractionalization $F$ (language)',
        'religion_C2': r'Segregation $\hat{S}$ (religion)',
        'religion_I': 'Fractionalization $F$ (religion)',
        'lnpopulation': 'ln (population)',
        'lnGDP_pc': 'ln (GDP per capita)',
        'lnArea': 'ln (average size of region)',
        'protestants': 'Protestants share',
        'muslims': 'Muslims share',
        'catholics': 'Catholics share',
        'latitude': 'Latitude',
        'LOEnglish': 'English legal origin',
        'LOGerman': 'German legal origin',
        'LOSocialist': 'Socialist legal origin',
        'LOScandin': 'Scandinavian legal origin',
        'democ': 'Democratic tradition',
        'mtnall': 'Mountains',
        'const': 'Constant'
    })
    return HTML(stargazer.render_html())
예제 #3
0
# Notebook-style exploration: fits a sequence of formula-based `ols` models of
# `cnt` on columns of the `wbr` data frame (one predictor at a time, then
# combined) and finally collects three of the fits in a Stargazer table.
# The bare `.summary2()` / `.render_html()` expressions are not assigned, so
# their values are only visible where the environment echoes expressions.
model1a = ols('cnt ~ temp_celsius', data=wbr).fit()  # In the formula the response (y) comes first, then the predictor (x)
model1a.summary2()

model1b = ols('cnt ~ windspeed_kh', data=wbr).fit()
model1b.summary2(
)  # Also significant, and negative, but R2 is much lower: only 6% depends on the variability in wind

###

model2 = ols('cnt ~ temp_celsius + windspeed_kh',
             data=wbr).fit()  # Two predictors now
model2.summary2()

###

wbr.hum.hist()  # ALWAYS describe the variable first
model1c = ols('cnt ~ hum', data=wbr).fit()
model1c.summary2()

model3 = ols('cnt ~ temp_celsius + windspeed_kh + hum', data=wbr).fit()
model3.summary2()  # R2 increases relative to model2 and the coefficients change

### For reporting

#!pip install stargazer
from stargazer.stargazer import Stargazer

# Side-by-side table of the one-, two- and three-predictor models.
stargazer = Stargazer([model1a, model2, model3])
stargazer.render_html()
예제 #4
0
def _forecast_frame(delta_observed, delta_predicted, ndvi):
    """Turn NDVI changes into NDVI levels, aligned row by row on the index."""
    result = pd.concat([delta_observed + ndvi, delta_predicted + ndvi], axis=1)
    result.columns = ['Original', 'Predicted']
    return result


def _rmse(result):
    """Root-mean-square error over the rows where both columns are present."""
    scored = result.dropna()
    return sqrt(mean_squared_error(scored.Original, scored.Predicted))


def _plot_forecast(dates, result, title):
    """Draw observed and projected NDVI on a new figure."""
    plt.figure()
    plt.plot(dates, result.Original, 'darkblue', label='Original')
    plt.plot(dates, result.Predicted, 'dodgerblue', label='Projected')
    plt.legend(loc='upper right')
    plt.title(title)


def main(data_dir='/Users/rgreen/Documents/Github/NDVI_Projection/'):
    """Fit and plot dekadal NDVI-change regressions for the 'arsi' sheet.

    Steps:
      1. Load the five sheets of 'oromia.xlsx' and add a 1-based 'Time' column.
      2. Build the regression frame: the next-step NDVI change (D_NDVI) and
         three predictors derived from P, LST and ET.
      3. Fit one simple linear regression per predictor and draw a
         three-panel scatter plot with the fitted lines.
      4. Fit OLS models (N_P alone, then all three predictors), convert the
         fitted changes back to NDVI levels, report the RMSE and plot.
      5. Repeat the level forecast with fixed, hard-coded coefficients.
      6. Build a Stargazer table of the N_P model.

    Parameters
    ----------
    data_dir : str
        Directory containing 'oromia.xlsx'. The process working directory is
        changed to it, as in the original code.

    Returns
    -------
    The ``HTML`` object wrapping the Stargazer table (previously this object
    was built and discarded).
    """
    os.chdir(data_dir)

    # --- Load data ---------------------------------------------------------
    oromia = pd.ExcelFile('oromia.xlsx')
    zones = {}
    for sheet in ('arsi', 'bale', 'borena', 'guji', 'westarsi'):
        frame = pd.read_excel(oromia, sheet)
        # Same values as the former np.linspace(1, 585, 585), but sized from
        # the sheet so a series of another length no longer raises.
        frame.insert(0, 'Time', np.arange(1, len(frame) + 1, dtype=float))
        zones[sheet] = frame
    # Only 'arsi' is analysed below; the other sheets are loaded as before.
    arsi = zones['arsi']

    # --- Regression frame (dekadal P, LST, ET) -----------------------------
    ndvi = arsi.NDVI
    ndvi_min, ndvi_max = ndvi.min(), ndvi.max()
    ddf = pd.DataFrame({
        # Change from the current row to the next one; the last row is NaN.
        'D_NDVI': ndvi.diff().shift(-1),
        'N_P': (ndvi_max - ndvi) * arsi.P,
        # shift(4) on top of the one-step-ahead target gives a lag of five
        # dekads relative to the NDVI value being explained; the first four
        # rows are NaN.
        'N_LST': ((ndvi - ndvi_min) * arsi.LST).shift(4),
        'N_ET': ((ndvi - ndvi_min) * arsi.ET).shift(4),
    })
    delta = ddf['D_NDVI']

    # --- Simple regressions and scatter panels -----------------------------
    # (column, colour, x-label, position of the equation annotation)
    panels = [
        ('N_P', 'darkcyan',
         r'$(NDVI_{max} - NDVI_{t-1})*P_{t-1}$', (16, -0.04)),
        ('N_LST', 'forestgreen',
         r'$(NDVI_{t-5} - NDVI_{min})*LST_{t-5}$', (9, 0.07)),
        # Label corrected to match the predictor actually computed above.
        ('N_ET', 'cornflowerblue',
         r'$(NDVI_{t-5} - NDVI_{min})*ET_{t-5}$', (15, 0.07)),
    ]
    fig, axes = plt.subplots(3, sharey=True)
    for ax, (column, color, xlabel, (text_x, text_y)) in zip(axes, panels):
        # Every fit is masked: D_NDVI always ends in NaN, which would
        # otherwise turn the whole regression into NaN.
        valid = ddf[column].notna() & delta.notna()
        fit = stats.linregress(ddf.loc[valid, column], delta[valid])
        r_squared = fit.rvalue ** 2
        print("r-squared: %f" % r_squared)

        ax.scatter(ddf[column], delta, color=color)
        ax.plot(ddf[column], fit.slope * ddf[column] + fit.intercept,
                color='k')
        ax.set(xlabel=xlabel, ylabel='')
        # Annotations are generated from the fit so they cannot go stale.
        sign = '-' if fit.intercept < 0 else '+'
        ax.text(text_x, text_y,
                'y = %.3fx %s %.3f' % (fit.slope, sign, abs(fit.intercept)),
                fontsize=8)
        ax.text(text_x, text_y - 0.01, r'$r^2$ = %.3f' % r_squared,
                fontsize=8)
    # Common y-label for the three panels.
    fig.text(0.06, 0.5, r'$\Delta NDVI$', ha='center', va='center',
             rotation='vertical')

    # --- OLS on N_P only ---------------------------------------------------
    est_p = sm.OLS(delta, sm.add_constant(ddf[['N_P']]),
                   missing='drop').fit()
    print(est_p.summary())
    # fittedvalues keeps the row labels of the retained observations, so the
    # addition aligns by index instead of relying on hand-counted slices.
    result = _forecast_frame(delta, est_p.fittedvalues, ndvi)
    print("RMSE (N_P): %f" % _rmse(result))

    # --- OLS on all three predictors ---------------------------------------
    est_all = sm.OLS(delta, sm.add_constant(ddf[['N_P', 'N_LST', 'N_ET']]),
                     missing='drop').fit()
    print(est_all.summary())
    result = _forecast_frame(delta, est_all.fittedvalues, ndvi)
    print("RMSE (N_P, N_LST, N_ET): %f" % _rmse(result))
    _plot_forecast(arsi.DT, result, 'Arsi NDVI')

    # --- Forecast with fixed coefficients ----------------------------------
    # NOTE(review): hard-coded values carried over from the original code;
    # their provenance is not visible here. They are applied to the raw
    # predictor (no constant column), which is what the original comment
    # about removing add_constant describes.
    fixed_slope, fixed_intercept = 0.0047, -0.0201
    result = _forecast_frame(
        delta, fixed_slope * ddf.N_P + fixed_intercept, ndvi)
    _plot_forecast(arsi.DT, result, 'Arsi NDVI')
    print("RMSE (fixed coefficients): %f" % _rmse(result))

    # --- Stargazer model table ---------------------------------------------
    stargazer = Stargazer([est_p])
    return HTML(stargazer.render_html())