def create_table(models, path):
    """Render ``models`` as a Stargazer table and write it to ``path``.

    The output format is selected from the file suffix of ``path``:
    ``.tex`` uses ``Stargazer.render_latex()`` and ``.html`` uses
    ``Stargazer.render_html()``.

    Args:
        models: Passed unchanged to ``Stargazer(models)``.
        path: Destination file; anything accepted by ``pathlib.Path``.

    Raises:
        NotImplementedError: If the suffix is neither ``.tex`` nor ``.html``.
    """
    path = Path(path)
    suffix = path.suffix

    # Validate the requested format first so an unsupported suffix fails
    # immediately, before any table is built.
    if suffix not in (".tex", ".html"):
        raise NotImplementedError(
            "Unsupported table format %r; expected '.tex' or '.html'." % suffix
        )

    stargazer = Stargazer(models)
    if suffix == ".tex":
        table = stargazer.render_latex()
    else:
        table = stargazer.render_html()

    # Explicit encoding keeps the written file independent of the platform
    # default (covariate labels may contain non-ASCII characters).
    with open(path, "w", encoding="utf-8") as file:
        file.write(table)
def table3_7(df, regression_type):
    """Build a six-column regression table with ``RulLaw`` as the outcome.

    For each of the three dimensions (ethnicity, language, religion) two
    models are fitted on the same complete-case sample: a basic one with only
    the segregation and fractionalization measures, and a full one that adds
    the control variables. The results are passed to Stargazer in the order
    ethnicity basic, ethnicity full, language basic, language full,
    religion basic, religion full.

    Args:
        df: DataFrame holding the ``<dimension>_C2``,
            ``<dimension>_instrument_C2_thresh`` and ``<dimension>_I`` columns,
            the control columns listed below, and ``RulLaw``.
        regression_type: ``'IV2SLS'`` to instrument ``<dimension>_C2`` with
            ``<dimension>_instrument_C2_thresh``, or ``'OLS'`` for OLS with
            HC1 covariance.

    Returns:
        ``HTML(stargazer.render_html())`` for the assembled table.

    Raises:
        ValueError: If ``regression_type`` is not ``'IV2SLS'`` or ``'OLS'``.
    """
    if regression_type not in ('IV2SLS', 'OLS'):
        # Previously an unknown type fell through both branches and surfaced
        # later as an UnboundLocalError on the first result variable.
        raise ValueError(
            "regression_type must be 'IV2SLS' or 'OLS', got %r"
            % (regression_type,)
        )

    controls = [
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'LOScandin',
        'lnArea', 'democ', 'mtnall',
    ]
    # The full religion specification leaves LOScandin out of both the
    # regressors and the instruments, while still using it to define the
    # complete-case sample.
    # NOTE(review): kept as found; confirm the omission is intentional.
    controls_without_scandin = [c for c in controls if c != 'LOScandin']

    specifications = [
        ('ethnicity', controls),
        ('language', controls),
        ('religion', controls_without_scandin),
    ]

    results = []
    for dimension, full_controls in specifications:
        segregation = dimension + '_C2'
        instrument = dimension + '_instrument_C2_thresh'
        fractionalization = dimension + '_I'

        # One complete-case sample per dimension, shared by the basic and the
        # full model so both columns are estimated on the same rows.
        sample = df[
            [segregation, instrument, fractionalization] + controls + ['RulLaw']
        ].dropna(axis=0)

        for extra in ([], full_controls):
            exog = sm.add_constant(
                sample[[segregation, fractionalization] + extra])
            if regression_type == 'IV2SLS':
                # Instrument set: the regressors with the segregation measure
                # swapped for its instrument.
                instruments = sm.add_constant(
                    sample[[instrument, fractionalization] + extra])
                fitted = IV2SLS(sample['RulLaw'], exog, instruments).fit()
            else:
                fitted = sm.OLS(sample['RulLaw'], exog).fit(cov_type='HC1')
            results.append(fitted)

    stargazer = Stargazer(results)
    stargazer.covariate_order([
        'ethnicity_C2', 'ethnicity_I', 'language_C2', 'language_I',
        'religion_C2', 'religion_I', 'lnpopulation', 'lnGDP_pc', 'lnArea',
        'protestants', 'muslims', 'catholics', 'latitude', 'LOEnglish',
        'LOGerman', 'LOSocialist', 'LOScandin', 'democ', 'mtnall', 'const',
    ])
    # Raw strings: '\h' is not a valid escape sequence in a normal literal.
    stargazer.rename_covariates({
        'ethnicity_C2': r'Segregation $\hat{S}$ (ethnicity)',
        'ethnicity_I': 'Fractionalization $F$ (ethnicity)',
        'language_C2': r'Segregation $\hat{S}$ (language)',
        'language_I': 'Fractionalization $F$ (language)',
        'religion_C2': r'Segregation $\hat{S}$ (religion)',
        'religion_I': 'Fractionalization $F$ (religion)',
        'lnpopulation': 'ln (population)',
        'lnGDP_pc': 'ln (GDP per capita)',
        'lnArea': 'ln (average size of region)',
        'protestants': 'Protestants share',
        'muslims': 'Muslims Share',
        'catholics': 'Catholics share',
        'latitude': 'Latitude',
        'LOEnglish': 'English legal origin',
        'LOGerman': 'German legal origin',
        'LOSocialist': 'Socialist legal origin',
        'LOScandin': 'Scandinavian legal origin',
        'democ': 'Democratic tradition',
        'mtnall': 'Mountains',
        'const': 'Constant',
    })
    return HTML(stargazer.render_html())
model1a = ols('cnt ~ temp_celsius', data=wbr).fit() #Primero y, luego x aquí model1a.summary2() model1b = ols('cnt ~ windspeed_kh', data=wbr).fit() model1b.summary2( ) #Es significativa también y negativa, pero R2 es mucho menor: solo el 6% depende de la variabilidad en el viento ### model2 = ols('cnt ~ temp_celsius + windspeed_kh', data=wbr).fit() #Dos predictores ahora model2.summary2() ### wbr.hum.hist() #Describir primero SIEMPRE model1c = ols('cnt ~ hum', data=wbr).fit() model1c.summary2() model3 = ols('cnt ~ temp_celsius + windspeed_kh + hum', data=wbr).fit() model3.summary2() #Aumenta R2 respecto a model2 y cambian los coeficientes ### Para reportar #!pip install stargazer from stargazer.stargazer import Stargazer stargazer = Stargazer([model1a, model2, model3]) stargazer.render_html()
def _linregress_valid(x, y):
    """Run ``stats.linregress`` on the rows where both ``x`` and ``y`` are non-NaN."""
    keep = x.notna() & y.notna()
    return stats.linregress(x[keep], y[keep])


def _scatter_with_fit(ax, x, y, fit, color, xlabel, text_xy):
    """Draw a scatter of ``y`` on ``x`` with the fitted line and its equation / r^2."""
    slope, intercept, r_value = fit[0], fit[1], fit[2]
    ax.scatter(x, y, color=color)
    ax.plot(x, slope * x + intercept, color='k')
    ax.set(xlabel=xlabel, ylabel='')
    text_x, text_y = text_xy
    # Annotate with the values of this fit instead of numbers copied from an
    # earlier run.
    ax.text(text_x, text_y, 'y = %.3fx %+.3f' % (slope, intercept), fontsize=8)
    ax.text(text_x, text_y - 0.01, r'$r^2$ = %.3f' % (r_value ** 2), fontsize=8)


def _fit_and_project(features, delta, base):
    """Fit OLS of ``delta`` on ``features`` plus a constant and rebuild levels.

    Returns ``(est, result, rmse)`` where ``result`` has the columns
    ``Original`` (``delta + base``) and ``Predicted`` (fitted delta + ``base``)
    on the full index, and ``rmse`` is computed over rows where both exist.
    """
    # Drop incomplete rows explicitly so the fitted values can be mapped back
    # to their original index positions.
    sample = pd.concat([delta, features], axis=1).dropna()
    exog = sm.add_constant(sample[list(features.columns)])
    est = sm.OLS(sample[delta.name], exog).fit()
    fitted_delta = pd.Series(np.asarray(est.predict()), index=sample.index)

    # Index-aligned addition replaces positional slicing such as NDVI[:-2],
    # which depended on hand-counting the dropped rows.
    result = pd.concat([delta + base, fitted_delta + base], axis=1)
    result.columns = ['Original', 'Predicted']
    scored = result.dropna()
    rmse = sqrt(mean_squared_error(scored.Original, scored.Predicted))
    return est, result, rmse


def _plot_projection(dates, result, title):
    """Plot the ``Original`` and ``Predicted`` columns of ``result`` against ``dates``."""
    fig = plt.figure()
    plt.plot(dates, result.Original, 'darkblue', label='Original')
    plt.plot(dates, result.Predicted, 'dodgerblue', label='Projected')
    plt.legend(loc='upper right')
    plt.title(title)
    return fig


def main(data_dir='/Users/rgreen/Documents/Github/NDVI_Projection/'):
    """Regress the next-step NDVI change for the ``arsi`` sheet and plot projections.

    Steps:
      1. Change into ``data_dir`` and read the five sheets of ``oromia.xlsx``,
         adding a 1-based ``Time`` column to each.
      2. Build the regression frame from the ``arsi`` sheet:
         ``D_NDVI = NDVI.diff().shift(-1)``,
         ``N_P = (max(NDVI) - NDVI) * P``, and
         ``N_LST`` / ``N_ET = (NDVI - min(NDVI)) * LST`` / ``ET`` shifted by 4 rows.
      3. Fit a simple linear regression of ``D_NDVI`` on each feature, print
         r-squared, and draw the three scatter panels.
      4. Fit OLS on ``N_P`` alone and on all three features, print the
         summaries and RMSEs, and plot original vs. projected NDVI.

    Args:
        data_dir: Directory containing ``oromia.xlsx``. The process working
            directory is changed to it.

    Returns:
        ``HTML(Stargazer([est]).render_html())`` for the ``N_P``-only model.
    """
    # Load data
    os.chdir(data_dir)
    oromia = pd.ExcelFile('oromia.xlsx')
    zones = {}
    for zone in ('arsi', 'bale', 'borena', 'guji', 'westarsi'):
        frame = pd.read_excel(oromia, zone)
        # 1.0, 2.0, ... one value per row, without assuming a fixed row count.
        frame.insert(0, 'Time', np.arange(1, len(frame) + 1, dtype=float))
        zones[zone] = frame
    # Only the arsi sheet is analysed below; the others are loaded as before.
    arsi = zones['arsi']

    # Dekadal data (P, LST, ET)
    ndvi = arsi.NDVI
    ndvi_max = ndvi.max()
    ndvi_min = ndvi.min()

    ddf = pd.DataFrame(index=arsi.index)
    ddf['D_NDVI'] = ndvi.diff().shift(-1)
    ddf['N_P'] = (ndvi_max - ndvi) * arsi.P
    # shift is (n-1) dekads, e.g. shift(4) is a 5-dekad lag
    ddf['N_LST'] = ((ndvi - ndvi_min) * arsi.LST).shift(4)
    ddf['N_ET'] = ((ndvi - ndvi_min) * arsi.ET).shift(4)

    # Per-feature simple regressions and scatter panels
    fig, (ax1, ax2, ax3) = plt.subplots(3, sharey=True)
    panels = [
        (ax1, 'N_P', 'darkcyan',
         r'$(NDVI_{max} - NDVI_{t-1})*P_{t-1}$', (16, -0.04)),
        (ax2, 'N_LST', 'forestgreen',
         r'$(NDVI_{t-5} - NDVI_{min})*LST_{t-5}$', (9, 0.07)),
        # Label matches the N_ET feature as computed above.
        (ax3, 'N_ET', 'cornflowerblue',
         r'$(NDVI_{t-5} - NDVI_{min})*ET_{t-5}$', (15, 0.07)),
    ]
    for ax, column, color, xlabel, text_xy in panels:
        # D_NDVI always ends in NaN (shift(-1)), so every fit needs masking,
        # including the N_P one.
        fit = _linregress_valid(ddf[column], ddf.D_NDVI)
        print("r-squared: %f" % (fit[2] ** 2))
        _scatter_with_fit(ax, ddf[column], ddf.D_NDVI, fit, color, xlabel,
                          text_xy)
    fig.text(0.06, 0.5, r'$\Delta NDVI$', ha='center', va='center',
             rotation='vertical')  # common ylabel

    # OLS on N_P only
    # NOTE(review): row t of Original/Predicted holds the NDVI value for step
    # t+1 (NDVI_t + delta) but is plotted against DT at row t -- confirm
    # whether the series should be shifted by one row for display.
    est_p, result_p, rmse_p = _fit_and_project(
        ddf[['N_P']], ddf['D_NDVI'], ndvi)
    print(est_p.summary())
    print("RMSE (N_P): %f" % rmse_p)

    # OLS on all three features
    est_all, result_all, rmse_all = _fit_and_project(
        ddf[['N_P', 'N_LST', 'N_ET']], ddf['D_NDVI'], ndvi)
    print(est_all.summary())
    print("RMSE (N_P, N_LST, N_ET): %f" % rmse_all)

    _plot_projection(arsi.DT, result_all, 'Arsi NDVI')
    _plot_projection(arsi.DT, result_p, 'Arsi NDVI')

    # Stargazer table for the N_P-only model
    stargazer = Stargazer([est_p])
    return HTML(stargazer.render_html())