def linearfunction(x,y,name='linear rating'):
    """Fit an ordinary-least-squares line to paired x/y data.

    The inputs are aligned in a two-column DataFrame; rows where either
    value is missing are dropped, and then rows where either value is
    negative are dropped as well (the ``>= 0`` mask turns them into NaN).

    Returns a one-row DataFrame indexed by ``name`` with columns:
      a        -- ``regression.beta[1]``
      b        -- ``regression.beta[0]``
      r2       -- ``regression.r2``
      rmse     -- ``regression.rmse``
      pearson  -- first element of ``pearson_r(x, y)``
      spearman -- first element of ``spearman_r(x, y)``

    NOTE(review): with ``pd.ols`` beta[0] is presumably the slope and
    beta[1] the intercept -- confirm against the pandas version in use.
    """
    ## pair the two inputs so incomplete observations can be discarded together
    paired = pd.DataFrame.from_dict({'x': x, 'y': y})
    paired = paired.dropna()

    ## keep only rows where both values are >= 0
    non_negative = paired >= 0
    clean = paired[non_negative].dropna()
    xs = clean['x']
    ys = clean['y']

    regression = pd.ols(y=ys, x=xs)
    pearson_coef = pearson_r(xs, ys)[0]
    spearman_coef = spearman_r(xs, ys)[0]

    summary = {
        'a': [regression.beta[1]],
        'b': [regression.beta[0]],
        'r2': [regression.r2],
        'rmse': [regression.rmse],
        'pearson': [pearson_coef],
        'spearman': [spearman_coef],
    }
    return pd.DataFrame(summary, index=[name])
def powerfunction(x,y,name='power rating',pvalue=0.01):
    """Fit a power-law rating by linear regression in log10-log10 space.

    Parameters
    ----------
    x, y : paired observations (anything ``pd.DataFrame.from_dict`` accepts
        as columns).
    name : label used as the index of the returned one-row DataFrame.
    pvalue : significance threshold; a correlation coefficient is reported
        only when its p-value is strictly below this, otherwise NaN.

    Returns
    -------
    One-row DataFrame indexed by ``name`` with columns:
      a        -- ``10 ** regression.beta[1]``
      b        -- ``regression.beta[0]``
      r2, rmse -- taken from the regression object
      pearson, spearman -- coefficient, or NaN when not significant

    NOTE(review): ``pd.ols`` is kept from the original; it is presumably
    unavailable in recent pandas releases -- confirm the pinned version.
    """
    ## put x and y in a dataframe so you can drop ones that don't match up
    logged = pd.DataFrame.from_dict({'x': x, 'y': y}).dropna().apply(np.log10)

    ## log10 of zero is -inf and log10 of a negative is NaN; the mask turns
    ## both into NaN.  The rows must then actually be dropped, otherwise the
    ## NaNs are handed to the correlation functions below.
    logged = logged[logged >= -10].dropna()
    log_x = logged['x']
    log_y = logged['y']

    regression = pd.ols(y=log_y, x=log_x)

    ## compute each correlation once and reuse coefficient and p-value
    pearson_result = pearson_r(log_x, log_y)
    if pearson_result[1] < pvalue:
        pearson = pearson_result[0]
    else:
        pearson = np.nan

    spearman_result = spearman_r(log_x, log_y)
    if spearman_result[1] < pvalue:
        spearman = spearman_result[0]
    else:
        spearman = np.nan

    coeffdf = pd.DataFrame(
        {
            'a': [10 ** regression.beta[1]],
            'b': [regression.beta[0]],
            'r2': [regression.r2],
            'rmse': [regression.rmse],
            'pearson': [pearson],
            'spearman': [spearman],
        },
        index=[name],
    )
    return coeffdf
def _spearman_label(response, driver, alpha=0.10):
    """Return Spearman's coefficient as a '%.3f' string if p < alpha, else ''.

    Any p-value that is not strictly below ``alpha`` -- including NaN --
    yields the empty string, so the result is always defined.
    """
    result = spearman_r(response, driver)
    if result[1] < alpha:
        return '%.3f' % result[0]
    return ''


def _ssy_waves_summary(reg_loc, sed_acc, label):
    """Fit ``sed_acc ~ SSY + Waves`` on reg_loc and summarise the model.

    Returns ``(table, ssy_text, waves_text)`` where ``table`` is a one-row
    DataFrame indexed by ``label`` and each ``*_text`` is a
    ``(p-value string, colour)`` pair built from ``pval_aster``.
    """
    reg_mod = smf.ols(formula=sed_acc + " ~ SSY + Waves", data=reg_loc).fit()

    ## pval_aster is indexed as [0] = suffix appended to the p-value text,
    ## [1] = colour used for that text on the plot
    ssy_mark = pval_aster(reg_mod.pvalues['SSY'])
    waves_mark = pval_aster(reg_mod.pvalues['Waves'])
    ssy_pval = '%.3f' % reg_mod.pvalues['SSY'] + ssy_mark[0]
    waves_pval = '%.3f' % reg_mod.pvalues['Waves'] + waves_mark[0]

    table = pd.DataFrame(
        {
            'Sed': sed_acc,
            'r2adj': '%.2f' % reg_mod.rsquared_adj,
            'SSY_spear_r': _spearman_label(reg_loc[sed_acc], reg_loc['SSY']),
            'SSY_beta': '%.3f' % reg_mod.params['SSY'],
            ## first five characters only: the numeric part without suffix
            'SSY_pval': ssy_pval[:5],
            'Waves_spear_r': _spearman_label(reg_loc[sed_acc], reg_loc['Waves']),
            'Waves_beta': '%.3f' % reg_mod.params['Waves'],
            'Waves_pval': waves_pval[:5],
        },
        index=[label],
    )
    return table, (ssy_pval, ssy_mark[1]), (waves_pval, waves_mark[1])


def SedAcc_vs_SSY_Waves(data,sed_acc='Total_gm2d',max_y=40,plot_health_thresholds=False,show=True,save=False,filename=''):
    """Regress sediment accumulation on SSY and Waves, per site and per reef half.

    For every site ID in ``data['Pod(P)/Tube(T)']`` the function fits
    ``sed_acc ~ SSY + Waves``, draws the scatter of ``sed_acc`` against SSY
    (red) and Waves (blue, twin x-axis) on a 3x3 grid, and draws residuals
    versus predictions of ``sed_acc ~ SSY`` on a second 3x3 grid.  It then
    repeats the regression on monthly means of the northern and southern
    site groups.

    Parameters
    ----------
    data : DataFrame with at least the columns 'Pod(P)/Tube(T)', 'Month',
        'SSY', 'Waves', 'Precip' and the column named by ``sed_acc``.
    sed_acc : name of the accumulation column used as the response.
    max_y : accepted for backward compatibility; the y-limit of each
        subplot is derived from that site's own maximum, as before.
    plot_health_thresholds : accepted for backward compatibility; not used.
    show, save, filename : forwarded to ``show_plot`` / ``savefig``; the
        residual figure is saved under ``filename + '_residuals'``.

    Returns
    -------
    DataFrame with one row per site followed by the North and South rows,
    columns 'Sed', 'r2adj', 'SSY_spear_r', 'SSY_beta', 'SSY_pval',
    'Waves_spear_r', 'Waves_beta', 'Waves_pval' (all formatted strings).

    Raises
    ------
    ValueError
        If neither 'T1A' nor 'P1A' occurs among the site IDs, so the data
        cannot be labelled as tubes or pods.

    NOTE(review): relies on the module-level ``Comp_XL`` for the list of
    months, and the grid has nine panels, so more than nine site IDs will
    fail on indexing -- confirm against the callers' data.
    """
    site_col = 'Pod(P)/Tube(T)'
    site_ids = data[site_col].values

    ## 'P1A' takes precedence when both markers are present, matching the
    ## label the original code ended up with; IDs are built from the bare
    ## names so they are never prefixed twice.
    if 'P1A' in site_ids:
        prefix, tubes_or_pods = 'P', 'Pods'
    elif 'T1A' in site_ids:
        prefix, tubes_or_pods = 'T', 'Tubes'
    else:
        raise ValueError(
            "data['Pod(P)/Tube(T)'] contains neither 'T1A' nor 'P1A'; "
            "cannot tell tubes from pods")
    north_reef = [prefix + site for site in ('1A', '1B', '1C', '2A', '2C')]
    south_reef = [prefix + site for site in ('2B', '3A', '3B', '3C')]

    ## Plot accumulation
    fig, axes = plt.subplots(3, 3, sharey=False, figsize=(12, 8))
    ## Plot residuals of Sed_Acc and SSY
    fig_resid, axes_resid = plt.subplots(3, 3, sharey=False, figsize=(12, 8))
    axes1 = axes.reshape(-1)
    axes1_resid = axes_resid.reshape(-1)

    ## one-row summary tables, concatenated once at the end
    summaries = []

    ## Plot Sed data
    for x, loc in enumerate(np.sort(data[site_col].value_counts().index.values)):
        print('%s %s' % (x, loc))

        ## Data for Regression; copy so the residual columns added below
        ## go onto an independent frame
        reg_loc = data[data[site_col] == loc][['Month', sed_acc, 'SSY', 'Waves']]
        reg_loc = reg_loc.dropna().copy()

        table, ssy_text, waves_text = _ssy_waves_summary(reg_loc, sed_acc, loc)
        summaries.append(table)

        ## PLOTS -- y range follows this site's own maximum
        loc_max = reg_loc[sed_acc].max()
        ax_ssy = axes1[x]

        ## Plot SSY vs Sed_Acc
        reg_loc.plot(x='SSY', y=sed_acc, ax=ax_ssy, color='r', ls='None',
                     marker='o', fillstyle='none')
        ax_ssy.set_xlim(0, 250)
        ax_ssy.set_ylim(0, loc_max * 1.35)

        ## Plot Waves vs Sed_Acc
        axes2 = ax_ssy.twiny()
        reg_loc.plot(x='Waves', y=sed_acc, ax=axes2, color='b', ls='None',
                     marker='s', fillstyle='none')
        axes2.set_xlim(0.5, 2.5)
        axes2.set_ylim(0, loc_max * 1.35)

        ## Figure/Plots of residuals vs predicted value
        sed_acc_vs_ssy_mod = smf.ols(formula=sed_acc + " ~ SSY", data=reg_loc).fit()
        reg_loc['SSY_resid'] = sed_acc_vs_ssy_mod.resid
        reg_loc['SSY_pred'] = sed_acc_vs_ssy_mod.predict()
        ax_resid = axes1_resid[x]
        reg_loc.plot(x='SSY_pred', y='SSY_resid', ax=ax_resid, color='k',
                     ls='None', marker='v')
        ax_resid.set_ylabel('SSY residuals')
        ax_resid.set_xlim(0, reg_loc['SSY_pred'].max() * 1.15)
        ax_resid.grid(False)

        ## Progress output: month of each observation followed by a blank line
        for row in reg_loc.iterrows():
            print(row[1]['Month'])
            print('')

        ## Format subplot
        for ax in (ax_ssy, axes2):
            ax.xaxis.set_visible(False)
            ax.tick_params(labelsize=8)
            ax.xaxis.grid(False)
            ax.yaxis.grid(False)
            ax.legend().set_visible(False)

        # Subplot title eg P1A
        ax_ssy.text(0.05, .95, loc, verticalalignment='top',
                    horizontalalignment='left', transform=ax_ssy.transAxes)
        ## Plot text Pvalues
        ax_ssy.text(0.6, .95, 'p_SSY:' + ssy_text[0], verticalalignment='top',
                    horizontalalignment='left', transform=ax_ssy.transAxes,
                    fontsize=13, color=ssy_text[1])
        ax_ssy.text(0.6, .85, 'p_Wave:' + waves_text[0], verticalalignment='top',
                    horizontalalignment='left', transform=ax_ssy.transAxes,
                    fontsize=13, color=waves_text[1])

        ## only the first three panels show the Waves axis
        if x <= 2:
            axes2.xaxis.set_visible(True)
            axes2.set_xlabel('Waves (m)', color='b')

    ## Label left axes
    axes[0, 0].set_ylabel('NORTHERN \n g/' + r'$m^2$' + '/day')
    axes[1, 0].set_ylabel('CENTRAL \n g/' + r'$m^2$' + '/day')
    axes[2, 0].set_ylabel('SOUTHERN \n g/' + r'$m^2$' + '/day')

    ## turn on and label the SSY axis on the bottom row
    for ax in axes[2]:
        ax.set_xlabel('SSY (tons)', color='r')
        ax.xaxis.set_visible(True)
        for tl in ax.get_xticklabels():
            tl.set_color('r')

    fig.tight_layout(pad=0.2)
    fig.subplots_adjust(top=0.9)
    fig_resid.tight_layout(pad=0.2)
    fig_resid.subplots_adjust(top=0.9)
    fig_resid.suptitle(tubes_or_pods + ' ' + sed_acc, fontsize=16)

    show_plot(show, fig)
    savefig(fig, save, filename)
    savefig(fig_resid, save, filename + '_residuals')

    ### Mean North/South
    north_sed = data[data[site_col].isin(north_reef)].dropna()
    south_sed = data[data[site_col].isin(south_reef)].dropna()

    north_rows = []
    south_rows = []
    for mon in Comp_XL.sheet_names[1:12]:
        north_mon = north_sed[north_sed['Month'] == mon]
        south_mon = south_sed[south_sed['Month'] == mon]

        ## Aux Data -- taken from the northern rows for both groups, as in
        ## the original.  NOTE(review): presumably these are site-wide
        ## monthly values; confirm, since a month with no northern rows
        ## gives NaN drivers for the south as well.
        precip = north_mon['Precip'].max()
        ssy = north_mon['SSY'].max()
        waves = north_mon['Waves'].max()

        north_rows.append({sed_acc: north_mon[sed_acc].mean(),
                           'Precip': precip, 'SSY': ssy, 'Waves': waves})
        south_rows.append({sed_acc: south_mon[sed_acc].mean(),
                           'Precip': precip, 'SSY': ssy, 'Waves': waves})

    north_mean_acc = pd.DataFrame(
        north_rows, index=['North_' + tubes_or_pods] * len(north_rows))
    south_mean_acc = pd.DataFrame(
        south_rows, index=['South_' + tubes_or_pods] * len(south_rows))

    ## Regression for Monthly mean on North/South
    for region_mean in (north_mean_acc, south_mean_acc):
        ## Drop months with missing means so the rank correlations see no
        ## NaN rows (the per-site path above drops missing rows the same way).
        reg_loc = region_mean[[sed_acc, 'SSY', 'Waves']].dropna()
        table, _, _ = _ssy_waves_summary(reg_loc, sed_acc, region_mean.index[0])
        summaries.append(table)

    reg_table = pd.concat(summaries)
    return reg_table[['Sed','r2adj','SSY_spear_r','SSY_beta','SSY_pval','Waves_spear_r','Waves_beta','Waves_pval']]