def linearfunction(x,y,name='linear rating'):
    """Regress y on x and return the fit statistics as a one-row DataFrame.

    x and y are paired in a DataFrame; rows with a missing value and rows
    where either value is below zero are discarded before fitting.

    The returned frame is indexed by ``name`` and holds:
      'a'        -> regression.beta[1]  (presumably the intercept -- verify)
      'b'        -> regression.beta[0]  (presumably the slope -- verify)
      'r2'       -> regression.r2
      'rmse'     -> regression.rmse
      'pearson'  -> first element returned by pearson_r
      'spearman' -> first element returned by spearman_r
    """
    ## pair up x and y; unmatched/missing observations are dropped
    paired = pd.DataFrame.from_dict({'x':x,'y':y}).dropna()
    ## cells below zero are masked to NaN, then those rows are dropped too
    usable = paired[paired>=0].dropna()
    xs = usable['x']
    ys = usable['y']
    fit = pd.ols(y=ys,x=xs)
    pearson = pearson_r(xs,ys)[0]
    spearman = spearman_r(xs,ys)[0]
    stats = {
        'a':[fit.beta[1]],
        'b':[fit.beta[0]],
        'r2':[fit.r2],
        'rmse':[fit.rmse],
        'pearson':[pearson],
        'spearman':[spearman],
    }
    return pd.DataFrame(stats,index=[name])
def powerfunction(x,y,name='power rating',pvalue=0.01):
    """Fit a power relation between x and y by regressing in log10 space.

    x and y are paired, rows with missing values are dropped, and both
    columns are log10-transformed.  Rows whose transformed value is below
    -10 (which includes the -inf produced by log10(0)) or NaN are removed
    before the regression and the correlations are computed.

    Parameters
    ----------
    x, y : paired observations accepted by ``pd.DataFrame.from_dict``.
    name : index label of the returned row.
    pvalue : a correlation coefficient is reported only when the second
        element returned by pearson_r / spearman_r (presumably the p-value
        -- verify) is below this threshold; otherwise NaN is reported.

    Returns
    -------
    One-row DataFrame indexed by ``name`` with columns:
      'a'    -> 10 ** regression.beta[1]  (presumably the intercept -- verify)
      'b'    -> regression.beta[0]        (presumably the slope -- verify)
      'r2', 'rmse', 'pearson', 'spearman'
    """
    ## put x and y in a dataframe so you can drop ones that don't match up
    logdf = pd.DataFrame.from_dict({'x':x,'y':y}).dropna().apply(np.log10)
    ## Masking alone only turns invalid cells into NaN; the rows have to be
    ## dropped as well (as linearfunction does), otherwise the NaNs are fed
    ## to the correlation functions below.
    logdf = logdf[logdf>=-10].dropna()
    log_x = logdf['x']
    log_y = logdf['y']
    regression = pd.ols(y=log_y,x=log_x)

    ## each correlation is evaluated once and reused for both the
    ## significance check and the reported coefficient
    pearson_result = pearson_r(log_x,log_y)
    pearson = pearson_result[0] if pearson_result[1] < pvalue else np.nan
    spearman_result = spearman_r(log_x,log_y)
    spearman = spearman_result[0] if spearman_result[1] < pvalue else np.nan

    coeffdf = pd.DataFrame({'a':[10**regression.beta[1]],'b':[regression.beta[0]],
                            'r2':[regression.r2],'rmse':[regression.rmse],
                            'pearson':[pearson],'spearman':[spearman]},
                           index=[name])
    return coeffdf
# Example #3
# 0
def linearfunction(x,y,name='linear rating'):
    """Return linear-regression and correlation statistics for y against x.

    The inputs are aligned in a two-column DataFrame.  Rows containing a
    missing value, and rows in which either x or y is negative, are removed
    before anything is computed.

    The result is a single-row DataFrame (index = ``name``) with columns
    'a' (regression.beta[1]), 'b' (regression.beta[0]), 'r2', 'rmse',
    'pearson' and 'spearman'; the last two are the first element returned
    by pearson_r and spearman_r respectively.
    """
    ## align x and y, discarding observations without a partner
    frame = pd.DataFrame.from_dict({'x':x,'y':y}).dropna()
    ## negative cells become NaN under the mask; drop the affected rows
    frame = frame[frame>=0].dropna()
    x_clean, y_clean = frame['x'], frame['y']

    model = pd.ols(y=y_clean,x=x_clean)
    pearson_coeff = pearson_r(x_clean,y_clean)[0]
    spearman_coeff = spearman_r(x_clean,y_clean)[0]

    return pd.DataFrame(
        {'a':[model.beta[1]],'b':[model.beta[0]],'r2':[model.r2],'rmse':[model.rmse],
         'pearson':[pearson_coeff],'spearman':[spearman_coeff]},
        index=[name])
def _sed_regression_summary(reg_loc, sed_acc):
    """Fit ``sed_acc ~ SSY + Waves`` on reg_loc and summarise the model.

    Returns ``(row, ssy_pval, ssy_pval_col, waves_pval, waves_pval_col)``:
    ``row`` is a dict holding one line of the regression table; the p-value
    strings are '%.3f'-formatted with the first element of ``pval_aster``
    appended, and the ``*_col`` values are the second element of
    ``pval_aster`` (used by the caller as a text colour).
    """
    reg_mod = smf.ols(formula=sed_acc+" ~ SSY + Waves", data=reg_loc).fit()

    ## P values (pval_aster is evaluated once per predictor)
    ssy_aster = pval_aster(reg_mod.pvalues['SSY'])
    waves_aster = pval_aster(reg_mod.pvalues['Waves'])
    ssy_pval = '%.3f'%reg_mod.pvalues['SSY'] + ssy_aster[0]
    waves_pval = '%.3f'%reg_mod.pvalues['Waves'] + waves_aster[0]

    def spearman_label(column):
        ## Report the coefficient only when the second returned element
        ## is below 0.10.  A plain ``else`` also covers a NaN there, so a
        ## label can never be left over from a previous call.
        result = spearman_r(reg_loc[sed_acc], reg_loc[column])
        if result[1] < 0.10:
            return '%.3f'%result[0]
        return ''

    row = {'Sed':sed_acc,
           'r2adj':'%.2f'%reg_mod.rsquared_adj,
           'SSY_spear_r':spearman_label('SSY'),
           'SSY_beta':'%.3f'%reg_mod.params['SSY'],
           'SSY_pval':ssy_pval[:5],      ## first 5 chars only, e.g. '0.012'
           'Waves_spear_r':spearman_label('Waves'),
           'Waves_beta':'%.3f'%reg_mod.params['Waves'],
           'Waves_pval':waves_pval[:5]}
    return row, ssy_pval, ssy_aster[1], waves_pval, waves_aster[1]


def SedAcc_vs_SSY_Waves(data,sed_acc='Total_gm2d',max_y=40,plot_health_thresholds=False,show=True,save=False,filename=''):
    """Plot sediment accumulation against SSY and Waves and tabulate regressions.

    For every distinct value of data['Pod(P)/Tube(T)'] (sorted) the model
    ``sed_acc ~ SSY + Waves`` is fitted, the points are drawn on one panel
    of a 3x3 figure (SSY on the bottom x axis, Waves on a twin top x axis)
    and the residuals of ``sed_acc ~ SSY`` are drawn on the matching panel
    of a second 3x3 figure.  Monthly means over the northern and southern
    site groups are then regressed the same way and appended to the table.

    Parameters
    ----------
    data : DataFrame with at least the columns 'Pod(P)/Tube(T)', 'Month',
        'Precip', 'SSY', 'Waves' and the column named by ``sed_acc``.
    sed_acc : name of the accumulation column to analyse.  It is inserted
        into a regression formula string.
    max_y, plot_health_thresholds : accepted for backward compatibility;
        not used.  The y limits are always derived from each site's data.
    show : forwarded to ``show_plot`` together with the accumulation figure.
    save, filename : forwarded to ``savefig``; the residual figure is saved
        under ``filename + '_residuals'``.

    Returns
    -------
    DataFrame with one row per site plus 'North_<kind>' and 'South_<kind>'
    rows, columns 'Sed', 'r2adj', 'SSY_spear_r', 'SSY_beta', 'SSY_pval',
    'Waves_spear_r', 'Waves_beta', 'Waves_pval' (all formatted strings).

    Raises
    ------
    ValueError
        If the site column contains neither 'P1A' nor 'T1A', so the data
        cannot be identified as pods or tubes.

    Notes
    -----
    Relies on module-level names defined outside this function: ``plt``,
    ``smf``, ``pval_aster``, ``spearman_r``, ``show_plot``, ``savefig`` and
    ``Comp_XL`` (whose ``sheet_names[1:12]`` supply the month labels).
    Only nine panels exist, so more than nine distinct sites will fail
    when indexing the axes.
    """
    #plt.ioff()
    site_col = 'Pod(P)/Tube(T)'
    table_columns = ['Sed','r2adj','SSY_spear_r','SSY_beta','SSY_pval','Waves_spear_r','Waves_beta','Waves_pval']

    ## Identify the instrument type exactly once, so the site prefix is
    ## never applied twice and tubes_or_pods is always defined.  Pods take
    ## precedence when both markers are present.
    site_values = data[site_col].values
    if 'P1A' in site_values:
        tubes_or_pods, prefix = 'Pods', 'P'
    elif 'T1A' in site_values:
        tubes_or_pods, prefix = 'Tubes', 'T'
    else:
        raise ValueError("data['Pod(P)/Tube(T)'] contains neither 'P1A' nor 'T1A'; cannot tell pods from tubes")
    north_reef = [prefix+site for site in ['1A','1B','1C','2A','2C']]
    south_reef = [prefix+site for site in ['2B','3A','3B','3C']]

    ## Plot accumulation
    fig, axes = plt.subplots(3, 3,sharey=False,figsize=(12,8))
    ## Plot residuals of Sed_Acc and SSY
    fig_resid, axes_resid = plt.subplots(3, 3,sharey=False,figsize=(12,8))
    axes1 = axes.reshape(-1)
    axes1_resid = axes_resid.reshape(-1)

    ## Rows and index labels of the regression table, assembled at the end
    table_rows = []
    table_index = []

    ## Plot Sed data
    for x, loc in enumerate(np.sort(data[site_col].value_counts().index.values)):
        print('%s %s' % (x, loc))
        ## Data for Regression
        reg_loc = data[data[site_col] == loc][['Month',sed_acc,'SSY','Waves']].dropna()
        ## Fit regression and build the table row
        row, SSY_pval, SSY_pval_col, Waves_pval, Waves_pval_col = _sed_regression_summary(reg_loc, sed_acc)
        table_rows.append(row)
        table_index.append(loc)

        ## PLOTS
        ## y range follows this site's data (kept separate from the max_y argument)
        loc_max_y = reg_loc[sed_acc].max()
        ax_ssy = axes1[x]
        ## Plot SSY vs Sed_Acc
        reg_loc.plot(x='SSY',y=sed_acc,ax=ax_ssy,color='r',ls='None',marker='o',fillstyle='none')
        ax_ssy.set_xlim(0,250)
        ax_ssy.set_ylim(0,loc_max_y*1.35)
        ## Plot Waves vs Sed_Acc
        axes2 = ax_ssy.twiny()
        reg_loc.plot(x='Waves',y=sed_acc,ax=axes2,color='b',ls='None',marker='s',fillstyle='none')
        axes2.set_xlim(0.5,2.5)
        axes2.set_ylim(0,loc_max_y*1.35)

        ## Figure/Plots of residuals vs predicted value
        sed_acc_vs_ssy_mod = smf.ols(formula=sed_acc+" ~ SSY", data=reg_loc).fit()
        reg_loc['SSY_resid'] = sed_acc_vs_ssy_mod.resid
        reg_loc['SSY_pred'] = sed_acc_vs_ssy_mod.predict()
        ax_resid = axes1_resid[x]
        reg_loc.plot(x='SSY_pred',y='SSY_resid',ax=ax_resid,color='k',ls='None',marker='v')
        ax_resid.set_ylabel('SSY residuals')
        ax_resid.set_xlim(0,reg_loc['SSY_pred'].max()*1.15)
        ax_resid.grid(False)

        ## Format subplot
        for ax in (ax_ssy, axes2):
            ax.xaxis.set_visible(False)
            ax.tick_params(labelsize=8)
            ax.xaxis.grid(False)
            ax.yaxis.grid(False)
            ax.legend().set_visible(False)
        # Subplot title eg P1A
        ax_ssy.text(0.05,.95,loc,verticalalignment='top', horizontalalignment='left',transform=ax_ssy.transAxes)
        ## Plot text Pvalues
        ax_ssy.text(0.6,.95,'p_SSY:'+SSY_pval,verticalalignment='top', horizontalalignment='left',transform=ax_ssy.transAxes,fontsize=13,color=SSY_pval_col)
        ax_ssy.text(0.6,.85,'p_Wave:'+Waves_pval,verticalalignment='top', horizontalalignment='left',transform=ax_ssy.transAxes,fontsize=13,color=Waves_pval_col)

        ## Only the top row of panels shows the Waves axis
        if x<=2:
            axes2.xaxis.set_visible(True)
            axes2.set_xlabel('Waves (m)',color='b')

    ## Label left axes
    axes[0,0].set_ylabel('NORTHERN \n g/'+r'$m^2$'+'/day')
    axes[1,0].set_ylabel('CENTRAL \n g/'+r'$m^2$'+'/day')
    axes[2,0].set_ylabel('SOUTHERN \n g/'+r'$m^2$'+'/day')
    ## Bottom row: label and show the SSY axis in red
    for ax in axes[2]:
        ax.set_xlabel('SSY (tons)',color='r')
        ax.xaxis.set_visible(True)
        for tl in ax.get_xticklabels():
            tl.set_color('r')
    fig.tight_layout(pad=0.2)
    fig.subplots_adjust(top=0.9)#, fig.suptitle(tubes_or_pods+' '+sed_acc,fontsize=16)

    fig_resid.tight_layout(pad=0.2)
    fig_resid.subplots_adjust(top=0.9)
    fig_resid.suptitle(tubes_or_pods+' '+sed_acc,fontsize=16)

    show_plot(show,fig)
    savefig(fig,save,filename)
    savefig(fig_resid,save,filename+'_residuals')

    ### Mean North/South
    north_sed = data[data[site_col].isin(north_reef)].dropna()
    south_sed = data[data[site_col].isin(south_reef)].dropna()

    north_rows = []
    south_rows = []
    for mon in Comp_XL.sheet_names[1:12]:
        north_mon = north_sed[north_sed['Month'] == mon]
        south_mon = south_sed[south_sed['Month'] == mon]
        ## Aux Data -- taken from the northern subset for BOTH tables
        ## (presumably these series are identical across sites; verify)
        precip = north_mon['Precip'].max()
        ssy = north_mon['SSY'].max()
        waves = north_mon['Waves'].max()
        north_rows.append({sed_acc:north_mon[sed_acc].mean(),'Precip':precip,'SSY':ssy,'Waves':waves})
        south_rows.append({sed_acc:south_mon[sed_acc].mean(),'Precip':precip,'SSY':ssy,'Waves':waves})
    north_mean_acc = pd.DataFrame(north_rows,index=['North_'+tubes_or_pods]*len(north_rows))
    south_mean_acc = pd.DataFrame(south_rows,index=['South_'+tubes_or_pods]*len(south_rows))

    ## Regression for Monthly mean on North/South
    for mean_acc in [north_mean_acc, south_mean_acc]:
        ## Months without observations yield NaN rows; drop them so the
        ## Spearman correlations are computed on valid months only
        ## (the formula-based fit is expected to skip such rows as well).
        reg_loc = mean_acc[[sed_acc,'SSY','Waves']].dropna()
        table_rows.append(_sed_regression_summary(reg_loc, sed_acc)[0])
        table_index.append(mean_acc.index[0])

    reg_table = pd.DataFrame(table_rows,index=table_index)
    return reg_table[table_columns]