# Ordinary least-squares fit of the first column of `tau` against the
# predictor matrix `x`, followed by optional diagnostic plots.
y = tau[:, 0]
# Response transforms that were tried and are currently disabled:
#y = np.log(y/1000. + 10.)
#y, _ = stats.boxcox(y)
#for i in y:
#    if i in ydel:
#        y[i] = np.nan

# Keep only the rows where every predictor and the response are non-NaN.
notNaNs = ~np.any(np.isnan(x), 1) & ~np.isnan(y)
X = x[notNaNs, :]
y = y[notNaNs]
# NOTE(review): `sm` is presumably statsmodels.api; with its default
# arguments add_constant prepends the intercept column, which is why
# X[:, 1] is used as the first predictor in the scatter plot below -- confirm.
X = sm.add_constant(X)
model = sm.OLS(y, X).fit()
print(model.summary())

# Fitted values; reused by the goodness-of-fit numbers and the plots.
yhat = model.predict()
# The format string has two fields, so mysm.cal_R2 must return a pair.
print("R2 is: %.3f, R2adj is: %.3f"
      % (mysm.cal_R2(y, yhat, n=model.nobs, p=model.df_model)))
print("rmse is %.3f" % (mysm.cal_RMSE(y, yhat)))

if plott == 1:
    # Response against the first predictor.
    fig, axes = plt.subplots(nrows=1, ncols=1)
    #plt.hist(y,30,alpha=0.3, normed=True)
    plt.scatter(X[:, 1], y)

if plotres == 1:
    # Residuals against fitted values.
    fig = plt.figure()
    ax = fig.add_axes([0.05, 0.05, 0.9, 0.9])
    ax.scatter(yhat, model.resid)
    ax.set_ylabel('residual')
    ax.set_xlabel('yhat')

if plot_y_yhat:
    # Fitted values against observations.
    fig = plt.figure()
    ax = fig.add_axes([0.05, 0.05, 0.9, 0.9])
    ax.scatter(y, yhat)
    ax.set_ylabel('yhat')
profid = data.index.unique()  # index of profile start
d14C = prep.getvarxls(data, 'D14C_BulkLayer', profid, ':')
sampleyr = prep.getvarxls(data, 'SampleYear', profid, ':')
layerbot = prep.getvarxls(data, 'Layer_bottom', profid, ':')

# Compute tau and its fitting cost, store both on disk, then read them back
# so the rest of the script works from the saved arrays.
tau, cost = C14.cal_tau(d14C, sampleyr, 3, False)
np.savez('./Synthesis_allD14C_tau.npz', tau=tau, cost=cost)
taudata = np.load('./Synthesis_allD14C_tau.npz')
tau = taudata['tau']
cost = taudata['cost']

# D14C recomputed from tau for a common sampling year of 2000.
D14C2000 = np.array(C14.cal_D14Ctosmpyr(tau[:, 0], 2000))
is_badcost = cost[:, 0] > 50
# Bare expression: only shows a value when the cell is run interactively.
data.D14C_BulkLayer[is_badcost]
# RMSE between measured and year-2000 D14C, ignoring the bad-cost rows.
a = mysm.cal_RMSE(d14C[~is_badcost], D14C2000[~is_badcost])
D14C2000df = pd.DataFrame(data=D14C2000)
D14C2000df.to_csv('normalizedD14C.csv')

#%% verify the D14C normalization approach
newdata = data.copy()


# index of profiles that have multiple year measurements
def print_normalized(profid, tosmpyr):
    """Print one profile's measured D14C next to its normalized value.

    Recomputes D14C for sampling year ``tosmpyr`` from the module-level
    ``tau`` and stores it in the ``D14C_normalized`` column of the
    module-level ``newdata`` (overwriting any previous value), then prints
    the rows of ``newdata`` labelled ``profid``.

    Parameters
    ----------
    profid : index label of the profile to print; shadows the module-level
        ``profid`` inside this function.
    tosmpyr : sampling year passed to ``C14.cal_D14Ctosmpyr``.
    """
    mod = C14.cal_D14Ctosmpyr(tau[:, 0], tosmpyr)
    newdata['D14C_normalized'] = mod
    prof = newdata.loc[profid, ['Layer_bottom', 'D14C_BulkLayer',
                                'D14C_normalized', 'SampleYear']]
    print(prof)


print_normalized(1, 2013)
# NOTE(review): the next three statements look like the tail of a plotting
# loop whose header is not visible here -- confirm their indentation against
# the full script.
ax.set_title('profile ID:' + str(pro) + '\n' + bio)
plt.legend(loc=4)
plt.gca().invert_yaxis()

#%% calculate rmse
# Lookup from biome code to biome name; it does not depend on the profile,
# so it is built once before the loop.
biome = {1: 'Boreal Forest', 2: 'Temperate Forest', 3: 'Tropical Forest',
         4: 'Grassland', 5: 'Cropland', 6: 'Shrublands', 7: 'Peatland',
         8: 'Savannas'}
rmse = []
for idd, pro in enumerate(profid4modeling):
    # Two spaces on purpose: identical to the old `print 'profid is ',pro`.
    print('profid is  %s' % (pro,))
    # Interpolate the observations of this profile over its layer bottoms,
    # wrap the interpolant with prep.extrap1d, and evaluate it at the
    # depths stored for this profile.
    f_i = interp1d(data.loc[pro:pro, 'Layer_bottom'].values, obss[idd])
    f_x = prep.extrap1d(f_i)
    y = f_x(depthh[idd])
    y = np.reshape(y, (y.shape[0], 1))  # column vector
    yhat = out[idd]
    rmse.append(myst.cal_RMSE(y, yhat))

#%% use jobaggy soc to extrapolate missing soc profiles,
# log-fitting interpolation, using cum_pctC, write to extrasitegridid.txt
from scipy.optimize import curve_fit


def func(x, K, I):
    """Return exp(K*log(x) + I), i.e. exp(I) * x**K for positive x.

    Model function handed to ``curve_fit`` below; ``K`` and ``I`` are the
    fitted parameters.
    """
    return np.exp(K*np.log(x) + I)


out = []
obss = []
depthh = []
# NOTE(review): no close() for this file is visible in this excerpt --
# confirm it is closed once the loop below has finished writing.
outf = open('extrasitegridid.txt', 'w')
for i in extraprofid:
    print('profile is : %s' % (i,))
    # Per-layer percentages for this profile's vegetation type, as fractions.
    jobgypctC = np.array(
        csvbiome[data.loc[i:i, 'VegTypeCode_Local'].values[0]])/100.
    # Fit the cumulative fraction against depth, with a (0, 0) point added.
    popt, pcov = curve_fit(func, np.r_[0, jobgydepth],
                           np.r_[0, np.cumsum(jobgypctC)])
    # NOTE(review): the loop body continues past the end of this excerpt.