def subsample(training, target, sample):

    base_dir = get_main_dir()

    # randomly subsample the data and store the subsampling distribution
    fname = os.path.join(
        os.path.join(
            os.path.join(os.path.join(base_dir, 'input'), 'calibrations'),
            'idealised'), 'subsample_%d.npy' % (sample))

    if not os.path.isfile(fname):
        size = 7
        doys = np.unique(training['doy'])
        sub = np.unique(np.random.randint(doys[0], doys[-1] + 1, size=size))

        while len(sub) < size:
            sub = np.unique(
                np.append(
                    sub,
                    np.unique(
                        np.random.randint(doys[0], doys[-1] + 1,
                                          size=size - len(sub)))))

        ssub = np.asarray(training.loc[training['doy'].isin(sub)].index)

        """
        # there are small differences in day time hours... same sizes?
        if sample > 1:  # the first distribution is the reference size
            ref = np.load(fname.replace('%d.npy' % (sample), '1.npy'))
            diff = len(ref) - len(ssub)

            while diff > 0:  # random extra data points from any one day
                sub = np.unique(
                    np.append(sub,
                              np.unique(np.random.randint(doys[0],
                                                          doys[-1] + 1,
                                                          size=1))))
                ssub = np.asarray(training.loc[training['doy'].isin(sub)]
                                          .index)
                diff = len(ref) - len(ssub)

            if diff < 0:  # randomly remove the excess
                rm = np.random.randint(0, len(ssub), size=abs(diff))
                ssub = np.delete(ssub, rm)
        """

        np.save(fname, ssub)

    else:
        ssub = np.load(fname)

    # now accordingly subsample input and output
    Y = target[ssub]
    X = training.iloc[ssub]
    X.reset_index(inplace=True, drop=True)

    return X, Y

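# Note on the top-up loop above: np.random.randint draws with replacement, so
# np.unique can return fewer than `size` distinct days and the draw must be
# repeated. A standalone sketch of the same idea (day-of-year bounds are
# illustrative, borrowed from the growing season used elsewhere):
#
#   days = np.unique(np.random.randint(92, 276, size=7))
#   while len(days) < 7:  # top up until 7 distinct days are held
#       extra = np.random.randint(92, 276, size=7 - len(days))
#       days = np.unique(np.append(days, extra))
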
def check_X_Y(swaters):

    base_dir = get_main_dir()

    # check that the 4 week forcing file exists
    fname1 = os.path.join(
        os.path.join(
            os.path.join(os.path.join(base_dir, 'input'), 'calibrations'),
            'idealised'), 'training_x.csv')

    if not os.path.isfile(fname1):  # create file if it doesn't exist
        params = InForcings().defparams
        params.doy = random.randrange(92, 275)  # random day within GS
        InForcings().run(fname1, params, Ndays=7 * 4)

    for profile in swaters:

        # check that the output file from the reference model exists
        fname2 = os.path.join(
            os.path.join(
                os.path.join(os.path.join(base_dir, 'input'),
                             'calibrations'), 'idealised'),
            'training_%s_y.csv' % (profile))

        if not os.path.isfile(fname2):
            df1, __ = read_csv(fname1)

            # add the soil moisture profile to the input data
            df1['sw'], df1['Ps'] = soil_water(df1, profile)
            df1['Ps_pd'] = df1['Ps'].copy()  # pre-dawn soil water potentials
            df1['Ps_pd'].where(df1['PPFD'] <= 50., np.nan, inplace=True)

            # fixed value for the wind speed
            df1['u'] = df1['u'].iloc[0]

            # non time-sensitive: last valid value propagated until next valid
            df1.fillna(method='ffill', inplace=True)

            __ = hrun(fname2, df1, len(df1.index), 'Farquhar',
                      models=['Medlyn'], inf_gb=True)

    return

def prep_training_N_target(profile, sub=None):

    base_dir = get_main_dir()

    # path to input data
    fname = os.path.join(
        os.path.join(
            os.path.join(os.path.join(base_dir, 'input'), 'calibrations'),
            'idealised'), 'training_x.csv')
    df1, __ = read_csv(fname)

    # path to output data from the reference model
    fname = os.path.join(
        os.path.join(
            os.path.join(os.path.join(base_dir, 'input'), 'calibrations'),
            'idealised'), 'training_%s_y.csv' % (profile))
    df2, __ = read_csv(fname)

    # add the soil moisture profile to the input data
    df1['sw'], df1['Ps'] = soil_water(df1, profile)
    df1['Ps_pd'] = df1['Ps'].copy()  # daily pre-dawn soil water potentials
    df1['Ps_pd'].where(df1['PPFD'] <= 50., np.nan, inplace=True)

    # fix the wind speed
    df1['u'] = df1['u'].iloc[0]

    # non time-sensitive: last valid value propagated until next valid
    df1.fillna(method='ffill', inplace=True)

    # drop everything below min threshold for photosynthesis and reindex
    Y = np.asarray(df2['gs(std)'][df1['PPFD'] > 50.]) * 1000.  # mmol m-2 s-1
    X = df1[df1['PPFD'] > 50.]
    X.reset_index(inplace=True, drop=True)

    # add Rnet to the input (no ET or soil albedo feedbacks, this can be done)
    X['Rnet'] = net_radiation(X)
    X['scale2can'] = 1.

    if sub is not None:  # randomly subsample one week out of the data
        X, Y = subsample(X, Y, sub)

    return X, Y

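# A minimal usage sketch (the profile names follow the idealised training
# set used by the driver script; `sub=1` would request the first stored
# weekly subsample):
#
#   X, Y = prep_training_N_target('wet', sub=1)
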
def __init__(self, method='powell', store=None, inf_gb=True):

    # fitting method
    self.method = method  # which solver is used

    # MCMC-specific
    self.steps = 15000
    self.nchains = 4
    self.burn = 1000
    self.thin = 2

    if store is None:  # default storing path for the outputs
        self.base_dir = get_main_dir()  # working paths
        self.opath = os.path.join(os.path.join(self.base_dir, 'output'),
                                  'calibrations')

    else:  # user defined storing path for the outputs
        self.opath = store

    self.inf_gb = inf_gb  # whether to calculate gb or not

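    # With the defaults above, each chain would retain
    # (15000 - 1000) / 2 = 7000 post-burn-in samples, i.e. 4 x 7000 = 28000
    # samples in total (assuming burn-in is discarded before thinning).
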
def main(fname1, fname2, fname3, calibs='both', orientation='both',
         colours=None):

    base_dir = get_main_dir()
    dirname = os.path.join(
        os.path.join(os.path.join(base_dir, 'output'), 'calibrations'),
        'idealised')

    # load in the data
    df1 = (pd.read_csv(os.path.join(dirname, fname1), header=[0]).dropna(
        axis=0, how='all').dropna(axis=1, how='all').squeeze())
    df2 = (pd.read_csv(os.path.join(dirname, fname2), header=[0]).dropna(
        axis=0, how='all').dropna(axis=1, how='all').squeeze())
    df3 = (pd.read_csv(os.path.join(dirname, fname3), header=[0]).dropna(
        axis=0, how='all').dropna(axis=1, how='all').squeeze())

    if orientation == 'both':

        for orientation in ['landscape', 'portrait']:
            plt_setup(calibs, orientation, colours=colours)

            # rendering
            calib_info_plot(df1.copy(), df2.copy(), df3.copy(), calibs=calibs,
                            orientation=orientation)

        solver_info_plot(df1)

    else:
        plt_setup(calibs, orientation, colours=colours)

        # rendering
        solver_info_plot(df1.copy())
        calib_info_plot(df1, df2, df3, calibs=calibs,
                        orientation=orientation)

    return

def build_calibrated_forcing(training):

    base_dir = get_main_dir()  # working paths

    # forcing file used to calibrate the models
    fname = os.path.join(
        os.path.join(
            os.path.join(os.path.join(base_dir, 'input'), 'calibrations'),
            'obs_driven'), '%s_x.csv' % (training))
    df1, columns = read_csv(fname)

    # file containing the best calibrated params
    fname = os.path.join(
        os.path.join(
            os.path.join(os.path.join(base_dir, 'output'), 'calibrations'),
            'obs_driven'), 'best_fit.csv')
    df2 = (pd.read_csv(fname, header=[0]).dropna(axis=0, how='all')
             .dropna(axis=1, how='all').squeeze())
    df2 = df2[df2['training'] == training]

    # attribute the first (and second and third) parameter(s)
    for i in df2.index:
        df1.loc[0, df2.loc[i, 'p1']] = df2.loc[i, 'v1']

        if not pd.isnull(df2.loc[i, 'v2']):
            df1.loc[0, df2.loc[i, 'p2']] = df2.loc[i, 'v2']

        if not pd.isnull(df2.loc[i, 'v3']):
            df1.loc[0, df2.loc[i, 'p3']] = df2.loc[i, 'v3']

    # save the forcing file containing the calibrated params
    df1.columns = columns  # original columns
    df1.drop([('Tleaf', '[deg C]')], axis=1, inplace=True)  # drop Tleaf
    df1.to_csv(os.path.join(
        os.path.join(
            os.path.join(os.path.join(base_dir, 'input'), 'simulations'),
            'obs_driven'), '%s_calibrated.csv' % (training)),
        index=False, na_rep='', encoding='utf-8')

    return

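# For reference, best_fit.csv is read with one row per calibrated parameter
# set and, per the accesses above, must provide the columns 'training',
# 'p1'/'v1', 'p2'/'v2' and 'p3'/'v3' (parameter names and values, with the
# second and third pairs possibly empty). Illustrative row layout only, not
# real calibration output:
#
#   training, p1,   v1,  p2, v2, p3, v3
#   wet,      kmax, 1.2, ,   ,   ,
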
    ax.set_rgrids([0., 0.25, 0.5, 0.75], ['0', '', '', '0.75'])
    ax.set_rmax(0.75)

    # draw the angular grid lines
    ax.set_thetagrids(np.degrees([0, -45]), [])
    ax.set_title('Reading Key:', fontsize=7., x=-1.4, y=0.05)

    fig.savefig(figname)
    plt.close()

    return


###############################################################################

base_dir = get_main_dir()
figname = os.path.join(
    os.path.join(os.path.join(base_dir, 'output'), 'plots'),
    'model_sensitivities_ST_1.5.jpg')

#if not os.path.isfile(figname):
fname = os.path.join(
    os.path.join(
        os.path.join(
            os.path.join(os.path.join(base_dir, 'output'), 'simulations'),
            'idealised'), 'sensitivities'),
    'overview_of_sensitivities_1.5MPa.csv')
df = (pd.read_csv(fname, header=[0]).dropna(axis=0, how='all')
        .dropna(axis=1, how='all').squeeze())
plot_sensitivities(df, figname)

def calib_info_plot(df1, df2, df3, calibs='wet', orientation='landscape'):

    # user-defined plot attributes
    vscale = 0.45  # scaling factor around the median
    pscale = 1.1  # positions for each model's parameters
    pspace = 0.025  # second parameter positions
    wbox = 0.9  # width of the violin plots
    vert = True
    s1 = 'left'
    s2 = 'right'

    if orientation == 'portrait':
        vert = False
        s1 = 'top'
        s2 = 'bottom'

    if calibs == 'both':
        # colours for the violin plot edge lines
        c = plt.rcParams['axes.prop_cycle'].by_key()['color'][-1]
        pspace /= 2.5

    # landscape characteristics
    fs = (6., 3.)

    if orientation == 'portrait':
        fs = (3.25, 6.)

    # declare the figure and the axes
    fig, ax = plt.subplots(nrows=1, figsize=fs)

    # model order?
    models = automate_model_order(df1.copy())

    # modify and order model names across all dfs
    df1 = update_model_names(df1, models)
    df2 = update_model_names(df2, models)
    df3 = update_model_names(df3, models)

    # organise, normalise, and scale the data consistently
    df1, w, i = sorted_data(df1)
    wet1 = scaled_data(w, sc=vscale)
    inter1 = scaled_data(i, sc=vscale)

    # subset of the top 3 solvers
    df2, w, i = sorted_data(df2, norm_wet=w, norm_inter=i)
    wet2 = scaled_data(w, sc=vscale)
    inter2 = scaled_data(i, sc=vscale)

    # best params
    df3, w, i = sorted_data(df3, norm_wet=w, norm_inter=i)
    best_w = scaled_data(w, sc=vscale)
    best_i = scaled_data(i, sc=vscale)

    # param names in model order?
    params = parameter_names(w)

    # where are there 2nd params?
    keep = np.array([i for i in range(len(wet1)) if np.nansum(wet1[i]) != 0.])
    isec = np.array([i for i in range(len(keep)) if (keep[i] % 2) != 0])
    pos = np.arange(float(len(keep))) * pscale
    pos[isec] -= 8. * pspace  # second parameter position
    pos2 = pos + pspace

    if orientation == 'portrait':
        pos2 = pos - pspace

    # now that we've reworked the positions, only keep those
    wet1 = [wet1[i] for i in keep]
    wet2 = [wet2[i] for i in keep]
    inter1 = [inter1[i] for i in keep]
    inter2 = [inter2[i] for i in keep]
    best_w = [best_w[i] for i in keep]
    best_i = [best_i[i] for i in keep]

    # all solver data
    Npoints = 1000  # smooth violins
    bw = 0.35

    if calibs != 'inter':
        vp1 = ax.violinplot(wet1, showextrema=False, points=Npoints,
                            positions=pos, vert=vert, widths=wbox,
                            bw_method=bw)

        for vp in vp1['bodies']:
            vp.set_alpha(0.7)

    if calibs != 'wet':
        vp2 = ax.violinplot(inter1, showextrema=False, points=Npoints,
                            positions=pos2, vert=vert, widths=wbox,
                            bw_method=bw)

        for vp in vp2['bodies']:
            vp.set_alpha(0.7)

    if calibs == 'both':
        slice_vplot(vp1, s1, ec='gray')
        slice_vplot(vp2, s2, ec='gray')

    # top 3 solver data, if no substantial improvement, no plot
    bw *= 2.
    plt_wet = np.array([(np.amax(wet2[i]) - np.amin(wet2[i])) < 0.75 *
                        (np.amax(wet1[i]) - np.amin(wet1[i]))
                        for i in range(len(wet1))])
    plt_inter = np.array([(np.amax(inter2[i]) - np.amin(inter2[i])) < 0.75 *
                          (np.amax(inter1[i]) - np.amin(inter1[i]))
                          for i in range(len(inter1))])
    wet2 = [wet2[i] for i in range(len(wet2)) if plt_wet[i]]
    inter2 = [inter2[i] for i in range(len(inter2)) if plt_inter[i]]

    if calibs != 'inter':
        vp3 = ax.violinplot(wet2, showextrema=False, points=Npoints,
                            positions=pos[plt_wet], vert=vert, widths=wbox,
                            bw_method=bw)

        for vp in vp3['bodies']:
            vp.set_alpha(0.7)
            vp.set_hatch('/' * 6)

    if calibs != 'wet':
        vp4 = ax.violinplot(inter2, showextrema=False, points=Npoints,
                            positions=pos2[plt_inter], vert=vert,
                            widths=wbox, bw_method=bw)

        for vp in vp4['bodies']:
            vp.set_alpha(0.7)
            vp.set_hatch('/' * 6)

    if calibs == 'both':
        slice_vplot(vp3, s1, ec=c)
        slice_vplot(vp4, s2, ec=c)

    # best params
    if calibs == 'both':
        x = np.append(pos - wbox / 8., pos + wbox / 4. + pspace / 2.)
        y = np.append(best_w, best_i)

    elif calibs == 'wet':
        x = pos
        y = best_w

    else:
        x = pos
        y = best_i

    if orientation == 'portrait':
        y = x

        if calibs == 'both':
            x = np.append(best_i, best_w)

        elif calibs == 'wet':
            x = best_w

        else:
            x = best_i

    ax.plot(x, y, lw=0, marker='*', mec='k', zorder=9)

    # add custom legend
    if calibs == 'both':
        handles = custom_legend(calibs, orientation, ec='gray')

    else:
        handles = custom_legend(calibs, orientation)

    if orientation == 'landscape':
        ax.legend(handles=handles, loc=2, bbox_to_anchor=[-0.025, 1.015])

    else:
        ax.legend(handles=handles, ncol=3, columnspacing=1., handlelength=1.,
                  handletextpad=0.4, frameon=False, loc=2,
                  bbox_to_anchor=[0., 1.03])

    # add grid and format the axes
    lpos = np.asarray([0.05, 0.5, 0.9, 1., 1.1, 2., 20.])
    mpos = np.copy(pos)
    mpos[isec - 1] += (mpos[isec] - mpos[isec - 1]) / 2.
    mpos = np.delete(mpos, isec)
    mlines = np.copy(pos) + pscale / 2.
    mlines[isec] += pspace * 2. / 3.
    mlines = np.delete(mlines, isec - 1)
    custom_grid(mlines, -(vscale**lpos), ax, orientation)

    if orientation == 'landscape':
        ax.set_yticks(-(vscale**lpos))

        if calibs == 'both':
            ax.set_xlim([np.amin(pos) - 0.5, np.amax(pos) + 0.5])

        else:
            ax.set_xlim([np.amin(pos) - 0.55, np.amax(pos) + 0.6])

        ax.set_xticks(mpos)

    else:
        ax.set_xticks(-(vscale**lpos))

        if calibs == 'both':
            ax.set_ylim([np.amin(pos) - 0.5, np.amax(pos) + 0.5])

        else:
            ax.set_ylim([np.amin(pos) - 0.6, np.amax(pos) + 0.55])

        ax.set_yticks(mpos + 0.15)

    # nicer display of the model names and normalised param values
    pvals = ['0.05', '0.5', '0.9', '', '1.1', '2', '20']
    models[models.index('WUE-LWP')] = r'WUE-$f_{\varPsi_l}$'
    models[models.index('SOX-OPT')] = r'SOX$_\mathrm{\mathsf{opt}}$'

    if orientation == 'landscape':
        ax.set_yticklabels(pvals)
        ax.set_xticklabels(models, va='top', rotation=25., size=7.)

        for i in range(len(params)):  # add param names
            t = ax.text(pos[i], -(vscale**0.2), params[i], va='top',
                        ha='center')
            t.set_bbox(dict(boxstyle='round,pad=0.1', fc='w', ec='none',
                            alpha=0.8))

        # move the y labels to the right side
        ax.yaxis.set_label_position('right')
        ax.yaxis.tick_right()
        ax.set_ylabel('Normalised parameter values')

    else:
        ax.set_xticklabels(pvals)
        ax.set_yticklabels(models, ha='left', va='top', size=7.)

        for i in range(len(params)):  # add param names
            yy = pos[i] + 0.125

            if i in [9, 14, 15, 16]:
                yy = pos[i] + 0.375

            t = ax.text(-(vscale**4.4), yy, params[i], ha='right', va='top')

            if i == 12:
                t.set_path_effects([
                    path_effects.Stroke(linewidth=0.75, foreground='w'),
                    path_effects.Normal()])

        ax.tick_params(axis='y', direction='in', pad=-5.)
        plt.setp(ax.get_yticklabels(),
                 bbox=dict(boxstyle='round', fc='w', ec='none'))
        ax.set_xlabel('Normalised parameter values')

    # remove the ticks themselves
    ax.xaxis.set_tick_params(length=0.)
    ax.yaxis.set_tick_params(length=0.)

    base_dir = get_main_dir()
    opath = os.path.join(os.path.join(base_dir, 'output'), 'plots')
    fig.tight_layout()
    plt.savefig(
        os.path.join(opath, 'model_calibs_%s_%s.png' % (calibs, orientation)))
    plt.savefig(
        os.path.join(opath, 'model_calibs_%s_%s.jpg' % (calibs, orientation)))

def solver_info_plot(df):

    # declare the figure and the axes
    fig, ax = plt.subplots(figsize=(2.75, 3.))

    if 'training' not in df.columns:
        size = 30.

    else:
        size = 20.

    # solver performance info
    df = solver_performance(df)
    c = plt.rcParams['axes.prop_cycle'].by_key()['color'][-1]

    # counts plot where the points grow bigger as more points overlap
    ax.scatter(df['sv'], df['Rank'], marker='o', c='grey', alpha=0.7,
               s=df['counts'] * size)

    # plot the N best data
    ax.scatter(df[df['sv'] < 3]['sv'], df[df['sv'] < 3]['Rank'], marker='o',
               c=c, s=df[df['sv'] < 3]['counts'] * size / 2.)

    # plot the average ranks
    ax.plot(pd.unique(df['sv']), df.groupby('sv')['waRank'].mean(), c='k',
            lw=1.5)

    # format the axes
    ax.set_xticks(np.arange(len(df['solver'].unique())) + 0.5)
    ax.set_xticklabels(df['solver'].unique(), rotation=55., ha='right',
                       va='top')
    ax.xaxis.set_tick_params(length=0.)

    # replace y axis with skill arrow
    ax.get_yaxis().set_visible(False)
    ax.text(-0.125, 0.95, 'Low\nskill', va='center', ha='center',
            transform=ax.transAxes)
    ax.annotate('High\nskill', xy=(-0.125, 0.9), xytext=(-0.125, 0.05),
                xycoords='axes fraction', va='center', ha='center',
                arrowprops=dict(arrowstyle='<-', lw=0.75))

    for spine in ax.spines.values():  # thinner spines
        spine.set_visible(True)
        spine.set_linewidth(0.25)

    base_dir = get_main_dir()
    opath = os.path.join(os.path.join(base_dir, 'output'), 'plots')
    fig.tight_layout()
    plt.savefig(os.path.join(opath, 'solver_performance.png'))
    plt.savefig(os.path.join(opath, 'solver_performance.jpg'))

    return

    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[:len(mods) + 1], labels[:len(mods) + 1],
              bbox_to_anchor=(1.025, 1. / 3.), loc=3)

    fig.savefig(fname)
    plt.close()


###############################################################################

# first, activate user defined rendering options
plt_setup()

base_dir = get_main_dir()  # dir paths
ifdir = os.path.join(
    os.path.join(os.path.join(base_dir, 'input'), 'simulations'),
    'idealised')
ofdir = os.path.join(
    os.path.join(os.path.join(base_dir, 'output'), 'simulations'),
    'idealised')

# path to input data
fname1 = os.path.join(ifdir, 'wet_calibration.csv')
df1, __ = read_csv(fname1)

# initialise soil moisture forcings
df1['sw'] = df1['theta_sat']
df1.fillna(method='ffill', inplace=True)

# plot the atmospheric forcings
figdir = os.path.join(os.path.join(base_dir, 'output'), 'plots')

def obs_calibs(df1, df2, figname):

    fig = plt.figure(figsize=(6.5, 8.))
    gs = fig.add_gridspec(nrows=96, ncols=16, hspace=0.3, wspace=0.2)
    ax2 = fig.add_subplot(gs[52:, 6:])

    # conductance data
    ipath = os.path.join(
        os.path.join(os.path.join(get_main_dir(), 'input'), 'simulations'),
        'obs_driven')

    labels = []

    for i, what in enumerate(df1['site_spp'].dropna().unique()):

        if i < 13:
            nrow = int(i / 4) * 16
            ncol = (i % 4) * 4
            ax1 = fig.add_subplot(gs[nrow:nrow + 16, ncol:ncol + 4])

        else:
            nrow += 16
            ax1 = fig.add_subplot(gs[nrow:nrow + 16, :4])

        sub = df1.copy()[df1['site_spp'] == what]
        sub = sub.select_dtypes(exclude=['object', 'category'])
        sub = sub[sub['Pleaf'] > -9999.]
        sub['gs'] /= sub['gs'].max()

        for day in sub['doy'].unique():
            mask = sub['doy'] == day
            plot_obs(ax1, sub['Pleaf'][mask], sub['gs'][mask])

        x0, x1, obs_popt = fit_Tuzet(sub)
        x = np.linspace(sub['Pleaf'].max(), sub['Pleaf'].min(), 500)
        ax1.plot(x, fsig_tuzet(x, obs_popt[0], obs_popt[1]), 'k', zorder=30)
        ax1.vlines(x0, 0., 1., linestyle=':')
        ax1.vlines(x1, 0., 1., linestyle=':')

        # get the integrated VC given by the obs and site params
        ref, __ = read_csv(os.path.join(ipath, '%s_calibrated.csv' % (what)))
        b, c = Weibull_params(ref.iloc[0])
        int_VC = np.zeros(len(sub))

        for j in range(len(sub)):
            int_VC[j], __ = quad(f, sub['Pleaf'].iloc[j], sub['Ps'].iloc[j],
                                 args=(b, c))

        # kmax estimate: transpiration divided by the vulnerability curve
        # integrated between the leaf and soil water potentials
        plot_obs(ax2, i, np.log(sub['E'] / int_VC), which='kmax')

        # subplot titles (including labelling)
        what = what.split('_')
        species = r'\textit{%s %s}' % (what[-2], what[-1])
        labels += [r'\textit{%s. %s}' % (what[-2][0], what[-1])]

        if 'Quercus' in what:
            species += ' (%s)' % (what[0][0])
            labels[-1] += ' (%s)' % (what[0][0])

        txt = ax1.annotate(r'\textbf{(%s)} %s' % (string.ascii_lowercase[i],
                                                  species),
                           xy=(0.025, 0.98), xycoords='axes fraction',
                           ha='left', va='top')
        txt.set_bbox(dict(boxstyle='round,pad=0.1', fc='w', ec='none',
                          alpha=0.8))

        # format axes ticks
        ax1.xaxis.set_major_locator(mpl.ticker.NullLocator())

        if (i == 13) or ((ncol > 0) and (nrow == 32)):
            render_xlabels(ax1, r'$\Psi_{l}$', 'MPa')

        if ncol == 0:
            ax1.yaxis.set_major_locator(mpl.ticker.MaxNLocator(3))
            ax1.yaxis.set_major_formatter(
                mpl.ticker.FormatStrFormatter('%.1f'))
            ax1.set_ylabel(r'$g_{s, norm}$')

        else:
            ax1.yaxis.set_major_locator(mpl.ticker.MaxNLocator(3))
            ax1.set_yticklabels([])

    ax2.annotate(r'\textbf{(%s)}' % (string.ascii_lowercase[i + 1]),
                 xy=(0.05, 0.98), xycoords='axes fraction', ha='right',
                 va='top')

    # add max conductance parameter values
    params, models = get_calib_kmax(df2)
    params = np.asarray(params)
    locs = np.arange(len(df1['site_spp'].unique()))

    # update colour list
    colours = (['#6023b7', '#af97c5', '#009231', '#6b3b07', '#ff8e12',
                '#ffe020', '#f10c80', '#ffc2cd']) * len(params)

    for i in range(params.shape[1]):

        if i < 8:
            ax2.scatter(locs, params[:, i], s=50, linewidths=0.25,
                        c=colours[i], alpha=0.9, label=models[0][i],
                        zorder=4)

        else:
            ax2.scatter(locs, params[:, i], s=50, linewidths=0.25,
                        c=colours[i], alpha=0.9, zorder=4)

    # tighten the subplot
    ax2.set_xlim(locs[0] - 0.8, locs[-1] + 0.8)
    ax2.set_ylim(np.log(0.025) - 0.1, np.log(80.))

    # ticks
    ax2.set_xticks(locs + 0.5)
    ax2.set_xticklabels(labels, ha='right', rotation=40)
    ax2.xaxis.set_tick_params(length=0.)
    yticks = [0.025, 0.25, 1, 5, 25, 75]
    ax2.set_yticks([np.log(e) for e in yticks])
    ax2.set_yticklabels(yticks)
    render_ylabels(ax2, r'k$_{max}$', 'mmol m$^{-2}$ s$^{-1}$ MPa$^{-1}$')

    handles, labels = ax2.get_legend_handles_labels()
    labels[3] = r'SOX$_\mathrm{\mathsf{opt}}$'
    ax2.legend(handles, labels, ncol=3, labelspacing=1. / 3.,
               columnspacing=0.5, loc=3)

    # save
    fig.savefig(figname)

#==============================================================================

to_fit = True
sample = None  # None, 1, 2, or 3
swaters = ['wet', 'inter']  # two different soil moisture profiles

# declare empty dataframe which will be used to analyse the calibrations
odf = pd.DataFrame(columns=['Model', 'training', 'solver', 'BIC', 'Rank',
                            'p1', 'v1', 'p2', 'v2'])

# where should the fitting solvers' outputs be stored?
base_dir = get_main_dir()  # working paths

check_X_Y(swaters)  # check that the training calibration files exist

if to_fit:

    for swater in swaters:  # loop over the training soil moisture profiles
        X, Y = prep_training_N_target(swater, sub=sample)

        # where should the calibration output be stored?
        opath = os.path.join(
            os.path.join(os.path.join(base_dir, 'output'), 'calibrations'),
            'idealised')

        if sample is not None:  # move files to the relevant sub-dir