def delz(model_name, dat, let):
    """
    Read model name, output date and letter and return the
    corresponding array of cell lengths in z-direction.

    The cell lengths are parsed from a '# delz' hashtag block in one of
    two model input files; if neither file provides it, a previously
    saved npy array is loaded instead.
    """
    output_dir = rm.make_output_dirs(model_name, dat, let)[0]
    input_file = rm.make_file_dir_names(model_name)[2]

    arr_filename = pm.py_output_filename('specs', 'delz',
                                         specl(model_name, dat, let), 'npy')

    # Look for the '# delz' hashtag in the primary input file first,
    # then in the secondary one; finally fall back to the cached array.
    if rm.check_hashtag(output_dir, input_file, "# delz"):
        line = rm.read_hashtag_input(output_dir + '/' + input_file,
                                     '# delz', 1)
    else:
        input_file = rm.make_file_dir_names(model_name)[4]
        if rm.check_hashtag(output_dir, input_file, "# delz"):
            line = rm.read_hashtag_input(output_dir + '/' + input_file,
                                         '# delz', 1)
        elif os.path.isfile(arr_filename):
            return np.load(arr_filename)
        else:
            raise IOError('Input files, hashtags or'
                          + ' npy file not found:\n\n'
                          + output_dir + '\n\n'
                          + input_file + '\n\n'
                          + arr_filename)

    # Each whitespace-separated token is either "<count>*<length>"
    # (count repetitions of length) or a single "<length>".
    cell_lengths = []
    for token in line.split():
        head, star, tail = token.partition('*')
        if star:
            cell_lengths.extend(int(head) * [float(tail)])
        else:
            cell_lengths.append(float(head))

    arr = np.array(cell_lengths)

    # Cache for the npy fallback above.
    np.save(arr_filename, arr)

    return arr
def plot(
        ax,
        which_methods,
        which_methods_left,
        which_methods_right,
        which_res='endres',
        model='wavebc',
        method='meanc',
        ensemble_size=50,
        n_syn=1,
        n_syn_bold=1,
        n_comparisons=10000,
        pic_format='pdf',
        bar_colors=['black', 'white', 'grey'],
):
    """
    Reads probability arrays which method is better, worse, or if they
    are even. Then plots those three possibilities in bars comparing
    the methods given in which_methods_left and which_methods_right.

    Parameters
    ----------
    ax : Axes
        The axes to draw to.
    which_methods : array int
        Array of integers containing the method specifiers
        from module plotarrays.
    which_methods_left : array int
        Method specifiers for the left side of the comparisons.
    which_methods_right : array int
        Method specifiers for the right side of the comparisons.
    which_res : string
        'endres' - use residuals after EnKF run
        'begres' - use residuals before EnKF run
    model : string
        'wavebc' - Model wavebc
        'wave' - Model wave
    method : string
        Which method was used for statistical comparison of the subset:
        'ttest', 'gauss' or 'meanc'. For n_syn == 1 the comparison
        always defaults to comparing the residuals.
    ensemble_size : integer
        Ensemble size of the job: 50, 70, 100, 250, 500, 1000, 2000.
    n_syn : integer
        Number of synthetic studies in subset.
    n_syn_bold : integer
        Number of synthetic studies in subset used for the bold
        ticklabels.
    n_comparisons : integer
        Number of comparisons calculated.
    pic_format : string
        Format of the picture: 'pdf', 'eps', 'png', 'jpg' or 'svg'.
    bar_colors : array of strings
        Three colors for the three patches of one bar.

    Returns
    -------
    ax : Axes
        Axes containing plot.
    pic_name : string
        Containing proposed saving location for Figure.
    """
    # Validate method subsets
    for imethod in which_methods_left:
        if imethod not in which_methods:
            raise RuntimeError(
                'Wrong methods in which_methods_left'
            )
    # BUG FIX: this loop previously iterated which_methods_left again,
    # leaving which_methods_right completely unvalidated
    for imethod in which_methods_right:
        if imethod not in which_methods:
            raise RuntimeError(
                'Wrong methods in which_methods_right'
            )

    if ensemble_size in [50, 70, 100, 250]:
        if n_syn > 1000:
            raise RuntimeError('n_syn wrong')
    elif ensemble_size in [500, 1000, 2000]:
        if n_syn > 100:
            raise RuntimeError('n_syn wrong')
    else:
        raise RuntimeError('ensemble size wrong')

    # Both method arrays: [0] left side, [1] right side of each bar
    show_methods = [which_methods_left, which_methods_right]

    # Number of bars; each bar consists of three patches
    # (left better / even / right better)
    num_bars = len(show_methods[0])
    num_patches = 3*num_bars

    # Load probs
    probs = np.load(
        pm.py_output_filename(
            na.tag,
            'probs_'+which_res,
            model + '_'+method+'_'+str(ensemble_size)+'_'+str(n_syn)+'_'
            + str(n_comparisons)+'_'
            + '_'.join([str(i) for i in which_methods]),
            'npy'
        )
    )

    # Load probs for bold labels
    # NOTE(review): this uses na.tagn while the load above uses na.tag —
    # possibly a distinct output tag, but confirm it is not a typo
    probs_bold = np.load(
        pm.py_output_filename(
            na.tagn,
            'probs_'+which_res,
            model+'_' + method+'_'+str(ensemble_size)+'_'+str(n_syn_bold)
            + '_'+str(n_comparisons)+'_'
            + '_'.join([str(i) for i in which_methods]),
            'npy'
        )
    )

    ax.set_position([0.3, 0.05, 0.4, 0.75])
    ax.set_frame_on(False)

    # Patch arrays for ax.barh()
    in_bottom = np.zeros(num_patches)
    in_height = np.zeros(num_patches)
    in_width = np.zeros(num_patches)
    in_left = np.zeros(num_patches)
    in_color = ['' for i in range(num_patches)]

    for i in range(num_patches):
        # BUG FIX: explicit floor division — under Python 3 `i/3` is a
        # float and cannot be used as a sequence index
        ibar = i // 3          # which bar this patch belongs to
        iseg = np.mod(i, 3)    # segment within the bar: 0/1/2
        in_bottom[i] = num_bars - ibar
        in_height[i] = 0.8
        in_width[i] = probs[show_methods[0][ibar],
                            show_methods[1][ibar]][iseg]
        in_left[i] = np.sum(probs[show_methods[0][ibar],
                                  show_methods[1][ibar]][0:iseg])
        in_color[i] = bar_colors[iseg]

    # Plot patches in bars
    # COMPAT FIX: the `bottom=` keyword was removed from barh with
    # matplotlib 2.0 (first argument is now `y`); positional arguments
    # work on both old and new versions, and align='edge' keeps the
    # pre-2.0 default bar alignment the tick positions below assume
    ax.barh(in_bottom, in_width, in_height, in_left,
            align='edge', color=in_color, edgecolor='k')

    # H_0 labels inside bar (only meaningful for the T-Test)
    if method == "ttest":
        for i in range(1, num_bars+1):
            if in_left[3*i-1]-in_left[3*i-2] > 0.15:
                ax.text(in_left[3*i-2]+0.4*(in_left[3*i-1]-in_left[3*i-2]),
                        in_bottom[3*i-2]+0.3,
                        "$H_0$",
                        fontsize=20)

    # Axis 1: left-hand method names
    ax.tick_params(direction='out', length=0, width=1,
                   labelsize=20, top=False, bottom=False,
                   labelright=False, pad=8)
    ax.set_xlim([-0.01, 1.01])
    ax.set_ylim([0.9, num_bars+0.8])
    ax.set_xticks([])
    ax.set_yticks([num_bars-i+0.4 for i in range(num_bars)])
    ax.set_yticklabels(
        [pa.longnames_methods[show_methods[0][i]] for i in range(num_bars)]
    )

    # Twin Axis 2: right-hand method names
    ax2 = ax.twinx()
    ax2.set_position([0.3, 0.05, 0.4, 0.75])
    ax2.set_frame_on(False)
    ax2.tick_params(direction='out', length=0, width=1,
                    labelsize=20, top=False, bottom=False,
                    labelleft=False, labelright=True,
                    labelcolor='black', pad=8)
    ax2.set_xlim([-0.01, 1.01])
    ax2.set_ylim([0.9, num_bars+0.8])
    ax2.set_xticks([])
    ax2.set_yticks([num_bars-i+0.4 for i in range(num_bars)])
    ax2.set_yticklabels(
        [pa.longnames_methods[show_methods[1][i]] for i in range(num_bars)]
    )

    # Boldness of axislabels: bold the decisive side, italic if even
    for i in range(num_bars):
        if probs_bold[show_methods[0][i], show_methods[1][i]][0] == 1:
            ax.yaxis.get_majorticklabels()[i].set_weight('bold')
        elif probs_bold[show_methods[0][i], show_methods[1][i]][2] == 1:
            ax2.yaxis.get_majorticklabels()[i].set_weight('bold')
        else:
            ax.yaxis.get_majorticklabels()[i].set_style('italic')
            ax2.yaxis.get_majorticklabels()[i].set_style('italic')

    # Saving location
    # BUG FIX: separator typo — the name was built as
    # str(n_syn)+str(n_comparisons)+'_'+'_' (numbers fused, doubled
    # underscore), inconsistent with the filename used for loading probs
    pic_name = pm.py_output_filename(
        na.tag,
        'bars_'+which_res,
        model+'_'+method+'_'+str(ensemble_size)
        + '_'+str(n_syn)+'_'+str(n_comparisons)+'_'
        + '_'.join([str(i) for i in which_methods]),
        pic_format
    )

    return ax, pic_name
def quots(
        ax,
        which_methods=[0, 1, 2, 3, 4, 5, 6],
        which_res='endres',
        stat_method='mean',
        model='wavebc',
        ensemble_sizes=[50, 70, 100, 250],
        ensemble_size=50,
        pic_format='pdf',
        is_text=False,
        axistitle='',
        fonttit=40,
        figpos=[0.32, 0.2, 0.6, 0.8],
        ticksize=20,
):
    """
    A function plotting a grid of quotients of statistical measures.

    Parameters
    ----------
    ax : Axes
        The axes to draw to.
    which_methods : array int
        Array of integers containing the method specifiers
        from module plotarrays. The methods appear in the
        plot in this order.
    which_res : string
        'endres' - use residuals after EnKF run
        'begres' - use residuals before EnKF run
    stat_method : string
        'mean' - Means
        'std' - Standard deviation
        'stdm' - Standard deviation of the mean
        'median' - Median or 50 Percentile
        'q25' - 25 Percentile
        'q75' - 75 Percentile
    model : string
        'wavebc' - Model wavebc
        'wave' - Model wave
    ensemble_sizes : array of integers
        array can typically contain 50, 70, 100, 250, 500, 1000, 2000
    ensemble_size : integer
        Ensemble size of the job. Possibilities: 50,
        70, 100, 250, 500, 1000, 2000
    pic_format : string
        Format of the picture
        'pdf' - pdf-format
        'eps' - eps-format
        'png' - png-format
        'jpg' - jpg-format
        'svg' - svg-format
    figpos : array of floats
        Four numbers xbeg, ybeg, xrange, yrange

    More input specifying plot parameters.

    Returns
    -------
    ax : Axes
        Axes containing quotient matrix.
    pic_name : string
        Containing proposed saving location for Figure.
    """
    # Check
    if ensemble_size not in [50, 70, 100, 250, 500, 1000, 2000]:
        raise RuntimeError('ensemble_size wrong')

    # Title
    ax.set_title(axistitle, size=fonttit)

    # Number of compared methods
    num_methods = len(which_methods)

    # Ensemble size translated to index
    iens = pa.indens[model][ensemble_size]

    # Load residuals (rows: methods, columns: ensemble sizes)
    res = np.load(pm.py_output_filename(
        'errorplot',
        which_res,
        stat_method+'_'+model+'_'
        + '_'.join([str(enssize) for enssize in ensemble_sizes])+'_'
        + '_'.join([str(i) for i in which_methods]),
        'npy'))

    # Calculate and sort quots: entry [i2, i1] = res[i1]/res[i2]
    # at the selected ensemble size
    quots = np.array(
        [[res[i1, iens]/res[i2, iens] for i1 in range(num_methods)]
         for i2 in range(num_methods)]
    )

    ax.set_position(figpos)

    # White Rectangles
    for ipm in range(num_methods):
        for jpm in range(num_methods):
            # Diagonal black (0.0 maps to the dark end of Greys_r)
            if ipm == jpm:
                quots[ipm, jpm] = 0.0
            # Upper triangle white: assigning None into a float array
            # stores NaN, which imshow leaves blank
            if ipm < jpm:
                quots[ipm, jpm] = None

    ax.imshow(
        quots,
        interpolation='nearest',
        cmap='Greys_r',
        norm=colors.Normalize(vmin=0.8, vmax=1.0, clip=False)
    )

    # Plot: Mostly ticks
    ax.set_xticks([i for i in range(num_methods)])
    ax.set_xticklabels([pa.names_methods[which_methods[i]]
                        for i in range(len(which_methods))],
                       fontsize=ticksize, rotation=90)
    ax.set_yticks([i for i in range(num_methods)])
    ax.set_yticklabels([pa.names_methods[which_methods[i]]
                        for i in range(len(which_methods))],
                       fontsize=ticksize)
    ax.tick_params(length=0)
    ax.set_frame_on(False)

    # Text: annotate the lower triangle with the quotient value,
    # truncated to four characters (e.g. '0.87'); white text on
    # dark cells (< 0.9), black otherwise
    for itext in range(num_methods):
        for jtext in range(num_methods):
            if itext < jtext:
                ntext = quots[jtext, itext]
                ttext = str(ntext)[0:4]
                px = itext-0.35
                py = jtext+0.15
                colero = 'white' if ntext < 0.9 else 'black'
                ax.text(px, py, ttext, color=colero, fontsize=25)

    # Text: n_syn and ensemble_size
    if is_text:
        model_spec = ' Tracer ' if model == 'wavereal' else ' Well '
        ax.text(
            3.5,
            1.5,
            model_spec+'\n'
            + r' $n_{e}$: '+str(ensemble_size).rjust(4),
            linespacing=1.5,
            fontsize=30,
            bbox={'facecolor': 'grey', 'alpha': 0.5, 'pad': 10},
        )

    # Saving location
    pic_name = pm.py_output_filename(
        ea.tag,
        'quots_'+which_res,
        stat_method+'_'+model+'_'
        + str(ensemble_size)+'_'
        + '_'.join([str(i) for i in which_methods]),
        pic_format)

    return ax, pic_name
def read(
        model_name,
        dat,
        let,
        varname='kz_mean',
        befaft='aft',
        fdir=None,
        fname=None,
        nt=10,
):
    """
    Reading variable arrays from SHEMAT-Suite.

    Parameters
    ----------
    model_name : string
        String of model name.
    dat : string
        String with date of model run.
    let : string
        String of letter of model run.
    varname : string
        Variable name for array to be read.
        Possibilities: 'kz_mean', 'kz_std', 'head_mean',
        'lz_mean', 'temp_mean'
    befaft : string
        Specifies whether the output is read in from before ('bef')
        or after ('aft') the EnKF update.
    fdir : string
        Full directory of vtk file.
    fname : string
        Full name of vtk file.
    nt : integer
        Number inside file name.

    Returns
    -------
    numpy_array : array
        Array containing the variable array
    numpy_array_name : string
        Containing proposed saving location for Array.
    """
    # Automatic file name generation when neither dir nor name is given
    if not (fdir or fname):
        # enkf_output_dir
        fdir = rm.make_output_dirs(model_name, dat, let)[2]

        # assim_out file index: 19 after the update, 18 before
        file_indices = {'aft': 19, 'bef': 18}
        if befaft in file_indices:
            fname = rm.make_file_dir_names(model_name,
                                           nt)[file_indices[befaft]]

    # Get vtk_reader for the requested variable
    vtk_reader = pf.my_vtk(fdir, fname, varname)

    # Debug output: value range of the first cell-data array
    print(varname,
          vtk_reader.GetOutput().GetCellData().GetArray(0).GetValueRange())

    # Convert vtk output to a numpy array
    numpy_array = pf.my_vtk_to_numpy(vtk_reader)

    # Proposed saving location for the array
    numpy_array_name = pm.py_output_filename(
        aa.tag,
        varname + '_' + str(nt).zfill(4) + '_' + befaft,
        sc.specl(model_name, dat, let),
        "npy")

    return numpy_array, numpy_array_name
def plot(
        ax,
        which_methods=[0, 1, 2, 3, 4, 5, 6],
        which_res='endres',
        stat_method='mean',
        ensemble_sizes=[50, 70, 100, 250],
        axistitle='',
        model='wavebc',
        is_std=False,
        lineyval=0.62,
        std_method='std',
        pic_format='pdf',
        figpos=[0.15, 0.3, 0.8, 0.6],
        xlim_min=0,
        xlim_max=None,
        ylims=[0.28, 0.82],
        is_textpos_auto=True,
        textpos=[0.7, 0.6, 0.5, 0.4],
        xdiff_nens=0.5,
        yticks=[0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1],
        ylabel=r'RMSE $\log(K[\mathrm{m}^2])$',
        num_pack=4,  # Number of methods in pack
        is_text=False,
        text_x=0.5,
        text_y=0.5,
        n_syn=1000,
        legend_input=None,
        formatsos=['o', 'v', 's', 'p', 'o', 'v', 's', 'p',
                   'o', 'v', 's', 'p', 'o', 'v', 's', 'p',
                   'o', 'v', 's', 'p', 'o', 'v', 's', 'p',
                   'o', 'v', 's', 'p', 'o', 'v', 's', 'p'],
        coleros=[(0.0, 0.0, 0.0), (0.0, 0.0, 0.0),
                 (0.0, 0.0, 0.0), (0.0, 0.0, 0.0),
                 (1.0, 1.0, 1.0), (1.0, 1.0, 1.0),
                 (1.0, 1.0, 1.0), (1.0, 1.0, 1.0),
                 (0.0, 0.0, 0.0), (0.0, 0.0, 0.0),
                 (0.0, 0.0, 0.0), (0.0, 0.0, 0.0),
                 (1.0, 1.0, 1.0), (1.0, 1.0, 1.0),
                 (1.0, 1.0, 1.0), (1.0, 1.0, 1.0),
                 (0.0, 0.0, 0.0), (0.0, 0.0, 0.0),
                 (0.0, 0.0, 0.0), (0.0, 0.0, 0.0),
                 (1.0, 1.0, 1.0), (1.0, 1.0, 1.0),
                 (1.0, 1.0, 1.0), (1.0, 1.0, 1.0),
                 (0.0, 0.0, 0.0), (0.0, 0.0, 0.0),
                 (0.0, 0.0, 0.0), (0.0, 0.0, 0.0),
                 (1.0, 1.0, 1.0), (1.0, 1.0, 1.0),
                 (1.0, 1.0, 1.0), (1.0, 1.0, 1.0)],
        markersize=[10 for i in range(32)],
        markeredgesize=1.5,
        fontleg=30,
        fonttit=40,
        fontlab=40,
        fonttic=30,
):
    """
    A plotting function for statistics of residual distributions.

    Parameters
    ----------
    ax : Axes
        The axes to draw to.
    which_methods : array int
        Array of integers containing the method specifiers
        from module plotarrays. The methods appear in the
        plot in this order.
    which_res : string
        'endres' - use residuals after EnKF run
        'begres' - use residuals before EnKF run
    stat_method : string
        'mean' - Means
        'std' - Standard deviation
        'stdm' - Standard deviation of the mean
        'median' - Median or 50 Percentile
        'q25' - 25 Percentile
        'q75' - 75 Percentile
    ensemble_sizes : array of integers
        array can typically contain 50, 70, 100, 250, 500, 1000, 2000
    model : string
        'wavebc' - Model wavebc
        'wave' - Model wave
    is_std : boolean
        True - Show errorbars of standard deviation
        False - No errorbars
    std_method : string
        Standard deviation to use
        'std' - Standard deviation
        'stdm' - Standard deviation of mean
    pic_format : string
        Format of the picture
        'pdf' - pdf-format
        'eps' - eps-format
        'png' - png-format
        'jpg' - jpg-format
        'svg' - svg-format
    figpos : array of floats
        Four numbers xbeg, ybeg, xrange, yrange

    More input specifying plot parameters.

    Returns
    -------
    ax : Axes
        Axes containing plot.
    pic_name : string
        Containing proposed saving location for Figure.
    """
    # Check
    for enssize in ensemble_sizes:
        if enssize not in [50, 70, 100, 250, 500, 1000, 2000]:
            raise RuntimeError(
                'Wrong ensemble size.'
            )

    # Title
    ax.set_title(axistitle, size=fonttit)

    # Number of methods
    num_methods = len(which_methods)

    # Default legend input: long method names, left-justified to
    # uniform width so the legend columns line up
    if legend_input is None:
        legend_input = pa.longnames_methods
    legend_input = np.array([legend_input[i].ljust(18)
                             for i in which_methods])

    # Load residuals (rows: methods, columns: ensemble sizes)
    res = np.load(pm.py_output_filename(
        'errorplot',
        which_res,
        stat_method+'_'+model+'_'
        + '_'.join([str(enssize) for enssize in ensemble_sizes])+'_'
        + '_'.join([str(i) for i in which_methods]),
        'npy'
    ))

    # Load standard deviation (same layout as res)
    if is_std:
        std = np.load(pm.py_output_filename(
            'errorplot',
            which_res,
            std_method+'_'+model+'_'
            + '_'.join([str(enssize) for enssize in ensemble_sizes])+'_'
            + '_'.join([str(i) for i in which_methods]),
            'npy'))

    ax.set_prop_cycle("color", ['k'])
    ax.set_position(figpos)

    # One cluster of points per ensemble size
    for iens, enssize in enumerate(ensemble_sizes):

        # x positions, up to 15 methods
        x = np.delete(np.arange(0, 100),
                      np.arange(0, 100, num_pack+1))
        # Skip one after num_pack+1 entries for vertical line

        resplot = res[:, iens]
        if is_std:
            stdplot = std[:, iens]

        # Plot
        puntos = []  # Contains plotted points
        ax.plot(x[:len(resplot)], resplot, 'k-', label=3)
        for iplot in range(num_methods):
            # Points
            punto, = ax.plot(
                x[iplot], resplot[iplot],
                formatsos[iplot],
                lw=2,
                ms=markersize[iplot],
                label=legend_input[iplot],
                c=coleros[iplot],
                mew=markeredgesize,
                mec='black'
            )
            puntos.append(punto)
            # Text: annotate each cluster with its ensemble size,
            # next to the last method's point
            if iplot == num_methods-1:
                ax.text(
                    x[iplot]+xdiff_nens,
                    resplot[iplot] if is_textpos_auto else textpos[iens],
                    r'$n_{e}$='+str(enssize),
                    verticalalignment='center',
                    horizontalalignment='left',
                    size=20,
                )
            # Error
            if is_std:
                ax.errorbar(
                    x[iplot], resplot[iplot],
                    yerr=stdplot[iplot],
                    fmt=formatsos[iplot],
                    lw=2,
                    ms=markersize[iplot],
                    label='this',
                    mfc=coleros[iplot],
                    mew=markeredgesize,
                    mec='black'
                )

    # Legend: methods are split into packs of num_inleg, one legend box
    # per pack placed side by side below the axes
    num_inleg = num_pack  # Methods per legend (except last)
    num_legs = int(num_methods/num_inleg
                   + int(bool(np.mod(num_methods, num_inleg))))  # Number of legends
    num_inlastleg = (np.mod(num_methods, num_inleg) if
                     np.mod(num_methods, num_inleg)
                     else num_inleg)  # Methods in last legend
    # last method ind in each legend
    leginds = [num_inleg-1+i*num_inleg if i < num_legs-1
               else num_inleg-1+(i-1)*num_inleg+num_inlastleg
               for i in range(num_legs)]
    # Methods in each legend
    legranges = [num_inleg if i < num_legs-1
                 else num_inlastleg
                 for i in range(num_legs)]

    for ileg in range(num_legs):
        xleg = figpos[0] + ileg*figpos[2]/num_legs
        # Handles for this pack: puntos from the last ensemble-size
        # cluster plotted above
        my_legend = ax.legend(
            handles=[puntos[i]
                     for i in range(leginds[ileg]-legranges[ileg]+1,
                                    leginds[ileg]+1)],
            bbox_to_anchor=[xleg, 0.00, figpos[2]/num_legs, 0.3],
            bbox_transform=plt.gcf().transFigure,
            # loc=[0.0, 1.0],
            mode='expand',
            # labelspacing=1.0,
            ncol=1,
            numpoints=1,
            fontsize=fontleg,
            framealpha=1.0,
            markerscale=1.0
        )
        ax.add_artist(my_legend)

    # Lines: dotted verticals between clusters, dotted horizontals at
    # the y-ticks, dashed horizontal at lineyval
    for xline in range(0, 100, num_pack+1):
        ax.vlines(xline, 0.0, 1.0, linestyles='dotted')
    for yline in yticks:
        ax.hlines(yline, 0, 100, linestyles='dotted')
    ax.hlines(lineyval, 0, 100, linestyles='dashed')

    # Text: Model name and n_syn in box
    if is_text:
        model_spec = ' Tracer ' if model == 'wavereal' else ' Well '
        ax.text(
            text_x,
            text_y,
            model_spec+'\n'
            + r' $n_{syn}$: '+str(n_syn).rjust(4),
            linespacing=1.5,
            fontsize=30,
            bbox={'facecolor': (0.8, 0.8, 0.8), 'alpha': 1.0, 'pad': 10},
        )

    # Style
    ax.set_xlim([xlim_min,
                 (num_legs*(num_pack+1) if xlim_max is None else xlim_max)])
    ax.set_ylabel(ylabel, fontsize=fontlab, labelpad=10)
    ax.tick_params(direction='in', length=6, width=1,
                   labelsize=fonttic, top=False, right=False,
                   bottom=False, pad=8)
    ax.set_xticks([])
    ax.set_yticks(yticks)
    ax.get_xaxis().set_visible(False)
    ax.set_ylim(ylims)

    # Saving location
    pic_name = pm.py_output_filename(
        ea.tag,
        which_res,
        stat_method+'_'+model+'_'
        + '_'.join([str(enssize) for enssize in ensemble_sizes])+'_'
        + '_'.join([str(i) for i in which_methods]),
        pic_format
    )

    return ax, pic_name
def read(
        which_methods,
        which_res='endres',
        model='wavebc',
        ensemble_sizes=[50, 70, 100, 250],
        method='ttest',
        ensemble_size=50,
        n_syn=1,
        n_comparisons=10000,
        cl=0.95,
        pval=0.05,
):
    """
    Reads residual arrays at beginning (begres) or end (endres) of
    the EnKF run and calculates probability arrays which method is
    better, worse, or if they are even.

    Parameters
    ----------
    which_methods : array int
        Array of integers containing the method specifiers
        from module plotarrays.
    which_res : string
        'endres' - use residuals after EnKF run
        'begres' - use residuals before EnKF run
    model : string
        'wavebc' - Model wavebc
        'wave' - Model wave
    ensemble_sizes : array of integers
        array can typically contain 50, 70, 100, 250, 500, 1000, 2000
    method : string
        Which method to use for statistical comparison of the subset.
        If n_syn == 1, the comparison always defaults to comparing
        the residuals.
        'ttest' - Use the T-Test, testing if the two samples belong
                  to the same Gaussian distribution.
        'gauss' - Calculate Gaussian distribution of the difference
                  and calculate its probability to be larger than zero.
        'meanc' - Calculate the means and compare.
    ensemble_size : integer
        Ensemble size of the job: 50, 70, 100, 250, 500, 1000, 2000.
    n_syn : integer
        Number of synthetic studies in subset.
    n_comparisons : integer
        Number of comparisons calculated.
    cl : float
        Confidence level for 'gauss'. If the probability weight of the
        distribution of the difference between two methods is larger
        than cl on one side of zero, the method with the smaller RMSE
        is considered to have performed better.
    pval : float
        If the p-value from the T-Test is smaller than pval the test is
        considered negative, i.e. a significant difference between the
        distributions is assumed, making the method with the smaller
        RMSE perform significantly better.

    Returns
    -------
    probs : array
        Array containing the probabilities.
    probs_name : string
        Containing proposed saving location for array.
    """
    # Checks
    if model not in ['wavebc', 'wave', 'wavewell', 'wavereal']:
        raise RuntimeError('model wrong')
    if method not in ['ttest', 'gauss', 'meanc']:
        raise RuntimeError('method wrong')
    if ensemble_size in [50, 70, 100, 250]:
        if n_syn > 1000:
            raise RuntimeError('n_syn wrong')
    elif ensemble_size in [500, 1000, 2000]:
        if n_syn > 100:
            raise RuntimeError('n_syn wrong')
    else:
        raise RuntimeError('ensemble size wrong')

    # Maximum number of runs over all considered methods
    max_n_runs = 0
    for i_method in which_methods:
        max_n_runs = np.max(
            [pa.nums[model][i_method][ensemble_size], max_n_runs])

    # Load final residuals; one row per method, padded with zeros up
    # to max_n_runs
    res = np.zeros([len(which_methods), max_n_runs])
    for i, i_method in enumerate(which_methods):
        res_name = pm.py_output_filename(
            'dists',
            which_res,
            model + '_' + pa.dats[model][i_method][ensemble_size]
            + '_' + pa.lets[model][i_method][ensemble_size],
            'npy')
        res[i, 0:pa.nums[model][i_method][ensemble_size]] = np.load(res_name)

    # Initialize probs array
    probs = np.zeros([len(which_methods), len(which_methods), 3])

    # DOCUMENTATION:
    # -------------------------------------------------
    # probs[i, j, 0] : Probability that method i is better
    # probs[i, j, 1] : Probability that methods are equal
    # probs[i, j, 2] : Probability that method j is better

    for ii, ri in enumerate(which_methods):
        for ij, rj in enumerate(which_methods):

            # Every pair only once (symmetry)
            # BUG FIX: condition was `if ij < ij` (always False), so
            # each pair was computed twice and the first result was
            # silently overwritten
            if ij < ii:
                continue

            # Residual arrays for each method
            resi = res[ii, 0:pa.nums[model][ri][ensemble_size]]
            resj = res[ij, 0:pa.nums[model][rj][ensemble_size]]

            # If the subsets cover all available studies, every drawn
            # comparison is identical, so more than one makes no sense.
            # BUG FIX: was a lexicographic list comparison
            # `[n_syn, n_syn] >= [...]`, which ignored the second count
            # whenever the first differed.
            if (n_syn >= pa.nums[model][ri][ensemble_size]
                    and n_syn >= pa.nums[model][rj][ensemble_size]):
                if not n_comparisons == 1:
                    raise RuntimeError(
                        'Set n_comparisons to 1 if n_syn equal'
                        + ' to full number of available studies')

            ni = 0  # ...i better
            ne = 0  # ...equal
            nj = 0  # ...j better

            # Iterate number of comparisons
            for i in range(n_comparisons):

                # Random subset of size n_syn for each method
                isi = np.random.permutation(
                    np.arange(pa.nums[model][ri][ensemble_size]))[0:n_syn]
                isj = np.random.permutation(
                    np.arange(pa.nums[model][rj][ensemble_size]))[0:n_syn]

                resmixi = resi[isi]
                resmixj = resj[isj]

                # Single run: compare the residuals directly
                if n_syn == 1:
                    if resmixi[0] < resmixj[0]:
                        ni = ni + 1
                    elif resmixi[0] > resmixj[0]:
                        nj = nj + 1
                    else:  # Equality happens
                        ne = ne + 1

                # Mean comparison
                elif method == "meanc":
                    if np.mean(resmixi) < np.mean(resmixj):
                        ni = ni + 1
                    elif np.mean(resmixi) > np.mean(resmixj):
                        nj = nj + 1
                    else:  # Equality happens
                        ne = ne + 1

                # T-Test
                elif method == "ttest":
                    tv, pv = stats.ttest_ind(resmixi, resmixj,
                                             equal_var=False)
                    if pv < pval:
                        # Significant difference
                        if tv < 0:
                            ni = ni + 1
                        else:
                            nj = nj + 1
                    else:
                        # No significant difference
                        ne = ne + 1

                # Gaussian difference
                elif method == "gauss":
                    # Means
                    mi = np.mean(resmixi)
                    mj = np.mean(resmixj)
                    # Mean standard deviations
                    si = np.std(resmixi) / np.sqrt(resmixi.size)
                    sj = np.std(resmixj) / np.sqrt(resmixj.size)
                    # Mean difference and stdev of mean difference
                    m = mj - mi
                    s = np.sqrt(si * si + sj * sj)
                    # Probability bigger than zero
                    pcl = 0.5 + 0.5 * sp.special.erf(m / (s * np.sqrt(2)))
                    if pcl > cl:
                        # i better
                        ni = ni + 1
                    elif pcl < 1 - cl:
                        # j better
                        nj = nj + 1
                    else:
                        # No significant difference
                        ne = ne + 1

            # Output probabilities (symmetric fill of both triangles)
            pi = float(ni) / float(ni + ne + nj)  # i better
            pe = float(ne) / float(ni + ne + nj)  # equal
            pj = float(nj) / float(ni + ne + nj)  # j better

            probs[ii, ij, 0] = pi
            probs[ii, ij, 1] = pe
            probs[ii, ij, 2] = pj

            probs[ij, ii, 0] = pj
            probs[ij, ii, 1] = pe
            probs[ij, ii, 2] = pi

    probs_name = pm.py_output_filename(
        na.tag,
        'probs_' + which_res,
        model + '_' + method + '_' + str(ensemble_size)
        + '_' + str(n_syn) + '_' + str(n_comparisons) + '_'
        + '_'.join([str(i) for i in which_methods]),
        'npy')

    return probs, probs_name
def read(
        model_name,
        dat,
        let,
        fdir=None,
        fname=None,
        varname='uindex',
        num_mon=1,
):
    """
    Reading monitor arrays from SHEMAT-Suite.

    Parameters
    ----------
    model_name : string
        String of model name.
    dat : string
        String with date of model run.
    let : string
        String of letter of model run.
    fdir : string
        Full directory of monitor file (defaults to the model's
        samples output dir).
    fname : string
        Full name of monitor file (defaults to the model's
        monitor file).
    varname : string
        Variable name for array to be read.
        Possibilities: 'uindex', 'head', 'temp', 'kz', 'v'
    num_mon : integer
        Number for monitoring point.
        IMPORTANT: num_mon = 0 corresponds to the first monitoring
        point in SHEMAT monitor file.
        General: num_mon = i corresponds to monitoring point i+1

    Returns
    -------
    numpy_array : array
        Array containing the monitor variable array
    numpy_array_name : string
        Containing proposed saving location for Array.
    """
    # Dirs
    if fdir is None:
        # samples_output_dir
        fdir = rm.make_output_dirs(model_name, dat, let)[1]
    if fname is None:
        # monitor_file
        fname = rm.make_file_dir_names(model_name)[16]

    # Read the requested variable column from the monitor file
    numpy_array = np.genfromtxt(
        fdir + '/' + fname,
        dtype='f8',
        comments='%',
        usecols=(ma.varpos[varname]),
    )

    # Reshape to (num_timesteps, num_mons), then select one point
    num_mons = sc.num_mons(model_name, dat, let)
    if np.remainder(len(numpy_array), num_mons):
        raise RuntimeError('Problem with num_mons')
    # BUG FIX: floor division — plain `/` yields a float under
    # Python 3, which reshape rejects
    numpy_array = numpy_array.reshape(len(numpy_array) // num_mons,
                                      num_mons)
    numpy_array = numpy_array[:, num_mon]

    # Numpy Array Name
    numpy_array_name = pm.py_output_filename(
        ma.tag,
        varname,
        sc.specl(model_name, dat, let) + '_' + str(num_mon),
        "npy")

    return numpy_array, numpy_array_name
def hist(
        ax,
        model_name='wavebc',
        which_method=0,
        ensemble_size=50,
        n_syn=10,
        n_comparisons=1000,
        which_res='endres',
        n_bins=100,
        pic_format='pdf',
        xlims=None,
        histlims=None,
        title=None,
        titley=1.05,
        is_plot=False,
        xlabel=None,
        ylabel=None,
        is_xticks=True,
        is_yticks=True,
        itickhide=10,
        fonttit=30,
        fontaxl=30,
        fonttic=10,
):
    """
    A histogramming function for means of random subsets of given size.

    Parameters
    ----------
    ax : Axes
        The axes to draw to.
    model_name : string
        String of model name.
        'wavebc' - Model wavebc
        'wavereal' - Model wavereal
        'wavewell' - Model wavewell
        'wave' - Model wave
    which_method : int
        Integer containing the method specifier from module plotarrays.
    ensemble_size : integer
        Ensemble size of the job: 50, 70, 100, 250, 500, 1000, 2000.
    n_syn : integer
        Number of synthetic studies in subset for mean calculation.
    n_comparisons : integer
        Number of means calculated.
    which_res : string
        'endres' - use residuals after EnKF run
        'begres' - use residuals before EnKF run
    n_bins : integer
        Number of bins of histogram
    pic_format : string
        Format of the picture: 'pdf', 'eps', 'png', 'jpg' or 'svg'.
    xlims, histlims : sequences or None
        x-axis limits and histogram range.
    title, titley, xlabel, ylabel : plot annotation parameters.
    is_plot : boolean
        True - overlay a best-fit normal pdf.
    is_xticks, is_yticks, itickhide : ticklabel visibility control.
    fonttit, fontaxl, fonttic : font sizes.

    Returns
    -------
    ax : Axes
        Axes containing histogram.
    pic_name : string
        Containing proposed saving location for Figure.
    """
    # Load means
    arr = np.load(
        pm.py_output_filename(
            ga.tag,
            'meanarray_' + which_res,
            model_name + '_' + str(ensemble_size) + '_' + str(n_syn)
            + '_' + str(n_comparisons) + '_' + str(which_method),
            'npy'))

    # Histogram (normalized so the pdf overlay below is comparable)
    n, bins, patches = ax.hist(arr, n_bins, color='grey',
                               range=histlims,
                               density=True, stacked=True)

    ax.tick_params(labelsize=fonttic)

    if xlims:
        ax.set_xlim(xlims)
    if title:
        ax.set_title(title, size=fonttit, y=titley)

    # Hide every itickhide-th ticklabel (or all of them)
    if not is_xticks:
        ax.set_xticklabels([])
    else:
        for label in ax.xaxis.get_ticklabels()[::itickhide]:
            label.set_visible(False)
    if not is_yticks:
        ax.set_yticklabels([])
    else:
        for label in ax.yaxis.get_ticklabels()[::itickhide]:
            label.set_visible(False)

    if xlabel:
        ax.set_xlabel(xlabel, size=fontaxl)
    if ylabel:
        ax.set_ylabel(ylabel, size=fontaxl)

    if is_plot:
        # Add a 'best fit' line.
        # BUG FIX: mlab.normpdf was removed in matplotlib 3.1 —
        # evaluate the normal pdf directly with numpy instead.
        mu = np.mean(arr)
        sigma = np.std(arr)
        y = (np.exp(-0.5 * ((bins - mu) / sigma) ** 2)
             / (sigma * np.sqrt(2.0 * np.pi)))
        ax.plot(bins, y, '--', lw=1, color="k")

    # Saving location
    pic_name = pm.py_output_filename(
        ga.tag,
        'meanarray_' + which_res,
        model_name + '_' + str(ensemble_size) + '_' + str(n_syn)
        + '_' + str(n_comparisons) + '_' + str(which_method),
        pic_format)

    return ax, pic_name
def plot(
        ax,
        model_name,
        dat,
        let,
        num_mon=1,
        is_labels=True,
        is_ownticks=True,
        varname='temp',  # 'head','v','temp','kz', 'uindex'
        position=[0.1, 0.1, 0.8, 0.8],
        xlims=[0.0, 26000.0],
        ylims=[15.0, 22.0],
        linewidth=5,
        markercolor='black',
        legend_label='default',
        xlabel='[m]',
        ylabel='[m]',
        xlabelfontsize=40,
        ylabelfontsize=40,
        ticklabelfontsize=20,
        xownticks=[0.1+i*0.1 for i in range(9)],
        yownticks=[0.1+i*0.1 for i in range(9)],
        pic_format='pdf',  # 'png','eps','pdf'
):
    """
    A plotting function for monitoring time series.

    Parameters
    ----------
    ax : Axes
        The axes to draw to.
    model_name : string
        String of model name.
    dat : string
        String with date of model run.
    let : string
        String of letter of model run.

    Returns
    -------
    ax : Axes
        Axes containing image of variable array.
    pic_name : string
        Containing proposed saving location for Figure.
    """
    # Common specifier for the time/variable arrays and the figure name
    spec = sc.specl(model_name, dat, let) + '_' + str(num_mon)

    # Load time and variable arrays for this monitoring point
    time = np.load(pm.py_output_filename(ma.tag, 'time', spec, "npy"))
    var = np.load(pm.py_output_filename(ma.tag, varname, spec, "npy"))

    # Time series
    ax.plot(time, var, label=legend_label, color=markercolor,
            lw=linewidth)

    # Axis position and optional custom ticks
    ax.set_position(position)
    if is_ownticks:
        ax.xaxis.set_ticks(xownticks)
        ax.yaxis.set_ticks(yownticks)

    # Labels and tick styling
    ax.set_xlabel(xlabel, fontsize=xlabelfontsize, visible=is_labels)
    ax.set_ylabel(ylabel, fontsize=ylabelfontsize, visible=is_labels)
    ax.tick_params(length=20 if is_labels else 0,
                   labelsize=ticklabelfontsize)

    # Axis limits
    ax.set_xlim(xlims[0], xlims[1])
    ax.set_ylim(ylims[0], ylims[1])

    # Figure name
    pic_name = pm.py_output_filename(ma.tag, varname, spec, pic_format)

    return ax, pic_name
def sort(
        which_methods,
        indsort=None,
        ensemble_sizes=[50, 70, 100, 250],
        model_name='wavebc',
        which_res='endres',
        stat_method='mean',
        template_model_name='wavebc',
        template_which_res='endres',
        template_stat_method='mean',
        template_ensemble_sizes=[50, 70, 100, 250],
        template_enssize=50,
):
    """
    Reads a template array and sorts the indices. Then
    it sorts the specified stat_array in the same order.

    Parameters
    ----------
    which_methods : array int
        Array of integers containing the method specifiers
        from module plotarrays.

    indsort : array int or None
        Precomputed sorting order. If None, it is computed
        from the template arguments via pf.indsort.

    ensemble_sizes : array of integers
        array can typically contain 50, 70, 100, 250, 500, 1000, 2000

    model_name : string
        'wavebc', 'wavereal', 'wavewell' or 'wave'.

    which_res : string
        'endres' - use residuals after EnKF run
        'begres' - use residuals before EnKF run

    stat_method : string
        'mean', 'std', 'stdm', 'median', 'q25' or 'q75'.

    template_* :
        Same meaning as the corresponding plain arguments, but used to
        pick the template array that defines the sorting order.

    Returns
    -------
    stat_array : array
        Array containing the statistical measures (sorted).

    stat_array_name : string
        Containing proposed saving location for array (sorted).

    which_methods_sorted : array of ints
        Array sorted indices.
    """
    # Indices for sorting order.
    # FIX: compare against None explicitly. The former `if not indsort:`
    # raised "truth value of an array is ambiguous" for numpy arrays and
    # silently recomputed the order for an empty caller-supplied sequence.
    if indsort is None:
        indsort = pf.indsort(
            which_methods,
            model_name=template_model_name,
            which_res=template_which_res,
            stat_method=template_stat_method,
            ensemble_sizes=template_ensemble_sizes,
            ensemble_size=template_enssize,
        )
    indsort = np.asarray(indsort)

    # Load to-be-sorted array
    stat_array = np.load(
        pm.py_output_filename(
            ea.tag,
            which_res,
            stat_method + '_' + model_name + '_'
            + '_'.join([str(enssize) for enssize in ensemble_sizes])
            + '_' + '_'.join([str(i) for i in which_methods]),
            'npy'))

    # Sort rows in one fancy-indexing step (equivalent to, but simpler
    # than, the former per-column copy-and-permute loop)
    stat_array = stat_array[indsort, :]

    # Sort which_methods in the same order
    which_methods_sorted = np.asarray(which_methods,
                                      dtype=np.int64)[indsort]

    # Name for sorted array
    stat_array_name = pm.py_output_filename(
        ea.tag,
        which_res,
        stat_method + '_' + model_name + '_'
        + '_'.join([str(enssize) for enssize in ensemble_sizes])
        + '_' + '_'.join([str(i) for i in which_methods_sorted]),
        'npy')

    return stat_array, stat_array_name, which_methods_sorted
def read(
        model_name='wavebc',
        which_method=0,
        ensemble_size=50,
        n_syn=10,
        n_comparisons=1000,
        which_res='endres',
):
    """
    Reads residual arrays at beginning (begres) or end (endres) of
    the EnKF run and calculates an array of means from random subsets
    of given size.

    Parameters
    ----------
    model_name : string
        'wavebc', 'wavereal', 'wavewell' or 'wave'.

    which_method : int
        Integer containing the method specifier from module plotarrays.

    ensemble_size : integer
        Ensemble size of the job. Possibilities: 50,
        70, 100, 250, 500, 1000, 2000

    n_syn : integer
        Number of synthetic studies in subset for mean calculation.

    n_comparisons : integer
        Number of means calculated.

    which_res : string
        'endres' - use residuals after EnKF run
        'begres' - use residuals before EnKF run

    Returns
    -------
    gauss_array : array
        Array containing the means.

    gauss_array_name : string
        Containing proposed saving location for array.
    """
    # Maximum admissible subset size depends on the ensemble size
    if ensemble_size in [50, 70, 100, 250]:
        max_n_syn = 1000
    elif ensemble_size in [500, 1000, 2000]:
        max_n_syn = 100
    else:
        raise RuntimeError('ensemble size wrong')
    if n_syn > max_n_syn:
        raise RuntimeError('n_syn wrong')

    # Job bookkeeping for this method and ensemble size
    dat = pa.dats[model_name][which_method][ensemble_size]
    let = pa.lets[model_name][which_method][ensemble_size]
    num_runs = pa.nums[model_name][which_method][ensemble_size]

    # Residuals of the full set of synthetic studies
    res = np.load(
        pm.py_output_filename(
            'dists',
            which_res,
            model_name + '_' + dat + '_' + let,
            'npy'))

    # Mean of a random n_syn-subset, repeated n_comparisons times
    gauss_array = []
    for _ in range(n_comparisons):
        subset = np.random.permutation(np.arange(num_runs))[0:n_syn]
        gauss_array.append(np.mean(res[subset]))

    gauss_array_name = pm.py_output_filename(
        ga.tag,
        'meanarray_' + which_res,
        model_name + '_' + str(ensemble_size) + '_' + str(n_syn)
        + '_' + str(n_comparisons) + '_' + str(which_method),
        'npy')

    return gauss_array, gauss_array_name
def plot(
        ax,
        model_name,
        dat,
        let,
        nt=0,
        is_grid=True,
        is_mask=False,
        is_labels=True,
        is_ownticks=False,
        is_ownticklabels=False,
        axistitle='',
        varname='uindex',            # 'head','v','temp','kz', 'uindex'
        v_component=1,               # 0,1,2
        is_position=True,
        position=[0.1, 0.1, 0.6, 0.8],
        is_ownlims=False,
        xlims=[0.0, 0.8032],
        ylims=[0.0, 0.8032],
        zlims=[0.0, 0.8032],
        alpha=1.0,
        maskvalue=7,
        xlabelfontsize=40,
        ylabelfontsize=40,
        xownticks=[0.1 + i * 0.1 for i in range(9)],
        yownticks=[0.1 + i * 0.1 for i in range(9)],
        xticklabels=[0.1 + i * 0.1 for i in range(9)],
        yticklabels=[0.1 + i * 0.1 for i in range(9)],
        xticklabelfontsize=20,
        yticklabelfontsize=20,
        num_cbar=7,
        low_cbar=10.0285,
        high_cbar=10.0304,
        auto_cbar=True,
        pic_format='pdf',            # 'png','eps','pdf'
        is_xz=False,
):
    """
    A plotting function for variable arrays in a NonUniformGrid.

    Parameters
    ----------
    ax : Axes
        The axes to draw to.

    model_name : string
        String of model name.
        'wavebc' - Model wavebc
        'wavereal' - Model wavereal
        'wavewell' - Model wavewell
        'wave' - Model wave

    dat : string
        String with date of model run.

    let : string
        String of letter of model run.

    Returns
    -------
    ax : Axes
        Axes containing image of variable array.

    pic_name : string
        Containing proposed saving location for Figure.
    """
    # Read grid arrays from pskf/tools/plot/specs.py. For x-z cross
    # sections the y axis doubles as the z axis.
    x = sc.x(model_name, dat, let)
    y = (sc.y(model_name, dat, let) if not is_xz
         else sc.z(model_name, dat, let))
    xticks = sc.xticks(model_name, dat, let)
    yticks = (sc.yticks(model_name, dat, let) if not is_xz
              else sc.zticks(model_name, dat, let))

    # Load variable array
    var = np.load(
        pm.py_output_filename(
            fa.tag,
            varname,
            sc.specl(model_name, dat, let) + '_' + str(nt),
            "npy",
        ))

    if is_xz:
        # Quick fix for x-z-arrays
        var = var.flatten().reshape(var.shape[::-1])

    # Variable-specific preprocessing
    if varname == 'v':
        var = var[:, :, v_component]
    if varname == 'head':
        var = var - 10.0
    if varname == 'kz':
        var = np.log10(var)

    if auto_cbar:
        low_cbar = var.min()
        high_cbar = var.max()

    # Possible mask: show only cells within +/- 0.5 of maskvalue
    if is_mask:
        var = np.ma.array(var,
                          mask=np.logical_or(var < maskvalue - 0.5,
                                             var > maskvalue + 0.5))

    # Axis position
    if is_position:
        ax.set_position(position)

    # Create image
    im = mpl.image.NonUniformImage(ax,
                                   interpolation='nearest',
                                   cmap=pf.cmap_discretize(
                                       cm.viridis, num_cbar),
                                   norm=colors.Normalize(vmin=low_cbar,
                                                         vmax=high_cbar,
                                                         clip=False))
    im.set_data(x, y, var)
    im.set_alpha(alpha)
    # FIX: mutating ax.images directly (ax.images.append) was deprecated
    # and removed in recent matplotlib; add_image registers the artist
    # correctly.
    ax.add_image(im)

    # Ticks
    if is_ownticks:
        ax.xaxis.set_ticks(xownticks)
        ax.yaxis.set_ticks(yownticks)
    else:
        ax.xaxis.set_ticks(xticks[1::10])
        ax.yaxis.set_ticks(yticks[1::10])

    # Grid
    if is_grid:
        ax.grid()

    # Title
    ax.set_title(axistitle, fontsize=30)

    # Labels
    ax.set_xlabel(r'x ($\mathrm{m}$)', fontsize=xlabelfontsize,
                  visible=is_labels)
    ax.set_ylabel(r'y ($\mathrm{m}$)', fontsize=ylabelfontsize,
                  visible=is_labels)
    ax.tick_params(length=20 if is_labels else 0)
    if is_ownticklabels:
        ax.set_xticklabels(xticklabels, visible=is_labels,
                           fontsize=xticklabelfontsize)
        ax.set_yticklabels(yticklabels, visible=is_labels,
                           fontsize=yticklabelfontsize)
    ax.tick_params(axis="x", which="both", top=False, labeltop=False)
    ax.tick_params(axis="y", which="both", right=False, labelright=False)

    # Axis limits: own limits, otherwise limits from specs
    if is_ownlims:
        ax.set_xlim(xlims[0], xlims[1])
        ax.set_ylim(ylims[0] if not is_xz else zlims[0],
                    ylims[1] if not is_xz else zlims[1])
    else:
        ax.set_xlim(
            sc.xlims(model_name, dat, let)[0],
            sc.xlims(model_name, dat, let)[1])
        ax.set_ylim(
            sc.ylims(model_name, dat, let)[0] if not is_xz
            else sc.zlims(model_name, dat, let)[0],
            sc.ylims(model_name, dat, let)[1] if not is_xz
            else sc.zlims(model_name, dat, let)[1])

    # Figure name (mark component / maskvalue in the name)
    if varname == 'v':
        varname = varname + '_' + str(v_component)
    if is_mask:
        varname = varname + '_' + str(maskvalue).zfill(2)
    pic_name = pm.py_output_filename(
        fa.tag,
        varname,
        sc.specl(model_name, dat, let) + '_' + str(nt),
        pic_format)

    return ax, pic_name
def read(
        which_methods,
        ensemble_sizes=[50, 70, 100, 250],
        model='wavebc',
        which_res='endres',
        stat_method='mean',
):
    """
    Reads residual arrays at beginning (begres) or end (endres) of
    the EnKF run and calculates an array of given statistical measure.

    Parameters
    ----------
    which_methods : array int
        Array of integers containing the method specifiers
        from module plotarrays.

    ensemble_sizes : array of integers
        array can typically contain 50, 70, 100, 250, 500, 1000, 2000

    model : string
        'wavebc' - Model wavebc
        'wave' - Model wave

    which_res : string
        'endres' - use residuals after EnKF run
        'begres' - use residuals before EnKF run
        'endstd' - use standard deviations after EnKF run
        'begstd' - use standard deviations before EnKF run

    stat_method : string
        'mean' - Calculate means
        'std' - Standard deviation
        'stdm' - Standard deviation of the mean
        'median' - Median or 50 Percentile
        'q25' - 25 Percentile
        'q75' - 75 Percentile

    Returns
    -------
    stat_array : array
        Array containing the statistical measures.

    stat_array_name : string
        Containing proposed saving location for array.
    """
    # Input checks
    if which_res not in ['endres', 'begres', 'endstd', 'begstd']:
        raise RuntimeError("which_res has to be 'endres', 'begres'"
                           ", 'endstd' or 'begstd'")
    if stat_method not in ['mean', 'std', 'stdm', 'median', 'q25', 'q75']:
        raise RuntimeError('stat_method wrong')
    for enssize in ensemble_sizes:
        if enssize not in [50, 70, 100, 250, 500, 1000, 2000, 10000]:
            raise RuntimeError('ensemble size wrong')
    if model not in ['wavebc', 'wave', 'wavewell', 'wavereal']:
        raise RuntimeError('model wrong')

    # Statistic dispatch table ('stdm' is handled separately because it
    # additionally needs the number of synthetic studies)
    stat_funcs = {
        'mean': np.mean,
        'std': np.std,
        'median': lambda r: np.percentile(r, 50),
        'q25': lambda r: np.percentile(r, 25),
        'q75': lambda r: np.percentile(r, 75),
    }

    # One row per method, one column per ensemble size
    stat_array = np.zeros([len(which_methods), len(ensemble_sizes)])

    for row, method_index in enumerate(which_methods):
        for col, enssize in enumerate(ensemble_sizes):
            # Date, letter and number of runs of this job
            dat = pa.dats[model][method_index][enssize]
            let = pa.lets[model][method_index][enssize]
            num = pa.nums[model][method_index][enssize]

            # Read residuals
            res = np.load(pm.py_output_filename('dists',
                                                which_res,
                                                model+'_'+dat+'_'+let,
                                                'npy'))

            # Calculate statistical quantity
            if stat_method == 'stdm':
                stat_array[row, col] = np.std(res)/np.sqrt(num)
            else:
                stat_array[row, col] = stat_funcs[stat_method](res)

    # Name of the array
    stat_array_name = pm.py_output_filename(
        ea.tag,
        which_res,
        stat_method+'_'+model+'_'
        + '_'.join([str(enssize) for enssize in ensemble_sizes])+'_'
        + '_'.join([str(i) for i in which_methods]),
        'npy')

    return stat_array, stat_array_name
def mean(
        model_name,
        dat,
        let,
        varname='uindex',
        mons=[0, 1],
):
    """
    Computing mean monitor arrays from certain monitoring points.

    Parameters
    ----------
    model_name : string
        String of model name.

    dat : string
        String with date of model run.

    let : string
        String of letter of model run.

    varname : string
        Variable name for array to be read.
        Possibilities: 'uindex' 'head','temp','kz', 'v'

    mons : array of integers
        Numbers for monitoring points for mean.
        IMPORTANT: num_mon = 0 corresponds to the first
        monitoring point in SHEMAT monitor file.
        General: num_mon = i corresponds to monitoring
        point i+1

    Returns
    -------
    mean_array : array
        Array containing the mean monitor variable array

    mean_array_name : string
        Containing proposed saving location for Array.

    Raises
    ------
    ValueError
        If mons is empty.
    RuntimeError
        If a monitoring numpy-file does not exist.
    """
    # FIX: an empty mons formerly fell through to an unbound-variable
    # NameError at the return; fail explicitly instead.
    if not mons:
        raise ValueError('mons must contain at least one monitoring point')

    # Sum the arrays of all requested monitoring points
    mean_array = None
    for num_mon in mons:
        filename = pm.py_output_filename(
            ma.tag, varname,
            sc.specl(model_name, dat, let) + '_' + str(num_mon), "npy")

        if not os.path.isfile(filename):
            raise RuntimeError('Monitoring numpy-file does not exist: '
                               + filename)

        arr = np.load(filename)
        mean_array = arr if mean_array is None else mean_array + arr

    # Divide once after the loop.
    # FIX: np.float was removed from NumPy (1.24); use the builtin.
    mean_array = mean_array / float(len(mons))

    # Mean array name
    mean_array_name = pm.py_output_filename(
        ma.tag, varname,
        sc.specl(model_name, dat, let) + '_' + 'mean' + '_'
        + '_'.join([str(i) for i in mons]), "npy")

    return mean_array, mean_array_name
def matrix(
        ax,
        which_methods=[0, 1, 2, 3, 4, 5, 6],
        which_res='endres',
        method='meanc',
        model='wavebc',
        ensemble_size=50,
        n_syn=1,                     # number of synthetic studies
        n_comparisons=10000,
        is_text=False,
        pic_format='pdf',            # 'png' or 'eps' or 'svg' or 'pdf'
        figpos=[0.14, 0.14, 0.8, 0.8],   # xbeg, ybeg, xrange, yrange
        # ylims=[0.28,0.82],
        is_longnames=True,
        ticklabelfontsize=10,
        xtick_y=0.0,
        is_color_trafo=False,
        # num_pack=4,  # Number of methods in pack
        # formatsos=['o','v','s','p','o','v','s','p'],
        # coleros=[(0.0,0.0,0.0),(0.0,0.0,0.0),(0.0,0.0,0.0),(0.0,0.0,0.0),
        #          (1.0,1.0,1.0),(1.0,1.0,1.0),(1.0,1.0,1.0),(1.0,1.0,1.0)],
        # markersize=10,
        # markeredgesize=1.5,
        # fontleg=30,  # 18
        # fonttit=40,
        # fontlab=40,
        # fonttic=30,
):
    """
    A plotting function for statistics of residual distributions:
    draws a comparison matrix of the methods in which_methods, with
    a pair of triangles per method pair (probability of one method
    beating the other) and a grey square for the undecided fraction.

    Parameters
    ----------
    ax : Axes
        The axes to draw to.

    which_methods : array of ints
        The methods to be loaded in ascending order.

    which_res : string
        'endres' - use residuals after EnKF run
        'begres' - use residuals before EnKF run

    method : string
        Which method to use for statistical comparison
        of the subset. If n_syn == 1, the comparison
        always defaults to comparing the residuals.
        'ttest' - Use the T-Test, testing if the two
                  samples belong to the same Gaussian
                  distribution.
        'gauss' - Calculate Gaussian distribution of the
                  difference and calculate its probability
                  to be larger than zero.
        'meanc' - Calculate the means and compare.

    model : string
        'wavebc' - Model wavebc
        'wave' - Model wave

    ensemble_size : integer
        Ensemble size of the job. Possibilities: 50,
        70, 100, 250, 500, 1000, 2000

    n_syn : integer
        Number of synthetic studies in subset.

    n_comparisons : integer
        Number of comparisons calculated.

    pic_format : string
        Format of the picture
        'pdf' - pdf-format
        'eps' - eps-format
        'png' - png-format
        'jpg' - jpg-format
        'svg' - svg-format

    Returns
    -------
    ax : Axes
        Axes containing plot.

    pic_name : string
        Containing proposed saving location for Figure.
    """
    # Check: admissible subset size depends on ensemble size
    if ensemble_size in [50, 70, 100, 250]:
        if n_syn > 1000:
            raise RuntimeError('n_syn wrong')
    elif ensemble_size in [500, 1000, 2000]:
        if n_syn > 100:
            raise RuntimeError('n_syn wrong')
    else:
        raise RuntimeError('ensemble size wrong')

    # Number of compared methods
    num_methods = len(which_methods)

    # Load probs: probs[i, j, k] with k = 0/1/2 for
    # better / undecided / worse (presumably — verify against the
    # writer of this file in the comparison script)
    probs = np.load(pm.py_output_filename(
        na.tag,
        'probs_'+which_res,
        model+'_'+method+'_'+str(ensemble_size)
        + '_'+str(n_syn)+'_'+str(n_comparisons)+'_'
        + '_'.join([str(i) for i in which_methods]),
        'npy'
    ))

    ax.set_position(figpos)

    # Rectangles in upper right half: Fraction of Undecided.
    # NOTE(review): undecided is a VIEW into probs[:, :, 1]; the
    # None/1.0 assignments below write through into probs. The text
    # loop later skips exactly the i == 1 cases that were overwritten,
    # so this aliasing is currently benign — but fragile.
    undecided = probs[:, :, 1]
    for ipm in range(num_methods):
        for jpm in range(num_methods):
            if ipm > jpm:
                # Lower left half white
                undecided[ipm, jpm] = None
            if ipm == jpm:
                # Diagonal black
                undecided[ipm, jpm] = 1.0
            if ipm < jpm:
                # Single comparisons white
                if n_syn == 1:
                    undecided[ipm, jpm] = None
                # One comparison white
                if n_syn == 1000:
                    undecided[ipm, jpm] = None
                # For mean comparison white
                if method == 'meanc':
                    undecided[ipm, jpm] = None

    ax.imshow(undecided, interpolation='nearest',
              cmap='Greys',
              norm=colors.Normalize(vmin=0, vmax=1, clip=False))

    # Triangles: Grid of cell-corner coordinates (cells centered on
    # integer coordinates, hence the -0.5 shift)
    X, Y = np.meshgrid(np.arange(num_methods+1), np.arange(num_methods+1))
    X = X.flatten()-0.5
    Y = Y.flatten()-0.5

    # Triangles: Indices into the flattened corner grid, two triangles
    # per off-diagonal cell
    triangles = np.zeros([2*np.sum(range(1, num_methods)), 3])
    ix = 0
    # Upper triangles
    for i in range(1, num_methods):
        for j in range(i, num_methods):
            triangles[ix, :] = [(i-1)+j*(num_methods+1),
                                i+j*(num_methods+1),
                                i+num_methods+j*(num_methods+1)]
            ix = ix+1
    # Lower triangles
    for i in range(1, num_methods):
        for j in range(i, num_methods):
            triangles[ix, :] = [i+j*(num_methods+1),
                                i+num_methods+j*(num_methods+1),
                                i+num_methods+1+j*(num_methods+1)]
            ix = ix+1

    # Triangles: Triangulation instance
    tria = mpl.tri.Triangulation(X, Y, triangles)

    # Triangles: Colors — upper triangles from probs[..., 0], lower
    # triangles from probs[..., 2], in the same order as the indices
    coleros = np.array([[probs[i, j, 0] for i in range(j+1, num_methods)]
                        for j in range(num_methods-1)]
                       + [[probs[i, j, 2] for i in
                           range(j+1, num_methods)]
                          for j in range(num_methods-1)])
    coleros = np.hstack(coleros)

    # Color Transformation: compress towards 0.5, then clip the
    # extremes to pure black/white
    if is_color_trafo:
        coleros = 0.5*(coleros-0.5)+0.5
        coleros[coleros < 0.275] = 0.0
        coleros[coleros > 0.725] = 1.0

    # Triangles: Plot with facecolor
    plt.tripcolor(
        tria,
        facecolors=coleros,
        cmap=mpl.cm.Greys,
        norm=colors.Normalize(vmin=0, vmax=1, clip=False),
        edgecolor='k'
    )

    # Plot: Mostly ticks (long or short method names as labels)
    ticklabelinput = ([pa.longnames_methods[which_methods[i]]
                       for i in range(num_methods)] if is_longnames
                      else [pa.names_methods[which_methods[i]]
                            for i in range(num_methods)])
    ax.set_xticks([i for i in range(num_methods)])
    ax.set_xticklabels(ticklabelinput, fontsize=ticklabelfontsize,
                       rotation=90, y=xtick_y)
    ax.set_yticks([i for i in range(num_methods)])
    ax.set_yticklabels(ticklabelinput, fontsize=ticklabelfontsize)
    ax.tick_params(length=0)
    ax.set_frame_on(False)

    # Text: percentage labels inside the triangles/squares.
    # str(ntext)[0:4] truncates to at most 4 characters;
    # str(ntext)[0:0] is the empty string, i.e. 0% and 100% are
    # deliberately left unlabeled.
    for i in range(3):
        for itext in range(num_methods):
            for jtext in range(num_methods):
                if itext < jtext:
                    ntext = (np.around(100*probs[jtext, itext, i],
                                       decimals=1)
                             if i != 1 else
                             np.around(100*probs[itext, jtext, i],
                                       decimals=1))
                    ttext = (str(ntext)[0:4] if 0 < ntext < 100
                             else str(ntext)[0:0])
                    # Per-kind text offsets inside the cell
                    px = (itext-0.45 if i == 0
                          else (jtext-0.125 if i == 1 else itext-0.10))
                    py = (jtext-0.15 if i == 0
                          else (itext+0.05 if i == 1 else jtext+0.3))
                    # White text on dark background
                    colero = 'white' if ntext > 50 else 'black'
                    # Skip undecided labels for the cases blanked above
                    if i != 1 or (n_syn != 1000 and n_syn != 1
                                  and method != "meanc"):
                        ax.text(px, py, ttext, color=colero, fontsize=20)

    # Text: n_syn and ensemble_size info box
    if is_text:
        model_spec = ' Tracer ' if model == 'wavereal' else ' Well '
        ax.text(
            3.5, 1.5,
            model_spec+'\n'
            + r' $n_{e}$: '+str(ensemble_size).rjust(4)+'\n'
            + r' $n_{syn}$: '+str(n_syn).rjust(4),
            linespacing=1.5,
            fontsize=30,
            bbox={'facecolor': 'grey', 'alpha': 0.5, 'pad': 10},
        )

    # Saving location
    pic_name = pm.py_output_filename(
        na.tag,
        'matrix_'+which_res,
        model+'_'+method+'_'+str(ensemble_size)
        + '_'+str(n_syn)+'_'+str(n_comparisons)+'_'
        + '_'.join([str(i) for i in which_methods]),
        pic_format
    )

    return ax, pic_name
def read(
        model_name,
        dat,
        let,
        fdir=None,
):
    """
    Read the single-cell ensemble output of a SHEMAT-Suite run into
    one numpy array.

    Parameters
    ----------
    model_name : string
        String of model name.

    dat : string
        String with date of model run.

    let : string
        String of letter of model run.

    fdir : string or None
        Directory of the single-cell output files; defaults to
        <output_dir>/single_cell_output.

    Returns
    -------
    numpy_array : array
        Array containing the full single_cell array
        numpy_array[irobs,iens,iloc]:
        - irobs: Observation time index
        - iens: Ensemble member index
        - iloc: Single cell location index

    numpy_array_name : string
        Containing proposed saving location for Array.
    """
    # Default single-cell output directory below the model output dir
    if fdir is None:
        fdir = (rm.make_output_dirs(model_name, dat, let)[0]
                + '/single_cell_output')

    # Cell locations and the variable recorded at each location
    locs = sc.single_cell_locs(model_name, dat, let)
    variables = sc.single_cell_variables(model_name, dat, let)

    # Allocate output: [observation time, ensemble member, location]
    nrobs_int = sc.nrobs_int(model_name, dat, let)
    num_locs = sc.num_single_cell(model_name, dat, let)
    nrens = sc.nrens(model_name, dat, let)
    out = np.zeros([nrobs_int, nrens, num_locs])

    # One text file per single-cell location
    for iloc, loc in enumerate(locs):
        fname = 'single_cell_E1_{}_{}_{}_{}_aft.txt'.format(
            str(loc[0]).zfill(4),
            str(loc[1]).zfill(4),
            str(loc[2]).zfill(4),
            str(variables[iloc]).zfill(4))
        out[:, :, iloc] = np.loadtxt(fdir + '/' + fname, skiprows=5)

    # Proposed saving location
    out_name = pm.py_output_filename(
        sca.tag,
        'single_cell',
        sc.specl(model_name, dat, let) + '_' + str(num_locs),
        "npy",
    )

    return out, out_name
def read(
        model_name,
        dat,
        let,
        fdir=None,
        fname=None,
        varname='uindex',
        nt=0,
):
    """
    Reading variable array from SHEMAT-Suite vtk- or hdf5-file.

    Parameters
    ----------
    model_name : string
        String of model name.

    dat : string
        String with date of model run.

    let : string
        String of letter of model run.

    fdir : string or None
        Directory of the output file; defaults to the samples
        output directory of the run.

    fname : string or None
        File name; defaults to the time output file of step nt.

    varname : string
        Variable name for array to be read.
        Possibilities: 'uindex' 'head','temp','kz', 'v'

    nt : string
        Number of time step output.

    Returns
    -------
    numpy_array : array
        Array containing the variable array

    numpy_array_name : string
        Containing proposed saving location for Array.

    Raises
    ------
    RuntimeError
        If the file name ends in neither vtk nor h5/hdf.
    """
    # Default directory / file name from the run bookkeeping
    if fdir is None:
        # samples_output_dir
        fdir = rm.make_output_dirs(model_name, dat, let)[1]
    if fname is None:
        # time_out_file
        fname = rm.make_file_dir_names(model_name, nt)[17]

    # Determine filetype from the file-name suffix
    if fname.endswith('vtk'):
        ftype = 'vtk'
    elif fname.endswith('h5') or fname.endswith('hdf'):
        ftype = 'hdf'
    else:
        # FIX: formerly the offending name was print()ed to stdout and
        # the exception carried no context; include it in the message.
        raise RuntimeError('Wrong filetype: ' + fname)

    # Read the array with the matching reader
    if ftype == 'vtk':
        reader = pf.my_vtk(fdir, fname, varname)
        numpy_array = pf.my_vtk_to_numpy(reader)
    else:
        numpy_array = pf.my_hdf(fdir + '/' + fname, varname)

    # Proposed saving location
    numpy_array_name = pm.py_output_filename(
        fa.tag,
        varname,
        sc.specl(model_name, dat, let) + '_' + str(nt),
        "npy",
    )

    return numpy_array, numpy_array_name
def plot(
        ax,
        model_name,
        dat,
        let,
        befaft='aft',
        nt=10,
        numpy_array_name=None,
        is_grid=True,
        is_mask=False,
        is_labels=True,
        is_ownticks=False,
        axistitle='',
        title_fontsize=30,
        varname='kz_mean',           # 'head','v','temp','kz', 'uindex'
        v_component=1,               # 0, 1, 2
        is_position=True,
        position=[0.1, 0.1, 0.6, 0.8],
        is_ownlims=False,
        xlims=[0.0, 620.0],
        ylims=[0.0, 620.0],
        alpha=1.0,
        maskvalue=7,
        xlabelfontsize=40,
        ylabelfontsize=40,
        xownticks=[0.1 + i * 0.1 for i in range(9)],
        yownticks=[0.1 + i * 0.1 for i in range(9)],
        diff_ticks=1,
        num_cbar=7,
        low_cbar=10.0285,
        high_cbar=10.0304,
        auto_cbar=True,
        pic_format='pdf',            # 'png','eps','pdf'
):
    """
    A plotting function for variable arrays in a NonUniformGrid.

    Parameters
    ----------
    ax : Axes
        The axes to draw to.

    model_name : string
        String of model name.

    dat : string
        String with date of model run.

    let : string
        String of letter of model run.

    nt : integer
        Number inside file name.

    numpy_array_name : string
        Full file name of numpy array including ending .npy.
        If None, the name is constructed from the other arguments.

    Returns
    -------
    ax : Axes
        Axes containing image of variable array.

    pic_name : string
        Containing proposed saving location for Figure.
    """
    # Read grid arrays from pskf/tools/plot/specs.py
    x = sc.x(model_name, dat, let)
    y = sc.y(model_name, dat, let)
    xticks = sc.xticks(model_name, dat, let)[::diff_ticks]
    yticks = sc.yticks(model_name, dat, let)[::diff_ticks]

    # Load variable array (explicit file name wins)
    if not numpy_array_name:
        var = np.load(
            pm.py_output_filename(
                aa.tag,
                varname + '_' + str(nt).zfill(4) + '_' + befaft,
                sc.specl(model_name, dat, let),
                "npy"))
    else:
        var = np.load(numpy_array_name)

    if varname == 'v':
        var = var[:, :, v_component]

    if auto_cbar:
        low_cbar = var.min()
        high_cbar = var.max()

    # Possible mask: show only cells within +/- 0.5 of maskvalue
    if is_mask:
        var = np.ma.array(var,
                          mask=np.logical_or(var < maskvalue - 0.5,
                                             var > maskvalue + 0.5))

    # Axis position
    if is_position:
        ax.set_position(position)

    # Create image
    im = mpl.image.NonUniformImage(ax,
                                   interpolation='nearest',
                                   cmap=pf.cmap_discretize(
                                       cm.viridis, num_cbar),
                                   norm=colors.Normalize(vmin=low_cbar,
                                                         vmax=high_cbar,
                                                         clip=False))
    im.set_data(x, y, var)
    im.set_alpha(alpha)
    # FIX: mutating ax.images directly (ax.images.append) was deprecated
    # and removed in recent matplotlib; add_image registers the artist
    # correctly.
    ax.add_image(im)

    # Ticks
    if is_ownticks:
        ax.xaxis.set_ticks(xownticks)
        ax.yaxis.set_ticks(yownticks)
    else:
        ax.xaxis.set_ticks(xticks)
        ax.yaxis.set_ticks(yticks)

    # Grid
    if is_grid:
        ax.grid()

    # Title
    ax.set_title(axistitle, fontsize=title_fontsize)

    # Labels
    ax.set_xlabel('[m]', fontsize=xlabelfontsize, visible=is_labels)
    ax.set_ylabel('[m]', fontsize=ylabelfontsize, visible=is_labels)
    ax.tick_params(length=10 if is_labels else 0)
    ax.set_yticklabels(ax.get_yticks(), visible=is_labels)
    ax.set_xticklabels(ax.get_xticks(), visible=is_labels)

    # Axis limits
    ax.set_xlim(xlims[0], xlims[1])
    ax.set_ylim(ylims[0], ylims[1])

    # Figure name (mark component / maskvalue in the name)
    if varname == 'v':
        varname = varname + '_' + str(v_component)
    if is_mask:
        varname = varname + '_' + str(maskvalue).zfill(2)
    pic_name = pm.py_output_filename(aa.tag,
                                     varname + '_' + str(nt).zfill(4),
                                     sc.specl(model_name, dat, let),
                                     pic_format)

    return ax, pic_name
def plot(
        ax,
        model_name,
        dat,
        let,
        points=[0, 1, 2, 3],
        is_pairs=0,
        pairs=[[0, 1], [2, 3]],
        is_labels=True,
        is_ownticks=False,
        is_ownticklabels=False,
        axistitle='',
        is_position=True,
        position=[0.1, 0.1, 0.6, 0.8],
        is_ownlims=False,
        xlims=[0, 20],
        ylims=[0.0, 1.0],
        xlabeltext='Time',
        ylabeltext=r'T [$^\circ C \,$]',
        xlabelfontsize=14,
        ylabelfontsize=14,
        xownticks=[10, 60, 600, 3600, 24*3600, 20*24*3600],
        yownticks=[0.1+i*0.1 for i in range(9)],
        xticklabels=['10s', '1min', '10min', '1h', '1d', '20d'],
        yticklabels=[0.1+i*0.1 for i in range(9)],
        labels={0: 'North',          # Outer ring
                1: 'Northwest',
                2: 'West',
                3: 'Southwest',
                4: 'South',
                5: 'Southeast',
                6: 'East',
                7: 'Northeast',
                8: 'North',          # Inner ring
                9: 'Northwest',
                10: 'West',
                11: 'Southwest',
                12: 'South',
                13: 'Southeast',
                14: 'East',
                15: 'Northeast'},
        coleros=['b', 'g', 'lightblue', 'cyan',
                 'r', 'chocolate', 'magenta', 'orange',
                 'b', 'g', 'lightblue', 'cyan',
                 'r', 'chocolate', 'magenta', 'orange'],
        xticklabelfontsize=20,
        yticklabelfontsize=20,
        pic_format='pdf',            # 'png','eps','pdf'
):
    """
    A plotting function for means of single cell ensembles against
    observation time.

    Parameters
    ----------
    ax : Axes
        The axes to draw to.

    model_name : string
        String of model name.
        'wavebc' - Model wavebc
        'wavereal' - Model wavereal
        'wavewell' - Model wavewell
        'wave' - Model wave

    dat : string
        String with date of model run.

    let : string
        String of letter of model run.

    Returns
    -------
    ax : Axes
        Axes containing figure of variable array.

    pic_name : string
        Containing proposed saving location for Figure.
    """
    # Load single cell array [time, ensemble member, location]
    num_locs = sc.num_single_cell(model_name, dat, let)
    var = np.load(
        pm.py_output_filename(
            sca.tag,
            'single_cell',
            sc.specl(model_name, dat, let)+'_'+str(num_locs),
            "npy",
        )
    )

    # Load observation time array
    t = sc.obstimes(model_name, dat, let)

    # Axis position
    if is_position:
        ax.set_position(position)

    # Title
    ax.set_title(axistitle, fontsize=30)

    # Ensemble means per observation time, one curve per point.
    # (plain loop instead of a side-effect list comprehension)
    nrobs_int = sc.nrobs_int(model_name, dat, let)
    ens_mean = np.mean(var, axis=1)
    for ip, i in enumerate(points):
        ax.semilogx(t, ens_mean[:nrobs_int, i], 'o',
                    markersize=2, label=labels[i], c=coleros[ip])

    # Labels
    if is_labels:
        ax.set_xlabel(xlabeltext, fontsize=xlabelfontsize, labelpad=0)
        ax.set_ylabel(ylabeltext, fontsize=ylabelfontsize)

    # Ticks
    if is_ownticks:
        ax.xaxis.set_ticks(xownticks)
        ax.yaxis.set_ticks(yownticks)
    if is_ownticklabels:
        ax.xaxis.set_ticklabels(xticklabels, fontsize=xticklabelfontsize)
        ax.yaxis.set_ticklabels(yticklabels, fontsize=yticklabelfontsize)

    # Limits.
    # FIX: Axes has no xlim()/ylim() methods; the original
    # ax.xlim(...)/ax.ylim(...) raised AttributeError whenever
    # is_ownlims was True.
    if is_ownlims:
        ax.set_xlim(xlims[0], xlims[1])
        ax.set_ylim(ylims[0], ylims[1])

    # Figure name
    pic_name = pm.py_output_filename(
        sca.tag,
        'single_cell',
        sc.specl(model_name, dat, let)+'_'+str(num_locs),
        pic_format
    )

    return ax, pic_name
def indsort(
        which_methods,
        model_name='wavebc',
        which_res='endres',
        stat_method='mean',
        ensemble_sizes=[50, 70, 100, 250],
        ensemble_size=50,
        is_update=True,
):
    """
    Sort the methods specified in which_methods according
    to an array of statistical measures.

    Parameters
    ----------
    which_methods : array int
        Array of integers containing the method specifiers
        from module plotarrays.

    model_name : string
        'wavebc' - Model wavebc
        'wavereal' - Model wavereal
        'wavewell' - Model wavewell
        'wave' - Model wave

    which_res : string
        'endres' - use residuals after EnKF run
        'begres' - use residuals before EnKF run

    stat_method : string
        'mean' - Calculate means
        'std' - Standard deviation
        'stdm' - Standard deviation of the mean
        'median' - Median or 50 Percentile
        'q25' - 25 Percentile
        'q75' - 75 Percentile

    ensemble_sizes : array of integers
        array can typically contain 50, 70, 100, 250, 500, 1000, 2000

    ensemble_size : integer
        Possibilities: 50, 70, 100, 250, 500, 1000, 2000.
        The ensemble size used for sorting.

    is_update : boolean
        If True, the statistics array is regenerated. If False, an
        existing array is used. Should be False if the underlying
        data is not present.

    Returns
    -------
    which_methods_sorted : array of ints
        Array sorted indices.

    Notes
    -----
    The statistics array is expected in output/errorplot/npy; if it is
    missing (or is_update is True) it is generated via
    pskf.scripts.errorplot.read.
    """
    # Location of the statistics array on disk
    array_name = pm.py_output_filename(
        'errorplot',
        which_res,
        stat_method + '_' + model_name + '_'
        + '_'.join([str(enssize) for enssize in ensemble_sizes])
        + '_' + '_'.join([str(i) for i in which_methods]),
        'npy')

    # (Re-)generate the array when requested or when it is missing
    if is_update or not os.path.isfile(array_name):
        stat_array, stat_array_name = er.read(
            which_methods=which_methods,
            ensemble_sizes=ensemble_sizes,
            model=model_name,
            which_res=which_res,
            stat_method=stat_method,
        )
        np.save(stat_array_name, stat_array)

    # Column belonging to the requested ensemble size
    column = pa.indens[model_name][ensemble_size]

    # Sorting order of the statistic at that ensemble size
    measures = np.load(array_name)[:, column]
    return np.argsort(measures)