def main():
    """Read the fitted-parameter posterior and save a corner plot (PNG + PDF).

    Reads the posterior via ``read_posterior(cli.full_path)``, builds Keplerian
    axis labels, and writes ``plots/corner_plot_fitted.{png,pdf}`` under the
    run folder given on the command line.
    """
    cli = anc.get_args()
    parameter_names, posterior, nposterior, nfit = read_posterior(cli.full_path)

    # set label and legend names
    Kep_labels = anc.keplerian_legend(parameter_names, cli.m_type)
    plot_labels = [u"%s" % (Kep_labels[ii]) for ii in range(0, nfit)]

    # output folder for the plots, created on demand
    plot_folder = os.path.join(cli.full_path, 'plots')
    if not os.path.isdir(plot_folder):
        os.makedirs(plot_folder)
    corner_file = os.path.join(plot_folder, 'corner_plot_fitted')

    # histogram bin count (Doane rule); NOTE(review): currently not passed to
    # corner.corner (the bins=k argument was commented out upstream) — confirm
    # whether it should be re-enabled.
    k = anc.get_bins(posterior, rule='doane')

    # 2D contour levels: 0sigma, 1sigma, 2sigma, 3sigma
    nl = 4
    levels = [1. - np.exp(-0.5 * ii) for ii in range(0, nl)]

    fig = corner.corner(
        posterior,
        labels=plot_labels,
        quantiles=[0.16, 0.5, 0.84],
        show_titles=True,
        levels=levels,
    )

    # fix: use print() calls so the script also runs under Python 3
    # (a single parenthesized argument prints identically under Python 2)
    fig.savefig('%s.png' % (corner_file), dpi=300)
    print('Saved plot %s.png' % (corner_file))
    fig.savefig('%s.pdf' % (corner_file), dpi=150)
    print('Saved plot %s.pdf' % (corner_file))

    return
def main():
    """Corner plot of the fitted-parameter posterior, saved as PNG and PDF.

    Duplicate driver of the block above; loads the posterior with
    ``read_posterior``, labels axes with the Keplerian legend, and writes
    ``plots/corner_plot_fitted.{png,pdf}``.
    """
    cli = anc.get_args()
    parameter_names, posterior, nposterior, nfit = read_posterior(cli.full_path)

    # set label and legend names
    Kep_labels = anc.keplerian_legend(parameter_names, cli.m_type)
    plot_labels = [u"%s" % (Kep_labels[ii]) for ii in range(0, nfit)]

    # output folder for the plots, created on demand
    plot_folder = os.path.join(cli.full_path, 'plots')
    if not os.path.isdir(plot_folder):
        os.makedirs(plot_folder)
    corner_file = os.path.join(plot_folder, 'corner_plot_fitted')

    # histogram bin count (Doane rule); NOTE(review): not forwarded to
    # corner.corner — the bins=k argument is commented out upstream.
    k = anc.get_bins(posterior, rule='doane')

    # 2D contour levels: 0sigma, 1sigma, 2sigma, 3sigma
    nl = 4
    levels = [1. - np.exp(-0.5 * ii) for ii in range(0, nl)]

    fig = corner.corner(
        posterior,
        labels=plot_labels,
        quantiles=[0.16, 0.5, 0.84],
        show_titles=True,
        levels=levels,
    )

    # fix: Python-2-only `print` statements replaced with print() calls,
    # which behave identically for one parenthesized argument on Py2 and Py3
    fig.savefig('%s.png' % (corner_file), dpi=300)
    print('Saved plot %s.png' % (corner_file))
    fig.savefig('%s.pdf' % (corner_file), dpi=150)
    print('Saved plot %s.pdf' % (corner_file))

    return
def main():
    """Per-parameter Geweke convergence plots for an emcee run.

    For each fitted parameter, computes the Geweke z-score of every walker
    (via ``anc.geweke_test``) and saves one PNG per parameter under the
    emcee plots folder. Progress is logged both to file and to screen.
    """
    # ---
    # initialize logger: one file handler (Geweke_log.txt) + one stream handler
    logger = logging.getLogger("Main_log")
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(asctime)s - %(message)s")

    # read cli arguments
    cli = anc.get_args()
    #plot_folder = prepare_plot_folder(working_path)
    emcee_plots = anc.prepare_emcee_plot_folder(cli.full_path)
    log_file = os.path.join(emcee_plots, 'Geweke_log.txt')
    flog = logging.FileHandler(log_file, 'w')
    flog.setLevel(logging.DEBUG)
    flog.setFormatter(formatter)
    logger.addHandler(flog)
    # log screen
    slog = logging.StreamHandler()
    slog.setLevel(logging.DEBUG)
    slog.setFormatter(formatter)
    logger.addHandler(slog)

    # computes mass conversion factor
    #m_factor = anc.mass_conversion_factor(cli.m_type)
    m_factor, m_unit = anc.mass_type_factor(1., cli.m_type, False)

    # set emcee and trades folder
    emcee_folder = cli.full_path
    trades_folder = os.path.join(os.path.dirname(cli.full_path), '')
    # and best folder
    emcee_file, emcee_best, folder_best = anc.get_emcee_file_and_best(emcee_folder, cli.temp_status)

    # get data from the hdf5 file
    parameter_names_emcee, parameter_boundaries, chains, acceptance_fraction, autocor_time, lnprobability, ln_err_const, completed_steps = anc.get_data(emcee_file, cli.temp_status)

    # print Memory occupation of ...
    anc.print_memory_usage(chains)

    nfit, nwalkers, nruns, nburnin, nruns_sel = anc.get_emcee_parameters(chains, cli.temp_status, cli.nburnin, completed_steps)
    logger.info('nfit(%d), nwalkers(%d), nruns(%d), nburnin(%d), nruns_sel(%d)' % (nfit, nwalkers, nruns, nburnin, nruns_sel))

    # set label and legend names
    kel_labels = anc.keplerian_legend(parameter_names_emcee, cli.m_type)

    chains_T, parameter_boundaries = anc.select_transpose_convert_chains(nfit, nwalkers, nburnin, nruns, nruns_sel, m_factor, parameter_names_emcee, parameter_boundaries, chains)

    # number of steps to analyse: full completed run if temp_status, else nruns;
    # cli.sel_steps == 0 means "use all available steps"
    if (cli.temp_status):
        n_steps = completed_steps
    else:
        n_steps = nruns
    sel_steps = int(cli.sel_steps)
    if (sel_steps == 0):
        sel_steps = n_steps

    # NOTE(review): `cols` (legend column count) is computed but never used —
    # the ax.legend call below is commented out.
    cols = 1 + int(np.rint(nwalkers / 40.))

    # one Geweke figure per fitted parameter: z-score vs starting step,
    # one curve per walker, with +/-2 sigma guide lines
    for ifit in range(0, nfit):
        logger.info('Parameter: %13s' % (parameter_names_emcee[ifit]))
        #fig = plt.figure(figsize=(12,12))
        fig = plt.figure(figsize=(6, 6))
        lower_interval, z_score = anc.geweke_test(chains_T[:, :, ifit], start_frac=0.01, n_sel_steps=sel_steps)
        ax = plt.subplot2grid((1, 1), (0, 0))
        for i_c in range(0, nwalkers):
            ax.plot(lower_interval, z_score[:, i_c], '.-', label='walker %d' % (i_c + 1), alpha=0.8)
        ax.axhline(2., color='lightgray')
        ax.axhline(-2., color='lightgray')
        ax.set_xlabel('steps (%s)' % (parameter_names_emcee[ifit].strip()))
        #plt.legend(loc='best',fontsize=9)
        #ax.legend(loc='center left', fontsize=9, bbox_to_anchor=(1, 0.5), ncol=cols)
        fig.savefig(os.path.join(emcee_plots, 'geweke_%03d_%s.png' % (ifit + 1, parameter_names_emcee[ifit])), bbox_inches='tight', dpi=200)
        plt.close(fig)
        # NOTE(review): the log message reports 'geweke_trace_pam_%s.png' but the
        # file actually saved above is 'geweke_%03d_%s.png' — confirm intended name.
        logger.info('saved plot %s' % (os.path.join(emcee_plots, 'geweke_trace_pam_%s.png' % (parameter_names_emcee[ifit]))))

    logger.info('')

    return
def main():
    """Plot the PSO population evolution, one figure per fitted parameter.

    Reads ``pso_run.hdf5`` from the run folder, rescales planetary-mass
    parameters by the requested mass unit, and saves
    ``plots/evolution_<name>.png`` showing every particle (gray) and,
    when available, the best-of-swarm trajectory (black).
    """
    print
    print ' ================== '
    print ' PSO PLOTS'
    print ' ================== '
    print

    # read cli arguments
    cli = get_args()

    # computes mass conversion factor
    #m_factor = mass_conversion_factor(cli.m_type)
    m_factor, m_unit = mass_type_factor(1., cli.mtype, False)

    # set pso_file
    pso_file = os.path.join(cli.full_path, 'pso_run.hdf5')
    population, population_fitness, pso_parameters, pso_fitness, pso_best_evolution, parameters_minmax, parameter_names, pop_shape = anc.get_pso_data(pso_file)
    nfit = pop_shape[0]
    npop = pop_shape[1]
    niter = pop_shape[2]
    # iteration axis is 1-based
    iteration = np.arange(0, niter) + 1

    # NOTE(review): parameters_minmax_bck is saved but never used afterwards
    if (isinstance(parameters_minmax, type(population_fitness))):
        parameters_minmax_bck = parameters_minmax.copy()

    # set label and legend names
    kel_legends, labels_list = anc.keplerian_legend(parameter_names, cli.m_type)

    anc.print_memory_usage(population)
    anc.print_memory_usage(population_fitness)

    pso_plots = os.path.join(cli.full_path, 'plots')
    if (not os.path.isdir(pso_plots)):
        os.makedirs(pso_plots)

    # parameter_names and parameters_minmax in pso_run.hdf5
    if (isinstance(parameter_names, type(population)) and isinstance(parameters_minmax, type(population_fitness))):
        for ii in range(0, nfit):
            print 'parameter: %s' % (parameter_names[ii])
            # mass parameters start with 'm' but not 'mA' (mean anomaly):
            # rescale both the population values and the y-limits
            if (parameter_names[ii][0] == 'm' and parameter_names[ii][1] != 'A'):
                population[ii, :, :] = population[ii, :, :] * m_factor
                y_min = parameters_minmax[ii, 0] * m_factor
                y_max = parameters_minmax[ii, 1] * m_factor
            else:
                y_min = parameters_minmax[ii, 0]
                y_max = parameters_minmax[ii, 1]
            print 'boundaries: [%.6f, %.6f]' % (y_min, y_max)
            print ' minmax: [%.6f, %.6f]' % (np.min(population[ii, :, :]), np.max(population[ii, :, :]))
            pso_fig_file = os.path.join(pso_plots, 'evolution_%s.png' % (parameter_names[ii]))
            # trailing comma: keep the line open until ' done' is printed below
            print ' %s' % (pso_fig_file),
            fig = plt.figure(figsize=(12, 12))
            # every particle of the swarm in gray
            for jj in range(0, npop):
                #print jj,
                plt.plot(iteration, population[ii, jj, :], marker='o', mfc='gray', mec='none', ls='', ms=4)
            plt.ylim(y_min, y_max)
            # overlay the best-of-swarm evolution (black) when present
            if (isinstance(pso_best_evolution, type(population_fitness))):
                if (parameter_names[ii][0] == 'm' and parameter_names[ii][1] != 'A'):
                    plt.plot(iteration, pso_best_evolution[ii, :] * m_factor, marker='o', mfc='black', mec='white', mew=0.25, ls='-', ms=5)
                    #print pso_best_evolution[-1,0],pso_best_evolution[-1,-1]
                else:
                    plt.plot(iteration, pso_best_evolution[ii, :], marker='o', mfc='black', mec='white', mew=0.25, ls='-', ms=5)
            plt.xlabel('$N_\mathrm{iteration}$')
            plt.ylabel(kel_legends[ii])
            plt.draw()
            fig.savefig(pso_fig_file, bbox_inches='tight', dpi=150)
            print ' done'
    #elif ():

    return
def main():
    """Run a PyPolyChord nested-sampling fit on a TRADES planetary system.

    Initialises the Fortran TRADES library from the working path, builds the
    (sqrt(e)-parameterised) fitting space, defines the likelihood and uniform
    prior closures, runs PolyChord, and optionally shows a getdist triangle
    plot of the resulting posterior.
    """
    # READ COMMAND LINE ARGUMENTS
    cli = get_args()
    # STARTING TIME
    start = time.localtime()
    # NOTE(review): pc_output_dir / pc_output_files are shadowed below by
    # cli.pc_output_dir / cli.pc_output_files and appear unused — confirm.
    pc_output_dir = '%d-%02d-%02dT%02dh%02dm%02ds_' % (start.tm_year, start.tm_mon, start.tm_mday, start.tm_hour, start.tm_min, start.tm_sec)
    pc_output_files = 'trades_pc'
    # RENAME
    working_path = cli.full_path
    nthreads = 1

    # INITIALISE TRADES WITH SUBROUTINE WITHIN TRADES_LIB -> PARAMETER NAMES, MINMAX, INTEGRATION ARGS, READ DATA ...
    pytrades.initialize_trades(working_path, cli.sub_folder, nthreads)

    # RETRIEVE DATA AND VARIABLES FROM TRADES_LIB MODULE
    #global n_bodies, n_planets, ndata, npar, nfit, dof, inv_dof
    n_bodies = pytrades.n_bodies  # NUMBER OF TOTAL BODIES OF THE SYSTEM
    n_planets = n_bodies - 1  # NUMBER OF PLANETS IN THE SYSTEM
    ndata = pytrades.ndata  # TOTAL NUMBER OF DATA AVAILABLE
    npar = pytrades.npar  # NUMBER OF TOTAL PARAMATERS ~n_planets X 6
    nfit = pytrades.nfit  # NUMBER OF PARAMETERS TO FIT
    nfree = pytrades.nfree  # NUMBER OF FREE PARAMETERS (ie nrvset)
    dof = pytrades.dof  # NUMBER OF DEGREES OF FREEDOM = NDATA - NFIT
    global inv_dof
    #inv_dof = np.float64(1.0 / dof)
    inv_dof = pytrades_lib.pytrades.inv_dof

    # READ THE NAMES OF THE PARAMETERS FROM THE TRADES_LIB AND CONVERT IT TO PYTHON STRINGS
    str_len = pytrades.str_len
    temp_names = pytrades.get_parameter_names(nfit, str_len)
    trades_names = anc.convert_fortran_charray2python_strararray(temp_names)
    fitting_names = anc.trades_names_to_emcee(trades_names)

    # save initial_fitting parameters into array
    # NOTE(review): `trades_parameters` is used here but never assigned in this
    # function — this looks like a NameError at runtime (probably meant to read
    # it from pytrades first); confirm against the original script.
    original_fit_parameters = trades_parameters.copy()
    fitting_parameters = anc.e_to_sqrte_fitting(trades_parameters, trades_names)

    trades_minmax = pytrades.parameters_minmax  # PARAMETER BOUNDARIES
    parameters_minmax = anc.e_to_sqrte_boundaries(trades_minmax, trades_names)

    # RADIAL VELOCITIES SET
    n_rv = pytrades_lib.pytrades.nrv
    n_set_rv = pytrades_lib.pytrades.nrvset

    # TRANSITS SET
    n_t0 = pytrades_lib.pytrades.nt0
    n_t0_sum = pytrades_lib.pytrades.ntts
    n_set_t0 = 0
    # count planets with at least one transit time
    for i in range(0, n_bodies - 1):
        if (n_t0[i] > 0):
            n_set_t0 += 1

    # compute global constant for the loglhd
    global ln_err_const
    #try:
    ## fortran variable RV in python will be rv!!!
    #e_RVo = np.array(pytrades_lib.pytrades.ervobs[:], dtype=np.float64)
    #except:
    #e_RVo = np.array([0.], dtype=np.float64)
    #try:
    #e_T0o = np.array(pytrades_lib.pytrades.et0obs[:,:], dtype=np.float64).reshape((-1))
    #except:
    #e_T0o = np.array([0.], dtype=np.float64)
    #ln_err_const = anc.compute_ln_err_const(dof, e_RVo, e_T0o, True)
    ln_err_const = pytrades_lib.pytrades.ln_err_const

    # INITIALISE SCRIPT FOLDER/LOG FILE
    working_folder, run_log, of_run = init_folder(working_path, cli.sub_folder)

    anc.print_both('', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both(' pyTRADES', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both('', of_run)
    anc.print_both(' WORKING PATH = %s' % (working_path), of_run)
    anc.print_both(' dof = ndata(%d) - nfit(%d) - nfree(%d) = %d' % (ndata, nfit, nfree, dof), of_run)
    anc.print_both(' Total N_RV = %d for %d set(s)' % (n_rv, n_set_rv), of_run)
    anc.print_both(' Total N_T0 = %d for %d out of %d planet(s)' % (n_t0_sum, n_set_t0, n_planets), of_run)
    anc.print_both(' %s = %.7f' % ('log constant error = ', ln_err_const), of_run)

    # SET PYPOLYCHORD
    # needed to define number of derived parameters for PyPolyChord
    nder = 0

    # define the loglikelihood function for PyPolyChord
    def likelihood(fitting_par):
        # derived parameters
        derived_par = [0.0] * nder
        # convert fitting_par to trades_par (sqrt(e) parameterisation -> e)
        trades_par = anc.sqrte_to_e_fitting(fitting_par, fitting_names)
        loglhd = 0.
        check = 1
        loglhd, check = pytrades.fortran_loglikelihood(np.array(trades_par, dtype=np.float64))
        #print loglhd, ln_err_const
        loglhd = loglhd + ln_err_const  # ln_err_const: global variable
        return loglhd, derived_par

    # define the prior for the fitting parameters
    def prior(hypercube):
        """ Uniform prior from [-1,1]^D. Maps the unit hypercube onto the
        fitted-parameter boundaries via PC_priors.UniformPrior. """
        fitting_par = [0.0] * nfit
        for i, x in enumerate(hypercube):
            fitting_par[i] = PC_priors.UniformPrior(parameters_minmax[i, 0], parameters_minmax[i, 1])(x)
        return fitting_par

    # set PyPolyChord: the pc_settings define how to run PC, e.g. nlive, precision_criterio, etc.
    pc_settings = PC_settings.PolyChordSettings(nfit, nder)
    pc_settings.base_dir = cli.pc_output_dir
    pc_settings.file_root = cli.pc_output_files
    pc_settings.do_clustering = True
    # Possible PyPolyChord settings:
    #Keyword arguments
    #-----------------
    #nlive: int
    #(Default: nDims*25)
    #The number of live points.
    #Increasing nlive increases the accuracy of posteriors and evidences,
    #and proportionally increases runtime ~ O(nlive).
    #num_repeats : int
    #(Default: nDims*5)
    #The number of slice slice-sampling steps to generate a new point.
    #Increasing num_repeats increases the reliability of the algorithm.
    #Typically
    #* for reliable evidences need num_repeats ~ O(5*nDims).
    #* for reliable posteriors need num_repeats ~ O(nDims)
    #nprior : int
    #(Default: nlive)
    #The number of prior samples to draw before starting compression.
    #do_clustering : boolean
    #(Default: True)
    #Whether or not to use clustering at run time.
    #feedback : {0,1,2,3}
    #(Default: 1)
    #How much command line feedback to give
    #precision_criterion : float
    #(Default: 0.001)
    #Termination criterion. Nested sampling terminates when the evidence
    #contained in the live points is precision_criterion fraction of the
    #total evidence.
    #max_ndead : int
    #(Default: -1)
    #Alternative termination criterion. Stop after max_ndead iterations.
    #Set negative to ignore (default).
    #boost_posterior : float
    #(Default: 0.0)
    #Increase the number of posterior samples produced. This can be set
    #arbitrarily high, but you won't be able to boost by more than
    #num_repeats
    #Warning: in high dimensions PolyChord produces _a lot_ of posterior
    #samples. You probably don't need to change this
    #posteriors : boolean
    #(Default: True)
    #Produce (weighted) posterior samples. Stored in <root>.txt.
    #equals : boolean
    #(Default: True)
    #Produce (equally weighted) posterior samples. Stored in
    #<root>_equal_weights.txt
    #cluster_posteriors : boolean
    #(Default: True)
    #Produce posterior files for each cluster?
    #Does nothing if do_clustering=False.
    #write_resume : boolean
    #(Default: True)
    #Create a resume file.
    #read_resume : boolean
    #(Default: True)
    #Read from resume file.
    #write_stats : boolean
    #(Default: True)
    #Write an evidence statistics file.
    #write_live : boolean
    #(Default: True)
    #Write a live points file.
    #write_dead : boolean
    #(Default: True)
    #Write a dead points file.
    #write_dead : boolean
    #(Default: True)
    #Write a prior points file.
    #update_files : int
    #(Default: nlive)
    #How often to update the files in <base_dir>.
    #base_dir : string
    #(Default: 'chains')
    #Where to store output files.
    #file_root : string
    #(Default: 'test')
    #Root name of the files produced.
    #grade_frac : List[float]
    #(Default: 1)
    #The amount of time to spend in each speed.
    #grade_dims : List[int]
    #(Default: 1)
    #The number of parameters within each speed.

    # RUN POLYCHORD
    pc_run = PC.run_polychord(likelihood, nfit, nder, pc_settings, prior)

    # set label and legend names
    kel_plot_labels = anc.keplerian_legend(fitting_names, cli.m_type)
    pc_paramnames = [('%s' % (fitting_names[i]), r'%s' % (kel_plot_labels[i])) for i in range(nfit)]
    #pc_paramnames += [('r*', 'r')]
    pc_run.make_paramnames_files(pc_paramnames)

    # optional getdist triangle plot of the PolyChord posterior
    if (cli.pc_plot):
        import getdist.plots
        import matplotlib.pyplot as plt
        plt.rc('font', **{'family': 'serif', 'serif': ['Computer Modern Roman']})
        plt.rc('text', usetex=True)
        posterior = pc_run.posterior
        g = getdist.plots.getSubplotPlotter()
        g.triangle_plot(posterior, filled=True)
        plt.show()

    return
def main():
    """Build the emcee correlation (triangle) plot of the thinned posterior.

    Loads the emcee chains from HDF5, thins/converts them, and draws an
    nfit x nfit grid: 2D histogram + contour panels below the diagonal and
    1D (optionally cumulative) histograms on the diagonal. Optionally
    overplots a chosen parameter sample. Saves PNG and PDF to the emcee
    plots folder.
    """
    # ---
    # initialize logger (file + screen handlers are attached below)
    logger = logging.getLogger("Main_log")
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(asctime)s - %(message)s")

    # global variables
    label_separation = -0.90  # if uses this, comment ax.xyaxis.labelpad = label_pad
    label_pad = 12  # it uses this, comment ax.xyaxis.set_label_coords()...
    label_size = 8
    ticklabel_size = 4

    # helper: configure the x axis of one grid panel (label position, tick
    # count/format); ticks_formatter = [lo, hi, n_ticks]
    def set_xaxis(ax, label_size, label_separation, label_pad, ticklabel_size, kel_label, ticks_formatter, tick_fmt='%.4f'):
        ax.get_xaxis().set_visible(True)
        ax.xaxis.set_tick_params(labelsize=ticklabel_size)
        ax.xaxis.set_label_coords(0.5, label_separation)
        #ax.ticklabel_format(style='plain', axis='both', useOffset=False)
        plt.setp(ax.xaxis.get_majorticklabels(), rotation=70)
        #ax.xaxis.labelpad = label_pad
        ax.set_xlabel(kel_label, fontsize=label_size)
        tick_step = (ticks_formatter[1] - ticks_formatter[0]) / ticks_formatter[2]
        ax.xaxis.set_ticks(np.arange(ticks_formatter[0], ticks_formatter[1], tick_step))
        tick_formatter = FormatStrFormatter(tick_fmt)
        ax.xaxis.set_major_formatter(tick_formatter)
        return

    # helper: same as set_xaxis but for the y axis
    def set_yaxis(ax, label_size, label_separation, label_pad, ticklabel_size, kel_label, ticks_formatter, tick_fmt='%.4f'):
        ax.get_yaxis().set_visible(True)
        ax.yaxis.set_tick_params(labelsize=ticklabel_size)
        ax.yaxis.set_label_coords(label_separation, 0.5)
        #ax.ticklabel_format(style='plain', axis='both', useOffset=False)
        #ax.yaxis.labelpad = label_pad
        ax.set_ylabel(kel_label, fontsize=label_size)
        tick_step = (ticks_formatter[1] - ticks_formatter[0]) / ticks_formatter[2]
        ax.yaxis.set_ticks(np.arange(ticks_formatter[0], ticks_formatter[1], tick_step))
        tick_formatter = FormatStrFormatter(tick_fmt)
        ax.yaxis.set_major_formatter(tick_formatter)
        return

    print
    print ' ================== '
    print ' CORRELATION PLOTS'
    print ' ================== '
    print

    # read cli arguments
    cli = anc.get_args()
    #plot_folder = prepare_plot_folder(working_path)
    emcee_plots = anc.prepare_emcee_plot_folder(cli.full_path)
    log_file = os.path.join(emcee_plots, 'emcee_triangle_log.txt')
    flog = logging.FileHandler(log_file, 'w')
    flog.setLevel(logging.DEBUG)
    flog.setFormatter(formatter)
    logger.addHandler(flog)
    # log screen
    slog = logging.StreamHandler()
    slog.setLevel(logging.DEBUG)
    slog.setFormatter(formatter)
    logger.addHandler(slog)

    # computes mass conversion factor
    #m_factor = anc.mass_conversion_factor(cli.m_type)
    m_factor, m_unit = anc.mass_type_factor(1., cli.m_type, False)

    # set emcee and trades folder
    emcee_folder = cli.full_path
    trades_folder = os.path.join(os.path.dirname(cli.full_path), '')
    # and best folder
    emcee_file, emcee_best, folder_best = anc.get_emcee_file_and_best(emcee_folder, cli.temp_status)

    # get data from the hdf5 file
    parameter_names_emcee, parameter_boundaries, chains, acceptance_fraction, autocor_time, lnprobability, ln_err_const, completed_steps = anc.get_data(emcee_file, cli.temp_status)

    # print Memory occupation of ...
    anc.print_memory_usage(chains)

    nfit, nwalkers, nruns, nburnin, nruns_sel = anc.get_emcee_parameters(chains, cli.temp_status, cli.nburnin, completed_steps)
    logger.info('nfit(%d), nwalkers(%d), nruns(%d), nburnin(%d), nruns_sel(%d)' % (nfit, nwalkers, nruns, nburnin, nruns_sel))

    # test label_separation: push axis labels further out as nfit grows
    #if (nfit <= 3): label_separation = -0.1
    if (nfit > 2):
        #label_separation = -0.1 - ( 0.075 * (nfit-2) ) # good for figsize=(12,12)
        label_separation = -0.15 - (0.125 * (nfit - 2))  # testing
    #else:
    #label_separation = -0.15
    #label_size = label_size - 1 * int(nfit / 5.)
    # shrink label font as the grid gets denser
    label_size = label_size - 1 * int(nfit / 2.5)

    # set label and legend names
    kel_plot_labels = anc.keplerian_legend(parameter_names_emcee, cli.m_type)

    chains_T_full, parameter_boundaries = anc.select_transpose_convert_chains(nfit, nwalkers, nburnin, nruns, nruns_sel, m_factor, parameter_names_emcee, parameter_boundaries, chains)

    chains_T, flatchain_posterior_0, lnprob_burnin, thin_steps = anc.thin_the_chains(cli.use_thin, nburnin, nruns, nruns_sel, autocor_time, chains_T_full, lnprobability, burnin_done=False)

    flatchain_posterior_0 = anc.fix_lambda(flatchain_posterior_0, parameter_names_emcee)

    # optional bootstrap-like export (masses converted back to Msun first)
    if (cli.boot_id > 0):
        flatchain_posterior_msun = anc.posterior_back_to_msun(m_factor, parameter_names_emcee, flatchain_posterior_0)
        boot_file = anc.save_bootstrap_like(emcee_folder, cli.boot_id, parameter_names_emcee, flatchain_posterior_msun)
        logger.info('saved bootstrap like file: %s' % (boot_file))
        del flatchain_posterior_msun

    k = anc.get_auto_bins(flatchain_posterior_0)

    # optional overplot sample: from an ad-hoc fitted file or from
    # summary_parameters.hdf5 at the requested sample index
    if (cli.overplot is not None):
        if (cli.adhoc is not None):
            overp_names, read_par = anc.read_fitted_file(cli.adhoc)
            cli.overplot = 777
        else:
            ## OPEN summary_parameters.hdf5 FILE
            s_h5f = h5py.File(os.path.join(cli.full_path, 'summary_parameters.hdf5'), 'r')
            # take only the selected sample
            s_overplot = '%04d' % (cli.overplot)
            read_par = s_h5f['parameters/%s/fitted/parameters' % (s_overplot)][...]
            s_h5f.close()
        # fitted parameters has always Mp/Ms in Msun/Mstar, so it is needed to rescale it properly
        overp_par = read_par.copy()
        for ii in range(0, nfit):
            if ('Ms' in parameter_names_emcee[ii]):
                #if('Ms' in overp_names[ii]):
                overp_par[ii] = overp_par[ii] * m_factor

    #fig, ax = plt.subplots(nrows = nfit-1, ncols=nfit, figsize=(12,12))
    #fig = plt.figure(figsize=(12,12))
    fig = plt.figure(figsize=(6, 6))
    fig.subplots_adjust(hspace=0.05, wspace=0.05)

    # lower-triangle grid: (ix, iy) panel is a 2D histogram for iy > ix,
    # a 1D histogram on the diagonal, nothing above it
    for ix in range(0, nfit, 1):
        x_data = flatchain_posterior_0[:, ix]
        ##x_med = median_parameters[ix]
        x_min, x_max = anc.compute_limits(x_data, 0.05)
        # degenerate (constant) parameter: fall back to the prior boundaries
        if (x_min == x_max):
            x_min = parameter_boundaries[ix, 0]
            x_max = parameter_boundaries[ix, 1]
        #x_max_mean = mode_parameters[ix]

        for iy in range(nfit - 1, -1, -1):
            y_data = flatchain_posterior_0[:, iy]
            y_min, y_max = anc.compute_limits(y_data, 0.05)
            if (y_min == y_max):
                y_min = parameter_boundaries[iy, 0]
                y_max = parameter_boundaries[iy, 1]
            #y_max_mean = mode_parameters[iy]

            if (iy > ix):
                # correlation plot
                logger.info('%s vs %s' % (parameter_names_emcee[ix], parameter_names_emcee[iy]))
                ax = plt.subplot2grid((nfit + 1, nfit), (iy, ix))
                #hist2d_counts, xedges, yedges, image2d = ax.hist2d(x_data, y_data, bins=k, range=[[x_data.min(), x_data.max()],[y_data.min(), y_data.max()]], cmap=cm.gray_r, normed=True)
                hist2d_counts, xedges, yedges, image2d = ax.hist2d(
                    x_data, y_data,
                    bins=k,
                    range=[[x_data.min(), x_data.max()], [y_data.min(), y_data.max()]],
                    cmap=cm.gray_r,
                    normed=False  #density=False
                )
                #new_k = int(k/3)
                new_k = k
                # separate (unnormalised) 2D histogram used only for contours
                hist2d_counts_2, xedges_2, yedges_2 = np.histogram2d(
                    x_data, y_data,
                    bins=new_k,
                    range=[[x_data.min(), x_data.max()], [y_data.min(), y_data.max()]],
                    #normed=True
                    density=False
                )
                # bin centres for the contour grid
                x_bins = [0.5 * (xedges_2[i] + xedges_2[i + 1]) for i in range(0, new_k)]
                y_bins = [0.5 * (yedges_2[i] + yedges_2[i + 1]) for i in range(0, new_k)]
                #ax.contour(x_bins, y_bins, hist2d_counts_2.T, 3, cmap=cm.gray, linestyle='solid', linewidths=(0.7, 0.7, 0.7))
                nl = 5
                # NOTE(review): `levels` (sigma levels) is computed but the
                # contour call below passes the integer nl instead — confirm.
                levels = [1. - np.exp(-0.5 * ii) for ii in range(0, nl)]  # 2D sigmas: 0sigma, 1sigma, 2sigma, 3sigma, ..
                #ax.contour(x_bins, y_bins, hist2d_counts_2.T, levels, cmap=cm.gray, linestyle='solid', linewidths=(0.7, 0.7, 0.7))
                #ax.contour(x_bins, y_bins, hist2d_counts_2.T, levels, cmap=cm.viridis, linestyle='solid', linewidths=1.)
                #ax.contour(x_bins, y_bins, hist2d_counts_2.T, cmap=cm.viridis, linestyle='solid', linewidths=0.7)
                #ax.contour(x_bins, y_bins, hist2d_counts_2.T, levels, cmap=cm.viridis, linestyle='solid', linewidths=0.7, normed=True)
                ax.contour(
                    x_bins, y_bins, hist2d_counts_2.T, nl,
                    cmap=cm.viridis, linestyles='solid', linewidths=0.5,
                    #normed=True
                )
                if (cli.overplot is not None):
                    # plot selected overplot sample
                    ax.axvline(overp_par[ix], color='C0', ls='--', lw=1.1, alpha=0.5)
                    ax.axhline(overp_par[iy], color='C0', ls='--', lw=1.1, alpha=0.5)
                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)
                # axis labels only on the outer edge of the grid
                if (iy == nfit - 1):
                    set_xaxis(ax, label_size, label_separation, label_pad, ticklabel_size, kel_plot_labels[ix], [xedges[0], xedges[-1], 4])
                if (ix == 0):
                    set_yaxis(ax, label_size, label_separation, label_pad, ticklabel_size, kel_plot_labels[iy], [yedges[0], yedges[-1], 5])
                ax.set_ylim([y_min, y_max])
                ax.set_xlim([x_min, x_max])
                plt.draw()

            elif (iy == ix):
                # distribution plot
                logger.info('%s histogram' % (parameter_names_emcee[ix]))
                ax = plt.subplot2grid((nfit + 1, nfit), (ix, ix))
                # last diagonal panel is rotated so its histogram lies on its side
                if (ix == nfit - 1):
                    hist_orientation = 'horizontal'
                else:
                    hist_orientation = 'vertical'
                # NOTE(review): idx is computed but never used
                idx = np.argsort(x_data)
                if (not cli.cumulative):
                    # HISTOGRAM
                    hist_counts, edges, patces = ax.hist(
                        x_data, bins=k,
                        range=[x_data.min(), x_data.max()],
                        histtype='stepfilled',
                        color='darkgrey',
                        #edgecolor='lightgray',
                        edgecolor='None',
                        align='mid',
                        orientation=hist_orientation,
                        #normed=True,
                        density=True,
                        stacked=True
                    )
                else:
                    # CUMULATIVE HISTOGRAM
                    hist_counts, edges, patces = ax.hist(
                        x_data, bins=k,
                        range=[x_data.min(), x_data.max()],
                        histtype='stepfilled',
                        color='darkgrey',
                        #edgecolor='lightgray',
                        edgecolor='None',
                        align='mid',
                        orientation=hist_orientation,
                        density=True,
                        stacked=True,
                        cumulative=True
                    )
                if (ix == nfit - 1):
                    ax.set_ylim([y_min, y_max])
                    if (cli.overplot is not None):
                        # plot selected overplot sample
                        ax.axhline(overp_par[ix], color='C0', ls='--', lw=1.1, alpha=0.5)
                else:
                    ax.set_xlim([x_min, x_max])
                    if (cli.overplot is not None):
                        # plot selected overplot sample
                        ax.axvline(overp_par[ix], color='C0', ls='--', lw=1.1, alpha=0.5)
                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)
                ax.set_title(kel_plot_labels[ix], fontsize=label_size)
                plt.draw()

    logger.info('saving plot')
    emcee_fig_file = os.path.join(emcee_plots, 'emcee_triangle.png')
    fig.savefig(emcee_fig_file, bbox_inches='tight', dpi=300)
    logger.info('png done')
    emcee_fig_file = os.path.join(emcee_plots, 'emcee_triangle.pdf')
    fig.savefig(emcee_fig_file, bbox_inches='tight', dpi=96)
    logger.info('pdf done')
    plt.close(fig)
    logger.info('')

    return
def main():
    """Plot the PSO population evolution (duplicate of the PSO driver above).

    Reads ``pso_run.hdf5``, rescales mass parameters to the requested unit,
    and saves one ``plots/evolution_<name>.png`` per fitted parameter with
    all particles in gray and the best-of-swarm track in black.
    """
    print
    print ' ================== '
    print ' PSO PLOTS'
    print ' ================== '
    print

    # read cli arguments
    cli = get_args()

    # computes mass conversion factor
    #m_factor = mass_conversion_factor(cli.m_type)
    m_factor, m_unit = mass_type_factor(1., cli.mtype, False)

    # set pso_file
    pso_file = os.path.join(cli.full_path, 'pso_run.hdf5')
    population, population_fitness, pso_parameters, pso_fitness, pso_best_evolution, parameters_minmax, parameter_names, pop_shape = anc.get_pso_data(pso_file)
    nfit = pop_shape[0]
    npop = pop_shape[1]
    niter = pop_shape[2]
    # 1-based iteration axis for plotting
    iteration = np.arange(0, niter) + 1

    # NOTE(review): parameters_minmax_bck is saved but never used afterwards
    if (isinstance(parameters_minmax, type(population_fitness))):
        parameters_minmax_bck = parameters_minmax.copy()

    # set label and legend names
    kel_legends, labels_list = anc.keplerian_legend(parameter_names, cli.m_type)

    anc.print_memory_usage(population)
    anc.print_memory_usage(population_fitness)

    pso_plots = os.path.join(cli.full_path, 'plots')
    if (not os.path.isdir(pso_plots)):
        os.makedirs(pso_plots)

    # parameter_names and parameters_minmax in pso_run.hdf5
    if (isinstance(parameter_names, type(population)) and isinstance(parameters_minmax, type(population_fitness))):
        for ii in range(0, nfit):
            print 'parameter: %s' % (parameter_names[ii])
            # mass parameters start with 'm' but not 'mA' (mean anomaly):
            # convert values and y-limits to the requested mass unit
            if (parameter_names[ii][0] == 'm' and parameter_names[ii][1] != 'A'):
                population[ii, :, :] = population[ii, :, :] * m_factor
                y_min = parameters_minmax[ii, 0] * m_factor
                y_max = parameters_minmax[ii, 1] * m_factor
            else:
                y_min = parameters_minmax[ii, 0]
                y_max = parameters_minmax[ii, 1]
            print 'boundaries: [%.6f, %.6f]' % (y_min, y_max)
            print ' minmax: [%.6f, %.6f]' % (np.min(population[ii, :, :]), np.max(population[ii, :, :]))
            pso_fig_file = os.path.join(pso_plots, 'evolution_%s.png' % (parameter_names[ii]))
            # trailing comma keeps the line open until ' done' below
            print ' %s' % (pso_fig_file),
            fig = plt.figure(figsize=(12, 12))
            # all swarm particles in gray
            for jj in range(0, npop):
                #print jj,
                plt.plot(iteration, population[ii, jj, :], marker='o', mfc='gray', mec='none', ls='', ms=4)
            plt.ylim(y_min, y_max)
            # best-of-swarm trajectory in black, when available
            if (isinstance(pso_best_evolution, type(population_fitness))):
                if (parameter_names[ii][0] == 'm' and parameter_names[ii][1] != 'A'):
                    plt.plot(iteration, pso_best_evolution[ii, :] * m_factor, marker='o', mfc='black', mec='white', mew=0.25, ls='-', ms=5)
                    #print pso_best_evolution[-1,0],pso_best_evolution[-1,-1]
                else:
                    plt.plot(iteration, pso_best_evolution[ii, :], marker='o', mfc='black', mec='white', mew=0.25, ls='-', ms=5)
            plt.xlabel('$N_\mathrm{iteration}$')
            plt.ylabel(kel_legends[ii])
            plt.draw()
            fig.savefig(pso_fig_file, bbox_inches='tight', dpi=150)
            print ' done'
    #elif ():

    return
def main():
    """
    Per-parameter chain plots for a TRADES+emcee run.

    For each fitted parameter, draws the full (optionally thinned) walker
    chains next to the posterior histogram, optionally overplotting
    reference solutions (mode/median/max-lnprob/picked samples) read from
    'summary_parameters.hdf5'. Finally plots lnprob and reduced chi-square
    vs. step. Output PNGs go to '<full_path>/plots'. Returns None.
    """
    print
    print ' ======================== '
    print ' TRADES+EMCEE CHAIN PLOTS'
    print ' ======================== '
    print

    # read cli arguments
    cli = anc.get_args()
    # computes mass conversion factor
    #m_factor, m_unit = anc.mass_conversion_factor_and_unit(cli.m_type)
    m_factor, m_unit = anc.mass_type_factor(1., cli.m_type, False)

    # set emcee and trades folder
    emcee_folder = cli.full_path
    trades_folder = os.path.join(os.path.dirname(cli.full_path), '')
    # and best folder
    emcee_file, emcee_best, folder_best = anc.get_emcee_file_and_best(emcee_folder, cli.temp_status)

    parameter_names_emcee, parameter_boundaries, chains, acceptance_fraction, autocor_time, lnprobability, ln_err_const, completed_steps = anc.get_data(emcee_file, cli.temp_status)

    # set label and legend names
    kel_labels = anc.keplerian_legend(parameter_names_emcee, cli.m_type)

    nfit, nwalkers, nruns, nburnin, nruns_sel = anc.get_emcee_parameters(chains, cli.temp_status, cli.nburnin, completed_steps)

    anc.print_memory_usage(chains)

    chains_T_full, parameter_boundaries = anc.select_transpose_convert_chains(nfit, nwalkers, nburnin, nruns, nruns_sel, m_factor, parameter_names_emcee, parameter_boundaries, chains)

    # thinning: when enabled, plotted step counts are expressed in units
    # of thin_steps, so the burn-in marker and x-limit are rescaled too
    if(cli.use_thin or cli.use_thin > 0):
        chains_T, flatchain_posterior_0, lnprob_burnin, thin_steps, chains_T_full_thinned = anc.thin_the_chains(cli.use_thin, nburnin, nruns, nruns_sel, autocor_time, chains_T_full, lnprobability, burnin_done=False, full_chains_thinned=True)
        nburnin_plt = np.rint(nburnin / thin_steps).astype(int)
        nend = np.rint(nruns / thin_steps).astype(int)
    else:
        chains_T, flatchain_posterior_0, lnprob_burnin, thin_steps = anc.thin_the_chains(cli.use_thin, nburnin, nruns, nruns_sel, autocor_time, chains_T_full, lnprobability, burnin_done=False, full_chains_thinned=False)
        nburnin_plt = nburnin
        nend = nruns

    #name_par, name_excluded = anc.get_sample_list(cli.sample_str, parameter_names_emcee)
    #sample_parameters, idx_sample = anc.pick_sample_parameters(flatchain_posterior_0, parameter_names_emcee, name_par = name_par, name_excluded = name_excluded)
    #flatchain_posterior_1 = flatchain_posterior_0
    # fix lambda?
    #flatchain_posterior_0 = anc.fix_lambda(flatchain_posterior_0, parameter_names_emcee)

    if(cli.boot_id > 0):
        # bootstrap-like file wants masses back in Msun
        flatchain_posterior_msun = anc.posterior_back_to_msun(m_factor,parameter_names_emcee,flatchain_posterior_0)
        boot_file = anc.save_bootstrap_like(emcee_folder, cli.boot_id, parameter_names_emcee, flatchain_posterior_msun)
        logger.info('saved bootstrap like file: %s' %(boot_file))
        del flatchain_posterior_msun

    k = anc.get_auto_bins(flatchain_posterior_0)

    # cli.overplot selects which saved solution id to overplot; non-numeric
    # values disable the overplot entirely
    try:
        overplot = int(cli.overplot)
    except:
        overplot = None

    ## OPEN summary_parameters.hdf5 FILE
    s_h5f = h5py.File(os.path.join(cli.full_path, 'summary_parameters.hdf5'), 'r')
    if(overplot is not None):
        # reference solutions stored under fixed ids:
        # 0666/0667/0668 picked samples, 1051 median, 2050 max-lnprob,
        # 3051 mode -- the optional ones fall back to None if absent
        ci_fitted = s_h5f['confidence_intervals/fitted/ci'][...]
        sample_parameters = s_h5f['parameters/0666/fitted/parameters'][...]
        sample_lgllhd = s_h5f['parameters/0666'].attrs['lgllhd']
        try:
            sample2_parameters = s_h5f['parameters/0667/fitted/parameters'][...]
            sample2_lgllhd = s_h5f['parameters/0667'].attrs['lgllhd']
        except:
            sample2_parameters = None
            sample2_lgllhd = None
        try:
            sample3_parameters = s_h5f['parameters/0668/fitted/parameters'][...]
            sample3_lgllhd = s_h5f['parameters/0668'].attrs['lgllhd']
        except:
            sample3_parameters = None
            sample3_lgllhd = None

        median_parameters = s_h5f['parameters/1051/fitted/parameters'][...]
        median_lgllhd = s_h5f['parameters/1051'].attrs['lgllhd']
        max_lnprob_parameters = s_h5f['parameters/2050/fitted/parameters'][...]
        max_lgllhd = s_h5f['parameters/2050'].attrs['lgllhd']
        try:
            mode_parameters = s_h5f['parameters/3051/fitted/parameters'][...]
            mode_lgllhd = s_h5f['parameters/3051'].attrs['lgllhd']
        except:
            mode_parameters = None
            mode_lgllhd = None
        overp_par = s_h5f['parameters/%04d/fitted/parameters' %(overplot)][...]
        overp_lgllhd = s_h5f['parameters/%04d' %(overplot)].attrs['lgllhd']

    #nfit = s_h5f['confidence_intervals/fitted'].attrs['nfit']
    ndata = s_h5f['confidence_intervals/fitted'].attrs['ndata']
    dof = s_h5f['confidence_intervals/fitted'].attrs['dof']
    s_h5f.close()

    emcee_plots = os.path.join(cli.full_path,'plots')
    if (not os.path.isdir(emcee_plots)):
        os.makedirs(emcee_plots)

    for i in range(0, nfit):
        # fitted masses are stored in Msun/Mstar: rescale to chosen unit
        if('Ms' in parameter_names_emcee[i]):
            conv_plot = m_factor
        else:
            conv_plot = 1.

        emcee_fig_file = os.path.join(emcee_plots, 'chain_%03d_%s.png' %(i+1, parameter_names_emcee[i].strip()))
        print ' %s' %(emcee_fig_file),
        #fig, (axChain, axHist) = plt.subplots(nrows=1, ncols=2, figsize=(12,12))
        fig, (axChain, axHist) = plt.subplots(nrows=1, ncols=2, figsize=(6,6))

        # right panel: horizontal posterior histogram + Gaussian pdf overlay
        (counts, bins_val, patches) = axHist.hist(flatchain_posterior_0[:,i], bins=k, range=(flatchain_posterior_0[:,i].min(), flatchain_posterior_0[:,i].max()), orientation='horizontal', density=True, stacked=True, histtype='stepfilled', color='darkgrey', edgecolor='lightgray', align='mid')

        xpdf = scipy_norm.pdf(flatchain_posterior_0[:,i], loc = flatchain_posterior_0[:,i].mean(), scale = flatchain_posterior_0[:,i].std())
        idx = np.argsort(flatchain_posterior_0[:,i])
        axHist.plot(xpdf[idx], flatchain_posterior_0[idx,i], color='black', marker='None', ls='-.', lw=1.5, label='pdf')

        # left panel: full chains (including burn-in, shaded gray)
        #axChain.plot(chains_T[:,:,i], '-', alpha=0.3)
        if(cli.use_thin):
            axChain.plot(chains_T_full_thinned[:,:,i], '-', alpha=0.3)
        else:
            axChain.plot(chains_T_full[:,:,i], '-', alpha=0.3)
        axChain.axvspan(0, nburnin_plt, color='gray', alpha=0.45)
        axChain.axvline(nburnin_plt, color='gray', ls='-', lw=1.5)

        if(overplot is not None):
            if(mode_parameters is not None):
                # plot of mode (mean of higher peak/bin)
                axChain.axhline(mode_parameters[i]*conv_plot, color='red', ls='-', lw=2.1, alpha=1, label='mode')
            # plot of median
            axChain.axhline(median_parameters[i]*conv_plot, marker='None', c='blue',ls='-', lw=2.1, alpha=1.0, label='median fit')
            # plot of max_lnprob
            axChain.axhline(max_lnprob_parameters[i]*conv_plot, marker='None', c='black',ls='-', lw=1.1, alpha=1.0, label='max lnprob')
            if(sample_parameters is not None):
                # plot of sample_parameters
                axChain.axhline(sample_parameters[i]*conv_plot, marker='None', c='orange',ls='--', lw=2.3, alpha=0.77, label='picked: %12.7f' %(sample_parameters[i]))
            if(sample2_parameters is not None):
                # plot of sample2_parameters
                axChain.axhline(sample2_parameters[i]*conv_plot, marker='None', c='cyan',ls=':', lw=2.7, alpha=0.77, label='close lgllhd: %12.7f' %(sample2_parameters[i]))
            if(sample3_parameters is not None):
                # plot of sample3_parameters
                axChain.axhline(sample3_parameters[i]*conv_plot, marker='None', c='yellow',ls='-', lw=3.1, alpha=0.66, label='close lgllhd: %12.7f' %(sample3_parameters[i]))
            # only draw the generic overplot line when it is not one of
            # the named reference solutions already drawn above
            if(overplot not in [1050, 1051, 2050, 3050, 3051]):
                axChain.axhline(overp_par[i]*conv_plot, marker='None', c='black',ls='--', lw=2.5, alpha=0.6, label='overplot %d' %(overplot))
            # plot ci (15.865th / 84.135th percentiles = 1-sigma interval)
            axChain.axhline(ci_fitted[i,0]*conv_plot, marker='None', c='forestgreen',ls='-', lw=2.1, alpha=1.0, label='CI 15.865th (%.5f)' %(ci_fitted[i,0]*conv_plot))
            axChain.axhline(ci_fitted[i,1]*conv_plot, marker='None', c='forestgreen',ls='-', lw=2.1, alpha=1.0, label='CI 84.135th (%.5f)' %(ci_fitted[i,1]*conv_plot))

        axChain.ticklabel_format(useOffset=False)
        xlabel = '$N_\mathrm{steps}$'
        if(cli.use_thin):
            xlabel = '$N_\mathrm{steps} \\times %d$' %(thin_steps)
        axChain.set_xlabel(xlabel)
        axChain.set_xlim([0, nend])
        axChain.set_ylabel(kel_labels[i])
        y_min = flatchain_posterior_0[:,i].min()
        y_max = flatchain_posterior_0[:,i].max()
        axChain.set_ylim([y_min, y_max])
        axChain.set_title('Full chain %s:=[%.3f , %.3f]' %(kel_labels[i], parameter_boundaries[i,0], parameter_boundaries[i,1]))
        plt.draw()

        axHist.ticklabel_format(useOffset=False)
        axHist.tick_params(direction='inout', labelleft=False)
        axHist.set_ylim([y_min, y_max])
        if(overplot is not None):
            # same reference lines mirrored on the histogram panel
            if(mode_parameters is not None):
                # plot mode
                axHist.axhline(mode_parameters[i]*conv_plot, color='red', ls='-', lw=2.1, alpha=1, label='mode')
            # plot median
            axHist.axhline(median_parameters[i]*conv_plot, marker='None', c='blue',ls='-', lw=2.1, alpha=1.0, label='median fit')
            # plot of max_lnprob
            axHist.axhline(max_lnprob_parameters[i]*conv_plot, marker='None', c='black',ls='-', lw=1.1, alpha=1.0, label='max lnprob')
            if(sample_parameters is not None):
                # plot of sample_parameters
                axHist.axhline(sample_parameters[i]*conv_plot, marker='None', c='orange',ls='--', lw=2.3, alpha=0.77, label='picked: %12.7f' %(sample_parameters[i]*conv_plot))
            if(sample2_parameters is not None):
                # plot of sample2_parameters
                axHist.axhline(sample2_parameters[i]*conv_plot, marker='None', c='cyan',ls=':', lw=2.7, alpha=0.77, label='close lgllhd: %12.7f' %(sample2_parameters[i]))
            if(sample3_parameters is not None):
                # plot of sample3_parameters
                axHist.axhline(sample3_parameters[i]*conv_plot, marker='None', c='yellow',ls='-', lw=3.1, alpha=0.66, label='close lgllhd: %12.7f' %(sample3_parameters[i]))
            if(overplot not in [1050, 1051, 2050, 3050, 3051]):
                axHist.axhline(overp_par[i]*conv_plot, marker='None', c='black',ls='--', lw=2.5, alpha=0.8, label='overplot %d' %(overplot))
            # plot ci
            axHist.axhline(ci_fitted[i,0]*conv_plot, marker='None', c='forestgreen',ls='-', lw=2.1, alpha=1.0, label='CI 15.865th (%.5f)' %(ci_fitted[i,0]*conv_plot))
            axHist.axhline(ci_fitted[i,1]*conv_plot, marker='None', c='forestgreen',ls='-', lw=2.1, alpha=1.0, label='CI 84.135th (%.5f)' %(ci_fitted[i,1]*conv_plot))
        axHist.set_title('Distribution of posterior chain')
        axHist.legend(loc='center left', fontsize=9, bbox_to_anchor=(1, 0.5))
        plt.draw()
        fig.savefig(emcee_fig_file, bbox_inches='tight', dpi=150)
        print ' saved'
        print

    #fig = plt.figure(figsize=(12,12))
    fig = plt.figure(figsize=(6,6))

    # lnprob vs. step (top panel)
    xlabel = '$N_\mathrm{steps}$'
    if(cli.use_thin):
        xlabel = '$N_\mathrm{steps} \\times %d$' %(thin_steps)
    ax = plt.subplot2grid((2,1), (0,0))
    ax.plot(lnprob_burnin.T, '-', alpha=0.3)
    if(overplot is not None):
        posterior_msun = anc.posterior_back_to_msun(m_factor,parameter_names_emcee,flatchain_posterior_0)
        post_sel, lnprob_sel = anc.select_within_all_ci(posterior_msun, ci_fitted[:,0:2], lnprob_burnin.T.reshape(-1))
        #lnprob_sel = lnprob_burnin.T.reshape((-1))
        # median +/- MAD of lnprob defines the shaded reference band
        lgllhd_med = np.percentile(lnprob_burnin.T.reshape(-1), 50., interpolation='midpoint')
        abs_dlg = np.abs(lnprob_sel - lgllhd_med)
        lgllhd_mad = np.percentile(abs_dlg, 50., interpolation='midpoint')
        lnp_min = lgllhd_med - lgllhd_mad
        lnp_max = lgllhd_med + lgllhd_mad
        print ' lgllhd_med & mad = ',lgllhd_med, lgllhd_mad
        print ' lnp_min = ',lnp_min, ' lnp_max = ',lnp_max
        print ' lnl_668 = ',sample3_lgllhd
        ax.axhline(lgllhd_med, color='black', ls='-', lw=1.6, alpha=0.77)
        if(sample3_lgllhd is not None):
            ax.axhline(sample3_lgllhd, marker='None', c='yellow',ls='-', lw=3.1, alpha=0.9)
        ax.axhspan(lnp_min, lnp_max, color='gray', alpha=0.77)
        ax.axhline(lnp_min, color='black', ls='--', lw=1.6, alpha=0.77)
        ax.axhline(lnp_max, color='black', ls='--', lw=1.6, alpha=0.77)
    min_lnp = np.min(lnprob_burnin.T, axis=0).min()
    max_lnp = np.max(lnprob_burnin.T, axis=0).max()
    y_min, y_max = anc.compute_limits(np.asarray([min_lnp, max_lnp]), 0.05)
    ax.set_ylim((y_min, y_max))
    ax.set_ylabel('lnprob')
    ax.set_xlabel(xlabel)

    # reduced chi-square vs. step (bottom panel):
    # chi2r = -2 (lnprob - ln_err_const) / dof
    chi2r = -2.*(lnprob_burnin.T-ln_err_const)/np.float64(dof)
    ax = plt.subplot2grid((2,1), (1,0))
    ax.axhline(1.0, color='gray', ls='-')
    ax.plot(chi2r, '-', alpha=0.3)
    if(overplot is not None):
        # note the min/max swap: larger lnprob maps to smaller chi2r
        c2r_med = -(2.*(lgllhd_med - ln_err_const))/np.float64(dof)
        c2r_smax = -(2.*(lnp_min - ln_err_const))/np.float64(dof)
        c2r_smin = -(2.*(lnp_max - ln_err_const))/np.float64(dof)
        print ' c2r_med = ',c2r_med
        print ' c2r_smin = ',c2r_smin, ' c2r_smax = ', c2r_smax
        ax.axhline(c2r_med, color='black', ls='-', lw=1.6, alpha=0.77)
        ax.axhspan(c2r_smin, c2r_smax, color='gray', alpha=0.77)
        ax.axhline(c2r_smin, color='black', ls='--', lw=1.6, alpha=0.77)
        ax.axhline(c2r_smax, color='black', ls='--', lw=1.6, alpha=0.77)
        if(sample3_lgllhd is not None):
            c2r_sample3 = -2.*(sample3_lgllhd - ln_err_const)/np.float64(dof)
            ax.axhline(c2r_sample3, marker='None', c='yellow',ls='-', lw=3.1, alpha=0.9)
    c2r_min = -2.*(y_max - ln_err_const)/np.float64(dof)
    c2r_max = -2.*(y_min - ln_err_const)/np.float64(dof)
    ax.set_ylim((c2r_min, c2r_max))
    ax.set_ylabel('$\chi^{2}/\mathrm{dof}$')
    ax.set_xlabel(xlabel)

    fig.savefig(os.path.join(emcee_plots, 'emcee_lnprobability.png'), bbox_inches='tight', dpi=150)
    print ' %s saved' %(os.path.join(emcee_plots, 'emcee_lnprobability.png'))

    return
def main():
    """
    Gelman-Rubin convergence plots for a TRADES+emcee run.

    Computes the Gelman-Rubin statistic for each fitted parameter at a
    grid of step counts, with three estimators (local 'LBo', PyORBIT,
    pymc), and saves one 'GR_<idx>_<name>.png' per parameter into the
    emcee plot folder. Logs to both screen and 'GelmanRubin_log.txt'.
    Returns None.
    """
    # ---
    # initialize logger (file + screen handlers)
    logger = logging.getLogger("Main_log")
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(asctime)s - %(message)s")

    # read cli arguments
    cli = anc.get_args()
    emcee_plots = anc.prepare_emcee_plot_folder(cli.full_path)
    log_file = os.path.join(emcee_plots, 'GelmanRubin_log.txt')
    flog = logging.FileHandler(log_file, 'w')
    flog.setLevel(logging.DEBUG)
    flog.setFormatter(formatter)
    logger.addHandler(flog)
    # log screen
    slog = logging.StreamHandler()
    slog.setLevel(logging.DEBUG)
    slog.setFormatter(formatter)
    logger.addHandler(slog)

    # computes mass conversion factor
    m_factor, m_unit = anc.mass_type_factor(1., cli.m_type, False)

    # set emcee and trades folder
    emcee_folder = cli.full_path
    trades_folder = os.path.join(os.path.dirname(cli.full_path), '')
    # and best folder
    emcee_file, emcee_best, folder_best = anc.get_emcee_file_and_best(emcee_folder, cli.temp_status)

    # get data from the hdf5 file
    parameter_names_emcee, parameter_boundaries, chains, acceptance_fraction, autocor_time, lnprobability, ln_err_const, completed_steps = anc.get_data(emcee_file, cli.temp_status)

    # print Memory occupation of ...
    anc.print_memory_usage(chains)

    nfit, nwalkers, nruns, nburnin, nruns_sel = anc.get_emcee_parameters(chains, cli.temp_status, cli.nburnin, completed_steps)
    logger.info('nfit(%d), nwalkers(%d), nruns(%d), nburnin(%d), nruns_sel(%d)' %(nfit, nwalkers, nruns, nburnin, nruns_sel))

    # set label and legend names
    kel_labels = anc.keplerian_legend(parameter_names_emcee, cli.m_type)

    chains_T, parameter_boundaries = anc.select_transpose_convert_chains(nfit, nwalkers, nburnin, nruns, nruns_sel, m_factor, parameter_names_emcee, parameter_boundaries, chains)

    if(cli.temp_status):
        n_steps = completed_steps
    else:
        n_steps = nruns

    # number of step-counts at which GR is evaluated; 0 means "every step"
    sel_steps = int(cli.sel_steps)
    if (sel_steps == 0):
        sel_steps = n_steps

    # BUGFIX: np.int alias was removed in NumPy 1.24 -> use builtin int
    steps = np.linspace(start=0, stop=n_steps, num=sel_steps, endpoint=True, dtype=int)
    steps[0] = 10  # GR needs a minimum chain length; skip the first steps
    sel_steps = steps.shape[0]

    # one row per evaluation step, one column per parameter; initialised
    # to 100 so unfilled entries are clearly off-scale
    gr_Rc_1 = np.ones((sel_steps, nfit)) + 99.  # kept for the disabled test_1 estimator
    gr_Rc_2 = np.ones((sel_steps, nfit)) + 99.
    gr_Rc_pyorbit = np.ones((sel_steps, nfit)) + 99.
    gr_Rc_pymc = np.ones((sel_steps, nfit)) + 99.

    for ifit in range(0, nfit):
        logger.info('Parameter: %13s' %(parameter_names_emcee[ifit]))
        fig = plt.figure(figsize=(6,6))
        ax = plt.subplot2grid((1, 1), (0,0))
        for istep in range(0,sel_steps):
            #time0 = time.time()
            #gr_Rc_1[istep,ifit] = anc.GelmanRubin_test_1(chains_T[:steps[istep], :, ifit])
            #if(istep == sel_steps-1):
                #LBo_d, LBo_h, LBo_m, LBo_s = anc.computation_time(time.time()-time0)
                #logger.info('steps = %6d for %13s ==> Gelman-Rubin test: LBo time = %2d m %6.3f s' %(steps[istep], parameter_names_emcee[ifit], LBo_m, LBo_s))

            time0 = time.time()
            gr_Rc_2[istep,ifit] = anc.GelmanRubin(chains_T[:steps[istep], :, ifit])
            if(istep == sel_steps-1):
                LBo_d, LBo_h, LBo_m, LBo_s = anc.computation_time(time.time()-time0)
                logger.info('steps = %6d for %13s ==> Gelman-Rubin test: LBo time = %2d m %6.3f s' %(steps[istep], parameter_names_emcee[ifit], LBo_m, LBo_s))

            time0 = time.time()
            # BUGFIX: was gr_Rc_pyorbit[istep,:], which wrote this
            # parameter's scalar across every column, clobbering the
            # values of the other parameters
            gr_Rc_pyorbit[istep,ifit] = anc.GelmanRubin_PyORBIT(chains_T[:steps[istep], :, ifit])
            if(istep == sel_steps-1):
                LMa_d, LMa_h, LMa_m, LMa_s = anc.computation_time(time.time()-time0)
                logger.info('steps = %6d for %13s ==> Gelman-Rubin test: LMa time = %2d m %6.3f s' %(steps[istep], parameter_names_emcee[ifit], LMa_m, LMa_s))

            time0 = time.time()
            # BUGFIX: same row-clobbering fix as above
            gr_Rc_pymc[istep,ifit] = np.sqrt(anc.GelmanRubin_pymc(chains_T[:steps[istep], :, ifit].T))
            if(istep == sel_steps-1):
                pymc_d, pymc_h, pymc_m, pymc_s = anc.computation_time(time.time()-time0)
                logger.info('steps = %6d for %13s ==> Gelman-Rubin test: pymc time = %2d m %6.3f s' %(steps[istep], parameter_names_emcee[ifit], pymc_m, pymc_s))

        # R ~ 1.01 is the usual convergence threshold
        ax.axhline(1.01, color='gray')
        ax.plot(steps, gr_Rc_2[:,ifit], '-', color='k', lw=1.3, label='LBo 2')
        ax.plot(steps, gr_Rc_pyorbit[:,ifit], '--', color='lightgray', alpha=0.7, label='LMa')
        ax.plot(steps, gr_Rc_pymc[:,ifit], '-.', color='red', lw=1.5, alpha=0.7, label='pymc')
        ax.set_ylim(0.95, 2.3)
        ax.set_xlabel('steps (%s)' %(parameter_names_emcee[ifit].strip()))
        ax.legend(loc='center left', fontsize=9, bbox_to_anchor=(1, 0.5))
        fig_file = os.path.join(emcee_plots, 'GR_%03d_%s.png' %(ifit+1, parameter_names_emcee[ifit]))
        fig.savefig(fig_file, bbox_inches='tight', dpi=200)
        plt.close(fig)
        # BUGFIX: the log previously claimed 'GRtrace_pam_<name>.png',
        # a file that was never written; log the real output path
        logger.info('saved plot %s' %(fig_file))
        logger.info('')

    return
def main():
    """
    Run a TRADES fit with the PyPolyChord nested-sampling driver.

    Initialises the Fortran TRADES library from the cli-provided working
    path, defines likelihood/prior callbacks, runs PolyChord, and
    optionally shows a getdist triangle plot. Returns None.
    """
    # READ COMMAND LINE ARGUMENTS
    cli = get_args()

    # STARTING TIME
    start = time.localtime()
    # NOTE(review): pc_output_dir / pc_output_files computed here are not
    # used below (cli.pc_output_dir / cli.pc_output_files are used
    # instead) -- presumably leftovers. TODO confirm.
    pc_output_dir = '%d-%02d-%02dT%02dh%02dm%02ds_' % (start.tm_year, start.tm_mon, start.tm_mday, start.tm_hour, start.tm_min, start.tm_sec)
    pc_output_files = 'trades_pc'

    # RENAME
    working_path = cli.full_path
    nthreads = 1

    # INITIALISE TRADES WITH SUBROUTINE WITHIN TRADES_LIB -> PARAMETER NAMES, MINMAX, INTEGRATION ARGS, READ DATA ...
    pytrades.initialize_trades(working_path, cli.sub_folder, nthreads)

    # RETRIEVE DATA AND VARIABLES FROM TRADES_LIB MODULE
    n_bodies = pytrades.n_bodies  # NUMBER OF TOTAL BODIES OF THE SYSTEM
    n_planets = n_bodies - 1  # NUMBER OF PLANETS IN THE SYSTEM
    ndata = pytrades.ndata  # TOTAL NUMBER OF DATA AVAILABLE
    npar = pytrades.npar  # NUMBER OF TOTAL PARAMATERS ~n_planets X 6
    nfit = pytrades.nfit  # NUMBER OF PARAMETERS TO FIT
    nfree = pytrades.nfree  # NUMBER OF FREE PARAMETERS (ie nrvset)
    dof = pytrades.dof  # NUMBER OF DEGREES OF FREEDOM = NDATA - NFIT
    global inv_dof
    #inv_dof = np.float64(1.0 / dof)
    inv_dof = pytrades_lib.pytrades.inv_dof

    # READ THE NAMES OF THE PARAMETERS FROM THE TRADES_LIB AND CONVERT IT TO PYTHON STRINGS
    str_len = pytrades.str_len
    temp_names = pytrades.get_parameter_names(nfit, str_len)
    trades_names = anc.convert_fortran_charray2python_strararray(temp_names)
    fitting_names = anc.trades_names_to_emcee(trades_names)

    # save initial_fitting parameters into array
    # NOTE(review): trades_parameters is never assigned in this function
    # before use, so this raises NameError at runtime -- it should
    # probably be fetched from the TRADES module (e.g.
    # pytrades.fitting_parameters) first. TODO confirm against the
    # original script this was copied from.
    original_fit_parameters = trades_parameters.copy()
    fitting_parameters = anc.e_to_sqrte_fitting(trades_parameters, trades_names)

    trades_minmax = pytrades.parameters_minmax  # PARAMETER BOUNDARIES
    parameters_minmax = anc.e_to_sqrte_boundaries(trades_minmax, trades_names)

    # RADIAL VELOCITIES SET
    n_rv = pytrades_lib.pytrades.nrv
    n_set_rv = pytrades_lib.pytrades.nrvset

    # TRANSITS SET
    n_t0 = pytrades_lib.pytrades.nt0
    n_t0_sum = pytrades_lib.pytrades.ntts
    n_set_t0 = 0
    # count planets that actually have transit times
    for i in range(0, n_bodies - 1):
        if (n_t0[i] > 0):
            n_set_t0 += 1

    # compute global constant for the loglhd
    global ln_err_const
    ln_err_const = pytrades_lib.pytrades.ln_err_const

    # INITIALISE SCRIPT FOLDER/LOG FILE
    working_folder, run_log, of_run = init_folder(working_path, cli.sub_folder)

    anc.print_both('', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both(' pyTRADES', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both('', of_run)
    anc.print_both(' WORKING PATH = %s' % (working_path), of_run)
    anc.print_both(' dof = ndata(%d) - nfit(%d) - nfree(%d) = %d' % (ndata, nfit, nfree, dof), of_run)
    anc.print_both(' Total N_RV = %d for %d set(s)' % (n_rv, n_set_rv), of_run)
    anc.print_both(' Total N_T0 = %d for %d out of %d planet(s)' % (n_t0_sum, n_set_t0, n_planets), of_run)
    anc.print_both(' %s = %.7f' % ('log constant error = ', ln_err_const), of_run)

    # SET PYPOLYCHORD
    # needed to define number of derived parameters for PyPolyChord
    nder = 0

    # define the loglikelihood function for PyPolyChord
    def likelihood(fitting_par):
        # derived parameters
        derived_par = [0.0] * nder
        # convert fitting_par to trades_par (sqrt(e) -> e parametrisation)
        trades_par = anc.sqrte_to_e_fitting(fitting_par, fitting_names)
        loglhd = 0.
        check = 1
        loglhd, check = pytrades.fortran_loglikelihood(np.array(trades_par, dtype=np.float64))
        loglhd = loglhd + ln_err_const  # ln_err_const: global variable
        return loglhd, derived_par

    # define the prior for the fitting parameters
    def prior(hypercube):
        """ Uniform prior from [-1,1]^D. """
        fitting_par = [0.0] * nfit
        for i, x in enumerate(hypercube):
            fitting_par[i] = PC_priors.UniformPrior(parameters_minmax[i, 0], parameters_minmax[i, 1])(x)
        return fitting_par

    # set PyPolyChord: the pc_settings define how to run PC, e.g. nlive,
    # precision_criterion, etc. -- see the PolyChordSettings docstring in
    # pypolychord.settings for the full keyword reference.
    pc_settings = PC_settings.PolyChordSettings(nfit, nder)
    pc_settings.base_dir = cli.pc_output_dir
    pc_settings.file_root = cli.pc_output_files
    pc_settings.do_clustering = True

    # RUN POLYCHORD
    pc_run = PC.run_polychord(likelihood, nfit, nder, pc_settings, prior)

    # set label and legend names
    kel_plot_labels = anc.keplerian_legend(fitting_names, cli.m_type)
    pc_paramnames = [('%s' % (fitting_names[i]), r'%s' % (kel_plot_labels[i])) for i in range(nfit)]
    #pc_paramnames += [('r*', 'r')]
    pc_run.make_paramnames_files(pc_paramnames)

    # optional getdist triangle plot of the PolyChord posterior
    if (cli.pc_plot):
        import getdist.plots
        import matplotlib.pyplot as plt
        plt.rc('font', **{'family': 'serif', 'serif': ['Computer Modern Roman']})
        plt.rc('text', usetex=True)
        posterior = pc_run.posterior
        g = getdist.plots.getSubplotPlotter()
        g.triangle_plot(posterior, filled=True)
        plt.show()

    return
def main():
    """
    Correlation (triangle) plot of the emcee posterior.

    Builds an nfit x nfit grid: 2D histogram + contours for each parameter
    pair below the diagonal, 1D (optionally cumulative) histograms on the
    diagonal, with an optional overplotted reference solution. Saves
    'emcee_triangle.png' and '.pdf' in the emcee plot folder. Returns None.
    """
    # ---
    # initialize logger (file + screen handlers)
    logger = logging.getLogger("Main_log")
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(asctime)s - %(message)s")

    # layout tuning knobs, adjusted below as a function of nfit
    label_separation = -0.90  # if uses this, comment ax.xyaxis.labelpad = label_pad
    label_pad = 12  # it uses this, comment ax.xyaxis.set_label_coords()...
    label_size = 8
    ticklabel_size = 4

    def set_xaxis(ax, label_size, label_separation, label_pad, ticklabel_size, kel_label, ticks_formatter, tick_fmt='%.4f'):
        # Configure the bottom-row x axis: label placement, rotated tick
        # labels, and evenly spaced ticks over
        # [ticks_formatter[0], ticks_formatter[1]] in ticks_formatter[2] steps.
        ax.get_xaxis().set_visible(True)
        ax.xaxis.set_tick_params(labelsize=ticklabel_size)
        ax.xaxis.set_label_coords(0.5, label_separation)
        #ax.ticklabel_format(style='plain', axis='both', useOffset=False)
        plt.setp(ax.xaxis.get_majorticklabels(), rotation=70)
        #ax.xaxis.labelpad = label_pad
        ax.set_xlabel(kel_label, fontsize=label_size)
        tick_step = (ticks_formatter[1] - ticks_formatter[0]) / ticks_formatter[2]
        ax.xaxis.set_ticks(np.arange(ticks_formatter[0], ticks_formatter[1], tick_step))
        tick_formatter = FormatStrFormatter(tick_fmt)
        ax.xaxis.set_major_formatter(tick_formatter)
        return

    def set_yaxis(ax, label_size, label_separation, label_pad, ticklabel_size, kel_label, ticks_formatter, tick_fmt='%.4f'):
        # Same as set_xaxis but for the first-column y axis.
        ax.get_yaxis().set_visible(True)
        ax.yaxis.set_tick_params(labelsize=ticklabel_size)
        ax.yaxis.set_label_coords(label_separation, 0.5)
        #ax.ticklabel_format(style='plain', axis='both', useOffset=False)
        #ax.yaxis.labelpad = label_pad
        ax.set_ylabel(kel_label, fontsize=label_size)
        tick_step = (ticks_formatter[1] - ticks_formatter[0]) / ticks_formatter[2]
        ax.yaxis.set_ticks(np.arange(ticks_formatter[0], ticks_formatter[1], tick_step))
        tick_formatter = FormatStrFormatter(tick_fmt)
        ax.yaxis.set_major_formatter(tick_formatter)
        return

    print
    print ' ================== '
    print ' CORRELATION PLOTS'
    print ' ================== '
    print

    # read cli arguments
    cli = anc.get_args()
    emcee_plots = anc.prepare_emcee_plot_folder(cli.full_path)
    log_file = os.path.join(emcee_plots, 'emcee_triangle_log.txt')
    flog = logging.FileHandler(log_file, 'w')
    flog.setLevel(logging.DEBUG)
    flog.setFormatter(formatter)
    logger.addHandler(flog)
    # log screen
    slog = logging.StreamHandler()
    slog.setLevel(logging.DEBUG)
    slog.setFormatter(formatter)
    logger.addHandler(slog)

    # computes mass conversion factor
    m_factor, m_unit = anc.mass_type_factor(1., cli.m_type, False)

    # set emcee and trades folder
    emcee_folder = cli.full_path
    trades_folder = os.path.join(os.path.dirname(cli.full_path), '')
    # and best folder
    emcee_file, emcee_best, folder_best = anc.get_emcee_file_and_best(emcee_folder, cli.temp_status)
    # get data from the hdf5 file
    parameter_names_emcee, parameter_boundaries, chains, acceptance_fraction, autocor_time, lnprobability, ln_err_const, completed_steps = anc.get_data(emcee_file, cli.temp_status)
    # print Memory occupation of ...
    anc.print_memory_usage(chains)

    nfit, nwalkers, nruns, nburnin, nruns_sel = anc.get_emcee_parameters(chains, cli.temp_status, cli.nburnin, completed_steps)
    logger.info('nfit(%d), nwalkers(%d), nruns(%d), nburnin(%d), nruns_sel(%d)' % (nfit, nwalkers, nruns, nburnin, nruns_sel))

    # push axis labels further out as the grid grows
    #if (nfit <= 3): label_separation = -0.1
    if (nfit > 2):
        #label_separation = -0.1 - ( 0.075 * (nfit-2) ) # good for figsize=(12,12)
        label_separation = -0.15 - (0.125 * (nfit - 2))  # testing

    # shrink label font as the grid grows
    #label_size = label_size - 1 * int(nfit / 5.)
    label_size = label_size - 1 * int(nfit / 2.5)

    # set label and legend names
    kel_plot_labels = anc.keplerian_legend(parameter_names_emcee, cli.m_type)

    chains_T_full, parameter_boundaries = anc.select_transpose_convert_chains(nfit, nwalkers, nburnin, nruns, nruns_sel, m_factor, parameter_names_emcee, parameter_boundaries, chains)

    chains_T, flatchain_posterior_0, lnprob_burnin, thin_steps = anc.thin_the_chains(cli.use_thin, nburnin, nruns, nruns_sel, autocor_time, chains_T_full, lnprobability, burnin_done=False)

    flatchain_posterior_0 = anc.fix_lambda(flatchain_posterior_0, parameter_names_emcee)

    if (cli.boot_id > 0):
        # bootstrap-like file wants masses back in Msun
        flatchain_posterior_msun = anc.posterior_back_to_msun(m_factor, parameter_names_emcee, flatchain_posterior_0)
        boot_file = anc.save_bootstrap_like(emcee_folder, cli.boot_id, parameter_names_emcee, flatchain_posterior_msun)
        logger.info('saved bootstrap like file: %s' % (boot_file))
        del flatchain_posterior_msun

    k = anc.get_auto_bins(flatchain_posterior_0)

    if (cli.overplot is not None):
        if (cli.adhoc is not None):
            # ad-hoc parameter file overrides the hdf5 sample id;
            # 777 is used as a sentinel id for the ad-hoc case
            overp_names, read_par = anc.read_fitted_file(cli.adhoc)
            cli.overplot = 777
        else:
            ## OPEN summary_parameters.hdf5 FILE
            s_h5f = h5py.File(os.path.join(cli.full_path, 'summary_parameters.hdf5'), 'r')
            # take only the selected sample
            s_overplot = '%04d' % (cli.overplot)
            read_par = s_h5f['parameters/%s/fitted/parameters' % (s_overplot)][...]
            s_h5f.close()
        # fitted parameters has always Mp/Ms in Msun/Mstar, so it is needed to rescale it properly
        overp_par = read_par.copy()
        for ii in range(0, nfit):
            if ('Ms' in parameter_names_emcee[ii]):
                overp_par[ii] = overp_par[ii] * m_factor

    fig = plt.figure(figsize=(6, 6))
    fig.subplots_adjust(hspace=0.05, wspace=0.05)

    for ix in range(0, nfit, 1):
        x_data = flatchain_posterior_0[:, ix]
        x_min, x_max = anc.compute_limits(x_data, 0.05)
        # degenerate (constant) parameter: fall back to the prior bounds
        if (x_min == x_max):
            x_min = parameter_boundaries[ix, 0]
            x_max = parameter_boundaries[ix, 1]
        for iy in range(nfit - 1, -1, -1):
            y_data = flatchain_posterior_0[:, iy]
            y_min, y_max = anc.compute_limits(y_data, 0.05)
            if (y_min == y_max):
                y_min = parameter_boundaries[iy, 0]
                y_max = parameter_boundaries[iy, 1]
            if (iy > ix):
                # off-diagonal cell: 2D correlation plot
                logger.info('%s vs %s' % (parameter_names_emcee[ix], parameter_names_emcee[iy]))
                ax = plt.subplot2grid((nfit + 1, nfit), (iy, ix))
                hist2d_counts, xedges, yedges, image2d = ax.hist2d(x_data, y_data, bins=k, range=[[x_data.min(), x_data.max()], [y_data.min(), y_data.max()]], cmap=cm.gray_r, normed=False)
                # second (un-normalised) 2D histogram only to place contours
                new_k = k
                hist2d_counts_2, xedges_2, yedges_2 = np.histogram2d(x_data, y_data, bins=new_k, range=[[x_data.min(), x_data.max()], [y_data.min(), y_data.max()]], density=False)
                # bin centres for the contour grid
                x_bins = [0.5 * (xedges_2[i] + xedges_2[i + 1]) for i in range(0, new_k)]
                y_bins = [0.5 * (yedges_2[i] + yedges_2[i + 1]) for i in range(0, new_k)]
                nl = 5
                # 2D sigmas: 0sigma, 1sigma, 2sigma, 3sigma, ..
                # NOTE(review): levels is computed but the contour call
                # below passes nl (a level *count*), not these values
                levels = [1. - np.exp(-0.5 * ii) for ii in range(0, nl)]
                ax.contour(x_bins, y_bins, hist2d_counts_2.T, nl, cmap=cm.viridis, linestyles='solid', linewidths=0.5)
                if (cli.overplot is not None):
                    # plot selected overplot sample
                    ax.axvline(overp_par[ix], color='C0', ls='--', lw=1.1, alpha=0.5)
                    ax.axhline(overp_par[iy], color='C0', ls='--', lw=1.1, alpha=0.5)
                # axes are hidden everywhere except the outer edge
                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)
                if (iy == nfit - 1):
                    set_xaxis(ax, label_size, label_separation, label_pad, ticklabel_size, kel_plot_labels[ix], [xedges[0], xedges[-1], 4])
                if (ix == 0):
                    set_yaxis(ax, label_size, label_separation, label_pad, ticklabel_size, kel_plot_labels[iy], [yedges[0], yedges[-1], 5])
                ax.set_ylim([y_min, y_max])
                ax.set_xlim([x_min, x_max])
                plt.draw()
            elif (iy == ix):
                # diagonal cell: 1D distribution plot
                logger.info('%s histogram' % (parameter_names_emcee[ix]))
                ax = plt.subplot2grid((nfit + 1, nfit), (ix, ix))
                # bottom-right diagonal panel is rotated to share the
                # y axis of its row
                if (ix == nfit - 1):
                    hist_orientation = 'horizontal'
                else:
                    hist_orientation = 'vertical'
                idx = np.argsort(x_data)
                if (not cli.cumulative):
                    # HISTOGRAM
                    hist_counts, edges, patces = ax.hist(x_data, bins=k, range=[x_data.min(), x_data.max()], histtype='stepfilled', color='darkgrey', edgecolor='None', align='mid', orientation=hist_orientation, density=True, stacked=True)
                else:
                    # CUMULATIVE HISTOGRAM
                    hist_counts, edges, patces = ax.hist(x_data, bins=k, range=[x_data.min(), x_data.max()], histtype='stepfilled', color='darkgrey', edgecolor='None', align='mid', orientation=hist_orientation, density=True, stacked=True, cumulative=True)
                if (ix == nfit - 1):
                    ax.set_ylim([y_min, y_max])
                    if (cli.overplot is not None):
                        # plot selected overplot sample
                        ax.axhline(overp_par[ix], color='C0', ls='--', lw=1.1, alpha=0.5)
                else:
                    ax.set_xlim([x_min, x_max])
                    if (cli.overplot is not None):
                        # plot selected overplot sample
                        ax.axvline(overp_par[ix], color='C0', ls='--', lw=1.1, alpha=0.5)
                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)
                ax.set_title(kel_plot_labels[ix], fontsize=label_size)
                plt.draw()

    logger.info('saving plot')
    emcee_fig_file = os.path.join(emcee_plots, 'emcee_triangle.png')
    fig.savefig(emcee_fig_file, bbox_inches='tight', dpi=300)
    logger.info('png done')
    emcee_fig_file = os.path.join(emcee_plots, 'emcee_triangle.pdf')
    fig.savefig(emcee_fig_file, bbox_inches='tight', dpi=96)
    logger.info('pdf done')
    plt.close(fig)
    logger.info('')

    return
def main():
    """Gelman-Rubin convergence diagnostics for a TRADES+emcee run.

    Reads the emcee chains from the HDF5 file in the folder given on the
    command line, evaluates the Gelman-Rubin statistic with three
    implementations (LBo, LMa/PyORBIT, pymc) on a grid of increasing chain
    lengths, and saves one convergence plot per fitted parameter in the
    emcee plots folder.  Logs to GelmanRubin_log.txt and to the screen.
    """
    # --- initialize logger: file handler + screen handler, same formatter
    logger = logging.getLogger("Main_log")
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(asctime)s - %(message)s")

    # read cli arguments
    cli = anc.get_args()

    emcee_plots = anc.prepare_emcee_plot_folder(cli.full_path)
    log_file = os.path.join(emcee_plots, 'GelmanRubin_log.txt')
    flog = logging.FileHandler(log_file, 'w')
    flog.setLevel(logging.DEBUG)
    flog.setFormatter(formatter)
    logger.addHandler(flog)
    # log screen
    slog = logging.StreamHandler()
    slog.setLevel(logging.DEBUG)
    slog.setFormatter(formatter)
    logger.addHandler(slog)

    # computes mass conversion factor
    m_factor, m_unit = anc.mass_type_factor(1., cli.m_type, False)

    # set emcee and trades folder
    emcee_folder = cli.full_path
    trades_folder = os.path.join(os.path.dirname(cli.full_path), '')
    # and best folder
    emcee_file, emcee_best, folder_best = anc.get_emcee_file_and_best(
        emcee_folder, cli.temp_status)

    # get data from the hdf5 file
    parameter_names_emcee, parameter_boundaries, chains, \
        acceptance_fraction, autocor_time, lnprobability, \
        ln_err_const, completed_steps = anc.get_data(emcee_file, cli.temp_status)

    # print memory occupation of the chains
    anc.print_memory_usage(chains)

    nfit, nwalkers, nruns, nburnin, nruns_sel = anc.get_emcee_parameters(
        chains, cli.temp_status, cli.nburnin, completed_steps)
    logger.info(
        'nfit(%d), nwalkers(%d), nruns(%d), nburnin(%d), nruns_sel(%d)' %
        (nfit, nwalkers, nruns, nburnin, nruns_sel))

    # set label and legend names
    kel_labels = anc.keplerian_legend(parameter_names_emcee, cli.m_type)

    chains_T, parameter_boundaries = anc.select_transpose_convert_chains(
        nfit, nwalkers, nburnin, nruns, nruns_sel, m_factor,
        parameter_names_emcee, parameter_boundaries, chains)

    if (cli.temp_status):
        n_steps = completed_steps
    else:
        n_steps = nruns

    # grid of chain lengths at which the GR statistic is evaluated;
    # sel_steps == 0 means "evaluate at every step"
    sel_steps = int(cli.sel_steps)
    if (sel_steps == 0):
        sel_steps = n_steps

    # dtype=int instead of the deprecated (and removed) np.int alias
    steps = np.linspace(start=0, stop=n_steps, num=sel_steps,
                        endpoint=True, dtype=int)
    steps[0] = 10  # never evaluate GR on ~0 steps
    sel_steps = steps.shape[0]

    # GR statistic per (chain length, parameter); initialized to 100 so
    # un-computed entries read as clearly non-converged
    gr_Rc_2 = np.ones((sel_steps, nfit)) + 99.
    gr_Rc_pyorbit = np.ones((sel_steps, nfit)) + 99.
    gr_Rc_pymc = np.ones((sel_steps, nfit)) + 99.

    for ifit in range(0, nfit):
        logger.info('Parameter: %13s' % (parameter_names_emcee[ifit]))
        fig = plt.figure(figsize=(6, 6))
        ax = plt.subplot2grid((1, 1), (0, 0))

        for istep in range(0, sel_steps):
            # LBo implementation
            time0 = time.time()
            gr_Rc_2[istep, ifit] = anc.GelmanRubin(chains_T[:steps[istep], :, ifit])
            if (istep == sel_steps - 1):
                LBo_d, LBo_h, LBo_m, LBo_s = anc.computation_time(time.time() - time0)
                logger.info(
                    'steps = %6d for %13s ==> Gelman-Rubin test: LBo time = %2d m %6.3f s' %
                    (steps[istep], parameter_names_emcee[ifit], LBo_m, LBo_s))

            # LMa (PyORBIT) implementation.
            # BUGFIX: store the scalar in column ifit only; the original
            # assigned to the whole row [istep, :], clobbering the values
            # already computed for the other parameters.
            time0 = time.time()
            gr_Rc_pyorbit[istep, ifit] = anc.GelmanRubin_PyORBIT(
                chains_T[:steps[istep], :, ifit])
            if (istep == sel_steps - 1):
                LMa_d, LMa_h, LMa_m, LMa_s = anc.computation_time(time.time() - time0)
                logger.info(
                    'steps = %6d for %13s ==> Gelman-Rubin test: LMa time = %2d m %6.3f s' %
                    (steps[istep], parameter_names_emcee[ifit], LMa_m, LMa_s))

            # pymc implementation; same row-clobbering bug fixed here.
            # sqrt taken because the helper apparently returns Rc**2
            # -- TODO confirm against anc.GelmanRubin_pymc
            time0 = time.time()
            gr_Rc_pymc[istep, ifit] = np.sqrt(
                anc.GelmanRubin_pymc(chains_T[:steps[istep], :, ifit].T))
            if (istep == sel_steps - 1):
                pymc_d, pymc_h, pymc_m, pymc_s = anc.computation_time(
                    time.time() - time0)
                logger.info(
                    'steps = %6d for %13s ==> Gelman-Rubin test: pymc time = %2d m %6.3f s' %
                    (steps[istep], parameter_names_emcee[ifit], pymc_m, pymc_s))

        # Rc = 1.01 is the usual convergence threshold
        ax.axhline(1.01, color='gray')
        ax.plot(steps, gr_Rc_2[:, ifit], '-', color='k', lw=1.3, label='LBo 2')
        ax.plot(steps, gr_Rc_pyorbit[:, ifit], '--', color='lightgray',
                alpha=0.7, label='LMa')
        ax.plot(steps, gr_Rc_pymc[:, ifit], '-.', color='red', lw=1.5,
                alpha=0.7, label='pymc')
        ax.set_ylim(0.95, 2.3)
        ax.set_xlabel('steps (%s)' % (parameter_names_emcee[ifit].strip()))
        ax.legend(loc='center left', fontsize=9, bbox_to_anchor=(1, 0.5))

        fig_file = os.path.join(
            emcee_plots,
            'GR_%03d_%s.png' % (ifit + 1, parameter_names_emcee[ifit]))
        fig.savefig(fig_file, bbox_inches='tight', dpi=200)
        plt.close(fig)
        # BUGFIX: log the file actually written; the old message reported
        # a different, non-existent name (GRtrace_pam_*.png)
        logger.info('saved plot %s' % (fig_file))

    logger.info('')

    return
def main(): print print ' ======================== ' print ' TRADES+EMCEE CHAIN PLOTS' print ' ======================== ' print # read cli arguments cli = anc.get_args() # computes mass conversion factor #m_factor, m_unit = anc.mass_conversion_factor_and_unit(cli.m_type) m_factor, m_unit = anc.mass_type_factor(1., cli.m_type, False) # set emcee and trades folder emcee_folder = cli.full_path trades_folder = os.path.join(os.path.dirname(cli.full_path), '') # and best folder emcee_file, emcee_best, folder_best = anc.get_emcee_file_and_best( emcee_folder, cli.temp_status) parameter_names_emcee, parameter_boundaries, chains, acceptance_fraction, autocor_time, lnprobability, ln_err_const, completed_steps = anc.get_data( emcee_file, cli.temp_status) # set label and legend names kel_labels = anc.keplerian_legend(parameter_names_emcee, cli.m_type) nfit, nwalkers, nruns, nburnin, nruns_sel = anc.get_emcee_parameters( chains, cli.temp_status, cli.nburnin, completed_steps) anc.print_memory_usage(chains) chains_T_full, parameter_boundaries = anc.select_transpose_convert_chains( nfit, nwalkers, nburnin, nruns, nruns_sel, m_factor, parameter_names_emcee, parameter_boundaries, chains) if (cli.use_thin or cli.use_thin > 0): chains_T, flatchain_posterior_0, lnprob_burnin, thin_steps, chains_T_full_thinned = anc.thin_the_chains( cli.use_thin, nburnin, nruns, nruns_sel, autocor_time, chains_T_full, lnprobability, burnin_done=False, full_chains_thinned=True) nburnin_plt = np.rint(nburnin / thin_steps).astype(int) nend = np.rint(nruns / thin_steps).astype(int) else: chains_T, flatchain_posterior_0, lnprob_burnin, thin_steps = anc.thin_the_chains( cli.use_thin, nburnin, nruns, nruns_sel, autocor_time, chains_T_full, lnprobability, burnin_done=False, full_chains_thinned=False) nburnin_plt = nburnin nend = nruns #name_par, name_excluded = anc.get_sample_list(cli.sample_str, parameter_names_emcee) #sample_parameters, idx_sample = anc.pick_sample_parameters(flatchain_posterior_0, 
parameter_names_emcee, name_par = name_par, name_excluded = name_excluded) #flatchain_posterior_1 = flatchain_posterior_0 # fix lambda? #flatchain_posterior_0 = anc.fix_lambda(flatchain_posterior_0, #parameter_names_emcee #) if (cli.boot_id > 0): flatchain_posterior_msun = anc.posterior_back_to_msun( m_factor, parameter_names_emcee, flatchain_posterior_0) boot_file = anc.save_bootstrap_like(emcee_folder, cli.boot_id, parameter_names_emcee, flatchain_posterior_msun) logger.info('saved bootstrap like file: %s' % (boot_file)) del flatchain_posterior_msun k = anc.get_auto_bins(flatchain_posterior_0) try: overplot = int(cli.overplot) except: overplot = None ## OPEN summary_parameters.hdf5 FILE s_h5f = h5py.File(os.path.join(cli.full_path, 'summary_parameters.hdf5'), 'r') if (overplot is not None): # sample_parameters ci_fitted = s_h5f['confidence_intervals/fitted/ci'][...] sample_parameters = s_h5f['parameters/0666/fitted/parameters'][...] sample_lgllhd = s_h5f['parameters/0666'].attrs['lgllhd'] try: sample2_parameters = s_h5f['parameters/0667/fitted/parameters'][ ...] sample2_lgllhd = s_h5f['parameters/0667'].attrs['lgllhd'] except: sample2_parameters = None sample2_lgllhd = None try: sample3_parameters = s_h5f['parameters/0668/fitted/parameters'][ ...] sample3_lgllhd = s_h5f['parameters/0668'].attrs['lgllhd'] except: sample3_parameters = None sample3_lgllhd = None median_parameters = s_h5f['parameters/1051/fitted/parameters'][...] median_lgllhd = s_h5f['parameters/1051'].attrs['lgllhd'] max_lnprob_parameters = s_h5f['parameters/2050/fitted/parameters'][...] max_lgllhd = s_h5f['parameters/2050'].attrs['lgllhd'] try: mode_parameters = s_h5f['parameters/3051/fitted/parameters'][...] mode_lgllhd = s_h5f['parameters/3051'].attrs['lgllhd'] except: mode_parameters = None mode_lgllhd = None overp_par = s_h5f['parameters/%04d/fitted/parameters' % (overplot)][...] 
overp_lgllhd = s_h5f['parameters/%04d' % (overplot)].attrs['lgllhd'] #nfit = s_h5f['confidence_intervals/fitted'].attrs['nfit'] ndata = s_h5f['confidence_intervals/fitted'].attrs['ndata'] dof = s_h5f['confidence_intervals/fitted'].attrs['dof'] s_h5f.close() emcee_plots = os.path.join(cli.full_path, 'plots') if (not os.path.isdir(emcee_plots)): os.makedirs(emcee_plots) for i in range(0, nfit): if ('Ms' in parameter_names_emcee[i]): conv_plot = m_factor else: conv_plot = 1. emcee_fig_file = os.path.join( emcee_plots, 'chain_%03d_%s.png' % (i + 1, parameter_names_emcee[i].strip())) print ' %s' % (emcee_fig_file), #fig, (axChain, axHist) = plt.subplots(nrows=1, ncols=2, figsize=(12,12)) fig, (axChain, axHist) = plt.subplots(nrows=1, ncols=2, figsize=(6, 6)) (counts, bins_val, patches) = axHist.hist(flatchain_posterior_0[:, i], bins=k, range=(flatchain_posterior_0[:, i].min(), flatchain_posterior_0[:, i].max()), orientation='horizontal', density=True, stacked=True, histtype='stepfilled', color='darkgrey', edgecolor='lightgray', align='mid') xpdf = scipy_norm.pdf(flatchain_posterior_0[:, i], loc=flatchain_posterior_0[:, i].mean(), scale=flatchain_posterior_0[:, i].std()) idx = np.argsort(flatchain_posterior_0[:, i]) axHist.plot(xpdf[idx], flatchain_posterior_0[idx, i], color='black', marker='None', ls='-.', lw=1.5, label='pdf') # chains after burn-in #axChain.plot(chains_T[:,:,i], '-', alpha=0.3) # chains with the burn-in if (cli.use_thin): axChain.plot(chains_T_full_thinned[:, :, i], '-', alpha=0.3) else: axChain.plot(chains_T_full[:, :, i], '-', alpha=0.3) axChain.axvspan(0, nburnin_plt, color='gray', alpha=0.45) axChain.axvline(nburnin_plt, color='gray', ls='-', lw=1.5) if (overplot is not None): if (mode_parameters is not None): # plot of mode (mean of higher peak/bin) axChain.axhline(mode_parameters[i] * conv_plot, color='red', ls='-', lw=2.1, alpha=1, label='mode') # plot of median axChain.axhline(median_parameters[i] * conv_plot, marker='None', c='blue', ls='-', 
lw=2.1, alpha=1.0, label='median fit') # plot of max_lnprob axChain.axhline(max_lnprob_parameters[i] * conv_plot, marker='None', c='black', ls='-', lw=1.1, alpha=1.0, label='max lnprob') if (sample_parameters is not None): # plot of sample_parameters axChain.axhline(sample_parameters[i] * conv_plot, marker='None', c='orange', ls='--', lw=2.3, alpha=0.77, label='picked: %12.7f' % (sample_parameters[i])) if (sample2_parameters is not None): # plot of sample2_parameters axChain.axhline(sample2_parameters[i] * conv_plot, marker='None', c='cyan', ls=':', lw=2.7, alpha=0.77, label='close lgllhd: %12.7f' % (sample2_parameters[i])) if (sample3_parameters is not None): # plot of sample3_parameters axChain.axhline(sample3_parameters[i] * conv_plot, marker='None', c='yellow', ls='-', lw=3.1, alpha=0.66, label='close lgllhd: %12.7f' % (sample3_parameters[i])) if (overplot not in [1050, 1051, 2050, 3050, 3051]): axChain.axhline(overp_par[i] * conv_plot, marker='None', c='black', ls='--', lw=2.5, alpha=0.6, label='overplot %d' % (overplot)) # plot ci axChain.axhline(ci_fitted[i, 0] * conv_plot, marker='None', c='forestgreen', ls='-', lw=2.1, alpha=1.0, label='CI 15.865th (%.5f)' % (ci_fitted[i, 0] * conv_plot)) axChain.axhline(ci_fitted[i, 1] * conv_plot, marker='None', c='forestgreen', ls='-', lw=2.1, alpha=1.0, label='CI 84.135th (%.5f)' % (ci_fitted[i, 1] * conv_plot)) axChain.ticklabel_format(useOffset=False) xlabel = '$N_\mathrm{steps}$' if (cli.use_thin): xlabel = '$N_\mathrm{steps} \\times %d$' % (thin_steps) axChain.set_xlabel(xlabel) axChain.set_xlim([0, nend]) axChain.set_ylabel(kel_labels[i]) y_min = flatchain_posterior_0[:, i].min() y_max = flatchain_posterior_0[:, i].max() axChain.set_ylim([y_min, y_max]) axChain.set_title('Full chain %s:=[%.3f , %.3f]' % (kel_labels[i], parameter_boundaries[i, 0], parameter_boundaries[i, 1])) plt.draw() axHist.ticklabel_format(useOffset=False) axHist.tick_params(direction='inout', labelleft=False) axHist.set_ylim([y_min, y_max]) if 
(overplot is not None): if (mode_parameters is not None): # plot mode axHist.axhline(mode_parameters[i] * conv_plot, color='red', ls='-', lw=2.1, alpha=1, label='mode') # plot median axHist.axhline(median_parameters[i] * conv_plot, marker='None', c='blue', ls='-', lw=2.1, alpha=1.0, label='median fit') # plot of max_lnprob axHist.axhline(max_lnprob_parameters[i] * conv_plot, marker='None', c='black', ls='-', lw=1.1, alpha=1.0, label='max lnprob') if (sample_parameters is not None): # plot of sample_parameters axHist.axhline(sample_parameters[i] * conv_plot, marker='None', c='orange', ls='--', lw=2.3, alpha=0.77, label='picked: %12.7f' % (sample_parameters[i] * conv_plot)) if (sample2_parameters is not None): # plot of sample2_parameters axHist.axhline(sample2_parameters[i] * conv_plot, marker='None', c='cyan', ls=':', lw=2.7, alpha=0.77, label='close lgllhd: %12.7f' % (sample2_parameters[i])) if (sample3_parameters is not None): # plot of sample3_parameters axHist.axhline(sample3_parameters[i] * conv_plot, marker='None', c='yellow', ls='-', lw=3.1, alpha=0.66, label='close lgllhd: %12.7f' % (sample3_parameters[i])) if (overplot not in [1050, 1051, 2050, 3050, 3051]): axHist.axhline(overp_par[i] * conv_plot, marker='None', c='black', ls='--', lw=2.5, alpha=0.8, label='overplot %d' % (overplot)) # plot ci axHist.axhline(ci_fitted[i, 0] * conv_plot, marker='None', c='forestgreen', ls='-', lw=2.1, alpha=1.0, label='CI 15.865th (%.5f)' % (ci_fitted[i, 0] * conv_plot)) axHist.axhline(ci_fitted[i, 1] * conv_plot, marker='None', c='forestgreen', ls='-', lw=2.1, alpha=1.0, label='CI 84.135th (%.5f)' % (ci_fitted[i, 1] * conv_plot)) axHist.set_title('Distribution of posterior chain') axHist.legend(loc='center left', fontsize=9, bbox_to_anchor=(1, 0.5)) plt.draw() fig.savefig(emcee_fig_file, bbox_inches='tight', dpi=150) print ' saved' print #fig = plt.figure(figsize=(12,12)) fig = plt.figure(figsize=(6, 6)) # lnprob xlabel = '$N_\mathrm{steps}$' if (cli.use_thin): xlabel = 
'$N_\mathrm{steps} \\times %d$' % (thin_steps) ax = plt.subplot2grid((2, 1), (0, 0)) ax.plot(lnprob_burnin.T, '-', alpha=0.3) if (overplot is not None): posterior_msun = anc.posterior_back_to_msun(m_factor, parameter_names_emcee, flatchain_posterior_0) post_sel, lnprob_sel = anc.select_within_all_ci( posterior_msun, ci_fitted[:, 0:2], lnprob_burnin.T.reshape(-1)) #lnprob_sel = lnprob_burnin.T.reshape((-1)) lgllhd_med = np.percentile(lnprob_burnin.T.reshape(-1), 50., interpolation='midpoint') abs_dlg = np.abs(lnprob_sel - lgllhd_med) lgllhd_mad = np.percentile(abs_dlg, 50., interpolation='midpoint') #lnp_min = np.min(lnprob_sel) #lnp_max = np.max(lnprob_sel) lnp_min = lgllhd_med - lgllhd_mad lnp_max = lgllhd_med + lgllhd_mad print ' lgllhd_med & mad = ', lgllhd_med, lgllhd_mad print ' lnp_min = ', lnp_min, ' lnp_max = ', lnp_max print ' lnl_668 = ', sample3_lgllhd ax.axhline(lgllhd_med, color='black', ls='-', lw=1.6, alpha=0.77) #if(sample2_lgllhd is not None): #ax.axhline(sample2_lgllhd, marker='None', c='cyan',ls=':', lw=2.7, alpha=0.9) if (sample3_lgllhd is not None): ax.axhline(sample3_lgllhd, marker='None', c='yellow', ls='-', lw=3.1, alpha=0.9) ax.axhspan(lnp_min, lnp_max, color='gray', alpha=0.77) ax.axhline(lnp_min, color='black', ls='--', lw=1.6, alpha=0.77) ax.axhline(lnp_max, color='black', ls='--', lw=1.6, alpha=0.77) min_lnp = np.min(lnprob_burnin.T, axis=0).min() max_lnp = np.max(lnprob_burnin.T, axis=0).max() y_min, y_max = anc.compute_limits(np.asarray([min_lnp, max_lnp]), 0.05) ax.set_ylim((y_min, y_max)) ax.set_ylabel('lnprob') #ax.get_xaxis().set_visible(False) ax.set_xlabel(xlabel) # chi2r chi2r = -2. * (lnprob_burnin.T - ln_err_const) / np.float64(dof) ax = plt.subplot2grid((2, 1), (1, 0)) ax.axhline(1.0, color='gray', ls='-') ax.plot(chi2r, '-', alpha=0.3) if (overplot is not None): c2r_med = -(2. * (lgllhd_med - ln_err_const)) / np.float64(dof) c2r_smax = -(2. * (lnp_min - ln_err_const)) / np.float64(dof) c2r_smin = -(2. 
* (lnp_max - ln_err_const)) / np.float64(dof) print ' c2r_med = ', c2r_med print ' c2r_smin = ', c2r_smin, ' c2r_smax = ', c2r_smax ax.axhline(c2r_med, color='black', ls='-', lw=1.6, alpha=0.77) ax.axhspan(c2r_smin, c2r_smax, color='gray', alpha=0.77) ax.axhline(c2r_smin, color='black', ls='--', lw=1.6, alpha=0.77) ax.axhline(c2r_smax, color='black', ls='--', lw=1.6, alpha=0.77) #if(sample2_lgllhd is not None): #c2r_sample2 = -2.*(sample2_lgllhd - ln_err_const)/np.float64(dof) #ax.axhline(c2r_sample2, marker='None', c='cyan',ls=':', lw=2.7, alpha=0.9) if (sample3_lgllhd is not None): c2r_sample3 = -2. * (sample3_lgllhd - ln_err_const) / np.float64(dof) ax.axhline(c2r_sample3, marker='None', c='yellow', ls='-', lw=3.1, alpha=0.9) c2r_min = -2. * (y_max - ln_err_const) / np.float64(dof) c2r_max = -2. * (y_min - ln_err_const) / np.float64(dof) ax.set_ylim((c2r_min, c2r_max)) ax.set_ylabel('$\chi^{2}/\mathrm{dof}$') #ax.get_xaxis().set_visible(True) ax.set_xlabel(xlabel) fig.savefig(os.path.join(emcee_plots, 'emcee_lnprobability.png'), bbox_inches='tight', dpi=150) print ' %s saved' % (os.path.join(emcee_plots, 'emcee_lnprobability.png')) return