def init_folder(working_path, sub_folder):
    working_folder = os.path.join(working_path, sub_folder)
    if (not os.path.isdir(working_folder)):
        os.makedirs(working_folder)
    # copy the simulation files into the new working folder
    anc.copy_simulation_files(working_path, working_folder)

    run_log = os.path.join(working_folder, "trades_run.log")
    of_run = open(run_log, 'w')
    anc.print_both("# pyTRADES LOG FILE", of_run)
    anc.print_both("# working_path = %s" % (working_path), of_run)
    anc.print_both("# working_folder = %s" % (working_folder), of_run)
    anc.print_both("# run_log = %s" % (run_log), of_run)

    return working_folder, run_log, of_run
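
# Hypothetical usage of init_folder (the paths below are placeholders, not from the repo):
#
# working_folder, run_log, of_run = init_folder('/path/to/simulation', 'run_emcee')
# anc.print_both('simulation started', of_run)
# ...
# of_run.close()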
def main():
    # MAIN -- TRADES + EMCEE
    # READ COMMAND LINE ARGUMENTS
    cli = get_args()

    # STARTING TIME
    start = time.time()

    # RENAME
    working_path = cli.full_path
    nthreads = cli.nthreads

    # INITIALISE TRADES WITH SUBROUTINE WITHIN TRADES_LIB:
    # PARAMETER NAMES, MINMAX, INTEGRATION ARGS, READ DATA ...
    pytrades_lib.pytrades.initialize_trades(working_path, cli.sub_folder, nthreads)

    # RETRIEVE DATA AND VARIABLES FROM TRADES_LIB MODULE
    n_bodies = pytrades_lib.pytrades.n_bodies  # NUMBER OF TOTAL BODIES OF THE SYSTEM
    n_planets = n_bodies - 1                   # NUMBER OF PLANETS IN THE SYSTEM
    ndata = pytrades_lib.pytrades.ndata        # TOTAL NUMBER OF DATA AVAILABLE
    npar = pytrades_lib.pytrades.npar          # NUMBER OF TOTAL PARAMETERS ~ n_planets X 6
    nfit = pytrades_lib.pytrades.nfit          # NUMBER OF PARAMETERS TO FIT
    nfree = pytrades_lib.pytrades.nfree        # NUMBER OF FREE PARAMETERS (i.e. nrvset)
    dof = pytrades_lib.pytrades.dof            # NUMBER OF DEGREES OF FREEDOM = NDATA - NFIT
    inv_dof = pytrades_lib.pytrades.inv_dof

    # READ THE NAMES OF THE PARAMETERS FROM THE TRADES_LIB AND CONVERT THEM TO PYTHON STRINGS
    str_len = pytrades_lib.pytrades.str_len
    temp_names = pytrades_lib.pytrades.get_parameter_names(nfit, str_len)
    trades_names = anc.convert_fortran_charray2python_strararray(temp_names)
    parameter_names = trades_names

    # INITIAL PARAMETER SET (NEEDED ONLY TO HAVE THE PROPER ARRAY/VECTOR)
    fitting_parameters = pytrades_lib.pytrades.fitting_parameters
    # PARAMETER BOUNDARIES
    parameters_minmax = pytrades_lib.pytrades.parameters_minmax
    # DELTA BETWEEN MAX AND MIN OF BOUNDARIES
    delta_parameters = np.abs(parameters_minmax[:, 1] - parameters_minmax[:, 0])

    # RADIAL VELOCITIES SET
    n_rv = pytrades_lib.pytrades.nrv
    n_set_rv = pytrades_lib.pytrades.nrvset

    # TRANSITS SET
    n_t0 = pytrades_lib.pytrades.nt0
    n_t0_sum = pytrades_lib.pytrades.ntts
    n_set_t0 = 0
    for i in range(0, n_bodies - 1):
        if (n_t0[i] > 0):
            n_set_t0 += 1

    # global constant for the loglhd, computed in fortran from the observed uncertainties
    global ln_err_const
    ln_err_const = pytrades_lib.pytrades.ln_err_const

    # SET EMCEE PARAMETERS
    nwalkers, nruns, nsave, npost = get_emcee_arguments(cli, nfit)

    # INITIALISE SCRIPT FOLDER/LOG FILE
    working_folder, run_log, of_run = init_folder(working_path, cli.sub_folder)

    anc.print_both('', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both(' pyTRADES', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both('', of_run)
    anc.print_both(' WORKING PATH = %s' % (working_path), of_run)
    anc.print_both(' NUMBER OF THREADS = %d' % (nthreads), of_run)
    anc.print_both(' dof = ndata(%d) - nfit(%d) - nfree(%d) = %d'
                   % (ndata, nfit, nfree, dof), of_run)
    anc.print_both(' Total N_RV = %d for %d set(s)' % (n_rv, n_set_rv), of_run)
    anc.print_both(' Total N_T0 = %d for %d out of %d planet(s)'
                   % (n_t0_sum, n_set_t0, n_planets), of_run)
    anc.print_both(' %s = %.7f' % ('log constant error', ln_err_const), of_run)
    anc.print_both(' %s = %.7f' % ('IN FORTRAN log constant error',
                                   pytrades_lib.pytrades.ln_err_const), of_run)

    # INITIALISE PSO ARGUMENTS FROM pso.opt FILE
    pytrades_lib.pytrades.init_pso(1, working_path)  # read PSO options

    # PSO VARIABLES
    np_pso = pytrades_lib.pytrades.np_pso
    nit_pso = pytrades_lib.pytrades.nit_pso
    n_global = pytrades_lib.pytrades.n_global
    anc.print_both(' PSO n_global = %d npop = %d ngen = %d'
                   % (n_global, np_pso, nit_pso), of_run)

    # RUN PSO+EMCEE n_global TIMES
    for iter_global in range(0, n_global):

        threads_pool = emcee.interruptible_pool.InterruptiblePool(1)

        # CREATE PROPER WORKING PATH AND NAME
        i_global = iter_global + 1
        pso_path = os.path.join(os.path.join(working_folder,
                                             '%04d_pso2emcee' % (i_global)), '')
        pytrades_lib.pytrades.path_change(pso_path)

        anc.print_both('\n\n GLOBAL RUN %04d INTO PATH: %s\n' % (i_global, pso_path),
                       of_run)

        if (cli.pso_type == 'run'):
            # RUN PSO
            anc.print_both(' RUN PSO', of_run)

            pso_start = time.time()
            if (not os.path.exists(pso_path)):
                os.makedirs(pso_path)
            # copy files
            anc.copy_simulation_files(working_path, pso_path)

            # CALL RUN_PSO SUBROUTINE FROM TRADES_LIB:
            # RUNS PSO AND COMPUTES THE BEST SOLUTION, SAVING ALL THE POPULATION EVOLUTION
            pso_parameters = fitting_parameters.copy()
            pso_fitness = 0.
            pso_parameters, pso_fitness = pytrades_lib.pytrades.pyrun_pso(nfit, i_global)
            anc.print_both(' completed run_pso', of_run)

            pso_best_evolution = np.asarray(pytrades_lib.pytrades.pso_best_evolution[...],
                                            dtype=np.float64)
            anc.print_both(' pso_best_evolution retrieved', of_run)

            anc.print_both(' last pso_best_evolution', of_run)
            last_pso_parameters = np.asarray(pso_best_evolution[:nfit, -1],
                                             dtype=np.float64)
            last_pso_fitness = pso_best_evolution[-1, -1].astype(np.float64)
            anc.print_both(' fitness = %.7f' % (last_pso_fitness), of_run)

            # SAVE PSO SIMULATION IN pso_run.hdf5 FILE
            print ' Creating pso hdf5 file: %s' % (os.path.join(pso_path, 'pso_run.hdf5'))
            pso_hdf5 = h5py.File(os.path.join(pso_path, 'pso_run.hdf5'), 'w')
            pso_hdf5.create_dataset('population', data=pytrades_lib.pytrades.population,
                                    dtype=np.float64)
            pso_hdf5.create_dataset('population_fitness',
                                    data=pytrades_lib.pytrades.population_fitness,
                                    dtype=np.float64)
            pso_hdf5.create_dataset('pso_parameters', data=pso_parameters,
                                    dtype=np.float64)
            pso_hdf5.create_dataset('pso_fitness', data=np.array(pso_fitness),
                                    dtype=np.float64)
            pso_hdf5.create_dataset('pso_best_evolution', data=pso_best_evolution,
                                    dtype=np.float64)
            pso_hdf5.create_dataset('parameters_minmax', data=parameters_minmax,
                                    dtype=np.float64)
            pso_hdf5.create_dataset('parameter_names', data=parameter_names, dtype='S10')
            pso_hdf5['population'].attrs['npop'] = np_pso
            pso_hdf5['population'].attrs['niter'] = nit_pso
            pso_hdf5['population'].attrs['iter_global'] = iter_global + 1
            pso_hdf5['population'].attrs['nfit'] = nfit
            pso_hdf5.close()

            population = np.asarray(pytrades_lib.pytrades.population, dtype=np.float64)
            population_fitness = np.asarray(pytrades_lib.pytrades.population_fitness,
                                            dtype=np.float64)

            anc.print_both(' ', of_run)
            fitness_iter, lgllhd_iter, check_iter = \
                pytrades_lib.pytrades.write_summary_files(i_global, pso_parameters)

            elapsed = time.time() - pso_start
            elapsed_d, elapsed_h, elapsed_m, elapsed_s = anc.computation_time(elapsed)
            anc.print_both(' ', of_run)
            anc.print_both(' PSO FINISHED in %2d day %02d hour %02d min %.2f sec - bye bye'
                           % (int(elapsed_d), int(elapsed_h), int(elapsed_m), elapsed_s),
                           of_run)

            p0 = compute_initial_walkers(nfit, nwalkers, pso_parameters, parameters_minmax,
                                         parameter_names, cli.delta_sigma, of_run)

        elif (cli.pso_type == 'exists'):
            # READ PREVIOUS PSO_RUN.HDF5 FILE AND INITIALISE POPULATION FOR EMCEE
            anc.print_both(' READ PREVIOUS PSO_RUN.HDF5 FILE AND INITIALISE POPULATION'
                           ' FOR EMCEE', of_run)
            (population, population_fitness, pso_parameters, pso_fitness,
             pso_best_evolution, pso_parameters_minmax, pso_parameter_names,
             pop_shape) = get_pso_data(os.path.join(pso_path, 'pso_run.hdf5'))
            fitness_iter, lgllhd_iter, check_iter = \
                pytrades_lib.pytrades.write_summary_files(i_global, pso_parameters)
            anc.print_both(' read pso_run.hdf5 file with best pso_fitness = %.7f'
                           % (pso_fitness), of_run)
            p0 = compute_initial_walkers(nfit, nwalkers, pso_parameters, parameters_minmax,
                                         parameter_names, cli.delta_sigma, of_run)

        elif (cli.pso_type == 'skip'):
            # DO NOT RUN PSO, ONLY EMCEE
            anc.print_both(' DO NOT RUN PSO, ONLY EMCEE', of_run)
            p0 = compute_initial_walkers(nfit, nwalkers, fitting_parameters,
                                         parameters_minmax, parameter_names,
                                         cli.delta_sigma, of_run)

        anc.print_both(' emcee chain: nwalkers = %d nruns = %d' % (nwalkers, nruns),
                       of_run)
        anc.print_both(' sampler ... ', of_run)

        # close the single-process pool and open a new one with nthreads processes
        threads_pool.close()
        threads_pool.terminate()
        threads_pool.join()
        threads_pool = emcee.interruptible_pool.InterruptiblePool(nthreads)
        sampler = emcee.EnsembleSampler(nwalkers, nfit, lnprob, pool=threads_pool)

        anc.print_both(' ready to go', of_run)
        anc.print_both(' with nsave = %r' % (nsave), of_run)
        sys.stdout.flush()

        if (nsave != False):
            # save temporary sampling during emcee every nruns*10%
            if (os.path.exists(os.path.join(pso_path, 'emcee_summary.hdf5'))
                    and os.path.isfile(os.path.join(pso_path, 'emcee_summary.hdf5'))):
                os.remove(os.path.join(pso_path, 'emcee_summary.hdf5'))
            f_hdf5 = h5py.File(os.path.join(pso_path, 'emcee_summary.hdf5'), 'a')
            f_hdf5.create_dataset('parameter_names', data=parameter_names, dtype='S10')
            f_hdf5.create_dataset('boundaries', data=parameters_minmax, dtype=np.float64)
            temp_dset = f_hdf5.create_dataset('chains', (nwalkers, nruns, nfit),
                                              dtype=np.float64)
            f_hdf5['chains'].attrs['nwalkers'] = nwalkers
            f_hdf5['chains'].attrs['nruns'] = nruns
            f_hdf5['chains'].attrs['nfit'] = nfit
            f_hdf5['chains'].attrs['nfree'] = nfree
            temp_lnprob = f_hdf5.create_dataset('lnprobability', (nwalkers, nruns),
                                                dtype=np.float64)
            temp_lnprob.attrs['ln_err_const'] = ln_err_const
            # acceptance fraction is one value per walker
            temp_acceptance = f_hdf5.create_dataset('acceptance_fraction',
                                                    data=np.zeros((nwalkers)),
                                                    dtype=np.float64)
            # autocorrelation time is one value per fitted parameter
            temp_acor = f_hdf5.create_dataset('autocor_time', data=np.zeros((nfit)),
                                              dtype=np.float64)
            f_hdf5.close()

            pos = p0
            niter_save = int(nruns / nsave)
            state = None
            anc.print_both(' Running emcee with temporary saving', of_run)
            sys.stdout.flush()
            for i in range(0, niter_save):
                anc.print_both('', of_run)
                anc.print_both(' iter: %6d ' % (i + 1), of_run)
                aaa = i * nsave
                bbb = aaa + nsave
                pos, prob, state = sampler.run_mcmc(pos, N=nsave, rstate0=state)
                anc.print_both('completed %d steps of %d' % (bbb, nruns), of_run)

                f_hdf5 = h5py.File(os.path.join(pso_path, 'emcee_summary.hdf5'), 'a')
                temp_dset = f_hdf5['chains']
                temp_dset[:, aaa:bbb, :] = sampler.chain[:, aaa:bbb, :]
                temp_dset.attrs['completed_steps'] = bbb

                temp_lnprob = f_hdf5['lnprobability']
                temp_lnprob[:, aaa:bbb] = sampler.lnprobability[:, aaa:bbb]

                acceptance_fraction = sampler.acceptance_fraction
                temp_acceptance = f_hdf5['acceptance_fraction']
                # write into the dataset, do not rebind the python name
                temp_acceptance[...] = acceptance_fraction
                mean_acceptance_fraction = np.mean(acceptance_fraction)

                acor_time = anc.compute_acor_time(sampler, steps_done=bbb)
                temp_acor = f_hdf5['autocor_time']
                temp_acor[...] = acor_time

                f_hdf5.close()
                sys.stdout.flush()

            anc.print_both('', of_run)
            anc.print_both('...done with saving temporary total shape = %s'
                           % (str(np.shape(sampler.chain))), of_run)
            anc.print_both('', of_run)
            sys.stdout.flush()

        else:
            # COMPLETE SINGLE RUN OF EMCEE, WITHOUT REMOVING THE BURN-IN
            anc.print_both(' Running full emcee ...', of_run)
            sys.stdout.flush()
            sampler.run_mcmc(p0, nruns)
            anc.print_both('done', of_run)
            anc.print_both('', of_run)
            sys.stdout.flush()
            flatchains = sampler.chain[:, :, :].reshape((nwalkers * nruns, nfit))  # full chain values
            acceptance_fraction = sampler.acceptance_fraction
            mean_acceptance_fraction = np.mean(acceptance_fraction)
            acor_time = anc.compute_acor_time(sampler)
            lnprobability = sampler.lnprobability

            # save chains with original shape as hdf5 file
            f_hdf5 = h5py.File(os.path.join(pso_path, 'emcee_summary.hdf5'), 'w')
            f_hdf5.create_dataset('chains', data=sampler.chain, dtype=np.float64)
            f_hdf5['chains'].attrs['nwalkers'] = nwalkers
            f_hdf5['chains'].attrs['nruns'] = nruns
            f_hdf5['chains'].attrs['nfit'] = nfit
            f_hdf5['chains'].attrs['nfree'] = nfree
            f_hdf5['chains'].attrs['completed_steps'] = nruns
            f_hdf5.create_dataset('parameter_names', data=parameter_names, dtype='S10')
            f_hdf5.create_dataset('boundaries', data=parameters_minmax, dtype=np.float64)
            f_hdf5.create_dataset('acceptance_fraction', data=acceptance_fraction,
                                  dtype=np.float64)
            f_hdf5.create_dataset('autocor_time', data=acor_time, dtype=np.float64)
            f_hdf5.create_dataset('lnprobability', data=lnprobability, dtype=np.float64)
            f_hdf5['lnprobability'].attrs['ln_err_const'] = ln_err_const
            f_hdf5.close()

        anc.print_both(' Mean acceptance fraction = %.6f (it should be within [0.25-0.5])'
                       % (mean_acceptance_fraction), of_run)
        anc.print_both('', of_run)

        # close the pool of threads
        threads_pool.close()
        threads_pool.terminate()
        threads_pool.join()
        anc.print_both('COMPLETED EMCEE', of_run)

    elapsed = time.time() - start
    elapsed_d, elapsed_h, elapsed_m, elapsed_s = anc.computation_time(elapsed)
    anc.print_both('', of_run)
    anc.print_both(' pyTRADES: EMCEE FINISHED in %2d day %02d hour %02d min %.2f sec'
                   ' - bye bye' % (int(elapsed_d), int(elapsed_h), int(elapsed_m),
                                   elapsed_s), of_run)
    anc.print_both('', of_run)
    of_run.close()
    pytrades_lib.pytrades.deallocate_variables()

    return
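
# A minimal read-back sketch, not part of the original driver: it shows how the
# emcee_summary.hdf5 file written above can be reopened. Dataset and attribute
# names match the ones created in main(); the function name is hypothetical.
def read_emcee_summary(hdf5_file):
    import h5py
    import numpy as np
    f = h5py.File(hdf5_file, 'r')
    chains = np.array(f['chains'], dtype=np.float64)  # shape (nwalkers, nruns, nfit)
    lnprobability = np.array(f['lnprobability'], dtype=np.float64)  # shape (nwalkers, nruns)
    # 'completed_steps' is updated at every temporary save
    completed_steps = f['chains'].attrs.get('completed_steps', chains.shape[1])
    f.close()
    return chains, lnprobability, completed_steps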
def compute_initial_walkers(nfit, nwalkers, fitting_parameters, parameters_minmax,
                            parameter_names, delta_sigma, of_run):
    # initial walkers: input fitting_parameters + N(loc=0., scale=delta_sigma) perturbations
    anc.print_both(' Initializing walkers with delta_sigma = %s'
                   % (str(delta_sigma).strip()), of_run)
    p0 = []
    i_p0 = 0
    anc.print_both(' good p0:', of_run)

    # 2017-02-03 LUCA --0--
    try:
        d_sigma = np.float64(delta_sigma)
    except:
        d_sigma = np.float64(1.e-4)
    delta_sigma_out = compute_proper_sigma(nfit, d_sigma, parameter_names)
    print ' ',
    # draw initial walkers until nwalkers of them have a finite lnprob
    while True:
        test_p0 = np.array([fitting_parameters[ifit]
                            + np.random.normal(loc=0., scale=delta_sigma_out[ifit])
                            for ifit in range(0, nfit)], dtype=np.float64)
        test_lg = lnprob(test_p0)
        if (not np.isinf(test_lg)):
            i_p0 += 1
            p0.append(test_p0)
            print i_p0,
            if (i_p0 == nwalkers):
                break
    # keep the original fitting parameters among the initial walkers
    p0[-1] = fitting_parameters
    print

    # if 'random' opt ==> create other Gaussian starting points (<->nwalkers)
    if ('ran' in str(delta_sigma).strip().lower()):
        # DELTA BETWEEN MAX AND MIN OF BOUNDARIES
        delta_parameters = np.abs(parameters_minmax[:, 1] - parameters_minmax[:, 0])
        nw_min = 30
        # a new Gaussian starting point every nw_min walkers, keeping at least
        # nw_min walkers Gaussian around the original fitting parameters
        n_gpts = int((nwalkers - nw_min) / nw_min)
        print ' new gaussian starting points: ', n_gpts
        if (n_gpts > 0):
            print ' doing random-gaussian points ... '
            for i_gpt in range(0, n_gpts):
                # create a new starting point, but check that lnL != -inf
                new_start = fitting_parameters.copy()
                sel_fit = int(np.random.random() * (nfit - 1))  # change only one parameter ...
                print 'gpt ', i_gpt + 1
                print 'selected sel_fit = ', sel_fit, ' ==> ', parameter_names[sel_fit]
                print 'val = ', new_start[sel_fit], ' with min = ', \
                    parameters_minmax[sel_fit, 0], ' and delta = ', delta_parameters[sel_fit]
                while True:
                    new_start[sel_fit] = parameters_minmax[sel_fit, 0] \
                                         + delta_parameters[sel_fit] * np.random.random()
                    test_lg = lnprob(new_start)
                    if (not np.isinf(test_lg)):
                        break
                i_pos = nw_min * i_gpt
                print 'i_pos = ',
                while True:
                    test_p0 = np.array([new_start[ifit]
                                        + np.random.normal(loc=0., scale=delta_sigma_out[ifit])
                                        for ifit in range(0, nfit)], dtype=np.float64)
                    test_lg = lnprob(test_p0)
                    if (not np.isinf(test_lg)):
                        p0[i_pos] = test_p0
                        print i_pos,
                        i_pos += 1
                        if (i_pos % nw_min == 0):
                            break
            print
    print

    anc.print_both(' done initial walkers.', of_run)

    return p0
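
# Hypothetical call of compute_initial_walkers (argument names follow the driver above);
# the returned p0 is a list of nwalkers arrays of length nfit, each with finite lnprob:
#
# p0 = compute_initial_walkers(nfit, nwalkers, fitting_parameters,
#                              parameters_minmax, parameter_names,
#                              cli.delta_sigma, of_run)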
def main():
    # MAIN -- TRADES + EMCEE
    # READ COMMAND LINE ARGUMENTS
    cli = get_args()

    # STARTING TIME
    start = time.time()

    # RENAME
    working_path = cli.full_path
    nthreads = cli.nthreads
    # seed the global numpy RNG (np.random.RandomState(cli.seed) would only build
    # a new generator and discard it without seeding anything)
    np.random.seed(cli.seed)

    # INITIALISE TRADES WITH SUBROUTINE WITHIN TRADES_LIB:
    # PARAMETER NAMES, MINMAX, INTEGRATION ARGS, READ DATA ...
    pytrades_lib.pytrades.initialize_trades(working_path, cli.sub_folder, nthreads)

    # RETRIEVE DATA AND VARIABLES FROM TRADES_LIB MODULE
    n_bodies = pytrades_lib.pytrades.n_bodies  # NUMBER OF TOTAL BODIES OF THE SYSTEM
    n_planets = n_bodies - 1                   # NUMBER OF PLANETS IN THE SYSTEM
    ndata = pytrades_lib.pytrades.ndata        # TOTAL NUMBER OF DATA AVAILABLE
    npar = pytrades_lib.pytrades.npar          # NUMBER OF TOTAL PARAMETERS ~ n_planets X 6
    nfit = pytrades_lib.pytrades.nfit          # NUMBER OF PARAMETERS TO FIT
    nfree = pytrades_lib.pytrades.nfree        # NUMBER OF FREE PARAMETERS (i.e. nrvset)
    dof = pytrades_lib.pytrades.dof            # NUMBER OF DEGREES OF FREEDOM = NDATA - NFIT
    global inv_dof
    inv_dof = pytrades_lib.pytrades.inv_dof

    # READ THE NAMES OF THE PARAMETERS FROM THE TRADES_LIB AND CONVERT THEM TO PYTHON STRINGS
    str_len = pytrades_lib.pytrades.str_len
    temp_names = pytrades_lib.pytrades.get_parameter_names(nfit, str_len)
    trades_names = anc.convert_fortran_charray2python_strararray(temp_names)
    parameter_names = anc.trades_names_to_emcee(trades_names)

    if (cli.trades_previous is not None):
        temp_names, trades_parameters = anc.read_fitted_file(cli.trades_previous)
        if (nfit != np.shape(trades_parameters)[0]):
            anc.print_both(' NUMBER OF PARAMETERS (%d) IN TRADES-PREVIOUS FILE DOES NOT'
                           ' MATCH THE CURRENT CONFIGURATION nfit=%d\nSTOP'
                           % (np.shape(trades_parameters)[0], nfit))
            sys.exit()
        del temp_names
    else:
        # INITIAL PARAMETER SET (NEEDED ONLY TO HAVE THE PROPER ARRAY/VECTOR)
        trades_parameters = pytrades_lib.pytrades.fitting_parameters

    # save initial fitting parameters into array
    original_fit_parameters = trades_parameters.copy()
    fitting_parameters = anc.e_to_sqrte_fitting(trades_parameters, trades_names)

    trades_minmax = pytrades_lib.pytrades.parameters_minmax  # PARAMETER BOUNDARIES
    parameters_minmax = anc.e_to_sqrte_boundaries(trades_minmax, trades_names)

    # RADIAL VELOCITIES SET
    n_rv = pytrades_lib.pytrades.nrv
    n_set_rv = pytrades_lib.pytrades.nrvset

    # TRANSITS SET
    n_t0 = pytrades_lib.pytrades.nt0
    n_t0_sum = pytrades_lib.pytrades.ntts
    n_set_t0 = 0
    for i in range(0, n_bodies - 1):
        if (n_t0[i] > 0):
            n_set_t0 += 1

    # global constant for the loglhd, computed in fortran from the observed uncertainties
    global ln_err_const
    ln_err_const = pytrades_lib.pytrades.ln_err_const

    # SET EMCEE PARAMETERS
    nwalkers, nruns, nsave, npost = get_emcee_arguments(cli, nfit)

    # INITIALISE SCRIPT FOLDER/LOG FILE
    working_folder, run_log, of_run = init_folder(working_path, cli.sub_folder)

    anc.print_both('', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both(' pyTRADES', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both('', of_run)
    anc.print_both(' WORKING PATH = %s' % (working_path), of_run)
    anc.print_both(' NUMBER OF THREADS = %d' % (nthreads), of_run)
    anc.print_both(' dof = ndata(%d) - nfit(%d) - nfree(%d) = %d'
                   % (ndata, nfit, nfree, dof), of_run)
    anc.print_both(' Total N_RV = %d for %d set(s)' % (n_rv, n_set_rv), of_run)
    anc.print_both(' Total N_T0 = %d for %d out of %d planet(s)'
                   % (n_t0_sum, n_set_t0, n_planets), of_run)
    anc.print_both(' %s = %.7f' % ('log constant error', ln_err_const), of_run)
    anc.print_both(' %s = %.7f' % ('IN FORTRAN log constant error',
                                   pytrades_lib.pytrades.ln_err_const), of_run)
    anc.print_both(' seed = %s' % (str(cli.seed)), of_run)

    if (cli.trades_previous is not None):
        anc.print_both('\n ******\n INITIAL FITTING PARAMETERS FROM PREVIOUS'
                       ' TRADES-EMCEE SIM IN FILE:\n %s\n ******\n'
                       % (cli.trades_previous), of_run)

    anc.print_both(' ORIGINAL PARAMETER VALUES -> 0000', of_run)
    fitness_0000, lgllhd_0000, check_0000 = \
        pytrades_lib.pytrades.write_summary_files(0, original_fit_parameters)

    anc.print_both(' ', of_run)
    anc.print_both(' TESTING LNPROB_SQ ...', of_run)
    lgllhd_zero = lnprob(trades_parameters)
    lgllhd_sq_zero = lnprob_sq(fitting_parameters, parameter_names)

    anc.print_both(' ', of_run)
    anc.print_both(' %15s %23s %23s %15s %23s'
                   % ('trades_names', 'original_trades', 'trades_par',
                      'emcee_names', 'emcee_par'), of_run)
    for ifit in range(0, nfit):
        anc.print_both(' %15s %23.16e %23.16e %15s %23.16e'
                       % (trades_names[ifit], original_fit_parameters[ifit],
                          trades_parameters[ifit], parameter_names[ifit],
                          fitting_parameters[ifit]), of_run)
    anc.print_both(' ', of_run)
    anc.print_both(' %15s %23.16e %23.16e %15s %23.16e'
                   % ('lnprob', lgllhd_0000, lgllhd_zero,
                      'lnprob_sq', lgllhd_sq_zero), of_run)
    anc.print_both(' ', of_run)

    # INITIALISE THE WALKERS
    if (cli.emcee_previous is not None):
        anc.print_both(' Use a previous emcee simulation: %s' % (cli.emcee_previous),
                       of_run)
        last_p0, old_nwalkers, last_done = anc.get_last_emcee_iteration(cli.emcee_previous,
                                                                        nwalkers)
        if (not last_done):
            anc.print_both('**STOP: USING A DIFFERENT NUMBER OF WALKERS (%d) W.R.T.'
                           ' PREVIOUS EMCEE SIMULATION (%d).'
                           % (nwalkers, old_nwalkers), of_run)
            sys.exit()
        p0 = last_p0
    else:
        p0 = compute_initial_walkers(nfit, nwalkers, fitting_parameters, parameters_minmax,
                                     parameter_names, cli.delta_sigma, of_run)

    anc.print_both(' emcee chain: nwalkers = %d nruns = %d' % (nwalkers, nruns), of_run)
    anc.print_both(' sampler ... ', of_run)

    # lnprob_sq is needed to use sqrt(e) in emcee instead of e (in fortran)
    threads_pool = emcee.interruptible_pool.InterruptiblePool(nthreads)
    sampler = emcee.EnsembleSampler(nwalkers, nfit, lnprob_sq, pool=threads_pool,
                                    args=[parameter_names])

    anc.print_both(' TEST A PRE-EMCEE OF 1000 STEPS', of_run)
    p0, prob, state = sampler.run_mcmc(p0, 1000)
    anc.print_both(' TEST A RESET OF THE SAMPLER', of_run)
    sampler.reset()

    anc.print_both(' ready to go', of_run)
    anc.print_both(' with nsave = %s' % (str(nsave)), of_run)
    sys.stdout.flush()

    if (nsave != False):
        # save temporary sampling during emcee every nruns*10%
        if (os.path.exists(os.path.join(working_folder, 'emcee_summary.hdf5'))
                and os.path.isfile(os.path.join(working_folder, 'emcee_summary.hdf5'))):
            os.remove(os.path.join(working_folder, 'emcee_summary.hdf5'))
        f_hdf5 = h5py.File(os.path.join(working_folder, 'emcee_summary.hdf5'), 'a')
        f_hdf5.create_dataset('parameter_names', data=parameter_names, dtype='S10')
        f_hdf5.create_dataset('boundaries', data=parameters_minmax, dtype=np.float64)
        temp_dset = f_hdf5.create_dataset('chains', (nwalkers, nruns, nfit),
                                          dtype=np.float64)
        temp_lnprob = f_hdf5.create_dataset('lnprobability', (nwalkers, nruns),
                                            dtype=np.float64)
        temp_lnprob.attrs['ln_err_const'] = ln_err_const
        # acceptance fraction is one value per walker
        temp_acceptance = f_hdf5.create_dataset('acceptance_fraction',
                                                data=np.zeros((nwalkers)),
                                                dtype=np.float64)
        # autocorrelation time is one value per fitted parameter
        temp_acor = f_hdf5.create_dataset('autocor_time', data=np.zeros((nfit)),
                                          dtype=np.float64)
        f_hdf5.close()

        pos = p0
        nchains = int(nruns / nsave)
        state = None
        anc.print_both(' Running emcee with temporary saving', of_run)
        sys.stdout.flush()
        for i in range(0, nchains):
            anc.print_both('', of_run)
            anc.print_both(' iter: %6d ' % (i + 1), of_run)
            aaa = i * nsave
            bbb = aaa + nsave
            pos, prob, state = sampler.run_mcmc(pos, N=nsave, rstate0=state)
            anc.print_both('completed %d steps of %d' % (bbb, nruns), of_run)

            f_hdf5 = h5py.File(os.path.join(working_folder, 'emcee_summary.hdf5'), 'a')
            temp_dset = f_hdf5['chains']
            temp_dset[:, aaa:bbb, :] = sampler.chain[:, aaa:bbb, :]
            temp_dset.attrs['completed_steps'] = bbb

            temp_lnprob = f_hdf5['lnprobability']
            temp_lnprob[:, aaa:bbb] = sampler.lnprobability[:, aaa:bbb]

            acceptance_fraction = sampler.acceptance_fraction
            temp_acceptance = f_hdf5['acceptance_fraction']
            # write into the dataset, do not rebind the python name
            temp_acceptance[...] = acceptance_fraction
            mean_acceptance_fraction = np.mean(acceptance_fraction)

            acor_time = anc.compute_acor_time(sampler, steps_done=bbb)
            temp_acor = f_hdf5['autocor_time']
            temp_acor[...] = acor_time

            f_hdf5.close()
            sys.stdout.flush()

        anc.print_both('', of_run)
        anc.print_both('...done with saving temporary total shape = %s'
                       % (str(np.shape(sampler.chain))), of_run)
        anc.print_both('', of_run)
        sys.stdout.flush()

    else:
        # COMPLETE SINGLE RUN OF EMCEE, WITHOUT REMOVING THE BURN-IN
        anc.print_both(' Running full emcee ...', of_run)
        sys.stdout.flush()
        sampler.run_mcmc(p0, nruns)
        anc.print_both('done', of_run)
        anc.print_both('', of_run)
        sys.stdout.flush()
        flatchains = sampler.chain[:, :, :].reshape((nwalkers * nruns, nfit))  # full chain values
        acceptance_fraction = sampler.acceptance_fraction
        mean_acceptance_fraction = np.mean(acceptance_fraction)
        acor_time = anc.compute_acor_time(sampler)
        lnprobability = sampler.lnprobability

        # save chains with original shape as hdf5 file
        f_hdf5 = h5py.File(os.path.join(working_folder, 'emcee_summary.hdf5'), 'w')
        f_hdf5.create_dataset('chains', data=sampler.chain, dtype=np.float64)
        f_hdf5['chains'].attrs['completed_steps'] = nruns
        f_hdf5.create_dataset('parameter_names', data=parameter_names, dtype='S10')
        f_hdf5.create_dataset('boundaries', data=parameters_minmax, dtype=np.float64)
        f_hdf5.create_dataset('acceptance_fraction', data=acceptance_fraction,
                              dtype=np.float64)
        f_hdf5.create_dataset('autocor_time', data=acor_time, dtype=np.float64)
        f_hdf5.create_dataset('lnprobability', data=lnprobability, dtype=np.float64)
        f_hdf5['lnprobability'].attrs['ln_err_const'] = ln_err_const
        f_hdf5.close()

    anc.print_both(' Mean acceptance fraction = %.6f (it should be within [0.25-0.5])'
                   % (mean_acceptance_fraction), of_run)
    anc.print_both('', of_run)

    # close the pool of threads
    threads_pool.close()
    threads_pool.terminate()
    threads_pool.join()
    anc.print_both('COMPLETED EMCEE', of_run)

    elapsed = time.time() - start
    elapsed_d, elapsed_h, elapsed_m, elapsed_s = anc.computation_time(elapsed)
    anc.print_both('', of_run)
    anc.print_both(' pyTRADES: EMCEE FINISHED in %2d day %02d hour %02d min %.2f sec'
                   ' - bye bye' % (int(elapsed_d), int(elapsed_h), int(elapsed_m),
                                   elapsed_s), of_run)
    anc.print_both('', of_run)
    of_run.close()
    pytrades_lib.pytrades.deallocate_variables()

    return
def main():
    # READ COMMAND LINE ARGUMENTS
    cli = get_args()

    # STARTING TIME
    start = time.localtime()
    pc_output_dir = '%d-%02d-%02dT%02dh%02dm%02ds_' % (start.tm_year, start.tm_mon,
                                                       start.tm_mday, start.tm_hour,
                                                       start.tm_min, start.tm_sec)
    pc_output_files = 'trades_pc'

    # RENAME
    working_path = cli.full_path
    nthreads = 1

    # INITIALISE TRADES WITH SUBROUTINE WITHIN TRADES_LIB:
    # PARAMETER NAMES, MINMAX, INTEGRATION ARGS, READ DATA ...
    pytrades.initialize_trades(working_path, cli.sub_folder, nthreads)

    # RETRIEVE DATA AND VARIABLES FROM TRADES_LIB MODULE
    n_bodies = pytrades.n_bodies  # NUMBER OF TOTAL BODIES OF THE SYSTEM
    n_planets = n_bodies - 1      # NUMBER OF PLANETS IN THE SYSTEM
    ndata = pytrades.ndata        # TOTAL NUMBER OF DATA AVAILABLE
    npar = pytrades.npar          # NUMBER OF TOTAL PARAMETERS ~ n_planets X 6
    nfit = pytrades.nfit          # NUMBER OF PARAMETERS TO FIT
    nfree = pytrades.nfree        # NUMBER OF FREE PARAMETERS (i.e. nrvset)
    dof = pytrades.dof            # NUMBER OF DEGREES OF FREEDOM = NDATA - NFIT
    global inv_dof
    inv_dof = pytrades.inv_dof

    # READ THE NAMES OF THE PARAMETERS FROM THE TRADES_LIB AND CONVERT THEM TO PYTHON STRINGS
    str_len = pytrades.str_len
    temp_names = pytrades.get_parameter_names(nfit, str_len)
    trades_names = anc.convert_fortran_charray2python_strararray(temp_names)
    fitting_names = anc.trades_names_to_emcee(trades_names)

    # INITIAL PARAMETER SET (NEEDED ONLY TO HAVE THE PROPER ARRAY/VECTOR);
    # this retrieval was missing in the original, which used trades_parameters undefined
    trades_parameters = pytrades.fitting_parameters
    # save initial fitting parameters into array
    original_fit_parameters = trades_parameters.copy()
    fitting_parameters = anc.e_to_sqrte_fitting(trades_parameters, trades_names)

    trades_minmax = pytrades.parameters_minmax  # PARAMETER BOUNDARIES
    parameters_minmax = anc.e_to_sqrte_boundaries(trades_minmax, trades_names)

    # RADIAL VELOCITIES SET
    n_rv = pytrades.nrv
    n_set_rv = pytrades.nrvset

    # TRANSITS SET
    n_t0 = pytrades.nt0
    n_t0_sum = pytrades.ntts
    n_set_t0 = 0
    for i in range(0, n_bodies - 1):
        if (n_t0[i] > 0):
            n_set_t0 += 1

    # global constant for the loglhd, computed in fortran from the observed uncertainties
    global ln_err_const
    ln_err_const = pytrades.ln_err_const

    # INITIALISE SCRIPT FOLDER/LOG FILE
    working_folder, run_log, of_run = init_folder(working_path, cli.sub_folder)

    anc.print_both('', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both(' pyTRADES', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both('', of_run)
    anc.print_both(' WORKING PATH = %s' % (working_path), of_run)
    anc.print_both(' dof = ndata(%d) - nfit(%d) - nfree(%d) = %d'
                   % (ndata, nfit, nfree, dof), of_run)
    anc.print_both(' Total N_RV = %d for %d set(s)' % (n_rv, n_set_rv), of_run)
    anc.print_both(' Total N_T0 = %d for %d out of %d planet(s)'
                   % (n_t0_sum, n_set_t0, n_planets), of_run)
    anc.print_both(' %s = %.7f' % ('log constant error', ln_err_const), of_run)

    # SET PYPOLYCHORD
    # number of derived parameters for PyPolyChord
    nder = 0

    # define the loglikelihood function for PyPolyChord
    def likelihood(fitting_par):
        # derived parameters
        derived_par = [0.0] * nder
        # convert fitting_par to trades_par
        trades_par = anc.sqrte_to_e_fitting(fitting_par, fitting_names)
        loglhd = 0.
        check = 1
        loglhd, check = pytrades.fortran_loglikelihood(np.array(trades_par,
                                                                dtype=np.float64))
        loglhd = loglhd + ln_err_const  # ln_err_const: global variable
        return loglhd, derived_par

    # define the prior for the fitting parameters
    def prior(hypercube):
        """ Map the unit hypercube onto uniform priors within the parameter boundaries. """
        fitting_par = [0.0] * nfit
        for i, x in enumerate(hypercube):
            fitting_par[i] = PC_priors.UniformPrior(parameters_minmax[i, 0],
                                                    parameters_minmax[i, 1])(x)
        return fitting_par

    # set PyPolyChord: the pc_settings define how to run PC,
    # e.g. nlive, precision_criterion, etc.
    pc_settings = PC_settings.PolyChordSettings(nfit, nder)
    pc_settings.base_dir = cli.pc_output_dir
    pc_settings.file_root = cli.pc_output_files
    pc_settings.do_clustering = True
    # Possible PyPolyChord settings:
    # nlive : int (Default: nDims*25)
    #   The number of live points. Increasing nlive increases the accuracy of
    #   posteriors and evidences, and proportionally increases runtime ~ O(nlive).
    # num_repeats : int (Default: nDims*5)
    #   The number of slice-sampling steps used to generate a new point.
    #   Increasing num_repeats increases the reliability of the algorithm.
    #   Typically:
    #   * for reliable evidences need num_repeats ~ O(5*nDims)
    #   * for reliable posteriors need num_repeats ~ O(nDims)
    # nprior : int (Default: nlive)
    #   The number of prior samples to draw before starting compression.
    # do_clustering : boolean (Default: True)
    #   Whether or not to use clustering at run time.
    # feedback : {0,1,2,3} (Default: 1)
    #   How much command-line feedback to give.
    # precision_criterion : float (Default: 0.001)
    #   Termination criterion. Nested sampling terminates when the evidence
    #   contained in the live points is precision_criterion fraction of the
    #   total evidence.
    # max_ndead : int (Default: -1)
    #   Alternative termination criterion. Stop after max_ndead iterations.
    #   Set negative to ignore (default).
    # boost_posterior : float (Default: 0.0)
    #   Increase the number of posterior samples produced. This can be set
    #   arbitrarily high, but you won't be able to boost by more than num_repeats.
    #   Warning: in high dimensions PolyChord produces _a lot_ of posterior
    #   samples. You probably don't need to change this.
    # posteriors : boolean (Default: True)
    #   Produce (weighted) posterior samples. Stored in <root>.txt.
    # equals : boolean (Default: True)
    #   Produce (equally weighted) posterior samples. Stored in
    #   <root>_equal_weights.txt.
    # cluster_posteriors : boolean (Default: True)
    #   Produce posterior files for each cluster? Does nothing if do_clustering=False.
    # write_resume : boolean (Default: True)
    #   Create a resume file.
    # read_resume : boolean (Default: True)
    #   Read from resume file.
    # write_stats : boolean (Default: True)
    #   Write an evidence statistics file.
    # write_live : boolean (Default: True)
    #   Write a live points file.
    # write_dead : boolean (Default: True)
    #   Write a dead points file.
    # write_prior : boolean (Default: True)
    #   Write a prior points file.
    # update_files : int (Default: nlive)
    #   How often to update the files in <base_dir>.
    # base_dir : string (Default: 'chains')
    #   Where to store output files.
    # file_root : string (Default: 'test')
    #   Root name of the files produced.
    # grade_frac : List[float] (Default: 1)
    #   The amount of time to spend in each speed.
    # grade_dims : List[int] (Default: 1)
    #   The number of parameters within each speed.

    # RUN POLYCHORD
    pc_run = PC.run_polychord(likelihood, nfit, nder, pc_settings, prior)

    # set label and legend names
    kel_plot_labels = anc.keplerian_legend(fitting_names, cli.m_type)
    pc_paramnames = [('%s' % (fitting_names[i]), r'%s' % (kel_plot_labels[i]))
                     for i in range(nfit)]
    pc_run.make_paramnames_files(pc_paramnames)

    if (cli.pc_plot):
        import getdist.plots
        import matplotlib.pyplot as plt
        plt.rc('font', **{'family': 'serif', 'serif': ['Computer Modern Roman']})
        plt.rc('text', usetex=True)
        posterior = pc_run.posterior
        g = getdist.plots.getSubplotPlotter()
        g.triangle_plot(posterior, filled=True)
        plt.show()

    return
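
# A small worked example of the prior mapping used above, assuming PyPolyChord's
# UniformPrior maps the unit hypercube linearly onto the given boundaries:
#   theta_i = a_i + (b_i - a_i) * x_i, with x_i in [0, 1]
# e.g. with boundaries (a, b) = (0.0, 10.0) and hypercube value x = 0.25,
# PC_priors.UniformPrior(0.0, 10.0)(0.25) returns 2.5.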
def main(): cli = anc.get_args() # read derived posterior file derived_file = os.path.join(cli.full_path, 'derived_posterior.hdf5') h5f = h5py.File(derived_file, 'r') derived_names = np.array(h5f['derived_names'], dtype='S10') derived_posterior_in = np.array(h5f['derived_posterior'], dtype=np.float64) h5f.close() n_der = derived_names.shape[0] n_flatchain = derived_posterior_in.shape[0] derived_posterior = anc.derived_posterior_check(derived_names, derived_posterior_in) label_separation = -0.90 # if you use this, comment out the ax.xyaxis.labelpad = label_pad line label_pad = 12 # if you use this, comment out the ax.xyaxis.set_label_coords() calls label_size = 8 ticklabel_size = 4 if (n_der > 2): #label_separation = -0.1 - ( 0.075 * (n_der-2) ) label_separation = -0.15 - (0.125 * (n_der - 2)) #else: #label_separation = -0.15 #label_size = label_size - 1 * int(n_der / 10.) #label_size = label_size - 1 * int(n_der / 5.) label_size = label_size - 1 * int(n_der / 2.5) labels_list = anc.derived_labels(derived_names, cli.m_type) k = anc.get_bins(derived_posterior, rule='doane') if (cli.overplot is not None): ## OPEN summary_parameters.hdf5 FILE s_h5f = h5py.File( os.path.join(cli.full_path, 'summary_parameters.hdf5'), 'r') # take only the selected sample s_overplot = '%04d' % (cli.overplot) #overp_der = s_h5f['parameters/%s/derived/parameters' %(s_overplot)][...] read_der = s_h5f['parameters/%s/derived/parameters' % (s_overplot)][...] s_h5f.close() overp_der = anc.derived_parameters_check(derived_names, read_der, derived_posterior) #fig = plt.figure(figsize=(12,12)) fig = plt.figure(figsize=(6, 6)) fig.subplots_adjust(hspace=0.05, wspace=0.05) for ix in range(0, n_der): x_data = derived_posterior[:, ix] x_min, x_max = anc.compute_limits(x_data, 0.05) if (x_min == x_max): x_min = x_min - 1. x_max = x_max + 1. for iy in range(0, n_der): y_data = derived_posterior[:, iy] y_min, y_max = anc.compute_limits(y_data, 0.05) if (y_min == y_max): y_min = y_min - 1. y_max = y_max + 1. if (iy > ix): # correlation plot anc.print_both('correlation %s vs %s' % (derived_names[ix], derived_names[iy])) ax = plt.subplot2grid((n_der + 1, n_der), (iy, ix)) hist2d_counts, xedges, yedges, image2d = ax.hist2d(\ x_data, y_data, bins=k, range=[[x_data.min(), x_data.max()],[y_data.min(), y_data.max()]], cmap=cm.gray_r, #normed=True normed=False ) #new_k = int(k/3) new_k = k hist2d_counts_2, xedges_2, yedges_2 = np.histogram2d(\ x_data, y_data, bins=new_k, range=[[x_data.min(), x_data.max()],[y_data.min(), y_data.max()]], #normed=True density=False ) x_bins = [ 0.5 * (xedges_2[i] + xedges_2[i + 1]) for i in range(0, new_k) ] y_bins = [ 0.5 * (yedges_2[i] + yedges_2[i + 1]) for i in range(0, new_k) ] nl = 5 levels = [1. - np.exp(-0.5 * ii) for ii in range(0, nl) ] # 2D enclosed fractions 1-exp(-ii/2); see the note after this function (currently unused, nl is passed to contour) ax.contour( x_bins, y_bins, hist2d_counts_2.T, nl, cmap=cm.viridis, linestyles='solid', linewidths=0.5, #normed=True ) if (cli.overplot is not None): # plot selected overplot sample # check angle and plot %360 and %-360...
if ('w' in derived_names[ix] or 'lN' in derived_names[ix] or 'mA' in derived_names[ix]): ax.axvline(overp_der[ix] % 360., color='C0', ls='--', lw=1.1, alpha=0.7) ax.axvline(overp_der[ix] % -360., color='C0', ls='--', lw=1.1, alpha=0.7) else: ax.axvline(overp_der[ix], color='C0', ls='--', lw=1.1, alpha=0.7) if ('w' in derived_names[iy] or 'lN' in derived_names[iy] or 'mA' in derived_names[iy]): ax.axhline(overp_der[iy] % 360., color='C0', ls='--', lw=1.1, alpha=0.7) ax.axhline(overp_der[iy] % -360., color='C0', ls='--', lw=1.1, alpha=0.7) else: ax.axhline(overp_der[iy], color='C0', ls='--', lw=1.1, alpha=0.7) ax.get_xaxis().set_visible(False) ax.get_yaxis().set_visible(False) if (iy == n_der - 1): set_xaxis(ax, label_size, label_separation, label_pad, ticklabel_size, labels_list[ix], [xedges[0], xedges[-1], 4]) if (ix == 0): set_yaxis(ax, label_size, label_separation, label_pad, ticklabel_size, labels_list[iy], [yedges[0], yedges[-1], 5]) ax.set_ylim([y_min, y_max]) ax.set_xlim([x_min, x_max]) plt.draw() elif (iy == ix): # distribution plot anc.print_both('%s histogram' % (derived_names[ix])) ax = plt.subplot2grid((n_der + 1, n_der), (ix, ix)) if (ix == n_der - 1): hist_orientation = 'horizontal' else: hist_orientation = 'vertical' idx = np.argsort(x_data) if (not cli.cumulative): # HISTOGRAM hist_counts, edges, patches = ax.hist( x_data, bins=k, range=[x_data.min(), x_data.max()], histtype='stepfilled', color='darkgrey', #edgecolor='lightgray', edgecolor='None', align='mid', orientation=hist_orientation, #normed=True, density=True, stacked=True) else: # CUMULATIVE HISTOGRAM hist_counts, edges, patches = ax.hist( x_data, bins=k, range=[x_data.min(), x_data.max()], histtype='stepfilled', color='darkgrey', #edgecolor='lightgray', edgecolor='None', align='mid', orientation=hist_orientation, density=True, stacked=True, cumulative=True) #print parameter_names_emcee[ix], overp_der[ix] if (ix == n_der - 1): if (cli.overplot is not None): # check angle and plot %360 and %-360... if ('w' in derived_names[ix] or 'lN' in derived_names[ix] or 'mA' in derived_names[ix]): ax.axhline(overp_der[ix] % 360., color='C0', ls='--', lw=1.1, alpha=0.7) ax.axhline(overp_der[ix] % -360., color='C0', ls='--', lw=1.1, alpha=0.7) else: # plot selected overplot sample ax.axhline(overp_der[ix], color='C0', ls='--', lw=1.1, alpha=0.7) ax.set_ylim([y_min, y_max]) else: if (cli.overplot is not None): if ('w' in derived_names[ix] or 'lN' in derived_names[ix] or 'mA' in derived_names[ix]): ax.axvline(overp_der[ix] % 360., color='C0', ls='--', lw=1.1, alpha=0.7) ax.axvline(overp_der[ix] % -360., color='C0', ls='--', lw=1.1, alpha=0.7) else: # plot selected overplot sample ax.axvline(overp_der[ix], color='C0', ls='--', lw=1.1, alpha=0.7) ax.set_xlim([x_min, x_max]) if (cli.overplot is not None): print derived_names[ix], ' overplot val = ', overp_der[ ix], ' min = ', x_data.min(), ' max = ', x_data.max() ax.get_xaxis().set_visible(False) ax.get_yaxis().set_visible(False) ax.set_title(labels_list[ix], fontsize=label_size) plt.draw() plot_folder = os.path.join(cli.full_path, 'plots') if (not os.path.isdir(plot_folder)): os.makedirs(plot_folder) correlation_file = os.path.join(plot_folder, 'derived_triangle.png') fig.savefig(correlation_file, bbox_inches='tight', dpi=300) anc.print_both('png done') correlation_file = os.path.join(plot_folder, 'derived_triangle.pdf') fig.savefig(correlation_file, bbox_inches='tight', dpi=96) anc.print_both('pdf done') plt.close(fig) return
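# Note on the contour levels used above: for a 2D Gaussian the probability
# enclosed within the k-sigma contour is 1 - exp(-k**2/2), i.e. ~0.3935 for
# 1-sigma and ~0.8647 for 2-sigma. The list comprehension above uses -0.5*ii
# (i.e. k = sqrt(ii)); strict k-sigma levels would use -0.5*ii**2, as in this
# minimal sketch (helper name is illustrative, assumes numpy imported as np):
def two_dim_sigma_levels(n_sigma):
    # enclosed fraction of a 2D Gaussian within the k-sigma contour, k = 0 .. n_sigma
    return [1.0 - np.exp(-0.5 * k * k) for k in range(0, n_sigma + 1)]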
def main(): # ================================================================================= # MAIN print "" print " --- read_finalpar_v2.py --- " print "" cli = get_args() fpath, idsim, lmflag, boot, mtype, mgauss, fit_type = cli.fpath, cli.idsim, cli.lmflag, cli.boot, cli.mtype, cli.mgauss, cli.fit_type nfit, NB, bodies_file, id_fit, id_all, nfit_list, cols_list, case_list = anc.get_fitted(fpath) MR_star = get_MR_start(fpath, bodies_file[0]) if(len(MR_star.shape)==2): Mstar = MR_star[0,0] else: Mstar = MR_star[0].copy() MR_star = np.zeros((2,2)) MR_star[0,0] = Mstar if(boot): file_boot = os.path.join(fpath, '%s_bootstrap_sim.dat' %(idsim)) try: bootstrap = np.genfromtxt(file_boot)[:,1:] except: sys.exit(' CANNOT FIND BOOTSTRAP FILE: %s' %(file_boot)) if(mgauss): m_factor, mass_unit = anc.mass_type_factor(Ms=Mstar, mtype=mtype, mscale=True) m_factor_boot = m_factor else: m_factor, mass_unit = anc.mass_type_factor(Ms=1.0, mtype=mtype, mscale=False) np.random.seed(seed=cli.seed) Ms_gaussian = MR_star[0,0] + np.random.normal(0., 1., size=(np.shape(bootstrap)[0]))*MR_star[0,1] # if there is an error on the stellar mass, draw a Normal distribution of stellar masses and use it to re-scale mp/Ms to mp. m_factor_boot = m_factor * Ms_gaussian # multiply the Msun-to-mass_unit factor by the Normal draws of Mstar. m_factor = m_factor * MR_star[0,0] else: bootstrap = None m_factor, mass_unit = anc.mass_type_factor(Ms=Mstar, mtype=mtype, mscale=True) kel_file, kep_elem = anc.elements(fpath, int(idsim), int(lmflag)) file_par = parameters_file(fpath, idsim, lmflag) names_par, par, fitness_s, fitness_x_dof_s, bic, chi2, ndata, dof = read_parameters(file_par, lmflag) #if (boot): #file_boot = os.path.join(fpath, '%s_bootstrap_sim.dat' %(idsim)) #try: #bootstrap = np.genfromtxt(file_boot)[:,1:] #except: #sys.exit(' CANNOT FIND BOOTSTRAP FILE: %s' %(file_boot)) #else: #bootstrap = None units_par = anc.get_units(names_par, mass_unit) names_derived, derived_par = anc.compute_derived_parameters(names_par, kep_elem, id_fit, case_list, cols_list, par, conv_factor=m_factor) units_der = anc.get_units(names_derived, mass_unit) if(boot): sigma_par = anc.compute_intervals(bootstrap, par, anc.percentile_val) names_derived, der_posterior = anc.compute_derived_posterior(names_par, kep_elem, id_fit, case_list, cols_list, bootstrap, conv_factor=m_factor_boot) derived_par, der_posterior = anc.adjust_derived_parameters(names_derived, derived_par, der_posterior) sigma_derived = anc.compute_intervals(der_posterior, derived_par, anc.percentile_val) else: sigma_par = None sigma_derived = None output_file = '%s_%s.log' %(os.path.splitext(file_par)[0], mass_unit) out = open(output_file, 'w') top_header, header = anc.get_header(anc.percentile_val) # print to screen and into file anc.print_both('', out) anc.print_both('# Number of bodies = %d' %(NB), out) anc.print_both('# OUTPUT FILE: %s' %(output_file), out) anc.print_both('# fitness = %s' %(fitness_s), out) anc.print_both('# fitness x dof = %s' %(fitness_x_dof_s), out) anc.print_both('# bic = %s' %(bic), out) anc.print_both('# chi2 = %s' %(chi2), out) anc.print_both('# ndata = %s' %(ndata), out) anc.print_both('# dof = %s' %(dof), out) anc.print_both('# Mstar = %.4f +/- %.4f M_sun' %(MR_star[0,0], MR_star[0,1]), out) anc.print_both('# FITTED PARAMETERS (nfit = %d)' %(nfit), out) anc.print_both('# FITTED PARAMETERS', out) anc.print_parameters(top_header, header, names_par, units_par, par, sigma_par, out) anc.print_both('# DERIVED PARAMETERS', out)
anc.print_parameters(top_header, header, names_derived, units_der, derived_par, sigma_derived, out) out.close() return
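# When mgauss is set, the mp/Ms-to-mp conversion factor above is multiplied by
# Gaussian draws of the stellar mass, so the bootstrap posterior of the
# planetary masses carries the Mstar uncertainty. Minimal sketch of that step
# (helper name is illustrative, assumes numpy imported as np):
def gaussian_mass_factor(m_factor_msun, Mstar, sigma_Mstar, n_draws, seed=None):
    rng = np.random.RandomState(seed)
    Ms_gaussian = Mstar + rng.normal(0.0, 1.0, size=n_draws) * sigma_Mstar
    # one conversion factor per bootstrap/posterior sample
    return m_factor_msun * Ms_gaussian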
def main(): print print ' TRADES: EMCEE confidence intervals' print cli = anc.get_args() # init trades pytrades_lib.pytrades.initialize_trades(os.path.join(cli.full_path, ''), '', 1) nfit, NB, bodies_file, id_fit, id_all, nfit_list, cols_list, case_list = anc.get_fitted( cli.full_path) ndata = pytrades_lib.pytrades.ndata nfree = pytrades_lib.pytrades.nfree dof = pytrades_lib.pytrades.dof # read emcee data emcee_file, emcee_best, folder_best = anc.get_emcee_file_and_best( cli.full_path, cli.temp_status) # get data from the hdf5 file names_par, parameter_boundaries, chains, acceptance_fraction, autocor_time, lnprobability, ln_err_const, completed_steps = anc.get_data( emcee_file, cli.temp_status) # print Memory occupation of ... anc.print_memory_usage(chains) nfit, nwalkers, nruns, nburnin, nruns_sel = anc.get_emcee_parameters( chains, cli.temp_status, cli.nburnin, completed_steps) #chains_T, parameter_boundaries = anc.select_transpose_convert_chains(nfit, nwalkers, nburnin, nruns, nruns_sel, m_factor, names_par, parameter_boundaries, chains) chains_T_full = np.zeros((nruns, nwalkers, nfit)) for ii in xrange(0, nfit): chains_T_full[:, :, ii] = chains[:, :nruns, ii].T # transpose chains_T, flatchain_posterior_0, lnprob_burnin, thin_steps = anc.thin_the_chains( cli.use_thin, nburnin, nruns, nruns_sel, autocor_time, chains_T_full, lnprobability, burnin_done=False) # lambda fix flatchain_posterior_0 = anc.fix_lambda(flatchain_posterior_0, names_par) # computes mass conversion factor #m_factor = anc.mass_conversion_factor(cli.m_type) MR_star = pytrades_lib.pytrades.mr_star m_factor_0, mass_unit = anc.mass_type_factor(Ms=1.0, mtype=cli.m_type, mscale=False) np.random.seed(seed=cli.seed) Ms_gaussian = MR_star[0, 0] + np.random.normal( 0., 1., size=(np.shape(flatchain_posterior_0)[0]) ) * MR_star[ 0, 1] # if there is an error on the stellar mass, draw a Normal distribution of stellar masses and use it to re-scale mp/Ms to mp. m_factor_boot = m_factor_0 * Ms_gaussian # multiply the Msun-to-mass_unit factor by the Normal draws of Mstar.
m_factor = m_factor_0 * MR_star[0, 0] # set label and legend names #kel_legends, labels_list = anc.keplerian_legend(names_par, cli.m_type) flatchain_posterior = flatchain_posterior_0.copy() for ifit in range(0, nfit): if ('Ms' in names_par[ifit]): flatchain_posterior[:, ifit] = m_factor_0 * flatchain_posterior[:, ifit] posterior_file = os.path.join(cli.full_path, 'posterior.hdf5') p_h5f = h5py.File(posterior_file, 'w') p_h5f.create_dataset('posterior', data=flatchain_posterior, dtype=np.float64) p_h5f.create_dataset('loglikelihood', data=lnprob_burnin.reshape((-1)), dtype=np.float64) p_h5f['posterior'].attrs['nfit'] = nfit p_h5f['posterior'].attrs['nposterior'] = np.shape(flatchain_posterior)[0] p_h5f.create_dataset('parameter_names', data=names_par, dtype='S10') p_h5f.close() anc.print_both(' Saved posterior file: %s' % (posterior_file)) top_header, header = anc.get_header(anc.percentile_val) # ============================================================================== # ============================================================================== # 2017-01-26 EMCEE NOW USES sqrt(e)cos(w), sqrt(e)sin(w) # GET INTERVALS # ============================================================================== # ============================================================================== def get_intervals(full_path, id_sim, names_par_in, parameters_in, flatchain_posterior_in, derived_type=None, full_output=False, idx_sample=None, summary_file_hdf5=None): s_h5f = summary_file_hdf5 # hdf5 summary file handle (None to skip saving) names_trades = anc.emcee_names_to_trades( names_par_in) # emcee to trades parameters_trades = anc.sqrte_to_e_fitting( parameters_in, names_par_in) # emcee to trades names_par = names_par_in # emcee kind parameters = parameters_in # emcee kind flatchain_posterior = flatchain_posterior_in # emcee kind loglhdx, checkx = pytrades_lib.pytrades.fortran_loglikelihood( np.array(parameters_trades, dtype=np.float64)) loglhdx = loglhdx + ln_err_const out_folder = os.path.join( os.path.join(full_path, '%04d_sim' % (id_sim)), '') if (not os.path.isdir(out_folder)): os.makedirs(out_folder) out_file = os.path.join(out_folder, 'parameters_summary.txt') out = open(out_file, 'w') pytrades_lib.pytrades.path_change(out_folder) anc.print_both(' #', out) anc.print_both(' # --------------------------------- ', out) anc.print_both(' # PARAMETER VALUES -> %d' % (id_sim), out) fitness, lgllhd, check = pytrades_lib.pytrades.write_summary_files( id_sim, parameters_trades) kel_file, kep_elem = anc.elements(out_folder, id_sim, lmf=0) #sigma_par = anc.compute_intervals(flatchain_posterior, parameters, anc.percentile_val) sigma_par = anc.compute_sigma_hdi(flatchain_posterior, parameters) # uses HDI sigma_par = sigma_par.T units_par = anc.get_units(names_par, mass_unit) if (not bool(check)): print 'WRITING WARNING FILE: %s' % (os.path.join( out_folder, 'WARNING.txt')) warn_o = open(os.path.join(out_folder, 'WARNING.txt'), 'w') warn_o.write( '*******\nWARNING: FITTED PARAMETERS MAY NOT BE PHYSICAL!\nWARNING: BE VERY CAREFUL WITH THIS PARAMETER SET!\n*******' ) warn_o.close() nbins = anc.get_auto_bins(flatchain_posterior_0) names_derived, der_posterior = anc.compute_derived_posterior( names_par, kep_elem, id_fit, case_list, cols_list, flatchain_posterior, conv_factor=m_factor_boot) #der_posterior_T = der_posterior der_posterior_T = anc.derived_posterior_check(names_derived, der_posterior) par_type = '' descr = '' if (str(derived_type).strip().lower() == 'median'): # MEDIAN PARAMETERS ID == 1050 derived_par = np.percentile(der_posterior_T, 50., axis=0, interpolation='midpoint')
par_type = 'MEDIAN:' descr = 'median of posterior and median of derived posterior' elif (str(derived_type).strip().lower() == 'mode'): # MODE-LIKE PARAMETERS -> id 3050 #k = anc.get_bins(flatchain_posterior, rule='doane') der_bin, derived_par = anc.get_mode_parameters( der_posterior_T, nbins) par_type = 'MODE' descr = 'mode of posterior and mode of derived posterior' else: # ORIGINAL FITTING PARAMETERS ID == 0 # or # MAX LNPROBABILITY -> id 2050 names_derived, derived_par = anc.compute_derived_parameters( names_par, kep_elem, id_fit, case_list, cols_list, parameters, conv_factor=m_factor) derived_par, der_posterior_T = anc.adjust_derived_parameters( names_derived, derived_par, der_posterior_T) if (id_sim == 0): par_type = 'ORIGINAL FIT:' descr = 'initial set of parameters' elif (id_sim == 1051): par_type = 'MEDIAN PARAMETERS TO DERIVED:' descr = 'median of posterior and converted to derived parameter' elif (id_sim == 2050): par_type = 'MAX LNPROB' elif (id_sim == 3051): par_type = 'MODE PARAMETERS TO DERIVED:' descr = 'mode of posterior and converted to derived parameter' elif (id_sim == 666): par_type = 'SELECTED SAMPLE WITHIN HDI' # ***COMMENTED 2017-02-02: TO CHECK IF REALLY NEEDED #if(idx_sample is not None): #par_type = '%s <-> idx = %d' %(par_type, idx_sample) #derived_par = der_posterior_T[idx_sample, :] #for ider in range(0,np.shape(derived_par)[0]): ##print ider, names_derived[ider], names_derived[ider][0], names_derived[ider][1] #if(names_derived[ider][0] == 'm' and names_derived[ider][1] != 'A'): ##print 'doing' #derived_par[ider] = der_posterior_T[idx_sample, ider]*m_factor/m_factor_boot[idx_sample] elif (id_sim == 667): par_type = 'SELECTED SAMPLE CLOSE TO MEDIAN LGLLHD WITHIN POSTERIOR HDI' descr = "" elif (id_sim == 668): par_type = 'MAX LGLLHD WITHIN POSTERIOR HDI:' descr = "Select the posterior within the HDI and take the parameter set with the highest loglikelihood."
else: par_type = 'AD HOC' descr = "from input file" par_type = '%s %s' % (par_type, descr) #sigma_derived = anc.compute_intervals(der_posterior_T, derived_par, anc.percentile_val) sigma_derived = anc.compute_sigma_hdi(der_posterior_T, derived_par) sigma_derived = sigma_derived.T units_der = anc.get_units(names_derived, mass_unit) if (s_h5f is not None): s_id_sim = '%04d' % (id_sim) s_h5f.create_dataset('parameters/%s/fitted/parameters' % (s_id_sim), data=parameters, dtype=np.float64, compression='gzip') s_h5f.create_dataset('parameters/%s/fitted/names' % (s_id_sim), data=names_par, dtype='S10', compression='gzip') s_h5f.create_dataset('parameters/%s/fitted/units' % (s_id_sim), data=units_par, dtype='S15', compression='gzip') s_h5f.create_dataset('parameters/%s/fitted/sigma' % (s_id_sim), data=sigma_par.T, dtype=np.float64, compression='gzip') s_h5f['parameters/%s/fitted/sigma' % (s_id_sim)].attrs['percentiles'] = anc.percentile_val s_h5f.create_dataset('parameters/%s/derived/parameters' % (s_id_sim), data=derived_par, dtype=np.float64, compression='gzip') s_h5f.create_dataset('parameters/%s/derived/names' % (s_id_sim), data=names_derived, dtype='S10', compression='gzip') s_h5f.create_dataset('parameters/%s/derived/units' % (s_id_sim), data=units_der, dtype='S15', compression='gzip') s_h5f.create_dataset('parameters/%s/derived/sigma' % (s_id_sim), data=sigma_derived.T, dtype=np.float64, compression='gzip') s_h5f['parameters/%s/derived/sigma' % (s_id_sim)].attrs['percentiles'] = anc.percentile_val s_h5f['parameters/%s' % (s_id_sim)].attrs['info'] = '%s ==> %s' % (s_id_sim, par_type) s_h5f['parameters/%s' % (s_id_sim)].attrs['fitness'] = fitness s_h5f['parameters/%s' % (s_id_sim)].attrs['lgllhd'] = lgllhd s_h5f['parameters/%s' % (s_id_sim)].attrs['check'] = check if (idx_sample is not None): s_h5f['parameters/%s' % (s_id_sim)].attrs['idx_sample'] = idx_sample #print '\nComputed sigma_par with shape ',np.shape(sigma_par) #print 'Computed sigma_derived with shape ',np.shape(sigma_derived) anc.print_both('\n# SUMMARY: %s' % (par_type), out) anc.print_both('# FITTED PARAMETERS', out) anc.print_parameters(top_header, header, names_par, units_par, parameters, sigma_par, out) anc.print_both('# DERIVED PARAMETERS', out) anc.print_parameters(top_header, header, names_derived, units_der, derived_par, sigma_derived, out) out.close() if (full_output): return out_folder, names_derived, der_posterior_T else: return out_folder # ============================================================================== # ============================================================================== # ============================================================================== ## CREATE A HDF5 FILE WITH CONFIDENCE INTERVALS AND ALL THE SUMMARY PARAMETERS # ============================================================================== summary_file = os.path.join(cli.full_path, 'summary_parameters.hdf5') s_h5f = h5py.File(summary_file, 'w') ### COMPUTE CONFIDENCE INTERVALS OF THE FITTED PARAMETER DISTRIBUTIONS #ci_fitted = np.percentile(flatchain_posterior_0, anc.percentile_val[2:], axis=0, interpolation='midpoint') # (n_percentile-2 x nfit) ==> skip 1st and 2nd items, the 68.27th and 50th # ============================================================================== # HDI INSTEAD OF CREDIBLE INTERVALS # ============================================================================== nbins = anc.get_auto_bins(flatchain_posterior_0) hdi_ci, mode_parameters = anc.compute_hdi_full(flatchain_posterior_0, mode_output=True)
ci_fitted = hdi_ci.T print ' shape: hdi_ci = ', np.shape(hdi_ci), ' ci_fitted = ', np.shape( ci_fitted) # hdi_ci: nfit x nci # ci_fitted: nci x nfit # nci -> -1sigma(0) +1sigma(1) -2sigma(2) +2sigma(3) -3sigma(4) +3sigma(5) #sys.exit() units_par = anc.get_units(names_par, mass_unit) s_h5f.create_dataset('confidence_intervals/fitted/ci', data=ci_fitted.T, dtype=np.float64, compression='gzip') s_h5f.create_dataset('confidence_intervals/fitted/names', data=names_par, dtype='S10', compression='gzip') s_h5f.create_dataset('confidence_intervals/fitted/units', data=units_par, dtype='S15', compression='gzip') s_h5f.create_dataset('confidence_intervals/fitted/percentiles', data=np.array(anc.percentile_val[2:]), dtype=np.float64, compression='gzip') # note: no longer strictly true, these are HDI bounds rather than plain percentiles s_h5f['confidence_intervals/fitted'].attrs['nfit'] = nfit s_h5f['confidence_intervals/fitted'].attrs['nfree'] = nfree s_h5f['confidence_intervals/fitted'].attrs['ndata'] = ndata s_h5f['confidence_intervals/fitted'].attrs['dof'] = dof # ============================================================================== # ============================================================================== ## ORIGINAL FITTING PARAMETERS ID == 0 # ============================================================================== # save initial_fitting parameters into array original_fit_parameters = pytrades_lib.pytrades.fitting_parameters # INITIAL PARAMETER SET (NEEDED ONLY TO HAVE THE PROPER ARRAY/VECTOR) #folder_0 = get_intervals(cli.full_path, 0, names_par, original_fit_parameters, flatchain_posterior_0, derived_type=None, summary_file_hdf5=s_h5f) # WARNING: original_fit_parameters from TRADES have to be converted to emcee parameters: # (ecosw,esinw) => (sqrtecosw, sqrtesinw) trades_names = anc.emcee_names_to_trades(names_par) original_parameters = anc.e_to_sqrte_fitting(original_fit_parameters, trades_names) folder_0 = get_intervals(cli.full_path, 0, names_par, original_parameters, flatchain_posterior_0, derived_type=None, summary_file_hdf5=s_h5f) # ============================================================================== print print # ============================================================================== ## MAX LNPROBABILITY AND PARAMETERS -> id 2050 # ============================================================================== max_lnprob, max_lnprob_parameters, max_lnprob_perc68, max_lnprob_confint = anc.get_maxlnprob_parameters( lnprob_burnin, chains_T, flatchain_posterior_0) max_id1, max_id2 = anc.get_max_indices(lnprob_burnin) folder_2050, names_derived, der_posterior = get_intervals( cli.full_path, 2050, names_par, max_lnprob_parameters, flatchain_posterior_0, derived_type=None, full_output=True, summary_file_hdf5=s_h5f) units_der = anc.get_units(names_derived, mass_unit) # write out the derived names and posterior into an hdf5 file der_post_file = os.path.join(cli.full_path, 'derived_posterior.hdf5') h5f = h5py.File(der_post_file, 'w') h5f.create_dataset('derived_names', data=names_derived, dtype='S10', compression='gzip') h5f.create_dataset('derived_posterior', data=der_posterior, dtype=np.float64, compression='gzip') h5f.create_dataset('units_derived', data=units_der, dtype='S15', compression='gzip') h5f.close() # ============================================================================== ### COMPUTE CONFIDENCE INTERVALS OF THE DERIVED PARAMETER DISTRIBUTIONS #ci_derived = np.percentile(der_posterior, anc.percentile_val[2:], axis=0, interpolation='midpoint') # (n_percentile-1 x nder) ==> skip first item, the 68.27th #
============================================================================== # HDI INSTEAD OF CREDIBLE INTERVALS # ============================================================================== #npost_der, nder = np.shape(der_posterior) #k_der = anc.get_auto_bins(der_posterior) hdi_ci_derived = anc.compute_hdi_full(der_posterior, mode_output=False) ci_derived = hdi_ci_derived.T s_h5f.create_dataset('confidence_intervals/derived/ci', data=ci_derived.T, dtype=np.float64, compression='gzip') s_h5f.create_dataset('confidence_intervals/derived/names', data=names_derived, dtype='S10', compression='gzip') s_h5f.create_dataset('confidence_intervals/derived/units', data=units_der, dtype='S15', compression='gzip') s_h5f.create_dataset('confidence_intervals/derived/percentiles', data=np.array(anc.percentile_val[2:]), dtype=np.float64, compression='gzip') # ============================================================================== print print # ============================================================================== ## MEDIAN PARAMETERS ID == 1050 # ============================================================================== median_parameters, median_perc68, median_confint = anc.get_median_parameters( flatchain_posterior_0) folder_1050 = get_intervals(cli.full_path, 1050, names_par, median_parameters, flatchain_posterior_0, derived_type='median', summary_file_hdf5=s_h5f) ## MEDIAN PARAMETERS ID == 1051 folder_1051 = get_intervals(cli.full_path, 1051, names_par, median_parameters, flatchain_posterior_0, derived_type=None, summary_file_hdf5=s_h5f) # ============================================================================== print print # ============================================================================== # select n_samples from the posterior within the CI # ============================================================================== if (cli.n_samples > 0): anc.print_both(' Selecting %d samples from the posterior ...' % (cli.n_samples)) sys.stdout.flush() samples_fit_par = anc.take_n_samples(flatchain_posterior_0, ci_fitted[0:2, :], n_samples=cli.n_samples) samples_fit_par[ 0, :] = median_parameters # first sample as the median of the posterior anc.print_both(' Running TRADES and computing the T0s and RVs ...') samples_file = os.path.join(cli.full_path, 'samples_ttra_rv.hdf5') anc.print_both(' Saving into %s' % (samples_file)) smp_h5 = h5py.File(samples_file, 'w') save_ttra_and_rv_from_samples(samples_fit_par, names_par, NB, cli.n_samples, smp_h5) #tra_gr = smp_h5.create_group('T0') #for key in ttra_samples.keys(): #tra_gr.create_dataset(key, data=ttra_samples[key], dtype=np.float64, compression='gzip') #rv_gr = smp_h5.create_group('RV') #for key in rv_samples.keys(): #rv_gr.create_dataset(key, data=rv_samples[key], dtype=np.float64, compression='gzip') #rv_gr['time_rv_mod'].attrs['tepoch'] = pytrades_lib.pytrades.tepoch smp_h5.close() anc.print_both(' ... 
done') sys.stdout.flush() #sys.exit() # ============================================================================== print print # ============================================================================== ## MODE-LIKE PARAMETERS -> id 3050 # ============================================================================== ## take the mean of 5 bins centred on the highest bin #anc.print_both('nbins = %d' %(nbins)) #sys.stdout.flush() #mode_bin, mode_parameters = anc.get_mode_parameters(flatchain_posterior_0, nbins) # mode_parameters computed at the beginning with hdi if (np.any(np.isnan(mode_parameters))): print 'Some values are NaN, skipping the mode parameters' else: folder_3050 = get_intervals(cli.full_path, 3050, names_par, mode_parameters, flatchain_posterior_0, derived_type='mode', summary_file_hdf5=s_h5f) ## MODE-LIKE PARAMETERS -> id 3051 folder_3051 = get_intervals(cli.full_path, 3051, names_par, mode_parameters, flatchain_posterior_0, derived_type=None, summary_file_hdf5=s_h5f) # ============================================================================== print print # ============================================================================== # ONE SAMPLE PARAMETER SET --> 666 # ============================================================================== name_par, name_excluded = anc.get_sample_list(cli.sample_str, names_par) sample_parameters, idx_sample = anc.pick_sample_parameters( flatchain_posterior_0, names_par, name_par=name_par, name_excluded=name_excluded, post_ci=ci_fitted[0:2, :]) if (sample_parameters is not None): folder_666 = get_intervals(cli.full_path, 666, names_par, sample_parameters, flatchain_posterior_0, idx_sample=idx_sample, summary_file_hdf5=s_h5f) s_h5f['parameters/%04d' % (666)].attrs['par_selection'] = name_par if (name_excluded is not None): s_h5f['parameters/%04d' % (666)].attrs['par_excluded'] = name_excluded else: print 'NO SAMPLE PARAMETERS SELECTED!!!'
# ============================================================================== # ============================================================================== ## SELECT AD HOC PARAMETERS: # ============================================================================== #adhoc_par = median_parameters.copy() ##adhoc_par[10:] = mode_parameters[10:].copy() #adhoc_par[12] = mode_parameters[12].copy() #if(cli.overplot is not None): if (cli.adhoc is not None): print cli.overplot, cli.adhoc adhoc_names, adhoc_par_trades = anc.read_fitted_file(cli.adhoc) adhoc_par = anc.e_to_sqrte_fitting(adhoc_par_trades, adhoc_names) folder_777 = get_intervals(cli.full_path, 777, names_par, adhoc_par, flatchain_posterior_0, derived_type=777, summary_file_hdf5=s_h5f) # ============================================================================== # ============================================================================== # select the sample within post_ci and close to median lgllhd --> 667 # ============================================================================== sample2_parameters, sample2_lgllhd = anc.get_sample_by_sorted_lgllhd( flatchain_posterior_0, lnprob_burnin.T, #post_ci = ci_fitted[0:2,:] post_ci=ci_fitted.T) folder_667 = get_intervals(cli.full_path, 667, names_par, sample2_parameters, flatchain_posterior_0, derived_type=667, summary_file_hdf5=s_h5f) # ============================================================================== # ============================================================================== # another kind of selection: parameter set within HDI, then take the max(loglikelihood) --> 668 # ============================================================================== name_par, name_excluded = anc.get_sample_list(cli.sample_str, names_par) #sample3_parameters, sample3_lgllhd = anc.get_sample_by_par_and_lgllhd(flatchain_posterior_0, #lnprob_burnin.T, #names_par, #post_ci = ci_fitted[0:2,:], #name_par= name_par) sample3_parameters, sample3_lgllhd = \ anc.select_maxlglhd_with_hdi(flatchain_posterior_0, #ci_fitted[0:2,:], ci_fitted.T, lnprob_burnin.T ) folder_668 = get_intervals(cli.full_path, 668, names_par, sample3_parameters, flatchain_posterior_0, derived_type=668, summary_file_hdf5=s_h5f) # ============================================================================== s_h5f.close() print # ============================================================================== # print into file CONFIDENCE INTERVALS of fitted and derived parameters # ============================================================================== ci_file = os.path.join(cli.full_path, 'confidence_intervals.dat') oci = open(ci_file, 'w') anc.print_both('\n# SUMMARY:\n# CONFIDENCE INTERVALS', oci) anc.print_both('## FITTED PARAMETERS', oci) #anc.print_confidence_intervals(anc.percentile_val[2:], conf_interv=ci_fitted, name_parameters=names_par, unit_parameters=units_par, output=oci) anc.print_hdi(conf_interv=ci_fitted, name_parameters=names_par, unit_parameters=units_par, output=oci) anc.print_both('## DERIVED PARAMETERS', oci) #anc.print_confidence_intervals(anc.percentile_val[2:], conf_interv=ci_derived, name_parameters=names_derived, unit_parameters=units_der, output=oci) anc.print_hdi(conf_interv=ci_derived, name_parameters=names_derived, unit_parameters=units_der, output=oci) oci.close() # ============================================================================== pytrades_lib.pytrades.deallocate_variables() return
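# The summary_parameters.hdf5 file written above stores, for each id_sim, the
# fitted and derived parameter sets with their HDI-based sigmas. Minimal
# read-back sketch (function name is illustrative; dataset paths match the
# create_dataset calls above):
def read_summary_set(full_path, id_sim):
    import os
    import h5py
    s_h5f = h5py.File(os.path.join(full_path, 'summary_parameters.hdf5'), 'r')
    grp = 'parameters/%04d' % (id_sim)
    fitted = s_h5f['%s/fitted/parameters' % (grp)][...]
    sigma_fitted = s_h5f['%s/fitted/sigma' % (grp)][...]
    info = s_h5f[grp].attrs['info']
    s_h5f.close()
    return fitted, sigma_fitted, info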
def main(): # MAIN -- TRADES + EMCEE # READ COMMAND LINE ARGUMENTS cli = get_args() # STARTING TIME start = time.time() # RENAME working_path = cli.full_path nthreads = cli.nthreads np.random.seed(cli.seed) # seed the global numpy RNG (a bare RandomState instance would be created and discarded) # INITIALISE TRADES WITH SUBROUTINE WITHIN TRADES_LIB -> PARAMETER NAMES, MINMAX, INTEGRATION ARGS, READ DATA ... pytrades_lib.pytrades.initialize_trades(working_path, cli.sub_folder, nthreads) # RETRIEVE DATA AND VARIABLES FROM TRADES_LIB MODULE #global n_bodies, n_planets, ndata, npar, nfit, dof, inv_dof n_bodies = pytrades_lib.pytrades.n_bodies # NUMBER OF TOTAL BODIES OF THE SYSTEM n_planets = n_bodies - 1 # NUMBER OF PLANETS IN THE SYSTEM ndata = pytrades_lib.pytrades.ndata # TOTAL NUMBER OF DATA AVAILABLE npar = pytrades_lib.pytrades.npar # NUMBER OF TOTAL PARAMETERS ~n_planets X 6 nfit = pytrades_lib.pytrades.nfit # NUMBER OF PARAMETERS TO FIT nfree = pytrades_lib.pytrades.nfree # NUMBER OF FREE PARAMETERS (ie nrvset) dof = pytrades_lib.pytrades.dof # NUMBER OF DEGREES OF FREEDOM = NDATA - NFIT global inv_dof #inv_dof = np.float64(1.0 / dof) inv_dof = pytrades_lib.pytrades.inv_dof # READ THE NAMES OF THE PARAMETERS FROM THE TRADES_LIB AND CONVERT IT TO PYTHON STRINGS #reshaped_names = pytrades_lib.pytrades.parameter_names.reshape((10,nfit), order='F').T #parameter_names = [''.join(reshaped_names[i,:]).strip() for i in range(0,nfit)] #parameter_names = anc.convert_fortran2python_strarray(pytrades_lib.pytrades.parameter_names, nfit, str_len=10) #trades_names = anc.convert_fortran2python_strarray(pytrades_lib.pytrades.parameter_names, #nfit, str_len=10 #) str_len = pytrades_lib.pytrades.str_len temp_names = pytrades_lib.pytrades.get_parameter_names(nfit, str_len) trades_names = anc.convert_fortran_charray2python_strararray(temp_names) parameter_names = anc.trades_names_to_emcee(trades_names) if (cli.trades_previous is not None): temp_names, trades_parameters = anc.read_fitted_file( cli.trades_previous) if (nfit != np.shape(trades_parameters)[0]): anc.print_both(' NUMBER OF PARAMETERS (%d) IN TRADES-PREVIOUS FILE DOES NOT ' \ 'MATCH THE CURRENT CONFIGURATION nfit=%d\nSTOP' \ %(np.shape(trades_parameters)[0], nfit) ) sys.exit() del temp_names else: # INITIAL PARAMETER SET (NEEDED ONLY TO HAVE THE PROPER ARRAY/VECTOR) #fitting_parameters = pytrades_lib.pytrades.fitting_parameters trades_parameters = pytrades_lib.pytrades.fitting_parameters # save initial_fitting parameters into array original_fit_parameters = trades_parameters.copy() fitting_parameters = anc.e_to_sqrte_fitting(trades_parameters, trades_names) trades_minmax = pytrades_lib.pytrades.parameters_minmax # PARAMETER BOUNDARIES #parameters_minmax = trades_minmax.copy() #parameters_minmax[:,0] = anc.e_to_sqrte_fitting(trades_minmax[:,0], trades_names) #parameters_minmax[:,1] = anc.e_to_sqrte_fitting(trades_minmax[:,1], trades_names) parameters_minmax = anc.e_to_sqrte_boundaries(trades_minmax, trades_names) # RADIAL VELOCITIES SET n_rv = pytrades_lib.pytrades.nrv n_set_rv = pytrades_lib.pytrades.nrvset # TRANSITS SET n_t0 = pytrades_lib.pytrades.nt0 n_t0_sum = pytrades_lib.pytrades.ntts n_set_t0 = 0 for i in range(0, n_bodies - 1): if (n_t0[i] > 0): n_set_t0 += 1 # compute global constant for the loglhd global ln_err_const #try: ## fortran variable RV in python will be rv!!!
#e_RVo = np.array(pytrades_lib.pytrades.ervobs[:], dtype=np.float64) #except: #e_RVo = np.array([0.], dtype=np.float64) #try: #e_T0o = np.array(pytrades_lib.pytrades.et0obs[:,:], dtype=np.float64).reshape((-1)) #except: #e_T0o = np.array([0.], dtype=np.float64) #ln_err_const = anc.compute_ln_err_const(dof, e_RVo, e_T0o, cli.ln_flag) ln_err_const = pytrades_lib.pytrades.ln_err_const # SET EMCEE PARAMETERS: nwalkers, nruns, nsave, npost = get_emcee_arguments(cli, nfit) # INITIALISE SCRIPT FOLDER/LOG FILE working_folder, run_log, of_run = init_folder(working_path, cli.sub_folder) anc.print_both('', of_run) anc.print_both(' ======== ', of_run) anc.print_both(' pyTRADES', of_run) anc.print_both(' ======== ', of_run) anc.print_both('', of_run) anc.print_both(' WORKING PATH = %s' % (working_path), of_run) anc.print_both(' NUMBER OF THREADS = %d' % (nthreads), of_run) anc.print_both( ' dof = ndata(%d) - nfit(%d) - nfree(%d) = %d' % (ndata, nfit, nfree, dof), of_run) anc.print_both(' Total N_RV = %d for %d set(s)' % (n_rv, n_set_rv), of_run) anc.print_both( ' Total N_T0 = %d for %d out of %d planet(s)' % (n_t0_sum, n_set_t0, n_planets), of_run) anc.print_both(' %s = %.7f' % ('log constant error', ln_err_const), of_run) anc.print_both( ' %s = %.7f' % ('IN FORTRAN log constant error', pytrades_lib.pytrades.ln_err_const), of_run) anc.print_both(' seed = %s' % (str(cli.seed)), of_run) if (cli.trades_previous is not None): anc.print_both('\n ******\n INITIAL FITTING PARAMETERS FROM PREVIOUS' \ ' TRADES-EMCEE SIM IN FILE:\n %s\n ******\n' %(cli.trades_previous), of_run ) anc.print_both(' ORIGINAL PARAMETER VALUES -> 0000', of_run) fitness_0000, lgllhd_0000, check_0000 = pytrades_lib.pytrades.write_summary_files( 0, original_fit_parameters) anc.print_both(' ', of_run) anc.print_both(' TESTING LNPROB_SQ ...', of_run) lgllhd_zero = lnprob(trades_parameters) lgllhd_sq_zero = lnprob_sq(fitting_parameters, parameter_names) anc.print_both(' ', of_run) anc.print_both( ' %15s %23s %23s %15s %23s' % ('trades_names', 'original_trades', 'trades_par', 'emcee_names', 'emcee_par'), of_run) for ifit in range(0, nfit): anc.print_both( ' %15s %23.16e %23.16e %15s %23.16e' % (trades_names[ifit], original_fit_parameters[ifit], trades_parameters[ifit], parameter_names[ifit], fitting_parameters[ifit]), of_run) anc.print_both(' ', of_run) anc.print_both( ' %15s %23.16e %23.16e %15s %23.16e' % ('lnprob', lgllhd_0000, lgllhd_zero, 'lnprob_sq', lgllhd_sq_zero), of_run) anc.print_both(' ', of_run) # INITIALISES THE WALKERS if (cli.emcee_previous is not None): anc.print_both( ' Use a previous emcee simulation: %s' % (cli.emcee_previous), of_run) last_p0, old_nwalkers, last_done = anc.get_last_emcee_iteration( cli.emcee_previous, nwalkers) if (not last_done): anc.print_both( '**STOP: USING A DIFFERENT NUMBER OF WALKERS (%d) W.R.T. PREVIOUS EMCEE SIMULATION (%d).' % (nwalkers, old_nwalkers), of_run) sys.exit() p0 = last_p0 else: p0 = compute_initial_walkers(nfit, nwalkers, fitting_parameters, parameters_minmax, parameter_names, cli.delta_sigma, of_run) anc.print_both( ' emcee chain: nwalkers = %d nruns = %d' % (nwalkers, nruns), of_run) anc.print_both(' sampler ...
', of_run) # old version with threads #sampler = emcee.EnsembleSampler(nwalkers, nfit, lnprob, threads=nthreads) #sampler = emcee.EnsembleSampler(nwalkers, nfit, lnprob_sq, threads=nthreads, args=[parameter_names]) # needed to use sqrt(e) in emcee instead of e (in fortran) threads_pool = emcee.interruptible_pool.InterruptiblePool(nthreads) #sampler = emcee.EnsembleSampler(nwalkers, nfit, lnprob, pool=threads_pool) sampler = emcee.EnsembleSampler( nwalkers, nfit, lnprob_sq, pool=threads_pool, args=[parameter_names ]) # needed to use sqrt(e) in emcee instead of e (in fortran) anc.print_both(' TEST A PRE-EMCEE OF 1000 STEPS', of_run) p0, prob, state = sampler.run_mcmc(p0, 1000) anc.print_both(' TEST A RESET OF THE SAMPLER', of_run) sampler.reset() anc.print_both(' ready to go', of_run) anc.print_both(' with nsave = %s' % (str(nsave)), of_run) sys.stdout.flush() #sys.exit() if (nsave != False): # save temporary sampling during emcee every nruns*10% #if(os.path.exists(os.path.join(working_folder, 'emcee_temp.hdf5')) and os.path.isfile(os.path.join(working_folder, 'emcee_temp.hdf5'))): #os.remove(os.path.join(working_folder, 'emcee_temp.hdf5')) if (os.path.exists(os.path.join(working_folder, 'emcee_summary.hdf5')) and os.path.isfile( os.path.join(working_folder, 'emcee_summary.hdf5'))): os.remove(os.path.join(working_folder, 'emcee_summary.hdf5')) f_hdf5 = h5py.File(os.path.join(working_folder, 'emcee_summary.hdf5'), 'a') f_hdf5.create_dataset('parameter_names', data=parameter_names, dtype='S10') f_hdf5.create_dataset('boundaries', data=parameters_minmax, dtype=np.float64) temp_dset = f_hdf5.create_dataset('chains', (nwalkers, nruns, nfit), dtype=np.float64) temp_lnprob = f_hdf5.create_dataset('lnprobability', (nwalkers, nruns), dtype=np.float64) temp_lnprob.attrs['ln_err_const'] = ln_err_const temp_acceptance = f_hdf5.create_dataset('acceptance_fraction', data=np.zeros((nfit)), dtype=np.float64) temp_acor = f_hdf5.create_dataset('autocor_time', data=np.zeros((nfit)), dtype=np.float64) f_hdf5.close() pos = p0 nchains = int(nruns / nsave) state = None anc.print_both(' Running emcee with temporary saving', of_run) sys.stdout.flush() for i in range(0, nchains): anc.print_both('', of_run) anc.print_both(' iter: %6d ' % (i + 1), of_run) aaa = i * nsave bbb = aaa + nsave pos, prob, state = sampler.run_mcmc(pos, N=nsave, rstate0=state) anc.print_both('completed %d steps of %d' % (bbb, nruns), of_run) f_hdf5 = h5py.File( os.path.join(working_folder, 'emcee_summary.hdf5'), 'a') temp_dset = f_hdf5['chains'] #[:,:,:] temp_dset[:, aaa:bbb, :] = sampler.chain[:, aaa:bbb, :] #f_hdf5['chains'].attrs['completed_steps'] = bbb temp_dset.attrs['completed_steps'] = bbb temp_lnprob = f_hdf5['lnprobability'] #[:,:] temp_lnprob[:, aaa:bbb] = sampler.lnprobability[:, aaa:bbb] shape_lnprob = sampler.lnprobability.shape acceptance_fraction = sampler.acceptance_fraction temp_acceptance = f_hdf5['acceptance_fraction'] temp_acceptance[...] = acceptance_fraction # write into the hdf5 dataset (a plain rebinding would not update the file) #f_hdf5.create_dataset('acceptance_fraction', data=acceptance_fraction, dtype=np.float64) mean_acceptance_fraction = np.mean(acceptance_fraction) #temp_chains_T = np.zeros((bbb, nwalkers, nfit)) #for ifit in range(0,nfit): #temp_chains_T[:,:,ifit] = sampler.chain[:, :bbb, ifit].T #acor_time = anc.compute_autocor_time(temp_chains_T, walkers_transposed=True) acor_time = anc.compute_acor_time(sampler, steps_done=bbb) temp_acor = f_hdf5['autocor_time'] temp_acor[...]
= acor_time #f_hdf5.create_dataset('autocor_time', data=np.array(acor_temp, dtype=np.float64), dtype=np.float64) #f_hdf5.create_dataset('autocor_time', data=np.array(sampler.acor, dtype=np.float64), dtype=np.float64) # not working #print 'aaa = %6d bbb = %6d -> sampler.lnprobability.shape = (%6d , %6d)' %(aaa, bbb, shape_lnprob[0], shape_lnprob[1]) f_hdf5.close() sys.stdout.flush() anc.print_both('', of_run) anc.print_both( '...done with saving temporary total shape = %s' % (str(np.shape(sampler.chain))), of_run) anc.print_both('', of_run) sys.stdout.flush() # RUN EMCEE AND RESET AFTER REMOVE BURN-IN #pos, prob, state = sampler.run_mcmc(p0, npost) #sampler.reset() #sampler.run_mcmc(pos, nruns, rstate0=state) else: # GOOD COMPLETE SINGLE RUNNING OF EMCEE, WITHOUT REMOVING THE BURN-IN anc.print_both(' Running full emcee ...', of_run) sys.stdout.flush() sampler.run_mcmc(p0, nruns) anc.print_both('done', of_run) anc.print_both('', of_run) sys.stdout.flush() flatchains = sampler.chain[:, :, :].reshape( (nwalkers * nruns, nfit)) # full chain values acceptance_fraction = sampler.acceptance_fraction mean_acceptance_fraction = np.mean(acceptance_fraction) #autocor_time = sampler.acor #temp_chains_T = np.zeros((nwalkers, nsteps, nfit)) #for ifit in range(0,nfit): #temp_chains_T[:,:,ifit] = sampler.chain[:, :, ifit].T #acor_time = anc.compute_autocor_time(temp_chains_T, walkers_transposed=True) acor_time = anc.compute_acor_time(sampler) lnprobability = sampler.lnprobability # save chains with original shape as hdf5 file f_hdf5 = h5py.File(os.path.join(working_folder, 'emcee_summary.hdf5'), 'w') f_hdf5.create_dataset('chains', data=sampler.chain, dtype=np.float64) f_hdf5['chains'].attrs['completed_steps'] = nruns f_hdf5.create_dataset('parameter_names', data=parameter_names, dtype='S10') f_hdf5.create_dataset('boundaries', data=parameters_minmax, dtype=np.float64) f_hdf5.create_dataset('acceptance_fraction', data=acceptance_fraction, dtype=np.float64) f_hdf5.create_dataset('autocor_time', data=acor_time, dtype=np.float64) f_hdf5.create_dataset('lnprobability', data=lnprobability, dtype=np.float64) f_hdf5['lnprobability'].attrs['ln_err_const'] = ln_err_const f_hdf5.close() anc.print_both( " Mean_acceptance_fraction should be between [0.25-0.5] = %.6f" % (mean_acceptance_fraction), of_run) anc.print_both('', of_run) # close the pool of threads threads_pool.close() threads_pool.terminate() threads_pool.join() anc.print_both('COMPLETED EMCEE', of_run) elapsed = time.time() - start elapsed_d, elapsed_h, elapsed_m, elapsed_s = anc.computation_time(elapsed) anc.print_both('', of_run) anc.print_both( ' pyTRADES: EMCEE FINISHED in %2d day %02d hour %02d min %.2f sec - bye bye' % (int(elapsed_d), int(elapsed_h), int(elapsed_m), elapsed_s), of_run) anc.print_both('', of_run) of_run.close() pytrades_lib.pytrades.deallocate_variables() return
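# emcee_summary.hdf5 written above stores the full chains, with the
# 'completed_steps' attribute updated at every temporary save, so a running or
# interrupted simulation can be inspected safely. Minimal read-back sketch
# (function name is illustrative; dataset names match the writes above):
def read_emcee_summary(working_folder):
    import os
    import h5py
    f_hdf5 = h5py.File(os.path.join(working_folder, 'emcee_summary.hdf5'), 'r')
    chains = f_hdf5['chains'][...]  # shape (nwalkers, nruns, nfit)
    done = f_hdf5['chains'].attrs.get('completed_steps', chains.shape[1])
    lnprobability = f_hdf5['lnprobability'][...]
    f_hdf5.close()
    return chains[:, :done, :], lnprobability[:, :done]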
def main():
    # READ COMMAND LINE ARGUMENTS
    cli = get_args()

    # STARTING TIME
    start = time.localtime()
    pc_output_dir = '%d-%02d-%02dT%02dh%02dm%02ds_' % (
        start.tm_year, start.tm_mon, start.tm_mday,
        start.tm_hour, start.tm_min, start.tm_sec)
    pc_output_files = 'trades_pc'
    # (note: pc_settings below uses cli.pc_output_dir / cli.pc_output_files)

    # RENAME
    working_path = cli.full_path
    nthreads = 1

    # INITIALISE TRADES WITH SUBROUTINE WITHIN TRADES_LIB:
    # PARAMETER NAMES, MINMAX, INTEGRATION ARGS, READ DATA ...
    pytrades.initialize_trades(working_path, cli.sub_folder, nthreads)

    # RETRIEVE DATA AND VARIABLES FROM TRADES_LIB MODULE
    n_bodies = pytrades.n_bodies  # NUMBER OF TOTAL BODIES OF THE SYSTEM
    n_planets = n_bodies - 1      # NUMBER OF PLANETS IN THE SYSTEM
    ndata = pytrades.ndata        # TOTAL NUMBER OF DATA AVAILABLE
    npar = pytrades.npar          # NUMBER OF TOTAL PARAMETERS ~ n_planets X 6
    nfit = pytrades.nfit          # NUMBER OF PARAMETERS TO FIT
    nfree = pytrades.nfree        # NUMBER OF FREE PARAMETERS (i.e. nrvset)
    dof = pytrades.dof            # NUMBER OF DEGREES OF FREEDOM = NDATA - NFIT
    global inv_dof
    inv_dof = pytrades.inv_dof

    # READ THE NAMES OF THE PARAMETERS FROM THE TRADES_LIB AND CONVERT THEM TO PYTHON STRINGS
    str_len = pytrades.str_len
    temp_names = pytrades.get_parameter_names(nfit, str_len)
    trades_names = anc.convert_fortran_charray2python_strararray(temp_names)
    fitting_names = anc.trades_names_to_emcee(trades_names)

    # save the initial fitting parameters into an array
    # (assumed source: pytrades.fitting_parameters, as in the emcee driver;
    # the original referenced trades_parameters without defining it)
    trades_parameters = pytrades.fitting_parameters
    original_fit_parameters = trades_parameters.copy()
    fitting_parameters = anc.e_to_sqrte_fitting(trades_parameters, trades_names)

    trades_minmax = pytrades.parameters_minmax  # PARAMETER BOUNDARIES
    parameters_minmax = anc.e_to_sqrte_boundaries(trades_minmax, trades_names)

    # RADIAL VELOCITIES SET
    n_rv = pytrades.nrv
    n_set_rv = pytrades.nrvset

    # TRANSITS SET
    n_t0 = pytrades.nt0
    n_t0_sum = pytrades.ntts
    n_set_t0 = 0
    for i in range(0, n_bodies - 1):
        if (n_t0[i] > 0):
            n_set_t0 += 1

    # global constant for the loglikelihood
    # (previously computed in python via anc.compute_ln_err_const() from the RV and
    # T0 uncertainties; it is now taken directly from TRADES)
    global ln_err_const
    ln_err_const = pytrades.ln_err_const

    # INITIALISE SCRIPT FOLDER/LOG FILE
    working_folder, run_log, of_run = init_folder(working_path, cli.sub_folder)

    anc.print_both('', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both(' pyTRADES', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both('', of_run)
    anc.print_both(' WORKING PATH = %s' % (working_path), of_run)
    anc.print_both(' dof = ndata(%d) - nfit(%d) - nfree(%d) = %d'
                   % (ndata, nfit, nfree, dof), of_run)
    anc.print_both(' Total N_RV = %d for %d set(s)' % (n_rv, n_set_rv), of_run)
    anc.print_both(' Total N_T0 = %d for %d out of %d planet(s)'
                   % (n_t0_sum, n_set_t0, n_planets), of_run)
    anc.print_both(' log constant error = %.7f' % (ln_err_const), of_run)

    # SET PYPOLYCHORD
    # needed to define the number of derived parameters for PyPolyChord
    nder = 0

    # define the loglikelihood function for PyPolyChord
    def likelihood(fitting_par):
        # derived parameters
        derived_par = [0.0] * nder
        # convert fitting_par to trades_par
        trades_par = anc.sqrte_to_e_fitting(fitting_par, fitting_names)
        loglhd = 0.
        check = 1
        loglhd, check = pytrades.fortran_loglikelihood(
            np.array(trades_par, dtype=np.float64))
        loglhd = loglhd + ln_err_const  # ln_err_const: global variable
        return loglhd, derived_par

    # define the prior for the fitting parameters
    def prior(hypercube):
        """ Map the PolyChord unit hypercube [0,1]^nfit to uniform priors
        within the parameter boundaries. """
        fitting_par = [0.0] * nfit
        for i, x in enumerate(hypercube):
            fitting_par[i] = PC_priors.UniformPrior(parameters_minmax[i, 0],
                                                    parameters_minmax[i, 1])(x)
        return fitting_par

    # set PyPolyChord: pc_settings defines how to run PC, e.g. nlive, precision_criterion, etc.
    pc_settings = PC_settings.PolyChordSettings(nfit, nder)
    pc_settings.base_dir = cli.pc_output_dir
    pc_settings.file_root = cli.pc_output_files
    pc_settings.do_clustering = True

    # Possible PyPolyChord settings (keyword arguments):
    #
    # nlive : int (default: nDims*25)
    #     The number of live points. Increasing nlive increases the accuracy of
    #     posteriors and evidences, and proportionally increases runtime ~ O(nlive).
    # num_repeats : int (default: nDims*5)
    #     The number of slice-sampling steps to generate a new point.
    #     Increasing num_repeats increases the reliability of the algorithm.
    #     Typically:
    #     * for reliable evidences need num_repeats ~ O(5*nDims)
    #     * for reliable posteriors need num_repeats ~ O(nDims)
    # nprior : int (default: nlive)
    #     The number of prior samples to draw before starting compression.
    # do_clustering : boolean (default: True)
    #     Whether or not to use clustering at run time.
    # feedback : {0,1,2,3} (default: 1)
    #     How much command-line feedback to give.
    # precision_criterion : float (default: 0.001)
    #     Termination criterion. Nested sampling terminates when the evidence
    #     contained in the live points is precision_criterion fraction of the
    #     total evidence.
    # max_ndead : int (default: -1)
    #     Alternative termination criterion. Stop after max_ndead iterations.
    #     Set negative to ignore (default).
    # boost_posterior : float (default: 0.0)
    #     Increase the number of posterior samples produced. This can be set
    #     arbitrarily high, but you won't be able to boost by more than num_repeats.
    #     Warning: in high dimensions PolyChord produces _a lot_ of posterior
    #     samples. You probably don't need to change this.
    # posteriors : boolean (default: True)
    #     Produce (weighted) posterior samples. Stored in <root>.txt.
    # equals : boolean (default: True)
    #     Produce (equally weighted) posterior samples.
    #     Stored in <root>_equal_weights.txt.
    # cluster_posteriors : boolean (default: True)
    #     Produce posterior files for each cluster?
    #     Does nothing if do_clustering=False.
    # write_resume : boolean (default: True)
    #     Create a resume file.
    # read_resume : boolean (default: True)
    #     Read from resume file.
    # write_stats : boolean (default: True)
    #     Write an evidence statistics file.
    # write_live : boolean (default: True)
    #     Write a live points file.
    # write_dead : boolean (default: True)
    #     Write a dead points file.
    # write_prior : boolean (default: True)
    #     Write a prior points file.
    # update_files : int (default: nlive)
    #     How often to update the files in <base_dir>.
    # base_dir : string (default: 'chains')
    #     Where to store output files.
    # file_root : string (default: 'test')
    #     Root name of the files produced.
    # grade_frac : List[float] (default: [1])
    #     The amount of time to spend in each speed.
    # grade_dims : List[int] (default: [1])
    #     The number of parameters within each speed.
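    # A hedged example (kept commented out so the PyPolyChord defaults stay in
    # effect) of overriding a few of the settings documented above, following the
    # rules of thumb in those comments: nlive ~ 25*nDims and num_repeats ~ 5*nDims
    # for reliable evidences. The specific values are illustrative assumptions,
    # not the ones this script was run with.
    # pc_settings.nlive = 25 * nfit
    # pc_settings.num_repeats = 5 * nfit
    # pc_settings.precision_criterion = 0.001  # stop when live-point evidence fraction < this
    # pc_settings.feedback = 1                 # command-line verbosity: 0..3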
    # RUN POLYCHORD
    pc_run = PC.run_polychord(likelihood, nfit, nder, pc_settings, prior)

    # set label and legend names
    kel_plot_labels = anc.keplerian_legend(fitting_names, cli.m_type)
    pc_paramnames = [('%s' % (fitting_names[i]), r'%s' % (kel_plot_labels[i]))
                     for i in range(nfit)]
    #pc_paramnames += [('r*', 'r')]
    pc_run.make_paramnames_files(pc_paramnames)

    if (cli.pc_plot):
        import getdist.plots
        import matplotlib.pyplot as plt
        plt.rc('font', **{'family': 'serif', 'serif': ['Computer Modern Roman']})
        plt.rc('text', usetex=True)
        posterior = pc_run.posterior
        g = getdist.plots.getSubplotPlotter()
        g.triangle_plot(posterior, filled=True)
        plt.show()

    return
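
# A hedged usage sketch: inspecting the global evidence after PC.run_polychord has
# returned. The returned PolyChordOutput object parses <base_dir>/<file_root>.stats;
# logZ and logZerr are the attribute names in the PyPolyChord versions this script
# appears to target -- treat them as assumptions if your version differs.
def report_evidence(pc_run_output):
    """Print the global log-evidence and its uncertainty from a PolyChordOutput."""
    print('log-evidence: %.3f +/- %.3f' % (pc_run_output.logZ, pc_run_output.logZerr))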