def fit(Deltam, nsne, xi, redshiftterm, mpi=False, p0=None, nchain=2000, **kwargs):
    ndim, nwalkers = 4, 8
    sig = numpy.array([0.1, 0.01, 0.01, 50 / 3e5])
    if p0 is None:
        p0 = [numpy.array([1, 0, 0.08, 200 / 3e5]) +
              numpy.random.uniform(low=-sig, high=sig)
              for i in range(nwalkers)]

    pool = None
    if mpi:
        pool = MPIPool()
        if not pool.is_master():
            pool.wait()
            sys.exit(0)

    import time
    starttime = time.time()
    print("Start {}".format(starttime))
    sampler = emcee.EnsembleSampler(nwalkers, ndim, Fit.lnprob,
                                    args=[Deltam, nsne, xi, redshiftterm],
                                    pool=pool)

    sampler.run_mcmc(p0, nchain)

    # only the master rank reaches this point in an MPI run
    if not mpi or pool.is_master():
        endtime = time.time()
        print("End {}".format(endtime))
        print("Difference {}".format(endtime - starttime))
    if mpi:
        pool.close()
    return sampler
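# All of the snippets collected here follow the same emcee (v2.x) MPIPool
# master/worker pattern. A minimal, self-contained sketch of that pattern
# (the Gaussian lnprob is an illustrative stand-in, not taken from any snippet
# in this file):
def mpipool_pattern_sketch():
    import sys
    import numpy as np
    import emcee
    from emcee.utils import MPIPool

    def lnprob(theta):
        # standard normal log-density (up to an additive constant)
        return -0.5 * np.sum(theta ** 2)

    pool = MPIPool()            # raises ValueError when run on a single MPI rank
    if not pool.is_master():
        pool.wait()             # workers execute tasks sent by the master...
        sys.exit(0)             # ...and exit once the master closes the pool

    ndim, nwalkers = 2, 16
    p0 = np.random.randn(nwalkers, ndim)
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=pool)
    sampler.run_mcmc(p0, 1000)  # only the master reaches this point
    pool.close()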
def mcmc(mass_bins, dot_val, initial_c, nwalkers, ndim, burn_in, steps_wlk):
    import numpy as np
    import emcee
    from emcee.utils import MPIPool
    import sys

    def lnlike(c, dot_val):
        # log-likelihood
        return np.sum(np.log(
            (1 - c) * np.sqrt(1 + (c / 2)) *
            (1 - c * (1 - 3 * (dot_val * dot_val / 2))) ** (-1.5)))

    def lnprior(c):
        # flat, uninformative prior
        if -1.5 < c < 0.99:
            return 0.0
        return -np.inf

    def lnprob(c, dot_val):
        lp = lnprior(c)
        if not np.isfinite(lp):
            return -np.inf
        return lp + lnlike(c, dot_val)

    # Parallel MCMC: initialize the pool object; if this process isn't running
    # as master, wait for instructions and exit.
    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # initial positions for the walkers: a Gaussian ball around initial_c
    pos = [initial_c + 1e-2 * np.random.randn(ndim) for i in range(nwalkers)]

    # run the burn-in period, then reset and run the production steps,
    # using the final burn-in positions as the initial walker positions
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=[dot_val], pool=pool)
    pos, _, _ = sampler.run_mcmc(pos, burn_in)
    sampler.reset()
    sampler.run_mcmc(pos, steps_wlk)
    pool.close()

    chain = sampler.flatchain[:, 0]
    return chain
def run_pool(pC, pW, walk, step):  # pCenter and pWidths
    steps = step
    nwalkers = walk
    ndim = len(pC)
    # r in, del r, i, PA
    p0 = [pC[0], pC[1], pC[2], pC[3]]
    widths = [pW[0], pW[1], pW[2], pW[3]]
    p = emcee.utils.sample_ball(p0, widths, size=nwalkers)

    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnlike_vis_and_sed,
                                    live_dangerously=True, pool=pool)

    print('Beginning the MCMC run.')
    start = time.time()  # wall-clock timing (time.clock was removed in Python 3.8)
    sampler.run_mcmc(p, steps)
    stop = time.time()
    pool.close()
    print('MCMC finished successfully.\n')
    print('This was a simultaneous visibility and SED run with {} walkers and {} steps'.format(nwalkers, steps))
    print('Mean acor time: ' + str(np.mean(sampler.acor)))
    print('Mean acceptance fraction: ' + str(np.mean(sampler.acceptance_fraction)))
    print('\nRun took %r minutes' % ((stop - start) / 60.))

    chain = sampler.chain
    chi = sampler.lnprobability / (-0.5)
    whatbywhat = str(nwalkers) + 'x' + str(steps)
    os.system('mkdir MCMCRUNS/vis_and_sed/' + whatbywhat)
    chainFile = 'MCMCRUNS/vis_and_sed/' + whatbywhat + '/' + whatbywhat + '.chain.fits'
    chiFile = 'MCMCRUNS/vis_and_sed/' + whatbywhat + '/' + whatbywhat + '.chi.fits'
    infoFile = 'MCMCRUNS/vis_and_sed/' + whatbywhat + '/' + whatbywhat + '.runInfo.txt'
    fits.writeto(chainFile, chain)
    fits.writeto(chiFile, chi)

    f = open(infoFile, 'w')
    f.write('run took %r minutes\n' % ((stop - start) / 60.))
    f.write('walkers: %r\n' % nwalkers)
    f.write('steps: %r\n' % steps)
    f.write('initial model: %r\n' % p0)
    f.write('widths: %r\n' % widths)
    f.write('mean acor time: ' + str(np.mean(sampler.acor)))
    f.write('\nmean acceptance fraction: ' + str(np.mean(sampler.acceptance_fraction)))
    f.close()
    print('Data written to: \n' + chainFile + '\n' + chiFile + '\n' + infoFile)
def run_mcmc(self, n_walkers=100, n_iterations=100):
    """
    Run emcee MCMC.

    Args:
        n_walkers (int): Number of walkers to pass to the MCMC.
        n_iterations (int): Number of iterations to pass to the MCMC.
    """
    # Initialize walker matrix with initial parameters
    walkers_matrix = []  # must be a list, not an np.array
    for walker in range(n_walkers):
        walker_params = []
        for component in self.components:
            walker_params = walker_params + component.initial_values(self.data_spectrum)
        walkers_matrix.append(walker_params)

    global iteration_count
    iteration_count = 0

    # Create MCMC sampler. To enable multiprocessing, set threads > 1.
    # If using multiprocessing, the "lnpostfn" and "args" parameters
    # must be pickleable.
    if self.mpi:
        # Initialize the multiprocessing pool object.
        from emcee.utils import MPIPool
        pool = MPIPool(loadbalance=True)
        if not pool.is_master():
            pool.wait()
            sys.exit(0)

        self.sampler = emcee.EnsembleSampler(nwalkers=n_walkers,
                                             dim=len(walkers_matrix[0]),
                                             lnpostfn=ln_posterior,
                                             args=[self],
                                             pool=pool,
                                             runtime_sortingfn=sort_on_runtime)
        self.sampler.run_mcmc(walkers_matrix, n_iterations)
        pool.close()
    else:
        self.sampler = emcee.EnsembleSampler(nwalkers=n_walkers,
                                             dim=len(walkers_matrix[0]),
                                             lnpostfn=ln_posterior,
                                             args=[self],
                                             threads=1)
        # self.sampler_output = self.sampler.run_mcmc(walkers_matrix, n_iterations)
        self.sampler.run_mcmc(walkers_matrix, n_iterations)
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--mpi', action='store_true', default=False)
    parser.add_argument('--walkers', type=int, default=100)
    parser.add_argument('--steps', type=int, default=1000)
    opt = parser.parse_args()

    pool = None
    if opt.mpi:
        import socket
        import os
        from emcee.utils import MPIPool
        pool = MPIPool()
        print('Running in MPI. Host', socket.gethostname(), 'pid', os.getpid(),
              'is master?', pool.is_master())
        if not pool.is_master():
            pool.wait()
            return

    ndim, nwalkers = 2, opt.walkers
    ivar = 1. / np.random.rand(ndim)
    p0 = [np.random.rand(ndim) for i in range(nwalkers)]
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=[ivar], pool=pool)

    import time
    print('Running for', opt.steps, 'steps with', opt.walkers, 'walkers')
    t0 = time.time()
    sampler.run_mcmc(p0, opt.steps)
    print('Finished in', time.time() - t0, 'seconds')

    if pool:
        pool.close()

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    plt.figure()
    for i in range(ndim):
        plt.subplot(1, ndim, 1 + i)
        plt.hist(sampler.flatchain[:, i], 100, color="k", histtype="step")
        plt.title("Dimension {0:d}".format(i))
    plt.savefig('plot.png')
    print('Saved plot')
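# Scripts built on MPIPool have to be launched under MPI so that worker ranks
# exist; a typical invocation of the script above (8 ranks assumed) would be
# something like:
#
#   mpirun -np 8 python script.py --mpi --walkers 100 --steps 1000
#
# With a single rank, emcee's MPIPool constructor raises ValueError, which is
# why some of the snippets below wrap its construction in a try/except.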
def fit_bim_bh3_curves(p0=None):
    # Choose initial position
    if p0 is None:
        p0 = np.zeros((nwalkers, ndim))
        for walk_ix in range(nwalkers):
            for d_ix in range(len(data)):
                p0[walk_ix, d_ix*3] = np.random.uniform(1, 6)
                p0[walk_ix, d_ix*3 + 1] = np.random.uniform(6e-5, 1e-3)
                p0[walk_ix, d_ix*3 + 2] = np.random.uniform(2, 3)
            hp_ix = len(data)*3
            p0[walk_ix, hp_ix] = np.random.uniform(1, 6)             # fmax mean
            p0[walk_ix, hp_ix + 1] = np.random.uniform(0, 1)         # fmax sd
            p0[walk_ix, hp_ix + 2] = np.random.uniform(6e-5, 1e-3)   # k mean
            p0[walk_ix, hp_ix + 3] = np.random.uniform(0, 1e-1)      # k sd
            p0[walk_ix, hp_ix + 4] = np.random.uniform(2, 3)         # f0 mean
            p0[walk_ix, hp_ix + 5] = np.random.uniform(0, 1)         # f0 sd

    # plt.figure()
    # for d_ix, data_i in enumerate(data):
    #     plt.plot(time, data_i, color=colors[d_ix])
    #     plt.plot(time, fit_func(p0[0, d_ix*3:(d_ix+1)*3]), color='k')

    # Initialize the MPI pool
    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Get the sampler
    sampler = emcee.EnsembleSampler(nwalkers, ndim, posterior, pool=pool)
    # Burn-in
    print("Burn-in sampling...")
    pos, prob, state = sampler.run_mcmc(p0, burn_steps, storechain=False)
    sampler.reset()
    # Main sampling
    print("Main sampling...")
    sampler.run_mcmc(pos, sample_steps)

    # Close the pool!
    pool.close()

    # Pickle the sampler: the pool cannot be pickled, so drop it first,
    # and open the file in binary mode for pickle
    sampler.pool = None
    with open('bimbh3_141125_2.pck', 'wb') as f:
        pickle.dump(sampler, f)

    return sampler
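# Once the pool attribute has been removed, emcee samplers pickle cleanly; a
# short sketch (reusing the pickle file name from the function above) of
# reloading the sampler for analysis:
def load_bimbh3_sampler():
    import pickle
    with open('bimbh3_141125_2.pck', 'rb') as f:
        sampler = pickle.load(f)
    # flatchain collapses the (nwalkers, nsteps, ndim) chain to
    # (nwalkers * nsteps, ndim)
    return sampler.flatchain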
def run_pool(pC, pW, walk, step):  # pCenter and pWidths
    steps = step
    nwalkers = walk
    ndim = len(pC)
    # r in, del r, i, PA
    p0 = [pC[0], pC[1], pC[2], pC[3], pC[4]]
    widths = [pW[0], pW[1], pW[2], pW[3], pW[4]]
    p = emcee.utils.sample_ball(p0, widths, size=nwalkers)

    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnlike_visonly,
                                    live_dangerously=True, pool=pool)

    print('Beginning the MCMC run.')
    start = time.time()  # wall-clock timing (time.clock was removed in Python 3.8)
    sampler.run_mcmc(p, steps)
    stop = time.time()
    pool.close()
    print('MCMC finished successfully.\n')
    print('This was a visibility-only run with {} walkers and {} steps'.format(nwalkers, steps))
    print('Mean acor time: ' + str(np.mean(sampler.acor)))
    print('\nMean acceptance fraction: ' + str(np.mean(sampler.acceptance_fraction)))
    print('Run took %r minutes' % ((stop - start) / 60.))

    chain = sampler.chain
    chi = sampler.lnprobability / (-0.5)
    whatbywhat = str(nwalkers) + 'x' + str(steps)
    os.system('mkdir MCMCRUNS/vis_only/' + whatbywhat)
    chainFile = 'MCMCRUNS/vis_only/' + whatbywhat + '/' + whatbywhat + '.chain.fits'
    chiFile = 'MCMCRUNS/vis_only/' + whatbywhat + '/' + whatbywhat + '.chi.fits'
    infoFile = 'MCMCRUNS/vis_only/' + whatbywhat + '/' + whatbywhat + '.runInfo.txt'
    fits.writeto(chainFile, chain)
    fits.writeto(chiFile, chi)

    f = open(infoFile, 'w')
    f.write('run took %r minutes\n' % ((stop - start) / 60.))
    f.write('walkers: %r\n' % nwalkers)
    f.write('steps: %r\n' % steps)
    f.write('initial model: %r\n' % p0)
    f.write('widths: %r\n' % widths)
    f.write('mean acor time: ' + str(np.mean(sampler.acor)))
    f.write('\nmean acceptance fraction: ' + str(np.mean(sampler.acceptance_fraction)))
    f.close()
    print('Data written to: \n' + chainFile + '\n' + chiFile + '\n' + infoFile)
def tdelay_dt_mcmc(run, theta, Niter=20, Nwalkers=10, Ndim=2, sigma_smhm=0.2,
                   nsnap0=15, downsampled='14', flag=None, continue_chain=False):
    '''
    '''
    if Ndim == 2:
        tdelay_range = [0., 3.]  # np.arange(0., 3., 0.5)
        dt_range = [0.1, 4.]

    # new chain
    chain_file = ''.join([UT.fig_dir(), run, '.tdelay_dt_mcmc.chain.dat'])
    if os.path.isfile(chain_file) and continue_chain:
        print('Continuing previous MCMC chain!')
        sample = np.loadtxt(chain_file)
        # number of iterations left to finish
        Niter = Niter - int(np.float(len(sample)) / np.float(Nwalkers))
        if Niter <= 0:
            raise ValueError
        print(Niter, ' iterations left to finish')
    else:
        f = open(chain_file, 'w')
        f.close()

    # Initializing Walkers
    pos0 = [np.array([np.random.uniform(tdelay_range[0], tdelay_range[1]),
                      np.random.uniform(dt_range[0], dt_range[1])])
            for i in range(Nwalkers)]

    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Initializing the emcee sampler
    kwargs = {
        'theta': theta,
        'sigma_smhm': 0.2,
        'nsnap0': 15,
        'downsampled': '14',
    }
    sampler = emcee.EnsembleSampler(Nwalkers, Ndim, sigM, pool=pool, kwargs=kwargs)

    # append the walker positions to the chain file after every iteration
    for result in sampler.sample(pos0, iterations=Niter, storechain=False):
        position = result[0]
        f = open(chain_file, 'a')
        for k in range(position.shape[0]):
            output_str = '\t'.join(position[k].astype('str')) + '\n'
            f.write(output_str)
        f.close()

    pool.close()
    return None
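# Note that the continue_chain branch above recomputes the remaining iteration
# count but still draws pos0 uniformly from the ranges; a sketch of resuming
# from the last recorded walker positions instead (assuming the chain file
# layout written above, one walker position per row):
#
#   sample = np.loadtxt(chain_file)
#   pos0 = sample[-Nwalkers:, :]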
def lnPost(theta):
    '''log-posterior
    '''
    # prior calculations
    if prior_min[0] < theta[0] < prior_max[0] and \
       prior_min[1] < theta[1] < prior_max[1] and \
       prior_min[2] < theta[2] < prior_max[2] and \
       prior_min[3] < theta[3] < prior_max[3] and \
       prior_min[4] < theta[4] < prior_max[4]:
        lnPrior = 0.0
    else:
        lnPrior = -np.inf

    if not np.isfinite(lnPrior):
        return -np.inf
    return lnPrior + lnLike(theta)

"""Initializing Walkers"""
pos = [np.array([11., np.log(.4), 11.5, 1.0, 13.5]) +
       1e-3 * np.random.randn(Ndim) for i in range(Nwalkers)]

"""Initializing MPIPool"""
pool = MPIPool(loadbalance=True)
if not pool.is_master():
    pool.wait()
    sys.exit(0)

"""Initializing the emcee sampler"""
sampler = emcee.EnsembleSampler(Nwalkers, Ndim, lnprob, pool=pool)

# Burn in + Production
sampler.run_mcmc(pos, Nchains_burn + Nchains_pro)

# Production.
samples = sampler.chain[:, Nchains_burn:, :].reshape((-1, Ndim))

# closing the pool
pool.close()

np.savetxt("mcmc_sample.dat", samples)
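# The chained prior comparisons above can be collapsed into one vectorized
# check; a sketch assuming prior_min and prior_max are numpy arrays of the
# same length as theta:
def lnPrior_vectorized(theta):
    if np.all((prior_min < theta) & (theta < prior_max)):
        return 0.0
    return -np.inf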
def main():
    '''
    A parallel run.
    '''
    pool = MPIPool(loadbalance=True)
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    clf = hbsgc.HBSGC(pool=pool)

    # save start time (wall clock; time.clock was removed in Python 3.8)
    clf.last_clock = time.time()

    clf.filter_calcs()
    clf.data_calcs()
    clf.star_model_calcs()
    # if clf.calc_model_mags:
    #     clf.star_model_mags()
    clf.gal_model_calcs()
    # if clf.calc_model_mags:
    #     clf.gal_model_mags()
    clf.fit_calcs()
    clf.count_tot = 0
    clf.sample()
    clf.save_proba()
    if clf.min_chi2_write:
        clf.save_min_chi2()
    pool.close()
def ens_mpi_sample(gf, nwalkers, burn_steps, sample_steps, pos=None, random_state=None):
    pool = MPIPool(loadbalance=True)
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Initialize the parameter array with initial values (in log10 units)
    # Number of parameters to estimate
    ndim = (len(gf.builder.global_params) +
            (len(gf.data) * len(gf.builder.local_params)))
    # Initialize the walkers with starting positions drawn from the priors
    # Note that the priors are in log10 scale already, so they don't
    # need to be transformed here
    if pos is None:
        p0 = np.zeros((nwalkers, ndim))
        for walk_ix in range(nwalkers):
            for p_ix in range(ndim):
                p0[walk_ix, p_ix] = gf.priors[p_ix].random()
    else:
        p0 = pos

    # Create the sampler object
    sampler = emcee.EnsembleSampler(nwalkers, ndim, posterior, args=[gf], pool=pool)
    if random_state is not None:
        sampler.random_state = random_state

    print("Burn in sampling...")
    pos, prob, state = sampler.run_mcmc(p0, burn_steps, storechain=False)
    sampler.reset()

    print("Main sampling...")
    sampler.run_mcmc(pos, sample_steps)

    # Close the pool!
    pool.close()

    print("Done sampling.")
    return sampler
def runModel():
    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)
    # pool = None

    observation = simulateData()
    nTrans = len(observation["spectype"])

    ndim, nwalkers = 8 + nTrans, 1000

    # mns = numpy.concatenate(([inputs.Om0, inputs.w0, inputs.rate_II_r,
    #     inputs.logL_snIa, inputs.sigma_snIa, inputs.logL_snII,
    #     inputs.sigma_snII, inputs.Z], -.35*numpy.zeros(nTrans)))

    sigs = numpy.concatenate((
        [0.1, 0.2, 0.1,
         uncertainties.logL_snIa, uncertainties.sigma_snIa,
         uncertainties.logL_snII, uncertainties.sigma_snII,
         uncertainties.Z],
        0.05 + numpy.zeros(nTrans)))

    p0 = []
    for i in range(nwalkers):
        dum = numpy.random.rand(nTrans)
        dum = numpy.array(numpy.round(dum), dtype="int")
        lnL_init = dum + (1 - dum) * 0.5
        lnL_init = numpy.log(lnL_init)
        mns = numpy.concatenate((
            [inputs.Om0, inputs.w0, inputs.rate_II_r,
             inputs.logL_snIa, inputs.sigma_snIa,
             inputs.logL_snII, inputs.sigma_snII, inputs.Z],
            lnL_init))
        p0.append((numpy.random.rand(ndim) - 0.5) * sigs + mns)
    # p0 = [numpy.random.randn(ndim)*sigs + mns for i in range(nwalkers)]

    dco = 1e-11  # measurement error very small
    sampler = emcee.EnsembleSampler(
        nwalkers, ndim, lnprob,
        args=[observation["counts"], observation["specz"],
              numpy.zeros(nTrans) + dco, observation["zprob"],
              observation["spectype"]],
        pool=pool)
    sampler.run_mcmc(p0, 2000)
    pool.close()

    output = open("data.pkl", "wb")
    pickle.dump(sampler.chain, output)
    output.close()
class MPIManager(object):
    """
    Class to serve as context manager to handle MPI-related issues,
    specifically, the managing of ``MPIPool`` and splitting of communicators
    """
    logger = logging.getLogger("MPIManager")

    def __init__(self, comm, nruns, debug=False):
        """
        Parameters
        ----------
        comm : MPI.Communicator
            the global communicator to split
        nruns : int
            the number of independent algorithms to run concurrently
        debug : bool, optional
            set the logging level to debug in the `MPIPool`; default is `False`
        """
        self.comm = comm
        self.nruns = nruns
        self.debug = debug
        if debug:
            self.logger.setLevel(logging.DEBUG)

        # initialize comm for parallel runs
        self.par_runs_group = None
        self.par_runs_comm = None

        # initialize comm for pool of workers for each parallel run
        self.pool_comm = None
        self.pool = None

    def __enter__(self):
        """
        Setup the MPIPool, such that only the ``pool`` master returns,
        while the other processes wait for tasks
        """
        # split ranks if we need to
        if self.comm.size > 1:
            ranges = []
            for i, ranks in split_ranks(self.comm.size, self.nruns):
                ranges.append(ranks[0])
                if self.comm.rank in ranks:
                    color = i

            # split the global comm into pools of workers
            self.pool_comm = self.comm.Split(color, 0)

            # make the comm to communicate b/w parallel runs
            if self.nruns > 1:
                self.par_runs_group = self.comm.group.Incl(ranges)
                self.par_runs_comm = self.comm.Create(self.par_runs_group)

        # initialize the MPI pool, if the comm has more than 1 process
        if self.pool_comm is not None and self.pool_comm.size > 1:
            from emcee.utils import MPIPool
            kws = {'loadbalance': True, 'comm': self.pool_comm, 'debug': self.debug}
            self.pool = MPIPool(**kws)

        # explicitly force non-master ranks in pool to wait
        if self.pool is not None and not self.pool.is_master():
            self.pool.wait()
            self.logger.debug("exiting after pool closed")
            sys.exit(0)

        # log
        if self.pool is not None:
            self.logger.debug("using an MPIPool instance with %d worker(s)" % self.pool.size)

        self.rank = 0
        if self.par_runs_comm is not None:
            self.rank = self.par_runs_comm.rank

        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """
        Exit gracefully by closing and freeing the MPI-related variables
        """
        if exc_value is not None:
            trace = ''.join(traceback.format_exception(exc_type, exc_value,
                                                       exc_traceback, limit=5))
            self.logger.error("traceback:\n%s" % trace)

        # wait for all the processes, if we have more than one
        if self.par_runs_comm is not None and self.par_runs_comm.size > 1:
            self.par_runs_comm.Barrier()

        # close and free the MPI stuff
        self.logger.debug("beginning to close MPI variables...")
        if self.par_runs_group is not None:
            self.par_runs_group.Free()
        if self.par_runs_comm is not None:
            self.par_runs_comm.Free()
        if self.pool is not None:
            self.pool.close()
        self.logger.debug('...MPI variables closed')

        return True
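# A sketch of how MPIManager might be driven (hypothetical usage; the class
# above only shows the context-manager plumbing, so lnprob and the sampler
# settings here are illustrative):
def mpimanager_usage_sketch():
    from mpi4py import MPI
    import numpy as np
    import emcee

    def lnprob(theta):
        return -0.5 * np.sum(theta ** 2)

    with MPIManager(MPI.COMM_WORLD, nruns=1) as manager:
        # Only ranks that survive __enter__ reach this point; pool workers
        # exit inside __enter__ once the pool is closed. manager.pool is an
        # MPIPool (or None for small communicators), and manager.rank indexes
        # the parallel run this rank belongs to.
        nwalkers, ndim = 16, 2
        sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=manager.pool)
        sampler.run_mcmc(np.random.randn(nwalkers, ndim), 100)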
def main():

    #################################################
    ############ Option parsing #####################
    #################################################

    # Parse command line options
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--file", dest="options_file", action="store", type=str,
                        help="analysis options file")
    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true",
                        default=False, help="turn on verbosity")
    parser.add_argument("-vv", "--verbose_plus", dest="verbose_plus", action="store_true",
                        default=False, help="turn on additional verbosity")
    parser.add_argument("-m", "--mask_scale", dest="mask_scale", action="store_true",
                        default=False, help="scale peaks and power spectrum to unmasked area")
    parser.add_argument("-c", "--cut_convergence", dest="cut_convergence", action="store",
                        default=None,
                        help="select convergence values in (min,max) to compute the likelihood. Safe for single descriptor only!!")
    parser.add_argument("-g", "--group_subfields", dest="group_subfields", action="store_true",
                        default=False,
                        help="group feature realizations by taking the mean over subfields, this makes a big difference in the covariance matrix")
    parser.add_argument("-s", "--save_features", dest="save_features", action="store_true",
                        default=False, help="save features profiles")
    parser.add_argument("-ss", "--save", dest="save", action="store_true", default=False,
                        help="save the best fits and corresponding chi2")
    parser.add_argument("-p", "--prefix", dest="prefix", action="store", default="",
                        help="prefix of the emulator to pickle")
    parser.add_argument("-l", "--likelihood", dest="likelihood", action="store_true",
                        default=False, help="save the likelihood cubes for the mocks")
    parser.add_argument("-o", "--observation", dest="observation", action="store_true",
                        default=False,
                        help="append the actual observation results to the mock results for direct comparison")
    parser.add_argument("-d", "--differentiate", dest="differentiate", action="store_true",
                        default=False, help="differentiate the first minkowski functional to get the PDF")

    cmd_args = parser.parse_args()

    if cmd_args.options_file is None:
        parser.print_help()
        sys.exit(0)

    # Set verbosity level
    if cmd_args.verbose_plus:
        logging.basicConfig(level=DEBUG_PLUS)
    elif cmd_args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    # Initialize MPI pool (fall back to serial mode if MPI is unavailable)
    try:
        pool = MPIPool()
    except:
        pool = None

    if (pool is not None) and (not pool.is_master()):
        pool.wait()
        sys.exit(0)

    if pool is not None:
        logging.info("Started MPI Pool.")

    #################################################################################
    ######## Info gathering: covariance matrix, observation and emulator ###########
    #################################################################################

    # start
    start = time.time()
    last_timestamp = start

    # Instantiate a FeatureLoader object that will take care of the memory loading
    feature_loader = FeatureLoader(cmd_args)

    #################################################################################

    # Use this model for the covariance matrix (from the new set of 50 N-body simulations)
    covariance_model = CFHTcov.getModels(root_path=feature_loader.options.get("simulations", "root_path"))
    logging.info("Measuring covariance matrix from model {0}".format(covariance_model))

    # Load in the covariance matrix
    fiducial_feature_ensemble = feature_loader.load_features(covariance_model)
    fiducial_features = fiducial_feature_ensemble.mean()
    features_covariance = fiducial_feature_ensemble.covariance()

    # timestamp
    now = time.time()
    logging.info("covariance loaded in {0:.1f}s".format(now - last_timestamp))
    last_timestamp = now

    #################################################################################

    # Treat the 50 N-body simulation set as data
    observation = CFHTcov.getModels(root_path=feature_loader.options.get("observations", "root_path"))
    logging.info("Measuring the observations from {0}".format(observation))

    # And load the observations
    observed_feature = feature_loader.load_features(observation)

    # timestamp
    now = time.time()
    logging.info("observation loaded in {0:.1f}s".format(now - last_timestamp))
    last_timestamp = now

    #################################################################################

    # Create a LikelihoodAnalysis instance by unpickling one of the emulators
    emulators_dir = os.path.join(feature_loader.options.get("analysis", "save_path"), "emulators")
    emulator_file = os.path.join(emulators_dir,
                                 "emulator{0}_{1}.p".format(cmd_args.prefix,
                                                            output_string(feature_loader.feature_string)))
    logging.info("Unpickling emulator from {0}...".format(emulator_file))
    analysis = LikelihoodAnalysis.load(emulator_file)

    # timestamp
    now = time.time()
    logging.info("emulator unpickled in {0:.1f}s".format(now - last_timestamp))
    last_timestamp = now

    #################################################################################
    ################### Compute the chi2 cube #######################################
    #################################################################################

    logging.info("Initializing chi2 meshgrid...")

    # Set the points in parameter space on which to compute the chi2 (read from options)
    Om = np.ogrid[feature_loader.options.getfloat("Omega_m", "min"):
                  feature_loader.options.getfloat("Omega_m", "max"):
                  feature_loader.options.getint("Omega_m", "num_points") * 1j]
    w = np.ogrid[feature_loader.options.getfloat("w", "min"):
                 feature_loader.options.getfloat("w", "max"):
                 feature_loader.options.getint("w", "num_points") * 1j]
    si8 = np.ogrid[feature_loader.options.getfloat("sigma8", "min"):
                   feature_loader.options.getfloat("sigma8", "max"):
                   feature_loader.options.getint("sigma8", "num_points") * 1j]

    num_points = len(Om) * len(w) * len(si8)
    points = np.array(np.meshgrid(Om, w, si8, indexing="ij")).reshape(3, num_points).transpose()

    # Now compute the chi2 at each of these points
    if pool:
        split_chunks = pool.size
        logging.info("Computing chi squared for {0} parameter combinations using {1} cores...".format(points.shape[0], pool.size))
    else:
        split_chunks = None
        logging.info("Computing chi squared for {0} parameter combinations using 1 core...".format(points.shape[0]))

    # Allocate array for best fit
    first_realization = feature_loader.options.getint("mocks", "first_realization")
    last_realization = feature_loader.options.getint("mocks", "last_realization")

    if cmd_args.observation:
        best_fit_all = np.zeros((last_realization - first_realization + 1 + 1, analysis.parameter_set.shape[1]))
        chi2_all = np.zeros(last_realization - first_realization + 1 + 1)
        chi2_from_expected_all = np.zeros(last_realization - first_realization + 1 + 1)
    else:
        best_fit_all = np.zeros((last_realization - first_realization + 1, analysis.parameter_set.shape[1]))
        chi2_all = np.zeros(last_realization - first_realization + 1)
        chi2_from_expected_all = np.zeros(last_realization - first_realization + 1)

    # Cycle through the realizations and obtain a best fit for each one of them
    for nreal in range(first_realization - 1, last_realization):

        chi_squared = analysis.chi2(points, observed_feature=observed_feature[nreal],
                                    features_covariance=features_covariance,
                                    pool=pool, split_chunks=split_chunks)

        now = time.time()
        logging.info("realization {0}, chi2 calculations completed in {1:.1f}s".format(nreal + 1, now - last_timestamp))
        last_timestamp = now

        # After chi2, compute the likelihood
        likelihood_cube = analysis.likelihood(chi_squared.reshape(Om.shape + w.shape + si8.shape))

        # Maybe save the likelihood cube?
        if cmd_args.likelihood:
            likelihood_filename = os.path.join(feature_loader.options.get("analysis", "save_path"),
                                               "troubleshoot",
                                               "likelihood{0}_{1}.npy".format(nreal + 1, output_string(feature_loader.feature_string)))
            logging.info("Saving likelihood cube to {0}...".format(likelihood_filename))
            np.save(likelihood_filename, likelihood_cube)

        # Maybe save the feature profiles?
        if cmd_args.save_features:
            features_filename = os.path.join(feature_loader.options.get("analysis", "save_path"),
                                             "troubleshoot",
                                             "features{0}_{1}.npy".format(nreal + 1, output_string(feature_loader.feature_string)))
            logging.info("Saving features for realization {0} to {1}...".format(nreal + 1, features_filename))
            np.save(features_filename, observed_feature[nreal])

        # Find the maximum of the likelihood using ContourPlot functionality
        contour = ContourPlot()
        contour.getLikelihood(likelihood_cube)
        contour.getUnitsFromOptions(feature_loader.options)
        parameters_maximum = contour.getMaximum()
        parameter_keys = sorted(parameters_maximum.keys(), key=contour.parameter_axes.get)

        # Display the new best fit before exiting
        best_fit_parameters = np.array([parameters_maximum[par_key] for par_key in parameter_keys])
        best_fit_chi2 = analysis.chi2(best_fit_parameters, features_covariance=features_covariance,
                                      observed_feature=observed_feature[nreal])[0]
        chi2_from_expected = analysis.chi2(np.array([0.26, -1.0, 0.800]),
                                           features_covariance=features_covariance,
                                           observed_feature=observed_feature[nreal])[0]

        logging.info("Best fit for realization {4} is [ {0[0]:.2f} {0[1]:.2f} {0[2]:.2f} ], chi2_best={1:.3f}({2} dof), chi2_expected={3:.3f}({2} dof)".format(best_fit_parameters, best_fit_chi2, analysis.training_set.shape[1], chi2_from_expected, nreal + 1))

        # Update global array with best fit parameters and corresponding chi2
        best_fit_all[nreal - first_realization + 1, :] = best_fit_parameters.copy()
        chi2_all[nreal - first_realization + 1] = best_fit_chi2
        chi2_from_expected_all[nreal - first_realization + 1] = chi2_from_expected

    #################################################################################

    # If option was selected, append the observation results to the mock ones,
    # for comparison
    if cmd_args.observation:

        observed_feature = feature_loader.load_features(CFHTLens(root_path=feature_loader.options.get("observations", "root_path")))[0]
        chi_squared = analysis.chi2(points, observed_feature=observed_feature,
                                    features_covariance=features_covariance,
                                    pool=pool, split_chunks=split_chunks)

        now = time.time()
        logging.info("actual observation, chi2 calculations completed in {0:.1f}s".format(now - last_timestamp))
        last_timestamp = now

        # After chi2, compute the likelihood
        likelihood_cube = analysis.likelihood(chi_squared.reshape(Om.shape + w.shape + si8.shape))

        # Maybe save the likelihood cube?
        if cmd_args.likelihood:
            likelihood_filename = os.path.join(feature_loader.options.get("analysis", "save_path"),
                                               "troubleshoot",
                                               "likelihood_obs_{0}.npy".format(output_string(feature_loader.feature_string)))
            logging.info("Saving likelihood cube to {0}...".format(likelihood_filename))
            np.save(likelihood_filename, likelihood_cube)

        # Maybe save the feature profiles?
        if cmd_args.save_features:
            features_filename = os.path.join(feature_loader.options.get("analysis", "save_path"),
                                             "troubleshoot",
                                             "features_obs_{0}.npy".format(output_string(feature_loader.feature_string)))
            logging.info("Saving observed features to {0}...".format(features_filename))
            np.save(features_filename, observed_feature)

        # Find the maximum of the likelihood using ContourPlot functionality
        contour = ContourPlot()
        contour.getLikelihood(likelihood_cube)
        contour.getUnitsFromOptions(feature_loader.options)
        parameters_maximum = contour.getMaximum()
        parameter_keys = sorted(parameters_maximum.keys(), key=contour.parameter_axes.get)

        # Display the new best fit before exiting
        best_fit_parameters = np.array([parameters_maximum[par_key] for par_key in parameter_keys])
        best_fit_chi2 = analysis.chi2(best_fit_parameters, features_covariance=features_covariance,
                                      observed_feature=observed_feature)[0]
        chi2_from_expected = analysis.chi2(np.array([0.26, -1.0, 0.800]),
                                           features_covariance=features_covariance,
                                           observed_feature=observed_feature)[0]

        logging.info("Best fit for observation is [ {0[0]:.2f} {0[1]:.2f} {0[2]:.2f} ], chi2_best={1:.3f}({2} dof), chi2_expected={3:.3f}({2} dof)".format(best_fit_parameters, best_fit_chi2, analysis.training_set.shape[1], chi2_from_expected))

        # Update global array with best fit parameters and corresponding chi2
        best_fit_all[-1, :] = best_fit_parameters.copy()
        chi2_all[-1] = best_fit_chi2
        chi2_from_expected_all[-1] = chi2_from_expected

    #################################################################################

    # Close MPI Pool
    if pool is not None:
        pool.close()
        logging.info("Closed MPI Pool.")

    if cmd_args.save:

        # Save the best fit parameters for all realizations
        best_fit_filename = os.path.join(feature_loader.options.get("analysis", "save_path"),
                                         "troubleshoot",
                                         "best_fit_all_{0}.npy".format(output_string(feature_loader.feature_string)))
        logging.info("Saving best fit to {0}...".format(best_fit_filename))
        np.save(best_fit_filename, best_fit_all)

        # Save the best fit chi2 for all realizations
        chi2_filename = os.path.join(feature_loader.options.get("analysis", "save_path"),
                                     "troubleshoot",
                                     "chi2_all_{0}.npy".format(output_string(feature_loader.feature_string)))
        logging.info("Saving best fit chi2 to {0}...".format(chi2_filename))
        np.save(chi2_filename, chi2_all)

        # Save also the chi2 for the expected best fit
        chi2_filename = os.path.join(feature_loader.options.get("analysis", "save_path"),
                                     "troubleshoot",
                                     "chi2_all_expected_{0}.npy".format(output_string(feature_loader.feature_string)))
        logging.info("Saving expected chi2 to {0}...".format(chi2_filename))
        np.save(chi2_filename, chi2_from_expected_all)

    end = time.time()
    logging.info("DONE!!")
    logging.info("Completed in {0:.1f}s".format(end - start))
def main():
    # First, read one file and get the k bins we want for the fitting range;
    # it's the dark matter power spectrum in redshift space.
    parameter_file = sys.argv[1]
    with open(parameter_file, 'r') as fr:
        input_params = yaml.load(fr)

    # skip the first data row; the first 10 rows are comments
    data_m = np.genfromtxt(input_params['Pwig_ifile'], dtype='f8', comments='#',
                           delimiter='', skip_header=11)
    #print(data_m)
    num_kbin = np.size(data_m, axis=0)
    kk = data_m[:, 0]
    # get indices based on the ascending k
    indices_sort = [i[0] for i in sorted(enumerate(kk), key=lambda x: x[1])]
    # sort out the indices whose k <= 0.3 h/Mpc
    for i in range(num_kbin):
        if kk[indices_sort[i]] > 0.3:
            break
    print(indices_sort[i - 1])
    indices_p = indices_sort[0:i]
    k_p = kk[indices_p]
    N_fitbin = len(k_p)
    mu_p, Pwig = data_m[indices_p, 1], data_m[indices_p, 2]
    print(k_p, N_fitbin)

    # input Pnow; note the (k, mu) indices have the same order as those of the
    # Pwig data file
    data_m = np.genfromtxt(input_params['Pnow_ifile'], dtype='f8', comments='#',
                           delimiter='', skip_header=11)
    Pnow = data_m[indices_p, 2]
    Pwnw_diff_obs = Pwig - Pnow

    # input diagonal terms of covariance matrix of (Pwig - Pnow)
    diag_Cov_Pwnw = np.loadtxt(input_params['diag_Cov_Pwnw_ifile'], dtype='f8',
                               comments='#', usecols=(2,))
    ivar_Pk_wnow = 1.0 / diag_Cov_Pwnw
    #print(ivar_Pk_wnow)

    # input (theoretical) linear power spectrum
    k_wiggle, Pk_wiggle = np.loadtxt(input_params['Pwig_linear'], dtype='f8',
                                     comments='#', unpack=True)
    tck_Pk_linw = interpolate.splrep(k_wiggle, Pk_wiggle)

    k_smooth, Pk_smooth = np.loadtxt(input_params['Pnow_linear'], dtype='f8',
                                     comments='#', unpack=True)
    tck_Pk_sm = interpolate.splrep(k_smooth, Pk_smooth)

    q_max = 110.0  # Mpc/h, BAO radius
    Sigma2_sm_array = np.linspace(10.0, 610.0, 61)
    #print(Sigma2_sm_array)
    #const = 1.0
    Sigma2_dd_array = np.array([
        integrate.quad(Sigma2_dd_integrand, 1.05e-5, 100.0,
                       args=(tck_Pk_linw, q_max, Sigma2_sm),
                       epsabs=1.e-4, epsrel=1.e-4)[0]
        for Sigma2_sm in Sigma2_sm_array])
    Sigma2_sd_array = np.array([
        integrate.quad(Sigma2_sd_integrand, 1.05e-5, 100.0,
                       args=(tck_Pk_linw, q_max, Sigma2_sm),
                       epsabs=1.e-4, epsrel=1.e-4)[0]
        for Sigma2_sm in Sigma2_sm_array])
    Sigma2_ss_array = np.array([
        integrate.quad(Sigma2_ss_integrand, 1.05e-5, 100.0,
                       args=(tck_Pk_linw, q_max, Sigma2_sm),
                       epsabs=1.e-4, epsrel=1.e-4)[0]
        for Sigma2_sm in Sigma2_sm_array])
    tck_Sigma2_dd = interpolate.splrep(Sigma2_sm_array, Sigma2_dd_array, k=3)
    tck_Sigma2_sd = interpolate.splrep(Sigma2_sm_array, Sigma2_sd_array, k=3)
    tck_Sigma2_ss = interpolate.splrep(Sigma2_sm_array, Sigma2_ss_array, k=3)

    all_params = list(input_params['init_params'].values())
    params_indices = input_params['params_indices']
    params_name = list(input_params['init_params'].keys())
    all_temperature = input_params['all_temperature']
    N_params, theta, fix_params, params_T, params_name = set_params(
        all_params, params_indices, params_name, all_temperature)
    print(N_params, theta, fix_params, params_T, params_name)

    sim_z = input_params['sim_z']  # redshift of the simulated power spectrum
    N_walkers = input_params['N_walkers']
    Omega_m = input_params['Omega_m']
    G_0 = growth_factor(0.0, Omega_m)  # G_0 at z=0, normalization factor
    norm_gf = growth_factor(sim_z, Omega_m) / G_0
    const = 1.0 / (6.0 * np.pi ** 2.0) * norm_gf ** 2.0

    pool = MPIPool(loadbalance=True)
    np.random.seed(1)  # set random seed for the random number generator
    params_mcmc = mcmc_routine(N_params, N_walkers, theta, params_T, params_indices,
                               fix_params, k_p, mu_p, Pwnw_diff_obs, ivar_Pk_wnow,
                               tck_Pk_linw, tck_Pk_sm, tck_Sigma2_dd, tck_Sigma2_sd,
                               tck_Sigma2_ss, norm_gf, const, params_name, pool)
    print(params_mcmc)

    chi_square = chi2(params_mcmc[:, 0], params_indices, fix_params, k_p, mu_p,
                      Pwnw_diff_obs, ivar_Pk_wnow, tck_Pk_linw, tck_Pk_sm,
                      tck_Sigma2_dd, tck_Sigma2_sd, tck_Sigma2_ss, norm_gf, const)
    dof = N_fitbin - N_params
    reduced_chi2 = chi_square / dof

    odir = './output_files/'
    if not os.path.exists(odir):
        os.makedirs(odir)
    ofile_params = odir + input_params['ofile_name'].format(
        sim_z, ''.join(map(str, params_indices)))
    write_params(ofile_params, params_mcmc, params_name, reduced_chi2, fix_params, dof)
    pool.close()
def mcmc(tag=None, zbin=1, nwalkers=48, Nchains=4, minlength=600, likelihood='pseudo'):
    '''
    Parameters
    ----------
    Nchains : int
        Number of independent chains to run for the Gelman-Rubin convergence test
    '''
    if tag is None:
        raise ValueError("specify a tag, otherwise it's confusing")
    temperature = 2.e-3  # temperature

    # read in BOSS P(k) NGC
    pkay = Dat.Pk()
    k0, p0k_ngc = pkay.Observation(0, zbin, 'ngc')
    k2, p2k_ngc = pkay.Observation(2, zbin, 'ngc')
    k4, p4k_ngc = pkay.Observation(4, zbin, 'ngc')
    pk_ngc_list = [p0k_ngc, p2k_ngc, p4k_ngc]
    k_list = [k0, k2, k4]
    # read in BOSS P(k) SGC
    k0, p0k_sgc = pkay.Observation(0, zbin, 'sgc')
    k2, p2k_sgc = pkay.Observation(2, zbin, 'sgc')
    k4, p4k_sgc = pkay.Observation(4, zbin, 'sgc')
    pk_sgc_list = [p0k_sgc, p2k_sgc, p4k_sgc]

    if likelihood == 'pseudo':  # standard pseudo Gaussian likelihood
        # read in covariance matrix; currently for testing purposes,
        # implemented to read in Florian's covariance matrix
        _, _, C_pk_ngc = Dat.beutlerCov(zbin, NorS='ngc', ell='all')
        _, _, C_pk_sgc = Dat.beutlerCov(zbin, NorS='sgc', ell='all')

        # calculate precision matrices (including the Hartlap factor)
        Cinv_ngc = np.linalg.inv(C_pk_ngc)
        Cinv_sgc = np.linalg.inv(C_pk_sgc)
        # Hartlap factor
        n_mocks_ngc = 2045
        n_mocks_sgc = 2048
        f_hartlap_ngc = ((float(n_mocks_ngc) - float(len(np.concatenate(pk_ngc_list))) - 2.) /
                         (float(n_mocks_ngc) - 1.))
        f_hartlap_sgc = ((float(n_mocks_sgc) - float(len(np.concatenate(pk_sgc_list))) - 2.) /
                         (float(n_mocks_sgc) - 1.))
        Cinv_ngc *= f_hartlap_ngc
        Cinv_sgc *= f_hartlap_sgc

        # ln posterior function
        lnPost = lnPost_pseudo
        # args for ln posterior function:
        # data ks, BOSS NGC P_l(k), BOSS SGC P_l(k), NGC precision matrix, SGC precision matrix
        lnpost_args = (k_list, pk_ngc_list, pk_sgc_list, Cinv_ngc, Cinv_sgc)
    elif likelihood in ['pca', 'ica']:
        # read in patchy mock P(k)s for ngc and sgc
        pk_ngc_list, pk_sgc_list = [], []
        for ell in [0, 2, 4]:
            if ell == 4:
                kmax = 0.1
            else:
                kmax = 0.15
            pk_ngc_list.append(NG.X_pk('patchy.z' + str(zbin), krange=[0.01, kmax],
                                       ell=ell, NorS='ngc', sys='fc'))
            pk_sgc_list.append(NG.X_pk('patchy.z' + str(zbin), krange=[0.01, kmax],
                                       ell=ell, NorS='sgc', sys='fc'))
        pk_ngc_mock = np.concatenate(pk_ngc_list, axis=1)
        pk_sgc_mock = np.concatenate(pk_sgc_list, axis=1)
    else:
        raise NotImplementedError

    if zbin == 1:  # 0.2 < z < 0.5
        # maximum likelihood value
        start = np.array([1.008, 1.001, 0.478, 1.339, 1.337, 1.16, 0.32,
                          -1580., -930., 6.1, 6.8])
    ndim = len(start)

    # initialize MPI pool (MPIPool raises ValueError on a single rank)
    try:
        pool = MPIPool()
        if not pool.is_master():
            pool.wait()
            sys.exit(0)
    except ValueError:
        pool = None

    print("initializing ", Nchains, " independent emcee chains")
    pos, samplers = [], []
    for ichain in range(Nchains):
        pos.append([start + temperature * start * (2. * np.random.random_sample(ndim) - 1.)
                    for i in range(nwalkers)])
        samplers.append(emcee.EnsembleSampler(nwalkers, ndim, lnPost,
                                              args=lnpost_args, pool=pool))

    # Start MCMC
    print("Running MCMC...")
    withinchainvar = np.zeros((Nchains, ndim))
    meanchain = np.zeros((Nchains, ndim))
    scalereduction = np.repeat(2., ndim)

    # bunch of numbers for the mcmc run
    itercounter = 0
    chainstep = minlength
    loop = 1
    epsilon = 0.02  # 0.02
    ichaincheck = 100
    rstate = np.random.get_state()

    while loop:
        itercounter += chainstep
        print("chain length =", itercounter)
        for jj in range(Nchains):
            for result in samplers[jj].sample(pos[jj], iterations=chainstep,
                                              rstate0=rstate, storechain=True):
                pos[jj] = result[0]
                chainchi2 = -2. * result[1]
                rstate = result[2]
                # append chain outputs to chain file
                chain_file = ''.join([UT.dat_dir(), 'mcmc/', tag, '.chain', str(jj),
                                      '.zbin', str(zbin), '.dat'])
                f = open(chain_file, 'a')
                for k in range(pos[jj].shape[0]):
                    output_str = ('\t'.join(pos[jj][k].astype('str')) + '\t' +
                                  str(chainchi2[k]) + '\n')
                    f.write(output_str)
                f.close()

            # we do the convergence test on the second half of the current
            # chain (itercounter // 2)
            chainsamples = samplers[jj].chain[:, itercounter // 2:, :].reshape((-1, ndim))
            withinchainvar[jj] = np.var(chainsamples, axis=0)
            meanchain[jj] = np.mean(chainsamples, axis=0)

        scalereduction = gelman_rubin_convergence(withinchainvar, meanchain,
                                                  itercounter // 2, Nchains, ndim)
        print("scalereduction = ", scalereduction)

        loop = 0
        for jj in range(ndim):
            if np.abs(1 - scalereduction[jj]) > epsilon:
                # keep iterating until every parameter passes the Gelman-Rubin test
                loop = 1
                chainstep = ichaincheck

    if pool is not None:
        pool.close()
    return None
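# gelman_rubin_convergence is not defined in this file; a sketch of the
# standard Gelman-Rubin potential-scale-reduction statistic it presumably
# computes (assumptions: withinchainvar and meanchain are (Nchains, ndim)
# arrays of per-chain sample variances and means, and n is the number of
# post-burn-in samples per chain):
def gelman_rubin_convergence_sketch(withinchainvar, meanchain, n, Nchains, ndim):
    import numpy as np
    W = np.mean(withinchainvar, axis=0)           # mean within-chain variance
    B_over_n = np.var(meanchain, axis=0, ddof=1)  # variance of the chain means
    var_hat = ((n - 1.) / n) * W + B_over_n       # pooled posterior-variance estimate
    return np.sqrt(var_hat / W)                   # R-hat; approaches 1 at convergence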
def fit_BAO(args):
    kmin = float(args.kmin)
    kmax = float(args.kmax)
    params_str = args.params_str
    Pk_type = args.Pk_type
    params_indices = [int(i) for i in params_str]

    old_stdout = sys.stdout
    odir = './fit_kmin{}_kmax{}_{}/'.format(kmin, kmax, Pk_type)
    if not os.path.exists(odir):
        os.makedirs(odir)
    ofile = odir + "mcmc_fit_params{}.log".format(params_str)
    log_file = open(ofile, "w")
    sys.stdout = log_file
    print('Arguments for the fitting: ', args)

    ifile = '/Users/mehdi/work/quicksurvey/ELG/run8/planck_camb_56106182_matterpower_z0.dat'
    klin, Pk_linw = np.loadtxt(ifile, dtype='f8', comments='#', unpack=True)
    Pwig_spl = InterpolatedUnivariateSpline(klin, Pk_linw)

    ifile = '/Users/mehdi/work/quicksurvey/ELG/run8/planck_camb_56106182_matterpower_smooth_z0.dat'
    klin, Pk_sm = np.loadtxt(ifile, dtype='f8', comments='#', unpack=True)
    Psm_spl = InterpolatedUnivariateSpline(klin, Pk_sm)

    norm_gf = 1.0
    N_walkers = 40
    ##params_indices = [1, 0, 0]  # 1: free parameter; 0: fixed parameter
    all_param_names = 'alpha', 'Sigma2_xy', 'A', 'B'
    all_temperature = 0.01, 1.0, 0.1, 0.1

    Omega_m = 0.3075  # matter density
    G_0 = growth_factor(0.0, Omega_m)
    # Sigma_0 is exactly calculated from the theoretical prediction with
    # q_{BAO} = 110 Mpc/h.
    Sigma_0 = 7.7840

    z_list = [0.625]  # , 0.875, 1.125, 1.375]  # z list for the data files
    cut_list = ['F']  # , 'T']

    # initial guess for fitting; Sigma2_xy=31.176 at z=0.65 is from theory prediction
    alpha, A, B = 1.0, 1.0, 10.0
    idir = '../kp0kp2knmodes/surveyscaled-nmodes/'
    odir = './mcmc_fit_params_{}/kmin{}_kmax{}/'.format(Pk_type, kmin, kmax)

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    if rank == 0:
        if not os.path.exists(odir):
            os.makedirs(odir)

    pool = MPIPool(loadbalance=True)
    for z_value in z_list:
        norm_gf = growth_factor(z_value, Omega_m) / G_0
        Sigma2_xy = (Sigma_0 * norm_gf) ** 2.0
        print('z, Sigma2_xy: ', z_value, Sigma2_xy)
        all_params = alpha, Sigma2_xy, A, B
        N_params, theta, fix_params, params_T, params_name = set_params(
            all_params, params_indices, all_param_names, all_temperature)
        for cut_type in cut_list:
            ifile = idir + 'kp0kp2knmodes_z{}RADECcut{}.dat'.format(z_value, cut_type)
            print(ifile)
            # columns: k, P0(k), P2(k), N_modes
            data_m = np.loadtxt(ifile, dtype='f8', comments='#')
            indices = np.argwhere((data_m[:, 0] >= kmin) & (data_m[:, 0] <= kmax)).flatten()
            N_fitbin = len(indices)
            k_obs, Pk_wig_obs, N_modes = (data_m[indices, 0], data_m[indices, 1],
                                          data_m[indices, 3])
            ivar_Pk_wig = N_modes / (2.0 * Pk_wig_obs ** 2.0)
            #print('ivar_Pk_wig', ivar_Pk_wig)

            params_mcmc = mcmc_routine(N_params, N_walkers, theta, params_T,
                                       params_indices, fix_params, k_obs,
                                       Pk_wig_obs, ivar_Pk_wig, Pwig_spl,
                                       Psm_spl, norm_gf, params_name, pool)
            print(params_mcmc)
            chi_square = chi2(params_mcmc[:, 0], params_indices, fix_params,
                              k_obs, Pk_wig_obs, ivar_Pk_wig, Pwig_spl,
                              Psm_spl, norm_gf)
            reduced_chi2 = chi_square / (N_fitbin - N_params)
            print("chi^2/dof: ", reduced_chi2, "\n")
            ofile_params = odir + 'fit_p0_z{}RADECcut{}_params{}.dat'.format(
                z_value, cut_type, params_str)
            write_params(ofile_params, params_mcmc, params_name, reduced_chi2)

    pool.close()
    sys.stdout = old_stdout
    log_file.close()
def sample(self):
    '''
    Run the MCMC.
    '''
    # First make sure that the maximum likelihood params are fitted
    if not self.minimized:
        self.approximate_ml()
    # print(self.params_all)

    ndim, nwalkers = len(self.params_vary), self.config['NWALKERS']
    p0 = np.zeros((nwalkers, len(self.params_vary)))
    pml = [self.params_all[pname] for pname in self.params_vary]
    for pnum, pname in enumerate(self.params_vary):
        p0[:, pnum] = (np.random.randn(nwalkers) * self.config['SAMPLE_BALL'] + 1.) * pml[pnum]

    plist = []
    for key in self.params_vary.keys():
        plist.append(key)
    args = (self.freqs, self.tb_meas, self.var_tb, self.params_all, plist,
            self.params_vary, self.fg_model, self.sig_model)

    if self.config['MPI']:
        from emcee.utils import MPIPool
        pool = MPIPool()
        if not pool.is_master():
            pool.wait()
            sys.exit(0)
        self.sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=args, pool=pool)
        self.sampler.run_mcmc(p0, self.config['NBURN'])  # burn in
        p0 = self.sampler.chain[:, -1, :].squeeze()
        self.sampler.reset()
        self.sampler.run_mcmc(p0, self.config['NSTEPS'])
        pool.close()
    else:
        if self.config['SAMPLER'] == 'PARALLELTEMPERING':
            logl = lambda x: lnlike(x, self.freqs, self.tb_meas, self.var_tb,
                                    self.params_all, self.params_vary,
                                    self.fg_model, self.sig_model)
            logp = lambda x: lnprior(x, self.params_vary.keys(), self.params_vary)
            self.sampler = ptemcee.Sampler(ntemps=self.config['NTEMPS'],
                                           nwalkers=self.config['NWALKERS'],
                                           dim=self.ndim, logl=logl, logp=logp)
        else:
            self.sampler = emcee.EnsembleSampler(nwalkers=self.config['NWALKERS'],
                                                 ndim=ndim,
                                                 log_prob_fn=lnprob,
                                                 args=args,
                                                 threads=self.config['THREADS'])

        # If we use PT sampling, we need a further dimension of
        # start parameters for the different temperatures
        if self.config['SAMPLER'] == 'PARALLELTEMPERING':
            p0 = np.array([p0 for m in range(self.config['NTEMPS'])])

        # Run the MCMC for the burn-in
        self.sampler.run_mcmc(p0, self.config['NBURN'], thin=self.config['NTHIN'])

        # Reset after burn-in and run the full chain
        if self.config['SAMPLER'] == 'PARALLELTEMPERING':
            p0 = self.sampler.chain[:, :, -1, :]
        else:
            p0 = self.sampler.chain[:, -1, :].squeeze()
        self.sampler.reset()
        self.sampler.run_mcmc(p0, self.config['NSTEPS'], thin=self.config['NTHIN'])

    # Create output directory
    if not os.path.exists(self.config['PROJECT_NAME']):
        os.makedirs(self.config['PROJECT_NAME'])

    # Save output and configuration
    with open(os.path.join(self.config['PROJECT_NAME'], 'config.yaml'), 'w') as f:
        yaml.dump(self.config, f, default_flow_style=False)
    with open(os.path.join(self.config['PROJECT_NAME'], 'ml_params.yaml'), 'w') as f:
        yaml.dump(self.params_all, f, default_flow_style=False)
    self.sampled = True

    # Collect result parameters
    resultdict = {}

    # Chain
    resultdict['chain'] = self.sampler.chain

    # Conservative evidence
    if (self.config['COMPUTECOVARIANCE'] and
            (self.config['SAMPLER'] == 'ENSEMBLESAMPLER')):
        # Estimate autocorrelation
        self.acors = self.sampler.acor.astype(int)
        resultdict['autocorrs'] = self.acors

        # Estimate the covariance of the thinned samples; note the chain has
        # shape (nwalkers, nsteps, ndim), so parameters are selected on the
        # last axis
        self.cov_samples = np.zeros((len(self.params_vary), len(self.params_vary)))
        resultdict['cov_samples'] = self.cov_samples
        for i in range(len(self.params_vary)):
            for j in range(len(self.params_vary)):
                stepsize = np.max([self.acors[i], self.acors[j]])
                csample_i = self.sampler.chain[:, ::stepsize, i].flatten()
                csample_j = self.sampler.chain[:, ::stepsize, j].flatten()
                self.cov_samples[i, j] = np.mean(
                    (csample_i - csample_i.mean()) * (csample_j - csample_j.mean()))

        # Compute conservative evidence without prior factor
        self.conservative_evidence = np.exp(self.ln_ml) / np.sqrt(np.linalg.det(self.cov_samples))
        resultdict['conservative_evidence'] = self.conservative_evidence

    # Evidence from thermodynamic integration from the PT sampler
    if self.config['SAMPLER'].lower() == 'paralleltempering':
        self.logz, self.dlogz = self.sampler.log_evidence_estimate(fburnin=0.)
        resultdict['log_thd_evidence'] = self.logz
        resultdict['dlog_thd_evidence'] = self.dlogz

    # Posterior mean values of the parameters
    post_mean_vals = np.mean(self.sampler.flatchain, axis=0)
    resultdict['post_mean_vals'] = post_mean_vals

    # The value of the posterior at the posterior-mean parameters
    logL = self.sampler.log_prob_fn(post_mean_vals)
    resultdict['logL'] = logL

    # Save as .npz
    np.savez(os.path.join(self.config['PROJECT_NAME'], 'output.npz'), **resultdict)
def mcmc_mpi(Nwalkers, Niters, Mr, prior_name='first_try', pois=False, continue_chain=False):
    '''
    Parameters
    -----------
    - Nwalkers : Number of walkers
    - Niters : Number of MCMC iterations
    '''
    # data and covariance matrix
    fake_obs_icov = Data.load_covariance(Mr, pois=False)
    fake_obs = Data.load_data(Mr)

    # True HOD parameters
    data_hod = Data.load_dechod_random_guess(Mr)
    Ndim = len(data_hod)

    # Priors
    prior_min, prior_max = PriorRange(prior_name, Mr)
    prior_range = np.zeros((len(prior_min), 2))
    prior_range[:, 0] = prior_min
    prior_range[:, 1] = prior_max

    # mcmc chain output file
    chain_file_name = ''.join([util.mcmc_dir(), 'group_nopoisson_mcmc_chain_Mr',
                               str(Mr), '.hdf5'])

    if os.path.isfile(chain_file_name) and continue_chain:
        print('Continuing previous MCMC chain!')
        sample = h5py.File(chain_file_name, "r")
        Nchains = Niters - len(sample)  # number of chains left to finish
        if Nchains > 0:
            pass
        else:
            raise ValueError
        print(Nchains, ' iterations left to finish')

        # Initializing walkers from the end of the chain
        pos0 = sample[-Nwalkers:]
    else:
        # new chain
        print("chain_file_name=", chain_file_name)
        sample_file = h5py.File(chain_file_name, 'w')
        sample_file.create_dataset("mcmc", (Niters, Nwalkers, Ndim),
                                   data=np.zeros((Niters, Nwalkers, Ndim)))
        sample_file.close()

        # Initializing walkers
        random_guess = data_hod
        pos0 = np.repeat(random_guess, Nwalkers).reshape(Ndim, Nwalkers).T + \
            5.e-2 * np.random.randn(Ndim * Nwalkers).reshape(Nwalkers, Ndim)
        print("initial position of the walkers = ", pos0.shape)

    # Initializing MPIPool
    pool = MPIPool(loadbalance=True)
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Initializing the emcee sampler
    hod_kwargs = {
        'prior_range': prior_range,
        'data': fake_obs,
        'data_icov': fake_obs_icov,
        'Mr': Mr
    }
    sampler = emcee.EnsembleSampler(Nwalkers, Ndim, lnPost, pool=pool, kwargs=hod_kwargs)

    # write out each iteration's walker positions as they are sampled
    cnt = 0
    for result in sampler.sample(pos0, iterations=Niters, storechain=False):
        position = result[0]
        sample_file = h5py.File(chain_file_name)
        sample_file["mcmc"][cnt] = position
        sample_file.close()
        print("iteration=", cnt)
        cnt += 1

    pool.close()
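# A short sketch (using the "mcmc" dataset name created above) of reading the
# HDF5 chain back and flattening it for analysis:
def load_mcmc_chain(chain_file_name):
    import h5py
    with h5py.File(chain_file_name, 'r') as f:
        chain = f['mcmc'][...]                 # shape (Niters, Nwalkers, Ndim)
    return chain.reshape(-1, chain.shape[-1])  # collapse iterations and walkers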
def run_emcee(run_name, nsteps, nwalkers, lnprob, to_vary):
    """The heart of it.

    Args:
        run_name (str): the name to output I guess
        nsteps (int):
        nwalkers (int):
        lnprob (something):
        to_vary (list of lists): list of
            [param name, initial_position_center, initial_position_sigma,
             (prior low bound, prior high bound)]
            for each parameter. The second two values set the position & size
            for a random Gaussian ball of initial positions.
    """
    # Set up the MPI pool for parallelization
    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # initiate sampler chain
    ndim = len(to_vary)
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                    args=(run_name, to_vary), pool=pool)

    # Name the chain we're looking for
    chain_filename = run_name + '/' + run_name + '_chain.csv'

    # Try to resume an existing run of this name.
    try:
        # Read in an existing chain
        chain = pd.read_csv(chain_filename)
        start_step = chain.index[-1] // nwalkers
        print('Resuming {} at step {}'.format(run_name, start_step))
        pos = np.array(chain.iloc[-nwalkers:, :-1])

        # If we're adding new steps, just put in a new line and get started.
        with open(chain_filename, 'a') as f:
            f.write('\n')
        end = np.array(chain.iloc[-nwalkers:, :])
        print('Start step: {}'.format(np.mean(end[:, -1])))

    # If there's no pre-existing run, set one up.
    except IOError:
        sp.call(['mkdir', run_name])
        sp.call(['mkdir', run_name + '/model_files'])
        print('Starting {}'.format(run_name))
        start_step = 0

        # Start a new file for the chain, with a header line
        with open(chain_filename, 'w') as f:
            param_names = [param[0] for param in to_vary]
            np.savetxt(f, (np.append(param_names, 'lnprob'),),
                       delimiter=',', fmt='%s')

        # Set up initial positions: a Gaussian ball around each parameter's
        # center, with the given per-parameter sigma (randn draws from a
        # standard normal)
        pos = [[param[1] + param[2] * np.random.randn() for param in to_vary]
               for i in range(nwalkers)]

    # Initialize the lnprob list, then run the sampler, logging the new
    # positions after each step
    lnprobs = []
    first_sample = sampler.sample(pos, iterations=nsteps, storechain=False)
    for i, result in enumerate(first_sample):
        old_lnprobs = np.copy(lnprobs)
        pos, lnprobs, blob = result
        print("Step {}: {}".format(start_step + i, np.mean(lnprobs)))
        # print('Acceptances: {}'.format([lnprob for lnprob in lnprobs if lnprob not in old_lnprobs]))

        # Log out the new positions
        with open(chain_filename, 'a') as f:
            new_step = [np.append(pos[k], lnprobs[k]) for k in range(nwalkers)]
            np.savetxt(f, new_step, delimiter=',')

    pool.close()
def run_emcee_simple(run_name, nsteps, nwalkers, lnprob, to_vary, burn_in=0,
                     pool=False, resume=False):
    """A new version of run_emcee.

    Args:
        run_name (str):
        nsteps (int):
        nwalkers (int):
        lnprob (function): the log-posterior function (equivalent to
            lnpostfn in the emcee docs)
        to_vary (list of lists):
        burn_in (int): how many steps to remove from the front
        pool (bool): Want to parallelize?
        resume (bool): Are you resuming a previous run?
    """
    # Set up parallelization
    if pool:
        pool = MPIPool()
        if not pool.is_master():
            pool.wait()
            sys.exit(0)

    start = time.time()

    # initiate sampler chain; ndim is the number of parameters being varied
    ndim = len(to_vary)
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=pool)

    # Name the chain we're looking for
    chain_filename = run_name + '/' + run_name + '_chain.csv'

    # This seems a little risky because if you forget, you just overwrite.
    if resume:
        chain = pd.read_csv(chain_filename)
        start_step = chain.index[-1] // nwalkers
        print('Resuming {} at step {}'.format(run_name, start_step))
        with open(chain_filename, 'a') as f:
            f.write('\n')
        pos = np.array(chain.iloc[-nwalkers:, :-1])
    else:
        sp.call('rm -rf ' + run_name, shell=True)
        sp.call(['mkdir', run_name])
        print('Starting {}'.format(run_name))
        start_step = 0
        with open(chain_filename, 'w') as f:
            f.write(','.join([param[0] for param in to_vary] + ['lnprob']) + '\n')
        pos = [[param[1] + param[2] * np.random.randn() for param in to_vary]
               for i in range(nwalkers)]

    # Run the sampler and then query it.
    # Note that sampler.sample yields, at each step:
    #   pos: the walkers' current positions, shape (nwalkers, ndim)
    #   lnprob: the log posterior probabilities at those positions, one per walker
    #   rstate: the current state of the random number generator
    #   blobs (optional): metadata returned by lnpostfn, if any
    run = sampler.sample(pos, iterations=nsteps, storechain=False)
    for i, result in enumerate(run):
        print("Step {}".format(start_step + i))
        pos, lnprobs, blob = result  # per-walker log posteriors at this step
        with open(run_name + '/' + run_name + '_chain.csv', 'a') as f:
            for k in range(nwalkers):
                f.write(','.join(map(str, np.append(pos[k], lnprobs[k]))) + '\n')

    print('{} samples in {:.1f} seconds'.format(nsteps * nwalkers,
                                                time.time() - start))

    if pool:
        pool.close()

    return MCMCrun(run_name, nwalkers, burn_in=burn_in)
def LensModelMCMC(data, lens, source,
                  xmax=30., highresbox=[-3., 3., -3., 3.], emitres=None, fieldres=None,
                  sourcedatamap=None, scaleamp=False, shiftphase=False,
                  modelcal=True, cosmo=Planck15,
                  nwalkers=1000, nburn=1000, nstep=1000, pool=None, nthreads=1, mpirun=False):
    """
    Wrapper function which basically takes what the user wants and turns it
    into the format needed for the actual MCMC lens modeling.

    Inputs:
    data:
        One or more visdata objects; if multiple datasets are being fit to,
        should be a list of visdata objects.
    lens:
        Any of the currently implemented lens objects or ExternalShear.
    source:
        One or more of the currently implemented source objects; if more than
        one source is to be fit, should be a list of multiple sources.
    xmax:
        (Half-)Grid size, in arcseconds; the grid will span +/-xmax in x&y.
    highresbox:
        The region to model at higher resolution (to account for
        high-magnification and differential lensing effects), as
        [xmin, xmax, ymin, ymax]. Note the sign convention is:
        +x = West, +y = North, like the lens positions.
    sourcedatamap:
        A list of length the number of datasets which tells which source(s)
        are to be fit to which dataset(s). Eg, if two sources are to be fit
        to two datasets jointly, should be [[0,1],[0,1]]. If we have four
        sources and three datasets, could be [[0,1],[0,1],[2,3]] to say that
        the first two sources should both be fit to the first two datasets,
        while the second two should be fit to the third dataset. If None,
        will assume all sources should be fit to all datasets.
    scaleamp:
        A list of length the number of datasets which tells whether a flux
        rescaling is allowed and which dataset the scaling should be relative
        to. False indicates no scaling should be done, while True indicates
        that amplitude scaling should be allowed.
    shiftphase:
        Similar to scaleamp above, but allowing for positional/astrometric
        offsets.
    modelcal:
        Whether or not to perform the pseudo-selfcal procedure of H+13.
    cosmo:
        The cosmology to use, as an astropy object, e.g.,
        from astropy.cosmology import WMAP9; cosmo=WMAP9
        Default is Planck15.
    nwalkers:
        Number of walkers to use in the mcmc process; see
        dan.iel.fm/emcee/current for more details.
    nburn:
        Number of burn-in steps to take with the chain.
    nstep:
        Number of actual steps to take in the mcmc chains after the burn-in.
    nthreads:
        Number of threads (read: cores) to use during the fitting, default 1.
    mpirun:
        Whether to parallelize using MPI instead of multiprocessing. If True,
        nthreads has no effect, and your script should be run with, eg,
        mpirun -np 16 python lensmodel.py.

    Returns:
    mcmcresult:
        A nested dict containing the chains requested. Will have all the MCMC
        chain results, plus metadata about the run (initial params, data used,
        etc.). Formatting still a work in progress (esp. for modelcal phases).
    chains:
        The raw chain data, for testing.
    blobs:
        Everything else returned by the likelihood function; will have
        magnifications and any modelcal phase offsets at each step; eventually
        will remove this once we get everything packaged up for mcmcresult
        nicely.
    colnames:
        Basically all the keys to the mcmcresult dict; eventually won't need
        to return this once mcmcresult is packaged up nicely.
    """
    if pool:
        nthreads = 1
    elif mpirun:
        nthreads = 1
        from emcee.utils import MPIPool
        pool = MPIPool(debug=False, loadbalance=True)
        if not pool.is_master():
            pool.wait()
            sys.exit(0)
    else:
        pool = None

    # Making these lists just makes later stuff easier since we now know the dtype
    lens = list(np.array([lens]).flatten())
    source = list(np.array([source]).flatten())  # Ensure source(s) are a list
    data = list(np.array([data]).flatten())      # Same for dataset(s)
    scaleamp = list(np.array([scaleamp]).flatten())
    shiftphase = list(np.array([shiftphase]).flatten())
    modelcal = list(np.array([modelcal]).flatten())
    if len(scaleamp) == 1 and len(scaleamp) < len(data):
        scaleamp *= len(data)
    if len(shiftphase) == 1 and len(shiftphase) < len(data):
        shiftphase *= len(data)
    if len(modelcal) == 1 and len(modelcal) < len(data):
        modelcal *= len(data)
    if sourcedatamap is None:
        sourcedatamap = [None] * len(data)

    # emcee isn't very flexible in terms of how it gets initialized; start by
    # assembling the user-provided info into a form it likes
    ndim, p0, colnames = 0, [], []
    # Lens(es) first
    for i, ilens in enumerate(lens):
        if ilens.__class__.__name__ == 'SIELens':
            for key in ['x', 'y', 'M', 'e', 'PA']:
                if not vars(ilens)[key]['fixed']:
                    ndim += 1
                    p0.append(vars(ilens)[key]['value'])
                    colnames.append(key + 'L' + str(i))
        elif ilens.__class__.__name__ == 'ExternalShear':
            for key in ['shear', 'shearangle']:
                if not vars(ilens)[key]['fixed']:
                    ndim += 1
                    p0.append(vars(ilens)[key]['value'])
                    colnames.append(key)
    # Then source(s)
    for i, src in enumerate(source):
        if src.__class__.__name__ == 'GaussSource':
            for key in ['xoff', 'yoff', 'flux', 'width']:
                if not vars(src)[key]['fixed']:
                    ndim += 1
                    p0.append(vars(src)[key]['value'])
                    colnames.append(key + 'S' + str(i))
        elif src.__class__.__name__ == 'SersicSource':
            for key in ['xoff', 'yoff', 'flux', 'majax', 'index', 'axisratio', 'PA']:
                if not vars(src)[key]['fixed']:
                    ndim += 1
                    p0.append(vars(src)[key]['value'])
                    colnames.append(key + 'S' + str(i))
        elif src.__class__.__name__ == 'PointSource':
            for key in ['xoff', 'yoff', 'flux']:
                if not vars(src)[key]['fixed']:
                    ndim += 1
                    p0.append(vars(src)[key]['value'])
                    colnames.append(key + 'S' + str(i))
    # Then flux rescaling; only matters if >1 dataset
    for i, t in enumerate(scaleamp[1:]):
        if t:
            ndim += 1
            p0.append(1.)  # Assume 1.0 scale factor to start
            colnames.append('ampscale_dset' + str(i + 1))
    # Then phase/astrometric shift; each has two vals for a shift in x&y
    for i, t in enumerate(shiftphase[1:]):
        if t:
            ndim += 2
            p0.append(0.)
            p0.append(0.)  # Assume zero initial offset
            colnames.append('astromshift_x_dset' + str(i + 1))
            colnames.append('astromshift_y_dset' + str(i + 1))

    # Get any model-cal parameters set up. The process involves some expensive
    # matrix inversions, but these only need to be done once, so we'll do them
    # now and pass the results as arguments to the likelihood function. See docs
    # in calc_likelihood.model_cal for more info.
    for i, dset in enumerate(data):
        if modelcal[i]:
            uniqant = np.unique(np.asarray([dset.ant1, dset.ant2]).flatten())
            dPhi_dphi = np.zeros((uniqant.size - 1, dset.u.size))
            for j in range(1, uniqant.size):
                dPhi_dphi[j - 1, :] = (dset.ant1 == uniqant[j]) - 1 * (dset.ant2 == uniqant[j])
            C = scipy.sparse.diags((dset.sigma / dset.amp) ** -2., 0)
            F = np.dot(dPhi_dphi, C * dPhi_dphi.T)
            Finv = np.linalg.inv(F)
            FdPC = np.dot(-Finv, dPhi_dphi * C)
            modelcal[i] = [dPhi_dphi, FdPC]

    # Create our lensing grid coordinates now, since those shouldn't be
    # recalculated with every call to the likelihood function
    xmap, ymap, xemit, yemit, indices = GenerateLensingGrid(data, xmax, highresbox,
                                                            fieldres, emitres)

    # Calculate the uv coordinates we'll interpolate onto; only need to
    # calculate this once, so do it here.
    kmax = 0.5 / ((xmap[0, 1] - xmap[0, 0]) * arcsec2rad)
    ug = np.linspace(-kmax, kmax, xmap.shape[0])

    # Calculate some distances; we only need to calculate these once.
    # This assumes multiple sources are all at same z; should be this
    # way anyway or else we'd have to deal with multiple lensing planes
    if cosmo is None:
        cosmo = Planck15
    Dd = cosmo.angular_diameter_distance(lens[0].z).value
    Ds = cosmo.angular_diameter_distance(source[0].z).value
    Dds = cosmo.angular_diameter_distance_z1z2(lens[0].z, source[0].z).value

    p0 = np.array(p0)
    # Create a ball of starting points for the walkers, gaussian ball of
    # 10% width; if initial value is 0 (eg, astrometric shift), give a small
    # sigma. For angles, generally need more spread than 10% to sample well,
    # so do 30% for those cases [~0.5% >180deg for p0=100deg]
    isangle = np.array([0.30 if 'PA' in s or 'angle' in s else 0.1 for s in colnames])
    initials = emcee.utils.sample_ball(p0,
                                       np.asarray([isangle[i] * x if x else 0.05
                                                   for i, x in enumerate(p0)]),
                                       int(nwalkers))

    # All the lens objects know if their parameters have been altered since
    # the last time we calculated the deflections. If all the lens pars are
    # fixed, we only need to do the deflections once. This step ensures that
    # the lens object we create the sampler with has these initial deflections.
    for i, ilens in enumerate(lens):
        if ilens.__class__.__name__ == 'SIELens':
            ilens.deflect(xemit, yemit, Dd, Ds, Dds)
        elif ilens.__class__.__name__ == 'ExternalShear':
            ilens.deflect(xemit, yemit, lens[0])

    # Create the sampler object; uses calc_likelihood function defined elsewhere
    lenssampler = emcee.EnsembleSampler(nwalkers, ndim, calc_vis_lnlike,
                                        args=[data, lens, source, Dd, Ds, Dds, ug,
                                              xmap, ymap, xemit, yemit, indices,
                                              sourcedatamap, scaleamp, shiftphase,
                                              modelcal],
                                        threads=nthreads, pool=pool)

    # Run burn-in phase
    print("Running burn-in... ")
    # pos, prob, rstate, mus = lenssampler.run_mcmc(initials, nburn, storechain=False)
    for i, result in enumerate(lenssampler.sample(initials, iterations=nburn,
                                                  storechain=False)):
        if i % 20 == 0:
            print('Burn-in step ', i, '/', nburn)
        pos, prob, rstate, blob = result
    lenssampler.reset()

    # Run actual chains
    print("Done. Running chains... ")
    for i, result in enumerate(lenssampler.sample(pos, rstate0=rstate,
                                                  iterations=nstep, storechain=True)):
        if i % 20 == 0:
            print('Chain step ', i, '/', nstep)
    # lenssampler.run_mcmc(pos, nstep, rstate0=rstate)

    if mpirun:
        pool.close()

    print("Mean acceptance fraction: ", np.mean(lenssampler.acceptance_fraction))
    # return lenssampler.flatchain, lenssampler.blobs, colnames

    # Package up the magnifications and modelcal phases; disregards nan points
    # (where we failed the prior, usu. because a periodic angle wrapped).
blobs = lenssampler.blobs mus = np.asarray([[a[0] for a in l] for l in blobs]).flatten(order='F') bad = np.where(np.asarray([np.any(np.isnan(m)) for m in mus],dtype=bool))[0] for k in bad: mus[k] = np.array([np.nan]*len(source)) mus = np.asarray(list(mus),dtype=float).reshape((-1,len(source)),order='F') # stupid-ass hack bad = np.isnan(mus)[:,0] #bad = bad.reshape((-1,len(source)),order='F')[:,0] #mus = np.atleast_2d(np.asarray([mus[i] if not bad[i] else [np.nan]*len(source) for i in range(mus.size)])).T colnames.extend(['mu{0:.0f}'.format(i) for i in range(len(source))]) # Assemble the output. Want to return something that contains both the MCMC chains # themselves, but also metadata about the run. mcmcresult = {} # keep track of git revision, for reproducibility's sake # if run under mpi, this will spew some scaremongering warning text, # but it's fine. use --mca mpi_warn_on_fork 0 in the mpirun statement to disable try: import subprocess gitd = os.path.abspath(os.path.join(os.path.dirname(__file__),os.pardir)) mcmcresult['githash'] = subprocess.check_output('git --git-dir={0:s} --work-tree={1:s} '\ 'rev-parse HEAD'.format(gitd+'/.git',gitd),shell=True).rstrip() except: mcmcresult['githash'] = 'No repo found' mcmcresult['datasets'] = [dset.filename for dset in data] # Data files used mcmcresult['lens_p0'] = lens # Initial params for lens,src(s),shear; also tells if fixed, priors, etc. mcmcresult['source_p0'] = source if sourcedatamap: mcmcresult['sourcedatamap'] = sourcedatamap mcmcresult['xmax'] = xmax mcmcresult['highresbox'] = highresbox mcmcresult['fieldres'] = fieldres mcmcresult['emitres'] = emitres if any(scaleamp): mcmcresult['scaleamp'] = scaleamp if any(shiftphase): mcmcresult['shiftphase'] = shiftphase mcmcresult['chains'] = np.core.records.fromarrays(np.hstack((lenssampler.flatchain[~bad],mus[~bad])).T,names=colnames) mcmcresult['lnlike'] = lenssampler.flatlnprobability[~bad] # Keep track of best-fit params, derived from chains. 
c = copy.deepcopy(mcmcresult['chains']) mcmcresult['best-fit'] = {} pbest = [] # Calculate the best fit values as medians of each param lens,source = copy.deepcopy(mcmcresult['lens_p0']), copy.deepcopy(mcmcresult['source_p0']) for i,ilens in enumerate(lens): if ilens.__class__.__name__ == 'SIELens': ilens.__dict__['_altered'] = True for key in ['x','y','M','e','PA']: if not vars(ilens)[key]['fixed']: ilens.__dict__[key]['value'] = np.median(c[key+'L'+str(i)]) pbest.append(np.median(c[key+'L'+str(i)])) elif ilens.__class__.__name__ == 'ExternalShear': for key in ['shear','shearangle']: if not vars(ilens)[key]['fixed']: ilens.__dict__[key]['value'] = np.median(c[key]) pbest.append(np.median(c[key])) mcmcresult['best-fit']['lens'] = lens # now do the source(s) for i,src in enumerate(source): # Source is a list of source objects if src.__class__.__name__ == 'GaussSource': for key in ['xoff','yoff','flux','width']: if not vars(src)[key]['fixed']: src.__dict__[key]['value'] = np.median(c[key+'S'+str(i)]) pbest.append(np.median(c[key+'S'+str(i)])) elif src.__class__.__name__ == 'SersicSource': for key in ['xoff','yoff','flux','majax','index','axisratio','PA']: if not vars(src)[key]['fixed']: src.__dict__[key]['value'] = np.median(c[key+'S'+str(i)]) pbest.append(np.median(c[key+'S'+str(i)])) elif src.__class__.__name__ == 'PointSource': for key in ['xoff','yoff','flux']: if not vars(src)[key]['fixed']: src.__dict__[key]['value'] = np.median(c[key+'S'+str(i)]) pbest.append(np.median(c[key+'S'+str(i)])) mcmcresult['best-fit']['source'] = source mcmcresult['best-fit']['magnification'] = np.median(mus[~bad],axis=0) # Any amplitude scaling or astrometric shifts bfscaleamp = np.ones(len(data)) if 'scaleamp' in mcmcresult.keys(): for i,t in enumerate(mcmcresult['scaleamp']): # only matters if >1 datasets if i==0: pass elif t: bfscaleamp[i] = np.median(c['ampscale_dset'+str(i)]) pbest.append(np.median(c['ampscale_dset'+str(i)])) else: pass mcmcresult['best-fit']['scaleamp'] = bfscaleamp bfshiftphase = np.zeros((len(data),2)) if 'shiftphase' in mcmcresult.keys(): for i,t in enumerate(mcmcresult['shiftphase']): if i==0: pass # only matters if >1 datasets elif t: bfshiftphase[i][0] = np.median(c['astromshift_x_dset'+str(i)]) bfshiftphase[i][1] = np.median(c['astromshift_y_dset'+str(i)]) pbest.append(np.median(c['astromshift_x_dset'+str(i)])) pbest.append(np.median(c['astromshift_y_dset'+str(i)])) else: pass # no shifting mcmcresult['best-fit']['shiftphase'] = bfshiftphase mcmcresult['best-fit']['lnlike'] = calc_vis_lnlike(pbest,data,mcmcresult['best-fit']['lens'], mcmcresult['best-fit']['source'], Dd,Ds,Dds,ug,xmap,ymap,xemit,yemit,indices, sourcedatamap,scaleamp,shiftphase,modelcal)[0] # Calculate the deviance information criterion, using the Spiegelhalter+02 definition (cf Gelman+04) mcmcresult['best-fit']['DIC'] = -4*np.mean(mcmcresult['lnlike']) + 2*mcmcresult['best-fit']['lnlike'] # If we did any modelcal stuff, keep the antenna phase offsets here if any(modelcal): mcmcresult['modelcal'] = [True if j else False for j in modelcal] dp = np.squeeze(np.asarray([[a[1] for a in l if ~np.any(np.isnan(a[0]))] for l in blobs])) a = [x for l in dp for x in l] # Have to dick around with this if we had any nan's dphases = np.squeeze(np.reshape(a,(nwalkers*nstep-bad.sum(),len(data),-1),order='F')) if len(data) > 1: for i in range(len(data)): if modelcal[i]: mcmcresult['calphases_dset'+str(i)] = np.vstack(dphases[:,i]) else: if any(modelcal): mcmcresult['calphases_dset0'] = dphases return mcmcresult
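# A hypothetical invocation of LensModelMCMC, just to make the call signature
# above concrete. The SIELens/GaussSource constructor arguments are
# placeholders inferred from the parameter names used in the function body;
# only the class names and LensModelMCMC's own keywords appear in the code
# above, so treat this as a sketch rather than runnable code.
#
#     lens = SIELens(z=0.5, x=..., y=..., M=..., e=..., PA=...)
#     source = GaussSource(z=2.0, xoff=..., yoff=..., flux=..., width=...)
#     mcmcresult = LensModelMCMC(data, lens, source,
#                                xmax=30., highresbox=[-3., 3., -3., 3.],
#                                nwalkers=200, nburn=200, nstep=500,
#                                nthreads=4, mpirun=False)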
def mcmc_mpi(Nwalkers, Nchains, observables=["nbar", "xi"],
             data_dict={"Mr": 21, "b_normal": 0.25}, prior_name="first_try",
             mcmcrun=None, continue_chain=False):
    """ Standard MCMC implementation

    Parameters
    -----------
    - Nwalkers : Number of walkers
    - Nchains : Number of MCMC iterations (steps per walker)
    - observables : list of observables. Options are:
      ['nbar','xi'], ['nbar','gmf'], ['xi']
    - data_dict : dictionary that specifies the observation keywords
    - continue_chain : resume from an existing chain file if one exists
      (this flag was referenced but never defined in the original)
    """
    # Initializing the vector of observables and inverse covariance matrix
    if observables == ["xi"]:
        fake_obs = Data.data_xi(**data_dict)
        # fake_obs_icov = Data.data_inv_cov('xi', **data_dict)
        fake_obs_icov = Data.data_cov(inference="mcmc", **data_dict)[1:16, 1:16]
    if observables == ["nbar", "xi"]:
        fake_obs = np.hstack([Data.data_nbar(**data_dict), Data.data_xi(**data_dict)])
        fake_obs_icov = Data.data_cov(inference="mcmc", **data_dict)[:16, :16]
    if observables == ["nbar", "gmf"]:
        ##### FIRST BIN OF GMF DROPPED ###############
        # CAUTION: hardcoded
        fake_obs = np.hstack([Data.data_nbar(**data_dict), Data.data_gmf(**data_dict)[1:]])
        # Covariance matrix being adjusted accordingly
        fake_obs_icov = np.zeros((10, 10))
        fake_obs_icov[1:, 1:] = Data.data_cov(inference="mcmc", **data_dict)[17:, 17:]
        fake_obs_icov[0, 1:] = Data.data_cov(inference="mcmc", **data_dict)[0, 17:]
        fake_obs_icov[1:, 0] = Data.data_cov(inference="mcmc", **data_dict)[17:, 0]
        fake_obs_icov[0, 0] = Data.data_cov(inference="mcmc", **data_dict)[0, 0]

    # True HOD parameters
    data_hod_dict = Data.data_hod_param(Mr=data_dict["Mr"])
    data_hod = np.array([
        data_hod_dict["logM0"],               # log M0
        np.log(data_hod_dict["sigma_logM"]),  # log(sigma)
        data_hod_dict["logMmin"],             # log Mmin
        data_hod_dict["alpha"],               # alpha
        data_hod_dict["logM1"],               # log M1
    ])
    Ndim = len(data_hod)

    # Priors
    prior_min, prior_max = PriorRange(prior_name)
    prior_range = np.zeros((len(prior_min), 2))
    prior_range[:, 0] = prior_min
    prior_range[:, 1] = prior_max

    # mcmc chain output file
    chain_file = "".join([util.mcmc_dir(), util.observable_id_flag(observables),
                          ".", mcmcrun, ".mcmc_chain.dat"])

    # Total number of iterations (the original referenced an undefined Niter).
    Niter = Nchains
    if os.path.isfile(chain_file) and continue_chain:
        print "Continuing previous MCMC chain!"
        sample = np.loadtxt(chain_file)
        Nchain = Niter - (len(sample) // Nwalkers)  # Number of iterations left to finish
        if Nchain <= 0:
            raise ValueError("Chain file already contains >= Nchains iterations")
        print Nchain, " iterations left to finish"
        # Initializing Walkers from the end of the chain
        pos0 = sample[-Nwalkers:]
    else:
        # new chain
        f = open(chain_file, "w")
        f.close()
        Nchain = Niter
        # Initializing Walkers in a small Gaussian ball around the true HOD parameters
        random_guess = data_hod
        pos0 = np.repeat(random_guess, Nwalkers).reshape(Ndim, Nwalkers).T + \
            5.0e-2 * np.random.randn(Ndim * Nwalkers).reshape(Nwalkers, Ndim)

    # Initializing MPIPool
    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Initializing the emcee sampler
    hod_kwargs = {
        "prior_range": prior_range,
        "data": fake_obs,
        "data_icov": fake_obs_icov,
        "observables": observables,
        "Mr": data_dict["Mr"],
    }
    sampler = emcee.EnsembleSampler(Nwalkers, Ndim, lnPost, pool=pool, kwargs=hod_kwargs)

    # Append each walker position to the chain file as the sampler advances
    for result in sampler.sample(pos0, iterations=Nchain, storechain=False):
        position = result[0]
        f = open(chain_file, "a")
        for k in range(position.shape[0]):
            output_str = "\t".join(position[k].astype("str")) + "\n"
            f.write(output_str)
        f.close()

    pool.close()
def run(N): fn = chainDirRel + '.pickle' nwalkers = 500 ndim = 9 # 15 #eta, epsff, fg0, muNorm, muScaling, fixedQ, accScaleLength, xiREC, accNorm, accAlphaZ, accAlphaMh, accCeiling, fcool, kappaMetals, ZIGM = emceeParams #p00 = np.array([ .9, .1, -1., .08, .50959, .38, -.25, .7, .01 ]) #p0 = [p00*(1.0+0.2*np.random.randn( ndim )) for i in range(nwalkers)] p0 = [sampleFromPrior() for i in range(nwalkers)] restart = {} restart['currentPosition'] = p0 restart['chain'] = None restart['state'] = None restart['prob'] = None restart['iterationCounter'] = 0 restart['mcmcRunCounter'] = 0 updateRestart(fn, restart) global runNumber runNumber = restart['mcmcRunCounter'] restart['iterationCounter'] += N restart['mcmcRunCounter'] += 1 pool = MPIPool(comm=comm, loadbalance=True) if not pool.is_master(): pool.wait() sys.exit(0) sampler = emcee.EnsembleSampler(nwalkers, ndim, lnProb, pool=pool) #pos, prob, state = sampler.run_mcmc(restart['currentPosition'], N, rstate0=restart['state'], lnprob0=restart['prob']) counter = 0 for result in sampler.sample(restart['currentPosition'], iterations=N, lnprob0=restart['prob'], rstate0=restart['state']): print "Beginning iteration number ", counter, " of ", N pos, prob, state = result restart[ 'acor'] = sampler.acor[:] # autocorr length for each param (ndim) restart[ 'accept'] = sampler.acceptance_fraction[:] # acceptance frac for each walker. restart['currentPosition'] = pos # same shape as p0: nwalkers x ndim restart['state'] = state # random number generator state restart['prob'] = prob # nwalkers x dim if restart['chain'] is None: restart['chain'] = sampler.chain # nwalkers x niterations x ndim else: print np.shape(restart['chain']), np.shape( sampler.chain[:, -1, :]), np.shape(sampler.chain) print restart['mcmcRunCounter'], restart['iterationCounter'] #restart['chain'] = np.concatenate((restart['chain'], sampler.chain[:,-1,:]), axis=1) print "dbg1: ", np.shape(restart['chain']), np.shape( np.zeros( (nwalkers, 1, ndim))), np.shape(np.expand_dims(pos, 1)) restart['chain'] = np.concatenate( (restart['chain'], np.expand_dims(pos, 1)), axis=1) saveRestart(fn, restart) counter += 1 pool.close()
def emceeinit(w0, inclin, nbins, nthreads, nsteps, savename, data, dbins, MPI=0, allbinseq=0): """Emcee driver function""" #HARDCODED - Warning. Also bins. global incl incl = inclin #Initialize the MPI-based pool used for parallelization. if MPI: print MPI pool = MPIPool() if not pool.is_master(): # Wait for instructions from the master process. pool.wait() sys.exit(0) #Setup ndim = nbins #Removing inclination as a variable. nwalkers = 4*ndim p0 = np.zeros((nwalkers, ndim)) print 'Nbins is now', nbins #Needed for fixing unresolved starting balls global b1 global rin rin, b1 = dbins #Initialize walkers radii = np.arange(nbins) sizecorr = 1 #Currently Hardcoded; Scaling factor to treat different radii differently scale = 0.2 #Currently hardcoded; Fraction of parameter by which it can vary for walker in range(nwalkers): for rs in radii: rand = np.random.uniform(-(w0[rs]*scale*sizecorr), (w0[rs]*scale*sizecorr)) if (b1[rs] <= res) and (allbinseq <1) : rand = np.random.uniform(0, 2.*w0[rs]) p0[walker][rs] = w0[rs] + rand #Make it rs+2, if a & l vary # #Initialize a & l # p0[walker][0] = np.random.uniform(.0001, .5) #When adding back in, make prev statement rs+1 # while True: # p0[walker][1] = np.random.gamma(2., 2.)*np.amax(dbins[1:])/20. + np.amin(np.diff(dbins[1:])) # if (p0[walker][1]>=np.amin(dbins[1:]) or p0[walker][1]<=np.amax(dbins[1:])): # break #THIS IS A PROBLEM FOR THE 1st BIN WITH rin. Also the normalization # p0[walker][0] = incl+np.random.uniform(0.85*incl,1.15*incl) #When adding back in, make prev statement rs+1 #Write emcee perturbation params to log file f = open('emceerand.log', 'a') FORMAT = '%m-%d-%Y-%H%M' f.write(savename+', '+str(nbins)+', '+str(nsteps)+', '+str(scale)+', '+str(sizecorr)+', '+datetime.now().strftime(FORMAT)) #Model initialization u, v, dreal, dimag, dwgt = data udeproj = u * np.cos(incl) #Deproject rho = 1e3*np.sqrt(udeproj**2+v**2) indices = np.arange(b1.size) global gpbins gpbins = dbins #rin, indices global rbin rbin = np.concatenate([np.array([rin]), b1]) jarg = np.outer(2.*np.pi*rbin, rho/206264.806427) global jinc jinc = sc.j1(jarg)/jarg # pool = mp.Pool(nthreads-1) #Initialize sampler using MPI if necessary if MPI: sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=pool) else: sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, threads=nthreads) print 'Nbins, Ndim', nbins, ndim print 'Dbins', dbins #Run emcee, and time it tic = time.time() print "I'm line 110, before the threads" sampler.run_mcmc(p0, nsteps) print "I'm line 112, after the threads" toc = time.time() #Display and record run information print 'Elapsed emcee run time:', ((toc-tic)/60.) print 'Acceptance:', sampler.acceptance_fraction f.write(' ,'+str(round((toc-tic)/60., 2))+', '+str(np.round(np.mean(sampler.acceptance_fraction),2))+'\n') f.close() #Save the results in a binary file np.save('mc_'+savename,sampler.chain) if MPI: #Close the processes. pool.close() print 'Done with this emcee run' #Allow user interaction at end, if not using MPI # if not MPI: # pdb.set_trace() return sampler.chain
def fit_BAO(): parser = argparse.ArgumentParser(description='Use mcmc routine to get the BAO peak stretching parameter alpha and damping parameter, made by Zhejie.',\ formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument( "--lt", help= '*The type of weak lensing survey. (TF: Tully-Fisher; TW: traditional (photo-z) weak lensing.)', required=True) parser.add_argument("--nrbin", help='*Number of tomographic bins.', type=int, required=True) parser.add_argument("--nkbin", help='*Number of output k bins.', type=int, required=True) parser.add_argument("--shapenf", help='*Shape noise factor.', required=True) parser.add_argument("--kmin", help='*kmin fit boundary.', required=True) parser.add_argument("--kmax", help='*kmax fit boundary.', required=True) parser.add_argument("--params_str", help='Set fitting parameters. 1: free; 0: fixed.', required=True) parser.add_argument("--Sigma2_inf", help='Whether setting Sigma2_xy as infinity or not.', default='False') parser.add_argument("--alpha", help='Fix the parameter alpha value.', default=1.0, type=np.float) parser.add_argument( "--Pwig_type", help= '*The spatial P(k)_wig whether is linear or not. Type Pwig_linear or Pwig_nonlinear; in nonlinear, BAO is damped.', required=True) parser.add_argument( "--Psm_type", help= 'The expression of Pnorm. The default case, Pnorm from Eisenstein & Zaldarriaga 1999. \ Test Pnorm=Pnow, which is derived from transfer function.' ) #parser.add_argument("--Sigma", help = 'BAO damping parameter Sigma value. (Either 0.0 or 100.0.)', type=float, required=True) parser.add_argument( "--survey_stage", help= 'Optional parameter. KW_stage_IV (kinematic weak lensing) or PW_stage_IV (photo-z). It could also be considered as the directory of data\ files to be fitted.') #parser.add_argument("--mpi_used", help = 'Whether MPI is implemented in the calculation of Cijl_prime, Gm_prime. Either True or False.') parser.add_argument( "--f_sky", help= 'This addtional argument is for data files in TF_cross-ps. Distinguish cases with different f_sky value.' ) parser.add_argument( "--set_SVc_on_CovP", help= '*Whether we replace smaller SV to be SVc in W matrix for the output inverse covariance matrix of Pk. Either True or False.', required=True) parser.add_argument( "--start_lmin", help= "*The minimum ell value considered in the analysis. Default is 1. For Stage III, it's 10 for Stage III and 4 for Stage IV", default=1, type=int) parser.add_argument( "--nSV_min", help= "Fitting the extracted power spectrum with nSV used. nSV_min is the strating point.", type=int, default=1) parser.add_argument( "--save_sampler", help= "Whether we save samplers of MCMC into a data file or not. 
True or False.", default='False') parser.add_argument( "--modify_Cov_cij_cpq", help= "Whether we have modified the Cov_cij_cpq while outputing Cijl^prime and Gm^prime.", default='False') args = parser.parse_args() #print("args: ", args.lt) lt = args.lt num_rbin = args.nrbin num_kbin = args.nkbin shapenf = args.shapenf kmin = float(args.kmin) kmax = float(args.kmax) params_str = args.params_str params_indices = [int(i) for i in params_str] Pwig_type = args.Pwig_type Psm_type = args.Psm_type survey_stage = args.survey_stage #mpi_used = args.mpi_used f_sky = args.f_sky set_SVc_on_CovP = args.set_SVc_on_CovP start_lmin = args.start_lmin comm = MPI.COMM_WORLD rank = comm.Get_rank() size = comm.Get_size() lt_prefix = {'TF': 'TF', 'TW': 'TW_zext'} old_stdout = sys.stdout if survey_stage: odir = './{}/fit_kmin{}_kmax{}_Pwig_over_Pnow/{}/'.format( survey_stage, kmin, kmax, Pwig_type) else: odir = './fit_kmin{}_kmax{}_Pwig_over_Pnow_fsky{}/{}/'.format( kmin, kmax, f_sky, Pwig_type) # if mpi_used == 'True': # odir = './{}/fit_kmin{}_kmax{}_Pwig_over_Pnow/mpi_{}/'.format(survey_stage, kmin, kmax, Pwig_type) if Psm_type == 'Pnow': odir = odir + 'set_Pnorm_Pnow/' if start_lmin != 1: odir = odir + 'start_ell_{}/'.format(start_lmin) if rank == 0: if not os.path.exists(odir): os.makedirs(odir) comm.Barrier() Sigma2_xy_dict = {'stage_III': 31.176, 'stage_IV': 22.578} for ss_name in Sigma2_xy_dict: if ss_name in survey_stage: stage_name = ss_name if Pwig_type == 'Pwig_nonlinear': Sigma2_xy = Sigma2_xy_dict[stage_name] elif Pwig_type == 'Pwig_linear': Sigma2_xy = 0.0 if args.Sigma2_inf == 'True': #Sigma2_xy = np.inf Sigma2_xy = 100 * 100 # test whether it influences results or not ofile = odir + "mcmc_fit_{}_{}rbin_{}kbin_snf{}_params{}_Sigma2_{}.log".format( lt, num_rbin, num_kbin, shapenf, params_str, Sigma2_xy) if params_str == '001': ofile = odir + "mcmc_fit_{0}_{1}rbin_{2}kbin_snf{3}_params{4}_alpha{5:.2f}_Sigma2_{6}.log".format( lt, num_rbin, num_kbin, shapenf, params_str, args.alpha, Sigma2_xy) log_file = open(ofile, "w") sys.stdout = log_file print('Arguments of fitting: ', args) ifile = '../Input_files/CAMB_Planck2015_matterpower.dat' kcamb, Pkcamb = np.loadtxt(ifile, dtype='f8', comments='#', unpack=True) Pwig_spl = InterpolatedUnivariateSpline(kcamb, Pkcamb) k_0 = 0.001 # unit h*Mpc^-1 Pk_0 = Pwig_spl(k_0) ifile = '../Input_files/transfer_fun_Planck2015.dat' kk, Tf = np.loadtxt(ifile, dtype='f8', comments='#', usecols=(0, 1), unpack=True) #print kk==kcamb Tf_spl = InterpolatedUnivariateSpline(kk, Tf) Tf_0 = Tf_spl(k_0) P0_a = Pk_0 / (pow(k_0, cosmic_params.ns) * Tf_0**2.0) Psm = P0_a * pow( kcamb, cosmic_params.ns ) * Tf**2.0 # Get primordial (smooth) power spectrum from the transfer function Psm_spl = InterpolatedUnivariateSpline(kcamb, Psm) norm_gf = 1.0 N_walkers = 40 all_param_names = 'alpha', 'Sigma2_xy', 'A' all_temperature = 0.01, 1.0, 0.1 alpha, A = args.alpha, 1.0 # initial guess for fitting, the value of Sigma2_xy at z=0.65 is from theory prediction (see code TF_cross_convergence_ps_bin.py) all_params = alpha, Sigma2_xy, A N_params, theta, fix_params, params_T, params_name = set_params( all_params, params_indices, all_param_names, all_temperature) # Fit for DM power spectrum if lt == 'TF': idir0 = '../{}_cross-ps/'.format(lt) if survey_stage: idir0 = '../{}/'.format(survey_stage) idir1 = '{}_Pk_output_dset_{}/' # if mpi_used == 'True': # idir1 = 'mpi_{}_Pk_output_dset_{}/' if f_sky: idir1 = '{}_Pk_output_dset_{}_fsky{}/' elif lt == 'TW': idir0 = '../{}_f2py_SVD/'.format(lt) if 
survey_stage: idir0 = '../{}/'.format(survey_stage) idir1 = '{}_Pk_output_dset_{}/' idir2 = '{}rbins_{}kbins_snf{}/'.format(num_rbin, num_kbin, shapenf) odir1 = 'mcmc_fit_params_Pwig_over_Pnow/{}/kmin{}_kmax{}/{}rbins_{}kbins_snf{}/'.format( Pwig_type, kmin, kmax, num_rbin, num_kbin, shapenf) if f_sky: odir1 = 'mcmc_fit_params_Pwig_over_Pnow/{}/kmin{}_kmax{}/{}rbins_{}kbins_snf{}_fsky{}/'.format( Pwig_type, kmin, kmax, num_rbin, num_kbin, shapenf, f_sky) # if mpi_used == 'True': # odir1 = 'mcmc_fit_params_Pwig_over_Pnow/{}/kmin{}_kmax{}/mpi_{}rbins_{}kbins_snf{}/'.format(Pwig_type, kmin, kmax, num_rbin, num_kbin, shapenf) if Psm_type == 'Pnow': idir2 = idir2 + 'set_Pnorm_Pnow/' odir1 = odir1 + 'set_Pnorm_Pnow/' if start_lmin != 1: idir2 = idir2 + 'start_ell_{}/'.format(start_lmin) odir1 = odir1 + 'start_ell_{}/'.format(start_lmin) if args.modify_Cov_cij_cpq == 'True': idir2 = idir2 + 'modify_Cov_cij_cpq/' odir1 = odir1 + 'modify_Cov_cij_cpq/' if params_str == '001': odir1 = odir1 + 'alpha_{0:.2f}/'.format(args.alpha) if f_sky: idir = idir0 + idir1.format( lt_prefix[lt], Pwig_type, f_sky ) + idir2 # not exactly matching format, but it's ok if there is no f_sky parameter. else: idir = idir0 + idir1.format(lt_prefix[lt], Pwig_type) + idir2 odir = idir0 + odir1 if rank == 0: if not os.path.exists(odir): os.makedirs(odir) if args.save_sampler == 'True': os.makedirs( odir + 'chain_samplers/') # store positions of samplers in chains comm.Barrier() ifile = idir + 'Pk_wnw_{}rbin_{}kbin_withshapenoisefactor{}_{}eigenvW.out'.format( num_rbin, num_kbin, shapenf, num_kbin) k_all = np.loadtxt(ifile, dtype='f8', comments='#', usecols=(0, )) indices = filter_krange( k_all, kmin, kmax) # Here we don't need to use sigma_Pk_wnw anymore. N_fitbin = len(indices) print('# of fit k bins: ', N_fitbin) print('The indices of fitting k bins: ', indices) if set_SVc_on_CovP != 'True': ifile = idir + 'Cov_Pwnw_inv_{}rbin_{}kbin_withshapenoisefactor{}.npz'.format( num_rbin, num_kbin, shapenf) npzfile = np.load(ifile) icov_Pk_wnw = npzfile['arr_0'] print('icov_Pk_wnw: ', icov_Pk_wnw) cov_Pwnw = linalg.inv(icov_Pk_wnw) print('cov_Pwnw: ', cov_Pwnw) identity = np.dot(cov_Pwnw, icov_Pk_wnw) print('cov * icov: ', identity) print('Inverse process is good?', np.allclose(identity, np.eye(num_kbin)) ) # test whether the inverse process is very successful part_cov_Pwnw = cov_Pwnw[np.ix_(indices, indices)] part_icov_Pk_wnw = linalg.inv(part_cov_Pwnw) print( 'Inverse process for marginalized matrix is good?', np.allclose(np.dot(part_cov_Pwnw, part_icov_Pk_wnw), np.eye(N_fitbin))) print('The inverse covariance matrix for fitting: ', part_icov_Pk_wnw) pool = MPIPool(loadbalance=True) for num_eigv in range(args.nSV_min, num_kbin + 1): np.random.seed(1) if f_sky: idir_Pwig = idir0 + idir1.format(lt_prefix[lt], Pwig_type, f_sky) + idir2 idir_Pnow = idir0 + idir1.format(lt_prefix[lt], 'Pnow', f_sky) + idir2 else: idir_Pwig = idir0 + idir1.format(lt_prefix[lt], Pwig_type) + idir2 idir_Pnow = idir0 + idir1.format(lt_prefix[lt], 'Pnow') + idir2 ifile = idir_Pwig + 'Pk_wig_{}rbin_{}kbin_withshapenoisefactor{}_{}eigenvW.out'.format( num_rbin, num_kbin, shapenf, num_eigv) print(ifile) data_m = np.loadtxt(ifile, dtype='f8', comments='#') # k, P(k), sigma_Pk k_obs, Pk_wig_obs = data_m[indices, 0], data_m[indices, 1] ifile = idir_Pnow + 'Pk_now_{}rbin_{}kbin_withshapenoisefactor{}_{}eigenvW.out'.format( num_rbin, num_kbin, shapenf, num_eigv) print(ifile) data_m = np.loadtxt(ifile, dtype='f8', comments='#') k_obs, Pk_now_obs = data_m[indices, 0], 
data_m[indices, 1] Pk_wnw_obs = Pk_wig_obs / Pk_now_obs if set_SVc_on_CovP == 'True': ifile = idir_Pwig + 'Cov_Pwnw_inv_{}rbin_{}kbin_withshapenoisefactor{}_{}eigenvW_SVc.npz'.format( num_rbin, num_kbin, shapenf, num_eigv) ####ifile = idir_Pwig + 'Cov_Pwnw_inv_{}rbin_{}kbin_withshapenoisefactor1.0_{}eigenvW_SVc.npz'.format(num_rbin, num_kbin, num_eigv) # only for some special case, it's temporary part_icov_Pk_wnw = filter_invCov_on_k(ifile, indices) mergedsamples, params_mcmc = mcmc_routine(N_params, N_walkers, theta, params_T, params_indices, fix_params, k_obs, Pk_wnw_obs, part_icov_Pk_wnw, Pwig_spl, Psm_spl, norm_gf, params_name, pool) print(params_mcmc) chi_square = chi2(params_mcmc[:, 0], params_indices, fix_params, k_obs, Pk_wnw_obs, part_icov_Pk_wnw, Pwig_spl, Psm_spl, norm_gf) dof = N_fitbin - N_params reduced_chi2 = chi_square / dof print("chi^2/dof: ", reduced_chi2, "\n") if args.Sigma2_inf == 'True': # in order to distinguish the Sigma2_xy value, we need to specify it. filename = 'Pk_wnw_{}rbin_{}kbin_snf{}_{}eigenvW_params{}_Sigma2_{}.dat'.format( num_rbin, num_kbin, shapenf, num_eigv, params_str, Sigma2_xy) else: filename = 'Pk_wnw_{}rbin_{}kbin_snf{}_{}eigenvW_params{}.dat'.format( num_rbin, num_kbin, shapenf, num_eigv, params_str) # Sigma2_xy is fixed in the default case ofile_params = odir + filename print('ofile_params:', ofile_params) write_params(ofile_params, params_mcmc, params_name, reduced_chi2, fix_params, dof) if (args.save_sampler == 'True') and (rank == 0): odir_merged_sample = odir + 'chain_samplers/' if not os.path.exists(odir_merged_sample): os.makedirs(odir_merged_sample) ofile = odir_merged_sample + 'mergedsamples_' + filename[ 0:len(filename) - 4] + '.npz' np.savez_compressed(ofile, np.array(mergedsamples)) pool.close() sys.stdout = old_stdout log_file.close()
class EnsembleSampler(GenericSampler): def __init__(self, num_walkers=None, num_steps=5000, num_burn=2000, temp_dir=None, save_interval=300): """ Uses ``emcee`` and the `EnsembleSampler <http://dan.iel.fm/emcee/current/api/#emcee.EnsembleSampler>`_ to fit the supplied model. This method sets an emcee run using the ``EnsembleSampler`` and manual chain management to allow for low to medium dimensional models. MPI running is detected automatically for less hassle, and chain progress is serialised to disk automatically for convenience. Parameters ---------- num_walkers : int, optional The number of walkers to run. If not supplied, it defaults to eight times the framework dimensionality num_steps : int, optional The number of steps to run num_burn : int, optional The number of steps to discard for burn in temp_dir : str If set, specifies a directory in which to save temporary results, like the emcee chain save_interval : float The amount of seconds between saving the chain to file. Setting to ``None`` disables serialisation. """ self.logger = logging.getLogger(__name__) import emcee self.chain = None self.pool = None self.master = True self.num_steps = num_steps self.num_burn = num_burn self.temp_dir = temp_dir if temp_dir is not None and not os.path.exists(temp_dir): os.makedirs(temp_dir) self.save_interval = save_interval self.num_walkers = num_walkers def fit(self, kwargs): """ Runs the sampler over the model and returns the flat chain of results Parameters ---------- kwargs : dict Containing the following information at a minimum: - log_posterior : function A function which takes a list of parameters and returns the log posterior - start : function|list|ndarray Either a starting position, or a function that can be called to generate a starting position - save_dims : int, optional Only return values for the first ``save_dims`` parameters. Useful to remove numerous marginalisation parameters if running low on memory or hard drive space. - uid : str, optional A unique identifier used to differentiate different fits if two fits both serialise their chains and use the same temporary directory Returns ------- dict A dictionary with key "chains" containing the final flattened chain of dimensions ``(num_dimensions, num_walkers * (num_steps - num_burn))`` """ log_posterior = kwargs.get("log_posterior") start = kwargs.get("start") save_dims = kwargs.get("save_dims") uid = kwargs.get("uid") assert log_posterior is not None assert start is not None from emcee.utils import MPIPool import emcee try: # pragma: no cover self.pool = MPIPool() if not self.pool.is_master(): self.logger.info("Slave waiting") self.master = False self.pool.wait() sys.exit(0) else: self.logger.info("MPIPool successful initialised and master found. " "Running with %d cores." % self.pool.size) except ImportError: self.logger.info("mpi4py is not installed or not configured properly. 
" "Ignore if running through python, not mpirun") except ValueError as e: # pragma: no cover self.logger.info("Unable to start MPI pool, expected normal python execution") self.logger.info(str(e)) if callable(start): num_dim = start().size else: num_dim = start.size if self.num_walkers is None: self.num_walkers = num_dim * 4 self.num_walkers = max(self.num_walkers, 20) self.logger.debug("Fitting framework with %d dimensions" % num_dim) self.logger.info("Using Ensemble Sampler") sampler = emcee.EnsembleSampler(self.num_walkers, num_dim, log_posterior, pool=self.pool, live_dangerously=True) emcee_wrapper = EmceeWrapper(sampler) flat_chain = emcee_wrapper.run_chain(self.num_steps, self.num_burn, self.num_walkers, num_dim, start=start, save_dim=save_dims, temp_dir=self.temp_dir, uid=uid, save_interval=self.save_interval) self.logger.debug("Fit finished") if self.pool is not None: # pragma: no cover self.pool.close() self.logger.debug("Pool closed") return {"chain": flat_chain}
def rerunPosteriorPredictive():
    ''' Rerun the posterior predictive distribution.

    This can be used to e.g. increase the resolution spatially or in terms of
    the age of stellar populations, or vary some parameter systematically:
    every model in the chainDirRel+'-ppd' experiment is read back in and
    re-evaluated (via fakeProb below) with its stored parameters.'''
    pool = MPIPool(comm=comm, loadbalance=True)
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Read in the posterior predictive distribution (parameters only).
    output = readoutput.Experiment(chainDirRel + '-ppd')
    output.read(paramsOnly=True, keepStars=False)

    emcee_params = []
    print "output.models: ", len(output.models)
    # For each model, take the parameters we have read in and construct the
    # corresponding emcee parameters:
    # eta, epsff, fg0, muNorm, muMhScaling, fixedQ, accScaleLength, fcool,
    # Mh0, fscatter, x0, x1, x2, x3, obsScale, conRF, muHgScaling
    for model in output.models:
        eta = model.p['eta']
        epsff = model.p['epsff']
        fg0 = model.p['fg0']
        muNorm = model.p['muNorm']
        muMhScaling = model.p['muMhScaling']
        fixedQ = model.p['fixedQ']
        accScaleLength = model.p['accScaleLength']
        fcool = model.p['fcool']
        Mh0 = model.p['Mh0']
        fscatter = model.p['fscatter']
        x0 = model.p['x0']
        x1 = model.p['x1']
        x2 = model.p['x2']
        x3 = model.p['x3']
        conRF = model.p['concentrationRandomFactor']
        muHgScaling = model.p['muHgScaling']
        # obsScale only affects the model in post-processing, i.e. in
        # comparing to the data, not the running of the model itself, so
        # any value will do here.
        obsScale = 1.0
        theList = [eta, epsff, fg0, muNorm, muMhScaling, fixedQ,
                   accScaleLength, fcool, Mh0, fscatter,
                   x0, x1, x2, x3, obsScale, conRF, muHgScaling]
        try:
            assert eta > 0 and epsff > 0 and fg0 > 0 and fg0 <= 1 \
                and fixedQ > 0 and muNorm >= 0 and fcool >= 0 \
                and fcool <= 1 and Mh0 > 0
        except AssertionError:
            print 'Unexpected ppd params: ', theList
        emcee_params.append(copy.deepcopy(theList))

    # From here on out we just need to emulate parts of the run() function to
    # trick emcee into running a single iteration of the algorithm with this IC.
    ndim = 17
    restart = {}
    restart['currentPosition'] = emcee_params
    restart['chain'] = None
    restart['state'] = None
    restart['prob'] = None
    restart['iterationCounter'] = 0
    restart['mcmcRunCounter'] = 0

    # Need one walker per sample from the posterior predictive distribution.
    nwalkers = len(emcee_params)

    print "Starting up the ensemble sampler!"
    sampler = emcee.EnsembleSampler(nwalkers, ndim, fakeProb, pool=pool)

    print "Take a step with the ensemble sampler"
    print np.shape(restart['currentPosition']), np.shape(np.random.uniform(0, 1, nwalkers))
    # _get_lnprob is private emcee 2.x API: it farms the positions out to the
    # pool and evaluates fakeProb once, without advancing the chain.
    sampler._get_lnprob(pos=restart['currentPosition'])

    print "Close the pool"
    pool.close()
def main(runmpi=True, nw=100, th=6, bi=10, fr=10): if runmpi: pool = MPIPool() if not pool.is_master(): pool.wait() sys.exit(0) else: pool = None #ldfileloc = '/Users/tom/Projects/koi2133/code/' ldfileloc = '/nobackup/tsbarcl2/Projects/koi2133/code/' #codedir = '/Users/tom/Projects/koi2133/code' codedir = '/nobackup/tsbarcl2/Projects/koi2133/code' koi = 2133 cadence = 1625.3 teff = 4550. teff_unc = 75. feh = 0.11 feh_unc = 0.07 logg = 2.943 logg_unc = 0.007 rho = 0.0073 rho_unc = 0.0001 nplanets = 1 dil = 0.0 period = 6.24672336 impact = 0.7 T0 = 136.383880 rprs = 0.02067 alb = 30. occ = 40. ell = 30. rvamp = 79.0 ecosw = 0.048 esinw = -0.045 planet_guess = np.array( [T0, period, impact, rprs, ecosw, esinw, rvamp, occ, ell, alb]) rvtime, rvval, rverr = get_rv() time, flux, ferr = get_lc() rho_prior = True ldp_prior = False nwalkers = nw threads = th burnin = bi fullrun = fr thin = 1 n_ldparams = 2 toffset_lc = 0 toffset_rv = 0 zpt_0 = 1.E-10 M = tmod.transitemcee_koi2133(nplanets, cadence, ldfileloc=ldfileloc, codedir=codedir) M.get_stellar(teff, logg, feh, n_ldparams, ldp_prior=ldp_prior) M.already_open(time, flux, ferr, rvtime, rvval, rverr, timeoffset=toffset_lc, rvtimeoffset=toffset_rv, normalize=False) rho_vals = np.array([rho, rho_unc]) M.get_rho(rho_vals, rho_prior) M.get_zpt(zpt_0) noise_model = [3.0E-4, 3.7E-2, 2.E-4, 3.] if dil is not None: M.get_sol(*planet_guess, dil=dil, noise_model=noise_model) else: M.get_sol(*planet_guess, noise_model=noise_model) outfile = 'koi{0}_np{1}_prior{2}_dil{3}GP.hdf5'.format( koi, nplanets, rho_prior, dil) p0 = M.get_guess(nwalkers) #dirty hack!! qwe = np.r_[np.arange(0, 7), np.arange(9, 21)] p0 = p0[:, qwe] l_var = np.shape(p0)[1] N = len([indval for indval in xrange(fullrun) if indval % thin == 0]) with h5py.File(outfile, u"w") as f: f.create_dataset("time", data=M.time) f.create_dataset("flux", data=M.flux) f.create_dataset("err", data=M.err) f.create_dataset("rvtime", data=M.rvtime) f.create_dataset("rvval", data=M.rvval) f.create_dataset("rverr", data=M.rverr) f.create_dataset("itime", data=M._itime) f.create_dataset("ntt", data=M._ntt) f.create_dataset("tobs", data=M._tobs) f.create_dataset("omc", data=M._omc) f.create_dataset("datatype", data=M._datatype) f.attrs["rho_0"] = M.rho_0 f.attrs["rho_0_unc"] = M.rho_0_unc f.attrs["nplanets"] = M.nplanets f.attrs["ld1"] = M.ld1 f.attrs["ld2"] = M.ld2 f.attrs["koi"] = koi f.attrs["dil"] = dil g = f.create_group("mcmc") g.attrs["nwalkers"] = nwalkers g.attrs["burnin"] = burnin g.attrs["iterations"] = fullrun g.attrs["thin"] = thin g.attrs["rho_prior"] = rho_prior g.attrs["ldp_prior"] = ldp_prior g.attrs["onlytransits"] = M.onlytransits g.attrs["tregion"] = M.tregion g.attrs["ldfileloc"] = M.ldfileloc g.attrs["n_ldparams"] = M.n_ldparams g.create_dataset("fixed_sol", data=M.fixed_sol) g.create_dataset("fit_sol_0", data=M.fit_sol_0) c_ds = g.create_dataset("chain", (nwalkers, N, l_var), dtype=np.float64) lp_ds = g.create_dataset("lnprob", (nwalkers, N), dtype=np.float64) #I don't like the default LDP unc #I'm changing them M.ld1_unc = 0.8 M.ld2_unc = 0.8 args = [ M.nplanets, M.rho_0, M.rho_0_unc, M.rho_prior, M.ld1, M.ld1_unc, M.ld2, M.ld2_unc, M.ldp_prior, M.flux, M.err, M.fixed_sol, M.time, M._itime, M._ntt, M._tobs, M._omc, M._datatype, M.rvtime, M.rvval, M.rverr, M._rvitime, M.n_ldparams, M.ldfileloc, M.onlytransits, M.tregion ] tom = tmod.logchi2_rv_phaseGP2 if runmpi: sampler = emcee.EnsembleSampler(nwalkers, l_var, tom, args=args, pool=pool) else: sampler = emcee.EnsembleSampler(nwalkers, 
l_var, tom, args=args, threads=th) time1 = thetime.time() p2, prob, state = sampler.run_mcmc(p0, burnin, storechain=False) sampler.reset() with h5py.File(outfile, u"a") as f: g = f["mcmc"] g.create_dataset("burnin_pos", data=p2) g.create_dataset("burnin_prob", data=prob) time2 = thetime.time() print('burn-in took ' + str((time2 - time1) / 60.) + ' min') time1 = thetime.time() for i, (pos, lnprob, state) in enumerate( sampler.sample(p2, iterations=fullrun, rstate0=state, storechain=False)): #do the thinning in the loop here if i % thin == 0: ind = i / thin with h5py.File(outfile, u"a") as f: g = f["mcmc"] c_ds = g["chain"] lp_ds = g["lnprob"] c_ds[:, ind, :] = pos lp_ds[:, ind] = lnprob time2 = thetime.time() print('MCMC run took ' + str((time2 - time1) / 60.) + ' min') print('') print("Mean acceptance: " + str(np.mean(sampler.acceptance_fraction))) print('') if runmpi: pool.close() else: sampler.pool.close() return sampler
def run(N, p00=None, nwalkers=500): fn = chainDirRel+'.pickle' ndim = 17 if p00 is not None: p0 = [p00*(1.0+0.001*np.random.randn( ndim )) for i in range(nwalkers)] else: p0 = [sampleFromPrior() for i in range(nwalkers)] restart = {} restart['currentPosition'] = p0 restart['chain'] = None restart['state'] = None restart['prob'] = None restart['iterationCounter'] = 0 restart['mcmcRunCounter'] = 0 # Read in our past progress UNLESS we've been given a new starting location. if p00 is None: updateRestart(fn,restart) if restart['chain'] is not None: # This may save some time if you change something and forget to delete the .pickle file. restartedShape = np.shape(restart['chain']) print restartedShape, nwalkers, ndim assert restartedShape[0] == nwalkers assert restartedShape[2] == ndim global runNumber runNumber = restart['mcmcRunCounter'] restart['iterationCounter'] += N restart['mcmcRunCounter'] += 1 pool = MPIPool(comm=comm, loadbalance=True) if not pool.is_master(): pool.wait() sys.exit(0) sampler = emcee.EnsembleSampler(nwalkers, ndim, lnProb, pool=pool) #pos, prob, state = sampler.run_mcmc(restart['currentPosition'], N, rstate0=restart['state'], lnprob0=restart['prob']) for result in sampler.sample(restart['currentPosition'], iterations=N, lnprob0=restart['prob'], rstate0=restart['state']): pos, prob, state = result restart['acor'] = sampler.acor[:] # autocorr length for each param (ndim) restart['accept'] = sampler.acceptance_fraction[:] # acceptance frac for each walker. restart['currentPosition'] = pos # same shape as p0: nwalkers x ndim restart['state'] = state # random number generator state restart['prob'] = prob # nwalkers x __ if restart['chain'] is None: restart['chain'] = np.expand_dims(sampler.chain[:,0,:],1) # nwalkers x niterations x ndim restart['allProbs'] = np.expand_dims(prob,1) # nwalkers x niterations else: print np.shape(restart['chain']), np.shape(sampler.chain[:,-1,:]), np.shape(sampler.chain) print restart['mcmcRunCounter'], restart['iterationCounter'] #restart['chain'] = np.concatenate((restart['chain'], sampler.chain[:,-1,:]), axis=1) print "dbg1: ",np.shape(restart['chain']), np.shape(np.zeros((nwalkers, 1, ndim))), np.shape(np.expand_dims(pos,1)) restart['chain'] = np.concatenate((restart['chain'], np.expand_dims(pos, 1)),axis=1) restart['allProbs'] = np.concatenate((restart['allProbs'], np.expand_dims(prob, 1)),axis=1) saveRestart(fn,restart) pool.close()
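# run() above grows restart['chain'] by one slice per iteration and pickles it
# so an interrupted job can resume. A minimal, generic sketch of that
# checkpointing idea (assumes an emcee 2.x sampler whose lnprob returns no
# blobs; sample_with_checkpoints and chain.pkl are illustrative names, not
# part of the code above):
import pickle

import numpy as np


def sample_with_checkpoints(sampler, p0, niter, fname="chain.pkl"):
    """Advance the sampler, rewriting a checkpoint file after every step."""
    chain = None
    probs = None
    for pos, prob, state in sampler.sample(p0, iterations=niter, storechain=False):
        step = np.expand_dims(pos, 1)    # nwalkers x 1 x ndim
        chain = step if chain is None else np.concatenate((chain, step), axis=1)
        pstep = np.expand_dims(prob, 1)  # nwalkers x 1
        probs = pstep if probs is None else np.concatenate((probs, pstep), axis=1)
        with open(fname, "wb") as f:     # overwrite the checkpoint each step
            pickle.dump({"chain": chain, "prob": probs,
                         "pos": pos, "state": state}, f)
    return chain, probs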
def main(): ################## #These change a lot numWaveforms = 12 numThreads = 8 ndim = 6*numWaveforms + 7 nwalkers = 25*ndim iter=10000 burnIn = 9000 ###################### doPlots = 1 # plt.ion() fitSamples = 350 timeStepSize = 1. #ns #Prepare detector tempGuess = 79.310080 gradGuess = 0.05 pcRadGuess = 2.5 pcLenGuess = 1.6 #Create a detector model detName = "conf/P42574A_grad%0.2f_pcrad%0.2f_pclen%0.2f.conf" % (0.05,2.5, 1.65) det = Detector(detName, temperature=tempGuess, timeStep=timeStepSize, numSteps=fitSamples*10 ) det.LoadFields("P42574A_fields_v3.npz") det.SetFields(pcRadGuess, pcLenGuess, gradGuess) b_over_a = 0.107213 c = -0.821158 d = 0.828957 rc1 = 74.4 rc2 = 1.79 rcfrac = 0.992 det.SetTransferFunction(b_over_a, c, d, rc1, rc2, rcfrac) tempIdx = -7 #and the remaining 4 are for the transfer function fig_size = (20,10) #Create a decent start guess by fitting waveform-by-waveform wfFileName = "P42574A_12_fastandslow_oldwfs.npz" # wfFileName = "P42574A_5_fast.npz" if os.path.isfile(wfFileName): data = np.load(wfFileName) results = data['results'] wfs = data['wfs'] # wfs = wfs[::3] # results = results[::3] numWaveforms = wfs.size else: print "No saved waveforms available. Exiting." exit(0) #prep holders for each wf-specific param r_arr = np.empty(numWaveforms) phi_arr = np.empty(numWaveforms) z_arr = np.empty(numWaveforms) scale_arr = np.empty(numWaveforms) t0_arr = np.empty(numWaveforms) smooth_arr = np.ones(numWaveforms)*7. simWfArr = np.empty((1,numWaveforms, fitSamples)) #Prepare the initial value arrays for (idx, wf) in enumerate(wfs): wf.WindowWaveformTimepoint(fallPercentage=.97, rmsMult=2,) r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx], t0_arr[idx], smooth_arr[idx] = results[idx]['x'] # t0_arr[idx] -= 15 #Initialize the multithreading # p = Pool(numThreads, initializer=initializeDetectorAndWaveforms, initargs=[det, wfs]) initializeDetectorAndWaveforms(det, wfs) p = MPIPool() if not p.is_master(): p.wait() sys.exit(0) #Do the MCMC mcmc_startguess = np.hstack((r_arr[:], phi_arr[:], z_arr[:], scale_arr[:], t0_arr[:], smooth_arr[:], # waveform-specific params tempGuess, b_over_a, c, d, rc1, rc2, rcfrac)) # detector-specific #number of walkers _must_ be even if nwalkers % 2: nwalkers +=1 pos0 = [mcmc_startguess + 1e-2*np.random.randn(ndim)*mcmc_startguess for i in range(nwalkers)] rc1idx = -3 rc2idx = -2 rcfracidx = -1 #Make sure everything in the initial guess is within bounds for pos in pos0: pos[:numWaveforms] = np.clip( pos[:numWaveforms], 0, np.floor(det.detector_radius*10.)/10.) pos[numWaveforms:2*numWaveforms] = np.clip(pos[numWaveforms:2*numWaveforms], 0, np.pi/4) pos[2*numWaveforms:3*numWaveforms] = np.clip(pos[2*numWaveforms:3*numWaveforms], 0, np.floor(det.detector_length*10.)/10.) pos[4*numWaveforms:5*numWaveforms] = np.clip(pos[4*numWaveforms:5*numWaveforms], 0, fitSamples) pos[5*numWaveforms:6*numWaveforms] = np.clip(pos[5*numWaveforms:6*numWaveforms], 0, 20.) 
pos[tempIdx] = np.clip(pos[tempIdx], 40, 120) pos[rcfracidx] = np.clip(pos[rcfracidx], 0, 1) pos[rc2idx] = np.clip(pos[rc2idx], 0, np.inf) pos[rc1idx] = np.clip(pos[rc1idx], 0, np.inf) prior = lnprior(pos,) if not np.isfinite(prior) : print "BAD PRIOR WITH START GUESS YOURE KILLING ME SMALLS" print pos exit(0) #Initialize, run the MCMC sampler = emcee.EnsembleSampler( nwalkers, ndim, lnprob, pool=p) #w/ progress bar, & time the thing bar = ProgressBar(widgets=[Percentage(), Bar(), ETA()], maxval=iter).start() for (idx,result) in enumerate(sampler.sample(pos0, iterations=iter, storechain=True)): bar.update(idx+1) bar.finish() p.close() print "Dumping chain to file..." np.save("mpisampler_%dwfs.npy" % numWaveforms, sampler.chain)
def main(argv): ################## #These change a lot numWaveforms = 16 numThreads = 12 ndim = 6*numWaveforms + 8 nwalkers = 2*ndim iter=50 burnIn = 40 wfPlotNumber = 10 ###################### # plt.ion() fitSamples = 200 #Prepare detector zero_1 = -5.56351644e+07 pole_1 = -1.38796386e+04 pole_real = -2.02559385e+07 pole_imag = 9885315.37450211 zeros = [zero_1,0 ] poles = [ pole_real+pole_imag*1j, pole_real-pole_imag*1j, pole_1] system = signal.lti(zeros, poles, 1E7 ) tempGuess = 77.89 gradGuess = 0.0483 pcRadGuess = 2.591182 pcLenGuess = 1.613357 #Create a detector model detName = "conf/P42574A_grad%0.2f_pcrad%0.2f_pclen%0.2f.conf" % (0.05,2.5, 1.65) det = Detector(detName, temperature=tempGuess, timeStep=1., numSteps=fitSamples*10, tfSystem=system) det.LoadFields("P42574A_fields_v3.npz") det.SetFields(pcRadGuess, pcLenGuess, gradGuess) tempIdx = -8 gradIdx = -7 pcRadIdx = -6 pcLenIdx = -5 #and the remaining 4 are for the transfer function fig_size = (20,10) #Create a decent start guess by fitting waveform-by-waveform wfFileName = "P42574A_512waveforms_%drisetimeculled.npz" % numWaveforms if os.path.isfile(wfFileName): data = np.load(wfFileName) results = data['results'] wfs = data['wfs'] numWaveforms = wfs.size else: print "No saved waveforms available. Loading from Data" exit(0) #prep holders for each wf-specific param r_arr = np.empty(numWaveforms) phi_arr = np.empty(numWaveforms) z_arr = np.empty(numWaveforms) scale_arr = np.empty(numWaveforms) t0_arr = np.empty(numWaveforms) smooth_arr = np.ones(numWaveforms)*7. simWfArr = np.empty((1,numWaveforms, fitSamples)) #Prepare the initial value arrays for (idx, wf) in enumerate(wfs): wf.WindowWaveformTimepoint(fallPercentage=.99) r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx], t0_arr[idx], smooth_arr[idx] = results[idx]['x'] t0_arr[idx] += 10 #because i had a different windowing offset back in the day #Plot the waveforms to take a look at the initial guesses if False: fig = plt.figure() for (idx,wf) in enumerate(wfs): print "WF number %d:" % idx print " >>r: %f\n >>phi %f\n >>z %f\n >>e %f\n >>t0 %f\n >>smooth %f" % (r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx], t0_arr[idx], smooth_arr[idx]) ml_wf = det.GetSimWaveform(r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx]*100, t0_arr[idx], fitSamples, smoothing = smooth_arr[idx]) plt.plot(ml_wf, color="b") plt.plot(wf.windowedWf, color="r") value = raw_input(' --> Press q to quit, any other key to continue\n') if value == 'q': exit(0) #Initialize this thread's globals initializeDetectorAndWaveforms(det, wfs) #Initialize the multithreading pool = MPIPool() if not pool.is_master(): pool.wait() sys.exit(0) #Do the MCMC mcmc_startguess = np.hstack((r_arr[:], phi_arr[:], z_arr[:], scale_arr[:]*100., t0_arr[:],smooth_arr[:], # waveform-specific params tempGuess, gradGuess,pcRadGuess, pcLenGuess, zero_1, pole_1, pole_real, pole_imag)) # detector-specific #number of walkers _must_ be even if nwalkers % 2: nwalkers +=1 #Initialize walkers with a random, narrow ball around the start guess pos0 = [mcmc_startguess + 1e-2*np.random.randn(ndim)*mcmc_startguess for i in range(nwalkers)] #Make sure everything in the initial guess is within bounds for pos in pos0: pos[:numWaveforms] = np.clip( pos[:numWaveforms], 0, np.floor(det.detector_radius*10.)/10.) pos[numWaveforms:2*numWaveforms] = np.clip(pos[numWaveforms:2*numWaveforms], 0, np.pi/4) pos[2*numWaveforms:3*numWaveforms] = np.clip(pos[2*numWaveforms:3*numWaveforms], 0, np.floor(det.detector_length*10.)/10.) 
        # (continuing the per-walker clipping of the start guesses begun above)
        pos[4*numWaveforms:5*numWaveforms] = np.clip(pos[4*numWaveforms:5*numWaveforms], 0, fitSamples)
        pos[5*numWaveforms:6*numWaveforms] = np.clip(pos[5*numWaveforms:6*numWaveforms], 0, 20.)
        pos[tempIdx] = np.clip(pos[tempIdx], 40, 120)
        pos[gradIdx] = np.clip(pos[gradIdx], det.gradList[0], det.gradList[-1])
        pos[pcRadIdx] = np.clip(pos[pcRadIdx], det.pcRadList[0], det.pcRadList[-1])
        pos[pcLenIdx] = np.clip(pos[pcLenIdx], det.pcLenList[0], det.pcLenList[-1])

        prior = lnprior(pos,)
        if not np.isfinite(prior):
            print "BAD PRIOR WITH START GUESS YOURE KILLING ME SMALLS"
            print pos
            exit(0)

    # Initialize, run the MCMC. The pool created above is named `pool`;
    # the original passed an undefined `p` here.
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=pool)

    # Time the thing
    start = timer()
    for (idx, result) in enumerate(sampler.sample(pos0, iterations=iter, storechain=True)):
        continue
    end = timer()
    pool.close()

    print "Elapsed time: " + str(end - start)

    print "Dumping chain to file..."
    np.save("sampler_mpi_%dwfs.npy" % numWaveforms, sampler.chain)

    print "Making MCMC steps figure..."

    ######### Plots for Waveform params
    stepsFig = plt.figure(2, figsize=fig_size)
    plt.clf()
    ax0 = stepsFig.add_subplot(611)
    ax1 = stepsFig.add_subplot(612, sharex=ax0)
    ax2 = stepsFig.add_subplot(613, sharex=ax0)
    ax3 = stepsFig.add_subplot(614, sharex=ax0)
    ax4 = stepsFig.add_subplot(615, sharex=ax0)
    ax5 = stepsFig.add_subplot(616, sharex=ax0)
    ax0.set_ylabel('r')
    ax1.set_ylabel('phi')
    ax2.set_ylabel('z')
    ax3.set_ylabel('scale')
    ax4.set_ylabel('t0')
    ax5.set_ylabel('smoothing')
    for i in range(nwalkers):
        for j in range(wfs.size):
            ax0.plot(sampler.chain[i, :, 0+j], alpha=0.3)                 # r
            ax1.plot(sampler.chain[i, :, numWaveforms + j], alpha=0.3)    # phi
            ax2.plot(sampler.chain[i, :, 2*numWaveforms + j], alpha=0.3)  # z
            ax3.plot(sampler.chain[i, :, 3*numWaveforms + j], alpha=0.3)  # energy
            ax4.plot(sampler.chain[i, :, 4*numWaveforms + j], alpha=0.3)  # t0
            ax5.plot(sampler.chain[i, :, 5*numWaveforms + j], alpha=0.3)  # smoothing
    plt.savefig("emcee_mpi_wfchain_%dwfs.png" % numWaveforms)

    ######### Plots for Detector params
    stepsFigDet = plt.figure(3, figsize=fig_size)
    plt.clf()
    ax0 = stepsFigDet.add_subplot(411)
    ax1 = stepsFigDet.add_subplot(412, sharex=ax0)
    ax2 = stepsFigDet.add_subplot(413, sharex=ax0)
    ax3 = stepsFigDet.add_subplot(414, sharex=ax0)
    ax0.set_ylabel('temp')
    ax1.set_ylabel('grad')
    ax2.set_ylabel('pcRad')
    ax3.set_ylabel('pcLen')
    for i in range(nwalkers):
        ax0.plot(sampler.chain[i, :, tempIdx], "b", alpha=0.3)   # temp
        ax1.plot(sampler.chain[i, :, gradIdx], "b", alpha=0.3)   # grad
        ax2.plot(sampler.chain[i, :, pcRadIdx], "b", alpha=0.3)  # pcrad
        ax3.plot(sampler.chain[i, :, pcLenIdx], "b", alpha=0.3)  # pclen
    plt.savefig("emcee_mpi_detchain_%dwfs.png" % numWaveforms)

    # ... and for the transfer function
    stepsFigTF = plt.figure(4, figsize=fig_size)
    plt.clf()
    tf0 = stepsFigTF.add_subplot(411)
    tf1 = stepsFigTF.add_subplot(412, sharex=tf0)
    tf2 = stepsFigTF.add_subplot(413, sharex=tf0)
    tf3 = stepsFigTF.add_subplot(414, sharex=tf0)
    tf0.set_ylabel('zero_1')
    tf1.set_ylabel('pole_1')
    tf2.set_ylabel('pole_real')
    tf3.set_ylabel('pole_imag')
    for i in range(nwalkers):
        tf0.plot(sampler.chain[i, :, -4], "b", alpha=0.3)  # zero_1
        tf1.plot(sampler.chain[i, :, -3], "b", alpha=0.3)  # pole_1
        tf2.plot(sampler.chain[i, :, -2], "b", alpha=0.3)  # pole_real
        tf3.plot(sampler.chain[i, :, -1], "b", alpha=0.3)  # pole_imag
    plt.savefig("emcee_mpi_tfchain_%dwfs.png" % numWaveforms)

    samples = sampler.chain[:, burnIn:, :].reshape((-1, ndim))
    print "temp is %f" % np.median(samples[:, tempIdx])
    print "grad is %f" % np.median(samples[:, gradIdx])
    print "pcrad is %f" % np.median(samples[:, pcRadIdx])
    print "pclen is %f" % np.median(samples[:, pcLenIdx])
    print "zero_1 is %f" % np.median(samples[:, -4])
    print "pole_1 is %f" % np.median(samples[:, -3])
    print "pole_real is %f" % np.median(samples[:, -2])
    print "pole_imag is %f" % np.median(samples[:, -1])

    # TODO: Aaaaaaand plot some waveforms..
    simWfs = np.empty((wfPlotNumber, numWaveforms, fitSamples))
    for idx, theta in enumerate(samples[np.random.randint(len(samples), size=wfPlotNumber)]):
        temp, impGrad, pcRad, pcLen = theta[tempIdx], theta[gradIdx], theta[pcRadIdx], theta[pcLenIdx]
        zero_1, pole_1, pole_real, pole_imag = theta[-4:]
        r_arr, phi_arr, z_arr, scale_arr, t0_arr, smooth_arr = theta[:-8].reshape((6, numWaveforms))
        det.SetTemperature(temp)
        det.SetFields(pcRad, pcLen, impGrad)
        zeros = [zero_1, 0]
        poles = [pole_real + pole_imag*1j, pole_real - pole_imag*1j, pole_1]
        det.SetTransferFunction(zeros, poles, 1E7)
        for wf_idx in range(wfs.size):
            wf_i = det.GetSimWaveform(r_arr[wf_idx], phi_arr[wf_idx], z_arr[wf_idx],
                                      scale_arr[wf_idx], t0_arr[wf_idx], fitSamples)
            simWfs[idx, wf_idx, :] = wf_i
            if wf_i is None:
                print "Waveform %d, %d is None" % (idx, wf_idx)

    # Use a fresh figure number so we don't draw on top of the (already saved)
    # transfer-function traces, which also used figure 4 in the original.
    residFig = plt.figure(5, figsize=(20, 15))
    helpers.plotManyResidual(simWfs, wfs, figure=residFig)
    plt.savefig("emcee_mpi_waveforms_%dwfs.png" % numWaveforms)
def fit_subsamplefof_mean():
    parser = argparse.ArgumentParser(
        description='This is the MCMC code to get the fitting parameters, made by Zhejie Ding.')
    parser.add_argument('-rec_id', "--rec_id", help='The id of reconstruction, either 0 or 1.',
                        required=True)  # 0: pre-reconstruction; 1: post-reconstruction
    parser.add_argument('-space_id', "--space_id",
                        help='The type of space for fitting, 0 for real space and 1 for redshift space.',
                        required=True)
    args = parser.parse_args()
    print("args: ", args)
    rec_id = int(args.rec_id)
    print("rec_id: ", rec_id)
    space_id = int(args.space_id)
    print("space_id: ", space_id)

    N_walkers = 40
    # simulation run name
    N_dataset = 20
    N_mu_bin = 100
    #N_skip_header = 11
    #N_skip_footer = 31977
    Omega_m = 0.3075
    G_0 = growth_factor(0.0, Omega_m)  # G_0 at z=0, normalization factor
    Volume = 1380.0**3.0               # the volume of the simulation box

    sim_z = ['0', '0.6', '1.0']
    sim_seed = [0, 9]
    sim_wig = ['NW', 'WG']
    sim_a = ['1.0000', '0.6250', '0.5000']
    sim_space = ['r', 's']    # r for real space; s for redshift space
    rec_dirs = ['DD', 'ALL'] # "ALL" stores P(k, \mu) after the reconstruction process; "DD" is before reconstruction.
    rec_fprefix = ['', 'R']

    mcut_Npar_list = [[37, 149, 516, 1524, 3830],
                      [35, 123, 374, 962, 2105],
                      [34, 103, 290, 681, 1390]]
    N_masscut = np.size(mcut_Npar_list, axis=1)

    inputf = '../Zvonimir_data/planck_camb_56106182_matterpower_smooth_z0.dat'
    k_smooth, Pk_smooth = np.loadtxt(inputf, dtype='f8', comments='#', unpack=True)
    tck_Pk_sm = interpolate.splrep(k_smooth, Pk_smooth)

    inputf = '../Zvonimir_data/planck_camb_56106182_matterpower_z0.dat'
    k_wiggle, Pk_wiggle = np.loadtxt(inputf, dtype='f8', comments='#', unpack=True)
    tck_Pk_linw = interpolate.splrep(k_wiggle, Pk_wiggle)

    # First, read one file to get the k bins we want for the fitting range.
    dir0 = '/Users/ding/Documents/playground/WiggleNowiggle/subsample_FoF_data_HS/Pk_obs_2d_wnw_mean_DD_ksorted_mu_masscut/'
    inputf = dir0 + 'fof_kaver.wnw_diff_a_0.6250_mcut35_fraction0.126.dat'
    k_p, mu_p = np.loadtxt(inputf, dtype='f8', comments='#', delimiter=' ', usecols=(0, 1), unpack=True)
    #print(k_p, mu_p)
    N_fitbin = len(k_p)
    #print('# of (k, mu) bins: ', N_fitbin)

    # output directory for the fitted parameters
    odir = './ZV_lagrange_params_{}_wig-now_mean_dset/'.format(rec_dirs[rec_id])
    if not os.path.exists(odir):
        os.makedirs(odir)

    #space_id = 1  # in redshift space
    if rec_id == 0:
        ##Sigma_0 = 8.3364  # the approximate value of \Sigma_xy and \Sigma_z, in Mpc/h, at z=0.
        Sigma_0 = 7.8364    # suggested by Zvonimir, at z=0
    elif rec_id == 1:
        Sigma_0 = 2.84

    # 0: parameter fixed, 1: parameter free.
    #params_indices = [1, 1, 1, 1, 1, 1, 0, 0]  # Only for the DM case. b0 needs to be fixed for the DM subsample power spectrum. In this case, make sure \Sigma_xy and \Sigma_z are positive, which should be set in mcmc_routine.
    if space_id == 0:
        # Setting f=0 alone may not be enough in real space.
        params_indices = [1, 1, 1, 0, 1, 1]  # set f=0 for real space
        f = 0.0
    elif space_id == 1:
        params_indices = [1, 1, 1, 1, 1, 1]  # for redshift space
        f = 1.0
    print("params_indices: ", params_indices)

    alpha_1, alpha_2, sigma, b_0, b_scale = 1.0, 1.0, Sigma_0, 1.0, 0.0
    all_names = "alpha_1", "alpha_2", "Sigma_qmax", "f", "b_1", "b_partial"  # the same order as params_indices
    all_temperature = 0.01, 0.01, 0.1, 0.1, 0.01, 0.1

    pool = MPIPool(loadbalance=True)
    for z_id in xrange(3):
        norm_gf = growth_factor(float(sim_z[z_id]), Omega_m) / G_0
        # ##Sigma_z = Sigma_0 * norm_gf
        # if set_Sigma_xyz_theory == "True":
        #     print("Sigma_z: ", Sigma_z)
        #     sigma_xy, sigma_z = Sigma_z, Sigma_z
        # else:
        #     if params_indices[2] == 0:
        #         sigma_xy = 0.0
        #     else:
        #         sigma_xy = 10.0
        #     if params_indices[3] == 0:
        #         sigma_z = 0.0
        #     else:
        #         sigma_z = 10.0
        all_params = alpha_1, alpha_2, sigma, f, b_0, b_scale
        np.random.seed()
        for mcut_id in xrange(N_masscut):
            N_params, theta, fix_params, params_T, params_name = set_params(
                all_params, params_indices, all_names, all_temperature)

            ifile_Pk = './run2_3_Pk_obs_2d_wnw_mean_{}_ksorted_mu_masscut/{}kave{}.wig_minus_now_mean_fof_a_{}_mcut{}.dat'.format(
                rec_dirs[rec_id], rec_fprefix[rec_id], sim_space[space_id], sim_a[z_id], mcut_Npar_list[z_id][mcut_id])
            Pk_wnw_diff_obs = np.loadtxt(ifile_Pk, dtype='f4', comments='#', usecols=(2,))  # note the columns are k, \mu, P(k, \mu)

            ifile_Cov_Pk = './run2_3_Cov_Pk_obs_2d_wnw_{}_ksorted_mu_masscut/{}kave{}.wig_minus_now_mean_fof_a_{}_mcut{}.dat'.format(
                rec_dirs[rec_id], rec_fprefix[rec_id], sim_space[space_id], sim_a[z_id], mcut_Npar_list[z_id][mcut_id])
            Cov_Pk_wnw = np.loadtxt(ifile_Cov_Pk, dtype='f4', comments='#')
            ivar_Pk_wnow = N_dataset / np.diag(Cov_Pk_wnw)  # the inverse variance of the mean

            params_mcmc = mcmc_routine(N_params, N_walkers, theta, params_T, params_indices, fix_params,
                                       k_p, mu_p, Pk_wnw_diff_obs, ivar_Pk_wnow, tck_Pk_linw, tck_Pk_sm,
                                       norm_gf, params_name, pool)
            chi_square = chi2(params_mcmc[:, 0], params_indices, fix_params, k_p, mu_p, Pk_wnw_diff_obs,
                              ivar_Pk_wnow, tck_Pk_linw, tck_Pk_sm, norm_gf)
            reduced_chi2 = chi_square / (N_fitbin - N_params)
            print("Reduced chi2: {}\n".format(reduced_chi2))

            # output parameters into a file
            ofile_params = odir + 'fof_{}kave{}.wnw_diff_a_{}_mcut{}_params{}.dat'.format(
                rec_fprefix[rec_id], sim_space[space_id], sim_a[z_id],
                mcut_Npar_list[z_id][mcut_id], ''.join(map(str, params_indices)))
            print(ofile_params)
            write_params(ofile_params, params_mcmc, params_name, reduced_chi2)

        np.random.seed()
        N_params, theta, fix_params, params_T, params_name = set_params(
            all_params, params_indices, all_names, all_temperature)

        # Fit for the DM power spectrum
        ifile_Pk = './run2_3_sub_Pk_2d_wnw_mean_{}_ksorted_mu/{}kave{}.wig_minus_now_mean_sub_a_{}.dat'.format(
            rec_dirs[rec_id], rec_fprefix[rec_id], sim_space[space_id], sim_a[z_id])
        Pk_wnw_diff_true = np.loadtxt(ifile_Pk, dtype='f4', comments='#', usecols=(2,))  # note the columns are k, \mu, P(k, \mu)
        print(ifile_Pk)

        ifile_Cov_Pk = './run2_3_sub_Cov_Pk_2d_wnw_{}_ksorted_mu/{}kave{}.wig_minus_now_mean_sub_a_{}.dat'.format(
            rec_dirs[rec_id], rec_fprefix[rec_id], sim_space[space_id], sim_a[z_id])
        Cov_Pk_wnw = np.loadtxt(ifile_Cov_Pk, dtype='f4', comments='#')
        ivar_Pk_wnow = N_dataset / np.diag(Cov_Pk_wnw)  # the inverse variance of the mean

        params_mcmc = mcmc_routine(N_params, N_walkers, theta, params_T, params_indices, fix_params,
                                   k_p, mu_p, Pk_wnw_diff_true, ivar_Pk_wnow, tck_Pk_linw, tck_Pk_sm,
                                   norm_gf, params_name, pool)
        chi_square = chi2(params_mcmc[:, 0], params_indices, fix_params, k_p, mu_p, Pk_wnw_diff_true,
                          ivar_Pk_wnow, tck_Pk_linw, tck_Pk_sm, norm_gf)
        reduced_chi2 = chi_square / (N_fitbin - N_params)
        print('Reduced chi2: {}\n'.format(reduced_chi2))

        ofile_params = odir + 'sub_{}kave{}.wnw_diff_a_{}_params{}.dat'.format(
            rec_fprefix[rec_id], sim_space[space_id], sim_a[z_id], ''.join(map(str, params_indices)))
        write_params(ofile_params, params_mcmc, params_name, reduced_chi2)

    pool.close()
def run_espei(run_settings):
    """Wrapper around the ESPEI fitting procedure, taking only a settings dictionary.

    Parameters
    ----------
    run_settings : dict
        Dictionary of input settings

    Returns
    -------
    Either a Database (for generate parameters only) or a tuple of (Database, sampler)
    """
    run_settings = get_run_settings(run_settings)
    system_settings = run_settings['system']
    output_settings = run_settings['output']
    generate_parameters_settings = run_settings.get('generate_parameters')
    mcmc_settings = run_settings.get('mcmc')

    # handle verbosity
    verbosity = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG}
    logging.basicConfig(level=verbosity[output_settings['verbosity']])

    # load datasets and handle i/o
    logging.debug('Loading and checking datasets.')
    dataset_path = system_settings['datasets']
    datasets = load_datasets(sorted(recursive_glob(dataset_path, '*.json')))
    if len(datasets.all()) == 0:
        logging.warning('No datasets were found in the path {}. This should be a directory containing dataset files ending in `.json`.'.format(dataset_path))
    logging.debug('Finished checking datasets')

    with open(system_settings['phase_models']) as fp:
        phase_models = json.load(fp)

    if generate_parameters_settings is not None:
        refdata = generate_parameters_settings['ref_state']
        excess_model = generate_parameters_settings['excess_model']
        dbf = generate_parameters(phase_models, datasets, refdata, excess_model)
        dbf.to_file(output_settings['output_db'], if_exists='overwrite')

    if mcmc_settings is not None:
        tracefile = output_settings['tracefile']
        probfile = output_settings['probfile']
        # Check that the MCMC output files do not already exist;
        # this only matters if we are actually running MCMC.
        if os.path.exists(tracefile):
            raise OSError('Tracefile "{}" exists and would be overwritten by a new run. Use the ``output.tracefile`` setting to set a different name.'.format(tracefile))
        if os.path.exists(probfile):
            raise OSError('Probfile "{}" exists and would be overwritten by a new run. Use the ``output.probfile`` setting to set a different name.'.format(probfile))

        # scheduler setup
        if mcmc_settings['scheduler'] == 'MPIPool':
            # check that cores is not an input setting
            if mcmc_settings.get('cores') is not None:
                logging.warning("MPI does not take the cores input setting.")
            from emcee.utils import MPIPool
            # code recommended by emcee: if not master, wait for instructions then exit
            client = MPIPool()
            if not client.is_master():
                logging.debug('MPIPool is not master. Waiting for instructions...')
                client.wait()
                sys.exit(0)
            logging.info("Using MPIPool on {} MPI ranks".format(client.size))
        elif mcmc_settings['scheduler'] == 'dask':
            from distributed import LocalCluster
            cores = mcmc_settings.get('cores', multiprocessing.cpu_count())
            if cores > multiprocessing.cpu_count():
                cores = multiprocessing.cpu_count()
                logging.warning("The number of cores chosen is larger than available. "
                                "Defaulting to run on the {} available cores.".format(cores))
            scheduler = LocalCluster(n_workers=cores, threads_per_worker=1, processes=True)
            client = ImmediateClient(scheduler)
            client.run(logging.basicConfig, level=verbosity[output_settings['verbosity']])
            logging.info("Running with dask scheduler: %s [%s cores]" % (scheduler, sum(client.ncores().values())))
            try:
                logging.info("bokeh server for dask scheduler at localhost:{}".format(client.scheduler_info()['services']['bokeh']))
            except KeyError:
                logging.info("Install bokeh to use the dask bokeh server.")
        elif mcmc_settings['scheduler'] == 'emcee':
            from emcee.interruptible_pool import InterruptiblePool
            cores = mcmc_settings.get('cores', multiprocessing.cpu_count())
            if cores > multiprocessing.cpu_count():
                cores = multiprocessing.cpu_count()
                logging.warning("The number of cores chosen is larger than available. "
                                "Defaulting to run on the {} available cores.".format(cores))
            client = InterruptiblePool(processes=cores)
            logging.info("Using multiprocessing on {} cores".format(cores))
        elif mcmc_settings['scheduler'] == 'None':
            client = None
            logging.info("Not using a parallel scheduler. ESPEI is running MCMC on a single core.")

        # get a Database
        if mcmc_settings.get('input_db'):
            dbf = Database(mcmc_settings.get('input_db'))

        # load the restart chain if needed
        if mcmc_settings.get('restart_chain'):
            restart_chain = np.load(mcmc_settings.get('restart_chain'))
        else:
            restart_chain = None

        # load the remaining MCMC fitting parameters
        mcmc_steps = mcmc_settings.get('mcmc_steps')
        save_interval = mcmc_settings.get('mcmc_save_interval')
        chains_per_parameter = mcmc_settings.get('chains_per_parameter')
        chain_std_deviation = mcmc_settings.get('chain_std_deviation')
        deterministic = mcmc_settings.get('deterministic')

        dbf, sampler = mcmc_fit(
            dbf, datasets, scheduler=client, mcmc_steps=mcmc_steps,
            chains_per_parameter=chains_per_parameter,
            chain_std_deviation=chain_std_deviation,
            save_interval=save_interval,
            tracefile=tracefile, probfile=probfile,
            restart_chain=restart_chain,
            deterministic=deterministic,
        )

        dbf.to_file(output_settings['output_db'], if_exists='overwrite')
        # close the scheduler, if possible
        if hasattr(client, 'close'):
            client.close()
        return dbf, sampler
    return dbf
def fit_subsamplefof_mean():
    parser = argparse.ArgumentParser(
        description='This is the MCMC code to get the fitting parameters, made by Zhejie Ding.')
    parser.add_argument('-rec_id', "--rec_id", help='The id of reconstruction, either 0 or 1.',
                        required=True)  # 0: pre-reconstruction; 1: post-reconstruction
    parser.add_argument('-space_id', "--space_id", help='0 for real space, 1 for redshift space.',
                        required=True)
    parser.add_argument('-set_Sigma_xyz_theory', "--set_Sigma_xyz_theory",
                        help='Determine whether the parameters \Sigma_xy and \Sigma_z are fixed or not, either True or False.',
                        required=True)
    parser.add_argument('-set_Sigma_sm_theory', "--set_Sigma_sm_theory",
                        help='Determine whether we use sigma_sm from theory in the fitting model. '
                             'If False, sigma_sm=0 (be careful that sigma_sm=\inf in the real-space case).',
                        required=True)
    args = parser.parse_args()
    print("args: ", args)
    rec_id = int(args.rec_id)
    space_id = int(args.space_id)
    set_Sigma_xyz_theory = args.set_Sigma_xyz_theory
    set_Sigma_sm_theory = args.set_Sigma_sm_theory
    print("rec_id: ", rec_id, "space_id: ", space_id)
    print("set_Sigma_xyz_theory: ", set_Sigma_xyz_theory, "set_Sigma_sm_theory: ", set_Sigma_sm_theory)

    N_walkers = 40     # Increasing N_walkers decreases the number of steps needed for the fitting parameters to converge, but increases the running time.
    N_walkersteps = 5000
    # simulation run name
    N_dataset = 20
    N_mu_bin = 100
    #N_skip_header = 11
    #N_skip_footer = 31977
    Omega_m = 0.3075
    G_0 = growth_factor(0.0, Omega_m)  # G_0 at z=0, normalization factor
    Volume = 1380.0**3.0               # the volume of the simulation box

    sim_z = ['0', '0.6', '1.0']
    sim_seed = [0, 9]
    sim_wig = ['NW', 'WG']
    sim_a = ['1.0000', '0.6250', '0.5000']
    sim_space = ['r', 's']    # r for real space; s for redshift space
    rec_dirs = ['DD', 'ALL'] # "ALL" stores P(k, \mu) after the reconstruction process; "DD" is before reconstruction.
    rec_fprefix = ['', 'R']

    mcut_Npar_list = [[37, 149, 516, 1524, 3830],
                      [35, 123, 374, 962, 2105],
                      [34, 103, 290, 681, 1390]]
    N_masscut = np.size(mcut_Npar_list, axis=1)

    # Sigma_sm = sqrt(2 * Sigma_RR) in the post-reconstruction case; pre-reconstruction doesn't use sub_Sigma_RR.
    Sigma_RR_list = [[37, 48.5, 65.5, 84.2, 110],
                     [33, 38, 48.5, 63.5, 91.5],
                     [31, 38, 49, 65, 86]]
    sub_Sigma_RR = 50.0  # note from Hee-Jong's recording

    inputf = '../Zvonimir_data/planck_camb_56106182_matterpower_smooth_z0.dat'
    k_smooth, Pk_smooth = np.loadtxt(inputf, dtype='f8', comments='#', unpack=True)
    tck_Pk_sm = interpolate.splrep(k_smooth, Pk_smooth)

    inputf = '../Zvonimir_data/planck_camb_56106182_matterpower_z0.dat'
    k_wiggle, Pk_wiggle = np.loadtxt(inputf, dtype='f8', comments='#', unpack=True)
    tck_Pk_linw = interpolate.splrep(k_wiggle, Pk_wiggle)

    # First, read one file to get the k bins we want for the fitting range.
    dir0 = '/Users/ding/Documents/playground/WiggleNowiggle/subsample_FoF_data_HS/Pk_obs_2d_wnw_mean_DD_ksorted_mu_masscut/'
    inputf = dir0 + 'fof_kaver.wnw_diff_a_0.6250_mcut35_fraction0.126.dat'
    k_p, mu_p = np.loadtxt(inputf, dtype='f8', comments='#', delimiter=' ', usecols=(0, 1), unpack=True)
    #print(k_p, mu_p)
    N_fitbin = len(k_p)
    #print('# of (k, mu) bins: ', N_fitbin)

    # output directory for the fitted parameters
    odir = './params_{}_wig-now_b_bscale_fitted_mean_dset/'.format(rec_dirs[rec_id])
    if not os.path.exists(odir):
        os.makedirs(odir)
    print("N_walkers: ", N_walkers, "N_walkersteps: ", N_walkersteps, "\n")

    if rec_id == 0:
        ##Sigma_0 = 8.3364  # the approximate value of \Sigma_xy and \Sigma_z, in Mpc/h, at z=0.
        Sigma_0 = 7.8364    # suggested by Zvonimir, at z=0
    elif rec_id == 1:
        Sigma_0 = 2.84
    ##space_id = 1  # in redshift space

    # 0: parameter fixed, 1: parameter free.
    #params_indices = [1, 1, 1, 1, 1, 1]    # It doesn't fit \Sigma and b_scale well; it's kind of overfitting.
    ##params_indices = [1, 1, 1, 1, 0, 1, 1, 0, 0]  # b0 needs to be fitted. In this case, make sure \Sigma_xy and \Sigma_z are positive, which should be set in mcmc_routine.
    ##params_indices = [0, 1, 0, 1, 1, 0]   # make sure \alpha_xy = \alpha_z and \Sigma_xy = \Sigma_z
    params_indices = [1, 1, 1, 1, 0, 0, 0, 0, 0]  # Set sigma_fog=0, f=0, b_scale=0, b_0=1.0 for the subsampled DM case in real space.
    ##params_indices = [1, 1, 0, 0, 1, 1]   # with Sigma fixed to the theoretical value; then set sigma_xy, sigma_z equal to Sigma_z
    ##params_indices = [0, 1, 0, 0, 1, 1]   # In this case, make sure \alpha_1 = \alpha_2 in lnlike(..) and set sigma_xy, sigma_z equal to Sigma_z.
    print("params_indices: ", params_indices)

    ##alpha_1, alpha_2, sigma_fog, f, b_0, b_scale = 1.0, 1.0, 2.0, 0.2, 1.0, 0.0
    alpha_1, alpha_2, sigma_fog, f, b_0, b_scale = 1.0, 1.0, 0.0, 0.0, 1.0, 0.0  # ! only for real space, i.e., sigma_fog and f set equal to 0
    all_names = "alpha_1", "alpha_2", "sigma_xy", "sigma_z", "sigma_sm", "sigma_fog", "f", "b_0", "b_scale"  # the same order as params_indices
    all_temperature = 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 0.01, 0.1

    pool = MPIPool(loadbalance=True)
    for z_id in xrange(3):
        norm_gf = growth_factor(float(sim_z[z_id]), Omega_m) / G_0
        Sigma_z = Sigma_0 * norm_gf / 2.0  # divided by 2.0 for the estimated \Sigma of post-reconstruction
        ##Sigma_z = Sigma_0 * norm_gf
        if set_Sigma_xyz_theory == "True":
            print("Sigma_z: ", Sigma_z)
            sigma_xy, sigma_z = Sigma_z, Sigma_z
        else:
            if params_indices[2] == 0:
                sigma_xy = 0.0
            else:
                sigma_xy = 10.0
            if params_indices[3] == 0:
                sigma_z = 0.0
            else:
                sigma_z = 10.0
        np.random.seed()

        # Set it for FoF fitting
        # for mcut_id in xrange(N_masscut):
        #     if set_Sigma_sm_theory == "True":
        #         sigma_sm = (float(Sigma_RR_list[z_id][mcut_id]) * 2.0)**0.5
        #     else:
        #         sigma_sm = 0.0
        #
        #     all_params = alpha_1, alpha_2, sigma_xy, sigma_z, sigma_sm, sigma_fog, f, b_0, b_scale
        #     N_params, theta, fix_params, params_T, params_name = set_params(all_params, params_indices, all_names, all_temperature)
        #
        #     ifile_Pk = './run2_3_Pk_obs_2d_wnw_mean_{}_ksorted_mu_masscut/{}kave{}.wig_minus_now_mean_fof_a_{}_mcut{}.dat'.format(rec_dirs[rec_id], rec_fprefix[rec_id], sim_space[space_id], sim_a[z_id], mcut_Npar_list[z_id][mcut_id])
        #     Pk_wnw_diff_obs = np.loadtxt(ifile_Pk, dtype='f4', comments='#', usecols=(2,))  # note the columns are k, \mu, P(k, \mu)
        #
        #     ifile_Cov_Pk = './run2_3_Cov_Pk_obs_2d_wnw_{}_ksorted_mu_masscut/{}kave{}.wig_minus_now_mean_fof_a_{}_mcut{}.dat'.format(rec_dirs[rec_id], rec_fprefix[rec_id], sim_space[space_id], sim_a[z_id], mcut_Npar_list[z_id][mcut_id])
        #     Cov_Pk_wnw = np.loadtxt(ifile_Cov_Pk, dtype='f4', comments='#')
        #     ivar_Pk_wnow = N_dataset/np.diag(Cov_Pk_wnw)  # the inverse variance of the mean
        #
        #     params_mcmc = mcmc_routine(N_params, N_walkers, N_walkersteps, theta, params_T, params_indices, fix_params, k_p, mu_p, Pk_wnw_diff_obs, ivar_Pk_wnow, tck_Pk_linw, tck_Pk_sm, norm_gf, params_name, pool)
        #
        #     chi_square = chi2(params_mcmc[:, 0], params_indices, fix_params, k_p, mu_p, Pk_wnw_diff_obs, ivar_Pk_wnow, tck_Pk_linw, tck_Pk_sm, norm_gf)
        #     reduced_chi2 = chi_square/(N_fitbin-N_params)
        #     print("Reduced chi2: {}\n".format(reduced_chi2))
        #     # output parameters into a file
        #     if set_Sigma_xyz_theory == "False":
        #         ofile_params = odir + 'fof_{}kave{}.wnw_diff_a_{}_mcut{}_params{}_Sigma_sm{}.dat'.format(rec_fprefix[rec_id], sim_space[space_id], sim_a[z_id], mcut_Npar_list[z_id][mcut_id], ''.join(map(str, params_indices)), round(sigma_sm, 3))
        #     else:
        #         ofile_params = odir + 'fof_{}kave{}.wnw_diff_a_{}_mcut{}_params{}_isotropic_Sigmaz_{}_Sigma_sm{}.dat'.format(rec_fprefix[rec_id], sim_space[space_id], sim_a[z_id], mcut_Npar_list[z_id][mcut_id], ''.join(map(str, params_indices)), round(Sigma_z, 3), round(sigma_sm, 3))
        #     print(ofile_params)
        #     write_params(ofile_params, params_mcmc, params_name, reduced_chi2)

        # Set it for DM subsample fitting
        sub_sigma_sm = (sub_Sigma_RR * 2.0)**0.5  # set \Sigma_sm = sqrt(50*2) = 10, for post-reconstruction
        print("sub_sigma_sm: ", sub_sigma_sm)
        all_params = alpha_1, alpha_2, sigma_xy, sigma_z, sub_sigma_sm, sigma_fog, f, b_0, b_scale
        N_params, theta, fix_params, params_T, params_name = set_params(
            all_params, params_indices, all_names, all_temperature)

        # Fit for the DM power spectrum
        ifile_Pk = './run2_3_sub_Pk_2d_wnw_mean_{}_ksorted_mu/{}kave{}.wig_minus_now_mean_sub_a_{}.dat'.format(
            rec_dirs[rec_id], rec_fprefix[rec_id], sim_space[space_id], sim_a[z_id])
        Pk_wnw_diff_true = np.loadtxt(ifile_Pk, dtype='f4', comments='#', usecols=(2,))  # note the columns are k, \mu, P(k, \mu)

        ifile_Cov_Pk = './run2_3_sub_Cov_Pk_2d_wnw_{}_ksorted_mu/{}kave{}.wig_minus_now_mean_sub_a_{}.dat'.format(
            rec_dirs[rec_id], rec_fprefix[rec_id], sim_space[space_id], sim_a[z_id])
        Cov_Pk_wnw = np.loadtxt(ifile_Cov_Pk, dtype='f4', comments='#')
        ivar_Pk_wnow = N_dataset / np.diag(Cov_Pk_wnw)  # the inverse variance of the mean

        params_mcmc = mcmc_routine(N_params, N_walkers, N_walkersteps, theta, params_T, params_indices,
                                   fix_params, k_p, mu_p, Pk_wnw_diff_true, ivar_Pk_wnow,
                                   tck_Pk_linw, tck_Pk_sm, norm_gf, params_name, pool)
        chi_square = chi2(params_mcmc[:, 0], params_indices, fix_params, k_p, mu_p, Pk_wnw_diff_true,
                          ivar_Pk_wnow, tck_Pk_linw, tck_Pk_sm, norm_gf)
        reduced_chi2 = chi_square / (N_fitbin - N_params)
        print('Reduced chi2: {}\n'.format(reduced_chi2))

        if rec_id == 1:
            if set_Sigma_xyz_theory == "False":
                ofile_params = odir + 'sub_{}kave{}.wnw_diff_a_{}_params{}_Sigma_sm{}.dat'.format(
                    rec_fprefix[rec_id], sim_space[space_id], sim_a[z_id],
                    ''.join(map(str, params_indices)), round(sub_sigma_sm, 3))
            else:
                ofile_params = odir + 'sub_{}kave{}.wnw_diff_a_{}_params{}_isotropic_Sigmaz_{}_Sigma_sm{}.dat'.format(
                    rec_fprefix[rec_id], sim_space[space_id], sim_a[z_id],
                    ''.join(map(str, params_indices)), round(Sigma_z, 3), round(sub_sigma_sm, 3))
        elif rec_id == 0:
            if set_Sigma_xyz_theory == "False":
                ofile_params = odir + 'sub_{}kave{}.wnw_diff_a_{}_params{}.dat'.format(
                    rec_fprefix[rec_id], sim_space[space_id], sim_a[z_id],
                    ''.join(map(str, params_indices)))
            else:
                ofile_params = odir + 'sub_{}kave{}.wnw_diff_a_{}_params{}_isotropic_Sigmaz_{}.dat'.format(
                    rec_fprefix[rec_id], sim_space[space_id], sim_a[z_id],
                    ''.join(map(str, params_indices)), round(Sigma_z, 3))
        # write_params(ofile_params, params_mcmc, params_name, reduced_chi2)

    pool.close()
try:
    # Initialize the MPI-based pool used for parallelization.
    pool = MPIPool()
    using_mpi = True  # the original never set this flag on success; fixed
except:
    print("Either MPI doesn't seem to be installed or you aren't running with MPI...")
    using_mpi = False
    pool = None

if using_mpi:
    if not pool.is_master():
        # Wait for instructions from the master process.
        pool.wait()
        sys.exit(0)
else:
    print("MPI available for this code! - call this with e.g. mpirun -np 16 python test_betapic_TGAS.py")

sampler = fit_group.fit_one_group(star_params, init_mod=beta_pic_group,
                                  nwalkers=30, nchain=10000, nburn=1000, return_sampler=True, pool=pool,
                                  init_sdev=np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, .01, .01, .01, .1, .1]),
                                  background_density=1e-6, use_swig=True, plotit=False)

if using_mpi:
    # Close the processes.
    pool.close()

#print("Autocorrelation lengths: ")
#print(sampler.get_autocorr_time(c=2.5))

pickle.dump((sampler.chain[:, -1, :], sampler.lnprobability[:, -1]),
            open("betaPic_sampler_end_m06.pkl", 'w'))

print("Autocorrelation lengths: ")
print(sampler.get_autocorr_time(c=2.0))
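# The try/except fallback above is a recurring pattern in these scripts. A reusable
# sketch (the helper name get_pool is hypothetical, not part of the original code):
import sys
from emcee.utils import MPIPool

def get_pool():
    """Return (pool, using_mpi); MPI workers block in wait() and exit when closed."""
    try:
        pool = MPIPool()
    except (ImportError, ValueError):
        # mpi4py is missing, or the script was not launched under mpirun
        return None, False
    if not pool.is_master():
        pool.wait()  # serve map() requests from the master until the pool closes
        sys.exit(0)
    return pool, True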
def run_mcmc(cos_twol, sin_twol, sin_l, cos_l, sin_b, cos_b, data_pml, data_err_pml,
             data_pmb, data_err_pmb, data_plx, data_err_plx, data_vlos, data_err_vlos,
             data_age, age_number, distance_number, Number):
    distance = distance_list[distance_number]
    age = age_list[age_number]

    # define the objective function
    negativelnLikelihood = lambda *args: -lnlike(*args)[0]

    # initial guess for p
    p_0 = p0_list[age_number]

    # np.random.randn draws from a Gaussian with mean 0 and standard deviation 1,
    # so this adds Gaussian scatter to each initial value.
    pos = [p_0 + 1. * np.random.randn(ndim) for i in range(N_WALKERS)]

    # for multiprocessing
    pool = MPIPool(loadbalance=True)
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    observables = (cos_twol, sin_twol, sin_l, cos_l, sin_b, cos_b, data_pml, data_err_pml,
                   data_pmb, data_err_pmb, data_plx, data_err_plx, data_vlos, data_err_vlos,
                   data_age, age_number)
    # Pass the MPI pool to the sampler; the original omitted it, leaving the MPI workers idle.
    sampler = emcee.EnsembleSampler(N_WALKERS, ndim, lnprob, args=observables, pool=pool)
    sampler.run_mcmc(pos, Nrun)
    pool.close()
    print('Done.')

    #---
    # store the results
    burnin = Nburn
    samples = sampler.chain[:, burnin:, :].reshape((-1, ndim))

    plt.clf()
    height_fig_inch = int((ndim + 1) * 3.0)
    fig, axes = plt.subplots(ndim + 1, 1, sharex=True, figsize=(8, height_fig_inch))
    for i in range(ndim):
        axes[i].plot(sampler.chain[:, :, i].T, color='k', alpha=0.5)
        axes[i].set_ylabel(_list_labels[i])
    # last panel shows the evolution of the ln-likelihood for the ensemble of walkers
    axes[-1].plot(sampler.lnprobability.T, color='k', alpha=0.5)
    axes[-1].set_ylabel('ln(L)')
    maxlnlike = np.max(sampler.lnprobability)
    axes[-1].set_ylim(maxlnlike - 3 * ndim, maxlnlike)
    fig.tight_layout(h_pad=0.)

    filename_pre = 'newModel_1/' + distance + '/line-time_walker%dNrun%dNburn%d_withscatter_' \
        + age + 'Gyr_' + distance + '_%dstars_newModel_1'
    filename = filename_pre % (N_WALKERS, Nrun, Nburn, Number)
    fig.savefig(filename + '.png')

    # Make a triangle plot
    burnin = Nburn
    samples = sampler.chain[:, burnin:, :].reshape((-1, ndim))
    # convert scatters to exp(scatters)
    #samples[:, -3] = np.exp(samples[:, -3])
    #samples[:, -2] = np.exp(samples[:, -2])
    #samples[:, -1] = np.exp(samples[:, -1])
    fig = corner.corner(
        samples[:, :-3],
        labels=_list_labels,
        label_kwargs={'fontsize': 20},
        # truths=_list_answer,
        quantiles=[0.16, 0.5, 0.84],
        plot_datapoints=True,
        show_titles=True,
        title_args={'fontsize': 20},
        title_fmt='.3f',
    )
    filename_pre = 'newModel_1/' + distance + '/triangle_walker%dNrun%dNburn%d_withscatter_' \
        + age + 'Gyr_' + distance + '_%dstars_newModel_1'
    filename = filename_pre % (N_WALKERS, Nrun, Nburn, Number)
    fig.savefig(filename + '.png')
    #fig.savefig(filename + '.pdf')

    p = np.mean(samples, axis=0)
    e = np.var(samples, axis=0)**0.5
    filename = 'newModel_1/result_' + age + 'Gyr_' + distance + '_' + str(Number) + 'stars_newModel_1' + '.txt'
    np.savetxt(filename, (p, e), fmt="%.3f", delimiter=',')

    # Evaluate the model once at the mean parameters; the original called lnlike
    # five times in a row to pull out the five return values one by one.
    like_out = lnlike(p, cos_twol, sin_twol, sin_l, cos_l, sin_b, cos_b, data_pml, data_err_pml,
                      data_pmb, data_err_pmb, data_plx, data_err_plx, data_vlos, data_err_vlos,
                      data_age, age_number)
    va, vR2, sigmaphi, meanvR, sigmaR = like_out[1], like_out[2], like_out[3], like_out[4], like_out[5]

    f = open('va_newModel_1.txt', 'a')
    printline = '%s, %f, %f, %f, %f, %f, %f\n' % (filename, np.mean(va), vR2, sigmaphi,
                                                  np.mean(sigmaR) / 80., meanvR, sigmaR)
    f.write(printline)
    f.close()

    print(filename)
    return None
def run_emcee_2(M2_d, P_orb_obs, ecc_obs, ra, dec, M2_d_err=1.0,
                P_orb_obs_err=1.0, ecc_obs_err=0.05, nwalkers=80,
                nburn=1000, nsteps=1000, threads=1, mpi=False):
    """ Run the emcee function

    Parameters
    ----------
    M2_d : float
        Observed secondary mass
    P_orb_obs : float
        Observed orbital period
    ecc_obs : float
        Observed orbital eccentricity
    ra : float
        Observed right ascension
    dec : float
        Observed declination
    threads : int
        Number of threads to use for parallelization
    mpi : bool
        If true, use MPIPool for parallelization

    Returns
    -------
    sampler : emcee object
    """

    # First thing is to load the sse data and SF_history data
    load_sse.load_sse()
    sf_history.load_sf_history()

    # Get initial values
    initial_vals = get_initial_values(M2_d, nwalkers=nwalkers)

    # Define sampler
    args = [[M2_d, M2_d_err, P_orb_obs, P_orb_obs_err, ecc_obs, ecc_obs_err, ra, dec]]
    if mpi:
        pool = MPIPool()
        if not pool.is_master():
            pool.wait()
            sys.exit(0)
        sampler = emcee.EnsembleSampler(nwalkers=nwalkers, dim=10, lnpostfn=ln_posterior, args=args, pool=pool)
    elif threads != 1:
        sampler = emcee.EnsembleSampler(nwalkers=nwalkers, dim=10, lnpostfn=ln_posterior, args=args, threads=threads)
    else:
        sampler = emcee.EnsembleSampler(nwalkers=nwalkers, dim=10, lnpostfn=ln_posterior, args=args)

    # Assign initial values; p0 has shape (nwalkers, 10)
    p0 = set_walkers(initial_vals, args[0], nwalkers=nwalkers)

    # Burn-in 1
    pos, prob, state = sampler.run_mcmc(p0, N=nburn)
    sampler1 = copy.copy(sampler)

    # TESTING BEGIN - Get limiting ln_prob for worst 10 chains
    prob_lim = (np.sort(prob)[9] + np.sort(prob)[10]) / 2.0
    index_best = np.argmax(prob)
    for i in np.arange(len(prob)):
        # if sampler1.acceptance_fraction[i] == 0.0: pos[i] = np.copy(pos[index_best]) + np.random.normal(0.0, 0.005, size=10)
        if prob[i] < prob_lim:
            pos[i] = np.copy(pos[index_best]) + np.random.normal(0.0, 0.005, size=10)
    # TESTING END

    print "Burn-in 1 finished."
    print "Starting burn-in 2..."

    # Burn-in 2
    sampler.reset()
    pos, prob, state = sampler.run_mcmc(pos, N=nburn)
    sampler2 = copy.copy(sampler)

    # TESTING BEGIN - Get limiting ln_prob for worst 10 chains
    prob_lim = (np.sort(prob)[9] + np.sort(prob)[10]) / 2.0
    index_best = np.argmax(prob)
    for i in np.arange(len(prob)):
        # if sampler2.acceptance_fraction[i] == 0.0: pos[i] = np.copy(pos[index_best]) + np.random.normal(0.0, 0.005, size=10)
        if prob[i] < prob_lim:
            pos[i] = np.copy(pos[index_best]) + np.random.normal(0.0, 0.005, size=10)
    # TESTING END

    print "Burn-in 2 finished."
    print "Starting burn-in 3..."

    # Burn-in 3
    sampler.reset()
    pos, prob, state = sampler.run_mcmc(pos, N=nburn)
    sampler3 = copy.copy(sampler)

    # TESTING BEGIN - Get limiting ln_prob for worst 10 chains
    prob_lim = (np.sort(prob)[9] + np.sort(prob)[10]) / 2.0
    index_best = np.argmax(prob)
    for i in np.arange(len(prob)):
        # if sampler3.acceptance_fraction[i] == 0.0: pos[i] = np.copy(pos[index_best]) + np.random.normal(0.0, 0.005, size=10)
        if prob[i] < prob_lim:
            pos[i] = np.copy(pos[index_best]) + np.random.normal(0.0, 0.005, size=10)
    # TESTING END

    print "Burn-in 3 finished."
    print "Starting burn-in 4..."

    # Burn-in 4
    sampler.reset()
    pos, prob, state = sampler.run_mcmc(pos, N=nburn)
    sampler4 = copy.copy(sampler)

    print "Burn-in 4 finished."
    print "Starting production run..."

    # Full run
    sampler.reset()
    pos, prob, state = sampler.run_mcmc(pos, N=nsteps)

    print "Finished production run"

    if mpi is True:
        pool.close()

    return sampler1, sampler2, sampler3, sampler4, sampler
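# The reseed-the-worst-walkers block between burn-ins above is repeated four times
# verbatim. A small helper (hypothetical, shown only to document the trick) makes
# the intent clearer; with n_worst=10 it reproduces the hard-coded indices 9 and 10:
import numpy as np

def reseed_low_prob_walkers(pos, prob, n_worst=10, scale=0.005):
    """Move the n_worst lowest-probability walkers into a small Gaussian
    ball around the current best walker, as done between burn-ins above."""
    prob_lim = (np.sort(prob)[n_worst - 1] + np.sort(prob)[n_worst]) / 2.0
    index_best = np.argmax(prob)
    ndim = pos.shape[1]
    for i in np.arange(len(prob)):
        if prob[i] < prob_lim:
            pos[i] = np.copy(pos[index_best]) + np.random.normal(0.0, scale, size=ndim)
    return pos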
def main():

    #################################################
    ############Option parsing#######################
    #################################################

    #Parse command line options
    parser = argparse.ArgumentParser()
    parser.add_argument("-f","--file",dest="options_file",action="store",type=str,help="analysis options file")
    parser.add_argument("-v","--verbose",dest="verbose",action="store_true",default=False,help="turn on verbosity")
    parser.add_argument("-vv","--verbose_plus",dest="verbose_plus",action="store_true",default=False,help="turn on additional verbosity")
    parser.add_argument("-m","--mask_scale",dest="mask_scale",action="store_true",default=False,help="scale peaks and power spectrum to unmasked area")
    parser.add_argument("-c","--cut_convergence",dest="cut_convergence",action="store",default=None,help="select convergence values in (min,max) to compute the likelihood. Safe for single descriptor only!!")
    parser.add_argument("-g","--group_subfields",dest="group_subfields",action="store_true",default=False,help="group feature realizations by taking the mean over subfields, this makes a big difference in the covariance matrix")
    parser.add_argument("-s","--save_points",dest="save_points",action="store",default=None,help="save points in parameter space to external npy file")
    parser.add_argument("-ss","--save_debug",dest="save_debug",action="store_true",default=False,help="save a bunch of debugging info for the analysis")
    parser.add_argument("-p","--prefix",dest="prefix",action="store",default="",help="prefix of the emulator to pickle")
    parser.add_argument("-r","--realizations",dest="realizations",type=int,default=None,help="use only the first N realizations to estimate the covariance matrix")
    parser.add_argument("-d","--differentiate",dest="differentiate",action="store_true",default=False,help="differentiate the first minkowski functional to get the PDF")
    parser.add_argument("-ms","--mean_subtract",dest="mean_subtract",action="store_true",default=False,help="load in the observations with the subtracted means")

    cmd_args = parser.parse_args()
    if cmd_args.options_file is None:
        parser.print_help()
        sys.exit(0)

    #Set verbosity level
    if cmd_args.verbose_plus:
        logging.basicConfig(level=DEBUG_PLUS)
    elif cmd_args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    #Initialize MPI Pool
    try:
        pool = MPIPool()
    except:
        pool = None

    if (pool is not None) and (not pool.is_master()):
        pool.wait()
        sys.exit(0)

    if pool is not None:
        logging.info("Started MPI Pool.")

    #################################################################################################################
    #################Info gathering: covariance matrix, observation and emulator#####################################
    #################################################################################################################

    #start
    start = time.time()
    last_timestamp = start

    #Instantiate a FeatureLoader object that will take care of the memory loading
    feature_loader = FeatureLoader(cmd_args)

    ###########################################################################################################################################

    #Use this model for the covariance matrix (from the new set of 50 N body simulations)
    covariance_model = CFHTcov.getModels(root_path=feature_loader.options.get("simulations","root_path"))
    logging.info("Measuring covariance matrix from model {0}".format(covariance_model))

    #Load in the covariance matrix
    fiducial_feature_ensemble = feature_loader.load_features(covariance_model)

    #If the option is enabled, use only the first N realizations to estimate the covariance matrix
    if cmd_args.realizations is not None:
        logging.info("Using only the first {0} realizations to estimate the covariance matrix".format(cmd_args.realizations))
        fiducial_feature_ensemble = fiducial_feature_ensemble.subset(range(cmd_args.realizations))
        assert fiducial_feature_ensemble.num_realizations==cmd_args.realizations

    fiducial_features = fiducial_feature_ensemble.mean()
    features_covariance = fiducial_feature_ensemble.covariance()

    #timestamp
    now = time.time()
    logging.info("covariance loaded in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    ################################################################################################################################################

    #Get also the observation instance
    observation = CFHTLens(root_path=feature_loader.options.get("observations","root_path"))
    logging.info("Measuring the observations from {0}".format(observation))

    #And load the observations
    observed_feature = feature_loader.load_features(observation).mean()

    #timestamp
    now = time.time()
    logging.info("observation loaded in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    ################################################################################################################################################

    #Create a LikelihoodAnalysis instance by unpickling one of the emulators
    emulators_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"emulators")
    emulator_file = os.path.join(emulators_dir,"emulator{0}_{1}.p".format(cmd_args.prefix,output_string(feature_loader.feature_string)))
    logging.info("Unpickling emulator from {0}...".format(emulator_file))
    analysis = LikelihoodAnalysis.load(emulator_file)

    #timestamp
    now = time.time()
    logging.info("emulator unpickled in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    ####################################################################################################################
    ######################################Compute the chi2 cube#########################################################
    ####################################################################################################################

    logging.info("Initializing chi2 meshgrid...")

    #Read parameters to use from options
    use_parameters = feature_loader.options.get("parameters","use_parameters").replace(" ","").split(",")
    assert len(use_parameters)==3

    #Reparametrization hash key
    use_parameters_hash = "-".join(use_parameters)

    ########################################################################################
    #Might need to reparametrize the emulator here, use a dictionary for reparametrizations#
    ########################################################################################

    assert use_parameters_hash in reparametrization.keys(),"No reparametrization scheme specified for {0} parametrization".format(use_parameters_hash)

    if reparametrization[use_parameters_hash] is not None:
        #Reparametrize
        logging.info("Reparametrizing emulator according to {0} parametrization".format(use_parameters_hash))
        analysis.reparametrize(reparametrization[use_parameters_hash])
        #Retrain for safety
        analysis.train()

    #Log the current parametrization to the user
    logging.info("Using parametrization {0}".format(use_parameters_hash))

    #Set the points in parameter space on which to compute the chi2 (read extremes from options)
    par = list()
    for p in range(3):
        assert feature_loader.options.has_section(use_parameters[p]),"No extremes specified for parameter {0}".format(use_parameters[p])
        par.append(np.ogrid[feature_loader.options.getfloat(use_parameters[p],"min"):feature_loader.options.getfloat(use_parameters[p],"max"):feature_loader.options.getint(use_parameters[p],"num_points")*1j])

    num_points = len(par[0]) * len(par[1]) * len(par[2])
    points = np.array(np.meshgrid(par[0],par[1],par[2],indexing="ij")).reshape(3,num_points).transpose()

    #Now compute the chi2 at each of these points
    if pool:
        split_chunks = pool.size
        logging.info("Computing chi squared for {0} parameter combinations using {1} cores...".format(points.shape[0],pool.size))
    else:
        split_chunks = None
        logging.info("Computing chi squared for {0} parameter combinations using 1 core...".format(points.shape[0]))

    chi_squared = analysis.chi2(points,observed_feature=observed_feature,features_covariance=features_covariance,pool=pool,split_chunks=split_chunks)

    now = time.time()
    logging.info("chi2 calculations completed in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    #Close pool
    if pool is not None:
        pool.close()
        logging.info("Closed MPI Pool.")

    #save output
    likelihoods_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"likelihoods_{0}".format(use_parameters_hash))
    prefix = cmd_args.prefix
    if cmd_args.mean_subtract:
        prefix += "_meansub"

    if not os.path.isdir(likelihoods_dir):
        os.mkdir(likelihoods_dir)

    if cmd_args.realizations is None:
        chi2_file = os.path.join(likelihoods_dir,"chi2{0}_{1}.npy".format(prefix,output_string(feature_loader.feature_string)))
        likelihood_file = os.path.join(likelihoods_dir,"likelihood{0}_{1}.npy".format(prefix,output_string(feature_loader.feature_string)))
    else:
        chi2_file = os.path.join(likelihoods_dir,"chi2{0}{1}real_{2}.npy".format(prefix,cmd_args.realizations,output_string(feature_loader.feature_string)))
        likelihood_file = os.path.join(likelihoods_dir,"likelihood{0}{1}real_{2}.npy".format(prefix,cmd_args.realizations,output_string(feature_loader.feature_string)))

    logging.info("Saving chi2 to {0}".format(chi2_file))
    np.save(chi2_file,chi_squared.reshape(par[0].shape + par[1].shape + par[2].shape))

    logging.info("Saving full likelihood to {0}".format(likelihood_file))
    likelihood_cube = analysis.likelihood(chi_squared.reshape(par[0].shape + par[1].shape + par[2].shape))
    np.save(likelihood_file,likelihood_cube)

    #Find the maximum of the likelihood using ContourPlot functionality
    contour = ContourPlot()
    contour.getLikelihood(likelihood_cube,parameter_axes={use_parameters[0]:0,use_parameters[1]:1,use_parameters[2]:2},parameter_labels={use_parameters[0]:"0",use_parameters[1]:"1",use_parameters[2]:"2"})
    contour.getUnitsFromOptions(feature_loader.options)
    parameters_maximum = contour.getMaximum()
    parameter_keys = parameters_maximum.keys()
    parameter_keys.sort(key=contour.parameter_axes.get)

    #Display the new best fit before exiting
    best_fit_parameters = np.array([ parameters_maximum[par_key] for par_key in parameter_keys ])
    logging.info("Best fit is [ {0[0]:.2f} {0[1]:.2f} {0[2]:.2f} ], chi2={1[0]:.3f}({2} dof)".format(best_fit_parameters,analysis.chi2(np.array(best_fit_parameters),features_covariance=features_covariance,observed_feature=observed_feature),analysis.training_set.shape[1]))

    #Additionally save some debugging info to plot, etc...
    if cmd_args.save_debug:
        troubleshoot_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"troubleshoot_{0}".format(use_parameters_hash))
        if not os.path.isdir(troubleshoot_dir):
            os.mkdir(troubleshoot_dir)

        logging.info("Saving troubleshoot info to {0}...".format(troubleshoot_dir))

        np.save(os.path.join(troubleshoot_dir,"observation_{0}.npy".format(output_string(feature_loader.feature_string))),observed_feature)
        np.save(os.path.join(troubleshoot_dir,"covariance_{0}.npy".format(output_string(feature_loader.feature_string))),features_covariance)
        np.save(os.path.join(troubleshoot_dir,"fiducial_{0}.npy".format(output_string(feature_loader.feature_string))),fiducial_features)
        np.save(os.path.join(troubleshoot_dir,"best_fit_features_{0}.npy".format(output_string(feature_loader.feature_string))),analysis.predict(best_fit_parameters))
        np.save(os.path.join(troubleshoot_dir,"fiducial_from_interpolator_{0}.npy".format(output_string(feature_loader.feature_string))),analysis.predict(np.array([0.26,-1.0,0.800])))
        np.save(os.path.join(troubleshoot_dir,"chi2_contributions_{0}.npy".format(output_string(feature_loader.feature_string))),analysis.chi2Contributions(best_fit_parameters,observed_feature=observed_feature,features_covariance=features_covariance))

    end = time.time()
    logging.info("DONE!!")
    logging.info("Completed in {0:.1f}s".format(end-start))
    while jj < len(y):
        print 'icount, jj', icount, jj
        iipix_mask, iipix = WLanalysis.coords2grid(x[jj:jj+istep], y[jj:jj+istep],
                                                   idata.flatten().reshape(1, -1)[:, jj:jj+istep],
                                                   size=sizes[Wx-1])
        ipix_mask += iipix_mask
        ipix += iipix
        jj += istep
    print icount, 'W%i done coords2grid %s' % (Wx, icount)  #, time.strftime("%Y-%m-%d %H:%M")
    save(mask_dir + 'smaller/weight0_W%i_%i_numpix' % (Wx, icount), ipix)
    save(mask_dir + 'smaller/weight0_W%i_%i_nummask' % (Wx, icount), ipix_mask)
    # ipix is the number of pixels falling in that big pixel; ipix_mask is the mask
    return ipix, ipix_mask

p = MPIPool()
if not p.is_master():
    p.wait()
    sys.exit(0)

#p.map(partialdata2grid, range(63))
ismall_map = p.map(partialdata2grid, range(63))
small_map = sum(array(ismall_map), axis=0)
save(mask_dir + 'weight0_W%i_smaller_mask.npy' % (Wx), small_map)

weight = 1 - small_map[1] / small_map[0]
weight[isnan(weight)] = 0
save(mask_dir + 'ludoweight_weight0_W%i.npy' % Wx, weight)

mask = weight / weight
mask[isnan(mask)] = 0
save(mask_dir + 'ludomask_weight0_W%i.npy' % Wx, mask)

p.close()
def main():

    #################################################
    ############Option parsing#######################
    #################################################

    #Parse command line options
    parser = argparse.ArgumentParser()
    parser.add_argument("-f","--file",dest="options_file",action="store",type=str,help="analysis options file")
    parser.add_argument("-v","--verbose",dest="verbose",action="store_true",default=False,help="turn on verbosity")
    parser.add_argument("-vv","--verbose_plus",dest="verbose_plus",action="store_true",default=False,help="turn on additional verbosity")
    parser.add_argument("-m","--mask_scale",dest="mask_scale",action="store_true",default=False,help="scale peaks and power spectrum to unmasked area")
    parser.add_argument("-c","--cut_convergence",dest="cut_convergence",action="store",default=None,help="select convergence values in (min,max) to compute the likelihood. Safe for single descriptor only!!")
    parser.add_argument("-g","--group_subfields",dest="group_subfields",action="store_true",default=False,help="group feature realizations by taking the mean over subfields, this makes a big difference in the covariance matrix")
    parser.add_argument("-s","--save_points",dest="save_points",action="store",default=None,help="save points in parameter space to external npy file")
    parser.add_argument("-ss","--save_debug",dest="save_debug",action="store_true",default=False,help="save a bunch of debugging info for the analysis")
    parser.add_argument("-p","--prefix",dest="prefix",action="store",default="",help="prefix of the emulator to pickle")
    parser.add_argument("-r","--remove",dest="remove",action="store",type=int,default=24,help="model to remove from the analysis")
    parser.add_argument("-R","--random",dest="random",action="store",type=int,default=0,help="random seed initialization for realization picking")

    cmd_args = parser.parse_args()
    if cmd_args.options_file is None:
        parser.print_help()
        sys.exit(0)

    #Set verbosity level
    if cmd_args.verbose_plus:
        logging.basicConfig(level=DEBUG_PLUS)
    elif cmd_args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    #Initialize MPI Pool
    try:
        pool = MPIPool()
    except:
        pool = None

    if (pool is not None) and (not pool.is_master()):
        pool.wait()
        sys.exit(0)

    if pool is not None:
        logging.info("Started MPI Pool.")

    #################################################################################################################
    #################Info gathering: covariance matrix, observation and emulator#####################################
    #################################################################################################################

    #start
    start = time.time()
    last_timestamp = start

    #Instantiate a FeatureLoader object that will take care of the memory loading
    feature_loader = FeatureLoader(cmd_args)

    ###########################################################################################################################################

    #Get the names of all the simulated models available for the CFHT analysis, including smoothing scales and subfields
    all_simulated_models = CFHTemu1.getModels(root_path=feature_loader.options.get("simulations","root_path"))

    #Use this model for the covariance matrix
    covariance_model = all_simulated_models[feature_loader.options.getint("analysis","covariance_model")]
    logging.info("Measuring covariance matrix from model {0}".format(covariance_model))

    #Load in the covariance matrix
    features_covariance = feature_loader.load_features(covariance_model).covariance()

    #timestamp
    now = time.time()
    logging.info("covariance loaded in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    ################################################################################################################################################

    #Create a LikelihoodAnalysis instance by unpickling one of the emulators
    emulators_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"emulators")
    emulator_file = os.path.join(emulators_dir,"emulator{0}_{1}.p".format(cmd_args.prefix,output_string(feature_loader.feature_string)))
    logging.info("Unpickling emulator from {0}...".format(emulator_file))
    analysis = LikelihoodAnalysis.load(emulator_file)

    #timestamp
    now = time.time()
    logging.info("emulator unpickled in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    ##################################################################################################################################################

    #Initialize random seed
    np.random.seed(cmd_args.random)
    realization = np.random.randint(0,1000)

    #Treat the removed model as data
    model_to_remove = all_simulated_models[cmd_args.remove]
    parameters_to_remove = model_to_remove.squeeze()
    logging.info("Treating model {0}, realization {1} as data, loading features...".format(model_to_remove,realization+1))
    observed_feature = feature_loader.load_features(model_to_remove)[np.random.randint(0,1000)]

    #Compute the chi2 for this observed feature without removing it from the emulator (must be close to 0)
    logging.info("Chi2 before removal: {0[0]:.3f} ({1} dof)".format(analysis.chi2(parameters_to_remove,features_covariance=features_covariance,observed_feature=observed_feature),analysis.training_set.shape[1]))

    #Remove the model from the emulator
    remove_index = analysis.find(parameters_to_remove)[0]
    logging.info("Removing model {0} with parameters {1} from emulator...".format(remove_index,analysis.parameter_set[remove_index]))
    analysis.remove_model(remove_index)

    #Retrain without the removed model
    analysis.train()

    #Compute the chi2 for this observed feature after removing it from the emulator (likely it's not 0 anymore)
    logging.info("Chi2 after removal: {0[0]:.3f} ({1} dof)".format(analysis.chi2(parameters_to_remove,features_covariance=features_covariance,observed_feature=observed_feature),analysis.training_set.shape[1]))

    ####################################################################################################################
    ######################################Compute the chi2 cube#########################################################
    ####################################################################################################################

    logging.info("Initializing chi2 meshgrid...")

    #Set the points in parameter space on which to compute the chi2 (read from options)
    Om = np.ogrid[feature_loader.options.getfloat("Omega_m","min"):feature_loader.options.getfloat("Omega_m","max"):feature_loader.options.getint("Omega_m","num_points")*1j]
    w = np.ogrid[feature_loader.options.getfloat("w","min"):feature_loader.options.getfloat("w","max"):feature_loader.options.getint("w","num_points")*1j]
    si8 = np.ogrid[feature_loader.options.getfloat("sigma8","min"):feature_loader.options.getfloat("sigma8","max"):feature_loader.options.getint("sigma8","num_points")*1j]

    num_points = len(Om) * len(w) * len(si8)
    points = np.array(np.meshgrid(Om,w,si8,indexing="ij")).reshape(3,num_points).transpose()

    if cmd_args.save_points is not None:
        logging.info("Saving points to {0}.npy".format(cmd_args.save_points.rstrip(".npy")))
        np.save(cmd_args.save_points.rstrip(".npy")+".npy",points)

    #Now compute the chi2 at each of these points
    if pool:
        split_chunks = pool.size
        logging.info("Computing chi squared for {0} parameter combinations using {1} cores...".format(points.shape[0],pool.size))
    else:
        split_chunks = None
        logging.info("Computing chi squared for {0} parameter combinations using 1 core...".format(points.shape[0]))

    chi_squared = analysis.chi2(points,observed_feature=observed_feature,features_covariance=features_covariance,pool=pool,split_chunks=split_chunks)

    #Close MPI Pool
    if pool is not None:
        pool.close()
        logging.info("Closed MPI Pool.")

    now = time.time()
    logging.info("chi2 calculations completed in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    #Save output
    likelihood_file = "likelihood_remove{0}_{1}.npy".format(cmd_args.remove,output_string(feature_loader.feature_string))
    chi2_file = "chi2_remove{0}_{1}.npy".format(cmd_args.remove,output_string(feature_loader.feature_string))

    logging.info("Saving chi2 to {0}".format(chi2_file))
    np.save(chi2_file,chi_squared.reshape(Om.shape + w.shape + si8.shape))

    logging.info("Saving full likelihood to {0}".format(likelihood_file))
    likelihood_cube = analysis.likelihood(chi_squared.reshape(Om.shape + w.shape + si8.shape))
    np.save(likelihood_file,likelihood_cube)

    #Find the maximum of the likelihood using ContourPlot functionality
    contour = ContourPlot()
    contour.getLikelihood(likelihood_cube)
    contour.getUnitsFromOptions(feature_loader.options)
    parameters_maximum = contour.getMaximum()
    parameter_keys = parameters_maximum.keys()
    parameter_keys.sort(key=contour.parameter_axes.get)

    #Display the new best fit before exiting
    best_fit_parameters = [ parameters_maximum[par_key] for par_key in parameter_keys ]
    logging.info("New best fit is [ {0[0]:.2f} {0[1]:.2f} {0[2]:.2f} ], chi2={1[0]:.3f}".format(best_fit_parameters,analysis.chi2(np.array(best_fit_parameters),features_covariance=features_covariance,observed_feature=observed_feature)))

    #End
    end = time.time()
    logging.info("DONE!!")
    logging.info("Completed in {0:.1f}s".format(end-start))
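# The saved cube has shape (len(Om), len(w), len(si8)), matching the
# meshgrid(..., indexing="ij") ordering above. A minimal post-processing sketch
# (not part of the original script; the file name is a placeholder for whatever
# output_string(...) produced):
import numpy as np

like = np.load('likelihood_remove24_FEATURESTRING.npy')  # placeholder name
like /= like.sum()                 # normalize to a discrete posterior
post_Om_si8 = like.sum(axis=1)     # marginalize over w (axis 1) -> (Omega_m, sigma8) plane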
def main(nw=1000, th=9, bi=500, fr=2000, thin=20, runmpi=True, local=False,
         dil=None, codedir='/Users/tom/Projects/doug_hz/code',
         ldfileloc='/Users/tom/Projects/doug_hz/code/'):
    if runmpi:
        pool = MPIPool()
        if not pool.is_master():
            pool.wait()
            sys.exit(0)
    else:
        pool = None

    #if not local:
    #    sys.path.append('/u/tsbarcl2/svn_code/tom_code/')
    #    ldfileloc = '/u/tsbarcl2/svn_code/tom_code/'
    #elif local:
    #    sys.path.append('/Users/tom/svn_code/tom_code/')
    #    ldfileloc = '/Users/tom/svn_code/tom_code/'

    if dil is None:
        dil = 0.0

    files = os.listdir('.')
    dat_d = get_data(files)

    rho_prior = True
    ldp_prior = False

    #mcmc params
    nwalkers = nw
    threads = th
    burnin = bi
    fullrun = fr

    #use quadratic or 4-parameter limb darkening
    n_ldparams = 2

    #lc time offset from BJD-24548333.
    toffset = (54832.5 + 67.)

    #photometric zeropoint
    zpt_0 = 1.E-10

    #plot?
    #doplot = False

    ################

    M = tmod.transitemcee_fitldp(dat_d['nplanets'], dat_d['cadence'], ldfileloc=ldfileloc)
    #M.get_stellar(dat_d['teff'], dat_d['logg'], dat_d['feh'], n_ldparams)
    M.get_stellar(dat_d['teff'], dat_d['logg'], dat_d['feh'], n_ldparams, ldp_prior=ldp_prior)
    M.already_open(dat_d['time'], dat_d['flux'], dat_d['err'], timeoffset=toffset, normalize=False)
    rho_vals = np.array([dat_d['rho'], dat_d['rho_unc']])
    M.get_rho(rho_vals, rho_prior)
    M.get_zpt(zpt_0)
    if dil is not None:
        M.get_sol(*dat_d['sol_guess'], dil=dil)
    else:
        M.get_sol(*dat_d['sol_guess'])
    M.cut_non_transit(8)

    ################

    stophere = False
    if not stophere:
        #for threadnum in np.arange(2, 32, 2):
        p0 = M.get_guess(nwalkers)
        l_var = np.shape(p0)[1]

        N = len([indval for indval in xrange(fullrun) if indval % thin == 0])
        outfile = 'koi{0}_np{1}_prior{2}_dil{3}.hdf5'.format(dat_d['koi'], dat_d['nplanets'], rho_prior, dil)
        with h5py.File(outfile, u"w") as f:
            f.create_dataset("time", data=M.time)
            f.create_dataset("flux", data=M.flux)
            f.create_dataset("err", data=M.err)
            f.create_dataset("itime", data=M._itime)
            f.create_dataset("ntt", data=M._ntt)
            f.create_dataset("tobs", data=M._tobs)
            f.create_dataset("omc", data=M._omc)
            f.create_dataset("datatype", data=M._datatype)
            f.attrs["rho_0"] = M.rho_0
            f.attrs["rho_0_unc"] = M.rho_0_unc
            f.attrs["nplanets"] = M.nplanets
            f.attrs["ld1"] = M.ld1
            f.attrs["ld2"] = M.ld2
            f.attrs["koi"] = dat_d['koi']
            f.attrs["dil"] = dil
            g = f.create_group("mcmc")
            g.attrs["nwalkers"] = nwalkers
            g.attrs["burnin"] = burnin
            g.attrs["iterations"] = fullrun
            g.attrs["thin"] = thin
            g.attrs["rho_prior"] = rho_prior
            g.attrs["ldp_prior"] = ldp_prior
            g.attrs["onlytransits"] = M.onlytransits
            g.attrs["tregion"] = M.tregion
            g.attrs["ldfileloc"] = M.ldfileloc
            g.attrs["n_ldparams"] = M.n_ldparams
            g.create_dataset("fixed_sol", data=M.fixed_sol)
            g.create_dataset("fit_sol_0", data=M.fit_sol_0)
            c_ds = g.create_dataset("chain", (nwalkers, N, l_var), dtype=np.float64)
            lp_ds = g.create_dataset("lnprob", (nwalkers, N), dtype=np.float64)

        #args = [M.nplanets, M.rho_0, M.rho_0_unc, M.rho_prior,
        #        M.Teff, M.Teff_unc, M.logg, M.logg_unc, M.FeH, M.FeH_unc,
        #        M.flux, M.err, M.fixed_sol, M.time, M._itime, M._ntt,
        #        M._tobs, M._omc, M._datatype, M.n_ldparams, M.ldfileloc,
        #        M.onlytransits, M.tregion]
        args = [M.nplanets, M.rho_0, M.rho_0_unc, M.rho_prior,
                M.ld1, M.ld1_unc, M.ld2, M.ld2_unc, M.ldp_prior,
                M.flux, M.err, M.fixed_sol, M.time, M._itime, M._ntt,
                M._tobs, M._omc, M._datatype, M.n_ldparams, M.ldfileloc,
                M.onlytransits, M.tregion]

        tom = tmod.logchi2_fitldp
        if runmpi:
            sampler = emcee.EnsembleSampler(nwalkers, l_var, tom, args=args, pool=pool)
        else:
            sampler = emcee.EnsembleSampler(nwalkers, l_var, tom, args=args, threads=th)

        time1 = thetime.time()
        p2, prob, state = sampler.run_mcmc(p0, burnin, storechain=False)
        sampler.reset()
        with h5py.File(outfile, u"a") as f:
            g = f["mcmc"]
            g.create_dataset("burnin_pos", data=p2)
            g.create_dataset("burnin_prob", data=prob)
        time2 = thetime.time()
        print 'burn-in took ' + str((time2 - time1) / 60.) + ' min'

        time1 = thetime.time()
        for i, (pos, lnprob, state) in enumerate(sampler.sample(p2, iterations=fullrun,
                                                                rstate0=state, storechain=False)):
            #do the thinning in the loop here
            if i % thin == 0:
                ind = i / thin
                with h5py.File(outfile, u"a") as f:
                    g = f["mcmc"]
                    c_ds = g["chain"]
                    lp_ds = g["lnprob"]
                    c_ds[:, ind, :] = pos
                    lp_ds[:, ind] = lnprob
        time2 = thetime.time()
        print 'MCMC run took ' + str((time2 - time1) / 60.) + ' min'
        print
        print("Mean acceptance: " + str(np.mean(sampler.acceptance_fraction)))
        print

        #try:
        #    print("Autocorrelation time:", sampler.acor)
        #    print("Autocorrelation times sampled:", fullrun / sampler.acor)
        #except RuntimeError:
        #    print("No Autocorrelation")

        if runmpi:
            pool.close()

        # if doplot:
        #     plt.ioff()
        #     import triangle
        #     labels = [r"rho", r"zpt"]
        #     for ij in xrange(dat_d['nplanets']):
        #         labels = np.r_[labels, [r"T0", r"per", r"b", r"rprs", r"ecosw", r"esinw"]]
        #     figure = triangle.corner(sampler.flatchain, labels=labels)
        #     figure.savefig("data.png")

        #savefile = 'koi%s_np%s_prior%s.dat' % (dat_d['koi'], dat_d['nplanets'], rho_prior)
        #savefile2 = 'koi%s_np%s_prior%s_prob.dat' % (dat_d['koi'], dat_d['nplanets'], rho_prior)
        #np.savetxt(savefile, sampler.flatchain)
        #np.savetxt(savefile2, sampler.flatlnprobability)

        return sampler
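# Because the chain and ln-probabilities are streamed into the HDF5 file above,
# a later analysis session can recover them without rerunning. A minimal read-back
# sketch; the file name is illustrative, following the koi{...}.hdf5 pattern used above:
import h5py
import numpy as np

with h5py.File('koi123_np1_priorTrue_dil0.0.hdf5', 'r') as f:  # illustrative name
    chain = f['mcmc/chain'][...]     # (nwalkers, N, l_var), thinned samples
    lnprob = f['mcmc/lnprob'][...]   # (nwalkers, N)
    nwalkers = f['mcmc'].attrs['nwalkers']

flat = chain.reshape(-1, chain.shape[-1])  # flatten walkers and steps together
print 'median parameters:', np.median(flat, axis=0)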
def LensModelMCMC(data,lens,source,
      xmax=30.,highresbox=[-3.,3.,-3.,3.],emitres=None,fieldres=None,
      sourcedatamap=None, scaleamp=False, shiftphase=False,
      modelcal=True,cosmo=Planck15,
      nwalkers=1000,nburn=1000,nstep=1000,pool=None,nthreads=1,mpirun=False):
      """
      Wrapper function which takes what the user wants and turns it into the
      format needed for the actual MCMC lens modeling.

      Inputs:
      data:
            One or more visdata objects; if multiple datasets are being fit
            to, should be a list of visdata objects.
      lens:
            Any of the currently implemented lens objects or ExternalShear.
      source:
            One or more of the currently implemented source objects; if more
            than one source is to be fit, should be a list of multiple sources.
      xmax:
            (Half-)Grid size, in arcseconds; the grid will span +/-xmax in x&y.
      highresbox:
            The region to model at higher resolution (to account for
            high-magnification and differential lensing effects), as
            [xmin, xmax, ymin, ymax]. Note the sign convention is:
            +x = West, +y = North, like the lens positions.
      sourcedatamap:
            A list of length the number of datasets which tells which source(s)
            are to be fit to which dataset(s). E.g., if two sources are to be
            fit to two datasets jointly, should be [[0,1],[0,1]]. If we have
            four sources and three datasets, could be [[0,1],[0,1],[2,3]] to
            say that the first two sources should both be fit to the first two
            datasets, while the second two should be fit to the third dataset.
            If None, will assume all sources should be fit to all datasets.
      scaleamp:
            A list of length the number of datasets which tells whether a flux
            rescaling is allowed and which dataset the scaling should be
            relative to. False indicates no scaling should be done, while True
            indicates that amplitude scaling should be allowed.
      shiftphase:
            Similar to scaleamp above, but allowing for positional/astrometric
            offsets.
      modelcal:
            Whether or not to perform the pseudo-selfcal procedure of H+13.
      cosmo:
            The cosmology to use, as an astropy object, e.g.,
            from astropy.cosmology import WMAP9; cosmo=WMAP9.
            Default is Planck15.
      nwalkers:
            Number of walkers to use in the mcmc process; see
            dan.iel.fm/emcee/current for more details.
      nburn:
            Number of burn-in steps to take with the chain.
      nstep:
            Number of actual steps to take in the mcmc chains after the burn-in.
      nthreads:
            Number of threads (read: cores) to use during the fitting,
            default 1.
      mpirun:
            Whether to parallelize using MPI instead of multiprocessing. If
            True, nthreads has no effect, and your script should be run with,
            e.g., mpirun -np 16 python lensmodel.py.

      Returns:
      mcmcresult:
            A nested dict containing the chains requested. Will have all the
            MCMC chain results, plus metadata about the run (initial params,
            data used, etc.). Formatting still a work in progress (esp. for
            modelcal phases).
      chains:
            The raw chain data, for testing.
      blobs:
            Everything else returned by the likelihood function; will have
            magnifications and any modelcal phase offsets at each step;
            eventually will remove this once everything is packaged up nicely
            in mcmcresult.
      colnames:
            Basically all the keys to the mcmcresult dict; eventually won't
            need to return this once mcmcresult is packaged up nicely.
""" if pool: nthreads = 1 elif mpirun: nthreads = 1 from emcee.utils import MPIPool pool = MPIPool(debug=False,loadbalance=True) if not pool.is_master(): pool.wait() sys.exit(0) else: pool = None # Making these lists just makes later stuff easier since we now know the dtype lens = list(np.array([lens]).flatten()) source = list(np.array([source]).flatten()) # Ensure source(s) are a list data = list(np.array([data]).flatten()) # Same for dataset(s) scaleamp = list(np.array([scaleamp]).flatten()) shiftphase = list(np.array([shiftphase]).flatten()) modelcal = list(np.array([modelcal]).flatten()) if len(scaleamp)==1 and len(scaleamp)<len(data): scaleamp *= len(data) if len(shiftphase)==1 and len(shiftphase)<len(data): shiftphase *= len(data) if len(modelcal)==1 and len(modelcal)<len(data): modelcal *= len(data) if sourcedatamap is None: sourcedatamap = [None]*len(data) # emcee isn't very flexible in terms of how it gets initialized; start by # assembling the user-provided info into a form it likes ndim, p0, colnames = 0, [], [] # Lens(es) first for i,ilens in enumerate(lens): if ilens.__class__.__name__=='SIELens': for key in ['x','y','M','e','PA']: if not vars(ilens)[key]['fixed']: ndim += 1 p0.append(vars(ilens)[key]['value']) colnames.append(key+'L'+str(i)) elif ilens.__class__.__name__=='ExternalShear': for key in ['shear','shearangle']: if not vars(ilens)[key]['fixed']: ndim += 1 p0.append(vars(ilens)[key]['value']) colnames.append(key) # Then source(s) for i,src in enumerate(source): if src.__class__.__name__=='GaussSource': for key in ['xoff','yoff','flux','width']: if not vars(src)[key]['fixed']: ndim += 1 p0.append(vars(src)[key]['value']) colnames.append(key+'S'+str(i)) elif src.__class__.__name__=='SersicSource': for key in ['xoff','yoff','flux','majax','index','axisratio','PA']: if not vars(src)[key]['fixed']: ndim += 1 p0.append(vars(src)[key]['value']) colnames.append(key+'S'+str(i)) elif src.__class__.__name__=='PointSource': for key in ['xoff','yoff','flux']: if not vars(src)[key]['fixed']: ndim += 1 p0.append(vars(src)[key]['value']) colnames.append(key+'S'+str(i)) # Then flux rescaling; only matters if >1 dataset for i,t in enumerate(scaleamp[1:]): if t: ndim += 1 p0.append(1.) # Assume 1.0 scale factor to start colnames.append('ampscale_dset'+str(i+1)) # Then phase/astrometric shift; each has two vals for a shift in x&y for i,t in enumerate(shiftphase[1:]): if t: ndim += 2 p0.append(0.); p0.append(0.) # Assume zero initial offset colnames.append('astromshift_x_dset'+str(i+1)) colnames.append('astromshift_y_dset'+str(i+1)) # Get any model-cal parameters set up. The process involves some expensive # matrix inversions, but these only need to be done once, so we'll do them # now and pass the results as arguments to the likelihood function. See docs # in calc_likelihood.model_cal for more info. 
for i,dset in enumerate(data): if modelcal[i]: uniqant = np.unique(np.asarray([dset.ant1,dset.ant2]).flatten()) dPhi_dphi = np.zeros((uniqant.size-1,dset.u.size)) for j in range(1,uniqant.size): dPhi_dphi[j-1,:]=(dset.ant1==uniqant[j])-1*(dset.ant2==uniqant[j]) C = scipy.sparse.diags((dset.sigma/dset.amp)**-2.,0) F = np.dot(dPhi_dphi,C*dPhi_dphi.T) Finv = np.linalg.inv(F) FdPC = np.dot(-Finv,dPhi_dphi*C) modelcal[i] = [dPhi_dphi,FdPC] # Create our lensing grid coordinates now, since those shouldn't be # recalculated with every call to the likelihood function xmap,ymap,xemit,yemit,indices = GenerateLensingGrid(data,xmax,highresbox, fieldres,emitres) # Calculate the uv coordinates we'll interpolate onto; only need to calculate # this once, so do it here. kmax = 0.5/((xmap[0,1]-xmap[0,0])*arcsec2rad) ug = np.linspace(-kmax,kmax,xmap.shape[0]) # Calculate some distances; we only need to calculate these once. # This assumes multiple sources are all at same z; should be this # way anyway or else we'd have to deal with multiple lensing planes if cosmo is None: cosmo = Planck15 Dd = cosmo.angular_diameter_distance(lens[0].z).value Ds = cosmo.angular_diameter_distance(source[0].z).value Dds= cosmo.angular_diameter_distance_z1z2(lens[0].z,source[0].z).value p0 = np.array(p0) # Create a ball of starting points for the walkers, gaussian ball of # 10% width; if initial value is 0 (eg, astrometric shift), give a small sigma # for angles, generally need more spread than 10% to sample well, do 30% for those cases [~0.5% >180deg for p0=100deg] isangle = np.array([0.30 if 'PA' in s or 'angle' in s else 0.1 for s in colnames]) initials = emcee.utils.sample_ball(p0,np.asarray([isangle[i]*x if x else 0.05 for i,x in enumerate(p0)]),int(nwalkers)) # All the lens objects know if their parameters have been altered since the last time # we calculated the deflections. If all the lens pars are fixed, we only need to do the # deflections once. This step ensures that the lens object we create the sampler with # has these initial deflections. for i,ilens in enumerate(lens): if ilens.__class__.__name__ == 'SIELens': ilens.deflect(xemit,yemit,Dd,Ds,Dds) elif ilens.__class__.__name__ == 'ExternalShear': ilens.deflect(xemit,yemit,lens[0]) # Create the sampler object; uses calc_likelihood function defined elsewhere lenssampler = emcee.EnsembleSampler(nwalkers,ndim,calc_vis_lnlike, args = [data,lens,source,Dd,Ds,Dds,ug, xmap,ymap,xemit,yemit,indices, sourcedatamap,scaleamp,shiftphase,modelcal], threads=nthreads,pool=pool) # Run burn-in phase print("Running burn-in... ") #pos,prob,rstate,mus = lenssampler.run_mcmc(initials,nburn,storechain=False) for i,result in enumerate(lenssampler.sample(initials,iterations=nburn,storechain=False)): if i%20==0: print('Burn-in step ',i,'/',nburn) pos,prob,rstate,blob = result lenssampler.reset() # Run actual chains print("Done. Running chains... ") for i,result in enumerate(lenssampler.sample(pos,rstate0=rstate,iterations=nstep,storechain=True)): if i%20==0: print('Chain step ',i,'/',nstep) #lenssampler.run_mcmc(pos,nstep,rstate0=rstate) if mpirun: pool.close() print("Mean acceptance fraction: ",np.mean(lenssampler.acceptance_fraction)) #return lenssampler.flatchain,lenssampler.blobs,colnames # Package up the magnifications and modelcal phases; disregards nan points (where # we failed the prior, usu. because a periodic angle wrapped). 
blobs = lenssampler.blobs mus = np.asarray([[a[0] for a in l] for l in blobs]).flatten(order='F') bad = np.where(np.asarray([np.any(np.isnan(m)) for m in mus],dtype=bool))[0] for k in bad: mus[k] = np.array([np.nan]*len(source)) mus = np.asarray(list(mus),dtype=float).reshape((-1,len(source)),order='F') # stupid-ass hack bad = np.isnan(mus)[:,0] #bad = bad.reshape((-1,len(source)),order='F')[:,0] #mus = np.atleast_2d(np.asarray([mus[i] if not bad[i] else [np.nan]*len(source) for i in range(mus.size)])).T colnames.extend(['mu{0:.0f}'.format(i) for i in range(len(source))]) # Assemble the output. Want to return something that contains both the MCMC chains # themselves, but also metadata about the run. mcmcresult = {} # keep track of git revision, for reproducibility's sake # if run under mpi, this will spew some scaremongering warning text, # but it's fine. use --mca mpi_warn_on_fork 0 in the mpirun statement to disable try: import subprocess gitd = os.path.abspath(os.path.join(os.path.dirname(__file__),os.pardir)) mcmcresult['githash'] = subprocess.check_output('git --git-dir={0:s} --work-tree={1:s} '\ 'rev-parse HEAD'.format(gitd+'/.git',gitd),shell=True).rstrip() except: mcmcresult['githash'] = 'No repo found' mcmcresult['datasets'] = [dset.filename for dset in data] # Data files used mcmcresult['lens_p0'] = lens # Initial params for lens,src(s),shear; also tells if fixed, priors, etc. mcmcresult['source_p0'] = source if sourcedatamap: mcmcresult['sourcedatamap'] = sourcedatamap mcmcresult['xmax'] = xmax mcmcresult['highresbox'] = highresbox mcmcresult['fieldres'] = fieldres mcmcresult['emitres'] = emitres if any(scaleamp): mcmcresult['scaleamp'] = scaleamp if any(shiftphase): mcmcresult['shiftphase'] = shiftphase mcmcresult['chains'] = np.core.records.fromarrays(np.hstack((lenssampler.flatchain[~bad],mus[~bad])).T,names=colnames) mcmcresult['lnlike'] = lenssampler.flatlnprobability[~bad] # Keep track of best-fit params, derived from chains. 
c = copy.deepcopy(mcmcresult['chains']) mcmcresult['best-fit'] = {} pbest = [] # Calculate the best fit values as medians of each param lens,source = copy.deepcopy(mcmcresult['lens_p0']), copy.deepcopy(mcmcresult['source_p0']) for i,ilens in enumerate(lens): if ilens.__class__.__name__ == 'SIELens': ilens.__dict__['_altered'] = True for key in ['x','y','M','e','PA']: if not vars(ilens)[key]['fixed']: ilens.__dict__[key]['value'] = np.median(c[key+'L'+str(i)]) pbest.append(np.median(c[key+'L'+str(i)])) elif ilens.__class__.__name__ == 'ExternalShear': for key in ['shear','shearangle']: if not vars(ilens)[key]['fixed']: ilens.__dict__[key]['value'] = np.median(c[key]) pbest.append(np.median(c[key])) mcmcresult['best-fit']['lens'] = lens # now do the source(s) for i,src in enumerate(source): # Source is a list of source objects if src.__class__.__name__ == 'GaussSource': for key in ['xoff','yoff','flux','width']: if not vars(src)[key]['fixed']: src.__dict__[key]['value'] = np.median(c[key+'S'+str(i)]) pbest.append(np.median(c[key+'S'+str(i)])) elif src.__class__.__name__ == 'SersicSource': for key in ['xoff','yoff','flux','majax','index','axisratio','PA']: if not vars(src)[key]['fixed']: src.__dict__[key]['value'] = np.median(c[key+'S'+str(i)]) pbest.append(np.median(c[key+'S'+str(i)])) elif src.__class__.__name__ == 'PointSource': for key in ['xoff','yoff','flux']: if not vars(src)[key]['fixed']: src.__dict__[key]['value'] = np.median(c[key+'S'+str(i)]) pbest.append(np.median(c[key+'S'+str(i)])) mcmcresult['best-fit']['source'] = source mcmcresult['best-fit']['magnification'] = np.median(mus[~bad],axis=0) # Any amplitude scaling or astrometric shifts bfscaleamp = np.ones(len(data)) if 'scaleamp' in mcmcresult.keys(): for i,t in enumerate(mcmcresult['scaleamp']): # only matters if >1 datasets if i==0: pass elif t: bfscaleamp[i] = np.median(c['ampscale_dset'+str(i)]) pbest.append(np.median(c['ampscale_dset'+str(i)])) else: pass mcmcresult['best-fit']['scaleamp'] = bfscaleamp bfshiftphase = np.zeros((len(data),2)) if 'shiftphase' in mcmcresult.keys(): for i,t in enumerate(mcmcresult['shiftphase']): if i==0: pass # only matters if >1 datasets elif t: bfshiftphase[i][0] = np.median(c['astromshift_x_dset'+str(i)]) bfshiftphase[i][1] = np.median(c['astromshift_y_dset'+str(i)]) pbest.append(np.median(c['astromshift_x_dset'+str(i)])) pbest.append(np.median(c['astromshift_y_dset'+str(i)])) else: pass # no shifting mcmcresult['best-fit']['shiftphase'] = bfshiftphase mcmcresult['best-fit']['lnlike'] = calc_vis_lnlike(pbest,data,mcmcresult['best-fit']['lens'], mcmcresult['best-fit']['source'], Dd,Ds,Dds,ug,xmap,ymap,xemit,yemit,indices, sourcedatamap,scaleamp,shiftphase,modelcal)[0] # Calculate the deviance information criterion, using the Spiegelhalter+02 definition (cf Gelman+04) mcmcresult['best-fit']['DIC'] = -4*np.mean(mcmcresult['lnlike']) + 2*mcmcresult['best-fit']['lnlike'] # If we did any modelcal stuff, keep the antenna phase offsets here if any(modelcal): mcmcresult['modelcal'] = [True if j else False for j in modelcal] dp = np.squeeze(np.asarray([[a[1] for a in l if ~np.any(np.isnan(a[0]))] for l in blobs])) a = [x for l in dp for x in l] # Have to dick around with this if we had any nan's dphases = np.squeeze(np.reshape(a,(nwalkers*nstep-bad.sum(),len(data),-1),order='F')) if len(data) > 1: for i in range(len(data)): if modelcal[i]: mcmcresult['calphases_dset'+str(i)] = np.vstack(dphases[:,i]) else: if any(modelcal): mcmcresult['calphases_dset0'] = dphases return mcmcresult
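# --- Illustrative sketch (not part of the original code) ---
# The DIC line above compresses the Spiegelhalter+02 definition into one
# expression: with deviance D = -2 ln L, DIC = 2*mean(D) - D(theta_hat),
# which reduces to -4*mean(lnlike) + 2*lnlike(best fit). Written out as a
# standalone helper:
import numpy as np


def deviance_information_criterion(lnlike_chain, lnlike_best):
    """DIC = 2 * (posterior mean deviance) - (deviance at the point estimate)."""
    mean_deviance = -2. * np.mean(lnlike_chain)
    best_deviance = -2. * lnlike_best
    return 2. * mean_deviance - best_deviance  # == -4*mean(lnlike) + 2*lnlike_best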
def main(runmpi=True,nw=100,th=6,bi=10,fr=10): if runmpi: pool = MPIPool() if not pool.is_master(): pool.wait() sys.exit(0) else: pool=None #ldfileloc = '/Users/tom/Projects/koi2133/code/' ldfileloc = '/nobackup/tsbarcl2/Projects/koi2133/code/' #codedir = '/Users/tom/Projects/koi2133/code' codedir = '/nobackup/tsbarcl2/Projects/koi2133/code' koi = 2133 cadence=1625.3 teff = 4550. teff_unc = 75. feh = 0.11 feh_unc = 0.07 logg = 2.943 logg_unc = 0.007 rho = 0.0073 rho_unc = 0.0001 nplanets = 1 dil=0.0 period=6.24672336 impact=0.7 T0=136.383880 rprs=0.02067 alb=30. occ=40. ell=30. rvamp=79.0 ecosw=0.048 esinw=-0.045 planet_guess = np.array([ T0,period,impact,rprs,ecosw,esinw, rvamp,occ,ell,alb]) rvtime, rvval, rverr = get_rv() time,flux,ferr = get_lc() rho_prior = True ldp_prior = False nwalkers = nw threads = th burnin = bi fullrun = fr thin = 1 n_ldparams = 2 toffset_lc = 0 toffset_rv = 0 zpt_0 = 1.E-10 M = tmod.transitemcee_koi2133( nplanets,cadence, ldfileloc=ldfileloc,codedir=codedir) M.get_stellar(teff, logg, feh, n_ldparams,ldp_prior=ldp_prior) M.already_open(time, flux,ferr, rvtime,rvval, rverr, timeoffset=toffset_lc,rvtimeoffset=toffset_rv, normalize=False) rho_vals = np.array([rho,rho_unc]) M.get_rho(rho_vals,rho_prior) M.get_zpt(zpt_0) noise_model = [3.0E-4, 3.7E-2, 2.E-4, 3.] if dil is not None: M.get_sol(*planet_guess,dil=dil,noise_model=noise_model) else: M.get_sol(*planet_guess,noise_model=noise_model) outfile = 'koi{0}_np{1}_prior{2}_dil{3}GP.hdf5'.format( koi,nplanets,rho_prior,dil) p0 = M.get_guess(nwalkers) #dirty hack!! qwe = np.r_[np.arange(0,7),np.arange(9,21)] p0 = p0[:,qwe] l_var = np.shape(p0)[1] N = len([indval for indval in xrange(fullrun) if indval%thin == 0]) with h5py.File(outfile, u"w") as f: f.create_dataset("time", data=M.time) f.create_dataset("flux", data=M.flux) f.create_dataset("err", data=M.err) f.create_dataset("rvtime", data=M.rvtime) f.create_dataset("rvval", data=M.rvval) f.create_dataset("rverr", data=M.rverr) f.create_dataset("itime", data=M._itime) f.create_dataset("ntt", data = M._ntt) f.create_dataset("tobs", data = M._tobs) f.create_dataset("omc",data = M._omc) f.create_dataset("datatype",data = M._datatype) f.attrs["rho_0"] = M.rho_0 f.attrs["rho_0_unc"] = M.rho_0_unc f.attrs["nplanets"] = M.nplanets f.attrs["ld1"] = M.ld1 f.attrs["ld2"] = M.ld2 f.attrs["koi"] = koi f.attrs["dil"] = dil g = f.create_group("mcmc") g.attrs["nwalkers"] = nwalkers g.attrs["burnin"] = burnin g.attrs["iterations"] = fullrun g.attrs["thin"] = thin g.attrs["rho_prior"] = rho_prior g.attrs["ldp_prior"] = ldp_prior g.attrs["onlytransits"] = M.onlytransits g.attrs["tregion"] = M.tregion g.attrs["ldfileloc"] = M.ldfileloc g.attrs["n_ldparams"] = M.n_ldparams g.create_dataset("fixed_sol", data= M.fixed_sol) g.create_dataset("fit_sol_0", data= M.fit_sol_0) c_ds = g.create_dataset("chain", (nwalkers, N, l_var), dtype=np.float64) lp_ds = g.create_dataset("lnprob", (nwalkers, N), dtype=np.float64) #I don't like the default LDP unc #I'm changing them M.ld1_unc = 0.8 M.ld2_unc = 0.8 args = [M.nplanets,M.rho_0,M.rho_0_unc,M.rho_prior, M.ld1,M.ld1_unc,M.ld2,M.ld2_unc,M.ldp_prior, M.flux,M.err,M.fixed_sol,M.time,M._itime,M._ntt, M._tobs,M._omc,M._datatype, M.rvtime,M.rvval,M.rverr,M._rvitime, M.n_ldparams,M.ldfileloc, M.onlytransits,M.tregion] tom = tmod.logchi2_rv_phaseGP2 if runmpi: sampler = emcee.EnsembleSampler(nwalkers, l_var, tom, args=args,pool=pool) else: sampler = emcee.EnsembleSampler(nwalkers, l_var, tom, args=args,threads=th) time1 = thetime.time() p2, prob, state = 
sampler.run_mcmc(p0, burnin, storechain=False) sampler.reset() with h5py.File(outfile, u"a") as f: g = f["mcmc"] g.create_dataset("burnin_pos", data=p2) g.create_dataset("burnin_prob", data=prob) time2 = thetime.time() print('burn-in took ' + str((time2 - time1)/60.) + ' min') time1 = thetime.time() for i, (pos, lnprob, state) in enumerate(sampler.sample(p2, iterations=fullrun, rstate0=state, storechain=False)): #do the thinning in the loop here if i % thin == 0: ind = i / thin with h5py.File(outfile, u"a") as f: g = f["mcmc"] c_ds = g["chain"] lp_ds = g["lnprob"] c_ds[:, ind, :] = pos lp_ds[:, ind] = lnprob time2 = thetime.time() print('MCMC run took ' + str((time2 - time1)/60.) + ' min') print('') print("Mean acceptance: " + str(np.mean(sampler.acceptance_fraction))) print('') if runmpi: pool.close() else: sampler.pool.close() return sampler
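# --- Illustrative sketch (not part of the original code) ---
# Both main() functions above stream the chain to HDF5 instead of keeping it
# in memory: the chain dataset is preallocated, sampler.sample() runs with
# storechain=False, and every `thin`-th set of walker positions is written by
# hand. A minimal standalone version of that pattern (emcee 2.x API;
# lnprob_flat is a placeholder log-probability):
import h5py
import numpy as np
import emcee


def lnprob_flat(theta):
    # Placeholder posterior: an isotropic unit Gaussian.
    return -0.5 * np.sum(theta ** 2)


def stream_chain_to_hdf5(outfile, nwalkers=20, ndim=3, nsteps=100, thin=10):
    nsaved = len([i for i in range(nsteps) if i % thin == 0])
    p0 = 1e-3 * np.random.randn(nwalkers, ndim)
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob_flat)
    with h5py.File(outfile, "w") as f:
        f.create_dataset("chain", (nwalkers, nsaved, ndim), dtype=np.float64)
        f.create_dataset("lnprob", (nwalkers, nsaved), dtype=np.float64)
    for i, (pos, lnprob, state) in enumerate(
            sampler.sample(p0, iterations=nsteps, storechain=False)):
        if i % thin == 0:
            with h5py.File(outfile, "a") as f:
                f["chain"][:, i // thin, :] = pos  # integer division for the index
                f["lnprob"][:, i // thin] = lnprob
    return outfile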
def mcmc(zmin, zmax, iteration):

    ndim, nwalkers = 16, 50
    bounds = [(-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2),
              (1.5, 3.0), (0.7, 2.0), (0.3, 1.0),
              (-20.0, -1.0), (-20.0, -2.0), (-20.0, -2.0),
              (0, 1), (0.0, 4.0), (0, 2.0), (0, 1),
              (-7.2, 10.0), (-7.2, 10.0), (-7.2, 10.0)]
    p0 = np.array([0.0, 0.0, 0.0, 2.0, 1.0, 0.5,
                   np.log(0.1), np.log(0.1), np.log(0.1),
                   0.7, 1.5, 1.0, 0.4,
                   np.log(2.0), np.log(2.0), np.log(2.0)])
    p0 = [p0 + 1e-5 * np.random.randn(ndim) for k in range(nwalkers)]

    pool = MPIPool(loadbalance=True)
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Set up the sampler.
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=pool)
    #sampler.sample(p0, 1000)
    #print sampler.chain.shape

    # Run the chain, printing progress as we go.
    for it, result in enumerate(sampler.sample(p0, iterations=1000)):
        print it

    # Run a burn-in chain and save the final location.
    #pos, _, _, _ = sampler.run_mcmc(p0, 3000)
    #pos, _, _, _ = sampler.sample(p0, 3000)

    from matplotlib.ticker import MaxNLocator

    sample = sampler.chain
    npars = sample.shape[2]
    fig, axes = plt.subplots(npars, 1, sharex=True, figsize=(10, 12))
    for i in xrange(npars):
        axes[i].plot(sample[:, :, i].T, color="b", alpha=.4, lw=.5)
        axes[i].yaxis.set_major_locator(MaxNLocator(5))
        axes[i].set_ylim([bounds[i][0], bounds[i][1]])
        axes[i].set_xlim(0, sample.shape[1])
        #axes[i].set_ylabel(labels[i], fontsize=25)
    axes[-1].set_xlabel("Step Number", fontsize=25)
    fig.tight_layout(h_pad=0.0)
    fig_file = ("/home/vakili/public_html/files/redsequence_all_temp/"
                + str(zmin) + "_z_" + str(zmax)
                + "burn_iter" + str(iteration) + ".png")
    plt.savefig(fig_file)
    plt.close()

    # Run the production chain.
    #sampler.reset()
    #sampler.run_mcmc(pos, 1000)

    """
    import corner
    labels = ["$m$", "$b$", "\ln f", "$Q$", "$M$", "$\ln V$"]
    #truths = true_params + [true_frac, true_outliers[0], np.log(true_outliers[1])]
    bounds = [(-0.2, 0.2), (0.7, 2.0), (-20.0, -2.0), (0, 1), (0.0, 2.0), (-7.2, 5.2)]
    #corner.corner(sampler.flatchain, bins=35, range=bounds, labels=labels)
    #plt.savefig("/home/vakili/public_html/files/mcmc.png")
    #plt.close()
    """

    sample = sampler.chain
    pool.close()

    npars = sample.shape[2]
    fig, axes = plt.subplots(npars, 1, sharex=True, figsize=(10, 12))
    for i in xrange(npars):
        axes[i].plot(sample[:, :, i].T, color="b", alpha=.4, lw=.5)
        axes[i].yaxis.set_major_locator(MaxNLocator(5))
        axes[i].set_ylim([bounds[i][0], bounds[i][1]])
        axes[i].set_xlim(0, sample.shape[1])
        #axes[i].set_ylabel(labels[i], fontsize=25)
    axes[-1].set_xlabel("Step Number", fontsize=25)
    fig.tight_layout(h_pad=0.0)
    fig_file = ("/home/vakili/public_html/files/redsequence_all_temp/"
                + str(zmin) + "_z_" + str(zmax)
                + "chain_iter" + str(iteration) + ".png")
    plt.savefig(fig_file)
    plt.close()

    """
    est = np.median(sampler.flatchain, axis=0)
    est[2] = np.median(np.exp(sampler.flatchain)**.5, axis=0)[2]
    est_err = np.std(sampler.flatchain, axis=0)
    est_err[2] = np.std(np.exp(sampler.flatchain)**.5, axis=0)[2]
    xx = np.linspace(14.5, 25.5, 1000)
    pred = est[1] + est[0]*(xx - 19)
    """

    return None

    """
    norm = 0.0
    post_prob = np.zeros(len(x))
    for i in range(sampler.chain.shape[1]):
        for j in range(sampler.chain.shape[0]):
            ll_fg, ll_bg = sampler.blobs[i][j]
            post_prob += np.exp(ll_fg - np.logaddexp(ll_fg, ll_bg))
            norm += 1
    post_prob /= norm
    print post_prob
    """
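# --- Illustrative sketch (not part of the original code) ---
# The two plotting blocks above are the same trace plot written out twice:
# one panel per parameter, one line per walker. A reusable version of that
# figure, assuming a chain of shape (nwalkers, nsteps, ndim):
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator


def plot_traces(chain, fig_file, bounds=None):
    npars = chain.shape[2]
    fig, axes = plt.subplots(npars, 1, sharex=True, figsize=(10, 12))
    axes = np.atleast_1d(axes)  # handles the one-parameter case
    for i in range(npars):
        axes[i].plot(chain[:, :, i].T, color="b", alpha=.4, lw=.5)
        axes[i].yaxis.set_major_locator(MaxNLocator(5))
        if bounds is not None:
            axes[i].set_ylim(bounds[i][0], bounds[i][1])
        axes[i].set_xlim(0, chain.shape[1])
    axes[-1].set_xlabel("Step Number", fontsize=25)
    fig.tight_layout(h_pad=0.0)
    fig.savefig(fig_file)
    plt.close(fig)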
def main(argv): ################## #These change a lot numWaveforms = 16 numThreads = 12 ndim = 6 * numWaveforms + 8 nwalkers = 2 * ndim iter = 50 burnIn = 40 wfPlotNumber = 10 ###################### # plt.ion() fitSamples = 200 #Prepare detector zero_1 = -5.56351644e+07 pole_1 = -1.38796386e+04 pole_real = -2.02559385e+07 pole_imag = 9885315.37450211 zeros = [zero_1, 0] poles = [pole_real + pole_imag * 1j, pole_real - pole_imag * 1j, pole_1] system = signal.lti(zeros, poles, 1E7) tempGuess = 77.89 gradGuess = 0.0483 pcRadGuess = 2.591182 pcLenGuess = 1.613357 #Create a detector model detName = "conf/P42574A_grad%0.2f_pcrad%0.2f_pclen%0.2f.conf" % (0.05, 2.5, 1.65) det = Detector(detName, temperature=tempGuess, timeStep=1., numSteps=fitSamples * 10, tfSystem=system) det.LoadFields("P42574A_fields_v3.npz") det.SetFields(pcRadGuess, pcLenGuess, gradGuess) tempIdx = -8 gradIdx = -7 pcRadIdx = -6 pcLenIdx = -5 #and the remaining 4 are for the transfer function fig_size = (20, 10) #Create a decent start guess by fitting waveform-by-waveform wfFileName = "P42574A_512waveforms_%drisetimeculled.npz" % numWaveforms if os.path.isfile(wfFileName): data = np.load(wfFileName) results = data['results'] wfs = data['wfs'] numWaveforms = wfs.size else: print "No saved waveforms available. Loading from Data" exit(0) #prep holders for each wf-specific param r_arr = np.empty(numWaveforms) phi_arr = np.empty(numWaveforms) z_arr = np.empty(numWaveforms) scale_arr = np.empty(numWaveforms) t0_arr = np.empty(numWaveforms) smooth_arr = np.ones(numWaveforms) * 7. simWfArr = np.empty((1, numWaveforms, fitSamples)) #Prepare the initial value arrays for (idx, wf) in enumerate(wfs): wf.WindowWaveformTimepoint(fallPercentage=.99) r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx], t0_arr[ idx], smooth_arr[idx] = results[idx]['x'] t0_arr[ idx] += 10 #because i had a different windowing offset back in the day #Plot the waveforms to take a look at the initial guesses if False: fig = plt.figure() for (idx, wf) in enumerate(wfs): print "WF number %d:" % idx print " >>r: %f\n >>phi %f\n >>z %f\n >>e %f\n >>t0 %f\n >>smooth %f" % ( r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx], t0_arr[idx], smooth_arr[idx]) ml_wf = det.GetSimWaveform(r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx] * 100, t0_arr[idx], fitSamples, smoothing=smooth_arr[idx]) plt.plot(ml_wf, color="b") plt.plot(wf.windowedWf, color="r") value = raw_input(' --> Press q to quit, any other key to continue\n') if value == 'q': exit(0) #Initialize this thread's globals initializeDetectorAndWaveforms(det, wfs) #Initialize the multithreading pool = MPIPool() if not pool.is_master(): pool.wait() sys.exit(0) #Do the MCMC mcmc_startguess = np.hstack(( r_arr[:], phi_arr[:], z_arr[:], scale_arr[:] * 100., t0_arr[:], smooth_arr[:], # waveform-specific params tempGuess, gradGuess, pcRadGuess, pcLenGuess, zero_1, pole_1, pole_real, pole_imag)) # detector-specific #number of walkers _must_ be even if nwalkers % 2: nwalkers += 1 #Initialize walkers with a random, narrow ball around the start guess pos0 = [ mcmc_startguess + 1e-2 * np.random.randn(ndim) * mcmc_startguess for i in range(nwalkers) ] #Make sure everything in the initial guess is within bounds for pos in pos0: pos[:numWaveforms] = np.clip(pos[:numWaveforms], 0, np.floor(det.detector_radius * 10.) / 10.) 
        pos[numWaveforms:2 * numWaveforms] = np.clip(
            pos[numWaveforms:2 * numWaveforms], 0, np.pi / 4)
        pos[2 * numWaveforms:3 * numWaveforms] = np.clip(
            pos[2 * numWaveforms:3 * numWaveforms], 0,
            np.floor(det.detector_length * 10.) / 10.)
        pos[4 * numWaveforms:5 * numWaveforms] = np.clip(
            pos[4 * numWaveforms:5 * numWaveforms], 0, fitSamples)
        pos[5 * numWaveforms:6 * numWaveforms] = np.clip(
            pos[5 * numWaveforms:6 * numWaveforms], 0, 20.)

        pos[tempIdx] = np.clip(pos[tempIdx], 40, 120)
        pos[gradIdx] = np.clip(pos[gradIdx], det.gradList[0], det.gradList[-1])
        pos[pcRadIdx] = np.clip(pos[pcRadIdx], det.pcRadList[0],
                                det.pcRadList[-1])
        pos[pcLenIdx] = np.clip(pos[pcLenIdx], det.pcLenList[0],
                                det.pcLenList[-1])

        prior = lnprior(pos)
        if not np.isfinite(prior):
            print "BAD PRIOR WITH START GUESS YOURE KILLING ME SMALLS"
            print pos
            exit(0)

    #Initialize, run the MCMC
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=pool)

    #w/ progress bar, & time the thing
    start = timer()
    for (idx, result) in enumerate(
            sampler.sample(pos0, iterations=iter, storechain=True)):
        continue
    end = timer()
    pool.close()

    print "Elapsed time: " + str(end - start)

    print "Dumping chain to file..."
    np.save("sampler_mpi_%dwfs.npy" % numWaveforms, sampler.chain)

    print "Making MCMC steps figure..."

    ######### Plots for Waveform params
    stepsFig = plt.figure(2, figsize=fig_size)
    plt.clf()
    ax0 = stepsFig.add_subplot(611)
    ax1 = stepsFig.add_subplot(612, sharex=ax0)
    ax2 = stepsFig.add_subplot(613, sharex=ax0)
    ax3 = stepsFig.add_subplot(614, sharex=ax0)
    ax4 = stepsFig.add_subplot(615, sharex=ax0)
    ax5 = stepsFig.add_subplot(616, sharex=ax0)
    ax0.set_ylabel('r')
    ax1.set_ylabel('phi')
    ax2.set_ylabel('z')
    ax3.set_ylabel('scale')
    ax4.set_ylabel('t0')
    ax5.set_ylabel('smoothing')

    for i in range(nwalkers):
        for j in range(wfs.size):
            ax0.plot(sampler.chain[i, :, 0 + j], alpha=0.3)                 # r
            ax1.plot(sampler.chain[i, :, numWaveforms + j], alpha=0.3)      # phi
            ax2.plot(sampler.chain[i, :, 2 * numWaveforms + j], alpha=0.3)  # z
            ax3.plot(sampler.chain[i, :, 3 * numWaveforms + j], alpha=0.3)  # energy
            ax4.plot(sampler.chain[i, :, 4 * numWaveforms + j], alpha=0.3)  # t0
            ax5.plot(sampler.chain[i, :, 5 * numWaveforms + j], alpha=0.3)  # smoothing

    plt.savefig("emcee_mpi_wfchain_%dwfs.png" % numWaveforms)

    ######### Plots for Detector params
    stepsFigDet = plt.figure(3, figsize=fig_size)
    plt.clf()
    ax0 = stepsFigDet.add_subplot(411)
    ax1 = stepsFigDet.add_subplot(412, sharex=ax0)
    ax2 = stepsFigDet.add_subplot(413, sharex=ax0)
    ax3 = stepsFigDet.add_subplot(414, sharex=ax0)
    ax0.set_ylabel('temp')
    ax1.set_ylabel('grad')
    ax2.set_ylabel('pcRad')
    ax3.set_ylabel('pcLen')

    for i in range(nwalkers):
        ax0.plot(sampler.chain[i, :, tempIdx], "b", alpha=0.3)   # temp
        ax1.plot(sampler.chain[i, :, gradIdx], "b", alpha=0.3)   # grad
        ax2.plot(sampler.chain[i, :, pcRadIdx], "b", alpha=0.3)  # pcrad
        ax3.plot(sampler.chain[i, :, pcLenIdx], "b", alpha=0.3)  # pclen

    plt.savefig("emcee_mpi_detchain_%dwfs.png" % numWaveforms)

    #and for the transfer function
    stepsFigTF = plt.figure(4, figsize=fig_size)
    plt.clf()
    tf0 = stepsFigTF.add_subplot(411)
    tf1 = stepsFigTF.add_subplot(412, sharex=ax0)
    tf2 = stepsFigTF.add_subplot(413, sharex=ax0)
    tf3 = stepsFigTF.add_subplot(414, sharex=ax0)
    tf0.set_ylabel('zero_1')
    tf1.set_ylabel('pole_1')
    tf2.set_ylabel('pole_real')
    tf3.set_ylabel('pole_imag')

    for i in range(nwalkers):
        tf0.plot(sampler.chain[i, :, -4], "b", alpha=0.3)  # zero_1
        tf1.plot(sampler.chain[i, :, -3], "b", alpha=0.3)  # pole_1
        tf2.plot(sampler.chain[i, :, -2], "b", alpha=0.3)  # pole_real
        tf3.plot(sampler.chain[i, :, -1], "b", alpha=0.3)  # pole_imag
plt.savefig("emcee_mpi_tfchain_%dwfs.png" % numWaveforms) samples = sampler.chain[:, burnIn:, :].reshape((-1, ndim)) print "temp is %f" % np.median(samples[:, tempIdx]) print "grad is %f" % np.median(samples[:, gradIdx]) print "pcrad is %f" % np.median(samples[:, pcRadIdx]) print "pclen is %f" % np.median(samples[:, pcLenIdx]) print "zero_1 is %f" % np.median(samples[:, -4]) print "pole_1 is %f" % np.median(samples[:, -3]) print "pole_real is %f" % np.median(samples[:, -2]) print "pole_imag is %f" % np.median(samples[:, -1]) #TODO: Aaaaaaand plot some waveforms.. simWfs = np.empty((wfPlotNumber, numWaveforms, fitSamples)) for idx, (theta) in enumerate(samples[np.random.randint( len(samples), size=wfPlotNumber)]): temp, impGrad, pcRad, pcLen = theta[tempIdx], theta[gradIdx], theta[ pcRadIdx], theta[pcLenIdx] zero_1, pole_1, pole_real, pole_imag = theta[-4:] r_arr, phi_arr, z_arr, scale_arr, t0_arr, smooth_arr = theta[:-8].reshape( (6, numWaveforms)) det.SetTemperature(temp) det.SetFields(pcRad, pcLen, impGrad) zeros = [zero_1, 0] poles = [ pole_real + pole_imag * 1j, pole_real - pole_imag * 1j, pole_1 ] det.SetTransferFunction(zeros, poles, 1E7) for wf_idx in range(wfs.size): wf_i = det.GetSimWaveform(r_arr[wf_idx], phi_arr[wf_idx], z_arr[wf_idx], scale_arr[wf_idx], t0_arr[wf_idx], fitSamples) simWfs[idx, wf_idx, :] = wf_i if wf_i is None: print "Waveform %d, %d is None" % (idx, wf_idx) residFig = plt.figure(4, figsize=(20, 15)) helpers.plotManyResidual(simWfs, wfs, figure=residFig) plt.savefig("emcee_mpi_waveforms_%dwfs.png" % numWaveforms)
def mcmc(result_path, result_name, param_dict, nwalkers=10, steps=2, output='./mcmc_chains.h5', overwrite=False, covmat=None, redshift=0.05, logmass_min=11., logmass_max=16.): pool = MPIPool(loadbalance=True, debug=False) rank = pool.rank comm = pool.comm # initializign CAMB power spectrum _camb.set_matter_power(redshifts=[redshift, ], kmax=3.e1) if rank == 0: result = h5py.File(result_path + result_name, 'r') pksz = result['pkSZ'][:] jk_sample = False if 'pkSZ_random' in result.keys(): pksz_random = result['pkSZ_random'][:] jk_sample = False elif 'pkSZ_jk' in result.keys(): pksz_random = result['pkSZ_jk'][:] jk_sample = True else: print "Need random samples" exit() pksz_bins = result['pkSZ_bins'][:] d_bins = pksz_bins[1:] - pksz_bins[:-1] pksz_bins = pksz_bins[:-1] + 0.5 * d_bins result.close() lin_scale = pksz_bins > 25. pksz_obs = pksz[lin_scale] pksz_err = None pksz_cov = np.cov(pksz_random, rowvar=False, bias=jk_sample) if jk_sample: spl_n = float(pksz_random.shape[0] - 1) bin_n = pksz_cov.shape[0] pksz_cov *= spl_n pksz_covm = np.linalg.inv(pksz_cov[:, lin_scale][lin_scale, :]) pksz_covm *= (spl_n - bin_n) / spl_n else: pksz_covm = np.linalg.inv(pksz_cov[:, lin_scale][lin_scale, :]) pksz_bins = pksz_bins[lin_scale] else: pksz_obs = None pksz_err = None pksz_bins = None pksz_covm = None pksz_obs = comm.bcast(pksz_obs, root=0) #pksz_err = comm.bcast(pksz_err, root=0) pksz_bins = comm.bcast(pksz_bins, root=0) pksz_covm = comm.bcast(pksz_covm, root=0) comm.barrier() if rank != 0: pool.wait() sys.exit(0) param, theta, theta_min, theta_max, param_tex, cosm_param_idx, camb_run\ = read_param_dict(param_dict, camb_param=_camb.params.keys()) paramnames = open(output.replace('.h5', '.paramnames'), 'w') for i in range(param.shape[0]): paramnames.write('%s\t\t'%param[i] + param_tex[i] + '\n') paramnames.close() # param = [cent, min, max] #tau_bar = [1., -1., 3.] #mnu = [0.4, 0.2, 0.6] #ombh2 = [0.0221, 0.005, 0.1] #omch2 = [0.12, 0.001, 0.99] #w = [-1, -3., 1.] #wa = [0., -3., 3.] #theta = param #theta_min = np.array([tau_bar[1], w[1]]) #theta_max = np.array([tau_bar[2], w[2]]) ndim = theta.shape[0] #threads = nwalkers threads = 1 pos = [theta + 1.e-4 * np.random.randn(ndim) for i in range(nwalkers)] sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, threads=threads, pool=pool, args=(pksz_bins, pksz_obs, pksz_err, pksz_covm, param), kwargs={ 'theta_min' :theta_min, 'theta_max' :theta_max, 'cosm_param_idx' :cosm_param_idx, 'camb_run' :camb_run, 'T_CMB' :2.7255, 'logmass_min' :logmass_min, 'logmass_max' :logmass_max}) # Run 100 steps as a burn-in. #pos, prob, state = sampler.run_mcmc(pos, 100) # Reset the chain to remove the burn-in samples. #sampler.reset() # Starting from the final position in the burn-in chain #sampler.run_mcmc(pos, steps, rstate0=state) #chain = np.zeros((size, ) + chain_local.shape ) #comm.Gather(chain_local, chain, root=0) step_group = 100 if step_group > steps: step_group = steps n_steps = steps / step_group state = None if rank == 0: if overwrite or not os.path.exists(output): mcmc_chains = h5py.File(output, 'w') #mcmc_chains['n_steps'] = n_steps mcmc_chains['n_steps'] = 0 mcmc_chains['pos'] = pos if covmat is not None: mcmc_chains['covmat'] = np.zeros((ndim, ndim)) #mcmc_chains['state'] = 0 n_steps0 = 0 mcmc_chains.close() else: mcmc_chains = h5py.File(output, 'a') n_steps0 = mcmc_chains['n_steps'][...] pos = mcmc_chains['pos'][...] if covmat is not None and n_steps0 != 0: covmat = mcmc_chains['covmat'] #state = mcmc_chains['state'][...] 
#if state == 0: state = None mcmc_chains.close() for i in range(n_steps0, n_steps0 + n_steps): if rank == 0: t1 = time.time() if covmat is not None: mh_proposal = MH_proposal(covmat, sampler._random) else: mh_proposal = None #mh_proposal = None pos, prob, state = sampler.run_mcmc(pos, step_group, state, mh_proposal=mh_proposal) if rank == 0: chain = sampler.chain chisq = sampler.lnprobability * (-2.) mcmc_chains = h5py.File(output, 'a') mcmc_chains['chains_%02d'%i] = chain mcmc_chains['chisqs_%02d'%i] = chisq mcmc_chains['n_steps'][...] = i + 1 mcmc_chains['pos'][...] = pos #mcmc_chains['state'][...] = state #pksz_covm = np.cov(pksz_random[:, lin_scale], rowvar=False) if covmat is not None: covmat = np.cov(chain.reshape(-1, chain.shape[-1]), rowvar=False) mcmc_chains['covmat'][...] = covmat mcmc_chains.close() print "[TIMING] %3d x %4d Steps: %8.4f [s]"\ %(nwalkers, step_group, time.time() - t1) print "Mean acceptance fraction: ", np.mean(sampler.acceptance_fraction) sampler.reset() pool.close()
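# --- Illustrative sketch (not part of the original code) ---
# mcmc() above runs the chain in groups of steps and checkpoints the walker
# positions plus a step counter to HDF5 after every group, so an interrupted
# run can restart where it stopped. The bookkeeping alone, stripped of the
# kSZ-specific pieces (emcee 2.x API; lnprob_flat is a placeholder):
import os

import h5py
import numpy as np
import emcee


def lnprob_flat(theta):
    # Placeholder posterior: an isotropic unit Gaussian.
    return -0.5 * np.sum(theta ** 2)


def run_checkpointed(output, nwalkers=20, ndim=3, steps=500, step_group=100):
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob_flat)
    if not os.path.exists(output):
        with h5py.File(output, "w") as f:
            f["n_steps"] = 0
            f["pos"] = 1e-4 * np.random.randn(nwalkers, ndim)
    with h5py.File(output, "r") as f:
        n_done = int(f["n_steps"][...])
        pos = f["pos"][...]
    state = None
    for i in range(n_done, steps // step_group):
        pos, prob, state = sampler.run_mcmc(pos, step_group, rstate0=state)
        with h5py.File(output, "a") as f:
            f["chains_%02d" % i] = sampler.chain  # this group's chain segment
            f["n_steps"][...] = i + 1
            f["pos"][...] = pos
        sampler.reset()
    return output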
# NOTE: this fragment begins mid-statement in the source; the enumerate(...)
# loop header and the closing .format(...) of the final print are
# reconstructed here from the loop body and the format fields they must feed.
for i, samplerResult in enumerate(
        sampler.sample(burninPos, lnprob0=burninProb, rstate0=burninRstate,
                       iterations=mcIterations)):
    #if (i+1)%2 == 0:
    #    print("{0:5.1%}".format(float(i)/mcIterations))
    print('running step {0} of {1} in main chain'.format(i, mcIterations))
    fout = open('mainchain.dat', 'a')
    pos = samplerResult[0]
    prob = samplerResult[1]
    for k in range(pos.shape[0]):
        fout.write("{0} {1} {2}\n".format(k, pos[k], prob[k]))
    fout.close()

if useMPI:
    processPool.close()

samples = sampler.chain[:, :, :].reshape((-1, nDim))

if not e0_only:
    # Compute the quantiles.
    # this comes from https://github.com/dfm/emcee/blob/master/examples/line.py
    quartileResults = list(map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                               zip(*np.percentile(samples, [16, 50, 84],
                                                  axis=0))))
    ed_mcmc, loc_mcmc, scale_mcmc, s_mcmc = quartileResults[:4]
    print("""MCMC result:
    E_D initial = {0[0]} +{0[1]} -{0[2]}
    loc = {1[0]} +{1[1]} -{1[2]}
    scale = {2[0]} +{2[1]} -{2[2]}
    s = {3[0]} +{3[1]} -{3[2]}
    """.format(ed_mcmc, loc_mcmc, scale_mcmc, s_mcmc))
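# --- Illustrative sketch (not part of the original code) ---
# The quantile computation above (from the emcee "line" example) reports each
# parameter as median +upper/-lower, where the uncertainties come from the
# 16th and 84th percentiles. The same operation as a small helper:
import numpy as np


def summarize(samples):
    """Return (median, +err, -err) per column of a (nsamples, ndim) array."""
    lo, med, hi = np.percentile(samples, [16, 50, 84], axis=0)
    return [(m, h - m, m - l) for l, m, h in zip(lo, med, hi)]

# e.g., summarize(np.random.randn(10000, 2)) -> roughly [(0, 1, 1), (0, 1, 1)]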
def main(runmpi=False, nw=100, th=6, bi=10, fr=10):

    if runmpi:
        pool = MPIPool()
        if not pool.is_master():
            pool.wait()
            sys.exit(0)
    else:
        pool = None

    time, flux, ferr = get_lc()

    toi = 175
    cadence = 120
    rho = 18
    rho_unc = 1
    nplanets = 3

    ld1 = 0.1642
    ld2 = 0.4259

    dil = 0.0

    periods = [2.25321888449, 3.6906274382, 7.45131144274]
    impacts = [0.26, 0.21, 0.89]
    T0s = [1354.90455205, 1356.203624274, 1355.2866249]
    rprss = [0.02011, 0.038564, 0.0438550698]

    planet_guess = np.array([])
    for i in range(nplanets):
        planet_guess = np.r_[planet_guess,
                             T0s[i], periods[i], impacts[i], rprss[i],
                             0.0, 0.0]

    nwalkers = nw
    threads = th
    burnin = bi
    fullrun = fr
    thin = 1

    M = tmod.transitmc2(nplanets, cadence)

    M.get_ld(ld1, ld2)
    M.already_open(time, flux, ferr)

    M.get_rho([rho, rho_unc])
    M.get_zpt(0.0)

    M.get_sol(*planet_guess)

    outfile = 'koi{0}_np{1}.hdf5'.format(toi, nplanets)

    p0 = M.get_guess(nwalkers)
    l_var = np.shape(p0)[1]

    tom = tmod.logchi2
    args = [M.nplanets, M.rho_0, M.rho_0_unc,
            M.ld1, M.ld1_unc, M.ld2, M.ld2_unc,
            M.flux, M.err, M.fixed_sol, M.time,
            M._itime, M._ntt, M._tobs, M._omc, M._datatype]

    N = len([indval for indval in range(fullrun) if indval % thin == 0])
    with h5py.File(outfile, u"w") as f:
        f.create_dataset("time", data=M.time)
        f.create_dataset("flux", data=M.flux)
        f.create_dataset("err", data=M.err)
        f.attrs["rho_0"] = M.rho_0
        f.attrs["rho_0_unc"] = M.rho_0_unc
        f.attrs["nplanets"] = M.nplanets
        f.attrs["ld1"] = M.ld1
        f.attrs["ld2"] = M.ld2
        g = f.create_group("mcmc")
        g.attrs["nwalkers"] = nwalkers
        g.attrs["burnin"] = burnin
        g.attrs["iterations"] = fullrun
        g.attrs["thin"] = thin
        g.create_dataset("fixed_sol", data=M.fixed_sol)
        g.create_dataset("fit_sol_0", data=M.fit_sol_0)
        c_ds = g.create_dataset("chain",
                                (nwalkers, N, l_var),
                                dtype=np.float64)
        lp_ds = g.create_dataset("lnprob",
                                 (nwalkers, N),
                                 dtype=np.float64)

    if runmpi:
        sampler = emcee.EnsembleSampler(nwalkers, l_var, tom,
                                        args=args, pool=pool)
    else:
        sampler = emcee.EnsembleSampler(nwalkers, l_var, tom,
                                        args=args, threads=th)

    time1 = thetime.time()
    p2, prob, state = sampler.run_mcmc(p0, burnin, storechain=False)
    sampler.reset()

    with h5py.File(outfile, u"a") as f:
        g = f["mcmc"]
        g.create_dataset("burnin_pos", data=p2)
        g.create_dataset("burnin_prob", data=prob)

    time2 = thetime.time()
    print('burn-in took ' + str((time2 - time1) / 60.) + ' min')

    time1 = thetime.time()
    for i, (pos, lnprob, state) in enumerate(tqdm(
            sampler.sample(p2, iterations=fullrun,
                           rstate0=state, storechain=False),
            total=fullrun)):
        #do the thinning in the loop here
        if i % thin == 0:
            ind = i // thin  # integer division so h5py gets an int index
            with h5py.File(outfile, u"a") as f:
                g = f["mcmc"]
                c_ds = g["chain"]
                lp_ds = g["lnprob"]
                c_ds[:, ind, :] = pos
                lp_ds[:, ind] = lnprob

    time2 = thetime.time()
    print('MCMC run took ' + str((time2 - time1) / 60.) + ' min')
    print('')
    print("Mean acceptance: " + str(np.mean(sampler.acceptance_fraction)))
    print('')

    if runmpi:
        pool.close()
    else:
        sampler.pool.close()

    return sampler
def run_emcee_seeded(light_curve, transit_params, spot_parameters, n_steps, n_walkers, output_path, burnin=0.7, n_extra_spots=1, skip_priors=False): """ Fit for transit depth and spot parameters given initial guess informed by results from `peak_finder` Parameters ---------- light_curve : `friedrich.lightcurve.TransitLightCurve` Light curve to fit transit_params : `~batman.TransitParams` Transit light curve parameters spot_parameters : list List of all spot parameters in [amp, t0, sig, amp, t0, sig, ...] order n_steps : int Number of MCMC steps to take n_walkers : int Number of MCMC walkers to initialize (must be even, more than twice the number of free params in fit) output_path : str Path to HDF5 archive output for storing results burnin : float Fraction of total number of steps to save to output (will truncate the first `burnin` of the light curve) n_extra_spots : int Add `n_extra_spots` extra spots to the fit to soak up spots not predicted by `peak_finder` skip_priors : bool Should a prior be applied to the depth parameter? Returns ------- sampler : `emcee.EnsembleSampler` Sampler object returned by `emcee` """ times = light_curve.times.jd fluxes = light_curve.fluxes errors = light_curve.errors lower_t_bound, upper_t_bound = get_in_transit_bounds(times, transit_params) amps = spot_parameters[::3] init_depth = transit_params.rp**2 extra_spot_params = [0.1*np.min(amps), np.mean(times), 0.05*(upper_t_bound-lower_t_bound)] fit_params = np.concatenate([spot_parameters, n_extra_spots*extra_spot_params]) ndim, nwalkers = len(fit_params), n_walkers pos = [] while len(pos) < nwalkers: realization = fit_params + 1e-5*np.random.randn(ndim) if not np.isinf(lnprior(realization, fluxes, lower_t_bound, upper_t_bound, transit_params, skip_priors)): pos.append(realization) print('Begin MCMC...') pool = MPIPool(loadbalance=True) if not pool.is_master(): pool.wait() sys.exit(0) sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(times, fluxes, errors, lower_t_bound, upper_t_bound, transit_params, skip_priors), pool=pool) sampler.run_mcmc(pos, n_steps) print('Finished MCMC...') pool.close() burnin_len = int(burnin*n_steps) from .storage import create_results_archive create_results_archive(output_path, light_curve, sampler, burnin_len, ndim) return sampler
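# --- Illustrative sketch (not part of the original code) ---
# run_emcee_seeded() above builds its walker ball by rejection: it keeps
# drawing perturbed copies of the initial guess until it has nwalkers
# positions with a finite prior. The bare pattern, with a placeholder prior:
import numpy as np


def lnprior_demo(theta):
    # Placeholder: flat prior restricted to the unit cube.
    return 0.0 if np.all((theta >= 0) & (theta <= 1)) else -np.inf


def seeded_ball(fit_params, nwalkers, lnprior=lnprior_demo, width=1e-5):
    pos = []
    while len(pos) < nwalkers:
        trial = fit_params + width * np.random.randn(len(fit_params))
        if np.isfinite(lnprior(trial)):
            pos.append(trial)
    return pos

# e.g., pos0 = seeded_ball(np.array([0.5, 0.5]), nwalkers=50)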
### skips if files do not exist for some reason print 'Warning: file does not exist, cosmo, snap' return if os.path.isfile(out_fn): ###### in case the code breaks return ######### read rockstar files print 'Opening rockstar files:', rockstar_fn reader = sim_manager.TabularAsciiReader(rockstar_fn, columns_to_keep_dict) rock_arr = reader.read_ascii() logM = log10(rock_arr['halo_mvir']) rock_arr = 0 ## release memory hmf = histogram(logM, bins=hist_bins)[0] save(out_fn, hmf) all_snaps = [] for i in range(len(cosmo_arr)): for isnap in arange(30, nsnaps_arr[i]): all_snaps.append([cosmo_arr[i], int(isnap)]) pool = MPIPool() if not pool.is_master(): pool.wait() sys.exit(0) pool.map(Phm_gen, all_snaps) #pool.map(Phh_gen, all_snaps) #pool.map(hmf_gen, all_snaps) pool.close()
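# --- Illustrative sketch (not part of the original code) ---
# The snippet above uses MPIPool for task parallelism rather than for emcee
# itself: the master builds a task list and pool.map() farms one task out to
# each worker process. The skeleton of that usage (assumes mpi4py; launch
# with, e.g., mpirun -np 8 python script.py; process_snapshot and the task
# values are placeholders):
import sys

from emcee.utils import MPIPool


def process_snapshot(task):
    cosmo, isnap = task
    # ... expensive per-snapshot work would go here ...
    return (cosmo, isnap)


if __name__ == "__main__":
    tasks = [(cosmo, isnap) for cosmo in ("cosmo_a", "cosmo_b")
             for isnap in range(30, 33)]
    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)
    results = pool.map(process_snapshot, tasks)
    pool.close()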
def mcmc_mpi(Nwalkers, Niter, observables=['nbar', 'xi'],
             data_dict={'Mr': 21, 'b_normal': 0.25},
             prior_name='first_try', mcmcrun=None, continue_chain=False):
    ''' Standard MCMC implementation

    Parameters
    -----------
    - Nwalkers : Number of walkers
    - Niter : Number of MCMC iterations
    - observables : list of observables. Options are:
      ['nbar','xi'], ['nbar','gmf'], ['xi']
    - data_dict : dictionary that specifies the observation keywords
    - continue_chain : restart from the end of an existing chain file.
      (The original body referenced this flag and Niter without defining
      them, so both are exposed as arguments here.)
    '''
    # Initializing the vector of observables and inverse covariance matrix
    if observables == ['xi']:
        fake_obs = Data.data_xi(**data_dict)
        #fake_obs_icov = Data.data_inv_cov('xi', **data_dict)
        fake_obs_icov = Data.data_cov(inference='mcmc', **data_dict)[1:16, 1:16]
    if observables == ['nbar', 'xi']:
        fake_obs = np.hstack(
            [Data.data_nbar(**data_dict), Data.data_xi(**data_dict)])
        fake_obs_icov = Data.data_cov(inference='mcmc', **data_dict)[:16, :16]
    if observables == ['nbar', 'gmf']:
        ##### FIRST BIN OF GMF DROPPED ###############
        # CAUTION: hardcoded
        fake_obs = np.hstack(
            [Data.data_nbar(**data_dict), Data.data_gmf(**data_dict)[1:]])
        fake_obs_icov = np.zeros((10, 10))
        #print Data.data_cov(**data_dict)[17: , 17:].shape
        # Covariance matrix being adjusted accordingly
        fake_obs_icov[1:, 1:] = Data.data_cov(inference='mcmc',
                                              **data_dict)[17:, 17:]
        fake_obs_icov[0, 1:] = Data.data_cov(inference='mcmc',
                                             **data_dict)[0, 17:]
        fake_obs_icov[1:, 0] = Data.data_cov(inference='mcmc',
                                             **data_dict)[17:, 0]
        fake_obs_icov[0, 0] = Data.data_cov(inference='mcmc',
                                            **data_dict)[0, 0]

    # True HOD parameters
    data_hod_dict = Data.data_hod_param(Mr=data_dict['Mr'])
    data_hod = np.array([
        data_hod_dict['logM0'],               # log M0
        np.log(data_hod_dict['sigma_logM']),  # log(sigma)
        data_hod_dict['logMmin'],             # log Mmin
        data_hod_dict['alpha'],               # alpha
        data_hod_dict['logM1']                # log M1
    ])
    Ndim = len(data_hod)

    # Priors
    prior_min, prior_max = PriorRange(prior_name)
    prior_range = np.zeros((len(prior_min), 2))
    prior_range[:, 0] = prior_min
    prior_range[:, 1] = prior_max

    # mcmc chain output file
    chain_file = ''.join([
        util.mcmc_dir(),
        util.observable_id_flag(observables),
        '.', mcmcrun, '.mcmc_chain.dat'
    ])
    #print chain_file

    if os.path.isfile(chain_file) and continue_chain:
        print 'Continuing previous MCMC chain!'
        sample = np.loadtxt(chain_file)
        Nchain = Niter - (len(sample) / Nwalkers)  # iterations left to finish
        if Nchain > 0:
            pass
        else:
            raise ValueError
        print Nchain, ' iterations left to finish'

        # Initializing Walkers from the end of the chain
        pos0 = sample[-Nwalkers:]
    else:
        # new chain
        f = open(chain_file, 'w')
        f.close()
        Nchain = Niter

        # Initializing Walkers
        random_guess = data_hod
        pos0 = np.repeat(random_guess, Nwalkers).reshape(Ndim, Nwalkers).T + \
            5.e-2 * np.random.randn(Ndim * Nwalkers).reshape(Nwalkers, Ndim)
        #print pos0.shape

    # Initializing MPIPool
    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Initializing the emcee sampler
    hod_kwargs = {
        'prior_range': prior_range,
        'data': fake_obs,
        'data_icov': fake_obs_icov,
        'observables': observables,
        'Mr': data_dict['Mr']
    }
    sampler = emcee.EnsembleSampler(Nwalkers, Ndim, lnPost,
                                    pool=pool, kwargs=hod_kwargs)

    # Sample, appending the walker positions to the chain file as we go
    for result in sampler.sample(pos0, iterations=Nchain, storechain=False):
        position = result[0]
        #print position
        f = open(chain_file, 'a')
        for k in range(position.shape[0]):
            output_str = '\t'.join(position[k].astype('str')) + '\n'
            f.write(output_str)
        f.close()

    pool.close()
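# --- Illustrative sketch (not part of the original code) ---
# mcmc_mpi() above appends walker positions to a flat text file and, when
# continuing, re-seeds the walkers from the last Nwalkers rows of that file.
# That restart logic in isolation:
import os

import numpy as np


def restart_positions(chain_file, nwalkers, ndim, start_guess, spread=5e-2):
    """Last nwalkers rows of chain_file, or a fresh ball around start_guess."""
    if os.path.isfile(chain_file) and os.path.getsize(chain_file) > 0:
        sample = np.loadtxt(chain_file)
        return sample[-nwalkers:]
    return (np.repeat(start_guess, nwalkers).reshape(ndim, nwalkers).T
            + spread * np.random.randn(nwalkers, ndim))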