def sample_parallel_tempering(self, n_temps, n_walkers, burn_in, n_samples,
                              quiet=False):
    """
    Sample with parallel tempering.

    :param n_temps: number of temperatures in the ladder
    :param n_walkers: number of walkers per temperature
    :param burn_in: number of burn-in samples
    :param n_samples: number of production samples
    :return: MCMC samples
    """
    free_parameters = self._likelihood_model.free_parameters

    n_dim = len(free_parameters.keys())

    sampler = emcee.PTSampler(n_temps, n_walkers, n_dim,
                              self._log_like, self._log_prior)

    # Get one starting point for each temperature
    p0 = np.empty((n_temps, n_walkers, n_dim))

    for i in range(n_temps):
        p0[i, :, :] = self._get_starting_points(n_walkers)

    print("Running burn-in of %s samples...\n" % burn_in)

    p, lnprob, lnlike = sample_with_progress("Burn-in", p0, sampler, burn_in)

    # Reset sampler
    sampler.reset()

    print("\nSampling\n")

    _ = sample_with_progress("Sampling", p, sampler, n_samples,
                             lnprob0=lnprob, lnlike0=lnlike)

    self._sampler = sampler

    # Now build the _samples dictionary
    self._raw_samples = sampler.flatchain.reshape(-1,
                                                  sampler.flatchain.shape[-1])

    self._log_probability_values = None
    self._log_like_values = None
    self._marginal_likelihood = None

    self._build_samples_dictionary()
    self._build_results()

    # Display results
    if not quiet:
        self._results.display()

    return self.samples
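# A minimal, self-contained sketch of the same burn-in / reset / production
# pattern on a toy 2-D Gaussian. It assumes emcee < 3, where PTSampler still
# exists (it was removed in emcee 3; ptemcee is the maintained successor).
# All names here (log_like, log_prior, the sizes) are illustrative only.
import emcee
import numpy as np

def log_like(x):
    return -0.5 * np.sum(x ** 2)   # standard-normal log-likelihood

def log_prior(x):
    return 0.0                     # improper flat prior

ntemps, nwalkers, ndim = 4, 16, 2
sampler = emcee.PTSampler(ntemps, nwalkers, ndim, log_like, log_prior)

# one starting ball per temperature: shape (ntemps, nwalkers, ndim)
p0 = 1e-3 * np.random.randn(ntemps, nwalkers, ndim)

for p, lnprob, lnlike in sampler.sample(p0, iterations=100):
    pass                           # burn-in
sampler.reset()

for p, lnprob, lnlike in sampler.sample(p, lnprob0=lnprob, lnlike0=lnlike,
                                        iterations=500):
    pass                           # production

assert sampler.chain.shape == (ntemps, nwalkers, 500, ndim)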
def __init__(self, likelihood_evaluator, ntemps, nwalkers, pool=None,
             burn_in_iterations=None):

    try:
        import emcee
    except ImportError:
        raise ImportError("emcee is not installed.")

    # construct the sampler: PTSampler needs the likelihood and prior
    # functions separately
    likelihood_evaluator.set_callfunc('loglikelihood')
    ndim = len(likelihood_evaluator.waveform_generator.variable_args)
    sampler = emcee.PTSampler(ntemps, nwalkers, ndim,
                              likelihood_evaluator,
                              likelihood_evaluator._prior,
                              pool=pool)
    # initialize
    super(EmceePTSampler, self).__init__(sampler, likelihood_evaluator,
                                         min_burn_in=burn_in_iterations)
    self._nwalkers = nwalkers
    self._ntemps = ntemps
def __init__(self, likelihood_evaluator, ntemps, nwalkers, pool=None,
             likelihood_call=None):

    try:
        import emcee
    except ImportError:
        raise ImportError("emcee is not installed.")

    if likelihood_call is None:
        likelihood_call = likelihood_evaluator

    # construct the sampler: PTSampler needs the likelihood and prior
    # functions separately
    ndim = len(likelihood_evaluator.variable_args)
    sampler = emcee.PTSampler(ntemps, nwalkers, ndim,
                              _callloglikelihood(likelihood_call),
                              _callprior(likelihood_call),
                              pool=pool)
    # initialize
    super(EmceePTSampler, self).__init__(sampler, likelihood_evaluator)
    self._nwalkers = nwalkers
    self._ntemps = ntemps
def __init__(self, model, ntemps, nwalkers, betas=None,
             checkpoint_interval=None, checkpoint_signal=None,
             loglikelihood_function=None, nprocesses=1, use_mpi=False):

    self.model = model
    # create a wrapper for calling the model
    if loglikelihood_function is None:
        loglikelihood_function = 'loglikelihood'
    # frustratingly, emcee_pt does not support blob data, so we have to
    # turn it off
    model_call = models.CallModel(model, loglikelihood_function,
                                  return_all_stats=False)

    # these are used to help parallelize over multiple cores / MPI
    models._global_instance = model_call
    model_call = models._call_global_model
    prior_call = models._call_global_model_logprior
    self.pool = choose_pool(mpi=use_mpi, processes=nprocesses)

    # construct the sampler: PTSampler needs the likelihood and prior
    # functions separately
    ndim = len(model.variable_params)
    self._sampler = emcee.PTSampler(ntemps, nwalkers, ndim,
                                    model_call, prior_call,
                                    pool=self.pool, betas=betas)
    self.nwalkers = nwalkers
    self._ntemps = ntemps
    self._checkpoint_interval = checkpoint_interval
    self._checkpoint_signal = checkpoint_signal
def calculate_logevidence(cls, fp, thin_start=None, thin_end=None,
                          thin_interval=None):
    """Calculates the log evidence from the given file using emcee's
    thermodynamic integration.

    Parameters
    ----------
    fp : InferenceFile
        An open file handler to read the stats from.
    thin_start : int
        Index of the sample to begin returning stats. Default is to read
        stats after burn in. To start from the beginning set thin_start
        to 0.
    thin_interval : int
        Interval to accept every i-th sample. Default is to use the
        `fp.acl`. If `fp.acl` is not set, then use all stats
        (set thin_interval to 1).
    thin_end : int
        Index of the last sample to read. If not given then
        `fp.niterations` is used.

    Returns
    -------
    lnZ : float
        The estimate of log of the evidence.
    dlnZ : float
        The error on the estimate.
    """
    try:
        import emcee
    except ImportError:
        raise ImportError("emcee is not installed.")

    logstats = cls.read_likelihood_stats(fp, thin_start=thin_start,
                                         thin_end=thin_end,
                                         thin_interval=thin_interval,
                                         temps='all', flatten=False)
    # get the likelihoods
    logls = logstats['loglr'] + fp.lognl
    # we need the betas that were used
    betas = fp.attrs['betas']
    # annoyingly, thermodynamic integration in PTSampler is an instance
    # method, so we'll implement a dummy one
    ntemps = fp.ntemps
    nwalkers = fp.nwalkers
    ndim = len(fp.variable_args)
    dummy_sampler = emcee.PTSampler(ntemps, nwalkers, ndim, None, None,
                                    betas=betas)
    return dummy_sampler.thermodynamic_integration_log_evidence(
        logls=logls, fburnin=0.)
def run_chain(self, n_walkers, n_steps, ntemps, reset=False, name='mcmc',
              filename='MCMC.h5'):
    ndim = 3
    theta_initial = [0, 0.58, -0.014]  # gamma11, M11, N11

    if self.prior_type == 'flat':
        cov = np.diagflat([self.sub(self.priors['gamma11']),
                           self.sub(self.priors['M11']),
                           self.sub(self.priors['N11'])])
    elif self.prior_type == 'gaussian':
        cov = np.diagflat([self.priors['gamma11'][1],
                           self.priors['M11'][1],
                           self.priors['N11'][1]])

    # draw starting positions; for a flat prior, redraw until the point
    # lies inside the prior support instead of silently leaving zeros
    walkers_positions = np.zeros((ntemps, n_walkers, ndim))
    for i in range(ntemps):
        for j in range(n_walkers):
            while True:
                ran = np.random.multivariate_normal(theta_initial, cov)
                if (self.prior_type == 'gaussian'
                        or self.lnprior(ran) == 0.0):
                    walkers_positions[i][j] = ran
                    break

    with Pool() as pool:
        sampler = emcee.PTSampler(nwalkers=n_walkers, dim=ndim,
                                  ntemps=ntemps, logl=self.lnlike,
                                  logp=self.lnprob, pool=pool)
        start = time.time()
        sampler.run_mcmc(walkers_positions, N=n_steps, storechain=True)
        end = time.time()
        multi_time = end - start
        print("Multiprocessing took {0:.1f} seconds".format(multi_time))

    np.save('MCMC_PT', sampler.chain)
    np.save('chi2', sampler.lnprobability)
    return sampler
def __init__(self, pars, cov, regconsts, nwalkers=None, nthreads=1,
             ntemps=5):
    self.t0 = time.time()
    global hackpars, hackcosts, nregs
    self.pars = []
    self.costs = []
    self.deterministics = []
    self.regs = regconsts
    self.nregs = len(self.regs)
    self.ntemps = ntemps
    self.nthreads = nthreads
    for par in pars:
        try:
            if par.observed:
                self.costs.append(par)
            else:
                self.pars.append(par)
        except AttributeError:
            self.deterministics.append(par)
    # now append the regconsts to the pars, and give them a dummy covariance
    # for reg in self.regs:
    #     self.pars.append(reg)
    #     if type(cov) == list:
    #         self.cov.append(reg.value / 100.)
    #     else:
    #         cov = numpy.append(cov, reg.value / 100.)
    self.nvars = len(self.pars)
    self.ndim = self.nvars
    self.cov = cov
    if nwalkers is None:
        self.nwalkers = self.ndim * 8
    else:
        self.nwalkers = nwalkers
    hackpars = self.pars
    hackcosts = self.costs
    nregs = self.nregs
    print(emcee.__file__)
    try:
        self.sampler = emcee.PTSampler(self.ntemps, self.nwalkers,
                                       self.ndim, optFunc, logp,
                                       threads=nthreads)
    except AttributeError:
        # the bare string in the original did nothing; actually report it
        print("There is no parallel-tempered emcee sampler installed; "
              "try the untempered option?")
def run(self): print("enabling ParallelTempering sampler.") pos0 = [[self.para_guess + 1.0e-8*np.random.randn(self.ndim) for j in range(self.nwalkers)] for k in range(self.ntemps)] sampler = emcee.PTSampler(self.ntemps, self.nwalkers, self.ndim, self.LikelihoodsObj.lnlikelihood, self.PriorsObj.lnprior) # burn-in print("start burning in. nburn:", self.nburn) for j, result in enumerate(sampler.sample(pos0, iterations=self.nburn, thin=10)): self._display_bar(j, self.nburn) sys.stdout.write("\n") pos, lnpost, lnlike = result sampler.reset() # actual iteration print("start iterating. nsteps:", self.nsteps) for j, result in enumerate(sampler.sample(pos, iterations=self.nsteps)):#, lnprob0=lnpost, lnlike0=lnlike self._display_bar(j, self.nsteps) sys.stdout.write("\n") # modify samples samples = sampler.chain[0,:,:,:].reshape((-1,self.ndim)) # fold the parameter space of i if self.FitParametersObj.ifFreeInclination: idx = samples[:,-1]<0. samples[idx,-1]=-samples[idx,-1] idx = samples[:,-1]>np.pi/2. samples[idx,-1]=np.pi-samples[idx,-1] self.samples = samples # save evidence self.evidence = sampler.thermodynamic_integration_log_evidence() print("Bayesian evidence lnZ: {:0.5f}".format(self.evidence[0])) print("Bayesian evidence error dlnZ: {:0.5f}".format(self.evidence[1])) # save estimation result # 16, 50, 84 quantiles result = np.array(list(map(lambda v: (v[1], v[2]-v[1], v[1]-v[0]), zip(*np.percentile(samples, [16, 50, 84],axis=0))))) self.para_fit = result[:,0] # maximum para_fitmax = np.zeros(self.ndim) for ipara in range(self.ndim): n, bins, _ = plt.hist(samples[:,ipara], bins=80) idx = np.where(n == n.max())[0][0] para_fitmax[ipara] = bins[idx:idx+1].mean() self.para_fitmax = para_fitmax self.result = np.concatenate([result, para_fitmax.reshape(self.ndim,1)], axis=1) # save acceptance fraction self.acceptance_fraction = np.array([np.mean(sampler.acceptance_fraction)]) print("Mean acceptance fraction: {:0.3f}".format(self.acceptance_fraction[0])) return
def walkers_parallel_tempered_decay(mN1_data, guesses, time_min, time_max,
                                    fromcsv, dataN, runN, gaus_var,
                                    nwalkers, nsteps, prior,
                                    scale_factor=100 * 100,
                                    withlaserskew=False):
    """
    This function samples the posterior using parallel-tempered MCMC.
    It is recommended to use 1e-4 for gaus_var when withlaserskew=False,
    and 1e-3 for gaus_var when withlaserskew=True.

    Parameters:
        mN1_data: fluorescence data for each time step (RRDataContainer)
        guesses: the initial guesses for the parameters of the model
            (array of floats)
        time_min: minimum Raman-Rabi pulse time (float)
        time_max: maximum Raman-Rabi pulse time (float)
        fromcsv: marks whether these data were read from a CSV file (bool)
        dataN: number of experiment repetitions summed (int)
        runN: number of runs over which the experiment was done (int)
        gaus_var: variance of the gaussian that defines the starting
            positions (float)
        nwalkers: the number of walkers with which to sample (int)
        nsteps: the number of steps each walker should take (int)
        prior: an array specifying the priors to use in log_prior
        scale_factor (optional): scaling applied to the likelihood (float)
        withlaserskew (optional): marks whether to use laserskew
            functions or not (bool)

    Returns:
        sampler: the sampler object which now contains the samples taken
            by nwalkers walkers over nsteps steps
    """
    ndim = len(guesses)
    # use temperature ladder specified in Gregory (see p. 330)
    betas = np.array([1.0, 0.7525, 0.505, 0.2575, 0.01])
    ntemps = len(betas)
    sampler = emcee.PTSampler(ntemps, nwalkers, ndim,
                              decay_loglikelihood, log_prior, betas=betas,
                              loglargs=[mN1_data, time_min, time_max,
                                        fromcsv, dataN, runN,
                                        scale_factor],
                              loglkwargs={'withlaserskew': withlaserskew},
                              logpkwargs={'priors': prior})
    # use gaus_var (previously a hard-coded 1e-4) for the starting ball,
    # as the docstring describes
    starting_positions = (np.tile(guesses, (ntemps, nwalkers, 1))
                          + gaus_var * np.random.randn(ntemps, nwalkers,
                                                       ndim))
    sampler.run_mcmc(starting_positions, nsteps)
    return sampler
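# Quick self-contained check (assuming emcee < 3) that an explicit `betas`
# array fixes the temperature ladder, as done above; the logl/logp lambdas
# are toy stand-ins, not the project's likelihood.
import emcee
import numpy as np

betas = np.array([1.0, 0.7525, 0.505, 0.2575, 0.01])
s = emcee.PTSampler(len(betas), 10, 2,
                    lambda x: -0.5 * np.sum(x ** 2),  # toy log-likelihood
                    lambda x: 0.0,                    # flat log-prior
                    betas=betas)
assert np.allclose(s.betas, betas)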
def calculate_logevidence(cls, filename, thin_start=None, thin_end=None,
                          thin_interval=None):
    """Calculates the log evidence from the given file using
    ``emcee_pt``'s thermodynamic integration.

    Parameters
    ----------
    filename : str
        Name of the file to read the samples from. Should be an
        ``EmceePTFile``.
    thin_start : int
        Index of the sample to begin returning stats. Default is to read
        stats after burn in. To start from the beginning set thin_start
        to 0.
    thin_interval : int
        Interval to accept every i-th sample. Default is to use the
        `fp.acl`. If `fp.acl` is not set, then use all stats
        (set thin_interval to 1).
    thin_end : int
        Index of the last sample to read. If not given then
        `fp.niterations` is used.

    Returns
    -------
    lnZ : float
        The estimate of log of the evidence.
    dlnZ : float
        The error on the estimate.
    """
    with cls._io(filename, 'r') as fp:
        logls = fp.read_raw_samples(['loglikelihood'],
                                    thin_start=thin_start,
                                    thin_interval=thin_interval,
                                    thin_end=thin_end,
                                    temps='all', flatten=False)
        logls = logls['loglikelihood']
        # we need the betas that were used
        betas = fp.betas
        # annoyingly, thermodynamic integration in PTSampler is an
        # instance method, so we'll implement a dummy one
        ntemps = fp.ntemps
        nwalkers = fp.nwalkers
        ndim = len(fp.variable_params)
        dummy_sampler = emcee.PTSampler(ntemps, nwalkers, ndim,
                                        None, None, betas=betas)
        return dummy_sampler.thermodynamic_integration_log_evidence(
            logls=logls, fburnin=0.)
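# Standalone sketch of the dummy-sampler trick used in the two snippets
# above (assuming emcee < 3): thermodynamic_integration_log_evidence only
# needs the betas and the per-temperature log-likelihoods, so a sampler
# built with logl=logp=None suffices. The logls array here is synthetic.
import emcee
import numpy as np

ntemps, nwalkers, nsteps, ndim = 5, 10, 100, 2
betas = np.logspace(0, -2, ntemps)   # decreasing ladder, beta = 1/T
dummy = emcee.PTSampler(ntemps, nwalkers, ndim, None, None, betas=betas)
# fake log-likelihoods with shape (ntemps, nwalkers, nsteps)
logls = np.random.randn(ntemps, nwalkers, nsteps) - 10.0
lnZ, dlnZ = dummy.thermodynamic_integration_log_evidence(logls=logls,
                                                         fburnin=0.)
print(lnZ, dlnZ)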
def run_one_model(model, data, prior_vals, ns, pool=None):
    # Vector of estimated parameters
    pe = [p for p in model.parameters if p.name.startswith('k')]
    # Generate model equations
    generate_equations(model)
    Solver._use_inline = True
    sol = Solver(model, numpy.linspace(0, 10, 10))
    sol.run()
    # Number of temperatures, dimensions and walkers
    ntemps = 20
    ndim = len(pe)
    blocksize = 48
    nwalkers = get_num_walkers(ndim, blocksize)
    print('Running %d walkers at %d temperatures for %d steps.' %
          (nwalkers, ntemps, ns))
    sampler = emcee.PTSampler(ntemps, nwalkers, ndim, likelihood, prior,
                              threads=1, pool=pool, betas=None, a=2.0,
                              Tmax=None,
                              loglargs=[model, data],
                              logpargs=[model, prior_vals],
                              loglkwargs={}, logpkwargs={})
    # Random initial parameters for walkers
    p0 = numpy.ones((ntemps, nwalkers, ndim))
    for i in range(ntemps):
        for j in range(nwalkers):
            for k, pp in enumerate(pe):
                p0[i, j, k] = prior_vals.vals[j][pp.name]
    print(p0)
    # Run sampler
    fname = scratch_path + 'chain_%s.dat' % model.name
    step = 0
    for result in sampler.sample(p0, iterations=ns, storechain=True):
        print('---')
        position = result[0]
        with open(fname, 'a') as fh:
            for w in range(nwalkers):
                for t in range(ntemps):
                    pos_str = '\t'.join(['%f' % p for p in position[t][w]])
                    fh.write('%d\t%d\t%d\t%s\n' % (step, w, t, pos_str))
        step += 1
    return sampler
def PTSampler(self, ntemps, nwalkers, **kwargs):
    self.ntemps = ntemps
    self.nwalkers = nwalkers
    self.__sampler = "PTSampler"
    if self.lnlikeType == "gcs":
        loglargs = (self.x, self.y, self.xerr, self.yerr, self.flag)
    else:
        loglargs = (self.x, self.y, self.xerr, self.yerr)
    self.sampler = emcee.PTSampler(ntemps, nwalkers, self.ndim,
                                   logl=self.lnlike, logp=lnprior,
                                   loglargs=loglargs,
                                   logpargs=[self.pRanges], **kwargs)
    print("[linfit]: Use the PTSampler.")
    return self.sampler
def PTSampler(self, ntemps, nwalkers, **kwargs):
    if self.__modelunct:
        self.__lnlike = lnlike_gp
    else:
        self.__lnlike = lnlike
    self.sampler = emcee.PTSampler(ntemps, nwalkers, self.__dim,
                                   logl=self.__lnlike, logp=lnprior,
                                   loglargs=[self.__data, self.__model],
                                   logpargs=[self.__data, self.__model,
                                             self.__modelunct,
                                             self.__unctDict],
                                   **kwargs)
    self.__ntemps = ntemps
    self.__nwalkers = nwalkers
    self.__sampler = "PTSampler"
    return self.sampler
def get_sampler(self, **kwargs):
    if not self.pt:
        return emcee.EnsembleSampler(self.nwalkers, self.ndim,
                                     self.lnprob,
                                     args=self.lnlike_args,
                                     **kwargs)
    else:
        return emcee.PTSampler(self.ntemps, self.nwalkers, self.ndim,
                               logl=self.lnlike, logp=self.lnprior,
                               loglargs=self.lnlike_args,
                               **kwargs)
def emcee_init(self, nwalkers, ndim, lnprob, ntemps=None,
               lnprob_kwargs={}, sampler_kwargs={}, PT=False):
    """
    Initialize an ensemble sampler.

    nwalkers : int
        number of walkers
    ndim : int
        number of parameter dimensions
    lnprob : method
        posterior function call
    ntemps : int
        number of temperatures (parallel tempering only)
    lnprob_kwargs : dict
        keyword arguments for lnprob function call
    sampler_kwargs : dict
        keyword arguments for emcee sampler instantiation
    PT : bool
        if True, use a parallel-tempered sampler
    """
    self.nwalkers = nwalkers
    self.ndim = ndim
    self.lnprob = lnprob
    self.ntemps = ntemps
    self.PT = PT
    if PT:
        self.sampler = emcee.PTSampler(ntemps, nwalkers, ndim,
                                       self.lnlike, self.lnprior,
                                       loglkwargs=lnprob_kwargs,
                                       **sampler_kwargs)
    else:
        self.sampler = emcee.EnsembleSampler(self.nwalkers, self.ndim,
                                             self.lnprob,
                                             kwargs=lnprob_kwargs,
                                             **sampler_kwargs)
def run(self):
    '''Main body of the EMCEE run'''
    self.sampler = emcee.PTSampler(self.ntemps, self.nwalkers,
                                   self.ndims, self.like, self.prior,
                                   threads=1)
    p0 = np.zeros([self.ntemps, self.nwalkers, self.ndims])
    if self.start_kdes == 'none':
        for i in range(self.ntemps):
            for j in range(self.nwalkers):
                p0[i, j, :] = self.start_params * \
                    (1.0 + np.random.randn(self.ndims) * 0.0001)
    # If KDEs exist, distribute walkers from those distributions
    else:
        for i in range(self.ntemps):
            for j in range(self.ndims):
                xs, ys = self.start_kdes[j]
                cdf = integrate.cumtrapz(ys, xs, initial=0)
                inv_cdf = interpolate.interp1d(cdf, xs)
                p0[i, :, j] = inv_cdf(np.random.rand(self.nwalkers))

    p1 = p0  # fall back to the initial positions if no burn-in is run
    if self.start_kdes == 'none':
        print('\nBurning in...')
        for p1, lnpp, lnlp in tqdm(
                self.sampler.sample(p0, iterations=self.niter)):
            pass
        self.sampler.reset()

    if self.start_kdes == 'none':
        print('\nRunning again...')
    for i in range(self.max_conv):
        # integer division: `iterations` must be an int in Python 3
        for pp, lnpp, lnlp in tqdm(
                self.sampler.sample(p1, iterations=self.niter // 2)):
            pass

        # Convergence check: standard deviation as 2% of the median value
        med = np.median(self.sampler.chain[0, :, -self.niter:-1, :],
                        axis=0)
        conv = np.std(med, axis=0) / np.median(med, axis=0)
        if np.all(conv < self.conv_accept):
            break

    samples = self.sampler.chain[0, :, :, :].reshape((-1, self.ndims))
    return np.array(samples)
def do_fit_PT(fit_dir, db_path):
    '''Fits data using emcee parallel tempering'''
    comm = mpi.COMM_WORLD
    pool = emcee_lik.MPIPool_stay_alive(loadbalance=True)
    files = glob(path.join(fit_dir, '*.pik'))
    # start fits
    for gal in files:
        comm.barrier()
        data = {}
        temp = pik.load(open(gal, 'rb'))
        data[gal] = temp[-1]
        data = comm.bcast(data, root=0)
        posterior = emcee_lik.LRG_emcee_PT(data, db_path, have_dust=False,
                                           have_losvd=False)
        posterior.init()
        nwalkers = 2 * posterior.ndim()
        Tmax = 1.7 * 10 ** 12
        if pool.is_master():
            # ntemps=None lets PTSampler build the ladder from Tmax
            sampler = emcee.PTSampler(None, nwalkers, posterior.ndim(),
                                      posterior, dummy_prior, Tmax=Tmax,
                                      pool=pool)
            ntemps = sampler.ntemps
            # need to make pos0 (ntemps, nwalkers, dim)
            pos0 = posterior.inital_pos(nwalkers, ntemps)
            burnin = 2000
            iterations = 10 * 10 ** 3
            i = 0
            # burn in
            pos, prob, state = sampler.run_mcmc(pos0, burnin)
            sampler.reset()
            # mcmc
            for tpos, tprob, _ in sampler.sample(pos,
                                                 iterations=iterations,
                                                 rstate0=state):
                acept = sampler.acceptance_fraction.mean()
                print('%i out of %i, accept=%2.1f' % (i, iterations,
                                                      acept))
                if i % 100 == 0:
                    pik.dump((temp, sampler),
                             open(gal + '.pt.incomplete', 'wb'), 2)
                i += 1
            pik.dump((temp, sampler), open(gal + '.pt', 'wb'), 2)
            pool.close()
        else:
            pool.wait(posterior)
def run_PTmcmc(ntemps, lnlike, priors, position, ndim=3, nwalkers=100,
               logargs=[]):
    # Run MCMC sampler
    sampler = emcee.PTSampler(ntemps, nwalkers, ndim, lnlike,
                              priors.lnprior, loglargs=logargs)
    sams = sampler.run_mcmc(position, 1000)
    return sampler, sams
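# Toy invocation sketch for run_PTmcmc above (assuming emcee < 3); the
# Priors class and quadratic log-likelihood are illustrative stand-ins.
import emcee
import numpy as np

class Priors(object):
    def lnprior(self, theta):
        # flat prior on a broad box
        return 0.0 if np.all(np.abs(theta) < 10) else -np.inf

def lnlike(theta):
    return -0.5 * np.sum(theta ** 2)

ntemps, nwalkers, ndim = 4, 100, 3
# starting positions must match (ntemps, nwalkers, ndim)
position = 1e-4 * np.random.randn(ntemps, nwalkers, ndim)
sampler, sams = run_PTmcmc(ntemps, lnlike, Priors(), position)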
def GetSampler(self, SamplerType, NTemps, NWalkers, Threads):
    '''
    Set up sampler and position.

    Args:
        SamplerType (str): 'Ensemble' or 'PT'
        NTemps (int): only valid for parallel tempering; sets the number
            of temperatures
        NWalkers (int): number of walkers
        Threads (int): number of threads
    '''
    self._SamplerType = SamplerType
    if SamplerType == "Ensemble":
        self._Sampler = em.EnsembleSampler(NWalkers, self._FitDim,
                                           LogPosterior, threads=Threads,
                                           args=[self._FitBound,
                                                 self._Info, self._P,
                                                 self.FluxGenerator,
                                                 self.Times, self.Freqs,
                                                 self.Fluxes,
                                                 self.FluxErrs])
        self._Position0 = self._InitialBound[:, 0] + (
            self._InitialBound[:, 1] - self._InitialBound[:, 0]
        ) * np.random.rand(NWalkers, self._FitDim)
    else:
        self._Sampler = em.PTSampler(NTemps, NWalkers, self._FitDim,
                                     LogLike, LogPrior, threads=Threads,
                                     loglargs=[self._Info, self._P,
                                               self.FluxGenerator,
                                               self.Times, self.Freqs,
                                               self.Fluxes,
                                               self.FluxErrs],
                                     logpargs=[self._FitBound,
                                               self._Info])
        self._Position0 = self._InitialBound[:, 0] + (
            self._InitialBound[:, 1] - self._InitialBound[:, 0]
        ) * np.random.rand(NTemps, NWalkers, self._FitDim)
def run_emcee(nllf, p0, bounds, burn, nsamples, nwalkers, temperatures):
    if np.isscalar(temperatures):
        ntemps, betas = temperatures, None
    else:
        ntemps, betas = len(temperatures), 1 / temperatures

    # -inf must come from numpy here; a bare `inf` is undefined
    log_prior = (lambda p: 0 if ((p >= bounds[0]) & (p <= bounds[1])).all()
                 else -np.inf)
    log_likelihood = lambda p: -nllf(p)
    sampler = emcee.PTSampler(
        ntemps=ntemps,
        nwalkers=nwalkers,
        dim=len(p0),
        logl=log_likelihood,
        logp=log_prior,
        betas=betas,
    )
    nthin = 1
    steps = nsamples // (nwalkers * nthin)
    monitor = Monitor(burn + steps)

    # Burn-in
    pop = eps_init(nwalkers * ntemps, p0, bounds).reshape(ntemps,
                                                          nwalkers, -1)
    for p, lnprob, lnlike in sampler.sample(pop, iterations=burn):
        monitor(p, lnlike)
    sampler.reset()
    print("== after burn ==", monitor.index, monitor.best)

    # Collect
    for p, lnprob, lnlike in sampler.sample(p, lnprob0=lnprob,
                                            lnlike0=lnlike,
                                            iterations=nthin * steps,
                                            thin=nthin):
        monitor(p, lnlike)
    print("== after sample ==", monitor.index, monitor.best)
    assert sampler.chain.shape == (ntemps, nwalkers, steps, len(p0))

    return sampler
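# Small self-contained check of the thin bookkeeping used above (assuming
# emcee < 3): sampling `iterations=N` with `thin=k` stores N // k steps per
# walker in the chain. The 1-D Gaussian target is a toy.
import emcee
import numpy as np

s = emcee.PTSampler(2, 8, 1,
                    lambda x: -0.5 * x[0] ** 2,  # toy log-likelihood
                    lambda x: 0.0)               # flat log-prior
p0 = 1e-3 * np.random.randn(2, 8, 1)
for p, lnp, lnl in s.sample(p0, iterations=20, thin=5):
    pass
assert s.chain.shape == (2, 8, 4, 1)   # 20 iterations / thin 5 = 4 stored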
def run_one_model(model, model_number, data, ns, pool=None):
    # Vector of nominal parameters
    p = numpy.log10(numpy.array([pp.value for pp in model.parameters
                                 if pp.name[0] == 'k']))
    print(posterior(p, model, data))
    # Number of temperatures, dimensions and walkers
    ntemps = 20
    ndim = len(p)
    blocksize = 4
    nblocks = int(numpy.ceil((2 * ndim + 1) / (1.0 * blocksize)))
    nwalkers = blocksize * nblocks
    print('Running %d walkers at %d temperatures for %d steps.' %
          (nwalkers, ntemps, ns))
    sampler = emcee.PTSampler(ntemps, nwalkers, ndim, likelihood, prior,
                              threads=1, pool=pool, betas=None, a=2.0,
                              Tmax=None,
                              loglargs=[model, data],
                              logpargs=[model],
                              loglkwargs={}, logpkwargs={})
    # Random initial parameters for walkers
    p0 = numpy.ones((ntemps, nwalkers, ndim))
    for i in range(ntemps):
        for j in range(nwalkers):
            p0[i, j, :] = p + 1.0 * (numpy.random.rand(ndim) - 0.5)
    # Run sampler
    fname = folder_path + 'chain_%d.dat' % model_number
    step = 0
    for result in sampler.sample(p0, iterations=ns, storechain=True):
        print('---')
        position = result[0]
        with open(fname, 'a') as fh:
            for w in range(nwalkers):
                for t in range(ntemps):
                    pos_str = '\t'.join(['%f' % p for p in position[t][w]])
                    fh.write('%d\t%d\t%d\t%s\n' % (step, w, t, pos_str))
        step += 1
    return sampler
def ptmcmc(self, p0s):
    """Runs the parallel-tempering Markov chain Monte Carlo algorithm to
    determine multi-modal uncertainties."""

    def logl(theta):
        return -self.chi(theta)

    sampler = emcee.PTSampler(self.n_temps, self.n_walkers, self.n_dim,
                              logl, self.lnprior)
    if self.savefile is not None:
        # truncate any existing save file
        f = open(self.savefile, "w")
        f.close()
    for result in tqdm(sampler.sample(p0s, iterations=self.n_steps,
                                      thin=self.thin),
                       total=self.n_steps):
        if self.savefile is not None:
            position = result[0]
            f = open(self.savefile, "a")
            for k in range(position.shape[0]):
                f.write("{0:4d} {1:s}\n".format(
                    k, " ".join([str(pos) for pos in position[k]])))
            f.close()
    return sampler
def __init__(self, model, ntemps, nwalkers, pool=None, model_call=None):
    try:
        import emcee
    except ImportError:
        raise ImportError("emcee is not installed.")

    if model_call is None:
        model_call = model

    # construct the sampler: PTSampler needs the likelihood and prior
    # functions separately
    ndim = len(model.variable_params)
    sampler = emcee.PTSampler(ntemps, nwalkers, ndim,
                              _callloglikelihood(model_call),
                              _callprior(model_call),
                              pool=pool)
    # initialize
    super(EmceePTSampler, self).__init__(sampler, model)
    self._nwalkers = nwalkers
    self._ntemps = ntemps
def prep_pt_sampler(init_params, lnprior, lnlikelihood, lnlikeargs,
                    nwalkers,
                    # nthreads,
                    # a=2,
                    ntemps=10, ballsize=1e-4, **kwargs):
    ndim = len(init_params)  # number of parameters in the model

    sampler = emcee.PTSampler(ntemps, nwalkers, ndim,
                              logp=lnprior, logl=lnlikelihood,
                              loglargs=lnlikeargs, **kwargs)

    # a tight Gaussian ball of starting positions around init_params
    gaussian_ball = np.random.randn(ndim * nwalkers * ntemps).reshape(
        ntemps, nwalkers, ndim)
    initpar_ball = init_params + ballsize * gaussian_ball

    return sampler, initpar_ball
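# Hedged end-to-end usage of prep_pt_sampler above (assuming emcee < 3);
# the quadratic target, its `scale` argument, and the sizes are
# placeholders, not from the original project.
import numpy as np

def toy_lnprior(theta):
    return 0.0                              # flat log-prior

def toy_lnlike(theta, scale):
    return -0.5 * np.sum((theta / scale) ** 2)

sampler, p0 = prep_pt_sampler(init_params=np.zeros(2),
                              lnprior=toy_lnprior,
                              lnlikelihood=toy_lnlike,
                              lnlikeargs=(1.0,),   # passed via loglargs
                              nwalkers=16, ntemps=4)
sampler.run_mcmc(p0, 200)
print(sampler.chain.shape)                  # (4, 16, 200, 2)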
def run_mcmc_sample(
        Nsamples, mag_band, maglimits,
        # Isochrone set -- currently probably only Padova will work in all cases
        which_iso='Padova',
        # Extra magnitudes to calculate
        extra_mags=None,
        # Introduce a colour cut
        color=None, colorlimits=None,
        # Coordinate cuts (in radians) -- remove everything |b|<modbcut
        # If deccut>0 cut all dec<deccut-pi/2
        # If deccut<0 cut all dec>deccut+pi/2
        modbcut=0., deccut=2. * np.pi,
        # Spectroscopic parameter cut -- log10Teff
        logglimits=None, Tefflimits=None, fehlimits=None,
        # Flags for different options
        extinct=True, with_halo=True, interp=False, dered=False,
        # Debugging and outputting
        output_file=None, messages=True, extra_magnitudes=None,
        # Sampler parameters
        nwalkers=1000, Nburn=None, threads=1, thin=20, pt=False, ntemp=1,
        asampler=2.,
        # Adjust magnitude selection with a taper (1-break_grad(mag-break_pos))
        # break_pos=1000., break_grad=1000.,
        # If cutting on error-convolved teff, logg and color -- currently won't do anything
        tgcolor_errs=np.array([0., 0., 0.])):
    '''
    Draws a Monte Carlo sample from an EDF model

    Draws Nsamples across the sky between magnitudes maglimits for band
    mag_band
    '''
    nsamples = int(Nsamples / nwalkers) + 1
    # Turn on/off halo
    if with_halo:
        edf_sampling.turn_on_halo()
    else:
        edf_sampling.turn_off_halo()
    # -- Set arguments
    check = lambda x: np.array([-10000., 10000.]) if x is None else x
    checkcolor = lambda x: np.array(['J', 'K']) if x is None else x
    args = [which_iso, mag_band, maglimits, extinct, interp, modbcut,
            deccut, check(color), check(colorlimits), check(Tefflimits),
            check(logglimits), check(fehlimits)]
    # -- Set sampler
    ndim, nwalker = 9, nwalkers
    sampler = emcee.EnsembleSampler(nwalker, ndim, LogL_Zsample,
                                    threads=threads, args=args)
    if pt:
        # Parallel tempering
        sampler = emcee.PTSampler(ntemp, nwalker, ndim,
                                  los_magbox_LogL_sample_py, logp,
                                  loglargs=llos_args, threads=threads)
    # Initialize walkers
    # Sample uniform in age, mass
    # Sample gaussian in Z and velocities
    # Sample exp in (unextincted) magnitude
    lomag, himag = maglimits[0], maglimits[1]
    Zlim = [-3., 1.]
    if fehlimits is not None:
        Zlim = list(fehlimits)
    if not with_halo:
        Zlim[0] = edf_sampling.minZ()
        Zlim[1] = edf_sampling.maxZ()
    p0 = np.random.uniform(
        low=[0.2, Zlim[0], 0.5, -150., 50., -100., lomag, 0., modbcut],
        high=[12., Zlim[1], 3.5, 150., 350., 100., himag, 2. * np.pi,
              np.pi / 2.],
        size=(nwalker, ndim))
    p0[:, -1] *= (1. - 2. * (np.random.uniform(low=0, high=1,
                                               size=nwalker) > 0.5))
    if fehlimits is not None:
        meanZ, sigZ = 0., .4
        p0.T[1] = np.random.normal(size=nwalker) * sigZ + meanZ
    meanV, sigV = 0., 50.
    for i in range(3, 6):
        p0.T[i] = np.random.normal(size=nwalker) * sigV + meanV
    p0.T[4] += 200.
    p0.T[6] = trunc_exp_rv(lomag, himag, 1., size=len(p0))
    # -- Only choose physically allowed masses for selected age, Z
    for i in np.arange(len(p0)):
        maxmass = edf_sampling.check_highmass_Z(p0[i][0], p0[i][1],
                                                which_iso)
        minmass = edf_sampling.check_lowmass_Z(p0[i][0], p0[i][1],
                                               which_iso)
        while (maxmass < 0. or minmass < 0.
               or edf_sampling.check_radius_positive(p0[i][0],
                                                     p0[i][1]) == 0
               or deccut_fn(p0[i][-2], p0[i][-1], deccut)):
            p0[i][-3] = np.random.uniform(low=lomag, high=himag)
            p0[i][1] = np.random.uniform(low=Zlim[0], high=Zlim[1])
            p0[i][0] = np.random.uniform(low=0., high=12.)
            maxmass = edf_sampling.check_highmass_Z(p0[i][0], p0[i][1],
                                                    which_iso)
            minmass = edf_sampling.check_lowmass_Z(p0[i][0], p0[i][1],
                                                   which_iso)
        MAXMASS = 3.
        if maxmass > MAXMASS:
            maxmass = MAXMASS
        p0[i][2] = np.random.uniform(low=minmass, high=maxmass)
        # Check other cuts satisfied
        if (check(colorlimits)[0] > -10. or check(logglimits)[0] > -10.
                or check(Tefflimits)[0] > -10.):
            while (edf_sampling.check_color_logg_cut(
                    p0[i][0], p0[i][1], p0[i][2], 0., 180.,
                    checkcolor(color), check(colorlimits),
                    check(logglimits), check(Tefflimits),
                    which_iso, False) == 0):
                p0[i][2] = np.random.uniform(low=minmass, high=maxmass)
    # Now wiggle the walkers a bit
    err = [0.5, 0.1, 0.1, 10., 10., 10., 0.1, 0.01, 0.01]
    for i in np.arange(len(p0)):
        extinctMag = edf_sampling.get_extinct(p0[i][-2], p0[i][-1], 3.,
                                              mag_band)
        if extinctMag > err[-3]:
            err[-3] = extinctMag
        pp = edf_sampling.LogL_sample(p0[i], *args)
        n = 0
        while np.isinf(pp) and n < 10000:
            p0[i] = (p0[np.random.randint(len(p0))]
                     + np.random.normal(size=ndim) * err)
            pp = edf_sampling.LogL_sample(p0[i], *args)
            n += 1
        if n == 10000:
            print("Can't find start point:", p0[i],
                  deccut_fn(p0[i][-2], p0[i][-1], deccut))
    if messages:
        print('Initial points sampled')
    if pt:
        p0 = np.reshape(p0, (ntemp, nwalker, ndim))
    if Nburn is None:
        Nburn = 2 * thin * nsamples
    # -- Run a burn-in
    pos, prob, state = sampler.run_mcmc(p0, Nburn, storechain=False)
    if messages:
        print('Number of logl=-inf = ' + str(len(prob[np.isinf(prob)])))
        print("Burnt")
    sampler.reset()
    # -- Sample with thinning and calculating dependent variables
    pos, prob, state = sampler.run_mcmc(pos, nsamples * thin, thin=thin)
    if messages:
        print("Sampled")
    flatchain = sampler.flatchain
    lnprob = sampler.lnprobability
    if pt:
        # keep only the cold chain
        flatchain = flatchain[0]
        lnprob = lnprob[0]
    extras = np.array([edf_sampling.get_extra_data(i, mag_band, which_iso,
                                                   False, extinct, interp)
                       for i in flatchain])
    actions = np.array([
        edf_sampling.get_actions(np.concatenate((b[4:7], a[3:6])))
        for a, b in zip(flatchain, extras)])
    nameslist = np.copy(names)
    nameslist[1] = "Z"
    nameslist[10] = "RcP"
    nameslist[23] = mag_band
    nameslist[6] = mag_band + "0"
    if messages:
        print("Mean acceptance fraction:",
              np.mean(sampler.acceptance_fraction))
    everything = np.vstack((flatchain.T, extras.T, actions.T,
                            lnprob.flatten())).T
    if color is not None:
        if extra_magnitudes is not None:
            extra_magnitudes = np.unique(np.concatenate(
                (color, extra_magnitudes, np.array([mag_band]))))
        else:
            extra_magnitudes = np.unique(np.concatenate(
                (color, np.array([mag_band]))))
        extra_magnitudes = extra_magnitudes[extra_magnitudes != mag_band]
    if extra_magnitudes is not None:
        extra_mags = np.array([edf_sampling.get_extra_magnitudes(
            i, mag_band, extra_magnitudes, which_iso, False, extinct,
            interp) for i in flatchain])
        nameslist = np.concatenate((nameslist, extra_magnitudes))
        print(extra_magnitudes)
        nameslist = np.concatenate(
            (nameslist,
             np.array([e + '0' for e in list(extra_magnitudes)])))
        everything = np.vstack((everything.T, extra_mags.T)).T
    df = pd.DataFrame(everything, columns=nameslist)
    df = df.sample(Nsamples, replace=False).reset_index(drop=True)
    if output_file:
        df.to_csv(output_file)
    return df
def run(): """ Initialize and run the MCMC sampler. """ global _loglkwargs start_time = time.time() args = parse_args() config = ConfigParser() config.read(args.config_file) # set the mcmc parameters nwalkers = config.getint('mcmc_settings', 'nwalkers') ntemps = config.getint('mcmc_settings', 'ntemps') nplanets = config.getint('mcmc_settings', 'nplanets') nstep = config.getint('mcmc_settings', 'nstep') nthreads = config.getint('mcmc_settings', 'nthreads') use_epoch_astrometry = config.getboolean('mcmc_settings', 'use_epoch_astrometry', fallback=False) HipID = config.getint('data_paths', 'HipID', fallback=0) start_file = config.get('data_paths', 'start_file', fallback='none') # set initial conditions par0 = set_initial_parameters(start_file, ntemps, nplanets, nwalkers) ndim = par0[0, 0, :].size data, H1f, H2f, Gf = initialize_data(config) # set arguments for emcee PTSampler and the log-likelyhood (lnprob) samplekwargs = {'thin': 50} loglkwargs = { 'returninfo': False, 'use_epoch_astrometry': use_epoch_astrometry, 'data': data, 'nplanets': nplanets, 'H1f': H1f, 'H2f': H2f, 'Gf': Gf } _loglkwargs = loglkwargs # run sampler without feeding it loglkwargs directly, since loglkwargs contains non-picklable C objects. print('Running MCMC.') sample0 = emcee.PTSampler(ntemps, nwalkers, ndim, avoid_pickle_lnprob, return_one, threads=nthreads) sample0.run_mcmc(par0, nstep, **samplekwargs) print('Total Time: %.2f' % (time.time() - start_time)) print("Mean acceptance fraction (cold chain): {0:.6f}".format( np.mean(sample0.acceptance_fraction[0, :]))) # save data shape = sample0.lnprobability[0].shape parfit = np.zeros((shape[0], shape[1], 8)) loglkwargs['returninfo'] = True for i in range(shape[0]): for j in range(shape[1]): res = lnprob(sample0.chain[0][i, j], **loglkwargs) parfit[i, j] = [ res.plx_best, res.pmra_best, res.pmdec_best, res.chisq_sep, res.chisq_PA, res.chisq_H, res.chisq_HG, res.chisq_G ] out = fits.HDUList(fits.PrimaryHDU(sample0.chain[0].astype(np.float32))) out.append(fits.PrimaryHDU(sample0.lnprobability[0].astype(np.float32))) out.append(fits.PrimaryHDU(parfit.astype(np.float32))) for i in range(1000): filename = os.path.join(args.output_dir, 'HIP%d_chain%03d.fits' % (HipID, i)) if not os.path.isfile(filename): print('Writing output to {0}'.format(filename)) out.writeto(filename, overwrite=False) break
if p0.shape[-1] != len(F.parameters):
    raise ValueError("Parameter mismatch between "
                     "walker (%dd) and Fitter (%dd)"
                     % (p0.shape[-1], len(F.parameters)))
if len(p0.shape) == 2:
    logger.info("using EnsembleSampler (warning: untested)")
    sampler = emcee.EnsembleSampler(p0.shape[0], p0.shape[1],
                                    lnprob_internal,
                                    pool=pool)  # FIXME: untested
else:
    logger.info("using PTSampler (warning: can't handle blobs)")
    sampler = emcee.PTSampler(ntemps=p0.shape[0],
                              nwalkers=p0.shape[1],
                              dim=p0.shape[2],
                              logl=lnprob,
                              logp=lnprior,
                              pool=pool)

def save():
    subprocess.check_call(['rsync', '-rt', '--append-verify',
                           local_dbdir + "/", "nimrod:" + dbdir + "/"])

if not trust_nfs:
    # Run saving in the background so it doesn't interfere with computation
    done = False

    def save_loop():
        logger.debug("starting saving loop")
# rvsys, K, w, ecc, T0, period
labels = ['rvsys', 'K', 'w', 'ecc', 'T0', 'period', 'sig2']
initial = [0, 40, 90, 0.01, 15, 6.25, 20]  # VALUES
print(np.c_[labels, initial])

plt.errorbar(time, rv, erv, fmt='.')
plt.plot(mod_time, rv_pl(mod_time, initial[:-1]))
plt.show()

# Set up the sampler.
ntemps, nwalkers, niter, ndim = 2, 500, 2500, len(labels)
sampler = emcee.PTSampler(ntemps, nwalkers, ndim, lnlike, lnprior,
                          loglargs=(time, rv, erv))

p0 = np.zeros([ntemps, nwalkers, ndim])
for i in range(ntemps):
    for j in range(nwalkers):
        p0[i, j, :] = initial + 1e-2 * np.random.randn(ndim)

print('... burning in ...')
for p, lnprob, lnlike in tqdm(sampler.sample(p0, iterations=niter),
                              total=niter):
    sleep(0.001)

# Clear and run the production chain.
sampler.reset()
print('... running sampler ...')
print('Name: ' + str(NAME_OF_CHAIN))
print(str(socket.gethostname()) + ': Starting calculation at: '
      + time.ctime())

print('Testing proposal is: ' + str(allparameters_proposal))
time_start_single = time.time()
print('Likelihood for the testing proposal is: '
      + str(multi_model(allparameters_proposal)))
time_end_single = time.time()
print('Time for single calculation is '
      + str(time_end_single - time_start_single))

##############################################################################
# First emcee run
sampler = emcee.PTSampler(parInitnT.shape[0], parInitnT.shape[1],
                          parInitnT.shape[2], multi_model, modelP,
                          pool=pool)
sampler.run_mcmc(parInitnT, nsteps)

# Export the chain and log-likelihood
np.save(RESULT_FOLDER + NAME_OF_CHAIN + 'Emcee1', sampler.chain)
np.save(RESULT_FOLDER + NAME_OF_LIKELIHOOD_CHAIN + 'Emcee1',
        sampler.lnlikelihood)

# Create minchain (the best cold-chain sample) and save it
likechain0 = sampler.lnlikelihood[0]
chain0 = sampler.chain[0]
minchain = chain0[np.abs(likechain0) == np.min(np.abs(likechain0))][0]
np.save(RESULT_FOLDER + NAME_OF_CHAIN + 'minchain', minchain)

print('Time when first emcee run finished was: ' + time.ctime())
def mcmcSample(self, theta_ml, emcee_threads, nsteps=1000, PT=True,
               ntemps=20, Tmax=None, betas=None, thin=1, burn=1000):
    '''Helper function to do MCMC sampling.

    This uses the emcee implementations of ensemble-sampling MCMC or
    parallel-tempering MCMC (PT-MCMC). In the latter case, a number of
    "temperatures" must be defined. These are used to modify the
    likelihood in a way that makes exploration of multimodal
    distributions easier. PT-MCMC also allows an estimation of the model
    log-evidence, although to lower accuracy than nested sampling.

    theta_ml : array
        Initial parameters to start MCMC exploration from.
    emcee_threads : int
        Number of threads used within emcee. Parallelization is possible
        only within a single machine/node.
    nsteps : int, optional
        Number of steps required from MCMC. Default is 1000. The number
        of steps is rounded to a multiple of the ``thin`` factor.
    PT : bool, optional
        Boolean used to activate parallel tempering. With this option, an
        evaluation of log-evidence is possible, but is expected to be
        less accurate than in nested sampling.
    ntemps : int, optional
        Number of temperatures used in PT-MCMC. This is a rather
        arbitrary number. Note that in emcee the default temperature
        ladder is implemented such that each temperature is larger by a
        factor of :math:`\\sqrt{2}`.
    Tmax : float, optional
        Maximum temperature allowed for the emcee PTSampler. If
        ``ntemps`` is not given, this argument controls the number of
        temperatures. Temperatures are chosen according to the spacing
        criteria until the maximum temperature exceeds ``Tmax``.
        Default is to set ``ntemps`` and leave Tmax=None.
    betas : array, optional
        Array giving the inverse temperatures, :math:`\\beta = 1/T`,
        used in the ladder. The default is chosen so that a Gaussian
        posterior in the given number of dimensions will have a 0.25
        temperature-swap acceptance rate.
    thin : int, optional
        Thinning factor (choose 1 to avoid thinning).
    burn : int, optional
        Burn-in of chains. Default is 1000.
    '''
    # round number of steps to a multiple of the thinning factor:
    nsteps = nsteps - nsteps % thin

    if PT:
        print("Using PT-MCMC!")

    ndim, nwalkers = len(theta_ml), len(theta_ml) * 4

    if not PT:
        # pos has shape (nwalkers, ndim)
        pos = [theta_ml + 1e-4 * np.random.randn(ndim)
               for i in np.arange(nwalkers)]

        # use affine-invariant ensemble sampling
        sampler = emcee.EnsembleSampler(nwalkers, ndim,
                                        _LnPost_Wrapper(self),
                                        threads=emcee_threads)
        # get MCMC samples, adding 'burn' steps which will be burnt later
        sampler.run_mcmc(pos, nsteps + burn)

        # flatten chain (but keep ndim)
        samples = sampler.chain[:, burn::thin, :].reshape((-1, ndim))
    else:
        # use PT-MCMC
        sampler = emcee.PTSampler(ntemps, nwalkers, ndim,
                                  _LnLike_Wrapper(self),
                                  _LnPrior_Wrapper(self),
                                  threads=emcee_threads,
                                  Tmax=Tmax, betas=betas)

        # if Tmax is not None, ntemps was set internally by PTSampler
        ntemps = sampler.ntemps

        # pos has shape (ntemps, nwalkers, ndim)
        pos = [[theta_ml + 1e-4 * np.random.randn(ndim)
                for i in np.arange(nwalkers)]
               for t in np.arange(ntemps)]

        # burn-in for 'burn' iterations
        for p, lnprob, lnlike in sampler.sample(pos, iterations=burn):
            pass
        sampler.reset()

        # now sample (and thin by a factor of `thin`):
        for p, lnprob, lnlike in sampler.sample(p, lnprob0=lnprob,
                                                lnlike0=lnlike,
                                                iterations=nsteps,
                                                thin=thin):
            pass

        # keep only samples from the cold (beta = 1) chain
        samples = sampler.chain[0, :, :, :].reshape((-1, ndim))

    # sanity check
    if PT:
        assert sampler.chain.shape == (ntemps, nwalkers,
                                       nsteps // thin, ndim)
        assert samples.shape == (nwalkers * nsteps // thin, ndim)
    else:
        assert sampler.chain.shape == (nwalkers, burn + nsteps, ndim)
        assert samples.shape == (nwalkers * nsteps // thin, ndim)

    return samples, sampler