def optimize():
    from emcee import EnsembleSampler
    import multiprocessing as mp

    ndim = 4
    nwalkers = 4 * ndim
    p0 = np.array(
        [
            np.random.uniform(1000, 5000, nwalkers),
            np.random.uniform(0.1, 1.0, nwalkers),
            np.random.uniform(2, 12, nwalkers),
            np.random.uniform(0.1, 1.5, nwalkers),
        ]
    ).T
    sampler = EnsembleSampler(nwalkers, ndim, lnprob, threads=mp.cpu_count())

    # burn-in
    pos, prob, state = sampler.run_mcmc(p0, 1000)
    sampler.reset()
    print("Burned in")

    # actual run
    pos, prob, state = sampler.run_mcmc(pos, 1000)

    # Save the last position of the walkers
    np.save("walkers_emcee.npy", pos)
    np.save("eparams_emcee.npy", sampler.flatchain)
def make_sampler(self):
    ens_samp = EnsembleSampler(self.nwalkers, len(list(self.model.parameters)),
                               self.model.lnposterior, threads=self.threads,
                               args=[self.data])
    if self.seed is not None:
        seed_state = np.random.mtrand.RandomState(self.seed).get_state()
        ens_samp.random_state = seed_state
    return ens_samp
def run_burn_in(self, pool=None):
    # Initialise sampler for burn-in
    self.burn_in_sampler = EnsembleSampler(
        self.p['mcmc']['walkers_initial'], len(self.likelihood.mu),
        self.likelihood, pool=pool
    )
    # Record start time
    self.burn_start_time = dt.now()
    # Initialise walkers
    self.walker_init()
    # Run the sampler and write progress to file
    for i, a in enumerate(
            self.burn_in_sampler.sample(self.pre_burn_position,
                                        iterations=self.p['mcmc']['burn_in_iterations'])):
        if check_master(pool):
            with open(self.prog_fname, 'w') as f:
                f.write(self.write_progress(i, self.p['mcmc']['burn_in_iterations'],
                                            self.burn_start_time, 'B'))
    # Save the chain
    self.burn_chain = self.burn_in_sampler.chain
def sample_orbit(sampler, nsteps=0, theta0=None, processes=None):
    """Run the MCMC sampler.

    Note: For improved parallel performance this function is not
    implemented as a class method of MCMCSampler.
    """
    with Pool(processes) as pool:
        worker = EnsembleSampler(sampler.nwalkers, sampler.ndim, sampler.objective,
                                 backend=sampler.backend, pool=pool)
        if worker.backend.iteration == 0:
            logger.info("Starting new run")
            if theta0 is None:
                theta = np.array([[prior.draw() for prior in sampler.priors]
                                  for n in range(sampler.nwalkers)])
            else:
                theta = theta0
        else:
            logger.info("Resuming last run")
            theta = worker._previous_state
            assert theta is not None
        if nsteps is not None:
            assert nsteps >= 0
        worker.run_mcmc(theta, nsteps, progress=True)
    logger.info("Finished MCMC run")
    return worker
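# The note in the docstring above refers to Python multiprocessing: Pool workers
# pickle the log-probability callable, and bound methods of heavyweight objects
# often pickle slowly or fail to pickle at all. A minimal sketch of the usual
# workaround with a module-level function (the names here are illustrative, not
# from the source above):
import numpy as np
from multiprocessing import Pool
from emcee import EnsembleSampler

def log_prob(theta):  # module-level: cheap to pickle for the pool workers
    return -0.5 * np.sum(theta ** 2)

if __name__ == "__main__":
    nwalkers, ndim = 32, 3
    p0 = np.random.randn(nwalkers, ndim)
    with Pool() as pool:
        sampler = EnsembleSampler(nwalkers, ndim, log_prob, pool=pool)
        sampler.run_mcmc(p0, 100, progress=True)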
def run_sampler(backend, nwalkers=32, ndim=3, nsteps=25, seed=1234, thin_by=1):
    np.random.seed(seed)
    coords = np.random.randn(nwalkers, ndim)
    sampler = EnsembleSampler(nwalkers, ndim, normal_log_prob, backend=backend)
    sampler.run_mcmc(coords, nsteps, thin_by=thin_by)
    return sampler
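# The `backend` argument above is what makes a chain persistent. A minimal
# sketch of the documented emcee HDF5-backend pattern, which also allows an
# interrupted run to be resumed (filename and log-prob are illustrative):
import numpy as np
import emcee

def log_prob(theta):
    return -0.5 * np.sum(theta ** 2)

nwalkers, ndim = 32, 3
backend = emcee.backends.HDFBackend("chain.h5")
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob, backend=backend)
if backend.iteration == 0:
    sampler.run_mcmc(np.random.randn(nwalkers, ndim), 100)
else:
    sampler.run_mcmc(None, 100)  # passing None resumes from the stored state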
def run_sampling(self, pool=None):
    # Take the time at the start of sampling
    self.sample_start_time = dt.now()
    # Respawn the walkers from the final burn-in position
    self.redistribute_walkers()
    # Initialise new sampler for final chain
    self.final_sampler = EnsembleSampler(
        self.p['mcmc']['walkers_initial'] * self.p['mcmc']['walkers_factor'],
        len(self.likelihood.mu), self.likelihood, pool=pool
    )
    # Run the sampler and write progress to file
    for i, a in enumerate(
            self.final_sampler.sample(self.post_burn_position,
                                      iterations=(self.p['mcmc']['final_iterations'] + 10))):
        if check_master(pool):
            with open(self.prog_fname, 'w') as f:
                f.write(self.write_progress(i, self.p['mcmc']['final_iterations'] + 10,
                                            self.sample_start_time, 'S'))
    # Record the finish time
    self.sample_finish_time = dt.now()
    # Prune the chain to remove dead walkers and drop second burn-in
    self.format_chain()
def sample_emcee(model, data, nwalkers, nsamples, walker_initial_pos,
                 threads='auto', cleanup_threads=True, seed=None):
    sampler = EnsembleSampler(nwalkers, len(list(model.parameters)),
                              model.lnposterior, threads=autothreads(threads),
                              args=[data])
    if seed is not None:
        np.random.seed(seed)
        seed_state = np.random.mtrand.RandomState(seed).get_state()
        sampler.random_state = seed_state
    sampler.run_mcmc(walker_initial_pos, nsamples)
    if sampler.pool is not None and cleanup_threads:
        sampler.pool.terminate()
        sampler.pool.join()
    return sampler
def test_sampler_seed():
    nwalkers = 32
    ndim = 3
    nsteps = 25
    np.random.seed(456)
    coords = np.random.randn(nwalkers, ndim)
    sampler1 = EnsembleSampler(nwalkers, ndim, normal_log_prob, seed=1234)
    sampler2 = EnsembleSampler(nwalkers, ndim, normal_log_prob, seed=2)
    sampler3 = EnsembleSampler(nwalkers, ndim, normal_log_prob, seed=1234)
    sampler4 = EnsembleSampler(nwalkers, ndim, normal_log_prob,
                               seed=deepcopy(sampler1._random))
    for sampler in (sampler1, sampler2, sampler3, sampler4):
        sampler.run_mcmc(coords, nsteps)
    for k in ["get_chain", "get_log_prob"]:
        attr1 = getattr(sampler1, k)()
        attr2 = getattr(sampler2, k)()
        attr3 = getattr(sampler3, k)()
        attr4 = getattr(sampler4, k)()
        assert not np.allclose(attr1, attr2), "inconsistent {0}".format(k)
        np.testing.assert_allclose(attr1, attr3, err_msg="inconsistent {0}".format(k))
        np.testing.assert_allclose(attr1, attr4, err_msg="inconsistent {0}".format(k))
def fitMcmc(self, u, v, *theta0, **kwargs):
    """!
    @brief Markov chain Monte Carlo fit method
    @param u  <b>np_1darray</b> Rank data vector
    @param v  <b>np_1darray</b> Rank data vector
    @param theta0 Initial guess for copula parameter list
    @return <b>tuple</b> :
            (<b>np_array</b> Array of MLE fit copula parameters,
             <b>np_2darray</b> sample array of shape (nparams, nsamples))
    """
    from emcee import EnsembleSampler
    wgts = kwargs.pop("weights", np.ones(len(u)))
    rotation = 0
    ln_prob = lambda theta: self._ln_prior(*theta, **kwargs) + \
        self._ln_like(u, v, wgts, rotation, *theta)
    if None in theta0:
        params0 = self.theta0
    else:
        params0 = theta0
    ndim = len(params0)
    ngen = kwargs.get("ngen", 200)
    nburn = kwargs.get("nburn", 100)
    nwalkers = kwargs.get("nwalkers", 50)
    # initialize walkers in a Gaussian ball around theta0
    pos_0 = [np.array(params0) +
             1e-6 * np.asarray(params0) * np.random.randn(ndim)
             for i in range(nwalkers)]
    emcee_mcmc = EnsembleSampler(nwalkers, ndim, ln_prob)
    emcee_mcmc.run_mcmc(pos_0, ngen)
    samples = emcee_mcmc.chain[:, nburn:, :].reshape((-1, ndim))
    res = np.mean(samples, axis=0)
    self._fittedParams = res
    return res, samples
def sample_mcmc(self, niter: int = 500, thin: int = 5, repeats: int = 1,
                npop: int = None, population=None, label='MCMC sampling',
                reset=True, leave=True, save=False, use_tqdm: bool = True):
    if save and self.result_dir is None:
        raise ValueError('The MCMC sampler is set to save the results, '
                         'but the result directory is not set.')
    if self.sampler is None:
        if population is not None:
            pop0 = population
        elif hasattr(self, '_local_minimization') and self._local_minimization is not None:
            pop0 = multivariate_normal(self._local_minimization.x,
                                       diag(full(len(self.ps), 0.001 ** 2)), size=npop)
        elif self.de is not None:
            pop0 = self.de.population.copy()
        else:
            raise ValueError('Sample MCMC needs an initial population.')
        self.sampler = EnsembleSampler(pop0.shape[0], pop0.shape[1],
                                       self.lnposterior, vectorize=True)
    else:
        pop0 = self.sampler.chain[:, -1, :].copy()

    for i in tqdm(range(repeats), desc='MCMC sampling', disable=(not use_tqdm)):
        if reset or i > 0:
            self.sampler.reset()
        for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin),
                      total=niter, desc='Run {:d}/{:d}'.format(i + 1, repeats),
                      leave=False, disable=(not use_tqdm)):
            pass
        if save:
            self.save(self.result_dir)
        pop0 = self.sampler.chain[:, -1, :].copy()
def run_sampler(
    backend,
    nwalkers=32,
    ndim=3,
    nsteps=25,
    seed=1234,
    thin=None,
    thin_by=1,
    progress=False,
    store=True,
):
    np.random.seed(seed)
    coords = np.random.randn(nwalkers, ndim)
    np.random.seed(None)
    sampler = EnsembleSampler(nwalkers, ndim, normal_log_prob,
                              backend=backend, seed=seed)
    sampler.run_mcmc(
        coords,
        nsteps,
        thin=thin,
        thin_by=thin_by,
        progress=progress,
        store=store,
    )
    return sampler
def lt_taum(pTeff, plogLstar, grid_name='MIST', ntrials=10000, burn=0, nwalkers=10):
    # set up parser
    parser = argparse.ArgumentParser(description="Given a set of MCMC samples of T, log L, "
                                                 "use scipy.kde to approximate the density field.")
    parser.add_argument("--config", default="config.yaml",
                        help="The config file specifying everything we need.")
    args = parser.parse_args()

    with open(args.config) as f:
        config = yaml.safe_load(f)

    # collate the Teff, logLstar samples (presumed independent here)
    TlL_samples = np.column_stack((np.log10(pTeff), plogLstar))

    # initialize MCMC walkers
    ndim = 2
    age_low, age_high = 0.2, 20.     # in Myr
    Mstar_low, Mstar_high = 0.1, 3.  # in Msun
    p0 = np.array([np.log10(1e6 * np.random.uniform(age_low, age_high, nwalkers)),
                   np.log10(np.random.uniform(Mstar_low, Mstar_high, nwalkers))]).T

    # KDE for Teff, logLstar
    samples = TlL_samples.T
    kernel = gaussian_kde(samples)

    # define the likelihood function
    def lnprob(p, grid):
        age, mass = p
        # if ((age < 0.0) or (mass < 0.0)):
        #     return -np.inf

        # smooth interpolation in the H-R diagram
        temp = grid.interp_T(p)
        lL = grid.interp_lL(p)

        # landing outside the grid gives a NaN; convert to -np.inf so the sampler rejects it
        if np.isnan(temp) or np.isnan(lL):
            return -np.inf

        # evaluate the KDE kernel
        lnp = kernel.logpdf([temp, lL])

        # return the log-likelihood
        return lnp

    # *** sample the {age, Mstar} posterior

    # assign the model grid
    grid = model_dict[grid_name](**config[grid_name])

    # initialize and run the emcee sampler
    sampler = EnsembleSampler(nwalkers, ndim, lnprob, args=[grid])
    pos, prob, state = sampler.run_mcmc(p0, ntrials)

    # flatten the resulting chain to give joint samples of {age, Mstar}
    ptauMstar = (sampler.chain[:, burn:, :]).reshape(-1, ndim)

    return ptauMstar
def test_hybrid_sampling(pipe):
    n_walkers, p0, hybrid_lnpost = get_walkers(pipe, lnpost_fn=lnpost)
    n_walkers *= 2
    p0 = np.concatenate([p0, p0])
    with pipe.worker_mode:
        if pipe._is_controller:
            sampler = EnsembleSampler(n_walkers, pipe._modellink._n_par,
                                      hybrid_lnpost, args=[pipe])
            sampler.run_mcmc(p0, 10)
def run_sampler(backend, nwalkers=32, ndim=3, nsteps=25, seed=1234,
                thin=None, thin_by=1, progress=False, store=True):
    np.random.seed(seed)
    coords = np.random.randn(nwalkers, ndim)
    sampler = EnsembleSampler(nwalkers, ndim, normal_log_prob, backend=backend)
    sampler.run_mcmc(coords, nsteps, thin=thin, thin_by=thin_by,
                     progress=progress, store=store)
    return sampler
def __init__(self, lnpost, p0, keys, nwalkers=120):
    self.lnpost = lnpost
    self.sampler = EnsembleSampler(nwalkers, p0.shape[1], lnpost, threads=15)
    self.p0 = p0
    self.p = p0
    self.keys = keys
    self.ndim = len(keys)
def sample(self, lnl):
    # set up output file
    if self.output_file and mpi.is_master():
        _output_file = open(self.output_file, "wb")
        protocol = pickle.HIGHEST_PROTOCOL
        pickle.dump(
            obj=list(chain(['lnl', 'weight'], self.sampled.keys(),
                           self.output_extra_params.keys())),
            file=_output_file, protocol=protocol
        )
        dtypes = ','.join([dtype(float).name] * (2 + len(self.sampled)) +
                          list(self.output_extra_params.values()))
        samples = {i: empty(self.output_freq, dtypes) for i in range(self.nwalkers)}

    # distribute walkers initially according to covariance estimate
    pos = multivariate_normal([v.start for v in self.sampled.values()],
                              self.cov_est, size=self.nwalkers)

    # different sign convention with emcee
    def lnprob(x):
        l, p = lnl(*x)
        return -l, p

    # step each walker once, yield them all one-by-one, repeat
    weight = ones(self.nwalkers)
    isample = zeros(self.nwalkers, dtype=int)
    from emcee import EnsembleSampler
    sampler = EnsembleSampler(self.nwalkers, len(self.sampled), lnprob, pool=self.pool)
    nsteps = int(ceil(self.num_samples / self.nwalkers))
    for i in range(nsteps):
        posnext, prob, state, blobs = sampler.run_mcmc(pos, 1)
        for iwalker, (x, xnext, l, params) in enumerate(zip(pos, posnext, prob, blobs)):
            if (x == xnext).all() and i != nsteps - 1:
                weight[iwalker] += 1
            else:
                yield sample(-l, x, weight[iwalker])
                # write to file once every `self.output_freq` accepted steps (per walker)
                if self.output_file and mpi.is_master():
                    row = tuple(chain([-l, weight[iwalker]], x,
                                      [params[k] for k in self.output_extra_params]))
                    samples[iwalker][isample[iwalker]] = row
                    isample[iwalker] += 1
                    if isample[iwalker] >= self.output_freq or i == nsteps - 1:
                        pickle.dump((iwalker, samples[iwalker][:isample[iwalker]]),
                                    _output_file, protocol)
                        _output_file.flush()
                        isample[iwalker] = 0
                weight[iwalker] = 1
        pos = posnext
def test_vectorize():
    def lp_vec(p):
        return -0.5 * np.sum(p**2, axis=1)

    np.random.seed(42)
    nwalkers, ndim = 32, 3
    coords = np.random.randn(nwalkers, ndim)
    sampler = EnsembleSampler(nwalkers, ndim, lp_vec, vectorize=True)
    sampler.run_mcmc(coords, 10)
    assert sampler.get_chain().shape == (10, nwalkers, ndim)
def run_sampler(backend, nwalkers=32, ndim=3, nsteps=25, seed=1234,
                thin_by=1, dtype=None, blobs=True, lp=None):
    if lp is None:
        lp = normal_log_prob_blobs if blobs else normal_log_prob
    if seed is not None:
        np.random.seed(seed)
    coords = np.random.randn(nwalkers, ndim)
    sampler = EnsembleSampler(nwalkers, ndim, lp, backend=backend, blobs_dtype=dtype)
    sampler.run_mcmc(coords, nsteps, thin_by=thin_by)
    return sampler
def test_shapes(backend, moves, nwalkers=32, ndim=3, nsteps=10, seed=1234): # Set up the random number generator. np.random.seed(seed) with backend() as be: # Initialize the ensemble, moves and sampler. coords = np.random.randn(nwalkers, ndim) sampler = EnsembleSampler(nwalkers, ndim, normal_log_prob, moves=moves, backend=be) # Run the sampler. sampler.run_mcmc(coords, nsteps) chain = sampler.get_chain() assert len(chain) == nsteps, "wrong number of steps" tau = sampler.get_autocorr_time(quiet=True) assert tau.shape == (ndim, ) # Check the shapes. with pytest.warns(DeprecationWarning): assert sampler.chain.shape == ( nwalkers, nsteps, ndim, ), "incorrect coordinate dimensions" with pytest.warns(DeprecationWarning): assert sampler.lnprobability.shape == ( nwalkers, nsteps, ), "incorrect probability dimensions" assert sampler.get_chain().shape == ( nsteps, nwalkers, ndim, ), "incorrect coordinate dimensions" assert sampler.get_log_prob().shape == ( nsteps, nwalkers, ), "incorrect probability dimensions" assert sampler.acceptance_fraction.shape == ( nwalkers, ), "incorrect acceptance fraction dimensions" # Check the shape of the flattened coords. assert sampler.get_chain(flat=True).shape == ( nsteps * nwalkers, ndim, ), "incorrect coordinate dimensions" assert sampler.get_log_prob(flat=True).shape == ( nsteps * nwalkers, ), "incorrect probability dimensions"
def __init__(self, lnpost, p0, nwalkers=120):
    """ init """
    self.lnpost = lnpost
    # Note: the blob dtype must be specified here, otherwise an error occurs,
    # e.g. [("lnlike", float)].
    blobs_dtype = float
    # NOTE: a structured dtype must be a list of tuples (not a tuple of tuples)
    self.sampler = EnsembleSampler(nwalkers, p0.shape[1], lnpost,
                                   blobs_dtype=blobs_dtype)
    self.p0 = p0
    self.p_last = p0
    self.ndim = p0.shape[1]
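# The dtype note above matters because emcee otherwise infers the blob dtype
# from the first value it sees. A minimal sketch of the blobs mechanism: the
# log-probability returns extra per-sample metadata after the log-probability
# itself (names here are illustrative):
import numpy as np
from emcee import EnsembleSampler

def log_prob(x):
    logl = -0.5 * np.sum(x ** 2)
    return logl, logl  # the second value is stored as a blob

nwalkers, ndim = 16, 2
sampler = EnsembleSampler(nwalkers, ndim, log_prob,
                          blobs_dtype=[("lnlike", float)])  # list of tuples
sampler.run_mcmc(np.random.randn(nwalkers, ndim), 20)
blobs = sampler.get_blobs()  # structured array with shape (nsteps, nwalkers)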
def test_overwrite(seed=1234):
    np.random.seed(seed)

    def ll(x):
        return -0.5 * np.sum(x**2)

    nwalkers = 64
    p0 = np.random.normal(size=(nwalkers, 1))
    init = np.copy(p0)
    sampler = EnsembleSampler(nwalkers, 1, ll)
    sampler.run_mcmc(p0, 10)
    # run_mcmc must not mutate the initial position array in place
    assert np.allclose(init, p0)
def sample(self, niter=500, thin=5, label='MCMC sampling', reset=False):
    """MCMC sampling using emcee"""
    if self.sampler is None:
        self.sampler = EnsembleSampler(self.de.n_pop, self.de.n_par, self.lnposterior)
        pop0 = self.de.population
    else:
        pop0 = self.sampler.chain[:, -1, :].copy()
    if reset:
        self.sampler.reset()
    for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin),
                  total=niter, desc=label):
        pass
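# The `self.sampler.chain[:, -1, :]` pattern above resumes from the walkers'
# last position; in emcee 3 the walker-major `chain` property is deprecated,
# and the same thing can be written with the current API. A minimal sketch
# (log-prob illustrative):
import numpy as np
from emcee import EnsembleSampler

def log_prob(x):
    return -0.5 * np.sum(x ** 2)

s = EnsembleSampler(16, 2, log_prob)
s.run_mcmc(np.random.randn(16, 2), 50)
pop0 = s.get_last_sample().coords  # (nwalkers, ndim); replaces chain[:, -1, :]
s.run_mcmc(pop0, 50)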
def test_blob_shape(backend, blob_spec):
    # HDF backends don't support the object type
    hdf_able, ragged, blob_shape, func = blob_spec
    if backend in (backends.TempHDFBackend,) and not hdf_able:
        return

    with backend() as be:
        np.random.seed(42)
        model = BlobLogProb(func)
        coords = np.random.randn(32, 3)
        nwalkers, ndim = coords.shape

        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        nsteps = 10

        if ragged:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", DeprecationWarning)
                sampler.run_mcmc(coords, nsteps)
        else:
            sampler.run_mcmc(coords, nsteps)

        shape = [nsteps, nwalkers]
        if isinstance(blob_shape, tuple):
            shape += blob_shape
        elif blob_shape > 0:
            shape += [blob_shape]

        assert sampler.get_blobs().shape == tuple(shape)
        if not hdf_able:
            assert sampler.get_blobs().dtype == np.dtype("object")
def test_shapes(backend, moves, nwalkers=32, ndim=3, nsteps=10, seed=1234): # Set up the random number generator. np.random.seed(seed) with backend() as be: # Initialize the ensemble, moves and sampler. coords = np.random.randn(nwalkers, ndim) sampler = EnsembleSampler(nwalkers, ndim, normal_log_prob, moves=moves, backend=be) # Run the sampler. sampler.run_mcmc(coords, nsteps) chain = sampler.get_chain() assert len(chain) == nsteps, "wrong number of steps" tau = sampler.get_autocorr_time(quiet=True) assert tau.shape == (ndim,) # Check the shapes. assert sampler.chain.shape == (nwalkers, nsteps, ndim), \ "incorrect coordinate dimensions" assert sampler.get_chain().shape == (nsteps, nwalkers, ndim), \ "incorrect coordinate dimensions" assert sampler.lnprobability.shape == (nsteps, nwalkers), \ "incorrect probability dimensions" assert sampler.acceptance_fraction.shape == (nwalkers,), \ "incorrect acceptance fraction dimensions" # Check the shape of the flattened coords. assert sampler.get_chain(flat=True).shape == \ (nsteps * nwalkers, ndim), "incorrect coordinate dimensions" assert sampler.get_log_prob(flat=True).shape == \ (nsteps*nwalkers,), "incorrect probability dimensions"
def run_emcee(x, lnprob, args, nwalkers, nruns, fudge, chain_name, burns,
              pool=None, nthreads=1, namearray=[], resume=False, w=False):
    ndim = len(x)
    p0 = []
    if resume:
        p0, ndone = resume_file(chain_name, ndim, nwalkers)
        nruns -= ndone
        n = (ndone + burns) / nwalkers
    else:
        for i in range(0, nwalkers):
            shuffle = (10 ** (fudge * (np.random.rand(ndim) - 0.5)))
            p0 += [list(shuffle * x)]
        initiate_file(chain_name, ndim, blob_list=namearray, w=w)
        n = 0
    iterations = int(nruns / nwalkers)

    if pool is not None:
        sampler = EnsembleSampler(nwalkers, ndim, lnprob, args=args, pool=pool)
    else:
        sampler = EnsembleSampler(nwalkers, ndim, lnprob, args=args, threads=nthreads)

    for result in sampler.sample(p0, iterations=iterations, storechain=False):
        n += 1
        if n > burns / nwalkers:
            position = result[0]
            logl = result[1]
            with fFITS(chain_name, 'rw') as fits:
                for k in range(position.shape[0]):
                    output = {'lp': np.array([logl[k]]),
                              'x': np.array([position[k]])}
                    for i in range(0, len(namearray)):
                        blob = result[3][k][i]
                        output[namearray[i]] = np.array([blob])
                    if np.isfinite(logl[k]):
                        fits['MCMC'].append(output)
    if pool is not None:
        pool.close()
def runSampler(niters=400000, thin=400, newData=True,
               filename="./recepmod/data/test_chain.h5", npar=36):
    """ Run the sampling. """
    from emcee import EnsembleSampler
    from .StoneModel import StoneModel
    from tqdm import tqdm

    # Load model
    StoneM = StoneModel(newData)

    # Get uniform distribution of positions for start
    p0, ndims, nwalkers = getUniformStart(StoneM)

    # Set up sampler
    sampler = EnsembleSampler(nwalkers, ndims, StoneM.NormalErrorCoef, threads=npar)

    if filename is not None:
        f, dset = startH5File(StoneM, filename)

    # Set up thinning tracking
    thinTrack = -thin

    for p, lnprob, _ in tqdm(sampler.sample(p0, iterations=niters, storechain=False),
                             total=niters):
        if thinTrack < thin:
            thinTrack += 1
        else:
            matOut = np.concatenate(
                (lnprob.reshape(nwalkers, 1),
                 np.arange(0, nwalkers).reshape(nwalkers, 1),
                 p.reshape(nwalkers, ndims)), axis=1)
            if filename is not None:
                fShape = dset.shape
                dset.resize((fShape[0] + np.shape(matOut)[0], fShape[1]))
                dset[fShape[0]:, :] = matOut
                f.flush()
            thinTrack = 1
def do_mcmc(self, nwalker=100, nburn=50, nchain=50, threads=1, set_prior=True):
    # initial walkers for MCMC
    ndim = 2
    pinit = np.zeros((nwalker, ndim))
    pinit[:, 0] = np.random.uniform(-10, -2, nwalker)
    pinit[:, 1] = np.random.uniform(np.log10(self.lc.dt_min / 10),
                                    np.log10(self.lc.dt_tot * 10), nwalker)
    # start sampling
    sampler = EnsembleSampler(nwalker, ndim, self.lnprob,
                              args=(self.lc, set_prior), threads=threads)
    # burn-in
    pos, prob, state = sampler.run_mcmc(pinit, nburn)
    sampler.reset()
    # actual samples
    sampler.run_mcmc(pos, nchain, rstate0=state)
    self.sampler = sampler
    self.flatchain = sampler.flatchain
    self.lnprobability = sampler.lnprobability
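# The burn-in/reset/rstate0 pattern above is the classic emcee 2 idiom. An
# equivalent current-API alternative is to run one chain and discard the early
# steps when reading it back, rather than resetting the sampler. A minimal
# sketch (log-prob illustrative):
import numpy as np
from emcee import EnsembleSampler

def log_prob(x):
    return -0.5 * np.sum(x ** 2)

nwalker, ndim, nburn, nchain = 100, 2, 50, 50
sampler = EnsembleSampler(nwalker, ndim, log_prob)
sampler.run_mcmc(np.random.randn(nwalker, ndim), nburn + nchain)
flat = sampler.get_chain(discard=nburn, flat=True)  # burn-in dropped here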
def test_errors(backend, nwalkers=32, ndim=3, nsteps=5, seed=1234):
    # Set up the random number generator.
    np.random.seed(seed)

    with backend() as be:
        # Initialize the ensemble, proposal, and sampler.
        coords = np.random.randn(nwalkers, ndim)
        sampler = EnsembleSampler(nwalkers, ndim, normal_log_prob, backend=be)

        # Test for not running.
        with pytest.raises(AttributeError):
            sampler.chain
        with pytest.raises(AttributeError):
            sampler.lnprobability

        # What about not storing the chain.
        sampler.run_mcmc(coords, nsteps, store=False)
        with pytest.raises(AttributeError):
            sampler.chain

        # Now what about if we try to continue using the sampler with an
        # ensemble of a different shape.
        sampler.run_mcmc(coords, nsteps, store=False)

        coords2 = np.random.randn(nwalkers, ndim + 1)
        with pytest.raises(ValueError):
            list(sampler.run_mcmc(coords2, nsteps))
def centroid(img, x0, y0, fwhm_x=8., fwhm_y=8., verbose=False, **kwargs):
    def prior_bounds(pv):
        return -1e18 if not ((0 < pv[0] < img.shape[1]) | (0 < pv[1] < img.shape[0])) else 0

    estimate_errors = kwargs.get('estimate_errors', True)
    return_chains = kwargs.get('return_chains', False)
    operation = kwargs.get('operation', 'mean')
    maxiter = kwargs.get('maxiter', 5000)
    maxfun = kwargs.get('maxfun', 5000)
    mc_threads = kwargs.get('mc_threads', 1)
    mc_nwalkers = kwargs.get('mc_nwalkers', 50)
    mc_niter = kwargs.get('mc_niter', 300)
    mc_thinning = kwargs.get('mc_thinning', 300)
    mc_burn = kwargs.get('mc_burn', 100)

    if operation == 'mean':
        x, y = img.mean(axis=0), img.mean(axis=1)
    elif operation == 'max':
        x, y = img.max(axis=0), img.max(axis=1)
    else:
        raise TypeError

    vmin, vmax = 0.5 * (x.min() + y.min()), 0.5 * (x.max() + y.max())
    pv0 = np.array([x0, y0, fwhm_x, fwhm_y, vmax - vmin, 1e-2 * (vmax - vmin), vmin])
    lpfun = lambda pv: (logl_g1d(pv[0], pv[4], pv[2], pv[5], pv[6], x) +
                        logl_g1d(pv[1], pv[4], pv[3], pv[5], pv[6], y) +
                        prior_bounds(pv))
    pv = fmin(lambda pv: -lpfun(pv), pv0, disp=verbose, maxfun=maxfun, maxiter=maxiter)

    if not (with_emcee and estimate_errors):
        return pv, -np.ones(pv.size)
    else:
        sampler = EnsembleSampler(mc_nwalkers, pv.size, lpfun, threads=1)
        sampler.run_mcmc(multivariate_normal(pv, 5e-3 * np.eye(pv.size), size=mc_nwalkers),
                         mc_niter)
        fc = sampler.chain[:, mc_burn::mc_thinning, :].reshape([-1, pv.size])
        pc = np.array(np.percentile(fc, [50, 16, 84], axis=0))
        if return_chains:
            return pc[0, :], np.mean(np.abs(pc[1:, :] - pc[0, :]), axis=0), fc
        else:
            return pc[0, :], np.mean(np.abs(pc[1:, :] - pc[0, :]), axis=0)
def test_infinite_iterations_store(backend, nwalkers=32, ndim=3):
    with backend() as be:
        coords = np.random.randn(nwalkers, ndim)
        with pytest.raises(ValueError):
            next(EnsembleSampler(nwalkers, ndim, normal_log_prob, backend=be)
                 .sample(coords, iterations=None, store=True))
def test_infinite_iterations(backend, nwalkers=32, ndim=3):
    with backend() as be:
        coords = np.random.randn(nwalkers, ndim)
        for state in islice(
                EnsembleSampler(nwalkers, ndim, normal_log_prob, backend=be)
                .sample(coords, iterations=None, store=False), 10):
            pass
def sample_mcmc(self, niter=500, thin=5, label='MCMC sampling', reset=False, leave=True):
    if self.sampler is None:
        self.sampler = EnsembleSampler(self.de.n_pop, self.de.n_par,
                                       self.lnposterior, vectorize=True)
        pop0 = self.de.population
    else:
        pop0 = self.sampler.chain[:, -1, :].copy()
    if reset:
        self.sampler.reset()
    for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin),
                  total=niter, desc=label, leave=False):
        pass
def __call__(self, nw=None, nt=None, nb=None, ns=None):
    if nw is None:
        nw = self.nWalkers
    else:
        self.nWalkers = nw
        self._initial_parameters()
    if nt is None:
        nt = self.nThreads
    if nb is None:
        nb = self.nBurnin
    if ns is None:
        ns = self.nSteps

    # set up emcee sampler
    sampler = EnsembleSampler(nw, self.nDim, self.lnProb, threads=nt)

    if nb:
        # Run burn-in steps
        pos, prob, state = sampler.run_mcmc(self.pos0, nb)
        # Reset the chain to remove the burn-in samples
        sampler.reset()
        # from the final position in the burn-in chain, sample for nsteps
        sampler.run_mcmc(pos, ns, rstate0=state)
    else:
        # sample for nsteps
        sampler.run_mcmc(self.pos0, ns)

    samples = sampler.flatchain
    lnprobs = sampler.flatlnprobability
    indxs = np.where(lnprobs > -float_info.max)[0]

    if self.scale == 'linear':
        samples = samples[indxs]
    elif self.scale == 'log':
        samples = np.power(10, samples[indxs])
    else:
        raise Exception("prior scale must be set")

    lnprobs = lnprobs[indxs]
    Xmin = max(lnprobs)
    indmin = np.where(lnprobs == Xmin)[0][0]
    vals = samples[indmin]
    return vals, samples, lnprobs
def _get_sampler(self, **kwargs):
    # This is bad, but I have to drop this key before forwarding kwargs,
    # otherwise nwalkers is passed twice.
    if "nwalkers" in kwargs:
        del kwargs["nwalkers"]
    return EnsembleSampler(
        log_prob_fn=self.likelihood,
        ndim=self.nparams,
        nwalkers=self.nwalkers,
        **kwargs,
    )
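# The collision the comment above guards against is generic Python: a keyword
# supplied both explicitly and inside **kwargs raises TypeError. A minimal
# illustration (the function f is hypothetical):
def f(nwalkers=8, **kw):
    return nwalkers

kw = {"nwalkers": 16}
# f(nwalkers=32, **kw)  # TypeError: got multiple values for keyword 'nwalkers'
kw.pop("nwalkers", None)
f(nwalkers=32, **kw)    # OK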
def runsample(self, sed_obs, sed_obs_err, vpi_obs, vpi_obs_err,
              Lvpi=1.0, Lprior=1.0, nsteps=(1000, 1000, 2000), p0try=None):
    ndim = 4               # 4 stands for [Teff, logg, Av, DM]
    nwalkers = len(p0try)  # number of chains

    for i in range(len(nsteps)):
        if i == 0:
            # initialize sampler
            sampler = EnsembleSampler(nwalkers, ndim, costfun,
                                      args=(self.r, self.p_bounds, self.Alambda,
                                            sed_obs, sed_obs_err,
                                            vpi_obs, vpi_obs_err, Lvpi, Lprior))
            # guess Av and DM for p0try
            p0try = np.array([initial_guess(_, self.r, self.Alambda, sed_obs, sed_obs_err)
                              for _ in p0try])
            # run sampler
            pos, _, __ = sampler.run_mcmc(p0try, nsteps[i])
        else:
            # generate new p
            p_rand = random_p(sampler, nloopmax=1000, method="mle", costfun=costfun,
                              args=(self.r, self.p_bounds, self.Alambda,
                                    sed_obs, sed_obs_err,
                                    vpi_obs, vpi_obs_err, Lvpi, Lprior))
            # reset sampler
            sampler.reset()
            # run at new p
            pos1, lnprob1, rstate1 = sampler.run_mcmc(p_rand, nsteps[i])
    return sampler
def test_blob_shape(backend, blob_spec):
    # HDF backends don't support the object type
    if backend in (backends.TempHDFBackend,) and not blob_spec[0]:
        return

    with backend() as be:
        np.random.seed(42)
        model = BlobLogProb(blob_spec[2])
        coords = np.random.randn(32, 3)
        nwalkers, ndim = coords.shape

        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        nsteps = 10
        sampler.run_mcmc(coords, nsteps)

        shape = [nsteps, nwalkers]
        if blob_spec[1] > 0:
            shape += [blob_spec[1]]
        assert sampler.get_blobs().shape == tuple(shape)
    # print(lp, lnlike(theta, target, source1, source2))
    return lp + lnlike(theta, target, source1, source2)

ndim, nwalkers = 2, 6
pos = []
while len(pos) < nwalkers:
    realization = [random_in_range(0, 1), random_in_range(0.01, 1)]
    # random_in_range(-0.01, 0.01)]
    if np.isfinite(lnprior(realization)):
        pos.append(realization)

sampler = EnsembleSampler(nwalkers, ndim, lnprob, threads=8,
                          args=(target_slices, source1_slices, source2_slices))
p0 = sampler.run_mcmc(pos, 1000)[0]
sampler.reset()
sampler.run_mcmc(p0, 1000)
sampler.pool.close()

samples = sampler.chain[:, 500:, :].reshape((-1, ndim))
# samples[:, 0] *= R_lambda
lower, m, upper = np.percentile(samples[:, 0], [16, 50, 84])
band_results['f_S_lower'] = m - lower
band_results['f_S'] = m
band_results['f_S_upper'] = upper - m
band_results['yerr'] = np.median(samples[:, 1])
pos = []
while len(pos) < nwalkers:
    realization = [random_in_range(0, 1), random_in_range(0.01, 1)]
    if np.isfinite(lnprior(realization)):
        pos.append(realization)

# pool = MPIPool(loadbalance=True)
# if not pool.is_master():
#     pool.wait()
#     sys.exit(0)

sampler = EnsembleSampler(nwalkers, ndim, lnprob, threads=8,
                          args=(target_slices, source1_slices, source2_slices))
                          # pool=pool)
sampler.run_mcmc(pos, 1000)

samples = sampler.chain[:, 500:, :].reshape((-1, ndim))
samples[:, 0] *= R_lambda
lower, m, upper = np.percentile(samples[:, 0], [16, 50, 84])
band_results['f_S_lower'] = m - lower
band_results['f_S'] = m
band_results['f_S_upper'] = upper - m
band_results['yerr'] = np.median(samples[:, 1])
print(lnprob(np.array([6302, 4.38, 0.1, -39.54, 5.6, -12.221])))

# Use vanilla emcee to do the sampling
from emcee import EnsembleSampler

ndim = 6
nwalkers = 4 * ndim

# Load values from config file.
# Add scatter in
p0 = np.array([np.random.uniform(6200, 6400, nwalkers),
               np.random.uniform(4.0, 4.49, nwalkers),
               np.random.uniform(-0.2, -0.1, nwalkers),
               np.random.uniform(-5., -4., nwalkers),
               np.random.uniform(4.0, 6.0, nwalkers),
               np.random.uniform(-12.81, -12.80, nwalkers)]).T

sampler = EnsembleSampler(nwalkers, ndim, lnprob, threads=mp.cpu_count() - 1)

# burn in
pos, prob, state = sampler.run_mcmc(p0, args.samples)
sampler.reset()
print("Burned in")

# actual run
pos, prob, state = sampler.run_mcmc(pos, args.samples)

# Save the last position of the walkers
np.save("walkers_emcee.npy", pos)
np.save("eparams_emcee.npy", sampler.flatchain)
results = curve_fit(bkplw, k, plaw, p0=(np.log10(1e3), 0.1, 0.0, -2.),
                    maxfev=100000)

print(results[0])

p.loglog(vcs.vel_freqs, vcs.ps1D, 'bD')
p.loglog(k, 10**bkplw(k, *results[0]), 'r')
p.show()

from emcee import EnsembleSampler

ndim = 4
nwalkers = 100

pos = [results[0] + 1e-4 * np.random.randn(ndim) for i in range(nwalkers)]

sampler = EnsembleSampler(nwalkers, ndim, fit_func, args=(plaw, k))
sampler.run_mcmc(pos, 1000)

# Remove the burn-in (per walker, before flattening)
samples = sampler.chain[:, 200:, :].reshape((-1, ndim))

import triangle
fig = triangle.corner(samples)
p.show()

print(samples.mean(axis=0))

p.loglog(vcs.vel_freqs, vcs.ps1D, 'bD')
p.loglog(k, 10**bkplw(k, *results[0]), 'r')
p.loglog(k, 10**bkplw(k, *samples.mean(axis=0)), 'g')
class LPFunction(object):
    """A basic log posterior function class."""

    def __init__(self, time, flux, nthreads=1):
        # Set up the transit model
        # ------------------------
        self.tm = MA(interpolate=True, klims=(0.08, 0.13), nthr=nthreads)
        self.nthr = nthreads

        # Initialise data
        # ---------------
        self.time = time.copy() if time is not None else array([])
        self.flux_o = flux.copy() if flux is not None else array([])
        self.npt = self.time.size

        # Set the optimiser and the MCMC sampler
        # --------------------------------------
        self.de = None
        self.sampler = None

        # Set up the parametrisation and priors
        # -------------------------------------
        psystem = [
            GParameter('tc', 'zero_epoch', 'd', NP(1.01, 0.02), (-inf, inf)),
            GParameter('pr', 'period', 'd', NP(2.50, 1e-7), (0, inf)),
            GParameter('rho', 'stellar_density', 'g/cm^3', UP(0.90, 2.50), (0.90, 2.5)),
            GParameter('b', 'impact_parameter', 'R_s', UP(0.00, 1.00), (0.00, 1.0)),
            GParameter('k2', 'area_ratio', 'A_s', UP(0.08 ** 2, 0.13 ** 2), (1e-8, inf))]
        pld = [
            PParameter('q1', 'q1_coefficient', '', UP(0, 1), bounds=(0, 1)),
            PParameter('q2', 'q2_coefficient', '', UP(0, 1), bounds=(0, 1))]
        pbl = [LParameter('es', 'white_noise', '', UP(1e-6, 1e-2), bounds=(1e-6, 1e-2))]
        per = [LParameter('bl', 'baseline', '', NP(1.00, 0.001), bounds=(0.8, 1.2))]
        self.ps = ParameterSet()
        self.ps.add_global_block('system', psystem)
        self.ps.add_passband_block('ldc', 2, 1, pld)
        self.ps.add_lightcurve_block('baseline', 1, 1, pbl)
        self.ps.add_lightcurve_block('error', 1, 1, per)
        self.ps.freeze()

    def compute_baseline(self, pv):
        """Constant baseline model"""
        return full_like(self.flux_o, pv[8])

    def compute_transit(self, pv):
        """Transit model"""
        _a = as_from_rhop(pv[2], pv[1])  # Scaled semi-major axis from stellar density and orbital period
        _i = mt.acos(pv[3] / _a)         # Inclination from impact parameter and semi-major axis
        _k = mt.sqrt(pv[4])              # Radius ratio from area ratio
        a, b = mt.sqrt(pv[5]), 2 * pv[6]
        _uv = array([a * b, a * (1. - b)])  # Quadratic limb darkening coefficients
        return self.tm.evaluate(self.time, _k, _uv, pv[0], pv[1], _a, _i)

    def compute_lc_model(self, pv):
        """Combined baseline and transit model"""
        return self.compute_baseline(pv) * self.compute_transit(pv)

    def lnprior(self, pv):
        """Log prior"""
        if any(pv < self.ps.lbounds) or any(pv > self.ps.ubounds):
            return -inf
        else:
            return self.ps.lnprior(pv)

    def lnlikelihood(self, pv):
        """Log likelihood"""
        flux_m = self.compute_lc_model(pv)
        return ll_normal_es(self.flux_o, flux_m, pv[7])

    def lnposterior(self, pv):
        """Log posterior"""
        lnprior = self.lnprior(pv)
        if isinf(lnprior):
            return lnprior
        else:
            return lnprior + self.lnlikelihood(pv)

    def create_pv_population(self, npop=50):
        return self.ps.sample_from_prior(npop)

    def optimize(self, niter=200, npop=50, population=None, label='Optimisation'):
        """Global optimisation using differential evolution"""
        if self.de is None:
            self.de = DiffEvol(self.lnposterior, clip(self.ps.bounds, -1, 1),
                               npop, maximize=True)
        if population is None:
            self.de._population[:, :] = self.create_pv_population(npop)
        else:
            self.de._population[:, :] = population
        for _ in tqdm(self.de(niter), total=niter, desc=label):
            pass

    def sample(self, niter=500, thin=5, label='MCMC sampling', reset=False):
        """MCMC sampling using emcee"""
        if self.sampler is None:
            self.sampler = EnsembleSampler(self.de.n_pop, self.de.n_par, self.lnposterior)
            pop0 = self.de.population
        else:
            pop0 = self.sampler.chain[:, -1, :].copy()
        if reset:
            self.sampler.reset()
        for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin),
                      total=niter, desc=label):
            pass
def mcmc(meta_prefit, C0, P, D, t_steps, pops, nwalkers=80, nsteps=500,
         nburn=100, storage="mcmc"):
    """
    MCMC

    Version 2015 Jan 17 Madhura Killedar
    Last update: 2015 Nov 20 Madhura Killedar

    Implementation of D. Foreman-Mackey's emcee for Bayesian parameter fitting.
    emcee is a python implementation of an affine-invariant MCMC ensemble sampler.

    INPUT
    meta_prefit: starting proposal for the parameters, taken from the model
                 baseline (or mean); the walkers begin in a ball around this
                 point and each sends a chain to explore the parameter space
    nwalkers:    number of walkers/chains
    nsteps:      total number of steps taken by each chain
    nburn:       assumed burn-in phase
    storage:     path to folder in which to place output files

    OUTPUT
    samples: non-burn-in steps of all chains combined, used to actually sample
             the posterior

    ON-THE-FLY OUTPUT
    fn_diagnostics (ASCII ~1KB): a log file for useful diagnostics of the MCMC run
    fn_chains (PNG ~200KB): plot of some of the chains/walkers for some of the parameters
    fn_corner (PNG ~1MB): corner plot for the MCMC sample of all parameters
    """
    # -------------------------------------------------
    # on-the-fly output filenames
    fn_diagnostics = "MCMCdiagnostics.log"
    fn_chains = "plot-mcmc-chain.png"
    fn_corner = "plot-mcmc-corner.png"
    logfile = storage + "/" + fn_diagnostics
    logf = open(logfile, "w")

    # SETUP NUMBER OF PARAMETERS, WALKERS, STEPS TO USE
    nparam = len(meta_prefit)  # actual number of parameters being calibrated (mortality for now)
    if nwalkers < 2 * nparam:  # increase the number of walkers/chains if needed
        nwalkers = 2 * nparam
        print("\n I'm increasing the number of walkers")
    logf.write("\n Number of walkers = " + str(nwalkers))
    logf.write("\n Number of steps per walker = " + str(nsteps))
    logf.write("\n ... of which the first " + str(nburn) + " are considered in the burn-in phase")
    steps_used = (nsteps - nburn) * nwalkers
    logf.write("\n Therefore, number of samples used = " + str(steps_used))

    # PROPOSAL DISTRIBUTION
    pars, proposal_center = [], []
    for i in meta_prefit.keys():  # converting dictionary to ordered list
        pars.append(i)
        proposal_center.append(meta_prefit[i])
    prop_str = ""
    for i in meta_prefit:
        prop_str = prop_str + " (" + i + ": " + str(meta_prefit[i]) + ")"
    prop_str = "\n Initial proposal for each parameter = " + prop_str
    print(prop_str)
    logf.write(prop_str)
    # starting point for all walkers in a ball around the suspected centre
    proposal_dist = [proposal_center + 1.e-2 * random.randn(nparam) for i in range(nwalkers)]
    for i in range(nwalkers):
        for j in range(nparam):
            if proposal_dist[i][j] < 0:
                proposal_dist[i][j] = 0.001  # some lower bound starting point

    # RUN EMCEE
    print("\n *** running emcee ***")
    sampler = EnsembleSampler(nwalkers, nparam, lnprob, args=(C0, P, D, t_steps, pops))
    sampler.run_mcmc(proposal_dist, nsteps)

    # POST-EMCEE DIAGNOSTICS
    samples = sampler.chain[:, nburn:, :].reshape((-1, nparam))
    steps_used = samples.shape[0]
    logf.write("\n Number of samples used = " + str(steps_used))
    # Autocorrelation time
    auto_time = sampler.acor
    auto_str = ""
    for i in range(nparam):
        auto_str = auto_str + " " + str(auto_time[i])
    auto_str = "\n\n Autocorrelation time for each parameter = " + auto_str
    print(auto_str)
    logf.write(auto_str)
    # Acceptance fraction
    accept_frac = sampler.acceptance_fraction
    accf_str = "\n Acceptance fractions = "
    for i in range(nwalkers):
        accf_str = accf_str + " " + str(accept_frac[i])
    print(accf_str)
    logf.write(accf_str)

    # CHAIN PLOT OF MCMC
    figwalkers = plt.figure()
    npar_plt = min(4, nparam)
    nwalk_plt = min(20, nwalkers)
    axes_plt = empty(npar_plt, dtype=object)
    step = arange(nsteps)
    for i in arange(npar_plt):
        axes_plt[i] = figwalkers.add_subplot(npar_plt, 1, i + 1)
        axes_plt[i] = plt.gca()
    for i in arange(npar_plt):
        k = i
        for j in arange(nwalk_plt):  # or all nwalkers
            position_plt = sampler.chain[j, :, k]
            axes_plt[i].plot(step, position_plt, '-')
        label_plt = 'par ' + str(i)
        axes_plt[i].set_xlabel(label_plt)
        axes_plt[i].set_ylim(amin(sampler.chain[:nwalk_plt - 1, :, k]),
                             amax(sampler.chain[:nwalk_plt - 1, :, k]))
    figwalkers.savefig(storage + "/" + fn_chains, format="png")

    """
    # DEBUG: USE PRIOR DISTRIBUTION INSTEAD OF POSTERIOR/MCMC RESULT
    samples = empty([steps_used, nparam])
    for i in arange(nparam):
        samples[:, i] = random.lognormal(1., 1., steps_used)
    """

    # CORNER PLOT OF POSTERIOR SAMPLE
    labels_mcmc = empty(nparam, dtype=object)
    for k in range(nparam):
        labels_mcmc[k] = pars[k]
    fig_mcmc_corner = corner(samples, labels=labels_mcmc, truths=proposal_center)
    fig_mcmc_corner.savefig(storage + "/" + fn_corner, format="png")

    logf.close()
    return samples
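# The diagnostics logged above (autocorrelation time via `sampler.acor`,
# acceptance fraction) map onto the current emcee 3 API roughly as follows.
# A minimal self-contained sketch (log-prob illustrative):
import numpy as np
from emcee import EnsembleSampler

def log_prob(x):
    return -0.5 * np.sum(x ** 2)

sampler = EnsembleSampler(32, 3, log_prob)
sampler.run_mcmc(np.random.randn(32, 3), 500)
tau = sampler.get_autocorr_time(quiet=True)  # per-parameter; replaces .acor
print("mean autocorrelation time:", np.mean(tau))
print("mean acceptance fraction:", np.mean(sampler.acceptance_fraction))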
bre = BayesianRichardsonExtrapolation()

# print("\nInitializing walkers")
nwalk = 100
params0 = np.tile(guess_list, nwalk).reshape(nwalk, len(guess_list))

# perturb walkers around the guess
for i in range(len(guess_list)):
    params0.T[i] += np.random.rand(nwalk) * perturb_list[i]

# hack! ...and force the third parameter >= 0
params0.T[2] = np.absolute(params0.T[2])

# print("\nInitializing the sampler and burning in walkers")
s = EnsembleSampler(nwalk, params0.shape[-1], bre, threads=4)
pos, prob, state = s.run_mcmc(params0, burn_in)
s.reset()

# print("\nSampling the posterior density for the problem")
s.run_mcmc(pos, samples)

samplea = s.flatchain[:, 0]
pylab.plot(samplea)
pylab.xlabel('Step number')
pylab.ylabel('alpha')
pylab.show()
pylab.savefig('alpha.png')

samples = s.flatchain[:, 1]
pylab.plot(samples)
class OCLBaseLPF(BaseLPF):
    def __init__(self, target: str, passbands: list, times: list = None, fluxes: list = None,
                 errors: list = None, pbids: list = None, covariates: list = None,
                 nsamples: tuple = None, exptimes: tuple = None,
                 klims: tuple = (0.01, 0.75), nk: int = 512, nz: int = 512,
                 cl_ctx=None, cl_queue=None, **kwargs):
        # The transit model must exist before its context and queue can be reused.
        self.tm = QuadraticModelCL(klims=klims, nk=nk, nz=nz, cl_ctx=cl_ctx, cl_queue=cl_queue)
        self.cl_ctx = cl_ctx or self.tm.ctx
        self.cl_queue = cl_queue or self.tm.queue
        self.cl_lnl_chunks = kwargs.get('cl_lnl_chunks', 1)
        super().__init__(target, passbands, times, fluxes, errors, pbids, covariates,
                         None, nsamples, exptimes)
        self.tm.set_data(self.timea, self.lcids, self.pbids, self.nsamples, self.exptimes)

        src = """
        __kernel void lnl2d(const int nlc, __global const float *obs, __global const float *mod,
                            __global const float *err, __global const int *lcids, __global float *lnl2d){
            uint i_tm = get_global_id(1);    // time vector index
            uint n_tm = get_global_size(1);  // time vector size
            uint i_pv = get_global_id(0);    // parameter vector index
            uint n_pv = get_global_size(0);  // parameter vector population size
            uint gid = i_pv*n_tm + i_tm;     // global linear index
            float e = err[i_pv*nlc + lcids[i_tm]];
            lnl2d[gid] = -log(e) - 0.5f*log(2*M_PI_F) - 0.5f*pown((obs[i_tm]-mod[gid]) / e, 2);
        }

        __kernel void lnl1d(const uint npt, __global float *lnl2d, __global float *lnl1d){
            uint i_pv = get_global_id(0);    // parameter vector index
            uint n_pv = get_global_size(0);  // parameter vector population size

            int i;
            bool is_even;
            uint midpoint = npt;
            __global float *lnl = &lnl2d[i_pv*npt];

            while(midpoint > 1){
                is_even = midpoint % 2 == 0;
                if (is_even == 0){
                    lnl[0] += lnl[midpoint-1];
                }
                midpoint /= 2;
                for(i=0; i<midpoint; i++){
                    lnl[i] = lnl[i] + lnl[midpoint+i];
                }
            }
            lnl1d[i_pv] = lnl[0];
        }

        __kernel void lnl1d_chunked(const uint npt, __global float *lnl2d, __global float *lnl1d){
            uint ipv = get_global_id(0);    // parameter vector index
            uint npv = get_global_size(0);  // parameter vector population size
            uint ibl = get_global_id(1);    // block index
            uint nbl = get_global_size(1);  // number of blocks
            uint lnp = npt / nbl;

            __global float *lnl = &lnl2d[ipv*npt + ibl*lnp];

            if(ibl == nbl-1){
                lnp = npt - (ibl*lnp);
            }

            prefetch(lnl, lnp);

            bool is_even;
            uint midpoint = lnp;
            while(midpoint > 1){
                is_even = midpoint % 2 == 0;
                if (is_even == 0){
                    lnl[0] += lnl[midpoint-1];
                }
                midpoint /= 2;
                for(int i=0; i<midpoint; i++){
                    lnl[i] = lnl[i] + lnl[midpoint+i];
                }
            }
            lnl1d[ipv*nbl + ibl] = lnl[0];
        }
        """
        self.prg_lnl = cl.Program(self.cl_ctx, src).build()
        self.lnlikelihood = self.lnlikelihood_ocl

    def _init_data(self, times, fluxes, pbids, covariates=None, errors=None,
                   nsamples=None, exptimes=None):
        super()._init_data(times, fluxes, pbids, covariates, errors, nsamples, exptimes)
        self.nlc = int32(self.nlc)

        # Initialise the Python arrays
        # ----------------------------
        self.timea = self.timea.astype('f')
        self.ofluxa = self.ofluxa.astype('f')
        self.lnl2d = zeros([50, self.ofluxa.size], 'f')
        self.lnl1d = zeros([self.lnl2d.shape[0], self.cl_lnl_chunks], 'f')
        self.ferr = zeros([50, self.nlc])
        self.lcids = self.lcids.astype('int32')
        self.pbids = self.pbids.astype('int32')
        if covariates is not None:
            self.cova = self.cova.astype('f')

        # Initialise OpenCL buffers
        # -------------------------
        mf = cl.mem_flags
        self._b_flux = cl.Buffer(self.cl_ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.ofluxa)
        self._b_err = cl.Buffer(self.cl_ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.ferr)
        self._b_lnl2d = cl.Buffer(self.cl_ctx, mf.WRITE_ONLY, self.lnl2d.nbytes)
        self._b_lnl1d = cl.Buffer(self.cl_ctx, mf.WRITE_ONLY, self.lnl1d.nbytes)
        self._b_lcids = cl.Buffer(self.cl_ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.lcids)
        if covariates is not None:
            self._b_covariates = cl.Buffer(self.cl_ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,
                                           hostbuf=self.cova)

    def transit_model(self, pvp, copy=False):
        pvp = atleast_2d(pvp)
        pvp_t = zeros([pvp.shape[0], 8], "f")
        uv = zeros([pvp.shape[0], 2], "f")
        pvp_t[:, 0:1] = sqrt(pvp[:, self._pid_k2])  # Radius ratio
        pvp_t[:, 1:3] = pvp[:, 0:2]                 # Transit centre and orbital period
        pvp_t[:, 3] = a = as_from_rhop(pvp[:, 2], pvp[:, 1])
        pvp_t[:, 4] = i_from_ba(pvp[:, 3], a)
        a, b = sqrt(pvp[:, self._sl_ld][:, 0]), 2. * pvp[:, self._sl_ld][:, 1]
        uv[:, 0] = a * b
        uv[:, 1] = a * (1. - b)
        flux = self.tm.evaluate_t_pv2d(pvp_t, uv, copy=copy)
        return flux if copy else None

    def flux_model(self, pvp):
        return self.transit_model(pvp, copy=True).astype('d')

    def _lnl2d(self, pv):
        # Reallocate the buffers if the parameter population size has changed
        if self.lnl2d.shape[0] != pv.shape[0] or self.lnl1d.size != pv.shape[0]:
            self.err = zeros([pv.shape[0], self.nlc], 'f')
            self._b_err.release()
            self._b_err = cl.Buffer(self.cl_ctx, cl.mem_flags.WRITE_ONLY, self.err.nbytes)
            self.lnl2d = zeros([pv.shape[0], self.ofluxa.size], 'f')
            self._b_lnl2d.release()
            self._b_lnl2d = cl.Buffer(self.cl_ctx, cl.mem_flags.WRITE_ONLY, self.lnl2d.nbytes)
            self.lnl1d = zeros([pv.shape[0], self.cl_lnl_chunks], 'f')
            if self._b_lnl1d:
                self._b_lnl1d.release()
            self._b_lnl1d = cl.Buffer(self.cl_ctx, cl.mem_flags.WRITE_ONLY, self.lnl1d.nbytes)
        self.transit_model(pv)
        cl.enqueue_copy(self.cl_queue, self._b_err, (10 ** pv[:, self._sl_err]).astype('f'))
        self.prg_lnl.lnl2d(self.cl_queue, self.tm.f.shape, None, self.nlc,
                           self._b_flux, self.tm._b_f, self._b_err, self._b_lcids, self._b_lnl2d)

    def lnlikelihood_numba(self, pv):
        self._lnl2d(pv)
        cl.enqueue_copy(self.cl_queue, self.lnl2d, self._b_lnl2d)
        lnl = psum2d(self.lnl2d)
        return where(isfinite(lnl), lnl, -inf)

    def lnlikelihood_ocl(self, pv):
        self._lnl2d(pv)
        self.prg_lnl.lnl1d_chunked(self.cl_queue, [self.lnl2d.shape[0], self.cl_lnl_chunks], None,
                                   uint32(self.lnl2d.shape[1]), self._b_lnl2d, self._b_lnl1d)
        cl.enqueue_copy(self.cl_queue, self.lnl1d, self._b_lnl1d)
        lnl = self.lnl1d.astype('d').sum(1)
        return lnl

    def lnlikelihood_numpy(self, pv):
        self._lnl2d(pv)
        cl.enqueue_copy(self.cl_queue, self.lnl2d, self._b_lnl2d)
        lnl = self.lnl2d.astype('d').sum(1)
        return where(isfinite(lnl), lnl, -inf)

    def lnprior(self, pv):
        lnpriors = zeros(pv.shape[0])
        for i, p in enumerate(self.ps.priors):
            lnpriors += p.logpdf(pv[:, i])
        return lnpriors + self.additional_priors(pv)

    def lnposterior(self, pv):
        lnp = self.lnlikelihood(pv) + self.lnprior(pv)
        return where(isfinite(lnp), lnp, -inf)

    def optimize_global(self, niter=200, npop=50, population=None,
                        label='Global optimisation', leave=False):
        if self.de is None:
            self.de = DiffEvol(self.lnposterior, clip(self.ps.bounds, -1, 1), npop,
                               maximize=True, vectorize=True)
            if population is None:
                self.de._population[:, :] = self.create_pv_population(npop)
            else:
                self.de._population[:, :] = population
        for _ in tqdm(self.de(niter), total=niter, desc=label, leave=leave):
            pass

    def sample_mcmc(self, niter=500, thin=5, label='MCMC sampling', reset=False, leave=True):
        if not with_emcee:
            raise ImportError('Emcee not installed.')
        if self.sampler is None:
            self.sampler = EnsembleSampler(self.de.n_pop, self.de.n_par,
                                           self.lnposterior, vectorize=True)
            pop0 = self.de.population
        else:
            pop0 = self.sampler.chain[:, -1, :].copy()
        if reset:
            self.sampler.reset()
        for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin),
                      total=niter, desc=label, leave=False):
            pass

    def remove_outliers(self, sigma=5):
        fmodel = self.flux_model(self.de.minimum_location)[0]
        times, fluxes, pbids, errors = [], [], [], []
        for i in range(len(self.times)):
            res = self.fluxes[i] - fmodel[i]
            mask = ~sigma_clip(res, sigma=sigma).mask
            times.append(self.times[i][mask])
            fluxes.append(self.fluxes[i][mask])
            if self.errors is not None:
                errors.append(self.errors[i][mask])
        self._init_data(times, fluxes, self.pbids,
                        (errors if self.errors is not None else None))
    :return: Log of prior density for given parameter ``p``.
    """
    return p[2]


if __name__ == '__main__':
    # Data from Gelman (Table 11.2)
    y_ij = list()
    y_ij.append([62, 60, 63, 59])
    y_ij.append([63, 67, 71, 64, 65, 66])
    y_ij.append([68, 66, 71, 67, 68, 68])
    y_ij.append([56, 62, 60, 61, 63, 64, 63, 59])
    print(y_ij)

    lnpost = LnPost(y_ij, lnpr=lnpr)
    ndim = 7
    nwalkers = 500
    sampler = EnsembleSampler(nwalkers, ndim, lnpost)
    p0 = sample_ball([60., 0., 0., 60., 60., 60., 60.],
                     [10., 1., 1., 10., 10., 10., 10.], size=500)
    print("sample ball:")
    print(p0)

    print("Burning in...")
    pos, prob, state = sampler.run_mcmc(p0, 300)
    print("Resetting burn-in...")
    sampler.reset()
    print("Now sampling from posterior")
    sampler.run_mcmc(pos, 1000, rstate0=state)
    if not np.isfinite(lp):
        return -np.inf
    return lp + lnlike(theta, target, source1, source2)

ndim, nwalkers = 2, 10
pos = []
while len(pos) < nwalkers:
    realization = [random_in_range(0, 1), random_in_range(0.01, 1)]
    if np.isfinite(lnprior(realization)):
        pos.append(realization)

sampler = EnsembleSampler(nwalkers, ndim, lnprob, threads=8,
                          args=(target_slices, source1_slices, source2_slices))
sampler.run_mcmc(pos, 2000)

samples = sampler.chain[:, 1000:, :].reshape((-1, ndim))
samples[:, 0] *= R_lambda
lower, m, upper = np.percentile(samples[:, 0], [16, 50, 84])
band_results['f_S_lower'] = m - lower
band_results['f_S'] = m
band_results['f_S_upper'] = upper - m
band_results['yerr'] = np.median(samples[:, 1])

corner(samples, labels=['$f_S$', '$f$'])
import numpy as np
from emcee import EnsembleSampler


def lnprob(p):
    x, y = p
    # log-density of the (negated) Rosenbrock function
    lnp = -((1.0 - x)**2 + 100 * (y - x**2)**2)
    return lnp


ndim, nwalkers = 2, 40
p0 = np.array([np.random.rand(ndim) for i in range(nwalkers)])

sampler = EnsembleSampler(nwalkers, ndim, lnprob)
p0, prob, state = sampler.run_mcmc(p0, 10000)
# sampler.reset()
# p0, prob, state = sampler.run_mcmc(p0, 10000)
# np.save("chain.npy", sampler.chain)
    if np.isnan(temp) or np.isnan(lL):
        # return np.nan
        return -np.inf

    # Use the KDE kernel to evaluate how well this point fits
    # based upon our temp, lL posterior.
    lnp = kernel.logpdf([temp, lL])
    return lnp


from ScottiePippen.grids import model_dict

for grid_name in config["grids"]:
    print(grid_name)
    grid = model_dict[grid_name](**config[grid_name])

    sampler = EnsembleSampler(nwalkers, ndim, lnprob, args=[grid])
    pos, prob, state = sampler.run_mcmc(p0, config["samples"])

    # Save the actual chain of samples
    np.save(config["outfile"].format(grid_name), sampler.chain)

# grid = DartmouthPMS(age_range=[0.1, 100], mass_range=[0.1, 1.8])
# grid = PISA(age_range=[0.1, 100], mass_range=[0.1, 1.8])
# grid = Baraffe15(age_range=[0.1, 100], mass_range=[0.1, 1.4])
# grid = Seiss(age_range=[0.1, 100], mass_range=[0.1, 1.8])
# grid.load()
# grid.setup_interpolator()
def test_blob_shape(backend):
    with backend() as be:
        np.random.seed(42)

        nblobs = 5
        model = BlobLogProb(lambda x: np.random.randn(nblobs))
        coords = np.random.randn(32, 3)
        nwalkers, ndim = coords.shape
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        nsteps = 10
        sampler.run_mcmc(coords, nsteps)
        assert sampler.get_blobs().shape == (nsteps, nwalkers, nblobs)

        model = BlobLogProb(lambda x: np.random.randn())
        be.reset(nwalkers, ndim)
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        sampler.run_mcmc(coords, nsteps)
        assert sampler.get_blobs().shape == (nsteps, nwalkers)

        # HDF backends don't support the object type
        if backend in (backends.TempHDFBackend,):
            return

        model = BlobLogProb(lambda x: "face")
        be.reset(nwalkers, ndim)
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        sampler.run_mcmc(coords, nsteps)
        assert sampler.get_blobs().shape == (nsteps, nwalkers)

        model = BlobLogProb(lambda x: (np.random.randn(nblobs), "face"))
        be.reset(nwalkers, ndim)
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        sampler.run_mcmc(coords, nsteps)
        assert sampler.get_blobs().shape == (nsteps, nwalkers, 2)
class BaseLPF:
    _lpf_name = 'base'

    def __init__(self, name: str, passbands: list, times: list = None, fluxes: list = None,
                 errors: list = None, pbids: list = None, covariates: list = None,
                 tm: TransitModel = None, nsamples: tuple = 1, exptimes: tuple = 0.):
        self.tm = tm or QuadraticModel(klims=(0.01, 0.75), nk=512, nz=512)

        # LPF name
        # --------
        self.name = name

        # Passbands
        # ---------
        # Should be arranged from blue to red
        if isinstance(passbands, (list, tuple, ndarray)):
            self.passbands = passbands
        else:
            self.passbands = [passbands]
        self.npb = npb = len(self.passbands)

        self.nsamples = None
        self.exptimes = None

        # Declare high-level objects
        # --------------------------
        self.ps = None          # Parametrisation
        self.de = None          # Differential evolution optimiser
        self.sampler = None     # MCMC sampler
        self.instrument = None  # Instrument
        self.ldsc = None        # Limb darkening set creator
        self.ldps = None        # Limb darkening profile set
        self.cntm = None        # Contamination model

        # Declare data arrays and variables
        # ---------------------------------
        self.nlc: int = 0             # Number of light curves
        self.times: list = None       # List of time arrays
        self.fluxes: list = None      # List of flux arrays
        self.errors: list = None      # List of flux uncertainties
        self.covariates: list = None  # List of covariates
        self.wn: ndarray = None       # Array of white noise estimates for each light curve
        self.timea: ndarray = None    # Array of concatenated times
        self.mfluxa: ndarray = None   # Array of concatenated model fluxes
        self.ofluxa: ndarray = None   # Array of concatenated observed fluxes
        self.errora: ndarray = None   # Array of concatenated flux uncertainties
        self.lcids: ndarray = None    # Array of light curve indices for each datapoint
        self.pbids: ndarray = None    # Array of passband indices for each light curve
        self.lcslices: list = None    # List of light curve slices

        # Set up the observation data
        # ---------------------------
        if times is not None and fluxes is not None and pbids is not None:
            self._init_data(times, fluxes, pbids, covariates, errors, nsamples, exptimes)

        # Setup parametrisation
        # =====================
        self._init_parameters()

        # Initialise the additional lnprior list
        # --------------------------------------
        self.lnpriors = []

        # Initialise the temporary arrays
        # -------------------------------
        self._zpv = zeros(6)
        self._tuv = zeros((npb, 2))
        self._zeros = zeros(npb)
        self._ones = ones(npb)

        # Initialise the instrument
        self._init_instrument()

        if times is not None:
            self._bad_fluxes = [ones_like(t) for t in self.times]
        else:
            self._bad_fluxes = None

    def _init_data(self, times, fluxes, pbids, covariates=None, errors=None,
                   nsamples=1, exptimes=0.):
        if isinstance(times, ndarray) and times.dtype == float:
            times = [times]
        if isinstance(fluxes, ndarray) and fluxes.dtype == float:
            fluxes = [fluxes]
        self.nlc = len(times)
        self.times = asarray(times)
        self.fluxes = asarray(fluxes)
        self.pbids = asarray(pbids)
        self.wn = [diff(f).std() / sqrt(2) for f in fluxes]
        self.timea = concatenate(self.times)
        self.ofluxa = concatenate(self.fluxes)
        self.mfluxa = zeros_like(self.ofluxa)
        self.pbids = atleast_1d(pbids).astype('int')
        self.lcids = concatenate([full(t.size, i) for i, t in enumerate(self.times)])

        if isscalar(nsamples):
            self.nsamples = full(self.nlc, nsamples)
            self.exptimes = full(self.nlc, exptimes)
        else:
            assert (len(nsamples) == self.nlc) and (len(exptimes) == self.nlc)
            self.nsamples = asarray(nsamples, 'int')
            self.exptimes = asarray(exptimes)
        self.tm.set_data(self.timea, self.lcids, self.pbids, self.nsamples, self.exptimes)

        if errors is None:
            self.errors = array([full(t.size, nan) for t in self.times])
        else:
            self.errors = asarray(errors)
        self.errora = concatenate(self.errors)

        # Initialise the light curve slices
        # ---------------------------------
        self.lcslices = []
        sstart = 0
        for i in range(self.nlc):
            s = self.times[i].size
            self.lcslices.append(s_[sstart:sstart + s])
            sstart += s

        # Initialise the covariate arrays, if given
        # -----------------------------------------
        if covariates is not None:
            self.covariates = covariates
            for cv in self.covariates:
                cv[:, 1:] = (cv[:, 1:] - cv[:, 1:].mean(0)) / cv[:, 1:].ptp(0)
            self.ncovs = self.covariates[0].shape[1]
            self.covsize = array([c.size for c in self.covariates])
            self.covstart = concatenate([[0], self.covsize.cumsum()[:-1]])
            self.cova = concatenate(self.covariates)

    def _init_parameters(self):
        self.ps = ParameterSet()
        self._init_p_orbit()
        self._init_p_planet()
        self._init_p_limb_darkening()
        self._init_p_baseline()
        self._init_p_noise()
        self.ps.freeze()

    def _init_p_orbit(self):
        """Orbit parameter initialisation."""
        porbit = [
            GParameter('tc', 'zero_epoch', 'd', N(0.0, 0.1), (-inf, inf)),
            GParameter('pr', 'period', 'd', N(1.0, 1e-5), (0, inf)),
            GParameter('rho', 'stellar_density', 'g/cm^3', U(0.1, 25.0), (0, inf)),
            GParameter('b', 'impact_parameter', 'R_s', U(0.0, 1.0), (0, 1))]
        self.ps.add_global_block('orbit', porbit)

    def _init_p_planet(self):
        """Planet parameter initialisation."""
        pk2 = [PParameter('k2', 'area_ratio', 'A_s', GM(0.1), (0.01**2, 0.55**2))]
        self.ps.add_passband_block('k2', 1, 1, pk2)
        self._pid_k2 = repeat(self.ps.blocks[-1].start, self.npb)
        self._start_k2 = self.ps.blocks[-1].start
        self._sl_k2 = self.ps.blocks[-1].slice

    def _init_p_limb_darkening(self):
        """Limb darkening parameter initialisation."""
        pld = concatenate([
            [PParameter('q1_{:d}'.format(i), 'q1_coefficient', '', U(0, 1), bounds=(0, 1)),
             PParameter('q2_{:d}'.format(i), 'q2_coefficient', '', U(0, 1), bounds=(0, 1))]
            for i in range(self.npb)])
        self.ps.add_passband_block('ldc', 2, self.npb, pld)
        self._sl_ld = self.ps.blocks[-1].slice
        self._start_ld = self.ps.blocks[-1].start

    def _init_p_baseline(self):
        """Baseline parameter initialisation."""
        pass

    def _init_p_noise(self):
        """Noise parameter initialisation."""
        pns = [LParameter('lne_{:d}'.format(i), 'log_error_{:d}'.format(i), '',
                          U(-8, -0), bounds=(-8, -0)) for i in range(self.nlc)]
        self.ps.add_lightcurve_block('log_err', 1, self.nlc, pns)
        self._sl_err = self.ps.blocks[-1].slice
        self._start_err = self.ps.blocks[-1].start

    def _init_instrument(self):
        pass

    def create_pv_population(self, npop=50):
        pvp = self.ps.sample_from_prior(npop)
        for sl in self.ps.blocks[1].slices:
            pvp[:, sl] = uniform(0.01**2, 0.25**2, size=(npop, 1))

        # With LDTk
        # ---------
        # Use LDTk to create the sample if LDTk has been initialised.
        if self.ldps:
            istart = self._start_ld
            cms, ces = self.ldps.coeffs_tq()
            for i, (cm, ce) in enumerate(zip(cms.flat, ces.flat)):
                pvp[:, i + istart] = normal(cm, ce, size=pvp.shape[0])

        # No LDTk
        # -------
        # Ensure that the total limb darkening decreases towards
        # red passbands.
        else:
            ldsl = self._sl_ld
            for i in range(pvp.shape[0]):
                pid = argsort(pvp[i, ldsl][::2])[::-1]
                pvp[i, ldsl][::2] = pvp[i, ldsl][::2][pid]
                pvp[i, ldsl][1::2] = pvp[i, ldsl][1::2][pid]

        # Estimate white noise from the data
        # ----------------------------------
        for i in range(self.nlc):
            wn = diff(self.ofluxa).std() / sqrt(2)
            pvp[:, self._start_err] = log10(uniform(0.5 * wn, 2 * wn, size=npop))
        return pvp

    def baseline(self, pv):
        """Multiplicative baseline"""
        return 1.

    def trends(self, pv):
        """Additive trends"""
        return 0.

    def transit_model(self, pv):
        pv = atleast_2d(pv)
        pvp = map_pv(pv)
        ldc = map_ldc(pv[:, self._sl_ld])
        flux = self.tm.evaluate_pv(pvp, ldc)
        return flux

    def flux_model(self, pv):
        baseline = self.baseline(pv)
        trends = self.trends(pv)
        model_flux = self.transit_model(pv)
        return baseline * model_flux + trends

    def residuals(self, pv):
        return self.ofluxa - self.flux_model(pv)

    def set_prior(self, pid: int, prior) -> None:
        self.ps[pid].prior = prior

    def add_t14_prior(self, mean: float, std: float) -> None:
        """Add a normal prior on the transit duration.

        Parameters
        ----------
        mean
        std
        """
        def T14(pv):
            a = as_from_rhop(pv[2], pv[1])
            t14 = duration_eccentric(pv[1], sqrt(pv[4]), a, mt.acos(pv[3] / a), 0, 0, 1)
            return norm.logpdf(t14, mean, std)
        self.lnpriors.append(T14)

    def add_as_prior(self, mean: float, std: float) -> None:
        """Add a prior on the scaled semi-major axis.

        Parameters
        ----------
        mean
        std
        """
        def as_prior(pv):
            a = as_from_rhop(pv[2], pv[1])
            return norm.logpdf(a, mean, std)
        self.lnpriors.append(as_prior)

    def add_ldtk_prior(self, teff: tuple, logg: tuple, z: tuple,
                       uncertainty_multiplier: float = 3,
                       pbs: tuple = ('g', 'r', 'i', 'z')) -> None:
        """Add an LDTk-based prior on the limb darkening.

        Parameters
        ----------
        teff
        logg
        z
        uncertainty_multiplier
        pbs
        """
        fs = {n: f for n, f in zip('g r i z'.split(), (sdss_g, sdss_r, sdss_i, sdss_z))}
        filters = [fs[k] for k in pbs]
        self.ldsc = LDPSetCreator(teff, logg, z, filters)
        self.ldps = self.ldsc.create_profiles(1000)
        self.ldps.resample_linear_z()
        self.ldps.set_uncertainty_multiplier(uncertainty_multiplier)

        def ldprior(pv):
            return self.ldps.lnlike_tq(pv[self._sl_ld])
        self.lnpriors.append(ldprior)

    def remove_outliers(self, sigma=5):
        fmodel = self.flux_model(self.de.minimum_location)
        times, fluxes, pbids, errors = [], [], [], []
        for i in range(len(self.times)):
            res = self.fluxes[i] - fmodel[i]
            mask = ~sigma_clip(res, sigma=sigma).mask
            times.append(self.times[i][mask])
            fluxes.append(self.fluxes[i][mask])
            if self.errors is not None:
                errors.append(self.errors[i][mask])
        self._init_data(times, fluxes, self.pbids,
                        (errors if self.errors is not None else None))

    def remove_transits(self, tids):
        m = ones(len(self.times), bool)
        m[tids] = False
        self._init_data(self.times[m], self.fluxes[m], self.pbids[m],
                        self.covariates[m] if self.covariates is not None else None,
                        self.errors[m], self.nsamples[m], self.exptimes[m])
        self._init_parameters()

    def lnprior(self, pv):
        return self.ps.lnprior(pv) + self.additional_priors(pv)

    def additional_priors(self, pv):
        """Additional priors."""
        pv = atleast_2d(pv)
        return sum([f(pv) for f in self.lnpriors], 0)

    def lnlikelihood(self, pv):
        flux_m = self.flux_model(pv)
        wn = 10**(atleast_2d(pv)[:, self._sl_err])
        return lnlike_normal_v(self.ofluxa, flux_m, wn, self.lcids)

    def lnposterior(self, pv):
        lnp = self.lnprior(pv) + self.lnlikelihood(pv)
        return where(isfinite(lnp), lnp, -inf)

    def __call__(self, pv):
        return self.lnposterior(pv)

    def optimize_global(self, niter=200, npop=50, population=None,
                        label='Global optimisation', leave=False):
        if self.de is None:
            self.de = DiffEvol(self.lnposterior, clip(self.ps.bounds, -1, 1), npop,
                               maximize=True, vectorize=True)
            if population is None:
                self.de._population[:, :] = self.create_pv_population(npop)
            else:
                self.de._population[:, :] = population
        for _ in tqdm(self.de(niter), total=niter, desc=label, leave=leave):
            pass

    def sample_mcmc(self, niter=500, thin=5, label='MCMC sampling', reset=False, leave=True):
        if self.sampler is None:
            self.sampler = EnsembleSampler(self.de.n_pop, self.de.n_par,
                                           self.lnposterior, vectorize=True)
            pop0 = self.de.population
        else:
            pop0 = self.sampler.chain[:, -1, :].copy()
        if reset:
            self.sampler.reset()
        for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin),
                      total=niter, desc=label, leave=False):
            pass

    def posterior_samples(self, burn: int = 0, thin: int = 1, include_ldc: bool = False):
        ldstart = self._sl_ld.start
        fc = self.sampler.chain[:, burn::thin, :].reshape([-1, self.de.n_par])
        d = fc if include_ldc else fc[:, :ldstart]
        n = self.ps.names if include_ldc else self.ps.names[:ldstart]
        return pd.DataFrame(d, columns=n)

    def plot_mcmc_chains(self, pid: int = 0, alpha: float = 0.1, thin: int = 1, ax=None):
        fig, ax = (None, ax) if ax is not None else subplots()
        ax.plot(self.sampler.chain[:, ::thin, pid].T, 'k', alpha=alpha)
        fig.tight_layout()
        return fig

    def __repr__(self):
        s = f"""Target: {self.name}
  LPF: {self._lpf_name}
  Passbands: {self.passbands}"""
        return s