def setup_sampler(self):
    """ Either initialize the sampler or read in the resume file """
    import ptemcee

    if os.path.isfile(self.resume_file) and self.resume is True:
        logger.info("Resume data {} found".format(self.resume_file))
        with open(self.resume_file, "rb") as file:
            data = dill.load(file)

        # Extract the check-point data
        self.sampler = data["sampler"]
        self.iteration = data["iteration"]
        self.chain_array = data["chain_array"]
        self.log_likelihood_array = data["log_likelihood_array"]
        self.pos0 = data["pos0"]
        self.beta_list = data["beta_list"]
        self.sampler._betas = np.array(self.beta_list[-1])
        self.tau_list = data["tau_list"]
        self.tau_list_n = data["tau_list_n"]
        self.time_per_check = data["time_per_check"]

        # Initialize the pool
        self.sampler.pool = self.pool
        self.sampler.threads = self.threads

        logger.info("Resuming from previous run with iteration={}".format(
            self.iteration))
    else:
        # Initialize the PTSampler
        if self.threads == 1:
            self.sampler = ptemcee.Sampler(
                dim=self.ndim, logl=self.log_likelihood,
                logp=self.log_prior, **self.sampler_init_kwargs)
        else:
            self.sampler = ptemcee.Sampler(
                dim=self.ndim, logl=do_nothing_function,
                logp=do_nothing_function, pool=self.pool,
                threads=self.threads, **self.sampler_init_kwargs)
            self.sampler._likeprior = LikePriorEvaluator(
                self.search_parameter_keys, use_ratio=self.use_ratio)

        # Initialize storage for results
        self.iteration = 0
        self.chain_array = self.get_zero_chain_array()
        self.log_likelihood_array = self.get_zero_log_likelihood_array()
        self.beta_list = []
        self.tau_list = []
        self.tau_list_n = []
        self.time_per_check = []
        self.pos0 = self.get_pos0()

    return self.sampler
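The resume branch above round-trips the entire sampler through dill. A minimal, self-contained sketch of that checkpoint pattern (the file name, state-dict keys, and helper names here are illustrative, not bilby's actual format):

import dill

RESUME_FILE = "run.resume"  # illustrative path

def write_checkpoint(sampler, iteration, pos0):
    # Persist everything needed to continue sampling later.
    data = {"sampler": sampler, "iteration": iteration, "pos0": pos0}
    with open(RESUME_FILE, "wb") as f:
        dill.dump(data, f)

def read_checkpoint():
    # Restore the state dict. Pool objects generally do not survive
    # pickling, which is why the code above re-attaches self.pool to
    # the restored sampler.
    with open(RESUME_FILE, "rb") as f:
        return dill.load(f)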
def _initialise_sampler(self):
    import ptemcee
    self._sampler = ptemcee.Sampler(
        dim=self.ndim, logl=self.log_likelihood, logp=self.log_prior,
        **self.sampler_init_kwargs)
    self._init_chain_file()
def run_sampler(self):
    import ptemcee
    tqdm = get_progress_bar()
    sampler = ptemcee.Sampler(
        dim=self.ndim, logl=self.log_likelihood, logp=self.log_prior,
        **self.sampler_init_kwargs)
    self.pos0 = [[self.get_random_draw_from_prior()
                  for _ in range(self.nwalkers)]
                 for _ in range(self.kwargs['ntemps'])]

    for _ in tqdm(sampler.sample(self.pos0, **self.sampler_function_kwargs),
                  total=self.nsteps):
        pass

    self.calculate_autocorrelation(sampler.chain.reshape((-1, self.ndim)))
    self.result.sampler_output = np.nan
    self.print_nburn_logging_info()
    self.result.nburn = self.nburn
    if self.result.nburn > self.nsteps:
        logger.warning('Chain not burned in, no samples generated.')
    self.result.samples = sampler.chain[0, :, self.nburn:, :].reshape(
        (-1, self.ndim))
    self.result.betas = sampler.betas
    self.result.log_evidence, self.result.log_evidence_err = \
        sampler.log_evidence_estimate(
            sampler.loglikelihood, self.nburn / self.nsteps)
    self.result.walkers = sampler.chain[0, :, :, :]
    return self.result
def run_sampler(self):
    import ptemcee
    tqdm = get_progress_bar()
    sampler = ptemcee.Sampler(
        dim=self.ndim, logl=self.log_likelihood, logp=self.log_prior,
        **self.sampler_init_kwargs)
    self.pos0 = [[self.get_random_draw_from_prior()
                  for _ in range(self.nwalkers)]
                 for _ in range(self.kwargs['ntemps'])]

    log_likelihood_evaluations = []
    log_prior_evaluations = []
    # sampler.sample yields (position, log-posterior, log-likelihood)
    for pos, logpost, loglike in tqdm(
            sampler.sample(self.pos0, **self.sampler_function_kwargs),
            total=self.nsteps):
        log_likelihood_evaluations.append(loglike)
        log_prior_evaluations.append(logpost - loglike)

    self.calculate_autocorrelation(sampler.chain.reshape((-1, self.ndim)))
    self.result.sampler_output = np.nan
    self.print_nburn_logging_info()
    self.result.nburn = self.nburn
    if self.result.nburn > self.nsteps:
        raise SamplerError(
            "The run has finished, but the chain is not burned in: "
            "`nburn > nsteps`. Try increasing the number of steps.")
    self.result.samples = sampler.chain[0, :, self.nburn:, :].reshape(
        (-1, self.ndim))
    self.result.log_likelihood_evaluations = np.array(
        log_likelihood_evaluations)[self.nburn:, 0, :].reshape((-1))
    self.result.log_prior_evaluations = np.array(
        log_prior_evaluations)[self.nburn:, 0, :].reshape((-1))
    self.result.betas = sampler.betas
    self.result.log_evidence, self.result.log_evidence_err = \
        sampler.log_evidence_estimate(
            sampler.loglikelihood, self.nburn / self.nsteps)
    self.result.walkers = sampler.chain[0, :, :, :]
    return self.result
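Both variants above derive `nburn` from the chain's autocorrelation time via `calculate_autocorrelation`. A sketch of such an estimate using emcee's autocorrelation utility (the axis swap and the 10x safety factor are assumptions, not bilby's exact rule):

import numpy as np
import emcee

def estimate_nburn(cold_chain, safety=10):
    """Estimate a burn-in length from the beta = 1 chain.

    cold_chain : array of shape (nwalkers, nsteps, ndim).
    """
    # integrated_time expects (nsteps, nwalkers, ndim); tol=0 forces an
    # estimate even for chains shorter than the default 50*tau.
    tau = emcee.autocorr.integrated_time(np.swapaxes(cold_chain, 0, 1),
                                         tol=0)
    return int(safety * np.max(tau))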
def run_pt_emcee(log_like, log_prior, n_burn, n_steps, n_temps=None,
                 n_walkers=None, p_dict=None, p0=None, columns=None,
                 loglargs=(), logpargs=(), threads=None, thin=1,
                 return_lnZ=False, return_sampler=False, return_pos=False):
    """
    Run parallel-tempered MCMC with ptemcee.

    Parameters
    ----------
    log_like : function
        The function that computes the log likelihood. Must be of the form
        log_like(p, *loglargs), where p is a NumPy array of parameters that
        are sampled by the MCMC sampler.
    log_prior : function
        The function that computes the log prior. Must be of the form
        log_prior(p, *logpargs), where p is a NumPy array of parameters
        that are sampled by the MCMC sampler.
    n_burn : int
        Number of burn-in steps.
    n_steps : int
        Number of MCMC samples to take.
    n_temps : int
        The number of temperatures to use in PT sampling.
    n_walkers : int
        Number of walkers.
    p_dict : collections.OrderedDict
        Each entry is a tuple with the function used to generate starting
        points for the parameter and the arguments for the function. The
        starting point function must have the call signature
        f(*args_for_function, n_walkers). Ignored if p0 is not None.
    p0 : array
        n_temps by n_walkers by n_dim array of initial starting values.
        p0[k, i, j] is the starting point for walker i along variable j for
        temperature k. If provided, p_dict is ignored.
    columns : list of strings
        Names of parameters. These will be the column headings in the
        returned DataFrame. If None, either inferred from p_dict or
        assigned sequential integers.
    loglargs : tuple
        Arguments passed to log_like.
    logpargs : tuple
        Arguments passed to log_prior.
    threads : int
        Number of cores to use in the calculation.
    thin : int
        The number of iterations to perform between saving the state to
        the internal chain.
    return_lnZ : bool, default False
        If True, additionally return lnZ and dlnZ.
    return_sampler : bool, default False
        If True, additionally return the sampler.
    return_pos : bool, default False
        If True, additionally return the position of the sampler.

    Returns
    -------
    df : pandas.DataFrame
        First columns give flattened MCMC chains, with columns named after
        the variable being sampled. Other columns are:
            'chain':    ID of chain
            'beta':     Inverse temperature
            'beta_ind': Index of beta in list of betas
            'lnlike':   Log likelihood
            'lnprob':   Log posterior probability (with beta multiplying
                        the log likelihood)
    lnZ : float, optional
        ln Z(1), which is equal to the evidence of the parameter
        estimation problem.
    dlnZ : float, optional
        The estimated error in the lnZ calculation.
    sampler : ptemcee.Sampler instance, optional
        The sampler instance.
    pos : ndarray, shape (ntemps, nwalkers, ndim), optional
        Last position of the walkers.
""" if p0 is None and p_dict is None: raise RuntimeError('Must supply either p0 or p_dict.') # Infer n_dim and n_walkers (and check inputs) if p0 is None: if n_walkers is None: raise RuntimeError('n_walkers must be specified if p0 is None') if type(p_dict) is not collections.OrderedDict: raise RuntimeError('p_dict must be collections.OrderedDict.') n_dim = len(p_dict) else: n_temps, n_walkers, n_dim = p0.shape if p_dict is not None: warnings.RuntimeWarning('p_dict is being ignored.') # Infer columns if columns is None: if p_dict is not None: columns = list(p_dict.keys()) else: columns = list(range(n_dim)) elif len(columns) != n_dim: raise RuntimeError('len(columns) must equal number of parameters.') # Check for invalid column names invalid_column_names = ['lnprob', 'chain', 'lnlike', 'beta', 'beta_ind'] if np.any([x in columns for x in invalid_column_names]): raise RuntimeError('You cannot name columns with any of these: ' + ' '.join(invalid_column_names)) # Build starting points of walkers if p0 is None: p0 = np.empty((n_temps, n_walkers, n_dim)) for i, key in enumerate(p_dict): p0[:, :, i] = p_dict[key][0](*(p_dict[key][1] + ((n_temps, n_walkers), ))) # Set up the PTSampler instance if threads is not None: sampler = ptemcee.Sampler(n_walkers, n_dim, log_like, log_prior, ntemps=n_temps, loglargs=loglargs, logpargs=logpargs, threads=threads) else: sampler = ptemcee.Sampler(n_walkers, n_dim, log_like, log_prior, ntemps=n_temps, loglargs=loglargs, logpargs=logpargs) # Do burn-in if n_burn > 0: pos, _, _ = sampler.run_mcmc(p0, iterations=n_burn, storechain=False) else: pos = p0 # Sample again, starting from end burn-in state pos, _, _ = sampler.run_mcmc(pos, iterations=n_steps, thin=thin) # Compute thermodynamic integral lnZ, dlnZ = sampler.log_evidence_estimate(fburnin=0) # Make DataFrame for results df = sampler_to_dataframe(sampler, columns=columns) # Set up return return_vals = (df, lnZ, dlnZ, sampler, pos) return_bool = (True, return_lnZ, return_lnZ, return_sampler, return_pos) ret = tuple([rv for rv, rb in zip(return_vals, return_bool) if rb]) if len(ret) == 1: return ret[0] return ret
def run_sampler(self, total_orbits, burn_steps=0, thin=1,
                examine_chains=False):
    """
    Runs PT MCMC sampler. Results are stored in ``self.chain`` and
    ``self.lnlikes``. Results also added to ``orbitize.results.Results``
    object (``self.results``)

    .. Note:: Can be run multiple times if you want to pause and inspect
        things. Each call will continue from the end state of the last
        execution.

    Args:
        total_orbits (int): total number of accepted possible orbits that
            are desired. This equals ``num_steps_per_walker`` x
            ``num_walkers``
        burn_steps (int): optional parameter to tell the sampler to
            discard a certain number of steps at the beginning
        thin (int): factor to thin the steps of each walker by to remove
            correlations in the walker steps
        examine_chains (boolean): displays plots of walkers at each step
            by running `examine_chains` after `total_orbits` are sampled.

    Returns:
        ``emcee.sampler`` object: the sampler used to run the MCMC
    """
    if self.use_pt:
        sampler = ptemcee.Sampler(
            self.num_walkers, self.num_params, self._logl,
            orbitize.priors.all_lnpriors, ntemps=self.num_temps,
            threads=self.num_threads, logpargs=[self.priors, ])
    else:
        sampler = emcee.EnsembleSampler(
            self.num_walkers, self.num_params, self._logl,
            threads=self.num_threads, kwargs={'include_logp': True})

    # we're using args because emcee < 3.0 has three return values whereas
    # emcee >= 3.0 has four. We can explicitly declare 4 variables instead
    # of args in the future.
    for args in sampler.sample(self.curr_pos, iterations=burn_steps,
                               thin=thin):
        pass

    sampler.reset()
    try:
        self.curr_pos = args[0]
    except UnboundLocalError:  # 0-step burn-in (args is not defined)
        pass
    print('Burn in complete')

    nsteps = int(np.ceil(total_orbits / self.num_walkers))
    assert nsteps > 0, 'Total_orbits must be greater than num_walkers.'

    i = 0
    # we're using args because emcee < 3.0 has three return values whereas
    # emcee >= 3.0 has four. We can explicitly declare 4 variables instead
    # of args in the future.
    for args in sampler.sample(self.curr_pos, iterations=nsteps,
                               thin=thin):
        i += 1
        # print progress statement
        if i % 5 == 0:
            print(str(i) + '/' + str(nsteps) + ' steps completed',
                  end='\r')
    print('')

    self.curr_pos = args[0]  # note that args[0] is the pos output

    # TODO: Need something here to pick out temperatures; just using the
    # lowest one for now
    self.chain = sampler.chain

    if self.use_pt:
        self.post = sampler.flatchain[0, :, :]
        # should also be picking out the lowest-temperature logps
        self.lnlikes = sampler.loglikelihood[0, :, :].flatten()
        self.lnlikes_alltemps = sampler.loglikelihood
    else:
        self.post = sampler.flatchain
        self.lnlikes = sampler.flatlnprobability

        # convert posterior probability (returned by sampler objects) to
        # likelihood (required by orbitize.results.Results)
        for i, orb in enumerate(self.post):
            self.lnlikes[i] -= orbitize.priors.all_lnpriors(orb,
                                                            self.priors)

    # include fixed parameters in posterior
    self.post = self._fill_in_fixed_params(self.post)

    self.results.add_samples(self.post, self.lnlikes,
                             labels=self.system.labels)

    print('Run complete')

    if examine_chains:
        self.examine_chains()

    return sampler
def run_mcmc(self, nsteps, nburnsteps=None, nwalkers=None, status=None,
             ntemps=1):
    """
    Run MCMC model calibration. If the chain already exists, continue from
    the last point, otherwise burn in and start the chain.
    """
    with self.open('a') as f:
        try:
            dset = f['chain']
        except KeyError:
            burn = True
            if nburnsteps is None or nwalkers is None:
                print('must specify nburnsteps and nwalkers to start '
                      'chain')
                return
            dset = f.create_dataset(
                'chain', dtype='f8',
                shape=(nwalkers, 0, self.ndim),
                chunks=(nwalkers, 1, self.ndim),
                maxshape=(nwalkers, None, self.ndim),
                compression='lzf')
        else:
            burn = False
            nwalkers = dset.shape[0]

        # choose number of temperatures for the PTSampler
        if usePTSampler:
            print("Using PTSampler")
            print("ntemps = " + str(ntemps))
            ncpu = cpu_count()
            print("{0} CPUs".format(ncpu))
            Tmax = np.inf
            with Pool() as pool:
                sampler = ptemcee.Sampler(nwalkers, self.ndim,
                                          self.log_likelihood,
                                          self.log_prior, ntemps, Tmax,
                                          pool=pool)

                print("Running burn-in phase")
                nburn0 = nburnsteps
                pos0 = np.random.uniform(self.min, self.max,
                                         (ntemps, nwalkers, self.ndim))
                start = time.time()
                sampler.run_mcmc(pos0, nburn0, adapt=True)
                end = time.time()
                print("... finished in " + str(end - start) + " sec")
                print("sampler.chain.shape " + str(sampler.chain.shape))
                print("betas = " + str(sampler.betas))

                # get the last step of the chain
                pos0 = sampler.chain[:, :, -1, :]
                print("pos0.shape " + str(pos0.shape))
                sampler.reset()

                print("Running MCMC chains")
                niters = 10
                for it in range(niters):
                    print("betas = " + str(sampler.betas))
                    print("iteration " + str(it) + " ...")
                    start = time.time()
                    sampler.run_mcmc(pos0, nsteps // niters)
                    end = time.time()
                    print("... finished in " + str(end - start) + " sec")
                    print("sampler.chain.shape " +
                          str(sampler.chain.shape))
                    # continue the next segment from the last step
                    pos0 = sampler.chain[:, :, -1, :]

                print('writing chain to file')
                dset.resize(dset.shape[1] + nsteps, 1)
                # save only the zero-temperature chain
                dset[:, -nsteps:, :] = sampler.chain[0, :, :, :]

                # save the thermodynamic log evidence
                # logZ, dlogZ = sampler.thermodynamic_integration_log_evidence()
                logZ, dlogZ = sampler.log_evidence_estimate()
                print("logZ = " + str(logZ) + " +/- " + str(dlogZ))
                with open('mcmc/chain-idf-' + str(idf) + '-info.dat',
                          'w') as finfo:
                    finfo.write('logZ ' + str(logZ) + '\n')
                    finfo.write('dlogZ ' + str(dlogZ))
        else:
            sampler = LoggingEnsembleSampler(nwalkers, self.ndim,
                                             self.log_posterior,
                                             pool=self)
            if burn:
                print('no existing chain found, starting initial burn-in')
                # Run first half of burn-in starting from random positions.
                nburn0 = nburnsteps // 2
                sampler.run_mcmc(self.random_pos(nwalkers), nburn0,
                                 status=status)
                print('resampling walker positions')
                # Reposition walkers to the most likely points in the
                # chain, then run the second half of burn-in. This
                # significantly accelerates burn-in and helps prevent
                # stuck walkers.
                X0 = sampler.flatchain[np.unique(
                    sampler.flatlnprobability,
                    return_index=True)[1][-nwalkers:]]
                sampler.reset()
                X0 = sampler.run_mcmc(X0, nburnsteps - nburn0,
                                      status=status,
                                      storechain=False)[0]
                sampler.reset()
                print('burn-in complete, starting production')
            else:
                print('restarting from last point of existing chain')
                X0 = dset[:, -1, :]

            sampler.run_mcmc(X0, nsteps, status=status)

            print('writing chain to file')
            dset.resize(dset.shape[1] + nsteps, 1)
            dset[:, -nsteps:, :] = sampler.chain
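The chunked, resizable dataset used above is the standard h5py pattern for appending chain segments without rewriting the file. A minimal standalone sketch (file name and shapes are illustrative):

import h5py
import numpy as np

nwalkers, ndim = 8, 3
with h5py.File("chain_demo.h5", "a") as f:  # illustrative file name
    if "chain" not in f:
        # zero-length along the step axis; maxshape=None allows growth
        f.create_dataset("chain", dtype="f8",
                         shape=(nwalkers, 0, ndim),
                         chunks=(nwalkers, 1, ndim),
                         maxshape=(nwalkers, None, ndim),
                         compression="lzf")
    dset = f["chain"]
    segment = np.random.rand(nwalkers, 100, ndim)  # stand-in for a chain
    dset.resize(dset.shape[1] + segment.shape[1], axis=1)
    dset[:, -segment.shape[1]:, :] = segment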
def __init__(self, model, nwalkers, ntemps=None, Tmax=None, betas=None,
             adaptive=False, adaptation_lag=None, adaptation_time=None,
             scale_factor=None, loglikelihood_function=None,
             checkpoint_interval=None, checkpoint_signal=None,
             nprocesses=1, use_mpi=False):
    self.model = model
    ndim = len(model.variable_params)

    # create temperature ladder if needed
    if ntemps is None and Tmax is None and betas is None:
        raise ValueError("must provide either ntemps/Tmax or betas")
    if betas is None:
        betas = ptemcee.make_ladder(ndim, ntemps=ntemps, Tmax=Tmax)

    # construct the keyword arguments to pass; if a kwarg is None, we
    # won't pass it, resulting in ptemcee's defaults being used
    kwargs = {}
    kwargs['adaptive'] = adaptive
    kwargs['betas'] = betas
    if adaptation_lag is not None:
        kwargs['adaptation_lag'] = adaptation_lag
    if adaptation_time is not None:
        kwargs['adaptation_time'] = adaptation_time
    if scale_factor is not None:
        kwargs['scale_factor'] = scale_factor

    # create a wrapper for calling the model
    if loglikelihood_function is None:
        loglikelihood_function = 'loglikelihood'
    # frustratingly, ptemcee does not support blob data, so we have to
    # turn it off
    model_call = models.CallModel(model, loglikelihood_function,
                                  return_all_stats=False)

    # these are used to help parallelize over multiple cores / MPI
    models._global_instance = model_call
    model_call = models._call_global_model
    prior_call = models._call_global_model_logprior
    self.pool = choose_pool(mpi=use_mpi, processes=nprocesses)

    # construct the sampler
    self._sampler = ptemcee.Sampler(nwalkers=nwalkers, ndim=ndim,
                                    logl=model_call, logp=prior_call,
                                    mapper=self.pool.map, **kwargs)
    self.nwalkers = nwalkers
    self._ntemps = ntemps
    self._checkpoint_interval = checkpoint_interval
    self._checkpoint_signal = checkpoint_signal
    # we'll initialize ensemble and chain to None
    self._chain = None
    self._ensemble = None
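This snippet targets the newer ptemcee API, in which the temperature ladder is constructed explicitly and a generic `mapper` handles parallelism. A minimal sketch of the ladder step, assuming that API is available (the ntemps and Tmax values are illustrative):

import ptemcee

ndim = 4
betas = ptemcee.make_ladder(ndim, ntemps=10, Tmax=50)
print(betas)  # decreasing inverse temperatures, starting at beta = 1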
def run(self):
    """ Run the parallel-tempering algorithm """

    # PREPARE FOR RUNNING
    # Define the initial walker population
    if self.p0 is not None:
        pass
    elif type(None) not in (type(self.opt_data), type(self.ntemps),
                            type(self.nwalkers), type(self.fbest)):
        # distributions built from opt_data
        self.p0 = init_walkers(self.PSystem,
                               distribution=self.distribution,
                               opt_data=self.opt_data,
                               ntemps=self.ntemps,
                               nwalkers=self.nwalkers, fbest=self.fbest)
    elif type(None) not in (type(self.ntemps), type(self.nwalkers)):
        # uniform distribution, perhaps
        self.p0 = init_walkers(self.PSystem,
                               distribution=self.distribution,
                               ntemps=self.ntemps,
                               nwalkers=self.nwalkers)
    else:
        raise NameError(
            "Not enough information to initialize MCMC.\n\n"
            "--> Provide an array of physical values through the 'p0' "
            "kwarg with shape (temperatures, walkers, dimensions)\n"
            "or\n"
            "--> Define 'opt_data', 'fbest', 'ntemps', 'nwalkers', and "
            "'distribution' to initialize walkers from optimizers.")

    # Update ntemps and nwalkers from p0 above
    self.ntemps, self.nwalkers, ndim_tmp = self.p0.shape

    # Is p0 normalized (unit cube) or physical?
    if (self.p0 >= 0.).all() and (self.p0 <= 1.).all():
        # cube
        p0_norm = True
        insert_cnst = False
    else:
        # physical
        p0_norm = False
        # If p0 is physical, then constants must be inserted
        insert_cnst = True

    # Check the input parameters for consistency
    if self.nwalkers < 2 * self.PSystem.ndim:
        raise RuntimeError("Number of walkers must be >= 2*ndim, i.e., "
                           f"nwalkers has to be >= "
                           f"{2 * self.PSystem.ndim}.")

    if ndim_tmp != self.PSystem.ndim:
        raise RuntimeError(f"Number of dimensions in 'PSystem' "
                           f"({self.PSystem.ndim}) differs from that in "
                           f"'p0' ({ndim_tmp}).")

    # temperatures from betas
    if self.betas is not None:
        if len(self.betas) != self.ntemps:
            raise RuntimeError(f"Number of 'betas' ({len(self.betas)}) "
                               f"differs from number of temperatures in "
                               f"'p0' ({self.ntemps})")

    # Verify that the output path exists
    if not os.path.exists(self.path):
        raise RuntimeError(f"directory -path- {self.path} does not exist")

    # hdf5 file name for saving the MCMC data
    if self.file_name is not None:
        self.hdf5_filename = (f"{self.path}{self.file_name}"
                              f"{self.suffix}.hdf5")
    else:
        self.hdf5_filename = (f"{self.path}{self.PSystem.system_name}"
                              f"{self.suffix}.hdf5")

    # Time it
    ti = time.time()
    now = datetime.datetime.now()
    print("\n =========== PARALLEL-TEMPERING MCMC ===========\n")
    print("--> Starting date: ", now.strftime("%Y-%m-%d %H:%M"))
    print("--> Reference epoch of the solutions: ", self.PSystem.t0,
          " [JD]")
    print('--> Results will be saved at: ', self.hdf5_filename)
    print("--> MCMC parameters:")
    print(f"      -ntemps: {self.ntemps}")
    print(f"      -nwalkers: {self.nwalkers}")
    print(f"      -itmax: {self.itmax}")
    print(f"      -intra_steps: {self.intra_steps}")
    print()

    # Create an h5py file
    self._set_hdf5(self.PSystem, self.hdf5_filename)

    # Default values in ptemcee. Do not change these unless you have read
    # Vousden et al. (2016):
    _nu = 100.    # adaptation_time
    _t0 = 1000.   # adaptation_lag
    a_scale = 10  # proposal scale factor

    # RUN
    with closing(Pool(processes=self.cores)) as pool:
        sampler = pt.Sampler(
            nwalkers=self.nwalkers,
            dim=self.PSystem.ndim,
            logp=self.logprior,
            logl=log_likelihood_func,
            ntemps=self.ntemps,
            betas=self.betas,
            adaptation_lag=_t0,
            adaptation_time=_nu,
            a=a_scale,
            Tmax=self.tmax,
            pool=pool,
            loglargs=(self.PSystem,
                      p0_norm,       # cube
                      insert_cnst),  # insert constants
            logpkwargs={'psystem': self.PSystem})

        index = 0
        autocorr = np.empty(self.nsteps)

        # thin: the number of iterations to perform between saving the
        # state to the internal chain.
        for iteration, s in enumerate(sampler.sample(
                p0=self.p0, iterations=self.itmax,
                thin=self.intra_steps, storechain=True, adapt=True,
                swap_ratios=False)):
            # s[0] = walker positions
            # s[1] = log-posterior
            # s[2] = log-likelihood

            if (iteration + 1) % self.intra_steps:
                continue

            # Identify the current maximum a posteriori (MAP) solution;
            # the MAP is computed over the posterior
            max_value, max_index = max((x, (i, j))
                                       for i, row in enumerate(s[1][:])
                                       for j, x in enumerate(row))

            # get_autocorr_time returns a matrix of autocorrelation
            # lengths for each parameter in each temperature, of shape
            # ``(ntemps, ndim)``.
            tau = sampler.get_autocorr_time()[0]  # coldest temperature
            mean_tau = np.mean(tau)

            # tswap_acceptance_fraction returns an array of accepted
            # temperature-swap fractions for each temperature, of shape
            # ``(ntemps,)``: nswap_accepted / nswap
            swap = list(sampler.tswap_acceptance_fraction)

            # acceptance_fraction is a matrix of shape
            # ``(ntemps, nwalkers)`` detailing the acceptance fraction
            # for each walker: nprop_accepted / nprop
            acc0 = sampler.acceptance_fraction[0, :]

            xbest = s[0][max_index[0]][max_index[1]]

            current_meanposterior = np.mean(s[1][0][:])
            current_meanlogl = np.mean(s[2][0][:])
            std_meanlogp = np.std(s[1][0][:])

            # Output to the terminal
            if self.verbose:
                print("--------- Iteration: ", iteration + 1)
                print(" Mean tau Temp 0:", round(mean_tau, 3))
                print(" Accepted swap fraction in Temp 0: ",
                      round(swap[0], 3))
                print(" Mean acceptance fraction Temp 0: ",
                      round(np.mean(acc0), 3))
                print(" Mean log-likelihood: ",
                      round(current_meanlogl, 3))
                print(" Mean log-posterior: ",
                      round(current_meanposterior, 3))
                print(" Current log-posterior dispersion: ",
                      round(std_meanlogp, 3))
                print(" Current MAP: ", max_index, round(max_value, 3))

            autocorr[index] = mean_tau

            # Save data to the hdf5 file.
            # The chain shape is (temps, walkers, steps, dim).
            # Is it worth saving temperatures other than 0?
            ta = time.time()  # monitor the time spent on saving
            self._save_mcmc(self.hdf5_filename,
                            sampler.chain[:, :, index, :], xbest,
                            sampler.betas, autocorr, index, max_value,
                            swap, max_index, iteration,
                            current_meanposterior)

            if self.verbose:
                print(f' Saving time: {(time.time() - ta) :.5f} sec')

            """
            CONVERGENCE CRITERIA
            Write here your favorite convergence criterion
            """
            # geweke()

            if self.verbose:
                print(' Elapsed time: ',
                      round((time.time() - ti) / 60., 4), 'min')

            index += 1

            if (index + 1) * self.intra_steps > self.itmax:
                print('\n--> Maximum number of iterations reached in '
                      'MCMC')
                break

    # Extract the best solutions from the hdf5 file and write them in
    # ASCII
    extract_best_solutions(self.PSystem, self.hdf5_filename,
                           write_file=True)

    print("--> Reference epoch of the solutions: ", self.PSystem.t0,
          " [JD]")
    print('--> Iterations performed: ', iteration + 1)
    print('--> Elapsed time in MCMC:',
          round((time.time() - ti) / 60., 4), 'minutes')

    return sampler
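The loop above deliberately leaves the convergence criterion as a placeholder. One simple option, consistent with the `autocorr` history already being collected, is the tau-based rule popularized by the emcee documentation (the thresholds here are illustrative):

import numpy as np

def tau_converged(autocorr, index, steps_per_check, rtol=0.01,
                  factor=100):
    """Illustrative convergence check on the autocorrelation history.

    autocorr        : running array of mean autocorrelation times
    index           : number of checks performed so far
    steps_per_check : sampler iterations between checks
    """
    if index < 2:
        return False
    tau, old_tau = autocorr[index - 1], autocorr[index - 2]
    n_iterations = index * steps_per_check
    # require the chain to be much longer than tau, and tau to have
    # stabilized between the last two checks
    return bool(n_iterations > factor * tau
                and abs(old_tau - tau) / tau < rtol)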
def run_emcee(p, nwalkers, nsteps, ndim, multiT, convTest, pos, lnprob):
    """
    Run MCMC with:
        Number of walkers        = nwalkers
        Number of dimensions     = ndim
        Number of steps          = nsteps
        Log probability function = lnprob
        Pool                     = p
        Initial walker positions = pos

    If multiT is true, the MCMC will be run at 3 different temperatures
    (inverses given by betas). If convTest is true, the MCMC will either
    run until convergence or for nsteps steps, whichever happens first.
    """
    if convTest:
        # walker paths will be stored in the backend and periodically
        # checked for convergence
        filename = headFile + ".h5"
        backend = emcee.backends.HDFBackend(filename)
        backend.reset(nwalkers, ndim)
        sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                        backend=backend, pool=p)
        max_n = nsteps

        # We'll track how the average autocorrelation time estimate
        # changes
        index = 0
        autocorr = np.empty(max_n)
        old_tau = np.inf

        # Now we'll sample for up to max_n steps
        for sample in sampler.sample(pos, store=True, iterations=max_n,
                                     progress=True):
            # Only check convergence every 100 steps
            if sampler.iteration % 100:
                continue

            # Compute the autocorrelation time so far. Using tol=0 means
            # we'll always get an estimate, even if it isn't trustworthy.
            tau = sampler.get_autocorr_time(tol=0)
            autocorr[index] = np.mean(tau)
            index += 1

            # Check convergence
            converged = np.all(tau * 100 < sampler.iteration)
            converged &= np.all(np.abs(old_tau - tau) / tau < 0.01)
            if converged:
                break
            old_tau = tau

        nsteps = sampler.iteration

        # find mle_soln, the walker position with the maximum probability
        chain = sampler.chain
        probs = sampler.get_log_prob()
        maxprob = np.argmax(probs)
        hp_loc = np.unravel_index(maxprob, probs.shape)
        # switch from order (nsteps, nwalkers) to (nwalkers, nsteps)
        mle_soln = chain[(hp_loc[1], hp_loc[0])]
        print(mle_soln)
        return nsteps, chain, mle_soln, probs, sampler

    elif multiT:
        # inverse temperatures for the log-likelihood
        betas = np.asarray([0.01, 0.505, 1.0])
        sampler = ptemcee.Sampler(nwalkers, ndim, lnprob, lnprior,
                                  betas=betas, pool=p)
        sampler.run_mcmc(pos, nsteps)

        # find mle_soln, the walker position with the maximum probability
        chain = sampler.chain[2][:, :, :]  # index 2 is beta = 1.0
        probs = sampler.logprobability[2]
        maxprob = np.argmax(probs)
        hp_loc = np.unravel_index(maxprob, probs.shape)
        mle_soln = chain[hp_loc]  # already in order (nwalkers, nsteps)
        print(mle_soln)
        return nsteps, chain, mle_soln, probs, sampler

    else:
        sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=p)
        sampler.run_mcmc(pos, nsteps, store=True)

        # find mle_soln, the walker position with the maximum probability
        chain = sampler.chain
        probs = sampler.get_log_prob()
        maxprob = np.argmax(probs)
        hp_loc = np.unravel_index(maxprob, probs.shape)
        # switch from order (nsteps, nwalkers) to (nwalkers, nsteps)
        mle_soln = chain[(hp_loc[1], hp_loc[0])]
        print(mle_soln)
        return nsteps, chain, mle_soln, probs, sampler
def run_sampler(self, total_orbits, burn_steps=0, thin=1,
                examine_chains=False, output_filename=None,
                periodic_save_freq=None):
    """
    Runs PT MCMC sampler. Results are stored in ``self.chain`` and
    ``self.lnlikes``. Results also added to ``orbitize.results.Results``
    object (``self.results``)

    .. Note:: Can be run multiple times if you want to pause and inspect
        things. Each call will continue from the end state of the last
        execution.

    Args:
        total_orbits (int): total number of accepted possible orbits that
            are desired. This equals ``num_steps_per_walker`` x
            ``num_walkers``
        burn_steps (int): optional parameter to tell the sampler to
            discard a certain number of steps at the beginning
        thin (int): factor to thin the steps of each walker by to remove
            correlations in the walker steps
        examine_chains (boolean): displays plots of walkers at each step
            by running `examine_chains` after `total_orbits` are sampled.
        output_filename (str): optional filepath where the results file
            can be saved.
        periodic_save_freq (int): optionally, save the current results
            into ``output_filename`` every nth step while running, where
            n is the value passed into this variable.

    Returns:
        ``emcee.sampler`` object: the sampler used to run the MCMC
    """
    if periodic_save_freq is not None and output_filename is None:
        raise ValueError(
            "output_filename must be defined for periodic saving of the "
            "chains")
    if periodic_save_freq is not None and not isinstance(
            periodic_save_freq, int):
        raise TypeError("periodic_save_freq must be an integer")

    nsteps = int(np.ceil(total_orbits / self.num_walkers))
    if nsteps <= 0:
        raise ValueError("Total_orbits must be greater than num_walkers.")

    if self.use_pt:
        sampler = ptemcee.Sampler(
            self.num_walkers, self.num_params, self._logl,
            orbitize.priors.all_lnpriors, ntemps=self.num_temps,
            threads=self.num_threads, logpargs=[self.priors, ])
    else:
        if self.num_threads != 1:
            print('Setting num_threads=1. If you want parallel '
                  'processing for emcee implemented in orbitize, let us '
                  'know.')
            self.num_threads = 1
        sampler = emcee.EnsembleSampler(self.num_walkers,
                                        self.num_params, self._logl,
                                        kwargs={'include_logp': True})

    print("Starting Burn in")
    for i, state in enumerate(sampler.sample(self.curr_pos,
                                             iterations=burn_steps,
                                             thin=thin)):
        if self.use_pt:
            self.curr_pos = state[0]
        else:
            self.curr_pos = state.coords

        if (i + 1) % 5 == 0:
            print(str(i + 1) + '/' + str(burn_steps) +
                  ' steps of burn-in complete', end='\r')

        if periodic_save_freq is not None:
            # we've completed i+1 steps
            if (i + 1) % periodic_save_freq == 0:
                self.results.curr_pos = self.curr_pos
                self.results.save_results(output_filename)

    sampler.reset()
    print('')
    print('Burn in complete. Sampling posterior now.')

    # keep track of how many steps of this chain we've saved; this is
    # the next index that needs to be saved
    saved_upto = 0

    for i, state in enumerate(sampler.sample(self.curr_pos,
                                             iterations=nsteps,
                                             thin=thin)):
        if self.use_pt:
            self.curr_pos = state[0]
        else:
            self.curr_pos = state.coords

        # print progress statement
        if (i + 1) % 5 == 0:
            print(str(i + 1) + '/' + str(nsteps) + ' steps completed',
                  end='\r')

        if periodic_save_freq is not None:
            # we've completed i+1 steps
            if (i + 1) % periodic_save_freq == 0:
                self._update_chains_from_sampler(sampler,
                                                 num_steps=i + 1)

                # figure out the new chunk of the chain and the
                # corresponding lnlikes computed since the last save.
                # grab the current posterior and lnlikes and reshape
                # them to have the nwalkers x nsteps dimensions again
                post_shape = self.post.shape
                curr_chain_shape = (self.num_walkers,
                                    post_shape[0] // self.num_walkers,
                                    post_shape[-1])
                curr_chain = self.post.reshape(curr_chain_shape)
                curr_lnlike_chain = self.lnlikes.reshape(
                    curr_chain_shape[:2])

                # use the reshaped arrays to find the new steps we
                # computed
                curr_chunk = curr_chain[:, saved_upto:i + 1]
                # flatten the nwalkers x nsteps dimensions
                curr_chunk = curr_chunk.reshape(-1,
                                                curr_chunk.shape[-1])
                curr_lnlike_chunk = curr_lnlike_chain[
                    :, saved_upto:i + 1].flatten()

                # add this chunk to the results object (which already
                # has all the previous chunks saved)
                self.results.add_samples(curr_chunk, curr_lnlike_chunk,
                                         labels=self.system.labels,
                                         curr_pos=self.curr_pos)
                self.results.save_results(output_filename)
                saved_upto = i + 1

    print('')
    self._update_chains_from_sampler(sampler)

    if periodic_save_freq is None:
        # need to save everything
        self.results.add_samples(self.post, self.lnlikes,
                                 labels=self.system.labels,
                                 curr_pos=self.curr_pos)
    elif saved_upto < nsteps:
        # just need to save the last few steps; same code as above,
        # except we grab everything after saved_upto
        post_shape = self.post.shape
        curr_chain_shape = (self.num_walkers,
                            post_shape[0] // self.num_walkers,
                            post_shape[-1])
        curr_chain = self.post.reshape(curr_chain_shape)
        curr_lnlike_chain = self.lnlikes.reshape(curr_chain_shape[:2])

        curr_chunk = curr_chain[:, saved_upto:]
        # flatten the nwalkers x nsteps dimensions
        curr_chunk = curr_chunk.reshape(-1, curr_chunk.shape[-1])
        curr_lnlike_chunk = curr_lnlike_chain[:, saved_upto:].flatten()

        self.results.add_samples(curr_chunk, curr_lnlike_chunk,
                                 labels=self.system.labels,
                                 curr_pos=self.curr_pos)

    if output_filename is not None:
        self.results.save_results(output_filename)

    print('Run complete')

    if examine_chains:
        self.examine_chains()

    return sampler
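A sketch of how this version might be invoked with periodic checkpointing, following the docstring above (the sampler instance and argument values are illustrative):

# `pt_sampler` is assumed to be an instance of the class above, already
# configured with walkers and temperatures.
pt_sampler.run_sampler(total_orbits=10000, burn_steps=100, thin=2,
                       output_filename='orbit_samples.hdf5',
                       periodic_save_freq=50)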
def throw_darts(self, nburn=1000, nsteps=1000, method='emcee'):
    """
    Run the sampler.

    Args:
        nburn : int (default: 1000), number of burn-in steps.
        nsteps : int (default: 1000), number of steps to be saved.
        method : str (default: 'emcee'), sampling method: 'emcee',
            'emcee_PT', or 'nestle'.
    """

    # To allow for PT sampling
    if self.ntemps is not None:
        try:
            import ptemcee
        except ImportError:
            raise ImportError(
                "You must pip install ptemcee to run the "
                "parallel-tempering MCMC method")
        method = 'emcee_PT'

    if method == 'emcee':
        # Define sampler
        if self.pool is not None:
            sampler = emcee.EnsembleSampler(
                self.nwalkers, self.dim, self.posterior_function,
                args=[self], blobs_dtype=posterior.blobs_dtype,
                moves=self.emcee_moves, pool=self.pool)
            self.pool = None
        elif self.threads != 1:
            sampler = emcee.EnsembleSampler(
                self.nwalkers, self.dim, self.posterior_function,
                args=[self], blobs_dtype=posterior.blobs_dtype,
                moves=self.emcee_moves, threads=self.threads)
        else:
            sampler = emcee.EnsembleSampler(
                self.nwalkers, self.dim, self.posterior_function,
                blobs_dtype=posterior.blobs_dtype, args=[self],
                moves=self.emcee_moves)

        # Burn-in
        print(self.p0.shape)
        print("Starting burn-in...")
        pos = sampler.run_mcmc(self.p0, nburn)
        print("...finished running burn-in")

        # Full run
        print("Starting full run...")
        sampler.reset()
        sampler.run_mcmc(pos, nsteps)
        print("...full run finished")

        # Save only every `thin`-th sample
        self.chains = sampler.chain[:, ::self.thin, :]
        self.derived = np.swapaxes(np.array(sampler.blobs), 0,
                                   1)[:, ::self.thin]
        self.lnprobability = sampler.lnprobability[:, ::self.thin]
        self.sampler = sampler

    elif method == 'emcee_PT':
        # Define sampler
        if self.pool is not None:
            sampler = ptemcee.Sampler(self.nwalkers, self.dim,
                                      self.ln_likelihood_function,
                                      self.ln_prior_function,
                                      ntemps=self.ntemps,
                                      Tmax=self.Tmax,
                                      blobs_dtype=posterior.blobs_dtype,
                                      loglargs=(self, ),
                                      logpargs=(self, ),
                                      pool=self.pool)
            self.pool = None
        else:
            sampler = ptemcee.Sampler(self.nwalkers, self.dim,
                                      self.ln_likelihood_function,
                                      self.ln_prior_function,
                                      ntemps=self.ntemps,
                                      Tmax=self.Tmax,
                                      blobs_dtype=posterior.blobs_dtype,
                                      loglargs=(self, ),
                                      logpargs=(self, ))

        # Burn-in
        print("Starting burn-in...")
        for pos, prob, state in sampler.sample(self.p0,
                                               iterations=nburn):
            pass
        print("...finished running burn-in")

        # Full run
        print("Starting full run...")
        sampler.reset()
        for pos, prob, state in sampler.sample(pos, iterations=nsteps,
                                               thin=self.thin):
            pass
        print("...full run finished")

        self.chains = sampler.chain
        self.derived = sampler.blobs
        self.lnprobability = sampler.logprobability
        self.sampler = sampler

    elif method == 'nestle':
        print("Nested sampling is not yet implemented.")

    else:
        print("Your chosen method is not supported by dart_board.")
def sample(self):
    ''' Run the MCMC. '''
    # First make sure that the maximum-likelihood params are fitted
    if not self.minimized:
        self.approximate_ml()

    ndim, nwalkers = len(self.params_vary), self.config['NWALKERS']
    p0 = np.zeros((nwalkers, len(self.params_vary)))
    pml = [self.params_all[pname] for pname in self.params_vary]
    for pnum, pname in enumerate(self.params_vary):
        p0[:, pnum] = (np.random.randn(nwalkers)
                       * self.config['SAMPLE_BALL'] + 1.) * pml[pnum]

    plist = []
    for key in self.params_vary.keys():
        plist.append(key)

    args = (self.freqs, self.tb_meas, self.var_tb, self.params_all,
            plist, self.params_vary, self.fg_model, self.sig_model)

    if self.config['MPI']:
        from emcee.utils import MPIPool
        pool = MPIPool()
        if not pool.is_master():
            pool.wait()
            sys.exit(0)
        self.sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                             args=args, pool=pool)
        self.sampler.run_mcmc(p0, self.config['NBURN'])  # burn in
        p0 = self.sampler.chain[:, -1, :].squeeze()
        self.sampler.reset()
        self.sampler.run_mcmc(p0, self.config['NSTEPS'])
        pool.close()
    else:
        if self.config['SAMPLER'] == 'PARALLELTEMPERING':
            logl = lambda x: lnlike(
                x, self.freqs, self.tb_meas, self.var_tb,
                self.params_all, self.params_vary, self.fg_model,
                self.sig_model)
            logp = lambda x: lnprior(x, self.params_vary.keys(),
                                     self.params_vary)
            self.sampler = ptemcee.Sampler(
                ntemps=self.config['NTEMPS'],
                nwalkers=self.config['NWALKERS'],
                dim=self.ndim, logl=logl, logp=logp)
        else:
            self.sampler = emcee.EnsembleSampler(
                nwalkers=self.config['NWALKERS'], ndim=ndim,
                log_prob_fn=lnprob, args=args,
                threads=self.config['THREADS'])

        # If we use PT sampling, we need a further dimension of start
        # parameters for the different temperatures
        if self.config['SAMPLER'] == 'PARALLELTEMPERING':
            p0 = np.array([p0 for m in range(self.config['NTEMPS'])])

        # Run the MCMC for the burn-in
        self.sampler.run_mcmc(p0, self.config['NBURN'],
                              thin=self.config['NTHIN'])

        # Reset after burn-in and run the full chain
        if self.config['SAMPLER'] == 'PARALLELTEMPERING':
            p0 = self.sampler.chain[:, :, -1, :]
        else:
            p0 = self.sampler.chain[:, -1, :].squeeze()
        self.sampler.reset()
        self.sampler.run_mcmc(p0, self.config['NSTEPS'],
                              thin=self.config['NTHIN'])

    # Create the output directory
    if not os.path.exists(self.config['PROJECT_NAME']):
        os.makedirs(self.config['PROJECT_NAME'])

    # Save output and configuration
    with open(os.path.join(self.config['PROJECT_NAME'], 'config.yaml'),
              'w') as f:
        yaml.dump(self.config, f, default_flow_style=False)
    with open(os.path.join(self.config['PROJECT_NAME'],
                           'ml_params.yaml'), 'w') as f:
        yaml.dump(self.params_all, f, default_flow_style=False)

    self.sampled = True

    # Collect result parameters
    resultdict = {}

    # Chain
    resultdict['chain'] = self.sampler.chain

    # Conservative evidence
    if (self.config['COMPUTECOVARIANCE']
            & (self.config['SAMPLER'] == 'ENSEMBLESAMPLER')):
        # Estimate autocorrelation
        self.acors = self.sampler.acor.astype(int)
        resultdict['autocorrs'] = self.acors

        # Estimate covariance of the sampled parameters
        self.cov_samples = np.zeros((len(self.params_vary),
                                     len(self.params_vary)))
        resultdict['cov_samples'] = self.cov_samples
        for i in range(len(self.params_vary)):
            for j in range(len(self.params_vary)):
                stepsize = np.max([self.acors[i], self.acors[j]])
                csample_i = self.sampler.chain[:, ::stepsize,
                                               i].flatten()
                csample_j = self.sampler.chain[:, ::stepsize,
                                               j].flatten()
                self.cov_samples[i, j] = np.mean(
                    (csample_i - csample_i.mean())
                    * (csample_j - csample_j.mean()))

        # Compute the conservative evidence without the prior factor
        self.conservative_evidence = (
            np.exp(self.ln_ml)
            / np.sqrt(np.linalg.det(self.cov_samples)))
        resultdict['conservative_evidence'] = self.conservative_evidence

    # Evidence from thermodynamic integration with the PT sampler
    if self.config['SAMPLER'].lower() == 'paralleltempering':
        self.logz, self.dlogz = self.sampler.log_evidence_estimate(
            fburnin=0.)
        resultdict['log_thd_evidence'] = self.logz
        resultdict['dlog_thd_evidence'] = self.dlogz

    # Posterior mean values of the parameters
    post_mean_vals = np.mean(self.sampler.flatchain, axis=0)
    resultdict['post_mean_vals'] = post_mean_vals

    # Value of the posterior at the posterior-mean parameters
    logL = self.sampler.log_prob_fn(post_mean_vals)
    resultdict['logL'] = logL

    # Save as .npz
    np.savez(os.path.join(self.config['PROJECT_NAME'], 'output.npz'),
             **resultdict)
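For reference, the "conservative evidence" computed in the COMPUTECOVARIANCE branch is, as implemented here (with $\ln\hat{L}$ being self.ln_ml and $\Sigma$ the covariance of the chain thinned by the autocorrelation length),

$$ Z_{\mathrm{cons}} = \frac{e^{\ln\hat{L}}}{\sqrt{\det\Sigma}}, \qquad \Sigma_{ij} = \left\langle (\theta_i - \bar\theta_i)(\theta_j - \bar\theta_j) \right\rangle , $$

where, as the code comment notes, the prior factor is omitted.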
def run_sampler(self, total_orbits, burn_steps=0, thin=1):
    """
    Runs PT MCMC sampler. Results are stored in ``self.chain`` and
    ``self.lnlikes``. Results also added to ``orbitize.results.Results``
    object (``self.results``)

    .. Note:: Can be run multiple times if you want to pause and inspect
        things. Each call will continue from the end state of the last
        execution.

    Args:
        total_orbits (int): total number of accepted possible orbits that
            are desired. This equals ``num_steps_per_walker`` x
            ``num_walkers``
        burn_steps (int): optional parameter to tell the sampler to
            discard a certain number of steps at the beginning
        thin (int): factor to thin the steps of each walker by to remove
            correlations in the walker steps

    Returns:
        ``emcee.sampler`` object: the sampler used to run the MCMC
    """
    if self.use_pt:
        sampler = ptemcee.Sampler(
            self.num_walkers, self.num_params, self._logl,
            orbitize.priors.all_lnpriors, ntemps=self.num_temps,
            threads=self.num_threads, logpargs=[self.priors, ])
    else:
        sampler = emcee.EnsembleSampler(
            self.num_walkers, self.num_params, self._logl,
            threads=self.num_threads, kwargs={'include_logp': True})

    for pos, lnprob, lnlike in sampler.sample(self.curr_pos,
                                              iterations=burn_steps,
                                              thin=thin):
        pass

    sampler.reset()
    try:
        self.curr_pos = pos
    except UnboundLocalError:  # 0-step burn-in (pos is not defined)
        pass
    print('Burn in complete')

    nsteps = int(np.ceil(total_orbits / self.num_walkers))
    assert nsteps > 0, 'Total_orbits must be greater than num_walkers.'

    i = 0
    for pos, lnprob, lnlike in sampler.sample(p0=self.curr_pos,
                                              iterations=nsteps,
                                              thin=thin):
        i += 1
        # print progress statement
        if i % 5 == 0:
            print(str(i) + '/' + str(nsteps) + ' steps completed',
                  end='\r')
    print('')

    self.curr_pos = pos

    # TODO: Need something here to pick out temperatures; just using the
    # lowest one for now
    self.chain = sampler.chain

    if self.use_pt:
        self.post = sampler.flatchain[0, :, :]
        # should also be picking out the lowest-temperature logps
        self.lnlikes = sampler.logprobability[0, :, :].flatten()
        self.lnlikes_alltemps = sampler.logprobability
    else:
        self.post = sampler.flatchain
        self.lnlikes = sampler.lnprobability

    # include fixed parameters in posterior
    self.post = self._fill_in_fixed_params(self.post)

    self.results.add_samples(self.post, self.lnlikes)

    print('Run complete')

    return sampler
print("1 sigma spread", sigma1_1) print("2 sigma spread", sigma2_1) quantiles = np.percentile(sampler.flatchain[:, 1], [2.28, 15.9, 50, 84.2, 97.7]) sigma1_2 = 0.5 * (quantiles[3] - quantiles[1]) sigma2_2 = 0.5 * (quantiles[4] - quantiles[0]) print("1 sigma spread", sigma1_2) print("2 sigma spread", sigma2_2) elif multiT: betas = np.asarray([0.01, 0.505, 1.0]) #inverse temperatures for log-likelihood sampler = ptemcee.Sampler(nwalkers, ndim, lnprob, lnprior, betas=betas, threads=3) sampler.run_mcmc(pos, nsteps) chain = sampler.chain[2][:, :, :] else: sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob) sampler.run_mcmc(pos, nsteps) chain = sampler.chain probs = sampler.get_log_prob() maxprob = np.argmin(probs) hp_loc = np.unravel_index(maxprob, probs.shape) mle_soln = chain[( hp_loc[1], hp_loc[0] )] #switching from order (nsteps,nwalkers) to (nwalkers,nsteps)
def samplePtemcee(t, y, ye, mup, sigp, Tmax, nwalkers=100, nsteps=1000,
                  nburn=None, ntemps=21, sampleFile=None,
                  maxTemp=np.inf):
    ndim = len(mup)
    ndata = len(t)

    if nburn is None:
        nburn = nsteps // 4

    doTheSampling = True
    betas = None

    if sampleFile is not None:
        with h5.File(sampleFile, "a") as f:
            if ('ptemcee/chain' in f and 'ptemcee/lnprobability' in f
                    and 'ptemcee/lnlikelihood' in f
                    and 'ptemcee/betas' in f):
                chain = f['ptemcee/chain'][...]
                lnprobability = f['ptemcee/lnprobability'][...]
                lnlikelihood = f['ptemcee/lnlikelihood'][...]
                betas = f['ptemcee/betas'][...]
                try:
                    assert chain.shape == (ntemps, nwalkers, nsteps,
                                           ndim)
                    assert lnprobability.shape == (ntemps, nwalkers,
                                                   nsteps)
                    assert lnlikelihood.shape == (ntemps, nwalkers,
                                                  nsteps)
                    assert betas.shape == (ntemps, )
                    samps = chain[0].reshape((-1, ndim))
                    lnprobs = lnprobability[0].reshape((-1, ))
                    lnlikes = lnlikelihood
                    doTheSampling = False
                except AssertionError:
                    pass

    if doTheSampling:
        if betas is None:
            betas = ptemcee.make_ladder(ndim, ntemps, maxTemp)
        sampler = ptemcee.Sampler(nwalkers, ndim, loglike, logprior,
                                  logl_args=(t, y, ye),
                                  logp_args=(mup, sigp),
                                  betas=betas, adaptive=True)

        p0 = mup[None, None, :] + np.random.normal(
            0.0, 1.0e-4, (ntemps, nwalkers, ndim))

        if nburn > 0:
            for i, result in enumerate(
                    sampler.sample(p0, iterations=nburn,
                                   storechain=False)):
                print("Burn in {0:d} steps: {1:.1f}%".format(
                    nburn, 100 * (i + 1) / nburn), end='\r')
            print('')
            sampler.reset()
        else:
            result = (p0, )

        for i, result in enumerate(
                sampler.sample(*result, iterations=nsteps,
                               storechain=True)):
            print("Sampling {0:d} steps: {1:.1f}%".format(
                nsteps, 100 * (i + 1) / nsteps), end='\r')
        print('')

        chain = sampler.chain
        samps = sampler.flatchain[0]
        lnprobs = sampler.lnprobability[0].reshape((-1, ))
        lnlikes = sampler.lnlikelihood
        betas = sampler.betas

    # only re-save when we actually sampled (otherwise `sampler` is
    # undefined and the file already holds this data)
    if doTheSampling and sampleFile is not None:
        f = h5.File(sampleFile, 'a')
        if 'ptemcee/chain' in f:
            f['ptemcee/chain'].resize(sampler.chain.shape)
            f['ptemcee/chain'][...] = sampler.chain[...]
        else:
            f.create_dataset('ptemcee/chain', data=sampler.chain,
                             maxshape=(None, None, None, None))
        if 'ptemcee/lnprobability' in f:
            f['ptemcee/lnprobability'].resize(
                sampler.lnprobability.shape)
            f['ptemcee/lnprobability'][...] = sampler.lnprobability[...]
        else:
            f.create_dataset('ptemcee/lnprobability',
                             data=sampler.lnprobability,
                             maxshape=(None, None, None))
        if 'ptemcee/lnlikelihood' in f:
            f['ptemcee/lnlikelihood'].resize(sampler.lnlikelihood.shape)
            f['ptemcee/lnlikelihood'][...] = sampler.lnlikelihood[...]
        else:
            f.create_dataset('ptemcee/lnlikelihood',
                             data=sampler.lnlikelihood,
                             maxshape=(None, None, None))
        if 'ptemcee/betas' in f:
            f['ptemcee/betas'].resize(betas.shape)
            f['ptemcee/betas'][...] = betas[...]
        else:
            f.create_dataset('ptemcee/betas', data=betas,
                             maxshape=(None, ))
        f.close()

    labels = ['C{0:01d}'.format(i) for i in range(ndim)]

    fig = corner.corner(samps, labels=labels)
    figname = "emceePT_corner.png"
    print("Saving", figname)
    fig.savefig(figname)
    plt.close(fig)

    # trace plots for the coldest and hottest temperatures
    # for k in range(ntemps):
    for k in [0, ntemps - 1]:
        for i in range(ndim):
            fig, ax = plt.subplots(1, 1, figsize=(8, 4))
            for j in range(nwalkers):
                ax.plot(chain[k, j, :, i], alpha=2.0 / nwalkers,
                        color='k')
            ax.set_xlabel('# Iterations')
            ax.set_ylabel(labels[i])
            figname = "emceePT_trace_T{0:01d}_{1:s}.png".format(
                k, labels[i])
            print("Saving", figname)
            fig.savefig(figname)
            plt.close(fig)

    imap = lnprobs.argmax()

    taus = autocorr.integrated_time(chain, timeAxis=2, walkerAxis=1)
    lnlike_taus = autocorr.integrated_time(lnlikes, timeAxis=2,
                                           walkerAxis=1)
    print("emceePT AutoCorrTau:", taus)
    print("emceePT AutoCorrTau logLike:", lnlike_taus)

    lnlike_adj = lnlikes - lnlikes.mean(axis=(1, 2), keepdims=True)
    lnlike_var = (lnlike_adj * lnlike_adj).mean(axis=(1, 2))

    xmap = samps[imap]
    means = samps.mean(axis=0)
    diffs = samps - means
    cov = (diffs[:, :, None] * diffs[:, None, :]).mean(axis=0)

    # mean log-likelihood and its error at each beta, reordered from
    # beta = 0 (prior) up to beta = 1 (posterior)
    avglnl = lnlikes.mean(axis=(1, 2))[::-1]
    avglnl_err = np.sqrt(lnlike_taus / (nsteps * nwalkers)
                         * lnlike_var)[::-1]
    betas = betas[::-1]
    if betas[0] > 0.0:
        betas = np.concatenate(([0.0], betas))
        avglnl = np.concatenate(([avglnl[0]], avglnl))
        avglnl_err = np.concatenate(([avglnl_err[0]], avglnl_err))

    return xmap, means, cov, samps, lnprobs, avglnl, betas, avglnl_err
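The `avglnl` and `betas` arrays returned above are ordered for thermodynamic integration, where the log evidence is the integral of the mean log-likelihood over inverse temperature:

$$ \ln Z = \int_0^1 \langle \ln \mathcal{L} \rangle_\beta \, d\beta . $$

ptemcee's `log_evidence_estimate` performs this quadrature internally; below is a minimal sketch of the same estimate built from the returned arrays (trapezoidal rule; the error propagation is a rough assumption, not the original code's method):

import numpy as np

def ln_evidence(betas, avglnl, avglnl_err):
    """Trapezoidal thermodynamic-integration estimate of ln Z.

    betas ascend from 0 to 1; avglnl[i] is the mean log-likelihood at
    betas[i], as returned by samplePtemcee above.
    """
    lnZ = np.trapz(avglnl, betas)
    # crude error propagation through approximate quadrature weights
    w = np.gradient(betas)
    dlnZ = np.sqrt(np.sum((w * avglnl_err) ** 2))
    return lnZ, dlnZ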