def run(self): """ Prepares the posterior function and calls ``PolyChord``'s ``run`` function. """ # Prepare the posterior # Don't forget to multiply by the volume of the physical hypercube, # since PolyChord divides by it def logpost(params_values): logposterior, logpriors, loglikes, derived = ( self.model.logposterior(params_values)) if len(derived) != len(self.model.parameterization.derived_params()): derived = np.full( len(self.model.parameterization.derived_params()), np.nan) if len(loglikes) != len(self.model.likelihood._likelihoods): loglikes = np.full( len(self.model.likelihood._likelihoods), np.nan) derived = list(derived) + list(logpriors) + list(loglikes) return ( max(logposterior + self.logvolume, 0.99 * self.pc_settings.logzero), derived) self.log.info("Sampling!") if get_mpi(): get_mpi_comm().barrier() self.pc.run_polychord(logpost, self.nDims, self.nDerived, self.pc_settings, self.pc_prior)
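# A standalone sketch of the wrapper contract above (hedged: toy stand-ins for
# cobaya's Model and for PolyChord's settings; `toy_logposterior` is hypothetical).
# PolyChord expects logpost(physical_params) -> (float, derived_list), and the
# wrapper clamps -inf to just inside `logzero` so the Fortran side never sees it.
import numpy as np

LOGZERO = np.nan_to_num(-np.inf)  # same default as `self.logzero` above

def toy_logposterior(x):
    # stand-in for self.model.logposterior(...): (logpost, logpriors, loglikes, derived)
    logprior = 0.0 if np.all(np.abs(x) < 5) else -np.inf
    loglike = -0.5 * float(np.dot(x, x))
    return logprior + loglike, [logprior], [loglike], [float(np.sum(x))]

def logpost(params_values, logvolume=0.0):
    logposterior, logpriors, loglikes, derived = toy_logposterior(np.asarray(params_values))
    # append log-priors and log-likelihoods as extra derived quantities, as above
    derived = list(derived) + list(logpriors) + list(loglikes)
    # add back log(volume) (PolyChord divides by the prior volume) and clamp -inf
    return max(logposterior + logvolume, 0.99 * LOGZERO), derived

print(logpost([0.1, 0.2]))   # finite log-posterior
print(logpost([10.0, 0.0]))  # outside the toy prior: clamped to 0.99 * logzero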
def send_error_signal(self): """ Sends an error signal to the other MPI processes. """ for i_rank in range(get_mpi_size()): if i_rank != get_mpi_rank(): get_mpi_comm().isend(True, dest=i_rank, tag=_error_tag)
def close(self, *args): """ Determines success (or not), chooses best (if MPI) and produces output (if requested). """ evals_attr_ = evals_attr[self.method.lower()] # If something failed if not hasattr(self, "result"): return if get_mpi_size(): results = get_mpi_comm().gather(self.result, root=0) _inv_affine_transform_matrices = get_mpi_comm().gather( self._inv_affine_transform_matrix, root=0) _affine_transform_baselines = get_mpi_comm().gather( self._affine_transform_baseline, root=0) if am_single_or_primary_process(): i_min = np.argmin([getattr(r, evals_attr_) for r in results]) self.result = results[i_min] self._inv_affine_transform_matrix = _inv_affine_transform_matrices[ i_min] self._affine_transform_baseline = _affine_transform_baselines[ i_min] if am_single_or_primary_process(): if not self.success: raise LoggedError( self.log, "Minimization failed! Here is the raw result object:\n%s", str(self.result)) logp_min = -np.array(getattr(self.result, evals_attr_)) x_min = self.inv_affine_transform(self.result.x) self.log.info("-log(%s) minimized to %g", "likelihood" if self.ignore_prior else "posterior", -logp_min) recomputed_post_min = self.model.logposterior(x_min, cached=False) recomputed_logp_min = (sum(recomputed_post_min.loglikes) if self.ignore_prior else recomputed_post_min.logpost) if not np.allclose(logp_min, recomputed_logp_min): raise LoggedError( self.log, "Cannot reproduce result. Maybe your likelihood is stochastic? " "Recomputed min: %g (was %g) at %r", recomputed_logp_min, logp_min, x_min) self.minimum = OnePoint( self.model, self.output, name="", extension=("bestfit.txt" if self.ignore_prior else "minimum.txt")) self.minimum.add(x_min, derived=recomputed_post_min.derived, logpost=recomputed_post_min.logpost, logpriors=recomputed_post_min.logpriors, loglikes=recomputed_post_min.loglikes) self.log.info("Parameter values at minimum:\n%s", self.minimum.data.to_string()) self.minimum._out_update() self.dump_getdist()
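# The best-of-all-ranks selection used above, in isolation (hedged sketch:
# scipy results stand in for the `results` list gathered over MPI).
import numpy as np
from scipy.optimize import minimize

results = [minimize(lambda x: float((x[0] - 2) ** 2), [x0]) for x0 in (0.0, 5.0)]
evals_attr_ = "fun"  # attribute holding the final function value, as above
i_min = np.argmin([getattr(r, evals_attr_) for r in results])
print(results[i_min].x, getattr(results[i_min], evals_attr_))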
def check_error_signal(self): """ Checks if any of the other processes has sent an error signal, and fails. NB: This behaviour only shows up when running this sampler inside a Python script, not when running with `cobaya run` (in that case, the process raising an error will call `MPI_ABORT` and kill the rest). """ for i in range(get_mpi_size()): if i != get_mpi_rank(): from mpi4py import MPI status = MPI.Status() get_mpi_comm().iprobe(i, status=status) if status.tag == _error_tag: raise LoggedError(self.log, "Another process failed! Exiting.")
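# How send_error_signal()/check_error_signal() pair up in mpi4py terms (hedged
# sketch; run with mpiexec -n 2 or more; `_error_tag` is an agreed-upon int tag).
from mpi4py import MPI

_error_tag = 99
comm = MPI.COMM_WORLD

if comm.Get_rank() == 1:
    # non-blocking send to every other rank, as in send_error_signal()
    for dest in range(comm.Get_size()):
        if dest != comm.Get_rank():
            comm.isend(True, dest=dest, tag=_error_tag)
else:
    # non-blocking probe for a pending message, as in check_error_signal()
    status = MPI.Status()
    if comm.iprobe(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status):
        if status.Get_tag() == _error_tag:
            print("rank %d: error signal from rank %d"
                  % (comm.Get_rank(), status.Get_source()))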
def check_all_ready(self): """ Checks if the chain(s) is(/are) ready to check convergence and, if requested, learn a new covariance matrix for the proposal distribution. """ msg_ready = ( ("Ready to" if get_mpi() or self.learn_proposal else "") + " check convergence" + (" and" if get_mpi() and self.learn_proposal else "") + (" learn a new proposal covmat" if self.learn_proposal else "")) # If *just* (weight==1) got ready to check+learn if (self.n() > 0 and self.current_point[_weight] == 1 and not (self.n() % self.check_every)): self.log.info("Checkpoint: %d samples accepted.", self.n()) if get_mpi(): self.been_waiting += 1 if self.been_waiting > self.max_waiting: self.log.error( "Waiting for too long for all chains to be ready. " "Maybe one of them is stuck or died unexpectedly?") raise HandledException self.model.dump_timing() # If not MPI, we are ready if not get_mpi(): if msg_ready: self.log.info(msg_ready) return True # If MPI, tell the rest that we are ready -- we use a "gather" # ("reduce" was problematic), but we are in practice just pinging if not hasattr(self, "req"): # just once! self.all_ready = np.empty(get_mpi_size()) self.req = get_mpi_comm().Iallgather(np.array([1.]), self.all_ready) self.log.info(msg_ready + " (waiting for the rest...)") # If all processes are ready to learn (= communication finished) if self.req.Test() if hasattr(self, "req") else False: # Sanity check: actually all processes have finished assert np.all(self.all_ready == 1), ( "This should not happen! Notify the developers. (Got %r)", self.all_ready) if get_mpi_rank() == 0: self.log.info("All chains are r" + msg_ready[1:]) delattr(self, "req") self.been_waiting = 0 # Just in case, a barrier here get_mpi_comm().barrier() return True return False
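# The non-blocking "is everyone ready?" ping used above, in isolation (hedged
# mpi4py sketch; run under mpiexec). Each rank contributes a 1 and polls the
# request; the collective only completes once every rank has posted it.
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
all_ready = np.empty(comm.Get_size())
req = comm.Iallgather(np.array([1.]), all_ready)
while not req.Test():  # in the sampler, this poll happens once per checkpoint
    pass
assert np.all(all_ready == 1)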
def close(self, *args): """ Determines success (or not), chooses best (if MPI) and produces output (if requested). """ # If something failed if not hasattr(self, "result"): return if get_mpi_size(): results = get_mpi_comm().gather(self.result, root=0) if not get_mpi_rank(): self.result = results[np.argmin([r.fun for r in results])] if not get_mpi_rank(): if not self.result.success: self.log.error("Maximization failed! Here is the `scipy` raw result:\n%r", self.result) raise HandledException self.log.info("log%s maximized at %g", "likelihood" if self.ignore_prior else "posterior", -self.result.fun) post = self.model.logposterior(self.result.x) recomputed_max = sum(post.loglikes) if self.ignore_prior else post.logpost if not np.allclose(-self.result.fun, recomputed_max): self.log.error("Cannot reproduce result. Something bad happened. " "Recomputed max: %g at %r", recomputed_max, self.result.x) raise HandledException self.maximum = OnePoint( self.model, self.output, name="maximum", extension=("likelihood" if self.ignore_prior else "posterior")) self.maximum.add(self.result.x, derived=post.derived, logpost=post.logpost, logpriors=post.logpriors, loglikes=post.loglikes) self.log.info("Parameter values at maximum:\n%s"%self.maximum.data.to_string()) self.maximum._out_update()
def load_input_MPI(input_file): if am_single_or_primary_process(): info = load_input(input_file) else: info = None info = get_mpi_comm().bcast(info, root=0) return info
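# Hedged usage sketch ("input.yaml" is a hypothetical file): only rank 0 reads
# the file; every rank returns the same parsed dict, so later code can branch
# identically on all processes.
info = load_input_MPI("input.yaml")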
def run(self): """ Runs the sampler. """ # Get first point, to be discarded -- not possible to determine its weight # Still, we need to compute derived parameters, since, as the proposal is "blocked", # we may be saving the initial state of some block. # NB: if resuming but nothing was written (burn-in not finished): re-start self.log.info("Initial point:") if self.resuming and self.collection.n(): initial_point = (self.collection[self.collection.sampled_params] .iloc[self.collection.n() - 1]).values.copy() logpost = -(self.collection[_minuslogpost] .iloc[self.collection.n() - 1].copy()) logpriors = -(self.collection[self.collection.minuslogprior_names] .iloc[self.collection.n() - 1].copy()) loglikes = -0.5 * (self.collection[self.collection.chi2_names] .iloc[self.collection.n() - 1].copy()) derived = (self.collection[self.collection.derived_params] .iloc[self.collection.n() - 1].values.copy()) else: initial_point = self.model.prior.reference(max_tries=self.max_tries) logpost, logpriors, loglikes, derived = self.model.logposterior(initial_point) self.current_point.add(initial_point, derived=derived, logpost=logpost, logpriors=logpriors, loglikes=loglikes) self.log.info("\n%s", self.current_point.data.to_string( index=False, line_width=_line_width)) # Initial dummy checkpoint (needed when 1st checkpoint not reached in prev. run) self.write_checkpoint() # Main loop! self.log.info("Sampling!" + ( " (NB: nothing will be printed until %d burn-in samples " % self.burn_in + "have been obtained)" if self.burn_in else "")) while self.n() < self.effective_max_samples and not self.converged: self.get_new_sample() # Callback function if (hasattr(self, "callback_function_callable") and not (max(self.n(), 1) % self.callback_every) and self.current_point[_weight] == 1): self.callback_function_callable(self) self.last_point_callback = self.collection.n() # Checking convergence and (optionally) learning the covmat of the proposal if self.check_all_ready(): self.check_convergence_and_learn_proposal() if am_single_or_primary_process(): self.i_checkpoint += 1 if self.n() == self.effective_max_samples: self.log.info("Reached maximum number of accepted steps allowed. " "Stopping.") # Make sure the last batch of samples ( < output_every ) are written self.collection._out_update() if more_than_one_process(): Ns = (lambda x: np.array(get_mpi_comm().gather(x)))(self.n()) else: Ns = [self.n()] if am_single_or_primary_process(): self.log.info("Sampling complete after %d accepted steps.", sum(Ns))
def __init__(self, *args, **kwargs): to_broadcast = ("folder", "prefix", "kind", "ext", "resuming") if am_single_or_primary_process(): Output.__init__(self, *args, **kwargs) else: for var in to_broadcast: setattr(self, var, None) for var in to_broadcast: setattr(self, var, get_mpi_comm().bcast(getattr(self, var), root=0))
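# The root-initializes-then-broadcast pattern above, in isolation (hedged
# mpi4py sketch with hypothetical attribute values):
from mpi4py import MPI

class SharedOutput:
    def __init__(self):
        to_broadcast = ("folder", "prefix")
        if MPI.COMM_WORLD.Get_rank() == 0:
            self.folder, self.prefix = "chains", "run1"  # real init on root only
        else:
            for var in to_broadcast:
                setattr(self, var, None)  # placeholders, overwritten below
        for var in to_broadcast:
            setattr(self, var, MPI.COMM_WORLD.bcast(getattr(self, var), root=0))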
def exception_handler(exception_type, value, trace_back): # Do nothing (just exit) if the exception has been handled and logged if exception_type == HandledException: # Exit all MPI processes getattr(get_mpi_comm(), "Abort", lambda: None)() return # so that no traceback is printed log = logging.getLogger("exception handler") line = "------------------------------------------------\n" log.critical(line[6:] + "\n" + "".join( traceback.format_exception(exception_type, value, trace_back)) + line) if exception_type == KeyboardInterrupt: log.critical("Interrupted by the user.") return log.critical( "Some unexpected ERROR occurred. You can see the exception information above.\n" "We recommend trying to reproduce this error with '%s:True' in the input.\n" "If you cannot solve it yourself and need to report it, include the debug output," "\nwhich you can send to a file by setting '%s:[some_file_name]'.", _debug, _debug_file) # Exit all MPI processes getattr(get_mpi_comm(), "Abort", lambda: None)()
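# This handler is designed to be installed as the global excepthook, e.g.
# (hedged usage sketch):
import sys
sys.excepthook = exception_handler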
def _run(self): """ Runs the sampler. """ self.mpi_info( "Sampling!" + (" (NB: no accepted step will be saved until %d burn-in samples " % self.burn_in.value + "have been obtained)" if self.burn_in.value else "")) self.n_steps_raw = 0 last_output = 0 last_n = self.n() while last_n < self.max_samples and not self.converged: self.get_new_sample() self.n_steps_raw += 1 if self.output_every.unit: # if output_every in sec, print some info and dump at fixed time intervals now = datetime.datetime.now() now_sec = now.timestamp() if now_sec >= last_output + self.output_every.value: self.do_output(now) last_output = now_sec if self.current_point.weight == 1: # have added new point # Callback function n = self.n() if n != last_n: # and actually added last_n = n if (hasattr(self, "callback_function_callable") and not (max(n, 1) % self.callback_every.value) and self.current_point.weight == 1): self.callback_function_callable(self) self.last_point_callback = len(self.collection) # Checking convergence and (optionally) learning # the covmat of the proposal if self.check_all_ready(): self.check_convergence_and_learn_proposal() if is_main_process(): self.i_learn += 1 if last_n == self.max_samples: self.log.info("Reached maximum number of accepted steps allowed. " "Stopping.") # Make sure the last batch of samples ( < output_every (not in sec)) are written self.collection.out_update() if more_than_one_process(): Ns = (lambda x: np.array(get_mpi_comm().gather(x)))(self.n()) if not is_main_process(): Ns = [] else: Ns = [self.n()] self.mpi_info("Sampling complete after %d accepted steps.", sum(Ns))
def check_all_ready(self): """ Checks if the chain(s) is(/are) ready to check convergence and, if requested, learn a new covariance matrix for the proposal distribution. """ msg_ready = ("Ready to check convergence" + (" and learn a new proposal covmat" if self.learn_proposal else "")) n = len(self.collection) # If *just* (weight==1) got ready to check+learn if not (n % self.learn_every.value) and n > 0: self.log.info("Learn + convergence test @ %d samples accepted.", n) if more_than_one_process(): self.been_waiting += 1 if self.been_waiting > self.max_waiting: self.send_error_signal() raise LoggedError( self.log, "Waiting for too long for all chains to be ready. " "Maybe one of them is stuck or died unexpectedly?") self.model.dump_timing() # If not MPI size > 1, we are ready if not more_than_one_process(): self.log.debug(msg_ready) return True # Error check in case any process already sent an error signal self.check_error_signal() # If MPI, tell the rest that we are ready -- we use a "gather" # ("reduce" was problematic), but we are in practice just pinging if not hasattr(self, "req"): # just once! self.all_ready = np.empty(get_mpi_size()) self.req = get_mpi_comm().Iallgather(np.array([1.]), self.all_ready) self.log.info(msg_ready + " (waiting for the rest...)") # If all processes are ready to learn (= communication finished) if self.req.Test() if hasattr(self, "req") else False: # Sanity check: actually all processes have finished assert np.all(self.all_ready == 1), ( "This should not happen! Notify the developers. (Got %r)", self.all_ready) if more_than_one_process() and is_main_process(): self.log.info("All chains are r" + msg_ready[1:]) delattr(self, "req") self.been_waiting = 0 # Another error check, in case the error occurred after sending "ready" signal self.check_error_signal() # Just in case, a barrier here sync_processes() return True return False
def check_convergence_and_learn_proposal(self): """ Checks the convergence of the sampling process, and, if requested, learns a new covariance matrix for the proposal distribution from the covariance of the last samples. """ if more_than_one_process(): # Compute and gather means, covs and CL intervals of last half of chains use_first = int(self.n() / 2) mean = self.collection.mean(first=use_first) cov = self.collection.cov(first=use_first) mcsamples = self.collection._sampled_to_getdist_mcsamples( first=use_first) try: bound = np.array([[ mcsamples.confidence(i, limfrac=self.Rminus1_cl_level / 2., upper=which) for i in range(self.model.prior.d()) ] for which in [False, True]]).T success_bounds = True except: bound = None success_bounds = False Ns, means, covs, bounds, acceptance_rates = map( lambda x: np.array(get_mpi_comm().gather(x)), [self.n(), mean, cov, bound, self.acceptance_rate]) else: # Compute and gather means, covs and CL intervals of last m-1 chain fractions m = 1 + self.Rminus1_single_split cut = int(len(self.collection) / m) try: Ns = (m - 1) * [cut] means = np.array([ self.collection.mean(first=i * cut, last=(i + 1) * cut - 1) for i in range(1, m) ]) covs = np.array([ self.collection.cov(first=i * cut, last=(i + 1) * cut - 1) for i in range(1, m) ]) mcsamples_list = [ self.collection._sampled_to_getdist_mcsamples( first=i * cut, last=(i + 1) * cut - 1) for i in range(1, m) ] except: self.log.info( "Not enough points in chain to check convergence. " "Waiting for next checkpoint.") return acceptance_rates = self.acceptance_rate try: bounds = [ np.array([[ mcs.confidence(i, limfrac=self.Rminus1_cl_level / 2., upper=which) for i in range(self.model.prior.d()) ] for which in [False, True]]).T for mcs in mcsamples_list ] success_bounds = True except: bounds = None success_bounds = False # Compute convergence diagnostics if is_main_process(): self.progress.at[self.i_learn, "N"] = (sum(Ns) if more_than_one_process() else self.n()) self.progress.at[self.i_learn, "timestamp"] = \ datetime.datetime.now().isoformat() acceptance_rate = (np.average(acceptance_rates, weights=Ns) if more_than_one_process() else acceptance_rates) self.log.info( " - Acceptance rate: %.3f" + (" = avg(%r)" % list(acceptance_rates) if more_than_one_process() else ""), acceptance_rate) self.progress.at[self.i_learn, "acceptance_rate"] = acceptance_rate # "Within" or "W" term -- our "units" for assessing convergence # and our prospective new covariance matrix mean_of_covs = np.average(covs, weights=Ns, axis=0) # "Between" or "B" term # We don't weight with the number of samples in the chains here: # shorter chains will likely be outliers, and we want to notice them cov_of_means = np.atleast_2d(np.cov(means.T)) # , fweights=Ns) # For numerical stability, we turn mean_of_covs into correlation matrix: # rho = (diag(Sigma))^(-1/2) * Sigma * (diag(Sigma))^(-1/2) # and apply the same transformation to the mean of covs (same eigenvals!) diagSinvsqrt = np.diag(np.power(np.diag(cov_of_means), -0.5)) corr_of_means = diagSinvsqrt.dot(cov_of_means).dot(diagSinvsqrt) norm_mean_of_covs = diagSinvsqrt.dot(mean_of_covs).dot( diagSinvsqrt) success = False # Cholesky of (normalized) mean of covs and eigvals of Linv*cov_of_means*L try: L = np.linalg.cholesky(norm_mean_of_covs) except np.linalg.LinAlgError: self.log.warning( "Negative covariance eigenvectors. " "This may mean that the covariance of the samples does not " "contain enough information at this point. " "Skipping learning a new covmat for now.") else: Linv = np.linalg.inv(L) # Suppress numpy warnings (restored later in this function) error_handling = deepcopy(np.geterr()) np.seterr(all="ignore") try: eigvals = np.linalg.eigvalsh( Linv.dot(corr_of_means).dot(Linv.T)) success = True except np.linalg.LinAlgError: self.log.warning("Could not compute eigenvalues. " "Skipping learning a new covmat for now.") else: Rminus1 = max(np.abs(eigvals)) self.progress.at[self.i_learn, "Rminus1"] = Rminus1 # For real square matrices, a possible def of the cond number is: condition_number = Rminus1 / min(np.abs(eigvals)) self.log.debug(" - Condition number = %g", condition_number) self.log.debug(" - Eigenvalues = %r", eigvals) self.log.info( " - Convergence of means: R-1 = %f after %d accepted steps" % (Rminus1, (sum(Ns) if more_than_one_process() else self.n())) + (" = sum(%r)" % list(Ns) if more_than_one_process() else "")) # Have we converged in means? # (criterion must be fulfilled twice in a row) if max(Rminus1, self.Rminus1_last) < self.Rminus1_stop: # Check the convergence of the bounds of the confidence intervals # Same as R-1, but with the rms deviation from the mean bound # in units of the mean standard deviation of the chains if success_bounds: Rminus1_cl = (np.std(bounds, axis=0).T / np.sqrt(np.diag(mean_of_covs))) self.log.debug( " - normalized std's of bounds = %r", Rminus1_cl) Rminus1_cl = np.max(Rminus1_cl) self.progress.at[self.i_learn, "Rminus1_cl"] = Rminus1_cl self.log.info( " - Convergence of bounds: R-1 = %f after %d " % (Rminus1_cl, (sum(Ns) if more_than_one_process() else self.n())) + "accepted steps" + (" = sum(%r)" % list(Ns) if more_than_one_process() else "")) if Rminus1_cl < self.Rminus1_cl_stop: self.converged = True self.log.info("The run has converged!") self._Ns = Ns else: self.log.info( "Computation of the bounds was not possible. " "Waiting until the next convergence check.") np.seterr(**error_handling) else: mean_of_covs = np.empty( (self.model.prior.d(), self.model.prior.d())) success = None Rminus1 = None # Broadcast and save the convergence status and the last R-1 of means success = share_mpi(success) if success: self.Rminus1_last, self.converged = share_mpi(( Rminus1, self.converged) if is_main_process() else None) # Do we want to learn a better proposal pdf? if self.learn_proposal and not self.converged: good_Rminus1 = (self.learn_proposal_Rminus1_max > self.Rminus1_last > self.learn_proposal_Rminus1_min) if not good_Rminus1: self.mpi_info( "Convergence less than requested for updates: " "waiting until the next convergence check.") return if more_than_one_process(): get_mpi_comm().Bcast(mean_of_covs, root=0) else: mean_of_covs = covs[0] try: self.proposer.set_covariance(mean_of_covs) if is_main_process(): self.log.info( " - Updated covariance matrix of proposal pdf.") self.log.debug("%r", mean_of_covs) except: if is_main_process(): self.log.debug( "Updating covariance matrix failed unexpectedly. " "Waiting until next covmat learning attempt.") # Save checkpoint info self.write_checkpoint()
def process_results(self): """ Determines success (or not), chooses best (if MPI) and produces output (if requested). """ evals_attr_ = evals_attr[self.method.lower()] # If something failed if not hasattr(self, "result"): return if more_than_one_process(): results = get_mpi_comm().gather(self.result, root=0) successes = get_mpi_comm().gather(self.success, root=0) _affine_transform_baselines = get_mpi_comm().gather( self._affine_transform_baseline, root=0) if is_main_process(): mins = [(getattr(r, evals_attr_) if s else np.inf) for r, s in zip(results, successes)] i_min = np.argmin(mins) self.result = results[i_min] self._affine_transform_baseline = _affine_transform_baselines[ i_min] else: successes = [self.success] if is_main_process(): if not any(successes): raise LoggedError( self.log, "Minimization failed! Here is the raw result object:\n%s", str(self.result)) elif not all(successes): self.log.warning('Some minimizations failed!') elif more_than_one_process(): if max(mins) - min(mins) > 1: self.log.warning('Big spread in minima: %r', mins) elif max(mins) - min(mins) > 0.2: self.log.warning('Modest spread in minima: %r', mins) logp_min = -np.array(getattr(self.result, evals_attr_)) x_min = self.inv_affine_transform(self.result.x) self.log.info("-log(%s) minimized to %g", "likelihood" if self.ignore_prior else "posterior", -logp_min) recomputed_post_min = self.model.logposterior(x_min, cached=False) recomputed_logp_min = (sum(recomputed_post_min.loglikes) if self.ignore_prior else recomputed_post_min.logpost) if not np.allclose(logp_min, recomputed_logp_min, atol=1e-2): raise LoggedError( self.log, "Cannot reproduce log minimum to within 0.01. Maybe your " "likelihood is stochastic, or affected by large numerical error? " "Recomputed min: %g (was %g) at %r", recomputed_logp_min, logp_min, x_min) self.minimum = OnePoint(self.model, self.output, name="", extension=get_collection_extension( self.ignore_prior)) self.minimum.add(x_min, derived=recomputed_post_min.derived, logpost=recomputed_post_min.logpost, logpriors=recomputed_post_min.logpriors, loglikes=recomputed_post_min.loglikes) self.log.info("Parameter values at minimum:\n%s", self.minimum.data.to_string()) self.minimum.out_update() self.dump_getdist()
def safe_exit(): """Closes all MPI processes, if more than one is present.""" if get_mpi_size() > 1: get_mpi_comm().Abort(1)
def info_random_gaussian(ranges, n_modes=1, prefix="", O_std_min=1e-2, O_std_max=1, mpi_aware=True): """ Wrapper around ``random_mean`` and ``random_cov`` to generate the likelihood and parameter info for a random Gaussian. If ``mpi_aware=True``, it draws the random quantities only once, and communicates them to the rest of the MPI processes. """ if am_single_or_primary_process() or not mpi_aware: cov = random_cov(ranges, n_modes=n_modes, O_std_min=O_std_min, O_std_max=O_std_max, mpi_warn=False) if n_modes == 1: cov = [cov] # Make sure it stays away from the edges mean = [[]] * n_modes for i in range(n_modes): std = np.sqrt(cov[i].diagonal()) factor = 3 ranges_mean = [[l[0] + factor * s, l[1] - factor * s] for l, s in zip(ranges, std)] # If this implies min>max, take the centre ranges_mean = [(l if l[0] <= l[1] else 2 * [(l[0] + l[1]) / 2]) for l in ranges_mean] mean[i] = random_mean(ranges_mean, n_modes=1, mpi_warn=False) elif not am_single_or_primary_process() and mpi_aware: mean, cov = None, None if mpi_aware: mean, cov = get_mpi_comm().bcast(mean, root=0), get_mpi_comm().bcast(cov, root=0) dimension = len(ranges) info = { _likelihood: { "gaussian": { "mean": mean, "cov": cov, "prefix": prefix } } } info[_params] = odict( # sampled [[ prefix + "%d" % i, { "prior": { "min": ranges[i][0], "max": ranges[i][1] }, "latex": r"\alpha_{%i}" % i } ] for i in range(dimension)] + # derived [[ prefix + "derived_%d" % i, { "min": -3, "max": 3, "latex": r"\beta_{%i}" % i } ] for i in range(dimension * n_modes)]) return info
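# Hedged usage sketch: a 3d random Gaussian in the unit cube; with the default
# mpi_aware=True, all MPI processes receive the same randomly drawn mean/cov.
info = info_random_gaussian(ranges=[[0, 1]] * 3, n_modes=1, prefix="a_")
print(list(info[_params]))  # a_0, a_1, a_2, plus derived a_derived_0 ... a_derived_2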
def check_convergence_and_learn_proposal(self): """ Checks the convergence of the sampling process, and, if requested, learns a new covariance matrix for the proposal distribution from the covariance of the last samples. """ if get_mpi(): # Compute and gather means, covs and CL intervals of last half of chains mean = self.collection.mean(first=int(self.n() / 2)) cov = self.collection.cov(first=int(self.n() / 2)) mcsamples = self.collection._sampled_to_getdist_mcsamples( first=int(self.n() / 2)) try: bound = np.array([[ mcsamples.confidence(i, limfrac=self.Rminus1_cl_level / 2., upper=which) for i in range(self.model.prior.d()) ] for which in [False, True]]).T success_bounds = True except: bound = None success_bounds = False Ns, means, covs, bounds = map( lambda x: np.array(get_mpi_comm().gather(x)), [self.n(), mean, cov, bound]) else: # Compute and gather means, covs and CL intervals of last m-1 chain fractions m = 1 + self.Rminus1_single_split cut = int(self.collection.n() / m) if cut <= 1: self.log.error( "Not enough points in chain to check convergence. " "Increase `check_every` or reduce `Rminus1_single_split`.") raise HandledException Ns = (m - 1) * [cut] means = np.array([ self.collection.mean(first=i * cut, last=(i + 1) * cut - 1) for i in range(1, m) ]) covs = np.array([ self.collection.cov(first=i * cut, last=(i + 1) * cut - 1) for i in range(1, m) ]) # No logging of warnings temporarily, so getdist won't complain unnecessarily logging.disable(logging.WARNING) mcsamples_list = [ self.collection._sampled_to_getdist_mcsamples( first=i * cut, last=(i + 1) * cut - 1) for i in range(1, m) ] logging.disable(logging.NOTSET) try: bounds = [ np.array([[ mcs.confidence(i, limfrac=self.Rminus1_cl_level / 2., upper=which) for i in range(self.model.prior.d()) ] for which in [False, True]]).T for mcs in mcsamples_list ] success_bounds = True except: bounds = None success_bounds = False # Compute convergence diagnostics if not get_mpi_rank(): # "Within" or "W" term -- our "units" for assessing convergence # and our prospective new covariance matrix mean_of_covs = np.average(covs, weights=Ns, axis=0) # "Between" or "B" term # We don't weight with the number of samples in the chains here: # shorter chains will likely be outliers, and we want to notice them cov_of_means = np.atleast_2d(np.cov(means.T)) # , fweights=Ns) # For numerical stability, we turn mean_of_covs into correlation matrix: # rho = (diag(Sigma))^(-1/2) * Sigma * (diag(Sigma))^(-1/2) # and apply the same transformation to the mean of covs (same eigenvals!) diagSinvsqrt = np.diag(np.power(np.diag(cov_of_means), -0.5)) corr_of_means = diagSinvsqrt.dot(cov_of_means).dot(diagSinvsqrt) norm_mean_of_covs = diagSinvsqrt.dot(mean_of_covs).dot( diagSinvsqrt) # Cholesky of (normalized) mean of covs and eigvals of Linv*cov_of_means*L try: L = np.linalg.cholesky(norm_mean_of_covs) except np.linalg.LinAlgError: self.log.warning( "Negative covariance eigenvectors. " "This may mean that the covariance of the samples does not " "contain enough information at this point. " "Skipping this checkpoint") success = False else: Linv = np.linalg.inv(L) try: eigvals = np.linalg.eigvalsh( Linv.dot(corr_of_means).dot(Linv.T)) success = True except np.linalg.LinAlgError: self.log.warning("Could not compute eigenvalues. " "Skipping this checkpoint.") success = False if success: Rminus1 = max(np.abs(eigvals)) # For real square matrices, a possible def of the cond number is: condition_number = Rminus1 / min(np.abs(eigvals)) self.log.debug("Condition number = %g", condition_number) self.log.debug("Eigenvalues = %r", eigvals) self.log.info( "Convergence of means: R-1 = %f after %d accepted steps" % (Rminus1, (sum(Ns) if get_mpi() else self.n())) + (" = sum(%r)" % list(Ns) if get_mpi() else "")) # Have we converged in means? # (criterion must be fulfilled twice in a row) if max(Rminus1, self.Rminus1_last) < self.Rminus1_stop: # Check the convergence of the bounds of the confidence intervals # Same as R-1, but with the rms deviation from the mean bound # in units of the mean standard deviation of the chains if success_bounds: Rminus1_cl = (np.std(bounds, axis=0).T / np.sqrt(np.diag(mean_of_covs))) self.log.debug("normalized std's of bounds = %r", Rminus1_cl) self.log.info( "Convergence of bounds: R-1 = %f after %d " % (np.max(Rminus1_cl), (sum(Ns) if get_mpi() else self.n())) + "accepted steps" + (" = sum(%r)" % list(Ns) if get_mpi() else "")) if np.max(Rminus1_cl) < self.Rminus1_cl_stop: self.converged = True self.log.info("The run has converged!") self._Ns = Ns else: self.log.info( "Computation of the bounds was not possible. " "Waiting until the next checkpoint") if get_mpi(): # Broadcast and save the convergence status and the last R-1 of means success = get_mpi_comm().bcast( (success if not get_mpi_rank() else None), root=0) if success: self.Rminus1_last = get_mpi_comm().bcast( (Rminus1 if not get_mpi_rank() else None), root=0) self.converged = get_mpi_comm().bcast( (self.converged if not get_mpi_rank() else None), root=0) else: if success: self.Rminus1_last = Rminus1 # Do we want to learn a better proposal pdf? if self.learn_proposal and not self.converged and success: good_Rminus1 = (self.learn_proposal_Rminus1_max > self.Rminus1_last > self.learn_proposal_Rminus1_min) if not good_Rminus1: if not get_mpi_rank(): self.log.info("Bad convergence statistics: " "waiting until the next checkpoint.") return if get_mpi(): if get_mpi_rank(): mean_of_covs = np.empty( (self.model.prior.d(), self.model.prior.d())) get_mpi_comm().Bcast(mean_of_covs, root=0) else: mean_of_covs = covs[0] try: self.proposer.set_covariance(mean_of_covs) except: self.log.debug( "Updating covariance matrix failed unexpectedly. " "Waiting until next checkpoint.") if not get_mpi_rank(): self.log.info("Updated covariance matrix of proposal pdf.") self.log.debug("%r", mean_of_covs) # Save checkpoint info self.write_checkpoint()
def check_convergence_and_learn_proposal(self): """ Checks the convergence of the sampling process (MPI only), and, if requested, learns a new covariance matrix for the proposal distribution from the covariance of the last samples. """ # Compute and gather means, covs and CL intervals of last half of chains mean = self.collection.mean(first=int(self.n() / 2)) cov = self.collection.cov(first=int(self.n() / 2)) # No logging of warnings temporarily, so getdist won't complain unnecessarily logging.disable(logging.WARNING) mcsamples = self.collection.sampled_to_getdist_mcsamples( first=int(self.n() / 2)) logging.disable(logging.NOTSET) bound = np.array([[ mcsamples.confidence(i, limfrac=self.Rminus1_cl_level / 2., upper=which) for i in range(self.prior.d()) ] for which in [False, True]]).T Ns, means, covs, bounds = map( lambda x: np.array((get_mpi_comm().gather(x) if get_mpi() else [x])), [self.n(), mean, cov, bound]) # Compute convergence diagnostics if get_mpi(): if get_mpi_rank() == 0: # "Within" or "W" term -- our "units" for assessing convergence # and our prospective new covariance matrix mean_of_covs = np.average(covs, weights=Ns, axis=0) # "Between" or "B" term # We don't weight with the number of samples in the chains here: # shorter chains will likely be outliers, and we want to notice them cov_of_means = np.cov(means.T) # , fweights=Ns) # For numerical stability, we turn mean_of_covs into correlation matrix: # rho = (diag(Sigma))^(-1/2) * Sigma * (diag(Sigma))^(-1/2) # and apply the same transformation to the mean of covs (same eigenvals!) diagSinvsqrt = np.diag(np.power(np.diag(cov_of_means), -0.5)) corr_of_means = diagSinvsqrt.dot(cov_of_means).dot( diagSinvsqrt) norm_mean_of_covs = diagSinvsqrt.dot(mean_of_covs).dot( diagSinvsqrt) # Cholesky of (normalized) mean of covs and eigvals of Linv*cov_of_means*L try: L = np.linalg.cholesky(norm_mean_of_covs) except np.linalg.LinAlgError: self.log.warning( "Negative covariance eigenvectors. " "This may mean that the covariance of the samples does not " "contain enough information at this point. " "Skipping this checkpoint") success = False else: Linv = np.linalg.inv(L) eigvals = np.linalg.eigvalsh( Linv.dot(corr_of_means).dot(Linv.T)) Rminus1 = max(np.abs(eigvals)) # For real square matrices, a possible def of the cond number is: condition_number = Rminus1 / min(np.abs(eigvals)) self.log.debug("Condition number = %g", condition_number) self.log.debug("Eigenvalues = %r", eigvals) self.log.info( "Convergence of means: R-1 = %f after %d samples", Rminus1, self.n()) success = True # Have we converged in means? 
# (criterion must be fulfilled twice in a row) if (max(Rminus1, getattr(self, "Rminus1_last", np.inf)) < self.Rminus1_stop): # Check the convergence of the bounds of the confidence intervals # Same as R-1, but with the rms deviation from the mean bound # in units of the mean standard deviation of the chains Rminus1_cl = (np.std(bounds, axis=0).T / np.sqrt(np.diag(mean_of_covs))) self.log.debug("normalized std's of bounds = %r", Rminus1_cl) self.log.info( "Convergence of bounds: R-1 = %f after %d samples", np.max(Rminus1_cl), self.n()) if np.max(Rminus1_cl) < self.Rminus1_cl_stop: self.converged = True self.log.info("The run has converged!") # Broadcast and save the convergence status and the last R-1 of means success = get_mpi_comm().bcast( (success if not get_mpi_rank() else None), root=0) if success: self.Rminus1_last = get_mpi_comm().bcast( (Rminus1 if not get_mpi_rank() else None), root=0) self.converged = get_mpi_comm().bcast( (self.converged if not get_mpi_rank() else None), root=0) else: # No MPI pass # Do we want to learn a better proposal pdf? if self.learn_proposal and not self.converged: # update iff (not MPI, or MPI and "good" Rminus1) if get_mpi(): good_Rminus1 = (self.learn_proposal_Rminus1_max > self.Rminus1_last > self.learn_proposal_Rminus1_min) if not good_Rminus1: if not get_mpi_rank(): self.log.info("Bad convergence statistics: " "waiting until the next checkpoint.") return if get_mpi(): if get_mpi_rank(): mean_of_covs = np.empty((self.prior.d(), self.prior.d())) get_mpi_comm().Bcast(mean_of_covs, root=0) elif not get_mpi(): mean_of_covs = covs[0] self.proposer.set_covariance(mean_of_covs) if not get_mpi_rank(): self.log.info("Updated covariance matrix of proposal pdf.") self.log.debug("%r", mean_of_covs)
def initialize(self): """Imports the PolyChord sampler and prepares its arguments.""" if not get_mpi_rank(): # rank = 0 (MPI master) or None (no MPI) self.log.info("Initializing") # If path not given, try using general path to modules if not self.path and self.path_install: self.path = get_path(self.path_install) if self.path: if not get_mpi_rank(): self.log.info("Importing *local* PolyChord from " + self.path) if not os.path.exists(os.path.realpath(self.path)): self.log.error("The given path does not exist.") raise HandledException pc_build_path = get_build_path(self.path) if not pc_build_path: self.log.error("Either PolyChord is not in the given folder, " "'%s', or you have not compiled it.", self.path) raise HandledException # Inserting the previously found path into the list of import folders sys.path.insert(0, pc_build_path) else: self.log.info("Importing *global* PolyChord.") try: import pypolychord from pypolychord.settings import PolyChordSettings self.pc = pypolychord except ImportError: self.log.error( "Couldn't find the PolyChord python interface. " "Make sure that you have compiled it, and that you either\n" " (a) specify a path (you didn't) or\n" " (b) install the Python interface globally with\n" " '/path/to/PolyChord/python setup.py install --user'") raise HandledException # Prepare arguments and settings self.nDims = self.model.prior.d() self.nDerived = (len(self.model.parameterization.derived_params()) + len(self.model.prior) + len(self.model.likelihood._likelihoods)) if self.logzero is None: self.logzero = np.nan_to_num(-np.inf) if self.max_ndead == np.inf: self.max_ndead = -1 for p in ["nlive", "num_repeats", "nprior", "max_ndead"]: setattr(self, p, read_dnumber(getattr(self, p), self.nDims, dtype=int)) # Fill the automatic ones if getattr(self, "feedback", None) is None: values = {logging.CRITICAL: 0, logging.ERROR: 0, logging.WARNING: 0, logging.INFO: 1, logging.DEBUG: 2} self.feedback = values[self.log.getEffectiveLevel()] try: output_folder = getattr(self.output, "folder") output_prefix = getattr(self.output, "prefix") or "" self.read_resume = self.resuming except AttributeError: # dummy output -- no resume! self.read_resume = False from tempfile import gettempdir output_folder = gettempdir() if not get_mpi_rank(): from random import random output_prefix = hex(int(random() * 16 ** 6))[2:] else: output_prefix = None if get_mpi(): output_prefix = get_mpi_comm().bcast(output_prefix, root=0) self.base_dir = os.path.join(output_folder, self.base_dir) self.file_root = output_prefix if not get_mpi_rank(): # Creating output folder, if it does not exist (just one process) if not os.path.exists(self.base_dir): os.makedirs(self.base_dir) # Idem, a clusters folder if needed -- notice that PolyChord's default # is "True", here "None", hence the funny condition below if self.do_clustering is not False: # None here means "default" try: os.makedirs(os.path.join(self.base_dir, clusters)) except OSError: # exists! pass self.log.info("Storing raw PolyChord output in '%s'.", self.base_dir) # Exploiting the speed hierarchy speeds, blocks = self.model.likelihood._speeds_of_params(int_speeds=True) blocks_flat = list(chain(*blocks)) self.ordering = [ blocks_flat.index(p) for p in self.model.parameterization.sampled_params()] self.grade_dims = [len(block) for block in blocks] # self.grade_frac = np.array( # [i*j for i,j in zip(self.grade_dims, speeds)]) # self.grade_frac = ( # self.grade_frac/sum(self.grade_frac)) # Disabled for now. 
# We need a way to override the "time" part of the meaning of grade_frac self.grade_frac = [1 / len(self.grade_dims) for _ in self.grade_dims] # Assign settings pc_args = ["nlive", "num_repeats", "nprior", "do_clustering", "precision_criterion", "max_ndead", "boost_posterior", "feedback", "logzero", "update_files", "posteriors", "equals", "cluster_posteriors", "write_resume", "read_resume", "write_stats", "write_live", "write_dead", "base_dir", "grade_frac", "grade_dims", "file_root"] self.pc_settings = PolyChordSettings( self.nDims, self.nDerived, seed=(self.seed if self.seed is not None else -1), **{p: getattr(self, p) for p in pc_args if getattr(self, p) is not None}) # prior conversion from the hypercube bounds = self.model.prior.bounds( confidence_for_unbounded=self.confidence_for_unbounded) # Check if priors are bounded (nan's to inf) inf = np.where(np.isinf(bounds)) if len(inf[0]): params_names = self.model.parameterization.sampled_params() params = [params_names[i] for i in sorted(list(set(inf[0])))] self.log.error("PolyChord needs bounded priors, but the parameter(s) '" + "', '".join(params) + "' is(are) unbounded.") raise HandledException locs = bounds[:, 0] scales = bounds[:, 1] - bounds[:, 0] # This function re-scales the parameters AND puts them in the right order self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales).tolist() # We will need the volume of the prior domain, since PolyChord divides by it self.logvolume = np.log(np.prod(scales)) # Done! if not get_mpi_rank(): self.log.info("Calling PolyChord with arguments:") for p, v in inspect.getmembers(self.pc_settings, lambda a: not (callable(a))): if not p.startswith("_"): self.log.info(" %s: %s", p, v)
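# The hypercube-to-physical mapping behind pc_prior, in isolation (hedged
# sketch): PolyChord samples x in [0, 1]^d; each coordinate is re-scaled into
# its prior box, after reordering into the speed-blocked parameter order.
import numpy as np

bounds = np.array([[0., 2.], [-1., 1.]])  # per-parameter [min, max]
ordering = [1, 0]                         # position of each sampled param in the blocks
locs, scales = bounds[:, 0], bounds[:, 1] - bounds[:, 0]
pc_prior = lambda x: (locs + np.array(x)[ordering] * scales).tolist()
print(pc_prior([0.5, 0.25]))  # -> [0.5, 0.0]
logvolume = np.log(np.prod(scales))  # log prior volume, added back in logpost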
def initialize(self): """Imports the PolyChord sampler and prepares its arguments.""" if am_single_or_primary_process( ): # rank = 0 (MPI master) or None (no MPI) self.log.info("Initializing") # If path not given, try using general path to modules if not self.path and self.path_install: self.path = get_path(self.path_install) if self.path: if am_single_or_primary_process(): self.log.info("Importing *local* PolyChord from " + self.path) if not os.path.exists(os.path.realpath(self.path)): raise LoggedError( self.log, "The given path does not exist. " "Try installing PolyChord with " "'cobaya-install polychord -m [modules_path]") pc_build_path = get_build_path(self.path) if not pc_build_path: raise LoggedError( self.log, "Either PolyChord is not in the given folder, " "'%s', or you have not compiled it.", self.path) # Inserting the previously found path into the list of import folders sys.path.insert(0, pc_build_path) else: self.log.info("Importing *global* PolyChord.") try: import pypolychord from pypolychord.settings import PolyChordSettings self.pc = pypolychord except ImportError: raise LoggedError( self.log, "Couldn't find the PolyChord python interface. " "Make sure that you have compiled it, and that you either\n" " (a) specify a path (you didn't) or\n" " (b) install the Python interface globally with\n" " '/path/to/PolyChord/python setup.py install --user'") # Prepare arguments and settings self.nDims = self.model.prior.d() self.nDerived = (len(self.model.parameterization.derived_params()) + len(self.model.prior) + len(self.model.likelihood._likelihoods)) if self.logzero is None: self.logzero = np.nan_to_num(-np.inf) if self.max_ndead == np.inf: self.max_ndead = -1 for p in ["nlive", "nprior", "max_ndead"]: setattr(self, p, read_dnumber(getattr(self, p), self.nDims, dtype=int)) # Fill the automatic ones if getattr(self, "feedback", None) is None: values = { logging.CRITICAL: 0, logging.ERROR: 0, logging.WARNING: 0, logging.INFO: 1, logging.DEBUG: 2 } self.feedback = values[self.log.getEffectiveLevel()] try: output_folder = getattr(self.output, "folder") output_prefix = getattr(self.output, "prefix") or "" self.read_resume = self.resuming except AttributeError: # dummy output -- no resume! self.read_resume = False from tempfile import gettempdir output_folder = gettempdir() if am_single_or_primary_process(): from random import random output_prefix = hex(int(random() * 16**6))[2:] else: output_prefix = None if more_than_one_process(): output_prefix = get_mpi_comm().bcast(output_prefix, root=0) self.base_dir = os.path.join(output_folder, self.base_dir) self.file_root = output_prefix if am_single_or_primary_process(): # Creating output folder, if it does not exist (just one process) if not os.path.exists(self.base_dir): os.makedirs(self.base_dir) # Idem, a clusters folder if needed -- notice that PolyChord's default # is "True", here "None", hence the funny condition below if self.do_clustering is not False: # None here means "default" try: os.makedirs(os.path.join(self.base_dir, clusters)) except OSError: # exists! 
pass self.log.info("Storing raw PolyChord output in '%s'.", self.base_dir) # Exploiting the speed hierarchy if self.blocking: speeds, blocks = self.model.likelihood._check_speeds_of_params( self.blocking) else: speeds, blocks = self.model.likelihood._speeds_of_params( int_speeds=True) blocks_flat = list(chain(*blocks)) self.ordering = [ blocks_flat.index(p) for p in self.model.parameterization.sampled_params() ] self.grade_dims = np.array([len(block) for block in blocks]) # bugfix: pypolychord's C interface for Fortran does not like int numpy types self.grade_dims = [int(x) for x in self.grade_dims] # Steps per block # NB: num_repeats is ignored by PolyChord when int "grade_frac" given, # so needs to be applied by hand. # Make sure that speeds are integer, and that the slowest is 1, # for a straightforward application of num_repeats speeds = relative_to_int(speeds, 1) # In num_repeats, `d` is interpreted as dimension of each block self.grade_frac = [ int(speed * read_dnumber(self.num_repeats, dim_block)) for speed, dim_block in zip(speeds, self.grade_dims) ] # Assign settings pc_args = [ "nlive", "num_repeats", "nprior", "do_clustering", "precision_criterion", "max_ndead", "boost_posterior", "feedback", "logzero", "posteriors", "equals", "compression_factor", "cluster_posteriors", "write_resume", "read_resume", "write_stats", "write_live", "write_dead", "base_dir", "grade_frac", "grade_dims", "feedback", "read_resume", "base_dir", "file_root", "grade_frac", "grade_dims" ] # As stated above, num_repeats is ignored, so let's not pass it pc_args.pop(pc_args.index("num_repeats")) self.pc_settings = PolyChordSettings( self.nDims, self.nDerived, seed=(self.seed if self.seed is not None else -1), **{ p: getattr(self, p) for p in pc_args if getattr(self, p) is not None }) # prior conversion from the hypercube bounds = self.model.prior.bounds( confidence_for_unbounded=self.confidence_for_unbounded) # Check if priors are bounded (nan's to inf) inf = np.where(np.isinf(bounds)) if len(inf[0]): params_names = self.model.parameterization.sampled_params() params = [params_names[i] for i in sorted(list(set(inf[0])))] raise LoggedError( self.log, "PolyChord needs bounded priors, but the parameter(s) '" "', '".join(params) + "' is(are) unbounded.") locs = bounds[:, 0] scales = bounds[:, 1] - bounds[:, 0] # This function re-scales the parameters AND puts them in the right order self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales ).tolist() # We will need the volume of the prior domain, since PolyChord divides by it self.logvolume = np.log(np.prod(scales)) # Prepare callback function if self.callback_function is not None: self.callback_function_callable = (get_external_function( self.callback_function)) self.last_point_callback = 0 # Prepare runtime live and dead points collections self.live = Collection(self.model, None, name="live", initial_size=self.pc_settings.nlive) self.dead = Collection(self.model, self.output, name="dead") self.n_sampled = len(self.model.parameterization.sampled_params()) self.n_derived = len(self.model.parameterization.derived_params()) self.n_priors = len(self.model.prior) self.n_likes = len(self.model.likelihood._likelihoods) # Done! if am_single_or_primary_process(): self.log.info("Calling PolyChord with arguments:") for p, v in inspect.getmembers(self.pc_settings, lambda a: not (callable(a))): if not p.startswith("_"): self.log.info(" %s: %s", p, v)