def close(self, *args):
    """
    Wraps up the maximization run.

    Checks whether the maximization succeeded, keeps the best result among all MPI
    processes (when running with MPI) and, in the root process only, stores the
    maximum as a :class:`OnePoint`, logs it and calls its ``_out_update``.

    Raises ``HandledException`` if the optimizer reports failure, or if the reported
    maximum cannot be reproduced by re-evaluating the posterior at that point.
    """
    # No result stored: something failed earlier, so there is nothing to close
    if not hasattr(self, "result"):
        return
    if get_mpi_size():
        gathered = get_mpi_comm().gather(self.result, root=0)
        if not get_mpi_rank():
            # The smallest value of the minimized function wins
            i_best = np.argmin([candidate.fun for candidate in gathered])
            self.result = gathered[i_best]
    # Everything below is done by the root process only
    if get_mpi_rank():
        return
    if not self.result.success:
        self.log.error(
            "Maximization failed! Here is the `scipy` raw result:\n%r", self.result)
        raise HandledException
    self.log.info("log%s maximized at %g",
                  "likelihood" if self.ignore_prior else "posterior",
                  -self.result.fun)
    # Re-evaluate at the maximum, as a consistency check and to get derived params
    post = self.model.logposterior(self.result.x)
    if self.ignore_prior:
        recomputed_max = sum(post.loglikes)
    else:
        recomputed_max = post.logpost
    if not np.allclose(-self.result.fun, recomputed_max):
        self.log.error("Cannot reproduce result. Something bad happened. "
                       "Recomputed max: %g at %r", recomputed_max, self.result.x)
        raise HandledException
    extension = "likelihood" if self.ignore_prior else "posterior"
    self.maximum = OnePoint(
        self.model, self.output, name="maximum", extension=extension)
    self.maximum.add(self.result.x,
                     derived=post.derived,
                     logpost=post.logpost,
                     logpriors=post.logpriors,
                     loglikes=post.loglikes)
    table = self.maximum.data.to_string()
    self.log.info("Parameter values at maximum:\n%s" % table)
    self.maximum._out_update()
def format(self, record):
    """
    Formats ``record`` as ``[rank][name] *LEVEL* message``.

    The ``[rank]`` prefix is only added when ``get_mpi()`` is truthy, and the
    ``*ERROR* ``/``*WARNING* `` tag only for records of those levels.

    The format string is rebuilt for every record. It is stored both in
    ``self._fmt`` and, when present, in ``self._style._fmt``: Python 3's
    ``logging.Formatter`` formats the message through its style object, so setting
    ``self._fmt`` alone would have no effect there.
    """
    rank_tag = "[%d]" % get_mpi_rank() if get_mpi() else ""
    level_tag = {
        logging.ERROR: "*ERROR* ",
        logging.WARNING: "*WARNING* ",
    }.get(record.levelno, "")
    fmt = rank_tag + "[%(name)s] " + level_tag + "%(message)s"
    self._fmt = fmt
    # Python 3: the format string actually used lives in the style object
    style = getattr(self, "_style", None)
    if style is not None:
        style._fmt = fmt
    return logging.Formatter.format(self, record)
def run(self):
    """
    Runs the sampler.

    Draws an initial point from the prior's reference, then keeps calling
    ``get_new_sample`` until either the maximum number of samples is reached or
    convergence is flagged, invoking the callback and the convergence check along
    the way. Finally, it flushes the collection to the output.
    """
    # The first point is discarded later (its weight cannot be determined), but its
    # derived parameters are still needed: since the proposal is "blocked", we may
    # be saving the initial state of some block.
    start = self.prior.reference(max_tries=self.max_tries)
    logpost, _, _, derived = self.logposterior(start)
    self.current_point.add(start, derived=derived, logpost=logpost)
    self.log.info("Initial point:\n %r ", self.current_point)
    # Main loop
    self.converged = False
    if self.burn_in:
        burn_in_note = (
            "(NB: nothing will be printed until %d burn-in samples " % self.burn_in +
            "have been obtained)")
    else:
        burn_in_note = ""
    self.log.info("Sampling!" + burn_in_note)
    while self.n() < self.effective_max_samples and not self.converged:
        self.get_new_sample()
        # Callback: only if defined, every `callback_every` samples, and only when
        # the current point has just been accepted (weight == 1)
        call_back = (hasattr(self, "callback_function_callable") and
                     not (max(self.n(), 1) % self.callback_every) and
                     self.current_point[_weight] == 1)
        if call_back:
            self.callback_function_callable(self)
        # Convergence check and (optionally) learning of the proposal covmat
        if self.check_all_ready():
            self.check_convergence_and_learn_proposal()
    # Make sure that the last batch of samples (< output_every) is written
    self.collection.out_update()
    if not get_mpi_rank():
        self.log.info("Sampling complete after %d accepted steps.", self.n())
def write_checkpoint(self):
    """
    Dumps the current proposal covariance matrix and the checkpoint info.

    Does nothing unless this is the root process (rank 0 or no MPI) and there is
    an output defined.
    """
    if get_mpi_rank() or not self.output:
        return
    checkpoint_filename = self.checkpoint_filename()
    covmat_filename = self.covmat_filename()
    sampled_names = list(self.model.parameterization.sampled_params())
    np.savetxt(covmat_filename, self.proposer.get_covariance(),
               header=" ".join(sampled_names))
    # Initial checkpoint (no samples yet, burn-in not finished): repeat the burn-in.
    # Otherwise store "d", to avoid overweighting the last point of the prev. run.
    if not self.n() and self.burn_in_left:
        burn_in = self.burn_in
    else:
        burn_in = "d"
    sampler_state = odict([
        ["converged", bool(self.converged)],
        ["Rminus1_last", self.Rminus1_last],
        ["proposal_scale", self.proposer.get_scale()],
        ["blocks", self.blocks],
        ["oversampling_factors", self.oversampling_factors],
        ["i_last_slow_block", self.i_last_slow_block],
        ["burn_in", burn_in],
        ["mpi_size", get_mpi_size()],
    ])
    checkpoint_info = {_sampler: {self.name: sampler_state}}
    yaml_dump_file(checkpoint_filename, checkpoint_info, error_if_exists=False)
    self.log.debug("Dumped checkpoint info and current covmat.")
def send_error_signal(self):
    """
    Notifies every other MPI process of an error.

    Posts a non-blocking send of ``True``, tagged with ``_error_tag``, to each rank
    other than this one.
    """
    for dest in range(get_mpi_size()):
        if dest == get_mpi_rank():
            # No need to signal ourselves
            continue
        get_mpi_comm().isend(True, dest=dest, tag=_error_tag)
def initialize(self):
    """
    Prepares the arguments for `iminuit.minimize`.

    Defines ``self.logp`` (log-posterior or, if ``ignore_prior``, the sum of the
    log-likelihoods), draws an initial point with finite ``logp`` from the prior's
    reference, and stores the call arguments in ``self.kwargs`` (updated with
    ``self.override``, if given).
    """
    if not get_mpi_rank():
        self.log.info("Initializing")
    if self.ignore_prior:
        self.logp = (
            lambda x: sum(self.model.loglikes(x, return_derived=True)[0]))
    else:
        self.logp = (lambda x: self.model.logposterior(x, make_finite=True)[0])
    # Initial point: keep drawing from the reference until lik/post is finite
    while True:
        initial_point = self.model.prior.reference()
        if np.isfinite(self.logp(initial_point)):
            break
    verbose = (self.log.getEffectiveLevel() == logging.DEBUG)
    minimizer_options = {"maxfev": self.maxfev, "disp": verbose}
    self.kwargs = {
        # The minimizer minimizes, so we give it minus the log-probability
        "fun": (lambda x: -self.logp(x)),
        "x0": initial_point,
        "bounds": self.model.prior.bounds(confidence_for_unbounded=0.999),
        "options": minimizer_options,
    }
    self.kwargs.update(self.override or {})
    self.log.debug("Arguments for iminuit.minimize:\n%r", self.kwargs)
def format(self, record):
    """
    Formats ``record`` as ``[rank : name asctime] *LEVEL* message``.

    The ``rank : `` part is only present when ``more_than_one_process()`` is truthy,
    the time stamp only when the enclosing ``debug`` flag is set, and the
    ``*ERROR* ``/``*WARNING* `` tag only for records of those levels.

    The format string is rebuilt for every record. It is stored both in
    ``self._fmt`` and, when present, in ``self._style._fmt``: Python 3's
    ``logging.Formatter`` formats the message (and decides whether ``asctime`` is
    needed) through its style object, so setting ``self._fmt`` alone would have no
    effect there.
    """
    rank_tag = "%d : " % get_mpi_rank() if more_than_one_process() else ""
    time_tag = " %(asctime)s " if debug else ""
    level_tag = {
        logging.ERROR: "*ERROR* ",
        logging.WARNING: "*WARNING* ",
    }.get(record.levelno, "")
    fmt = "[" + rank_tag + "%(name)s" + time_tag + "] " + level_tag + "%(message)s"
    self._fmt = fmt
    # Python 3: the format string actually used lives in the style object
    style = getattr(self, "_style", None)
    if style is not None:
        style._fmt = fmt
    return logging.Formatter.format(self, record)
def format(self, record):
    """
    Formats ``record`` as `` asctime [rank : name] *LEVEL* message``.

    The time stamp is only present when the enclosing ``debug`` flag is set, the
    ``rank : `` part only when ``mpi.more_than_one_process()`` is truthy, and the
    ``*ERROR* ``/``*WARNING* `` tag only for records of those levels. The format
    string of the formatter's style is overwritten on every call.
    """
    time_tag = " %(asctime)s " if debug else ""
    if mpi.more_than_one_process():
        rank_tag = "%d : " % mpi.get_mpi_rank()
    else:
        rank_tag = ""
    level_tags = {logging.ERROR: "*ERROR* ", logging.WARNING: "*WARNING* "}
    level_tag = level_tags.get(record.levelno, "")
    self._style._fmt = (
        time_tag + "[" + rank_tag + "%(name)s" + "] " + level_tag + "%(message)s")
    return super().format(record)
def products(self):
    """
    Auxiliary function to define what should be returned in a scripted call.

    Returns:
       In the root process (rank 0 or no MPI), a dictionary with the
       :class:`OnePoint` stored as ``self.maximum`` under ``"maximum"`` and the
       `scipy.optimize.OptimizeResult
       <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html>`_
       instance under ``"OptimizeResult"``. In any other process, ``None``.
    """
    if get_mpi_rank():
        return None
    return {"maximum": self.maximum, "OptimizeResult": self.result}
def citation_script():
    """
    Command-line entry point of the citation tool.

    In the root process only (rank 0 or no MPI): sets up the logger, parses one or
    more input file names from the command line, loads them and passes the
    resulting infos to ``citation``.
    """
    from cobaya.mpi import get_mpi_rank
    if get_mpi_rank():
        return
    # Configure the logger ASAP
    from cobaya.log import logger_setup
    logger_setup()
    # Parse arguments and launch
    import argparse
    parser = argparse.ArgumentParser(description="Cobaya's citation tool.")
    parser.add_argument("files", action="store", nargs="+",
                        metavar="input_file.yaml",
                        help="One or more input files.")
    from cobaya.input import load_input
    input_files = parser.parse_args().files
    infos = [load_input(input_file) for input_file in input_files]
    citation(*infos)
def run(info):
    """
    Runs a sampler on the model described by ``info``.

    ``info`` must be dictionary-like (it is checked to have a ``keys`` attribute).
    Sets up logging and output, completes the input with defaults, dumps the
    (full) info, initializes the model and the sampler, and runs the latter.

    Returns a tuple with a deep copy of the full info and the sampler's products.
    """
    assert hasattr(info, "keys"), (
        "The first argument must be a dictionary with the info needed for the run. "
        "If you were trying to pass the name of an input file instead, "
        "load it first with 'cobaya.input.load_input', "
        "or, if you were passing a yaml string, load it with 'cobaya.yaml.yaml_load'."
    )
    # Configure the logger ASAP
    # Just a dummy import before configuring the logger, until I fix root/individual level
    import getdist
    logger_setup(info.get(_debug), info.get(_debug_file))
    import logging
    # Initialize output, if required
    output = Output(
        output_prefix=info.get(_output_prefix),
        resume=info.get(_resume),
        force_output=info.pop(_force, None),
        force_reproducible=info.get(_force_reproducible, _force_reproducible_default))
    # Create the full input information, including defaults for each module.
    full_info = get_full_info(info)
    if output:
        full_info[_output_prefix] = output.updated_output_prefix()
        full_info[_resume] = output.is_resuming()
    # In debug mode, show the full info -- but don't dump unless we are doing output,
    # just in case something is not serializable. (May be fixed in the future if we
    # find a way to serialize external functions.)
    show_full_info = (logging.root.getEffectiveLevel() <= logging.DEBUG and
                      info.get(_output_prefix) and not get_mpi_rank())
    if show_full_info:
        run_log = logging.getLogger(__name__.split(".")[-1])
        run_log.info("Input info updated with defaults (dumped to YAML):\n%s",
                     yaml_dump(full_info, force_reproducible=False))
    # We dump the info now, before modules initialization, lest it is accidentally
    # modified. If resuming a sample, it checks that old and new infos are consistent
    output.dump_info(info, full_info)
    # Initialize the posterior and the sampler
    model_manager = Model(
        full_info[_params], full_info[_likelihood],
        full_info.get(_prior), full_info.get(_theory),
        modules=info.get(_path_install), timing=full_info.get(_timing),
        allow_renames=False)
    with model_manager as model:
        sampler_manager = Sampler(
            full_info[_sampler], model, output,
            resume=full_info.get(_resume), modules=info.get(_path_install))
        with sampler_manager as sampler:
            sampler.run()
    # For scripted calls
    return deepcopy(full_info), sampler.products()
def products(self):
    """
    Auxiliary function to define what should be returned in a scripted call.

    Returns:
       In the root process (rank 0 or no MPI), a dictionary with the sample
       ``Collection`` under ``"sample"``, ``self.logZ`` and ``self.logZstd`` under
       ``"logZ"`` and ``"logZstd"`` and, if clustering is enabled in the PolyChord
       settings, ``self.clusters`` under ``"clusters"``.
       In any other process, an empty dictionary.
    """
    if get_mpi_rank():
        return {}
    results = {
        "sample": self.collection,
        "logZ": self.logZ,
        "logZstd": self.logZstd,
    }
    if self.pc_settings.do_clustering:
        results["clusters"] = self.clusters
    return results
def check_error_signal(self):
    """
    Probes every other MPI process for an error signal, and fails if one is found.

    Raises ``LoggedError`` as soon as the status of a probe carries the tag
    ``_error_tag``.

    NB: This behaviour only shows up when running this sampler inside a Python
    script, not when running with `cobaya run` (in that case, the process raising
    an error will call `MPI_ABORT` and kill the rest).
    """
    for source in range(get_mpi_size()):
        if source == get_mpi_rank():
            continue
        # Imported here, so that mpi4py is only needed if there are other processes
        from mpi4py import MPI
        probe_status = MPI.Status()
        get_mpi_comm().iprobe(source, status=probe_status)
        if probe_status.tag == _error_tag:
            raise LoggedError(self.log, "Another process failed! Exiting.")
def check_all_ready(self):
    """
    Checks if the chain(s) is(/are) ready to check convergence and, if requested,
    learn a new covariance matrix for the proposal distribution.

    Returns ``True`` if ready, ``False`` otherwise. Without MPI, the chain is ready
    every ``check_every`` accepted samples. With MPI, this process announces that it
    has reached a checkpoint through a non-blocking all-gather (``self.req``), and
    it is only ready once that communication has finished, i.e. once all processes
    have announced themselves.

    Raises ``HandledException`` if, with MPI, more than ``max_waiting`` checkpoints
    have been reached while waiting for the rest of the chains.
    """
    msg_ready = (
        ("Ready to" if get_mpi() or self.learn_proposal else "") +
        " check convergence" +
        (" and" if get_mpi() and self.learn_proposal else "") +
        (" learn a new proposal covmat" if self.learn_proposal else ""))
    # If *just* (weight==1) got ready to check+learn
    if (self.n() > 0 and self.current_point[_weight] == 1 and
            not (self.n() % self.check_every)):
        self.log.info("Checkpoint: %d samples accepted.", self.n())
        if get_mpi():
            self.been_waiting += 1
            if self.been_waiting > self.max_waiting:
                self.log.error(
                    "Waiting for too long for all chains to be ready. "
                    "Maybe one of them is stuck or died unexpectedly?")
                raise HandledException
        self.model.dump_timing()
        # If not MPI, we are ready
        if not get_mpi():
            if msg_ready:
                self.log.info(msg_ready)
            return True
        # If MPI, tell the rest that we are ready -- we use a "gather"
        # ("reduce" was problematic), but we are in practice just pinging
        if not hasattr(self, "req"):  # just once!
            self.all_ready = np.empty(get_mpi_size())
            self.req = get_mpi_comm().Iallgather(np.array([1.]), self.all_ready)
            self.log.info(msg_ready + " (waiting for the rest...)")
    # If all processes are ready to learn (= communication finished)
    if hasattr(self, "req") and self.req.Test():
        # Sanity check: actually all processes have finished.
        # (The message is %-formatted here: passing a tuple to `assert` would show
        # the raw tuple, with the placeholder not interpolated.)
        assert np.all(self.all_ready == 1), (
            "This should not happen! Notify the developers. (Got %r)" %
            (self.all_ready,))
        if get_mpi_rank() == 0:
            # "All chains are r" + "eady to ..."
            self.log.info("All chains are r" + msg_ready[1:])
        delattr(self, "req")
        self.been_waiting = 0
        # Just in case, a barrier here
        get_mpi_comm().barrier()
        return True
    return False
def initialise(self):
    """
    Prepares the arguments for `scipy.minimize`.

    Draws an initial point with finite log-posterior (or log-likelihood, if
    ``ignore_prior``) from the prior's reference, and stores the call arguments in
    ``self.kwargs`` (updated with ``self.override``, if given).
    """
    if not get_mpi_rank():
        self.log.info("Initializing")
    # Initial point: keep drawing from the reference until lik/post is finite
    while True:
        initial_point = self.prior.reference()
        logp = self.logposterior(
            initial_point, ignore_prior=self.ignore_prior)[0]
        if np.isfinite(logp):
            break
    verbose = (self.log.getEffectiveLevel() == logging.DEBUG)
    minimizer_options = {"maxiter": self.maxiter, "disp": verbose}
    self.kwargs = {
        # The minimizer minimizes, so we give it minus the log-probability
        "fun": (lambda x: -self.logposterior(
            x, ignore_prior=self.ignore_prior, make_finite=True)[0]),
        "x0": initial_point,
        "bounds": self.prior.bounds(confidence_for_unbounded=0.999),
        "tol": self.tol,
        "options": minimizer_options,
    }
    self.kwargs.update(self.override or {})
    self.log.debug("Arguments for scipy.optimize.minimize:\n%r", self.kwargs)
def run(self):
    """
    Runs the sampler.

    Starts either from the last point of the loaded collection (when resuming and
    something was written in the previous run) or from a point drawn from the
    prior's reference. Then it keeps calling ``get_new_sample`` until either the
    maximum number of samples is reached or convergence is flagged, invoking the
    callback and the convergence check along the way. Finally, it flushes the
    collection to the output and logs the total number of accepted steps (summed
    over MPI processes, if applicable).
    """
    # Get first point, to be discarded -- not possible to determine its weight
    # Still, we need to compute derived parameters, since, as the proposal "blocked",
    # we may be saving the initial state of some block.
    # NB: if resuming but nothing was written (burn-in not finished): re-start
    self.log.info("Initial point:")
    if self.resuming and self.collection.n():
        # Position of the last stored sample. Selected positionally with `.iloc`:
        # the `.ix` indexer formerly used here was removed in pandas 1.0.
        i_last = self.collection.n() - 1
        initial_point = (
            self.collection[self.collection.sampled_params]
            .iloc[i_last]).values.copy()
        # Stored as -logpost, -logprior and chi2 = -2 * loglike: undo that
        logpost = -(self.collection[_minuslogpost].iloc[i_last].copy())
        logpriors = -(
            self.collection[self.collection.prior_names].iloc[i_last].copy())
        loglikes = -0.5 * (
            self.collection[self.collection.chi2_names].iloc[i_last].copy())
        derived = (
            self.collection[self.collection.derived_params]
            .iloc[i_last].values.copy())
    else:
        initial_point = self.model.prior.reference(max_tries=self.max_tries)
        logpost, logpriors, loglikes, derived = self.model.logposterior(
            initial_point)
    self.current_point.add(initial_point, derived=derived, logpost=logpost,
                           logpriors=logpriors, loglikes=loglikes)
    self.log.info(
        "\n%s",
        self.current_point.data.to_string(index=False, line_width=_line_width))
    # Initial dummy checkpoint (needed when 1st checkpoint not reached in prev. run)
    self.write_checkpoint()
    # Main loop!
    self.log.info("Sampling!" + (
        " (NB: nothing will be printed until %d burn-in samples " % self.burn_in +
        "have been obtained)" if self.burn_in else ""))
    while self.n() < self.effective_max_samples and not self.converged:
        self.get_new_sample()
        # Callback function: only if defined, every `callback_every` samples, and
        # only when the current point has just been accepted (weight == 1)
        if (hasattr(self, "callback_function_callable") and
                not (max(self.n(), 1) % self.callback_every) and
                self.current_point[_weight] == 1):
            self.callback_function_callable(self)
        # Checking convergence and (optionally) learning the covmat of the proposal
        if self.check_all_ready():
            self.check_convergence_and_learn_proposal()
        if self.n() == self.effective_max_samples:
            self.log.info(
                "Reached maximum number of accepted steps allowed. "
                "Stopping.")
    # Make sure the last batch of samples ( < output_every ) are written
    self.collection._out_update()
    if get_mpi():
        Ns = np.array(get_mpi_comm().gather(self.n()))
    else:
        Ns = [self.n()]
    if not get_mpi_rank():
        self.log.info("Sampling complete after %d accepted steps.", sum(Ns))
def initialize(self):
    """
    Initializes the sampler: creates the proposal distribution and the containers
    for the samples.

    In order, it:

    - converts the dimension-dependent settings to integers with ``read_dnumber``;
    - when resuming, checks that the number of chains has not changed;
    - when not resuming, deletes the covmat and collection files of previous runs;
    - creates the sample collection and the current-point holder;
    - prepares oversampling or dragging, if requested (they are incompatible);
    - creates the blocked proposer and sets its covariance matrix, either loaded
      from the checkpoint (when resuming) or built from the initial info;
    - loads the callback function, if one was given.

    Raises ``HandledException`` if the number of chains changed when resuming, if
    both oversampling and dragging are requested, or if dragging is requested but
    all speeds are equal.
    """
    self.log.debug("Initializing")
    for p in ["burn_in", "max_tries", "output_every", "check_every",
              "callback_every"]:
        setattr(self, p,
                read_dnumber(getattr(self, p), self.model.prior.d(), dtype=int))
    if self.callback_every is None:
        self.callback_every = self.check_every
    # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
    self.burn_in_left = self.burn_in + 1
    # Max # checkpoints to wait, in case one process dies without sending MPI_ABORT
    self.been_waiting = 0
    self.max_waiting = max(50, self.max_tries / self.model.prior.d())
    # `mpi_size` may be None: treat it as a single chain, both in the comparison
    # and in the error message (`max(None, 1)` raises TypeError in Python 3).
    n_chains_old = max(self.mpi_size or 0, 1)
    if self.resuming and (n_chains_old != max(get_mpi_size(), 1)):
        self.log.error(
            "Cannot resume a sample with a different number of chains: "
            "was %d and now is %d.", n_chains_old, max(get_mpi_size(), 1))
        raise HandledException
    if not self.resuming and self.output:
        # Delete previous files (if not "forced", the run would have already failed)
        if ((os.path.abspath(self.covmat_filename()) !=
             os.path.abspath(str(self.covmat)))):
            try:
                os.remove(self.covmat_filename())
            except OSError:
                pass
        # There may be more that chains than expected,
        # if #ranks was bigger in a previous run
        i = 0
        while True:
            i += 1
            collection_filename, _ = self.output.prepare_collection(str(i))
            try:
                os.remove(collection_filename)
            except OSError:
                break
    # One collection per MPI process: `name` is the MPI rank + 1
    name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank()))
    self.collection = Collection(
        self.model, self.output, name=name, resuming=self.resuming)
    self.current_point = OnePoint(self.model, OutputDummy({}), name=name)
    # Use standard MH steps by default
    self.get_new_sample = self.get_new_sample_metropolis
    # Prepare oversampling / dragging if applicable
    self.effective_max_samples = self.max_samples
    if self.oversample and self.drag:
        self.log.error("Choose either oversampling or dragging, not both.")
        raise HandledException
    if self.oversample:
        factors, blocks = self.model.likelihood._speeds_of_params(
            int_speeds=True)
        self.oversampling_factors = factors
        self.log.info("Oversampling with factors:\n" + "\n".join([
            " %d : %r" % (f, b)
            for f, b in zip(self.oversampling_factors, blocks)]))
        self.i_last_slow_block = None
        # No way right now to separate slow and fast
        slow_params = list(self.model.parameterization.sampled_params())
    elif self.drag:
        speeds, blocks = self.model.likelihood._speeds_of_params(
            fast_slow=True, int_speeds=True)
        # For now, no blocking inside either fast or slow: just 2 blocks
        self.i_last_slow_block = 0
        if np.all(speeds == speeds[0]):
            self.log.error(
                "All speeds are equal or too similar: cannot drag! "
                "Make sure to define accurate likelihoods' speeds.")
            raise HandledException
        # Make the 1st factor 1:
        speeds = [1, speeds[1] / speeds[0]]
        # Target: dragging step taking as long as slow step
        self.drag_interp_steps = self.drag * speeds[1]
        # Per dragging step, the (fast) posterior is evaluated *twice*,
        self.drag_interp_steps /= 2
        self.drag_interp_steps = int(np.round(self.drag_interp_steps))
        fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
        # Not too much or too little dragging
        drag_limits = [(int(l) * len(fast_params) if l is not None else l)
                       for l in self.drag_limits]
        if (drag_limits[0] is not None and
                self.drag_interp_steps < drag_limits[0]):
            self.log.warning(
                "Number of dragging steps clipped from below: was not "
                "enough to efficiently explore the fast directions -- "
                "avoid this limit by decreasing 'drag_limits[0]'.")
            self.drag_interp_steps = drag_limits[0]
        if (drag_limits[1] is not None and
                self.drag_interp_steps > drag_limits[1]):
            self.log.warning(
                "Number of dragging steps clipped from above: "
                "excessive, probably inefficient, exploration of the "
                "fast directions -- "
                "avoid this limit by increasing 'drag_limits[1]'.")
            self.drag_interp_steps = drag_limits[1]
        # Re-scale steps between checkpoint and callback to the slow dimensions only
        slow_params = list(chain(*blocks[:1 + self.i_last_slow_block]))
        self.n_slow = len(slow_params)
        for p in ["check_every", "callback_every"]:
            setattr(self, p,
                    int(getattr(self, p) * self.n_slow / self.model.prior.d()))
        self.log.info("Dragging with oversampling per step:\n" + "\n".join([
            " %d : %r" % (f, b)
            for f, b in zip([1, self.drag_interp_steps],
                            [blocks[0], fast_params])]))
        self.get_new_sample = self.get_new_sample_dragging
    else:
        _, blocks = self.model.likelihood._speeds_of_params()
        self.oversampling_factors = [1 for b in blocks]
        slow_params = list(self.model.parameterization.sampled_params())
        self.n_slow = len(slow_params)
    # Turn parameter names into indices
    self.blocks = [
        [list(self.model.parameterization.sampled_params()).index(p) for p in b]
        for b in blocks]
    self.proposer = BlockedProposer(
        self.blocks,
        oversampling_factors=self.oversampling_factors,
        i_last_slow_block=self.i_last_slow_block,
        proposal_scale=self.proposal_scale)
    # Build the initial covariance matrix of the proposal, or load from checkpoint
    if self.resuming:
        covmat = np.loadtxt(self.covmat_filename())
        self.log.info("Covariance matrix from checkpoint.")
    else:
        covmat = self.initial_proposal_covmat(slow_params=slow_params)
        self.log.info("Initial covariance matrix.")
    self.log.debug(
        "Sampling with covmat:\n%s",
        DataFrame(
            covmat,
            columns=self.model.parameterization.sampled_params(),
            index=self.model.parameterization.sampled_params()).to_string(
                line_width=_line_width))
    self.proposer.set_covariance(covmat)
    # Prepare callback function
    if self.callback_function is not None:
        self.callback_function_callable = (
            get_external_function(self.callback_function))
def post(info, sample=None):
    """
    Post-processes an existing sample: adds/removes priors, likelihoods and derived
    parameters, and reweights the sample accordingly.

    Args:
        info: dictionary-like input. It must contain a 'post' block, with a
            'suffix' for the new chains and, optionally, 'add', 'remove', 'skip'
            and 'thin' entries.
        sample: a ``Collection`` or a list of them, used as the input sample when
            no output from which to load it is defined in ``info``.

    Returns:
        A tuple ``(info_out, products)``, where ``products`` is a dictionary
        containing the post-processed collection under ``"sample"``. For MPI
        processes of rank > 0, nothing is done and ``None`` is returned.

    Raises:
        HandledException: if the input is inconsistent (after logging the reason),
            or if no samples survive the post-processing.
    """
    logger_setup(info.get(_debug), info.get(_debug_file))
    log = logging.getLogger(__name__.split(".")[-1])
    try:
        info_post = info[_post]
    except KeyError:
        log.error("No 'post' block given. Nothing to do!")
        raise HandledException
    if get_mpi_rank():
        log.warning(
            "Post-processing is not yet MPI-able. Doing nothing for rank > 1 processes."
        )
        return
    # 1. Load existing sample
    output_in = Output(output_prefix=info.get(_output_prefix), resume=True)
    info_in = load_input(output_in.file_full) if output_in else deepcopy(info)
    dummy_model_in = DummyModel(info_in[_params], info_in[_likelihood],
                                info_in.get(_prior, None),
                                info_in.get(_theory, None))
    if output_in:
        # Load chains "1", "2", ... until there are no more files to load
        i = 0
        while True:
            try:
                collection = Collection(
                    dummy_model_in, output_in, name="%d" % (1 + i), load=True,
                    onload_skip=info_post.get("skip", 0),
                    onload_thin=info_post.get("thin", 1))
                if i == 0:
                    collection_in = collection
                else:
                    collection_in._append(collection)
                i += 1
            except IOError:
                break
    elif sample:
        if isinstance(sample, Collection):
            sample = [sample]
        collection_in = deepcopy(sample[0])
        for s in sample[1:]:
            try:
                collection_in._append(s)
            except Exception:
                # Not a bare `except`: let KeyboardInterrupt/SystemExit through
                log.error("Failed to load some of the input samples.")
                raise HandledException
        i = len(sample)
    else:
        log.error(
            "Not output from where to load from or input collections given.")
        raise HandledException
    log.info("Loaded %d chain%s. Will process %d samples.",
             i, "s" if i - 1 else "", collection_in.n())
    if collection_in.n() <= 1:
        log.error(
            "Not enough samples for post-processing. Try using a larger sample, "
            "or skipping or thinning less.")
        raise HandledException
    # 2. Compare old and new info: determine what to do
    add = info_post.get("add", {})
    remove = info_post.get("remove", {})
    # Add a dummy 'one' likelihood, to absorb unused parameters
    if not add.get(_likelihood):
        add[_likelihood] = odict()
    add[_likelihood].update({"one": None})
    # Expand the "add" info
    add = get_full_info(add)
    # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters
    out = {_params: deepcopy(info_in[_params])}
    for p in remove.get(_params, {}):
        pinfo = info_in[_params].get(p)
        if pinfo is None or not is_derived_param(pinfo):
            log.error(
                "You tried to remove parameter '%s', which is not a derived paramter. "
                "Only derived parameters can be removed during post-processing.", p)
            raise HandledException
        out[_params].pop(p)
    # Name of the column of the 1d (sampled-parameter) priors
    mlprior_1d_name = _minuslogprior + _separator + _prior_1d_name
    mlprior_names_add = []
    for p, pinfo in add.get(_params, {}).items():
        pinfo_in = info_in[_params].get(p)
        if is_sampled_param(pinfo):
            if not is_sampled_param(pinfo_in):
                # No added sampled parameters (de-marginalisation not implemented)
                if pinfo_in is None:
                    log.error(
                        "You added a new sampled parameter %r (maybe accidentaly "
                        "by adding a new likelihood that depends on it). "
                        "Adding new sampled parameters is not possible. Try fixing "
                        "it to some value.", p)
                    raise HandledException
                else:
                    log.error(
                        "You tried to change the prior of parameter '%s', "
                        "but it was not a sampled parameter. "
                        "To change that prior, you need to define as an external one.",
                        p)
                    raise HandledException
            # Flag the 1d prior for recomputation, but only once: a repeated name
            # would misalign names and values when zipping them in the main loop.
            # (Compare list with list: a list never equals a string.)
            if mlprior_names_add[:1] != [mlprior_1d_name]:
                mlprior_names_add = [mlprior_1d_name] + mlprior_names_add
        elif is_derived_param(pinfo):
            if p in out[_params]:
                log.error(
                    "You tried to add derived parameter '%s', which is already "
                    "present. To force its recomputation, 'remove' it too.", p)
                raise HandledException
        elif is_fixed_param(pinfo):
            # Only one possibility left "fixed" parameter that was not present before:
            # input of new likelihood, or just an argument for dynamical derived (dropped)
            if ((p in info_in[_params] and
                 pinfo[_p_value] != (pinfo_in or {}).get(_p_value, None))):
                log.error(
                    "You tried to add a fixed parameter '%s: %r' that was already present"
                    " but had a different value or was not fixed. This is not allowed. "
                    "The old info of the parameter was '%s: %r'",
                    p, dict(pinfo), p, dict(pinfo_in))
                raise HandledException
        else:
            log.error("This should not happen. Contact the developers.")
            raise HandledException
        out[_params][p] = pinfo
    # For the likelihood only, turn the rest of *derived* parameters into constants,
    # so that the likelihoods do not try to compute them)
    # But be careful to exclude *input* params that have a "derived: True" value
    # (which in "full info" turns into "derived: 'lambda [x]: [x]'")
    out_params_like = deepcopy(out[_params])
    for p, pinfo in out_params_like.items():
        if ((is_derived_param(pinfo) and not (_p_value in pinfo) and
             p not in add.get(_params, {}))):
            out_params_like[p] = {_p_value: np.nan, _p_drop: True}
    parameterization_like = Parameterization(out_params_like,
                                             ignore_unused_sampled=True)
    # 2.2 Manage adding/removing priors and likelihoods
    warn_remove = False
    for level in [_prior, _likelihood]:
        out[level] = getattr(dummy_model_in, level)
        if level == _prior:
            out[level].remove(_prior_1d_name)
        for pdf in info_post.get("remove", {}).get(level, []) or []:
            try:
                out[level].remove(pdf)
                warn_remove = True
            except ValueError:
                log.error(
                    "Trying to remove %s '%s', but it is not present. "
                    "Existing ones: %r", level, pdf, out[level])
                raise HandledException
    if warn_remove:
        log.warning("You are removing a prior or likelihood pdf. "
                    "Notice that if the resulting posterior is much wider "
                    "than the original one, or displaced enough, "
                    "it is probably safer to explore it directly.")
    if _prior in add:
        mlprior_names_add += [
            _minuslogprior + _separator + name for name in add[_prior]]
        out[_prior] += list(add[_prior])
    prior_recompute_1d = (mlprior_names_add[:1] == [mlprior_1d_name])
    # Don't initialise the theory code if not adding/recomputing theory,
    # theory-derived params or likelihoods
    recompute_theory = info_in.get(_theory) and not (
        list(add[_likelihood]) == ["one"] and
        not any([is_derived_param(pinfo)
                 for pinfo in add.get(_params, {}).values()]))
    if recompute_theory:
        # Inherit from the original chain (needs input|output_params, renames, etc
        theory = list(info_in[_theory].keys())[0]
        info_theory_out = odict([[
            theory,
            recursive_update(deepcopy(info_in[_theory][theory]),
                             add.get(_theory, {theory: {}})[theory])]])
    else:
        info_theory_out = None
    # Skip the auxiliary "one" likelihood. Compared by value: identity (`is not`)
    # against a string literal depends on interning, an implementation detail.
    chi2_names_add = [
        _chi2 + _separator + name for name in add[_likelihood] if name != "one"]
    out[_likelihood] += [l for l in add[_likelihood] if l != "one"]
    if recompute_theory:
        log.warning(
            "You are recomputing the theory, but in the current version this does "
            "not force recomputation of any likelihood or derived parameter, "
            "unless explicitly removed+added.")
    for level in [_prior, _likelihood]:
        for i, x_i in enumerate(out[level]):
            if x_i in list(out[level])[i + 1:]:
                log.error(
                    "You have added %s '%s', which was already present. If you "
                    "want to force its recomputation, you must also 'remove' it.",
                    level, x_i)
                raise HandledException
    # 3. Create output collection
    if "suffix" not in info_post:
        log.error("You need to provide a 'suffix' for your chains.")
        raise HandledException
    # Use default prefix if it exists. If it does not, produce no output by default.
    # {post: {output: None}} suppresses output, and if it's a string, updates it.
    out_prefix = info_post.get(_output_prefix, info.get(_output_prefix))
    if out_prefix not in [None, False]:
        out_prefix += "_" + _post + "_" + info_post["suffix"]
    output_out = Output(output_prefix=out_prefix, force_output=info.get(_force))
    info_out = deepcopy(info)
    info_out[_post] = info_post
    # Updated with input info and extended (full) add info
    info_out.update(info_in)
    info_out[_post]["add"] = add
    dummy_model_out = DummyModel(out[_params], out[_likelihood],
                                 info_prior=out[_prior])
    if recompute_theory:
        theory = list(info_theory_out.keys())[0]
        if _input_params not in info_theory_out[theory]:
            log.error(
                "You appear to be post-processing a chain generated with an older "
                "version of Cobaya. For post-processing to work, please edit the "
                "'[root]__full.info' file of the original chain to add, inside the "
                "theory code block, the list of its input parameters. E.g.\n----\n"
                "theory:\n %s:\n input_params: [param1, param2, ...]\n"
                "----\nIf you get strange errors later, it is likely that you did not "
                "specify the correct set of theory parameters.\n"
                "The full set of input parameters are %s.",
                theory, list(dummy_model_out.parameterization.input_params()))
            raise HandledException
    prior_add = Prior(dummy_model_out.parameterization, add.get(_prior))
    likelihood_add = Likelihood(add[_likelihood], parameterization_like,
                                info_theory=info_theory_out,
                                modules=info.get(_path_install))
    # Remove auxiliary "one" before dumping -- 'add' *is* info_out[_post]["add"]
    add[_likelihood].pop("one")
    if likelihood_add.theory:
        # Make sure that theory.needs is called at least once, for adjustments
        likelihood_add.theory.needs()
    collection_out = Collection(dummy_model_out, output_out, name="1")
    output_out.dump_info({}, info_out)
    # 4. Main loop!
    log.info("Running post-processing...")
    last_percent = 0
    for i, point in enumerate(collection_in.data.itertuples()):
        log.debug("Point: %r", point)
        sampled = [
            getattr(point, param)
            for param in dummy_model_in.parameterization.sampled_params()]
        derived = odict(
            [[param, getattr(point, param, None)]
             for param in dummy_model_out.parameterization.derived_params()])
        # Inputs: from the point if saved there; otherwise from the constants of
        # the input model, then from those of the output model; otherwise None
        inputs = odict([[
            param,
            getattr(
                point, param,
                dummy_model_in.parameterization.constant_params().get(
                    param,
                    dummy_model_out.parameterization.constant_params().get(
                        param, None)))]
            for param in dummy_model_out.parameterization.input_params()])
        # Solve inputs that depend on a function and were not saved
        # (we don't use the Parameterization_to_input method in case there are references
        # to functions that cannot be loaded at the moment)
        for p, value in inputs.items():
            if value is None:
                func = dummy_model_out.parameterization._input_funcs[p]
                args = dummy_model_out.parameterization._input_args[p]
                inputs[p] = func(*[getattr(point, arg) for arg in args])
        # Add/remove priors
        priors_add = prior_add.logps(sampled)
        if not prior_recompute_1d:
            # The 1st element is the 1d prior: drop it if not recomputing it
            priors_add = priors_add[1:]
        logpriors_add = odict(zip(mlprior_names_add, priors_add))
        logpriors_new = [
            logpriors_add.get(name, -getattr(point, name, 0))
            for name in collection_out.minuslogprior_names]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug("New set of priors: %r",
                      dict(zip(dummy_model_out.prior, logpriors_new)))
        if -np.inf in logpriors_new:
            continue
        # Add/remove likelihoods
        output_like = []
        if likelihood_add:
            # Notice "one" (last in likelihood_add) is ignored: not in chi2_names
            loglikes_add = odict(
                zip(chi2_names_add,
                    likelihood_add.logps(inputs, _derived=output_like)))
            output_like = dict(zip(likelihood_add.output_params, output_like))
        else:
            loglikes_add = dict()
        loglikes_new = [
            loglikes_add.get(name, -0.5 * getattr(point, name, 0))
            for name in collection_out.chi2_names]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug("New set of likelihoods: %r",
                      dict(zip(dummy_model_out.likelihood, loglikes_new)))
            if output_like:
                log.debug("New set of likelihood-derived parameters: %r",
                          output_like)
        if -np.inf in loglikes_new:
            continue
        # Add/remove derived parameters and change priors of sampled parameters
        for p in add[_params]:
            if p in dummy_model_out.parameterization._directly_output:
                derived[p] = output_like[p]
            elif p in dummy_model_out.parameterization._derived_funcs:
                func = dummy_model_out.parameterization._derived_funcs[p]
                args = dummy_model_out.parameterization._derived_args[p]
                derived[p] = func(*[
                    getattr(point, arg, output_like.get(arg, None))
                    for arg in args])
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug(
                "New derived parameters: %r",
                dict([[p, derived[p]]
                      for p in dummy_model_out.parameterization.derived_params()
                      if p in add[_params]]))
        # Save to the collection (keep old weight for now)
        collection_out.add(sampled, derived=derived.values(),
                           weight=getattr(point, _weight),
                           logpriors=logpriors_new, loglikes=loglikes_new)
        # Display progress
        percent = np.round(i / collection_in.n() * 100)
        if percent != last_percent and not percent % 5:
            last_percent = percent
            progress_bar(log, percent, " (%d/%d)" % (i, collection_in.n()))
    if not collection_out.data.last_valid_index():
        log.error(
            "No elements in the final sample. Possible causes: "
            "added a prior or likelihood valued zero over the full sampled domain, "
            "or the computation of the theory failed everywhere, etc.")
        raise HandledException
    # Reweight -- account for large dynamic range!
    #   Prefer to rescale +inf to finite, and ignore final points with -inf.
    #   Remove -inf's (0-weight), and correct indices
    difflogmax = max(collection_in[_minuslogpost] - collection_out[_minuslogpost])
    collection_out.data[_weight] *= np.exp(
        collection_in[_minuslogpost] - collection_out[_minuslogpost] - difflogmax)
    collection_out.data = (
        collection_out.data[collection_out.data.weight > 0].reset_index(drop=True))
    collection_out._n = collection_out.data.last_valid_index() + 1
    # Write!
    collection_out._out_update()
    log.info("Finished! Final number of samples: %d", collection_out.n())
    return info_out, {"sample": collection_out}
def initialize(self):
    """
    Imports the PolyChord sampler and prepares its arguments.

    Locates and imports ``pypolychord`` (from ``self.path`` if given, globally
    otherwise), fills in automatic settings, creates the raw-output folders (one
    process only), derives the parameter ordering from the speed blocks, builds the
    ``PolyChordSettings`` instance and defines the hypercube-to-prior transformation.

    Raises ``HandledException`` (after logging the cause) if PolyChord cannot be
    found/imported or if any sampled parameter has an unbounded prior.
    """
    if not get_mpi_rank():  # rank = 0 (MPI master) or None (no MPI)
        self.log.info("Initializing")
    # If path not given, try using general path to modules
    if not self.path and self.path_install:
        self.path = get_path(self.path_install)
    if self.path:
        if not get_mpi_rank():
            self.log.info("Importing *local* PolyChord from " + self.path)
        if not os.path.exists(os.path.realpath(self.path)):
            self.log.error("The given path does not exist.")
            raise HandledException
        pc_build_path = get_build_path(self.path)
        if not pc_build_path:
            self.log.error("Either PolyChord is not in the given folder, "
                           "'%s', or you have not compiled it.", self.path)
            raise HandledException
        # Inserting the previously found path into the list of import folders
        sys.path.insert(0, pc_build_path)
    else:
        self.log.info("Importing *global* PolyChord.")
    try:
        import pypolychord
        from pypolychord.settings import PolyChordSettings
        self.pc = pypolychord
    except ImportError:
        self.log.error(
            "Couldn't find the PolyChord python interface. "
            "Make sure that you have compiled it, and that you either\n"
            " (a) specify a path (you didn't) or\n"
            " (b) install the Python interface globally with\n"
            " '/path/to/PolyChord/python setup.py install --user'")
        raise HandledException
    # Prepare arguments and settings
    self.nDims = self.model.prior.d()
    self.nDerived = (len(self.model.parameterization.derived_params()) +
                     len(self.model.prior) +
                     len(self.model.likelihood._likelihoods))
    if self.logzero is None:
        self.logzero = np.nan_to_num(-np.inf)
    if self.max_ndead == np.inf:
        self.max_ndead = -1
    for p in ["nlive", "num_repeats", "nprior", "max_ndead"]:
        setattr(self, p, read_dnumber(getattr(self, p), self.nDims, dtype=int))
    # Fill the automatic ones
    if getattr(self, "feedback", None) is None:
        # Thresholds instead of an exact-level lookup, so that non-standard logging
        # levels do not raise KeyError. Standard levels map as before:
        # CRITICAL/ERROR/WARNING -> 0, INFO -> 1, DEBUG -> 2.
        level = self.log.getEffectiveLevel()
        if level >= logging.WARNING:
            self.feedback = 0
        elif level >= logging.INFO:
            self.feedback = 1
        else:
            self.feedback = 2
    try:
        output_folder = getattr(self.output, "folder")
        output_prefix = getattr(self.output, "prefix") or ""
        self.read_resume = self.resuming
    except AttributeError:
        # dummy output -- no resume!
        self.read_resume = False
        from tempfile import gettempdir
        output_folder = gettempdir()
        if not get_mpi_rank():
            from random import random
            output_prefix = hex(int(random() * 16 ** 6))[2:]
        else:
            output_prefix = None
        # All processes must share the random prefix chosen by the root one
        if get_mpi():
            output_prefix = get_mpi_comm().bcast(output_prefix, root=0)
    self.base_dir = os.path.join(output_folder, self.base_dir)
    self.file_root = output_prefix
    if not get_mpi_rank():
        # Creating output folder, if it does not exist (just one process)
        if not os.path.exists(self.base_dir):
            os.makedirs(self.base_dir)
        # Idem, a clusters folder if needed -- notice that PolyChord's default
        # is "True", here "None", hence the funny condition below
        if self.do_clustering is not False:  # None here means "default"
            try:
                os.makedirs(os.path.join(self.base_dir, clusters))
            except OSError:  # exists!
                pass
        self.log.info("Storing raw PolyChord output in '%s'.", self.base_dir)
    # Exploiting the speed hierarchy
    _speeds, blocks = self.model.likelihood._speeds_of_params(int_speeds=True)
    blocks_flat = list(chain(*blocks))
    self.ordering = [
        blocks_flat.index(p) for p in self.model.parameterization.sampled_params()]
    self.grade_dims = [len(block) for block in blocks]
    # self.grade_frac = np.array(
    #     [i*j for i,j in zip(self.grade_dims, speeds)])
    # self.grade_frac = (
    #     self.grade_frac/sum(self.grade_frac))
    # Disabled for now. We need a way to override the "time" part of the meaning
    # of grade_frac
    self.grade_frac = [1 / len(self.grade_dims) for _ in self.grade_dims]
    # Assign settings (only those that are not None are passed on)
    pc_args = ["nlive", "num_repeats", "nprior", "do_clustering",
               "precision_criterion", "max_ndead", "boost_posterior", "feedback",
               "logzero", "update_files", "posteriors", "equals",
               "cluster_posteriors", "write_resume", "read_resume", "write_stats",
               "write_live", "write_dead", "base_dir", "file_root", "grade_frac",
               "grade_dims"]
    self.pc_settings = PolyChordSettings(
        self.nDims, self.nDerived,
        seed=(self.seed if self.seed is not None else -1),
        **{p: getattr(self, p) for p in pc_args if getattr(self, p) is not None})
    # prior conversion from the hypercube
    bounds = self.model.prior.bounds(
        confidence_for_unbounded=self.confidence_for_unbounded)
    # Check if priors are bounded (nan's to inf)
    inf = np.where(np.isinf(bounds))
    if len(inf[0]):
        # list(): works whether sampled_params() returns a sequence or a mapping
        params_names = list(self.model.parameterization.sampled_params())
        params = [params_names[i] for i in sorted(set(inf[0]))]
        # The names are passed as a logging argument: the previous implicit string
        # concatenation turned the whole message prefix into the join separator.
        self.log.error("PolyChord needs bounded priors, but the parameter(s) '%s' "
                       "is(are) unbounded.", "', '".join(params))
        raise HandledException
    locs = bounds[:, 0]
    scales = bounds[:, 1] - bounds[:, 0]
    # This function re-scales the parameters AND puts them in the right order
    self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales).tolist()
    # We will need the volume of the prior domain, since PolyChord divides by it
    self.logvolume = np.log(np.prod(scales))
    # Done!
    if not get_mpi_rank():
        self.log.info("Calling PolyChord with arguments:")
        for p, v in inspect.getmembers(self.pc_settings,
                                       lambda a: not (callable(a))):
            if not p.startswith("_"):
                self.log.info(" %s: %s", p, v)
def install_script():
    """
    Command-line entry point of the installation tool for external modules.

    Parses the shell arguments (input files or the ``cosmo`` keyword, the
    installation path, and the force / progress-bar / just-code / just-data flags)
    and calls ``install`` with them. Does nothing for processes with non-zero MPI
    rank.
    """
    from cobaya.mpi import get_mpi_rank
    # Only the root process (or a non-MPI run) performs the installation
    if get_mpi_rank():
        return
    # Configure the logger ASAP
    logger_setup()
    log = logging.getLogger(__name__.split(".")[-1])
    # Parse arguments
    import argparse
    parser = argparse.ArgumentParser(
        description="Cobaya's installation tool for external modules.")
    parser.add_argument(
        "files", action="store", nargs="+", metavar="input_file.yaml",
        help="One or more input files "
             "(or 'cosmo' for a basic collection of cosmological modules)")
    parser.add_argument(
        "-" + _modules_path_arg[0], "--" + _modules_path_arg,
        action="store", nargs=1, required=True, metavar="/install/path",
        help="Desired path where to install external modules.")
    parser.add_argument(
        "-f", "--force", action="store_true", default=False,
        help="Force re-installation of apparently installed modules.")
    parser.add_argument(
        "--no-progress-bars", action="store_true", default=False,
        help="No progress bars shown. Shorter logs (used in Travis).")
    # "Just code" works by switching *off* the data flag, and vice versa, hence the
    # crossed `dest` values and the `store_false` actions.
    group_just = parser.add_mutually_exclusive_group(required=False)
    group_just.add_argument(
        "-C", "--just-code", action="store_false", default=True,
        help="Install code of the modules.", dest=_data)
    group_just.add_argument(
        "-D", "--just-data", action="store_false", default=True,
        help="Install data of the modules.", dest=_code)
    arguments = parser.parse_args()
    if arguments.files == ["cosmo"]:
        log.info("Installing cosmological modules (input files will be ignored)")
        from cobaya.cosmo_input import install_basic
        infos = [install_basic]
    else:
        from cobaya.input import load_input
        infos = [load_input(f) for f in arguments.files]
    # Launch installer
    install(*infos, path=getattr(arguments, _modules_path_arg)[0],
            **{arg: getattr(arguments, arg)
               for arg in ["force", _code, _data, "no_progress_bars"]})
def post(info, sample=None):
    """
    Post-processes (importance-reweights) an existing sample.

    The ``post`` block of ``info`` states which priors, likelihoods, theory options
    and derived parameters are added to / removed from the original model. Every
    input point is re-evaluated under the modified model and its weight is rescaled
    by the ratio of new to old posterior.

    :param info: full input dictionary; must contain a ``post`` block.
    :param sample: a ``Collection`` or a list of them, used as input sample when no
        output prefix from which to load chains is given in ``info``.
    :return: the tuple ``(info_out, {"sample": collection_out})``, or ``None`` for
        processes with non-zero MPI rank (post-processing is not MPI-aware).
    :raises LoggedError: for a missing/inconsistent ``post`` block, if the input
        sample cannot be loaded or is too small, if output already exists and is not
        being forced, or if no point survives in the final sample.
    """
    logger_setup(info.get(_debug), info.get(_debug_file))
    log = logging.getLogger(__name__.split(".")[-1])
    # MARKED FOR DEPRECATION IN v3.0
    # BEHAVIOUR TO BE REPLACED BY ERROR:
    check_deprecated_modules_path(info)
    # END OF DEPRECATION BLOCK
    try:
        info_post = info[_post]
    except KeyError:
        raise LoggedError(log, "No 'post' block given. Nothing to do!")
    if get_mpi_rank():
        log.warning(
            "Post-processing is not yet MPI-aware. Doing nothing for rank > 0 processes.")
        return
    if info.get(_resume):
        log.warning("Resuming not implemented for post-processing. Re-starting.")
    # 1. Load existing sample
    output_in = get_output(output_prefix=info.get(_output_prefix))
    if output_in:
        try:
            info_in = output_in.reload_updated_info()
        except FileNotFoundError:
            raise LoggedError(log, "Error loading input model: "
                                   "could not find input info at %s",
                              output_in.file_updated)
    else:
        info_in = deepcopy_where_possible(info)
    dummy_model_in = DummyModel(info_in[_params], info_in[kinds.likelihood],
                                info_in.get(_prior, None))
    if output_in:
        if not output_in.find_collections():
            raise LoggedError(log, "No samples found for the input model with prefix %s",
                              os.path.join(output_in.folder, output_in.prefix))
        collection_in = output_in.load_collections(
            dummy_model_in, skip=info_post.get("skip", 0),
            thin=info_post.get("thin", 1), concatenate=True)
    elif sample:
        if isinstance(sample, Collection):
            sample = [sample]
        collection_in = deepcopy(sample[0])
        for s in sample[1:]:
            try:
                collection_in.append(s)
            except Exception:  # not bare: let KeyboardInterrupt/SystemExit through
                raise LoggedError(log, "Failed to load some of the input samples.")
    else:
        raise LoggedError(log,
                          "Not output from where to load from or input collections given.")
    n_samples_in = len(collection_in)
    log.info("Will process %d samples.", n_samples_in)
    if n_samples_in <= 1:
        raise LoggedError(
            log, "Not enough samples for post-processing. Try using a larger sample, "
                 "or skipping or thinning less.")
    # 2. Compare old and new info: determine what to do
    # (`or {}`: an explicitly empty block is loaded as None)
    add = info_post.get(_post_add, {}) or {}
    remove = info_post.get(_post_remove, {}) or {}
    # Add a dummy 'one' likelihood, to absorb unused parameters
    if not add.get(kinds.likelihood):
        add[kinds.likelihood] = {}
    add[kinds.likelihood]["one"] = None
    # Expand the "add" info
    add = update_info(add)
    # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters
    out = {_params: deepcopy_where_possible(info_in[_params])}
    for p in remove.get(_params, {}):
        pinfo = info_in[_params].get(p)
        if pinfo is None or not is_derived_param(pinfo):
            raise LoggedError(
                log,
                "You tried to remove parameter '%s', which is not a derived parameter. "
                "Only derived parameters can be removed during post-processing.", p)
        out[_params].pop(p)
    # Force recomputation of aggregated chi2
    for p in list(out[_params]):
        if p.startswith(_get_chi2_name("")):
            out[_params].pop(p)
    mlprior_names_add = []
    mlprior_1d_name = _minuslogprior + _separator + _prior_1d_name
    for p, pinfo in add.get(_params, {}).items():
        pinfo_in = info_in[_params].get(p)
        if is_sampled_param(pinfo):
            if not is_sampled_param(pinfo_in):
                # No added sampled parameters (de-marginalisation not implemented)
                if pinfo_in is None:
                    raise LoggedError(
                        log, "You added a new sampled parameter %r (maybe accidentally "
                             "by adding a new likelihood that depends on it). "
                             "Adding new sampled parameters is not possible. Try fixing "
                             "it to some value.", p)
                else:
                    raise LoggedError(
                        log,
                        "You tried to change the prior of parameter '%s', "
                        "but it was not a sampled parameter. "
                        "To change that prior, you need to define as an external one.",
                        p)
            # Schedule the 1d prior for recomputation, once only. The comparison must
            # be list-to-list: comparing the slice against the bare name was always
            # true, which duplicated the entry and misaligned it with the prior
            # values it is later zipped with.
            if mlprior_names_add[:1] != [mlprior_1d_name]:
                mlprior_names_add = [mlprior_1d_name] + mlprior_names_add
        elif is_derived_param(pinfo):
            if p in out[_params]:
                raise LoggedError(
                    log, "You tried to add derived parameter '%s', which is already "
                         "present. To force its recomputation, 'remove' it too.", p)
        elif is_fixed_param(pinfo):
            # Only one possibility left "fixed" parameter that was not present before:
            # input of new likelihood, or just an argument for dynamical derived (dropped)
            if ((p in info_in[_params] and
                 pinfo[partag.value] != (pinfo_in or {}).get(partag.value, None))):
                raise LoggedError(
                    log,
                    "You tried to add a fixed parameter '%s: %r' that was already present"
                    " but had a different value or was not fixed. This is not allowed. "
                    "The old info of the parameter was '%s: %r'",
                    p, dict(pinfo), p, dict(pinfo_in))
        else:
            raise LoggedError(log, "This should not happen. Contact the developers.")
        out[_params][p] = pinfo
    # For the likelihood only, turn the rest of *derived* parameters into constants,
    # so that the likelihoods do not try to compute them)
    # But be careful to exclude *input* params that have a "derived: True" value
    # (which in "updated info" turns into "derived: 'lambda [x]: [x]'")
    out_params_like = deepcopy_where_possible(out[_params])
    for p, pinfo in out_params_like.items():
        if ((is_derived_param(pinfo) and not (partag.value in pinfo)
             and p not in add.get(_params, {}))):
            out_params_like[p] = {partag.value: np.nan, partag.drop: True}
    # 2.2 Manage adding/removing priors and likelihoods
    warn_remove = False
    for level in [_prior, kinds.likelihood]:
        out[level] = getattr(dummy_model_in, level)
        if level == _prior:
            out[level].remove(_prior_1d_name)
        for pdf in remove.get(level, []) or []:
            try:
                out[level].remove(pdf)
                warn_remove = True
            except ValueError:
                raise LoggedError(
                    log, "Trying to remove %s '%s', but it is not present. "
                         "Existing ones: %r", level, pdf, out[level])
    if warn_remove:
        log.warning("You are removing a prior or likelihood pdf. "
                    "Notice that if the resulting posterior is much wider "
                    "than the original one, or displaced enough, "
                    "it is probably safer to explore it directly.")
    if _prior in add:
        mlprior_names_add += [_minuslogprior + _separator + name
                              for name in add[_prior]]
        out[_prior] += list(add[_prior])
    prior_recompute_1d = (mlprior_names_add[:1] == [mlprior_1d_name])
    # Don't initialise the theory code if not adding/recomputing theory,
    # theory-derived params or likelihoods
    recompute_theory = info_in.get(kinds.theory) and not (
            list(add[kinds.likelihood]) == ["one"] and
            not any(is_derived_param(pinfo)
                    for pinfo in add.get(_params, {}).values()))
    if recompute_theory:
        # Inherit from the original chain (needs input|output_params, renames, etc
        add_theory = add.get(kinds.theory)
        if add_theory:
            info_theory_out = {}
            if len(add_theory) > 1:
                log.warning('Importance sampling with more than one theory is '
                            'not really tested')
            add_theory = add_theory.copy()
            for theory, theory_info in info_in[kinds.theory].items():
                theory_copy = deepcopy_where_possible(theory_info)
                if theory in add_theory:
                    info_theory_out[theory] = \
                        recursive_update(theory_copy, add_theory.pop(theory))
                else:
                    info_theory_out[theory] = theory_copy
            # Whatever is left was not present in the original chain
            info_theory_out.update(add_theory)
        else:
            info_theory_out = deepcopy_where_possible(info_in[kinds.theory])
    else:
        info_theory_out = None
    chi2_names_add = [
        _get_chi2_name(name) for name in add[kinds.likelihood] if name != "one"]
    out[kinds.likelihood] += [l for l in add[kinds.likelihood] if l != "one"]
    if recompute_theory:
        log.warning("You are recomputing the theory, but in the current version this does"
                    " not force recomputation of any likelihood or derived parameter, "
                    "unless explicitly removed+added.")
    for level in [_prior, kinds.likelihood]:
        for i, x_i in enumerate(out[level]):
            if x_i in list(out[level])[i + 1:]:
                raise LoggedError(
                    log, "You have added %s '%s', which was already present. If you "
                         "want to force its recomputation, you must also 'remove' it.",
                    level, x_i)
    # 3. Create output collection
    if _post_suffix not in info_post:
        raise LoggedError(log, "You need to provide a '%s' for your chains.",
                          _post_suffix)
    # Use default prefix if it exists. If it does not, produce no output by default.
    # {post: {output: None}} suppresses output, and if it's a string, updates it.
    out_prefix = info_post.get(_output_prefix, info.get(_output_prefix))
    if out_prefix not in [None, False]:
        out_prefix += _separator_files + _post + _separator_files + info_post[
            _post_suffix]
    output_out = get_output(output_prefix=out_prefix, force=info.get(_force))
    if output_out and not output_out.force and output_out.find_collections():
        raise LoggedError(log, "Found existing post-processing output with prefix %r. "
                               "Delete it manually or re-run with `force: True` "
                               "(or `-f`, `--force` from the shell).", out_prefix)
    elif output_out and output_out.force:
        output_out.delete_infos()
        for regexp in output_out.find_collections():
            output_out.delete_with_regexp(re.compile(regexp))
    info_out = deepcopy_where_possible(info)
    info_out[_post] = info_post
    # Updated with input info and extended (updated) add info
    info_out.update(info_in)
    info_out[_post][_post_add] = add
    dummy_model_out = DummyModel(out[_params], out[kinds.likelihood],
                                 info_prior=out[_prior])
    if recompute_theory:
        # TODO: May need updating for more than one, or maybe can be removed
        theory = list(info_theory_out)[0]
        if _input_params not in info_theory_out[theory]:
            raise LoggedError(
                log,
                "You appear to be post-processing a chain generated with an older "
                "version of Cobaya. For post-processing to work, please edit the "
                "'[root].updated.yaml' file of the original chain to add, inside the "
                "theory code block, the list of its input parameters. E.g.\n----\n"
                "theory:\n %s:\n input_params: [param1, param2, ...]\n"
                "----\nIf you get strange errors later, it is likely that you did not "
                "specify the correct set of theory parameters.\n"
                "The full set of input parameters are %s.",
                theory, list(dummy_model_out.parameterization.input_params()))
    # TODO: check allow_renames=False?
    # TODO: May well be simplifications here, this is v close to pre-refactor logic
    # Have not gone through or understood all the parameterization stuff
    model_add = Model(out_params_like, add[kinds.likelihood],
                      info_prior=add.get(_prior), info_theory=info_theory_out,
                      packages_path=info.get(_packages_path), allow_renames=False,
                      post=True,
                      prior_parameterization=dummy_model_out.parameterization)
    # Remove auxiliary "one" before dumping -- 'add' *is* info_out[_post][_post_add]
    add[kinds.likelihood].pop("one")
    collection_out = Collection(dummy_model_out, output_out, name="1")
    output_out.check_and_dump_info(None, info_out, check_compatible=False)
    # Prepare recomputation of aggregated chi2
    # (they need to be recomputed by hand, because its autocomputation won't pick up
    # old likelihoods for a given type)
    all_types = {
        like: str_to_list(add[kinds.likelihood].get(
            like, info_in[kinds.likelihood].get(like)).get("type", []) or [])
        for like in out[kinds.likelihood]}
    types = set(chain(*list(all_types.values())))
    inv_types = {t: [like for like, like_types in all_types.items() if t in like_types]
                 for t in types}
    # 4. Main loop!
    log.info("Running post-processing...")
    last_percent = 0
    for i, point in collection_in.data.iterrows():
        log.debug("Point: %r", point)
        sampled = [point[param] for param in
                   dummy_model_in.parameterization.sampled_params()]
        derived = {param: point.get(param, None) for param in
                   dummy_model_out.parameterization.derived_params()}
        inputs = {param: point.get(
            param, dummy_model_in.parameterization.constant_params().get(
                param, dummy_model_out.parameterization.constant_params().get(
                    param, None)))
            for param in dummy_model_out.parameterization.input_params()}
        # Solve inputs that depend on a function and were not saved
        # (we don't use the Parameterization_to_input method in case there are references
        # to functions that cannot be loaded at the moment)
        for p, value in inputs.items():
            if value is None:
                func = dummy_model_out.parameterization._input_funcs[p]
                args = dummy_model_out.parameterization._input_args[p]
                inputs[p] = func(*[point.get(arg) for arg in args])
        # Add/remove priors
        priors_add = model_add.prior.logps(sampled)
        if not prior_recompute_1d:
            priors_add = priors_add[1:]
        logpriors_add = dict(zip(mlprior_names_add, priors_add))
        logpriors_new = [logpriors_add.get(name, - point.get(name, 0))
                         for name in collection_out.minuslogprior_names]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug(
                "New set of priors: %r", dict(zip(dummy_model_out.prior, logpriors_new)))
        if -np.inf in logpriors_new:
            continue
        # Add/remove likelihoods
        # (a dict, not a list: it is queried with `.get` below even when no likelihood
        # has been added)
        output_like = {}
        if add[kinds.likelihood]:
            # Notice "one" (last in likelihood_add) is ignored: not in chi2_names
            loglikes_add, output_like = model_add.logps(inputs, return_derived=True)
            loglikes_add = dict(zip(chi2_names_add, loglikes_add))
            output_like = dict(zip(model_add.output_params, output_like))
        else:
            loglikes_add = dict()
        loglikes_new = [loglikes_add.get(name, -0.5 * point.get(name, 0))
                        for name in collection_out.chi2_names]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug(
                "New set of likelihoods: %r",
                dict(zip(dummy_model_out.likelihood, loglikes_new)))
            if output_like:
                log.debug("New set of likelihood-derived parameters: %r", output_like)
        if -np.inf in loglikes_new:
            continue
        # Add/remove derived parameters and change priors of sampled parameters
        for p in add[_params]:
            if p in dummy_model_out.parameterization._directly_output:
                derived[p] = output_like[p]
            elif p in dummy_model_out.parameterization._derived_funcs:
                func = dummy_model_out.parameterization._derived_funcs[p]
                args = dummy_model_out.parameterization._derived_args[p]
                derived[p] = func(
                    *[point.get(arg, output_like.get(arg, None)) for arg in args])
        # We need to recompute the aggregated chi2 by hand
        for type_, likes in inv_types.items():
            derived[_get_chi2_name(type_)] = sum(
                -2 * lvalue for lname, lvalue
                in zip(collection_out.chi2_names, loglikes_new)
                if _undo_chi2_name(lname) in likes)
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug("New derived parameters: %r",
                      {p: derived[p]
                       for p in dummy_model_out.parameterization.derived_params()
                       if p in add[_params]})
        # Save to the collection (keep old weight for now)
        collection_out.add(
            sampled, derived=derived.values(), weight=point.get(_weight),
            logpriors=logpriors_new, loglikes=loglikes_new)
        # Display progress
        percent = np.round(i / n_samples_in * 100)
        if percent != last_percent and not percent % 5:
            last_percent = percent
            progress_bar(log, percent, " (%d/%d)" % (i, n_samples_in))
    # NOTE(review): a falsy last valid index also covers index 0, so a single
    # surviving point is reported as an empty sample too -- confirm this is intended.
    if not collection_out.data.last_valid_index():
        raise LoggedError(
            log, "No elements in the final sample. Possible causes: "
                 "added a prior or likelihood valued zero over the full sampled domain, "
                 "or the computation of the theory failed everywhere, etc.")
    # Reweight -- account for large dynamic range!
    # Prefer to rescale +inf to finite, and ignore final points with -inf.
    # Remove -inf's (0-weight), and correct indices
    difflogmax = max(collection_in[_minuslogpost] - collection_out[_minuslogpost])
    collection_out.data[_weight] *= np.exp(
        collection_in[_minuslogpost] - collection_out[_minuslogpost] - difflogmax)
    collection_out.data = (
        collection_out.data[collection_out.data.weight > 0].reset_index(drop=True))
    collection_out._n = collection_out.data.last_valid_index() + 1
    # Write!
    collection_out.out_update()
    log.info("Finished! Final number of samples: %d", len(collection_out))
    return info_out, {"sample": collection_out}
def initialize(self):
    """
    Prepares the maximization: target function, starting point, affine
    re-parameterization and the arguments of the chosen minimizer.

    The starting point is the best fit (if ``ignore_prior``) or the MAP of a
    previous sample found in the output, if there is any; otherwise a reference
    point of the prior with finite target value. The minimizer (``bobyqa`` or
    ``scipy``) and its keyword arguments are stored in ``self.minimizer`` and
    ``self.kwargs``.

    Raises ``LoggedError`` if ``self.method`` is not one of the known methods.
    """
    self.mpi_info("Initializing")
    self.max_evals = read_dnumber(self.max_evals, self.model.prior.d())
    # Target: log-likelihood if the prior is ignored, log-posterior otherwise
    if self.ignore_prior:
        target = self.model.loglike
        target_kwargs = {"make_finite": True, "return_derived": False}
    else:
        target = self.model.logpost
        target_kwargs = {"make_finite": True}
    self.logp = lambda x: target(x, **target_kwargs)
    # Starting point: try first to get it from previous samples...
    start = None
    if self.output:
        found = self.output.find_collections()
        previous = None
        if found:
            if not more_than_one_process():
                previous = self.output.load_collections(
                    self.model, concatenate=True)
            elif 1 + get_mpi_rank() <= len(found):
                # With several processes, each one takes its own chain, if it exists
                previous = Collection(self.model, self.output,
                                      name=str(1 + get_mpi_rank()), resuming=True)
        if previous:
            best = previous.bestfit() if self.ignore_prior else previous.MAP()
            names = list(self.model.parameterization.sampled_params())
            start = best[names].values
            self.log.info("Starting from %s of previous chain:",
                          "best fit" if self.ignore_prior else "MAP")
    # ... and, if there were none, draw from the reference until the target is finite
    if start is None:
        while True:
            start = self.model.prior.reference()
            if np.isfinite(self.logp(start)):
                break
        self.log.info("Starting from random initial point:")
    self.log.info(dict(zip(self.model.parameterization.sampled_params(), start)))
    self._bounds = self.model.prior.bounds(
        confidence_for_unbounded=self.confidence_for_unbounded)
    # TODO: if ignore_prior, one should use *like* covariance (this is *post*)
    covmat = self._load_covmat(self.output)[0]
    # Scale by the conditional parameter widths (the correlation structure is not
    # used), but never by more than a third of the prior range
    conditional_widths = 1 / np.sqrt(np.diag(np.linalg.inv(covmat)))
    prior_widths = (self._bounds[:, 1] - self._bounds[:, 0]) / 3
    scales = np.minimum(conditional_widths, prior_widths)
    # Affine transformation to a space where the starting point is at the origin and
    # the covariance is normalised. No rotation is applied, since the supported
    # minimization routines assume bounds aligned with the parameter axes.
    self._affine_transform_matrix = np.diag(1 / scales)
    self._inv_affine_transform_matrix = np.diag(scales)
    self._scales = scales
    self._affine_transform_baseline = start
    start = self.affine_transform(start)
    np.testing.assert_allclose(start, np.zeros(start.shape))
    lower = self.affine_transform(self._bounds[:, 0])
    upper = self.affine_transform(self._bounds[:, 1])
    bounds = np.array([lower, upper]).T
    # Configure the minimizer
    method_name = self.method.lower()
    if method_name == "bobyqa":
        self.minimizer = pybobyqa.solve
        defaults = {
            "objfun": (lambda x: -self.logp_transf(x)),
            "x0": start,
            "bounds": np.array(list(zip(*bounds))),
            "seek_global_minimum": get_mpi_size() in [0, 1],
            "maxfun": int(self.max_evals)}
        self.kwargs = defaults
        self.kwargs = recursive_update(deepcopy(self.kwargs),
                                       self.override_bobyqa or {})
        printable = {k: v for k, v in self.kwargs.items() if k != "objfun"}
        self.log.debug("Arguments for pybobyqa.solve:\n%r", printable)
    elif method_name == "scipy":
        self.minimizer = scpminimize
        defaults = {
            "fun": (lambda x: -self.logp_transf(x)),
            "x0": start,
            "bounds": bounds,
            "options": {
                "maxiter": self.max_evals,
                "disp": (self.log.getEffectiveLevel() == logging.DEBUG)}}
        self.kwargs = defaults
        self.kwargs = recursive_update(deepcopy(self.kwargs),
                                       self.override_scipy or {})
        printable = {k: v for k, v in self.kwargs.items() if k != "fun"}
        self.log.debug("Arguments for scipy.optimize.minimize:\n%r", printable)
    else:
        methods = ["bobyqa", "scipy"]
        raise LoggedError(self.log, "Method '%s' not recognized. Try one of %r.",
                          self.method, methods)
def check_convergence_and_learn_proposal(self):
    """
    Checks the convergence of the sampling process and, if requested, learns a new
    covariance matrix for the proposal distribution from the covariance of the
    last samples.

    With MPI, the statistics (means, covariances and confidence-interval bounds)
    of the last half of each chain are gathered and compared. Without MPI, the
    single chain is cut into ``1 + Rminus1_single_split`` fractions, of which all
    but the first one are compared.

    Sets ``self.converged`` and ``self.Rminus1_last``, possibly updates the
    covariance of ``self.proposer``, and writes a checkpoint. Notice that it returns
    early, *without* writing the checkpoint, if the proposal should be learnt but
    the current R-1 is outside the range allowed for learning.

    Raises ``HandledException`` if, without MPI, the chain is too short to be split.
    """
    if get_mpi():
        # Compute and gather means, covs and CL intervals of last half of chains
        mean = self.collection.mean(first=int(self.n() / 2))
        cov = self.collection.cov(first=int(self.n() / 2))
        mcsamples = self.collection._sampled_to_getdist_mcsamples(
            first=int(self.n() / 2))
        try:
            bound = np.array([[
                mcsamples.confidence(i, limfrac=self.Rminus1_cl_level / 2.,
                                     upper=which)
                for i in range(self.model.prior.d())]
                for which in [False, True]]).T
            success_bounds = True
        except Exception:  # not bare: let KeyboardInterrupt/SystemExit through
            bound = None
            success_bounds = False
        # NOTE(review): `success_bounds` is local to each process, so a failure in a
        # non-root process is not seen by the root one below -- confirm whether it
        # needs to be gathered too.
        Ns, means, covs, bounds = map(
            lambda x: np.array(get_mpi_comm().gather(x)),
            [self.n(), mean, cov, bound])
    else:
        # Compute and gather means, covs and CL intervals of last m-1 chain fractions
        m = 1 + self.Rminus1_single_split
        cut = int(self.collection.n() / m)
        if cut <= 1:
            self.log.error(
                "Not enough points in chain to check convergence. "
                "Increase `check_every` or reduce `Rminus1_single_split`.")
            raise HandledException
        Ns = (m - 1) * [cut]
        means = np.array([
            self.collection.mean(first=i * cut, last=(i + 1) * cut - 1)
            for i in range(1, m)])
        covs = np.array([
            self.collection.cov(first=i * cut, last=(i + 1) * cut - 1)
            for i in range(1, m)])
        # No logging of warnings temporarily, so getdist won't complain unnecessarily
        # (restored in `finally`, so that a failure here cannot leave logging
        # disabled for the rest of the run)
        logging.disable(logging.WARNING)
        try:
            mcsampleses = [
                self.collection._sampled_to_getdist_mcsamples(
                    first=i * cut, last=(i + 1) * cut - 1)
                for i in range(1, m)]
        finally:
            logging.disable(logging.NOTSET)
        try:
            bounds = [
                np.array([[
                    mcs.confidence(i, limfrac=self.Rminus1_cl_level / 2.,
                                   upper=which)
                    for i in range(self.model.prior.d())]
                    for which in [False, True]]).T
                for mcs in mcsampleses]
            success_bounds = True
        except Exception:
            bounds = None
            success_bounds = False
    # Compute convergence diagnostics
    if not get_mpi_rank():
        # "Within" or "W" term -- our "units" for assessing convergence
        # and our prospective new covariance matrix
        mean_of_covs = np.average(covs, weights=Ns, axis=0)
        # "Between" or "B" term
        # We don't weight with the number of samples in the chains here:
        # shorter chains will likely be outliers, and we want to notice them
        cov_of_means = np.atleast_2d(np.cov(means.T))  # , fweights=Ns)
        # For numerical stability, we turn cov_of_means into a correlation matrix:
        #   rho = (diag(Sigma))^(-1/2) * Sigma * (diag(Sigma))^(-1/2)
        # and apply the same transformation to the mean of covs (same eigenvals!)
        diagSinvsqrt = np.diag(np.power(np.diag(cov_of_means), -0.5))
        corr_of_means = diagSinvsqrt.dot(cov_of_means).dot(diagSinvsqrt)
        norm_mean_of_covs = diagSinvsqrt.dot(mean_of_covs).dot(diagSinvsqrt)
        # Cholesky of (normalized) mean of covs and eigvals of Linv*cov_of_means*L
        try:
            L = np.linalg.cholesky(norm_mean_of_covs)
        except np.linalg.LinAlgError:
            self.log.warning(
                "Negative covariance eigenvectors. "
                "This may mean that the covariance of the samples does not "
                "contain enough information at this point. "
                "Skipping this checkpoint")
            success = False
        else:
            Linv = np.linalg.inv(L)
            try:
                eigvals = np.linalg.eigvalsh(
                    Linv.dot(corr_of_means).dot(Linv.T))
                success = True
            except np.linalg.LinAlgError:
                self.log.warning("Could not compute eigenvalues. "
                                 "Skipping this checkpoint.")
                success = False
        if success:
            Rminus1 = max(np.abs(eigvals))
            # For real square matrices, a possible def of the cond number is:
            condition_number = Rminus1 / min(np.abs(eigvals))
            self.log.debug("Condition number = %g", condition_number)
            self.log.debug("Eigenvalues = %r", eigvals)
            self.log.info(
                "Convergence of means: R-1 = %f after %d accepted steps" % (
                    Rminus1, (sum(Ns) if get_mpi() else self.n())) +
                (" = sum(%r)" % list(Ns) if get_mpi() else ""))
            # Have we converged in means?
            # (criterion must be fulfilled twice in a row)
            if max(Rminus1, self.Rminus1_last) < self.Rminus1_stop:
                # Check the convergence of the bounds of the confidence intervals
                # Same as R-1, but with the rms deviation from the mean bound
                # in units of the mean standard deviation of the chains
                if success_bounds:
                    Rminus1_cl = (np.std(bounds, axis=0).T /
                                  np.sqrt(np.diag(mean_of_covs)))
                    self.log.debug("normalized std's of bounds = %r", Rminus1_cl)
                    self.log.info(
                        "Convergence of bounds: R-1 = %f after %d " % (
                            np.max(Rminus1_cl),
                            (sum(Ns) if get_mpi() else self.n())) +
                        "accepted steps" +
                        (" = sum(%r)" % list(Ns) if get_mpi() else ""))
                    if np.max(Rminus1_cl) < self.Rminus1_cl_stop:
                        self.converged = True
                        self.log.info("The run has converged!")
                    self._Ns = Ns
                else:
                    self.log.info(
                        "Computation of the bounds was not possible. "
                        "Waiting until the next checkpoint")
    if get_mpi():
        # Broadcast and save the convergence status and the last R-1 of means
        # (`success` and `Rminus1` exist in the root process only, hence the
        # conditional expressions)
        success = get_mpi_comm().bcast(
            (success if not get_mpi_rank() else None), root=0)
        if success:
            self.Rminus1_last = get_mpi_comm().bcast(
                (Rminus1 if not get_mpi_rank() else None), root=0)
            self.converged = get_mpi_comm().bcast(
                (self.converged if not get_mpi_rank() else None), root=0)
    else:
        if success:
            self.Rminus1_last = Rminus1
    # Do we want to learn a better proposal pdf?
    if self.learn_proposal and not self.converged and success:
        good_Rminus1 = (self.learn_proposal_Rminus1_max >
                        self.Rminus1_last > self.learn_proposal_Rminus1_min)
        if not good_Rminus1:
            if not get_mpi_rank():
                self.log.info("Bad convergence statistics: "
                              "waiting until the next checkpoint.")
            return
        if get_mpi():
            # Non-root processes allocate a buffer to receive the root's matrix
            if get_mpi_rank():
                mean_of_covs = np.empty(
                    (self.model.prior.d(), self.model.prior.d()))
            get_mpi_comm().Bcast(mean_of_covs, root=0)
        else:
            mean_of_covs = covs[0]
        try:
            self.proposer.set_covariance(mean_of_covs)
        except Exception:
            self.log.debug(
                "Updating covariance matrix failed unexpectedly. "
                "waiting until next checkpoint.")
        else:
            # Report the update only if it actually took place
            if not get_mpi_rank():
                self.log.info("Updated covariance matrix of proposal pdf.")
                self.log.debug("%r", mean_of_covs)
    # Save checkpoint info
    self.write_checkpoint()
def check_convergence_and_learn_proposal(self):
    """
    Checks the convergence of the sampling process (MPI only), and, if requested,
    learns a new covariance matrix for the proposal distribution from the covariance
    of the last samples.

    Under MPI, the root process computes the R-1 statistic of the means from the
    gathered per-chain means and covariances and, once that criterion has been
    fulfilled twice in a row, the analogous statistic for the bounds of the
    confidence intervals. The outcome is then broadcast to all processes.
    If the diagnostics could not be computed at this checkpoint, the proposal is
    left untouched until the next one.

    Without MPI no convergence check is performed, and the covariance of the single
    chain is used to update the proposal.
    """
    # Compute and gather means, covs and CL intervals of last half of chains
    first = int(self.n() / 2)
    mean = self.collection.mean(first=first)
    cov = self.collection.cov(first=first)
    # No logging of warnings temporarily, so getdist won't complain unnecessarily.
    # Logging is re-enabled even if getdist raises.
    logging.disable(logging.WARNING)
    try:
        mcsamples = self.collection.sampled_to_getdist_mcsamples(first=first)
    finally:
        logging.disable(logging.NOTSET)
    bound = np.array([[
        mcsamples.confidence(i, limfrac=self.Rminus1_cl_level / 2., upper=which)
        for i in range(self.prior.d())
    ] for which in [False, True]]).T
    Ns, means, covs, bounds = (
        np.array(get_mpi_comm().gather(x) if get_mpi() else [x])
        for x in (self.n(), mean, cov, bound))
    # Compute convergence diagnostics
    if get_mpi():
        if get_mpi_rank() == 0:
            # "Within" or "W" term -- our "units" for assessing convergence
            # and our prospective new covariance matrix
            mean_of_covs = np.average(covs, weights=Ns, axis=0)
            # "Between" or "B" term
            # We don't weight with the number of samples in the chains here:
            # shorter chains will likely be outliers, and we want to notice them
            cov_of_means = np.cov(means.T)  # , fweights=Ns)
            # For numerical stability, we turn cov_of_means into a correlation matrix:
            #   rho = (diag(Sigma))^(-1/2) * Sigma * (diag(Sigma))^(-1/2)
            # and apply the same transformation to the mean of covs (same eigenvals!)
            diagSinvsqrt = np.diag(np.power(np.diag(cov_of_means), -0.5))
            corr_of_means = diagSinvsqrt.dot(cov_of_means).dot(diagSinvsqrt)
            norm_mean_of_covs = diagSinvsqrt.dot(mean_of_covs).dot(diagSinvsqrt)
            # Cholesky of (normalized) mean of covs and eigvals of Linv*cov_of_means*L
            try:
                chol = np.linalg.cholesky(norm_mean_of_covs)
            except np.linalg.LinAlgError:
                self.log.warning(
                    "Negative covariance eigenvectors. "
                    "This may mean that the covariance of the samples does not "
                    "contain enough information at this point. "
                    "Skipping this checkpoint")
                success = False
            else:
                chol_inv = np.linalg.inv(chol)
                eigvals = np.linalg.eigvalsh(
                    chol_inv.dot(corr_of_means).dot(chol_inv.T))
                Rminus1 = max(np.abs(eigvals))
                # For real square matrices, a possible def of the cond number is:
                condition_number = Rminus1 / min(np.abs(eigvals))
                self.log.debug("Condition number = %g", condition_number)
                self.log.debug("Eigenvalues = %r", eigvals)
                self.log.info(
                    "Convergence of means: R-1 = %f after %d samples",
                    Rminus1, self.n())
                success = True
                # Have we converged in means?
                # (criterion must be fulfilled twice in a row)
                if (max(Rminus1, getattr(self, "Rminus1_last", np.inf)) <
                        self.Rminus1_stop):
                    # Check the convergence of the bounds of the confidence intervals
                    # Same as R-1, but with the rms deviation from the mean bound
                    # in units of the mean standard deviation of the chains
                    Rminus1_cl = (np.std(bounds, axis=0).T /
                                  np.sqrt(np.diag(mean_of_covs)))
                    self.log.debug("normalized std's of bounds = %r", Rminus1_cl)
                    self.log.info(
                        "Convergence of bounds: R-1 = %f after %d samples",
                        np.max(Rminus1_cl), self.n())
                    if np.max(Rminus1_cl) < self.Rminus1_cl_stop:
                        self.converged = True
                        self.log.info("The run has converged!")
        # Broadcast and save the convergence status and the last R-1 of means
        # (only the value passed by the root process is actually distributed)
        success = get_mpi_comm().bcast(
            (success if not get_mpi_rank() else None), root=0)
        if success:
            self.Rminus1_last = get_mpi_comm().bcast(
                (Rminus1 if not get_mpi_rank() else None), root=0)
            self.converged = get_mpi_comm().bcast(
                (self.converged if not get_mpi_rank() else None), root=0)
    # Do we want to learn a better proposal pdf?
    if not self.learn_proposal or self.converged:
        return
    # update iff (not MPI, or MPI and "good" Rminus1)
    if get_mpi():
        # If the diagnostics failed at this checkpoint, `Rminus1_last` is stale (or
        # not set at all, if this is the first checkpoint) and the mean of covs
        # could not be Cholesky-decomposed: do not learn from it. `success` has
        # been broadcast, so all processes return together here.
        if not success:
            return
        good_Rminus1 = (self.learn_proposal_Rminus1_max > self.Rminus1_last >
                        self.learn_proposal_Rminus1_min)
        if not good_Rminus1:
            if not get_mpi_rank():
                self.log.info("Bad convergence statistics: "
                              "waiting until the next checkpoint.")
            return
        # Non-root processes allocate a buffer to receive the root's mean of covs
        if get_mpi_rank():
            mean_of_covs = np.empty((self.prior.d(), self.prior.d()))
        get_mpi_comm().Bcast(mean_of_covs, root=0)
    else:
        mean_of_covs = covs[0]
    self.proposer.set_covariance(mean_of_covs)
    if not get_mpi_rank():
        self.log.info("Updated covariance matrix of proposal pdf.")
        self.log.debug("%r", mean_of_covs)
def install_script():
    """
    Command-line entry point of the installation tool for external modules.

    Only a process with a falsy MPI rank does anything: it sets up the logger,
    parses the command-line arguments, loads the given input files and calls
    ``install`` with them. Any other process returns immediately.
    """
    from cobaya.mpi import get_mpi_rank
    if get_mpi_rank():
        return
    # Configure the logger ASAP
    logger_setup()
    log = logging.getLogger(__name__.split(".")[-1])
    # Parse arguments
    import argparse
    parser = argparse.ArgumentParser(
        description="Cobaya's installation tool for external modules.")
    parser.add_argument(
        "files", action="store", nargs="+", metavar="input_file.yaml",
        help="One or more input files.")
    parser.add_argument(
        "-p", "--path", action="store", nargs=1, required=True,
        metavar="/install/path",
        help="Desired path where to install external modules.")
    # Plain boolean switches, off by default
    for flags, description in [
            (("-f", "--force"),
             "Force re-installation of apparently installed modules."),
            (("--no-progress-bars",),
             "No progress bars shown. Shorter logs (used in Travis).")]:
        parser.add_argument(
            *flags, action="store_true", default=False, help=description)
    # "--just-X" works by switching *off* the installation of the other kind,
    # hence the crossed destinations
    only_one_kind = parser.add_mutually_exclusive_group(required=False)
    for short_flag, long_flag, description, switched_off in [
            ("-c", "--just-code", "Install code of the modules.", _data),
            ("-d", "--just-data", "Install data of the modules.", _code)]:
        only_one_kind.add_argument(
            short_flag, long_flag, action="store_false", default=True,
            help=description, dest=switched_off)
    arguments = parser.parse_args()
    from cobaya.input import load_input
    try:
        infos = [load_input(input_file) for input_file in arguments.files]
    except HandledException:
        log.error("Maybe you meant to pass an installation path? "
                  "In that case, use '--path=/path/to/modules'.")
        raise HandledException
    # Launch installer
    options = {
        key: getattr(arguments, key)
        for key in ["force", _code, _data, "no_progress_bars"]
    }
    install(*infos, path=arguments.path[0], **options)
def close(self):
    """
    Loads the sample of live points from ``PolyChord``'s raw output and writes it
    (if ``txt`` output requested).

    All the work is done by the process with a falsy MPI rank only: it loads the
    main sample, the per-cluster samples (unless clustering was explicitly
    disabled) and the evidences listed in the ``.stats`` file.
    """
    # process 0 or single (non-MPI process)
    main_process = not get_mpi_rank()
    if main_process:
        self.log.info(
            "Loading PolyChord's results: samples and evidences.")
        self.n_sampled = len(self.parametrization.sampled_params())
        self.n_derived = len(self.parametrization.derived_params())
        self.n_liks = len(self.likelihood._likelihoods)
        settings = self.pc_settings
        prefix = os.path.join(settings.base_dir, settings.file_root)
        self.collection = self.save_sample(prefix + ".txt", "1")
        if settings.do_clustering is not False:  # NB: "None" == "default"
            self.clusters = {}
            has_folder = hasattr(self.output, "folder")
            raw_clusters_dir = os.path.join(settings.base_dir, clusters)
            for filename in os.listdir(raw_clusters_dir):
                if not filename.startswith(settings.file_root):
                    continue
                if has_folder:
                    cluster_folder = os.path.join(
                        self.output.folder, self.output.prefix + "_" + clusters)
                    if not os.path.exists(cluster_folder):
                        os.mkdir(cluster_folder)
                # The cluster index sits between the file root and the extension
                index_text = filename[len(settings.file_root) + 1:-len(".txt")]
                try:
                    index = int(index_text)
                except ValueError:
                    continue
                # Temporarily redirect the output to the folder of the clusters
                if has_folder:
                    old_folder = self.output.folder
                    self.output.folder = cluster_folder
                raw_file = os.path.join(settings.base_dir, clusters, filename)
                self.clusters[index] = {
                    "sample": self.save_sample(raw_file, str(index))
                }
                if has_folder:
                    self.output.folder = old_folder
        # Prepare the evidence
        pre = "log(Z"
        with open(prefix + ".stats", "r") as statsfile:
            evidence_lines = [
                line for line in statsfile.readlines() if line.startswith(pre)
            ]
        for line in evidence_lines:
            fields = line.split("=")
            logZ, logZstd = [
                float(number.replace("(Still Active)", ""))
                for number in fields[-1].split("+/-")
            ]
            # Empty for the global evidence, the cluster index otherwise
            component = fields[0].lstrip(pre + "_").rstrip(") ")
            if not component:
                self.logZ, self.logZstd = logZ, logZstd
            elif settings.do_clustering:
                index = int(component)
                self.clusters[index]["logZ"] = logZ
                self.clusters[index]["logZstd"] = logZstd
    # The results are currently not broadcast to the rest of the processes:
    # if get_mpi():
    #     bcast_from_0 = lambda attrname: setattr(self,
    #         attrname, get_mpi_comm().bcast(getattr(self, attrname, None), root=0))
    #     map(bcast_from_0, ["collection", "logZ", "logZstd", "clusters"])
    if main_process:
        self.log.info("Finished! Raw PolyChord output stored in '%s'.",
                      self.pc_settings.base_dir)
def __init__(self, info_sampler, model, output=None, packages_path=None, name=None):
    """
    Actual initialization of the class. Loads the default and input information and
    call the custom ``initialize`` method.

    [Do not modify this one.]

    :param info_sampler: options of the sampler; a copy is kept as the updated info.
    :param model: model to be sampled; stored as ``self.model``.
    :param output: output driver; stored as ``self.output``.
    :param packages_path: passed on to the parent class initializer.
    :param name: passed on to the parent class initializer.
    :raises LoggedError: if the seed is not an integer in the accepted range, or if
        (in the main process) the checkpoint found belongs to a different sampler.
    """
    self.model = model
    self.output = output
    self._updated_info = deepcopy_where_possible(info_sampler)
    super().__init__(info_sampler, packages_path=packages_path,
                     name=name, initialize=False, standalone=False)
    # Seed, if requested
    if getattr(self, "seed", None) is not None:
        if not isinstance(self.seed, int) or not (0 <= self.seed <= 2**32 - 1):
            raise LoggedError(
                self.log, "Seeds must be a *positive integer* < 2**32 - 1, "
                          "but got %r with type %r", self.seed, type(self.seed))
        # MPI-awareness: sum the rank to the seed
        if more_than_one_process():
            self.seed += get_mpi_rank()
        self.mpi_warning("This run has been SEEDED with seed %d", self.seed)
    # Load checkpoint info, if resuming
    if self.output.is_resuming() and not isinstance(self, Minimizer):
        try:
            checkpoint_info = yaml_load_file(self.checkpoint_filename())
            try:
                # Restore the saved state as attributes of this sampler
                for k, v in checkpoint_info[kinds.sampler][
                        self.get_name()].items():
                    setattr(self, k, v)
                self.mpi_info("Resuming from previous sample!")
            except KeyError:
                if is_main_process():
                    raise LoggedError(
                        self.log, "Checkpoint file found at '%s' "
                                  "but it corresponds to a different sampler.",
                        self.checkpoint_filename())
        except (IOError, TypeError):
            # No checkpoint file (or no file name available): nothing to restore
            pass
    else:
        # Not resuming: clear the leftovers of previous runs. Each file is removed
        # independently, so that a missing checkpoint file does not prevent the
        # removal of a stale progress file.
        for get_filename in (self.checkpoint_filename, self.progress_filename):
            try:
                os.remove(get_filename())
            except (OSError, TypeError):
                # File not present, or no file name available
                pass
    self._set_rng()
    self.initialize()
    self._release_rng()
    self.model.set_cache_size(self._get_requested_cache_size())
    # Add to the updated info some values which are
    # only available after initialisation
    self._updated_info[_version] = self.get_version()
def initialise(self):
    """
    Imports the PolyChord sampler and prepares its arguments.

    Sets ``self.pc`` (the imported interface), ``self.pc_settings``, the map from the
    unit hypercube to the prior domain (``self.pc_prior``) and the log-volume of that
    domain (``self.logvolume``). The process with a falsy MPI rank also creates the
    folders for the raw output.

    :raises HandledException: if a local PolyChord cannot be found or is not compiled,
        if its Python interface cannot be imported, or if some prior is unbounded.
    """
    if not get_mpi_rank():  # rank = 0 (MPI master) or None (no MPI)
        self.log.info("Initializing")
    # If path not given, try using general path to modules
    path_to_installation = get_path_to_installation()
    if not self.path and path_to_installation:
        self.path = os.path.join(path_to_installation, "code", pc_repo_name)
    if self.path:
        if not get_mpi_rank():
            self.log.info("Importing *local* PolyChord from " + self.path)
        pc_py_path = os.path.join(self.path, "PyPolyChord")
        pc_build_path = os.path.join(self.path, "build")
        # A missing "build" folder (OSError) or one without a "lib.*" subfolder
        # (StopIteration) both mean that PolyChord is not there or not compiled
        try:
            post = next(d for d in os.listdir(pc_build_path)
                        if d.startswith("lib."))
        except (OSError, StopIteration):
            self.log.error(
                "Either PolyChord is not in the given folder, "
                "'%s', or you have not compiled it.", self.path)
            raise HandledException
        pc_build_path = os.path.join(pc_build_path, post)
        # Inserting the previously found path into the list of import folders
        sys.path.insert(0, pc_build_path)
        sys.path.insert(0, pc_py_path)
    else:
        self.log.info("Importing *global* PolyChord.")
    try:
        import PyPolyChord
        from PyPolyChord.settings import PolyChordSettings
    except ImportError:
        self.log.error(
            "Couldn't find the PolyChord python interface. "
            "Make sure that you have compiled it, and that you either\n"
            " (a) specify a path (you didn't) or\n"
            " (b) install the Python interface globally with\n"
            " '/path/to/PolyChord/python setup.py install --user'")
        raise HandledException
    self.pc = PyPolyChord
    # Prepare arguments and settings
    self.nDims = self.prior.d()
    self.nDerived = (len(self.parametrization.derived_params()) + 1 +
                     len(self.likelihood._likelihoods))
    self.pc_settings = PolyChordSettings(self.nDims, self.nDerived)
    # Copy the options explicitly set by the user; `None` means "PolyChord's default"
    for p in [
            "nlive", "num_repeats", "nprior", "do_clustering",
            "precision_criterion", "max_ndead", "boost_posterior", "feedback",
            "update_files", "posteriors", "equals", "cluster_posteriors",
            "write_resume", "read_resume", "write_stats", "write_live",
            "write_dead", "base_dir", "grade_frac", "grade_dims"
    ]:
        v = getattr(self, p)
        if v is not None:
            setattr(self.pc_settings, p, v)
    # Fill the automatic ones
    if getattr(self, "feedback", None) is None:
        values = {
            logging.CRITICAL: 0, logging.ERROR: 0, logging.WARNING: 0,
            logging.INFO: 1, logging.DEBUG: 2
        }
        self.pc_settings.feedback = values[self.log.getEffectiveLevel()]
    try:
        output_folder = self.output.folder
        output_prefix = self.output.prefix or "pc"
    except AttributeError:
        # dummy output -- no resume!
        from tempfile import gettempdir
        output_folder = gettempdir()
        from random import random
        # Random 6-digit (at most) hexadecimal prefix
        output_prefix = hex(int(random() * 16**6))[2:]
        self.pc_settings.read_resume = False
    self.pc_settings.base_dir = os.path.join(output_folder,
                                             self.pc_settings.base_dir)
    self.pc_settings.file_root = output_prefix
    if not get_mpi_rank():
        # Creating output folder, if it does not exist (just one process)
        if not os.path.exists(self.pc_settings.base_dir):
            os.makedirs(self.pc_settings.base_dir)
        # Idem, a clusters folder if needed -- notice that PolyChord's default
        # is "True", here "None", hence the funny condition below
        if self.pc_settings.do_clustering is not False:  # None here means "default"
            try:
                os.makedirs(os.path.join(self.pc_settings.base_dir, clusters))
            except OSError:  # exists!
                pass
        self.log.info("Storing raw PolyChord output in '%s'.",
                      self.pc_settings.base_dir)
    # Exploiting the speed hierarchy (disabled for now)
    # sort blocks by parameters order and check contiguity (required by PolyChord!!!)
    # speeds, blocks = zip(*self.likelihood.speed_blocked_params(as_indices=True))
    # speeds, blocks = np.array(speeds), np.array(blocks)
    # weird behaviour of np.argsort when there is only 1 block
    # if len(blocks) > 1:
    #     sorting_indices = np.argsort(blocks, axis=0)
    # else:
    #     sorting_indices = [0]
    # speeds, blocks = speeds[sorting_indices], blocks[sorting_indices]
    # if np.all([np.all(block==range(block[0], block[-1]+1)) for block in blocks]):
    self.log.warning("Speed hierarchy exploitation disabled for now!")
    #     self.pc_args["grade_frac"] = list(speeds)
    #     self.pc_args["grade_dims"] = [len(block) for block in blocks]
    #     self.log.info("Exploiting a speed hierarchy with speeds %r and blocks %r",
    #                   speeds, blocks)
    # else:
    #     self.log.warning("Some speed blocks are not contiguous: PolyChord cannot deal "
    #                      "with the speed hierarchy. Not exploiting it.")
    # prior conversion from the hypercube
    bounds = self.prior.bounds(
        confidence_for_unbounded=self.confidence_for_unbounded)
    # Check if priors are bounded (nan's to inf)
    inf = np.where(np.isinf(bounds))
    if len(inf[0]):
        params_names = self.prior.names()
        params = [params_names[i] for i in sorted(set(inf[0]))]
        # NB: the names are joined first and then inserted into the message
        self.log.error(
            "PolyChord needs bounded priors, but the parameter(s) '%s' "
            "is(are) unbounded.", "', '".join(params))
        raise HandledException
    locs = bounds[:, 0]
    scales = bounds[:, 1] - bounds[:, 0]
    # Linear map from the unit hypercube to the (bounded) prior domain
    self.pc_prior = lambda x: (locs + np.array(x) * scales).tolist()
    # We will need the volume of the prior domain, since PolyChord divides by it
    self.logvolume = np.log(np.prod(scales))
    # Done!
    if not get_mpi_rank():
        self.log.info("Calling PolyChord with arguments:")
        for p, v in inspect.getmembers(self.pc_settings,
                                       lambda a: not (callable(a))):
            if not p.startswith("_"):
                self.log.info(" %s: %s", p, v)
def initialize(self):
    """
    Initializes the sampler: creates the proposal distribution and draws the initial
    sample.

    When resuming from a non-empty chain, the last point stored in the collection is
    used as the initial one; otherwise a valid point is requested from the model
    and, if requested and the blocking has not been fixed by hand, the speeds of the
    model are measured.

    :raises LoggedError: if no parameter is being varied, if a run is being resumed
        with a different number of chains, or if the MPI version is lower than 3.
    """
    if not self.model.prior.d():
        raise LoggedError(self.log, "No parameters being varied for sampler")
    self.log.debug("Initializing")
    # MARKED FOR DEPRECATION IN v3.0
    if getattr(self, "oversample", None) is not None:
        self.log.warning(
            "*DEPRECATION*: `oversample` will be deprecated in the "
            "next version. Oversampling is now requested by setting "
            "`oversample_power` > 0.")
    # END OF DEPRECATION BLOCK
    # MARKED FOR DEPRECATION IN v3.0
    if getattr(self, "check_every", None) is not None:
        self.log.warning(
            "*DEPRECATION*: `check_every` will be deprecated in the "
            "next version. Please use `learn_every` instead.")
        # BEHAVIOUR TO BE REPLACED BY ERROR:
        self.learn_every = self.check_every
    # END OF DEPRECATION BLOCK
    if self.callback_every is None:
        self.callback_every = self.learn_every
    # Quantities that can be given in units of the dimension ("d")
    self._quants_d_units = []
    for q in ["max_tries", "learn_every", "callback_every", "burn_in"]:
        number = NumberWithUnits(getattr(self, q), "d", dtype=int)
        self._quants_d_units.append(number)
        setattr(self, q, number)
    self.output_every = NumberWithUnits(self.output_every, "s", dtype=int)
    if is_main_process():
        if self.output.is_resuming():
            # `mpi_size` may be None: treat it as a single chain, both in the
            # comparison and in the error message
            previous_size = max(self.mpi_size or 0, 1)
            current_size = max(get_mpi_size(), 1)
            if previous_size != current_size:
                raise LoggedError(
                    self.log,
                    "Cannot resume a run with a different number of chains: "
                    "was %d and now is %d.", previous_size, current_size)
        if more_than_one_process():
            if get_mpi().Get_version()[0] < 3:
                raise LoggedError(
                    self.log, "MPI use requires MPI version 3.0 or "
                              "higher to support IALLGATHER.")
    sync_processes()
    # One collection per MPI process: `name` is the MPI rank + 1
    name = str(1 + (get_mpi_rank() or 0))
    self.collection = Collection(self.model, self.output, name=name,
                                 resuming=self.output.is_resuming())
    self.current_point = OneSamplePoint(self.model)
    # Use standard MH steps by default
    self.get_new_sample = self.get_new_sample_metropolis
    # Prepare callback function
    if self.callback_function is not None:
        self.callback_function_callable = (
            get_external_function(self.callback_function))
    # Useful for getting last points added inside callback function
    self.last_point_callback = 0
    # Monitoring/restore progress
    if is_main_process():
        cols = ["N", "timestamp", "acceptance_rate", "Rminus1", "Rminus1_cl"]
        self.progress = DataFrame(columns=cols)
        self.i_learn = 1
        if self.output and not self.output.is_resuming():
            with open(self.progress_filename(), "w",
                      encoding="utf-8") as progress_file:
                progress_file.write(
                    "# " + " ".join(self.progress.columns) + "\n")
    # Get first point, to be discarded -- not possible to determine its weight
    # Still, we need to compute derived parameters, since, as the proposal "blocked",
    # we may be saving the initial state of some block.
    # NB: if resuming but nothing was written (burn-in not finished): re-start
    if self.output.is_resuming() and len(self.collection):
        last = len(self.collection) - 1
        initial_point = (
            self.collection[self.collection.sampled_params].iloc[last]
        ).values.copy()
        logpost = -(self.collection[_minuslogpost].iloc[last].copy())
        logpriors = -(
            self.collection[self.collection.minuslogprior_names].iloc[last].copy())
        loglikes = -0.5 * (
            self.collection[self.collection.chi2_names].iloc[last].copy())
        derived = (
            self.collection[self.collection.derived_params].iloc[last].values.copy())
    else:
        # NB: max_tries adjusted to dim instead of #cycles (blocking not computed yet)
        self.max_tries.set_scale(self.model.prior.d())
        self.log.info(
            "Getting initial point... (this may take a few seconds)")
        initial_point, logpost, logpriors, loglikes, derived = \
            self.model.get_valid_point(max_tries=self.max_tries.value)
        # If resuming but no existing chain, assume failed run and ignore blocking
        # if speeds measurement requested
        if self.output.is_resuming() and not len(self.collection) \
                and self.measure_speeds:
            self.blocking = None
        if self.measure_speeds and self.blocking:
            self.log.warning(
                "Parameter blocking manually fixed: speeds will not be measured."
            )
        elif self.measure_speeds:
            # `True` means "default number of evaluations"
            n = None if self.measure_speeds is True else int(self.measure_speeds)
            self.model.measure_and_set_speeds(n=n, discard=0)
    self.set_proposer_blocking()
    self.set_proposer_covmat(load=True)
    self.current_point.add(initial_point, derived=derived, logpost=logpost,
                           logpriors=logpriors, loglikes=loglikes)
    self.log.info("Initial point: %s", self.current_point)
    # Max #(learn+convergence checks) to wait,
    # in case one process dies without sending MPI_ABORT
    self.been_waiting = 0
    self.max_waiting = max(50, self.max_tries.unit_value)
    # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
    self.burn_in_left = self.burn_in.value * self.current_point.output_thin + 1
    # Initial dummy checkpoint
    # (needed when 1st "learn point" not reached in prev. run)
    self.write_checkpoint()
def initialise(self):
    """
    Initialises the sampler: creates the proposal distribution and draws the initial
    sample.

    Creates the collection and the current point of this process, chooses between
    standard Metropolis steps and fast-dragging, builds the blocked proposer and sets
    its initial covariance matrix.

    :raises HandledException: if both oversampling and dragging are requested, if
        oversampling is requested (work in progress), or if dragging is requested
        but there is no theory code or all the speeds are equal.
    """
    self.log.info("Initializing")
    # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
    self.burn_in_left = self.burn_in + 1
    # One collection per MPI process: `name` is the MPI rank + 1
    name = str(1 + (get_mpi_rank() or 0))
    self.collection = Collection(
        self.parametrization, self.likelihood, self.output, name=name)
    self.current_point = OnePoint(
        self.parametrization, self.likelihood, self.output, name=name)
    # Use the standard steps by default
    self.get_new_sample = self.get_new_sample_metropolis
    # Prepare oversampling / fast-dragging if applicable
    self.effective_max_samples = self.max_samples
    if self.oversample and self.drag:
        self.log.error(
            "Choose either oversampling or fast-dragging, not both.")
        raise HandledException
    # if (self.oversample or self.drag) and len(set(factors)) == 1:
    #     self.log.error("All block speeds are similar: "
    #                    "no dragging or oversampling possible.")
    #     raise HandledException
    if self.oversample:
        factors, blocks = self.likelihood.speeds_of_params(
            oversampling_factors=True)
        self.oversampling_factors = factors
        # WIP: actually, we would have to re-normalise to the dimension of the blocks.
        self.log.info("Oversampling with factors:\n" + "\n".join([
            " %d : %r" % (f, b)
            for f, b in zip(self.oversampling_factors, blocks)
        ]))
        # WIP: useless until likelihoods have STATES!
        self.log.error("Sorry, oversampling is WIP")
        raise HandledException
    elif self.drag:
        # WIP: for now, can only separate between theory and likelihoods
        # until likelihoods have states
        if not self.likelihood.theory:
            self.log.error(
                "WIP: dragging disabled for now when no theory code present."
            )
            raise HandledException
        # if self.max_speed_slow < min(speeds) or self.max_speed_slow >= max(speeds):
        #     self.log.error("The maximum speed considered slow, `max_speed_slow`, must be "
        #                    "%g <= `max_speed_slow < %g, and is %g",
        #                    min(speeds), max(speeds), self.max_speed_slow)
        #     raise HandledException
        speeds, blocks = self.likelihood.speeds_of_params(
            int_speeds=True, fast_slow=True)
        if np.all(speeds == speeds[0]):
            self.log.error(
                "All speeds are equal: cannot drag! Make sure to define, "
                "especially, the speed of the fastest likelihoods.")
            # Stop here, as for every other error above: going on would either
            # fail below (single block) or "drag" without any actual oversampling
            raise HandledException
        self.i_last_slow_block = 0  # just theory can be slow for now
        fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
        self.n_slow = sum(
            len(blocks[i]) for i in range(1 + self.i_last_slow_block))
        self.drag_interp_steps = int(
            self.drag * np.round(min(speeds[1:]) / speeds[0]))
        self.log.info("Dragging with oversampling per step:\n" + "\n".join([
            " %d : %r" % (f, b)
            for f, b in zip([1, self.drag_interp_steps],
                            [blocks[0], fast_params])
        ]))
        self.get_new_sample = self.get_new_sample_dragging
    else:
        _, blocks = self.likelihood.speeds_of_params()
        self.oversampling_factors = [1 for _ in blocks]
        self.n_slow = len(self.parametrization.sampled_params())
    # Turn parameter names into indices
    # (the list of names is built just once, not once per parameter)
    sampled_names = list(self.parametrization.sampled_params().keys())
    blocks = [[sampled_names.index(p) for p in b] for b in blocks]
    self.proposer = BlockedProposer(
        blocks,
        oversampling_factors=getattr(self, "oversampling_factors", None),
        i_last_slow_block=getattr(self, "i_last_slow_block", None),
        propose_scale=self.propose_scale)
    # Build the initial covariance matrix of the proposal
    covmat = self.initial_proposal_covmat()
    self.log.info("Sampling with covariance matrix:")
    self.log.info("%r", covmat)
    self.proposer.set_covariance(covmat)
    # Prepare callback function
    if self.callback_function is not None:
        self.callback_function_callable = (
            get_external_function(self.callback_function))