def close(self, *args):
     """
     Determines success (or not), chooses best (if MPI)
     and produces output (if requested).
     """
     # If something failed
     if not hasattr(self, "result"):
         return
     if get_mpi_size():
         results = get_mpi_comm().gather(self.result, root=0)
         if not get_mpi_rank():
             self.result = results[np.argmin([r.fun for r in results])]
     if not get_mpi_rank():
         if not self.result.success:
             self.log.error("Maximization failed! Here is the `scipy` raw result:\n%r",
                            self.result)
             raise HandledException
         self.log.info("log%s maximized at %g",
                       "likelihood" if self.ignore_prior else "posterior",
                       -self.result.fun)
         post = self.model.logposterior(self.result.x)
         recomputed_max = sum(post.loglikes) if self.ignore_prior else post.logpost
         if not np.allclose(-self.result.fun, recomputed_max):
             self.log.error("Cannot reproduce result. Something bad happened. "
                            "Recomputed max: %g at %r", recomputed_max, self.result.x)
             raise HandledException
         self.maximum = OnePoint(
             self.model, self.output, name="maximum",
             extension=("likelihood" if self.ignore_prior else "posterior"))
         self.maximum.add(self.result.x, derived=post.derived, logpost=post.logpost,
                          logpriors=post.logpriors, loglikes=post.loglikes)
         self.log.info("Parameter values at maximum:\n%s"%self.maximum.data.to_string())
         self.maximum._out_update()
Beispiel #2
0
 def format(self, record):
     self._fmt = (("[%d]" % get_mpi_rank() if get_mpi() else "") +
                  "[%(name)s] " + {
                      logging.ERROR: "*ERROR* ",
                      logging.WARNING: "*WARNING* "
                  }.get(record.levelno, "") + "%(message)s")
     return logging.Formatter.format(self, record)
Beispiel #3
0
 def run(self):
     """
     Runs the sampler.
     """
     # Get first point, to be discarded -- not possible to determine its weight
     # Still, we need to compute derived parameters, since, as the proposal "blocked",
     # we may be saving the initial state of some block.
     initial_point = self.prior.reference(max_tries=self.max_tries)
     logpost, _, _, derived = self.logposterior(initial_point)
     self.current_point.add(initial_point, derived=derived, logpost=logpost)
     self.log.info("Initial point:\n %r ", self.current_point)
     # Main loop!
     self.converged = False
     self.log.info("Sampling!" + (
         "(NB: nothing will be printed until %d burn-in samples " %
         self.burn_in + "have been obtained)" if self.burn_in else ""))
     while self.n() < self.effective_max_samples and not self.converged:
         self.get_new_sample()
         # Callback function
         if (hasattr(self, "callback_function_callable")
                 and not (max(self.n(), 1) % self.callback_every)
                 and self.current_point[_weight] == 1):
             self.callback_function_callable(self)
         # Checking convergence and (optionally) learning the covmat of the proposal
         if self.check_all_ready():
             self.check_convergence_and_learn_proposal()
     # Make sure the last batch of samples ( < output_every ) are written
     self.collection.out_update()
     if not get_mpi_rank():
         self.log.info("Sampling complete after %d accepted steps.",
                       self.n())
Beispiel #4
0
 def write_checkpoint(self):
     if not get_mpi_rank() and self.output:
         checkpoint_filename = self.checkpoint_filename()
         covmat_filename = self.covmat_filename()
         np.savetxt(covmat_filename,
                    self.proposer.get_covariance(),
                    header=" ".join(
                        list(self.model.parameterization.sampled_params())))
         checkpoint_info = {
             _sampler: {
                 self.name:
                 odict([
                     ["converged", bool(self.converged)],
                     ["Rminus1_last", self.Rminus1_last],
                     ["proposal_scale",
                      self.proposer.get_scale()],
                     ["blocks", self.blocks],
                     ["oversampling_factors", self.oversampling_factors],
                     ["i_last_slow_block", self.i_last_slow_block],
                     [
                         "burn_in",
                         (
                             self.
                             burn_in  # initial: repeat burn-in if not finished
                             if not self.n() and self.burn_in_left else "d")
                     ],  # to avoid overweighting last point of prev. run
                     ["mpi_size", get_mpi_size()]
                 ])
             }
         }
         yaml_dump_file(checkpoint_filename,
                        checkpoint_info,
                        error_if_exists=False)
         self.log.debug("Dumped checkpoint info and current covmat.")
Beispiel #5
0
 def send_error_signal(self):
     """
     Sends an error signal to the other MPI processes.
     """
     for i_rank in range(get_mpi_size()):
         if i_rank != get_mpi_rank():
             get_mpi_comm().isend(True, dest=i_rank, tag=_error_tag)
Beispiel #6
0
 def initialize(self):
     """
     Prepares the arguments for `iminuit.minimize`.
     """
     if not get_mpi_rank():
         self.log.info("Initializing")
     self.logp = (
         (lambda x: self.model.logposterior(x, make_finite=True)[0])
         if not self.ignore_prior else
         (lambda x: sum(self.model.loglikes(x, return_derived=True)[0])))
     # Initial point: sample from reference and make sure that it has finite lik/post
     this_logp = -np.inf
     while not np.isfinite(this_logp):
         initial_point = self.model.prior.reference()
         this_logp = self.logp(initial_point)
     self.kwargs = {
         "fun": (lambda x: -self.logp(x)),
         "x0": initial_point,
         "bounds": self.model.prior.bounds(confidence_for_unbounded=0.999),
         "options": {
             "maxfev": self.maxfev,
             "disp": (self.log.getEffectiveLevel() == logging.DEBUG)
         }
     }
     self.kwargs.update(self.override or {})
     self.log.debug("Arguments for iminuit.minimize:\n%r", self.kwargs)
Beispiel #7
0
 def format(self, record):
     self._fmt = (
             "[" + ("%d : " % get_mpi_rank() if more_than_one_process() else "") +
             "%(name)s" + (" %(asctime)s " if debug else "") + "] " +
             {logging.ERROR: "*ERROR* ",
              logging.WARNING: "*WARNING* "}.get(record.levelno, "") +
             "%(message)s")
     return logging.Formatter.format(self, record)
Beispiel #8
0
 def format(self, record):
     fmt = ((" %(asctime)s " if debug else "") +
            "[" + ("%d : " % mpi.get_mpi_rank()
                   if mpi.more_than_one_process() else "") +
            "%(name)s" + "] " +
            {logging.ERROR: "*ERROR* ",
             logging.WARNING: "*WARNING* "}.get(record.levelno, "") +
            "%(message)s")
     self._style._fmt = fmt
     return super().format(record)
    def products(self):
        """
        Auxiliary function to define what should be returned in a scripted call.

        Returns:
           The :class:`OnePoint` that maximizes the posterior or likelihood (depending on
           ``ignore_prior``), and the `scipy.optimize.OptimizeResult
           <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html>`_
           instance.
        """
        if not get_mpi_rank():
            return {"maximum": self.maximum, "OptimizeResult": self.result}
Beispiel #10
0
def citation_script():
    from cobaya.mpi import get_mpi_rank
    if not get_mpi_rank():
        # Configure the logger ASAP
        from cobaya.log import logger_setup
        logger_setup()
        # Parse arguments and launch
        import argparse
        parser = argparse.ArgumentParser(description="Cobaya's citation tool.")
        parser.add_argument("files", action="store", nargs="+", metavar="input_file.yaml",
                            help="One or more input files.")
        from cobaya.input import load_input
        infos = [load_input(f) for f in parser.parse_args().files]
        citation(*infos)
Beispiel #11
0
def run(info):
    assert hasattr(info, "keys"), (
        "The first argument must be a dictionary with the info needed for the run. "
        "If you were trying to pass the name of an input file instead, "
        "load it first with 'cobaya.input.load_input', "
        "or, if you were passing a yaml string, load it with 'cobaya.yaml.yaml_load'."
    )
    # Configure the logger ASAP
    # Just a dummy import before configuring the logger, until I fix root/individual level
    import getdist
    logger_setup(info.get(_debug), info.get(_debug_file))
    import logging
    # Initialize output, if required
    output = Output(output_prefix=info.get(_output_prefix),
                    resume=info.get(_resume),
                    force_output=info.pop(_force, None),
                    force_reproducible=info.get(_force_reproducible,
                                                _force_reproducible_default))
    # Create the full input information, including defaults for each module.
    full_info = get_full_info(info)
    if output:
        full_info[_output_prefix] = output.updated_output_prefix()
        full_info[_resume] = output.is_resuming()
    if logging.root.getEffectiveLevel() <= logging.DEBUG:
        # Don't dump unless we are doing output, just in case something not serializable
        # May be fixed in the future if we find a way to serialize external functions
        if info.get(_output_prefix) and not get_mpi_rank():
            logging.getLogger(__name__.split(".")[-1]).info(
                "Input info updated with defaults (dumped to YAML):\n%s",
                yaml_dump(full_info, force_reproducible=False))
    # We dump the info now, before modules initialization, lest it is accidentally modified
    # If resuming a sample, it checks that old and new infos are consistent
    output.dump_info(info, full_info)
    # Initialize the posterior and the sampler
    with Model(full_info[_params],
               full_info[_likelihood],
               full_info.get(_prior),
               full_info.get(_theory),
               modules=info.get(_path_install),
               timing=full_info.get(_timing),
               allow_renames=False) as model:
        with Sampler(full_info[_sampler],
                     model,
                     output,
                     resume=full_info.get(_resume),
                     modules=info.get(_path_install)) as sampler:
            sampler.run()
    # For scripted calls
    return deepcopy(full_info), sampler.products()
Beispiel #12
0
    def products(self):
        """
        Auxiliary function to define what should be returned in a scripted call.

        Returns:
           The sample ``Collection`` containing the sequentially discarded live points.
        """
        if not get_mpi_rank():
            products = {
                "sample": self.collection, "logZ": self.logZ, "logZstd": self.logZstd}
            if self.pc_settings.do_clustering:
                products.update({"clusters": self.clusters})
            return products
        else:
            return {}
Beispiel #13
0
    def check_error_signal(self):
        """
        Checks if any of the other process has sent an error signal, and fails.

        NB: This behaviour only shows up when running this sampler inside a Python script,
            not when running with `cobaya run` (in that case, the process raising an error
            will call `MPI_ABORT` and kill the rest.
        """
        for i in range(get_mpi_size()):
            if i != get_mpi_rank():
                from mpi4py import MPI
                status = MPI.Status()
                get_mpi_comm().iprobe(i, status=status)
                if status.tag == _error_tag:
                    raise LoggedError(self.log, "Another process failed! Exiting.")
Beispiel #14
0
 def check_all_ready(self):
     """
     Checks if the chain(s) is(/are) ready to check convergence and, if requested,
     learn a new covariance matrix for the proposal distribution.
     """
     msg_ready = (
         ("Ready to" if get_mpi() or self.learn_proposal else "") +
         " check convergence" +
         (" and" if get_mpi() and self.learn_proposal else "") +
         (" learn a new proposal covmat" if self.learn_proposal else ""))
     # If *just* (weight==1) got ready to check+learn
     if (self.n() > 0 and self.current_point[_weight] == 1
             and not (self.n() % self.check_every)):
         self.log.info("Checkpoint: %d samples accepted.", self.n())
         if get_mpi():
             self.been_waiting += 1
             if self.been_waiting > self.max_waiting:
                 self.log.error(
                     "Waiting for too long for all chains to be ready. "
                     "Maybe one of them is stuck or died unexpectedly?")
                 raise HandledException
         self.model.dump_timing()
         # If not MPI, we are ready
         if not get_mpi():
             if msg_ready:
                 self.log.info(msg_ready)
             return True
         # If MPI, tell the rest that we are ready -- we use a "gather"
         # ("reduce" was problematic), but we are in practice just pinging
         if not hasattr(self, "req"):  # just once!
             self.all_ready = np.empty(get_mpi_size())
             self.req = get_mpi_comm().Iallgather(np.array([1.]),
                                                  self.all_ready)
             self.log.info(msg_ready + " (waiting for the rest...)")
     # If all processes are ready to learn (= communication finished)
     if self.req.Test() if hasattr(self, "req") else False:
         # Sanity check: actually all processes have finished
         assert np.all(self.all_ready == 1), (
             "This should not happen! Notify the developers. (Got %r)",
             self.all_ready)
         if get_mpi_rank() == 0:
             self.log.info("All chains are r" + msg_ready[1:])
         delattr(self, "req")
         self.been_waiting = 0
         # Just in case, a barrier here
         get_mpi_comm().barrier()
         return True
     return False
Beispiel #15
0
 def initialise(self):
     """Prepares the arguments for `scipy.minimize`."""
     if not get_mpi_rank():
         self.log.info("Initializing")
     # Initial point: sample from reference and make sure that it has finite lik/post
     logp = -np.inf
     while not np.isfinite(logp):
         initial_point = self.prior.reference()
         logp = self.logposterior(initial_point, ignore_prior=self.ignore_prior)[0]
     self.kwargs = {
         "fun": (lambda x: -self.logposterior(
             x, ignore_prior=self.ignore_prior, make_finite=True)[0]),
         "x0": initial_point,
         "bounds": self.prior.bounds(confidence_for_unbounded=0.999),
         "tol": self.tol,
         "options": {
             "maxiter": self.maxiter,
             "disp": (self.log.getEffectiveLevel() == logging.DEBUG)}}
     self.kwargs.update(self.override or {})
     self.log.debug("Arguments for scipy.optimize.minimize:\n%r", self.kwargs)
Beispiel #16
0
 def run(self):
     """
     Runs the sampler.
     """
     # Get first point, to be discarded -- not possible to determine its weight
     # Still, we need to compute derived parameters, since, as the proposal "blocked",
     # we may be saving the initial state of some block.
     # NB: if resuming but nothing was written (burn-in not finished): re-start
     self.log.info("Initial point:")
     if self.resuming and self.collection.n():
         initial_point = (self.collection[
             self.collection.sampled_params].ix[self.collection.n() -
                                                1]).values.copy()
         logpost = -(self.collection[_minuslogpost].ix[self.collection.n() -
                                                       1].copy())
         logpriors = -(self.collection[self.collection.prior_names].ix[
             self.collection.n() - 1].copy())
         loglikes = -0.5 * (self.collection[self.collection.chi2_names].ix[
             self.collection.n() - 1].copy())
         derived = (self.collection[self.collection.derived_params].ix[
             self.collection.n() - 1].values.copy())
     else:
         initial_point = self.model.prior.reference(
             max_tries=self.max_tries)
         logpost, logpriors, loglikes, derived = self.model.logposterior(
             initial_point)
     self.current_point.add(initial_point,
                            derived=derived,
                            logpost=logpost,
                            logpriors=logpriors,
                            loglikes=loglikes)
     self.log.info(
         "\n%s",
         self.current_point.data.to_string(index=False,
                                           line_width=_line_width))
     # Initial dummy checkpoint (needed when 1st checkpoint not reached in prev. run)
     self.write_checkpoint()
     # Main loop!
     self.log.info("Sampling!" + (
         " (NB: nothing will be printed until %d burn-in samples " %
         self.burn_in + "have been obtained)" if self.burn_in else ""))
     while self.n() < self.effective_max_samples and not self.converged:
         self.get_new_sample()
         # Callback function
         if (hasattr(self, "callback_function_callable")
                 and not (max(self.n(), 1) % self.callback_every)
                 and self.current_point[_weight] == 1):
             self.callback_function_callable(self)
         # Checking convergence and (optionally) learning the covmat of the proposal
         if self.check_all_ready():
             self.check_convergence_and_learn_proposal()
         if self.n() == self.effective_max_samples:
             self.log.info(
                 "Reached maximum number of accepted steps allowed. "
                 "Stopping.")
     # Make sure the last batch of samples ( < output_every ) are written
     self.collection._out_update()
     if get_mpi():
         Ns = (lambda x: np.array(get_mpi_comm().gather(x)))(self.n())
     else:
         Ns = [self.n()]
     if not get_mpi_rank():
         self.log.info("Sampling complete after %d accepted steps.",
                       sum(Ns))
Beispiel #17
0
 def initialize(self):
     """Initializes the sampler:
     creates the proposal distribution and draws the initial sample."""
     self.log.debug("Initializing")
     for p in [
             "burn_in", "max_tries", "output_every", "check_every",
             "callback_every"
     ]:
         setattr(
             self, p,
             read_dnumber(getattr(self, p), self.model.prior.d(),
                          dtype=int))
     if self.callback_every is None:
         self.callback_every = self.check_every
     # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
     self.burn_in_left = self.burn_in + 1
     # Max # checkpoints to wait, in case one process dies without sending MPI_ABORT
     self.been_waiting = 0
     self.max_waiting = max(50, self.max_tries / self.model.prior.d())
     if self.resuming and (max(self.mpi_size or 0, 1) != max(
             get_mpi_size(), 1)):
         self.log.error(
             "Cannot resume a sample with a different number of chains: "
             "was %d and now is %d.", max(self.mpi_size, 1),
             max(get_mpi_size(), 1))
         raise HandledException
     if not self.resuming and self.output:
         # Delete previous files (if not "forced", the run would have already failed)
         if ((os.path.abspath(self.covmat_filename()) != os.path.abspath(
                 str(self.covmat)))):
             try:
                 os.remove(self.covmat_filename())
             except OSError:
                 pass
         # There may be more that chains than expected,
         # if #ranks was bigger in a previous run
         i = 0
         while True:
             i += 1
             collection_filename, _ = self.output.prepare_collection(str(i))
             try:
                 os.remove(collection_filename)
             except OSError:
                 break
     # One collection per MPI process: `name` is the MPI rank + 1
     name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank()))
     self.collection = Collection(self.model,
                                  self.output,
                                  name=name,
                                  resuming=self.resuming)
     self.current_point = OnePoint(self.model, OutputDummy({}), name=name)
     # Use standard MH steps by default
     self.get_new_sample = self.get_new_sample_metropolis
     # Prepare oversampling / dragging if applicable
     self.effective_max_samples = self.max_samples
     if self.oversample and self.drag:
         self.log.error("Choose either oversampling or dragging, not both.")
         raise HandledException
     if self.oversample:
         factors, blocks = self.model.likelihood._speeds_of_params(
             int_speeds=True)
         self.oversampling_factors = factors
         self.log.info("Oversampling with factors:\n" + "\n".join([
             "   %d : %r" % (f, b)
             for f, b in zip(self.oversampling_factors, blocks)
         ]))
         self.i_last_slow_block = None
         # No way right now to separate slow and fast
         slow_params = list(self.model.parameterization.sampled_params())
     elif self.drag:
         speeds, blocks = self.model.likelihood._speeds_of_params(
             fast_slow=True, int_speeds=True)
         # For now, no blocking inside either fast or slow: just 2 blocks
         self.i_last_slow_block = 0
         if np.all(speeds == speeds[0]):
             self.log.error(
                 "All speeds are equal or too similar: cannot drag! "
                 "Make sure to define accurate likelihoods' speeds.")
             raise HandledException
         # Make the 1st factor 1:
         speeds = [1, speeds[1] / speeds[0]]
         # Target: dragging step taking as long as slow step
         self.drag_interp_steps = self.drag * speeds[1]
         # Per dragging step, the (fast) posterior is evaluated *twice*,
         self.drag_interp_steps /= 2
         self.drag_interp_steps = int(np.round(self.drag_interp_steps))
         fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
         # Not too much or too little dragging
         drag_limits = [(int(l) * len(fast_params) if l is not None else l)
                        for l in self.drag_limits]
         if drag_limits[
                 0] is not None and self.drag_interp_steps < drag_limits[0]:
             self.log.warning(
                 "Number of dragging steps clipped from below: was not "
                 "enough to efficiently explore the fast directions -- "
                 "avoid this limit by decreasing 'drag_limits[0]'.")
             self.drag_interp_steps = drag_limits[0]
         if drag_limits[
                 1] is not None and self.drag_interp_steps > drag_limits[1]:
             self.log.warning(
                 "Number of dragging steps clipped from above: "
                 "excessive, probably inefficient, exploration of the "
                 "fast directions -- "
                 "avoid this limit by increasing 'drag_limits[1]'.")
             self.drag_interp_steps = drag_limits[1]
         # Re-scale steps between checkpoint and callback to the slow dimensions only
         slow_params = list(chain(*blocks[:1 + self.i_last_slow_block]))
         self.n_slow = len(slow_params)
         for p in ["check_every", "callback_every"]:
             setattr(
                 self, p,
                 int(getattr(self, p) * self.n_slow / self.model.prior.d()))
         self.log.info("Dragging with oversampling per step:\n" +
                       "\n".join([
                           "   %d : %r" % (f, b)
                           for f, b in zip([1, self.drag_interp_steps],
                                           [blocks[0], fast_params])
                       ]))
         self.get_new_sample = self.get_new_sample_dragging
     else:
         _, blocks = self.model.likelihood._speeds_of_params()
         self.oversampling_factors = [1 for b in blocks]
         slow_params = list(self.model.parameterization.sampled_params())
         self.n_slow = len(slow_params)
     # Turn parameter names into indices
     self.blocks = [[
         list(self.model.parameterization.sampled_params()).index(p)
         for p in b
     ] for b in blocks]
     self.proposer = BlockedProposer(
         self.blocks,
         oversampling_factors=self.oversampling_factors,
         i_last_slow_block=self.i_last_slow_block,
         proposal_scale=self.proposal_scale)
     # Build the initial covariance matrix of the proposal, or load from checkpoint
     if self.resuming:
         covmat = np.loadtxt(self.covmat_filename())
         self.log.info("Covariance matrix from checkpoint.")
     else:
         covmat = self.initial_proposal_covmat(slow_params=slow_params)
         self.log.info("Initial covariance matrix.")
     self.log.debug(
         "Sampling with covmat:\n%s",
         DataFrame(
             covmat,
             columns=self.model.parameterization.sampled_params(),
             index=self.model.parameterization.sampled_params()).to_string(
                 line_width=_line_width))
     self.proposer.set_covariance(covmat)
     # Prepare callback function
     if self.callback_function is not None:
         self.callback_function_callable = (get_external_function(
             self.callback_function))
Beispiel #18
0
def post(info, sample=None):
    logger_setup(info.get(_debug), info.get(_debug_file))
    log = logging.getLogger(__name__.split(".")[-1])
    try:
        info_post = info[_post]
    except KeyError:
        log.error("No 'post' block given. Nothing to do!")
        raise HandledException
    if get_mpi_rank():
        log.warning(
            "Post-processing is not yet MPI-able. Doing nothing for rank > 1 processes."
        )
        return
    # 1. Load existing sample
    output_in = Output(output_prefix=info.get(_output_prefix), resume=True)
    info_in = load_input(output_in.file_full) if output_in else deepcopy(info)
    dummy_model_in = DummyModel(info_in[_params], info_in[_likelihood],
                                info_in.get(_prior, None),
                                info_in.get(_theory, None))
    if output_in:
        i = 0
        while True:
            try:
                collection = Collection(dummy_model_in,
                                        output_in,
                                        name="%d" % (1 + i),
                                        load=True,
                                        onload_skip=info_post.get("skip", 0),
                                        onload_thin=info_post.get("thin", 1))
                if i == 0:
                    collection_in = collection
                else:
                    collection_in._append(collection)
                i += 1
            except IOError:
                break
    elif sample:
        if isinstance(sample, Collection):
            sample = [sample]
        collection_in = deepcopy(sample[0])
        for s in sample[1:]:
            try:
                collection_in._append(s)
            except:
                log.error("Failed to load some of the input samples.")
                raise HandledException
        i = len(sample)
    else:
        log.error(
            "Not output from where to load from or input collections given.")
        raise HandledException
    log.info("Loaded %d chain%s. Will process %d samples.", i,
             "s" if i - 1 else "", collection_in.n())
    if collection_in.n() <= 1:
        log.error(
            "Not enough samples for post-processing. Try using a larger sample, "
            "or skipping or thinning less.")
        raise HandledException
    # 2. Compare old and new info: determine what to do
    add = info_post.get("add", {})
    remove = info_post.get("remove", {})
    # Add a dummy 'one' likelihood, to absorb unused parameters
    if not add.get(_likelihood):
        add[_likelihood] = odict()
    add[_likelihood].update({"one": None})
    # Expand the "add" info
    add = get_full_info(add)
    # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters
    out = {_params: deepcopy(info_in[_params])}
    for p in remove.get(_params, {}):
        pinfo = info_in[_params].get(p)
        if pinfo is None or not is_derived_param(pinfo):
            log.error(
                "You tried to remove parameter '%s', which is not a derived paramter. "
                "Only derived parameters can be removed during post-processing.",
                p)
            raise HandledException
        out[_params].pop(p)
    mlprior_names_add = []
    for p, pinfo in add.get(_params, {}).items():
        pinfo_in = info_in[_params].get(p)
        if is_sampled_param(pinfo):
            if not is_sampled_param(pinfo_in):
                # No added sampled parameters (de-marginalisation not implemented)
                if pinfo_in is None:
                    log.error(
                        "You added a new sampled parameter %r (maybe accidentaly "
                        "by adding a new likelihood that depends on it). "
                        "Adding new sampled parameters is not possible. Try fixing "
                        "it to some value.", p)
                    raise HandledException
                else:
                    log.error(
                        "You tried to change the prior of parameter '%s', "
                        "but it was not a sampled parameter. "
                        "To change that prior, you need to define as an external one.",
                        p)
                    raise HandledException
            if mlprior_names_add[:1] != _prior_1d_name:
                mlprior_names_add = (
                    [_minuslogprior + _separator + _prior_1d_name] +
                    mlprior_names_add)
        elif is_derived_param(pinfo):
            if p in out[_params]:
                log.error(
                    "You tried to add derived parameter '%s', which is already "
                    "present. To force its recomputation, 'remove' it too.", p)
                raise HandledException
        elif is_fixed_param(pinfo):
            # Only one possibility left "fixed" parameter that was not present before:
            # input of new likelihood, or just an argument for dynamical derived (dropped)
            if ((p in info_in[_params] and pinfo[_p_value] !=
                 (pinfo_in or {}).get(_p_value, None))):
                log.error(
                    "You tried to add a fixed parameter '%s: %r' that was already present"
                    " but had a different value or was not fixed. This is not allowed. "
                    "The old info of the parameter was '%s: %r'", p,
                    dict(pinfo), p, dict(pinfo_in))
                raise HandledException
        else:
            log.error("This should not happen. Contact the developers.")
            raise HandledException
        out[_params][p] = pinfo
    # For the likelihood only, turn the rest of *derived* parameters into constants,
    # so that the likelihoods do not try to compute them)
    # But be careful to exclude *input* params that have a "derived: True" value
    # (which in "full info" turns into "derived: 'lambda [x]: [x]'")
    out_params_like = deepcopy(out[_params])
    for p, pinfo in out_params_like.items():
        if ((is_derived_param(pinfo) and not (_p_value in pinfo)
             and p not in add.get(_params, {}))):
            out_params_like[p] = {_p_value: np.nan, _p_drop: True}
    parameterization_like = Parameterization(out_params_like,
                                             ignore_unused_sampled=True)
    # 2.2 Manage adding/removing priors and likelihoods
    warn_remove = False
    for level in [_prior, _likelihood]:
        out[level] = getattr(dummy_model_in, level)
        if level == _prior:
            out[level].remove(_prior_1d_name)
        for pdf in info_post.get("remove", {}).get(level, []) or []:
            try:
                out[level].remove(pdf)
                warn_remove = True
            except ValueError:
                log.error(
                    "Trying to remove %s '%s', but it is not present. "
                    "Existing ones: %r", level, pdf, out[level])
                raise HandledException
    if warn_remove:
        log.warning("You are removing a prior or likelihood pdf. "
                    "Notice that if the resulting posterior is much wider "
                    "than the original one, or displaced enough, "
                    "it is probably safer to explore it directly.")
    if _prior in add:
        mlprior_names_add += [
            _minuslogprior + _separator + name for name in add[_prior]
        ]
        out[_prior] += list(add[_prior])
    prior_recompute_1d = (mlprior_names_add[:1] == [
        _minuslogprior + _separator + _prior_1d_name
    ])
    # Don't initialise the theory code if not adding/recomputing theory,
    # theory-derived params or likelihoods
    recompute_theory = info_in.get(_theory) and not (list(
        add[_likelihood]) == ["one"] and not any([
            is_derived_param(pinfo) for pinfo in add.get(_params, {}).values()
        ]))
    if recompute_theory:
        # Inherit from the original chain (needs input|output_params, renames, etc
        theory = list(info_in[_theory].keys())[0]
        info_theory_out = odict([[
            theory,
            recursive_update(deepcopy(info_in[_theory][theory]),
                             add.get(_theory, {theory: {}})[theory])
        ]])
    else:
        info_theory_out = None
    chi2_names_add = [
        _chi2 + _separator + name for name in add[_likelihood]
        if name is not "one"
    ]
    out[_likelihood] += [l for l in add[_likelihood] if l is not "one"]
    if recompute_theory:
        log.warn(
            "You are recomputing the theory, but in the current version this does "
            "not force recomputation of any likelihood or derived parameter, "
            "unless explicitly removed+added.")
    for level in [_prior, _likelihood]:
        for i, x_i in enumerate(out[level]):
            if x_i in list(out[level])[i + 1:]:
                log.error(
                    "You have added %s '%s', which was already present. If you "
                    "want to force its recomputation, you must also 'remove' it.",
                    level, x_i)
                raise HandledException
    # 3. Create output collection
    if "suffix" not in info_post:
        log.error("You need to provide a 'suffix' for your chains.")
        raise HandledException
    # Use default prefix if it exists. If it does not, produce no output by default.
    # {post: {output: None}} suppresses output, and if it's a string, updates it.
    out_prefix = info_post.get(_output_prefix, info.get(_output_prefix))
    if out_prefix not in [None, False]:
        out_prefix += "_" + _post + "_" + info_post["suffix"]
    output_out = Output(output_prefix=out_prefix,
                        force_output=info.get(_force))
    info_out = deepcopy(info)
    info_out[_post] = info_post
    # Updated with input info and extended (full) add info
    info_out.update(info_in)
    info_out[_post]["add"] = add
    dummy_model_out = DummyModel(out[_params],
                                 out[_likelihood],
                                 info_prior=out[_prior])
    if recompute_theory:
        theory = list(info_theory_out.keys())[0]
        if _input_params not in info_theory_out[theory]:
            log.error(
                "You appear to be post-processing a chain generated with an older "
                "version of Cobaya. For post-processing to work, please edit the "
                "'[root]__full.info' file of the original chain to add, inside the "
                "theory code block, the list of its input parameters. E.g.\n----\n"
                "theory:\n  %s:\n    input_params: [param1, param2, ...]\n"
                "----\nIf you get strange errors later, it is likely that you did not "
                "specify the correct set of theory parameters.\n"
                "The full set of input parameters are %s.", theory,
                list(dummy_model_out.parameterization.input_params()))
            raise HandledException
    prior_add = Prior(dummy_model_out.parameterization, add.get(_prior))
    likelihood_add = Likelihood(add[_likelihood],
                                parameterization_like,
                                info_theory=info_theory_out,
                                modules=info.get(_path_install))
    # Remove auxiliary "one" before dumping -- 'add' *is* info_out[_post]["add"]
    add[_likelihood].pop("one")
    if likelihood_add.theory:
        # Make sure that theory.needs is called at least once, for adjustments
        likelihood_add.theory.needs()
    collection_out = Collection(dummy_model_out, output_out, name="1")
    output_out.dump_info({}, info_out)
    # 4. Main loop!
    log.info("Running post-processing...")
    last_percent = 0
    for i, point in enumerate(collection_in.data.itertuples()):
        log.debug("Point: %r", point)
        sampled = [
            getattr(point, param)
            for param in dummy_model_in.parameterization.sampled_params()
        ]
        derived = odict(
            [[param, getattr(point, param, None)]
             for param in dummy_model_out.parameterization.derived_params()])
        inputs = odict([[
            param,
            getattr(
                point, param,
                dummy_model_in.parameterization.constant_params().get(
                    param,
                    dummy_model_out.parameterization.constant_params().get(
                        param, None)))
        ] for param in dummy_model_out.parameterization.input_params()])
        # Solve inputs that depend on a function and were not saved
        # (we don't use the Parameterization_to_input method in case there are references
        #  to functions that cannot be loaded at the moment)
        for p, value in inputs.items():
            if value is None:
                func = dummy_model_out.parameterization._input_funcs[p]
                args = dummy_model_out.parameterization._input_args[p]
                inputs[p] = func(*[getattr(point, arg) for arg in args])
        # Add/remove priors
        priors_add = prior_add.logps(sampled)
        if not prior_recompute_1d:
            priors_add = priors_add[1:]
        logpriors_add = odict(zip(mlprior_names_add, priors_add))
        logpriors_new = [
            logpriors_add.get(name, -getattr(point, name, 0))
            for name in collection_out.minuslogprior_names
        ]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug("New set of priors: %r",
                      dict(zip(dummy_model_out.prior, logpriors_new)))
        if -np.inf in logpriors_new:
            continue
        # Add/remove likelihoods
        output_like = []
        if likelihood_add:
            # Notice "one" (last in likelihood_add) is ignored: not in chi2_names
            loglikes_add = odict(
                zip(chi2_names_add,
                    likelihood_add.logps(inputs, _derived=output_like)))
            output_like = dict(zip(likelihood_add.output_params, output_like))
        else:
            loglikes_add = dict()
        loglikes_new = [
            loglikes_add.get(name, -0.5 * getattr(point, name, 0))
            for name in collection_out.chi2_names
        ]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug("New set of likelihoods: %r",
                      dict(zip(dummy_model_out.likelihood, loglikes_new)))
            if output_like:
                log.debug("New set of likelihood-derived parameters: %r",
                          output_like)
        if -np.inf in loglikes_new:
            continue
        # Add/remove derived parameters and change priors of sampled parameters
        for p in add[_params]:
            if p in dummy_model_out.parameterization._directly_output:
                derived[p] = output_like[p]
            elif p in dummy_model_out.parameterization._derived_funcs:
                func = dummy_model_out.parameterization._derived_funcs[p]
                args = dummy_model_out.parameterization._derived_args[p]
                derived[p] = func(*[
                    getattr(point, arg, output_like.get(arg, None))
                    for arg in args
                ])
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug(
                "New derived parameters: %r",
                dict([[
                    p, derived[p]
                ] for p in dummy_model_out.parameterization.derived_params()
                      if p in add[_params]]))
        # Save to the collection (keep old weight for now)
        collection_out.add(sampled,
                           derived=derived.values(),
                           weight=getattr(point, _weight),
                           logpriors=logpriors_new,
                           loglikes=loglikes_new)
        # Display progress
        percent = np.round(i / collection_in.n() * 100)
        if percent != last_percent and not percent % 5:
            last_percent = percent
            progress_bar(log, percent, " (%d/%d)" % (i, collection_in.n()))
    if not collection_out.data.last_valid_index():
        log.error(
            "No elements in the final sample. Possible causes: "
            "added a prior or likelihood valued zero over the full sampled domain, "
            "or the computation of the theory failed everywhere, etc.")
        raise HandledException
    # Reweight -- account for large dynamic range!
    #   Prefer to rescale +inf to finite, and ignore final points with -inf.
    #   Remove -inf's (0-weight), and correct indices
    difflogmax = max(collection_in[_minuslogpost] -
                     collection_out[_minuslogpost])
    collection_out.data[_weight] *= np.exp(collection_in[_minuslogpost] -
                                           collection_out[_minuslogpost] -
                                           difflogmax)
    collection_out.data = (
        collection_out.data[collection_out.data.weight > 0].reset_index(
            drop=True))
    collection_out._n = collection_out.data.last_valid_index() + 1
    # Write!
    collection_out._out_update()
    log.info("Finished! Final number of samples: %d", collection_out.n())
    return info_out, {"sample": collection_out}
Beispiel #19
0
 def initialize(self):
     """Imports the PolyChord sampler and prepares its arguments."""
     if not get_mpi_rank():  # rank = 0 (MPI master) or None (no MPI)
         self.log.info("Initializing")
     # If path not given, try using general path to modules
     if not self.path and self.path_install:
         self.path = get_path(self.path_install)
     if self.path:
         if not get_mpi_rank():
             self.log.info("Importing *local* PolyChord from " + self.path)
             if not os.path.exists(os.path.realpath(self.path)):
                 self.log.error("The given path does not exist.")
                 raise HandledException
         pc_build_path = get_build_path(self.path)
         if not pc_build_path:
             self.log.error("Either PolyChord is not in the given folder, "
                            "'%s', or you have not compiled it.", self.path)
             raise HandledException
         # Inserting the previously found path into the list of import folders
         sys.path.insert(0, pc_build_path)
     else:
         self.log.info("Importing *global* PolyChord.")
     try:
         import pypolychord
         from pypolychord.settings import PolyChordSettings
         self.pc = pypolychord
     except ImportError:
         self.log.error(
             "Couldn't find the PolyChord python interface. "
             "Make sure that you have compiled it, and that you either\n"
             " (a) specify a path (you didn't) or\n"
             " (b) install the Python interface globally with\n"
             "     '/path/to/PolyChord/python setup.py install --user'")
         raise HandledException
     # Prepare arguments and settings
     self.nDims = self.model.prior.d()
     self.nDerived = (len(self.model.parameterization.derived_params()) +
                      len(self.model.prior) + len(self.model.likelihood._likelihoods))
     if self.logzero is None:
         self.logzero = np.nan_to_num(-np.inf)
     if self.max_ndead == np.inf:
         self.max_ndead = -1
     for p in ["nlive", "num_repeats", "nprior", "max_ndead"]:
         setattr(self, p, read_dnumber(getattr(self, p), self.nDims, dtype=int))
     # Fill the automatic ones
     if getattr(self, "feedback", None) is None:
         values = {logging.CRITICAL: 0, logging.ERROR: 0, logging.WARNING: 0,
                   logging.INFO: 1, logging.DEBUG: 2}
         self.feedback = values[self.log.getEffectiveLevel()]
     try:
         output_folder = getattr(self.output, "folder")
         output_prefix = getattr(self.output, "prefix") or ""
         self.read_resume = self.resuming
     except AttributeError:
         # dummy output -- no resume!
         self.read_resume = False
         from tempfile import gettempdir
         output_folder = gettempdir()
         if not get_mpi_rank():
             from random import random
             output_prefix = hex(int(random() * 16 ** 6))[2:]
         else:
             output_prefix = None
         if get_mpi():
             output_prefix = get_mpi_comm().bcast(output_prefix, root=0)
     self.base_dir = os.path.join(output_folder, self.base_dir)
     self.file_root = output_prefix
     if not get_mpi_rank():
         # Creating output folder, if it does not exist (just one process)
         if not os.path.exists(self.base_dir):
             os.makedirs(self.base_dir)
         # Idem, a clusters folder if needed -- notice that PolyChord's default
         # is "True", here "None", hence the funny condition below
         if self.do_clustering is not False:  # None here means "default"
             try:
                 os.makedirs(os.path.join(self.base_dir, clusters))
             except OSError:  # exists!
                 pass
         self.log.info("Storing raw PolyChord output in '%s'.",
                       self.base_dir)
     # Exploiting the speed hierarchy
     speeds, blocks = self.model.likelihood._speeds_of_params(int_speeds=True)
     blocks_flat = list(chain(*blocks))
     self.ordering = [
         blocks_flat.index(p) for p in self.model.parameterization.sampled_params()]
     self.grade_dims = [len(block) for block in blocks]
     #        self.grade_frac = np.array(
     #            [i*j for i,j in zip(self.grade_dims, speeds)])
     #        self.grade_frac = (
     #            self.grade_frac/sum(self.grade_frac))
     # Disabled for now. We need a way to override the "time" part of the meaning of grade_frac
     self.grade_frac = [1 / len(self.grade_dims) for _ in self.grade_dims]
     # Assign settings
     pc_args = ["nlive", "num_repeats", "nprior", "do_clustering",
                "precision_criterion", "max_ndead", "boost_posterior", "feedback",
                "logzero", "update_files", "posteriors", "equals",
                "cluster_posteriors", "write_resume", "read_resume", "write_stats",
                "write_live", "write_dead", "base_dir", "grade_frac", "grade_dims",
                "feedback", "read_resume", "base_dir", "file_root", "grade_frac",
                "grade_dims"]
     self.pc_settings = PolyChordSettings(
         self.nDims, self.nDerived, seed=(self.seed if self.seed is not None else -1),
         **{p: getattr(self, p) for p in pc_args if getattr(self, p) is not None})
     # prior conversion from the hypercube
     bounds = self.model.prior.bounds(
         confidence_for_unbounded=self.confidence_for_unbounded)
     # Check if priors are bounded (nan's to inf)
     inf = np.where(np.isinf(bounds))
     if len(inf[0]):
         params_names = self.model.parameterization.sampled_params()
         params = [params_names[i] for i in sorted(list(set(inf[0])))]
         self.log.error("PolyChord needs bounded priors, but the parameter(s) '"
                        "', '".join(params) + "' is(are) unbounded.")
         raise HandledException
     locs = bounds[:, 0]
     scales = bounds[:, 1] - bounds[:, 0]
     # This function re-scales the parameters AND puts them in the right order
     self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales).tolist()
     # We will need the volume of the prior domain, since PolyChord divides by it
     self.logvolume = np.log(np.prod(scales))
     # Done!
     if not get_mpi_rank():
         self.log.info("Calling PolyChord with arguments:")
         for p, v in inspect.getmembers(self.pc_settings, lambda a: not (callable(a))):
             if not p.startswith("_"):
                 self.log.info("  %s: %s", p, v)
Beispiel #20
0
def install_script():
    from cobaya.mpi import get_mpi_rank
    if not get_mpi_rank():
        # Configure the logger ASAP
        logger_setup()
        log = logging.getLogger(__name__.split(".")[-1])
        # Parse arguments
        import argparse
        parser = argparse.ArgumentParser(
            description="Cobaya's installation tool for external modules.")
        parser.add_argument(
            "files",
            action="store",
            nargs="+",
            metavar="input_file.yaml",
            help="One or more input files "
            "(or 'cosmo' for a basic collection of cosmological modules)")
        parser.add_argument(
            "-" + _modules_path_arg[0],
            "--" + _modules_path_arg,
            action="store",
            nargs=1,
            required=True,
            metavar="/install/path",
            help="Desired path where to install external modules.")
        parser.add_argument(
            "-f",
            "--force",
            action="store_true",
            default=False,
            help="Force re-installation of apparently installed modules.")
        parser.add_argument(
            "--no-progress-bars",
            action="store_true",
            default=False,
            help="No progress bars shown. Shorter logs (used in Travis).")
        group_just = parser.add_mutually_exclusive_group(required=False)
        group_just.add_argument("-C",
                                "--just-code",
                                action="store_false",
                                default=True,
                                help="Install code of the modules.",
                                dest=_data)
        group_just.add_argument("-D",
                                "--just-data",
                                action="store_false",
                                default=True,
                                help="Install data of the modules.",
                                dest=_code)
        arguments = parser.parse_args()
        if arguments.files == ["cosmo"]:
            log.info(
                "Installing cosmological modules (input files will be ignored")
            from cobaya.cosmo_input import install_basic
            infos = [install_basic]
        else:
            from cobaya.input import load_input
            infos = [load_input(f) for f in arguments.files]
        # Launch installer
        install(*infos,
                path=getattr(arguments, _modules_path_arg)[0],
                **{
                    arg: getattr(arguments, arg)
                    for arg in ["force", _code, _data, "no_progress_bars"]
                })
Beispiel #21
0
def post(info, sample=None):
    logger_setup(info.get(_debug), info.get(_debug_file))
    log = logging.getLogger(__name__.split(".")[-1])
    # MARKED FOR DEPRECATION IN v3.0
    # BEHAVIOUR TO BE REPLACED BY ERROR:
    check_deprecated_modules_path(info)
    # END OF DEPRECATION BLOCK
    try:
        info_post = info[_post]
    except KeyError:
        raise LoggedError(log, "No 'post' block given. Nothing to do!")
    if get_mpi_rank():
        log.warning(
            "Post-processing is not yet MPI-aware. Doing nothing for rank > 1 processes.")
        return
    if info.get(_resume):
        log.warning("Resuming not implemented for post-processing. Re-starting.")
    # 1. Load existing sample
    output_in = get_output(output_prefix=info.get(_output_prefix))
    if output_in:
        try:
            info_in = output_in.reload_updated_info()
        except FileNotFoundError:
            raise LoggedError(log, "Error loading input model: "
                                   "could not find input info at %s",
                              output_in.file_updated)
    else:
        info_in = deepcopy_where_possible(info)
    dummy_model_in = DummyModel(info_in[_params], info_in[kinds.likelihood],
                                info_in.get(_prior, None))
    if output_in:
        if not output_in.find_collections():
            raise LoggedError(log, "No samples found for the input model with prefix %s",
                              os.path.join(output_in.folder, output_in.prefix))
        collection_in = output_in.load_collections(
            dummy_model_in, skip=info_post.get("skip", 0), thin=info_post.get("thin", 1),
            concatenate=True)
    elif sample:
        if isinstance(sample, Collection):
            sample = [sample]
        collection_in = deepcopy(sample[0])
        for s in sample[1:]:
            try:
                collection_in.append(s)
            except:
                raise LoggedError(log, "Failed to load some of the input samples.")
    else:
        raise LoggedError(log,
                          "Not output from where to load from or input collections given.")
    log.info("Will process %d samples.", len(collection_in))
    if len(collection_in) <= 1:
        raise LoggedError(
            log, "Not enough samples for post-processing. Try using a larger sample, "
                 "or skipping or thinning less.")
    # 2. Compare old and new info: determine what to do
    add = info_post.get(_post_add, {}) or {}
    remove = info_post.get(_post_remove, {})
    # Add a dummy 'one' likelihood, to absorb unused parameters
    if not add.get(kinds.likelihood):
        add[kinds.likelihood] = {}
    add[kinds.likelihood]["one"] = None
    # Expand the "add" info
    add = update_info(add)
    # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters
    out = {_params: deepcopy_where_possible(info_in[_params])}
    for p in remove.get(_params, {}):
        pinfo = info_in[_params].get(p)
        if pinfo is None or not is_derived_param(pinfo):
            raise LoggedError(
                log,
                "You tried to remove parameter '%s', which is not a derived parameter. "
                "Only derived parameters can be removed during post-processing.", p)
        out[_params].pop(p)
    # Force recomputation of aggregated chi2
    for p in list(out[_params]):
        if p.startswith(_get_chi2_name("")):
            out[_params].pop(p)
    mlprior_names_add = []
    for p, pinfo in add.get(_params, {}).items():
        pinfo_in = info_in[_params].get(p)
        if is_sampled_param(pinfo):
            if not is_sampled_param(pinfo_in):
                # No added sampled parameters (de-marginalisation not implemented)
                if pinfo_in is None:
                    raise LoggedError(
                        log, "You added a new sampled parameter %r (maybe accidentally "
                             "by adding a new likelihood that depends on it). "
                             "Adding new sampled parameters is not possible. Try fixing "
                             "it to some value.", p)
                else:
                    raise LoggedError(
                        log,
                        "You tried to change the prior of parameter '%s', "
                        "but it was not a sampled parameter. "
                        "To change that prior, you need to define as an external one.", p)
            if mlprior_names_add[:1] != _prior_1d_name:
                mlprior_names_add = ([_minuslogprior + _separator + _prior_1d_name]
                                     + mlprior_names_add)
        elif is_derived_param(pinfo):
            if p in out[_params]:
                raise LoggedError(
                    log, "You tried to add derived parameter '%s', which is already "
                         "present. To force its recomputation, 'remove' it too.", p)
        elif is_fixed_param(pinfo):
            # Only one possibility left "fixed" parameter that was not present before:
            # input of new likelihood, or just an argument for dynamical derived (dropped)
            if ((p in info_in[_params] and
                 pinfo[partag.value] != (pinfo_in or {}).get(partag.value, None))):
                raise LoggedError(
                    log,
                    "You tried to add a fixed parameter '%s: %r' that was already present"
                    " but had a different value or was not fixed. This is not allowed. "
                    "The old info of the parameter was '%s: %r'",
                    p, dict(pinfo), p, dict(pinfo_in))
        else:
            raise LoggedError(log, "This should not happen. Contact the developers.")
        out[_params][p] = pinfo
    # For the likelihood only, turn the rest of *derived* parameters into constants,
    # so that the likelihoods do not try to compute them)
    # But be careful to exclude *input* params that have a "derived: True" value
    # (which in "updated info" turns into "derived: 'lambda [x]: [x]'")
    out_params_like = deepcopy_where_possible(out[_params])
    for p, pinfo in out_params_like.items():
        if ((is_derived_param(pinfo) and not (partag.value in pinfo)
             and p not in add.get(_params, {}))):
            out_params_like[p] = {partag.value: np.nan, partag.drop: True}
    # 2.2 Manage adding/removing priors and likelihoods
    warn_remove = False
    for level in [_prior, kinds.likelihood]:
        out[level] = getattr(dummy_model_in, level)
        if level == _prior:
            out[level].remove(_prior_1d_name)
        for pdf in info_post.get(_post_remove, {}).get(level, []) or []:
            try:
                out[level].remove(pdf)
                warn_remove = True
            except ValueError:
                raise LoggedError(
                    log, "Trying to remove %s '%s', but it is not present. "
                         "Existing ones: %r", level, pdf, out[level])
    if warn_remove:
        log.warning("You are removing a prior or likelihood pdf. "
                    "Notice that if the resulting posterior is much wider "
                    "than the original one, or displaced enough, "
                    "it is probably safer to explore it directly.")
    if _prior in add:
        mlprior_names_add += [_minuslogprior + _separator + name for name in add[_prior]]
        out[_prior] += list(add[_prior])
    prior_recompute_1d = (
            mlprior_names_add[:1] == [_minuslogprior + _separator + _prior_1d_name])
    # Don't initialise the theory code if not adding/recomputing theory,
    # theory-derived params or likelihoods
    recompute_theory = info_in.get(kinds.theory) and not (
            list(add[kinds.likelihood]) == ["one"] and
            not any(is_derived_param(pinfo) for pinfo in add.get(_params, {}).values()))
    if recompute_theory:
        # Inherit from the original chain (needs input|output_params, renames, etc
        add_theory = add.get(kinds.theory)
        if add_theory:
            info_theory_out = {}
            if len(add_theory) > 1:
                log.warning('Importance sampling with more than one theory is '
                            'not really tested')
            add_theory = add_theory.copy()
            for theory, theory_info in info_in[kinds.theory].items():
                theory_copy = deepcopy_where_possible(theory_info)
                if theory in add_theory:
                    info_theory_out[theory] = \
                        recursive_update(theory_copy, add_theory.pop(theory))
                else:
                    info_theory_out[theory] = theory_copy
            info_theory_out.update(add_theory)
        else:
            info_theory_out = deepcopy_where_possible(info_in[kinds.theory])
    else:
        info_theory_out = None
    chi2_names_add = [
        _get_chi2_name(name) for name in add[kinds.likelihood] if name != "one"]
    out[kinds.likelihood] += [l for l in add[kinds.likelihood] if l != "one"]
    if recompute_theory:
        log.warning("You are recomputing the theory, but in the current version this does"
                    " not force recomputation of any likelihood or derived parameter, "
                    "unless explicitly removed+added.")
    for level in [_prior, kinds.likelihood]:
        for i, x_i in enumerate(out[level]):
            if x_i in list(out[level])[i + 1:]:
                raise LoggedError(
                    log, "You have added %s '%s', which was already present. If you "
                         "want to force its recomputation, you must also 'remove' it.",
                    level, x_i)
    # 3. Create output collection
    if _post_suffix not in info_post:
        raise LoggedError(log, "You need to provide a '%s' for your chains.",
                          _post_suffix)
    # Use default prefix if it exists. If it does not, produce no output by default.
    # {post: {output: None}} suppresses output, and if it's a string, updates it.
    out_prefix = info_post.get(_output_prefix, info.get(_output_prefix))
    if out_prefix not in [None, False]:
        out_prefix += _separator_files + _post + _separator_files + info_post[
            _post_suffix]
    output_out = get_output(output_prefix=out_prefix, force=info.get(_force))
    if output_out and not output_out.force and output_out.find_collections():
        raise LoggedError(log, "Found existing post-processing output with prefix %r. "
                               "Delete it manually or re-run with `force: True` "
                               "(or `-f`, `--force` from the shell).", out_prefix)
    elif output_out and output_out.force:
        output_out.delete_infos()
        for regexp in output_out.find_collections():
            output_out.delete_with_regexp(re.compile(regexp))
    info_out = deepcopy_where_possible(info)
    info_out[_post] = info_post
    # Updated with input info and extended (updated) add info
    info_out.update(info_in)
    info_out[_post][_post_add] = add
    dummy_model_out = DummyModel(out[_params], out[kinds.likelihood],
                                 info_prior=out[_prior])
    if recompute_theory:
        # TODO: May need updating for more than one, or maybe can be removed
        theory = list(info_theory_out)[0]
        if _input_params not in info_theory_out[theory]:
            raise LoggedError(
                log,
                "You appear to be post-processing a chain generated with an older "
                "version of Cobaya. For post-processing to work, please edit the "
                "'[root].updated.yaml' file of the original chain to add, inside the "
                "theory code block, the list of its input parameters. E.g.\n----\n"
                "theory:\n  %s:\n    input_params: [param1, param2, ...]\n"
                "----\nIf you get strange errors later, it is likely that you did not "
                "specify the correct set of theory parameters.\n"
                "The full set of input parameters are %s.",
                theory, list(dummy_model_out.parameterization.input_params()))
    # TODO: check allow_renames=False?
    # TODO: May well be simplifications here, this is v close to pre-refactor logic
    # Have not gone through or understood all the parameterization  stuff
    model_add = Model(out_params_like, add[kinds.likelihood], info_prior=add.get(_prior),
                      info_theory=info_theory_out, packages_path=info.get(_packages_path),
                      allow_renames=False, post=True,
                      prior_parameterization=dummy_model_out.parameterization)
    # Remove auxiliary "one" before dumping -- 'add' *is* info_out[_post][_post_add]
    add[kinds.likelihood].pop("one")
    collection_out = Collection(dummy_model_out, output_out, name="1")
    output_out.check_and_dump_info(None, info_out, check_compatible=False)
    # Prepare recomputation of aggregated chi2
    # (they need to be recomputed by hand, because its autocomputation won't pick up
    #  old likelihoods for a given type)
    all_types = {
        like: str_to_list(add[kinds.likelihood].get(
            like, info_in[kinds.likelihood].get(like)).get("type", []) or [])
        for like in out[kinds.likelihood]}
    types = set(chain(*list(all_types.values())))
    inv_types = {t: [like for like, like_types in all_types.items() if t in like_types]
                 for t in types}
    # 4. Main loop!
    log.info("Running post-processing...")
    last_percent = 0
    for i, point in collection_in.data.iterrows():
        log.debug("Point: %r", point)
        sampled = [point[param] for param in
                   dummy_model_in.parameterization.sampled_params()]
        derived = {param: point.get(param, None)
                   for param in dummy_model_out.parameterization.derived_params()}
        inputs = {param: point.get(
            param, dummy_model_in.parameterization.constant_params().get(
                param, dummy_model_out.parameterization.constant_params().get(
                    param, None)))
            for param in dummy_model_out.parameterization.input_params()}
        # Solve inputs that depend on a function and were not saved
        # (we don't use the Parameterization_to_input method in case there are references
        #  to functions that cannot be loaded at the moment)
        for p, value in inputs.items():
            if value is None:
                func = dummy_model_out.parameterization._input_funcs[p]
                args = dummy_model_out.parameterization._input_args[p]
                inputs[p] = func(*[point.get(arg) for arg in args])
        # Add/remove priors
        priors_add = model_add.prior.logps(sampled)
        if not prior_recompute_1d:
            priors_add = priors_add[1:]
        logpriors_add = dict(zip(mlprior_names_add, priors_add))
        logpriors_new = [logpriors_add.get(name, - point.get(name, 0))
                         for name in collection_out.minuslogprior_names]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug(
                "New set of priors: %r", dict(zip(dummy_model_out.prior, logpriors_new)))
        if -np.inf in logpriors_new:
            continue
        # Add/remove likelihoods
        output_like = []
        if add[kinds.likelihood]:
            # Notice "one" (last in likelihood_add) is ignored: not in chi2_names
            loglikes_add, output_like = model_add.logps(inputs, return_derived=True)
            loglikes_add = dict(zip(chi2_names_add, loglikes_add))
            output_like = dict(zip(model_add.output_params, output_like))
        else:
            loglikes_add = dict()
        loglikes_new = [loglikes_add.get(name, -0.5 * point.get(name, 0))
                        for name in collection_out.chi2_names]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug(
                "New set of likelihoods: %r",
                dict(zip(dummy_model_out.likelihood, loglikes_new)))
            if output_like:
                log.debug("New set of likelihood-derived parameters: %r", output_like)
        if -np.inf in loglikes_new:
            continue
        # Add/remove derived parameters and change priors of sampled parameters
        for p in add[_params]:
            if p in dummy_model_out.parameterization._directly_output:
                derived[p] = output_like[p]
            elif p in dummy_model_out.parameterization._derived_funcs:
                func = dummy_model_out.parameterization._derived_funcs[p]
                args = dummy_model_out.parameterization._derived_args[p]
                derived[p] = func(
                    *[point.get(arg, output_like.get(arg, None)) for arg in args])
        # We need to recompute the aggregated chi2 by hand
        for type_, likes in inv_types.items():
            derived[_get_chi2_name(type_)] = sum(
                [-2 * lvalue for lname, lvalue
                 in zip(collection_out.chi2_names, loglikes_new)
                 if _undo_chi2_name(lname) in likes])
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug("New derived parameters: %r",
                      dict([(p, derived[p])
                            for p in dummy_model_out.parameterization.derived_params()
                            if p in add[_params]]))
        # Save to the collection (keep old weight for now)
        collection_out.add(
            sampled, derived=derived.values(), weight=point.get(_weight),
            logpriors=logpriors_new, loglikes=loglikes_new)
        # Display progress
        percent = np.round(i / len(collection_in) * 100)
        if percent != last_percent and not percent % 5:
            last_percent = percent
            progress_bar(log, percent, " (%d/%d)" % (i, len(collection_in)))
    if not collection_out.data.last_valid_index():
        raise LoggedError(
            log, "No elements in the final sample. Possible causes: "
                 "added a prior or likelihood valued zero over the full sampled domain, "
                 "or the computation of the theory failed everywhere, etc.")
    # Reweight -- account for large dynamic range!
    #   Prefer to rescale +inf to finite, and ignore final points with -inf.
    #   Remove -inf's (0-weight), and correct indices
    difflogmax = max(collection_in[_minuslogpost] - collection_out[_minuslogpost])
    collection_out.data[_weight] *= np.exp(
        collection_in[_minuslogpost] - collection_out[_minuslogpost] - difflogmax)
    collection_out.data = (
        collection_out.data[collection_out.data.weight > 0].reset_index(drop=True))
    collection_out._n = collection_out.data.last_valid_index() + 1
    # Write!
    collection_out.out_update()
    log.info("Finished! Final number of samples: %d", len(collection_out))
    return info_out, {"sample": collection_out}
Beispiel #22
0
    def initialize(self):
        self.mpi_info("Initializing")
        self.max_evals = read_dnumber(self.max_evals, self.model.prior.d())
        # Configure target
        method = self.model.loglike if self.ignore_prior else self.model.logpost
        kwargs = {"make_finite": True}
        if self.ignore_prior:
            kwargs["return_derived"] = False
        self.logp = lambda x: method(x, **kwargs)
        # Try to load info from previous samples.
        # If none, sample from reference (make sure that it has finite like/post)
        initial_point = None
        if self.output:
            files = self.output.find_collections()
            collection_in = None
            if files:
                if more_than_one_process():
                    if 1 + get_mpi_rank() <= len(files):
                        collection_in = Collection(self.model,
                                                   self.output,
                                                   name=str(1 +
                                                            get_mpi_rank()),
                                                   resuming=True)
                else:
                    collection_in = self.output.load_collections(
                        self.model, concatenate=True)
            if collection_in:
                initial_point = (collection_in.bestfit()
                                 if self.ignore_prior else collection_in.MAP())
                initial_point = initial_point[list(
                    self.model.parameterization.sampled_params())].values
                self.log.info("Starting from %s of previous chain:",
                              "best fit" if self.ignore_prior else "MAP")
        if initial_point is None:
            this_logp = -np.inf
            while not np.isfinite(this_logp):
                initial_point = self.model.prior.reference()
                this_logp = self.logp(initial_point)
            self.log.info("Starting from random initial point:")
        self.log.info(
            dict(
                zip(self.model.parameterization.sampled_params(),
                    initial_point)))

        self._bounds = self.model.prior.bounds(
            confidence_for_unbounded=self.confidence_for_unbounded)

        # TODO: if ignore_prior, one should use *like* covariance (this is *post*)
        covmat = self._load_covmat(self.output)[0]

        # scale by conditional parameter widths (since not using correlation structure)
        scales = np.minimum(1 / np.sqrt(np.diag(np.linalg.inv(covmat))),
                            (self._bounds[:, 1] - self._bounds[:, 0]) / 3)

        # Cov and affine transformation
        # Transform to space where initial point is at centre, and cov is normalised
        # Cannot do rotation, as supported minimization routines assume bounds aligned
        # with the parameter axes.
        self._affine_transform_matrix = np.diag(1 / scales)
        self._inv_affine_transform_matrix = np.diag(scales)
        self._scales = scales
        self._affine_transform_baseline = initial_point
        initial_point = self.affine_transform(initial_point)
        np.testing.assert_allclose(initial_point,
                                   np.zeros(initial_point.shape))
        bounds = np.array(
            [self.affine_transform(self._bounds[:, i]) for i in range(2)]).T
        # Configure method
        if self.method.lower() == "bobyqa":
            self.minimizer = pybobyqa.solve
            self.kwargs = {
                "objfun": (lambda x: -self.logp_transf(x)),
                "x0":
                initial_point,
                "bounds":
                np.array(list(zip(*bounds))),
                "seek_global_minimum":
                (True if get_mpi_size() in [0, 1] else False),
                "maxfun":
                int(self.max_evals)
            }
            self.kwargs = recursive_update(deepcopy(self.kwargs),
                                           self.override_bobyqa or {})
            self.log.debug(
                "Arguments for pybobyqa.solve:\n%r",
                {k: v
                 for k, v in self.kwargs.items() if k != "objfun"})
        elif self.method.lower() == "scipy":
            self.minimizer = scpminimize
            self.kwargs = {
                "fun": (lambda x: -self.logp_transf(x)),
                "x0": initial_point,
                "bounds": bounds,
                "options": {
                    "maxiter": self.max_evals,
                    "disp": (self.log.getEffectiveLevel() == logging.DEBUG)
                }
            }
            self.kwargs = recursive_update(deepcopy(self.kwargs),
                                           self.override_scipy or {})
            self.log.debug(
                "Arguments for scipy.optimize.minimize:\n%r",
                {k: v
                 for k, v in self.kwargs.items() if k != "fun"})
        else:
            methods = ["bobyqa", "scipy"]
            raise LoggedError(self.log,
                              "Method '%s' not recognized. Try one of %r.",
                              self.method, methods)
Beispiel #23
0
 def check_convergence_and_learn_proposal(self):
     """
     Checks the convergence of the sampling process (MPI only), and, if requested,
     learns a new covariance matrix for the proposal distribution from the covariance
     of the last samples.
     """
     if get_mpi():
         # Compute and gather means, covs and CL intervals of last half of chains
         mean = self.collection.mean(first=int(self.n() / 2))
         cov = self.collection.cov(first=int(self.n() / 2))
         mcsamples = self.collection._sampled_to_getdist_mcsamples(
             first=int(self.n() / 2))
         try:
             bound = np.array([[
                 mcsamples.confidence(i,
                                      limfrac=self.Rminus1_cl_level / 2.,
                                      upper=which)
                 for i in range(self.model.prior.d())
             ] for which in [False, True]]).T
             success_bounds = True
         except:
             bound = None
             success_bounds = False
         Ns, means, covs, bounds = map(
             lambda x: np.array(get_mpi_comm().gather(x)),
             [self.n(), mean, cov, bound])
     else:
         # Compute and gather means, covs and CL intervals of last m-1 chain fractions
         m = 1 + self.Rminus1_single_split
         cut = int(self.collection.n() / m)
         if cut <= 1:
             self.log.error(
                 "Not enough points in chain to check convergence. "
                 "Increase `check_every` or reduce `Rminus1_single_split`.")
             raise HandledException
         Ns = (m - 1) * [cut]
         means = np.array([
             self.collection.mean(first=i * cut, last=(i + 1) * cut - 1)
             for i in range(1, m)
         ])
         covs = np.array([
             self.collection.cov(first=i * cut, last=(i + 1) * cut - 1)
             for i in range(1, m)
         ])
         # No logging of warnings temporarily, so getdist won't complain unnecessarily
         logging.disable(logging.WARNING)
         mcsampleses = [
             self.collection._sampled_to_getdist_mcsamples(
                 first=i * cut, last=(i + 1) * cut - 1)
             for i in range(1, m)
         ]
         logging.disable(logging.NOTSET)
         try:
             bounds = [
                 np.array([[
                     mcs.confidence(i,
                                    limfrac=self.Rminus1_cl_level / 2.,
                                    upper=which)
                     for i in range(self.model.prior.d())
                 ] for which in [False, True]]).T for mcs in mcsampleses
             ]
             success_bounds = True
         except:
             bounds = None
             success_bounds = False
     # Compute convergence diagnostics
     if not get_mpi_rank():
         # "Within" or "W" term -- our "units" for assessing convergence
         # and our prospective new covariance matrix
         mean_of_covs = np.average(covs, weights=Ns, axis=0)
         # "Between" or "B" term
         # We don't weight with the number of samples in the chains here:
         # shorter chains will likely be outliers, and we want to notice them
         cov_of_means = np.atleast_2d(np.cov(means.T))  # , fweights=Ns)
         # For numerical stability, we turn mean_of_covs into correlation matrix:
         #   rho = (diag(Sigma))^(-1/2) * Sigma * (diag(Sigma))^(-1/2)
         # and apply the same transformation to the mean of covs (same eigenvals!)
         diagSinvsqrt = np.diag(np.power(np.diag(cov_of_means), -0.5))
         corr_of_means = diagSinvsqrt.dot(cov_of_means).dot(diagSinvsqrt)
         norm_mean_of_covs = diagSinvsqrt.dot(mean_of_covs).dot(
             diagSinvsqrt)
         # Cholesky of (normalized) mean of covs and eigvals of Linv*cov_of_means*L
         try:
             L = np.linalg.cholesky(norm_mean_of_covs)
         except np.linalg.LinAlgError:
             self.log.warning(
                 "Negative covariance eigenvectors. "
                 "This may mean that the covariance of the samples does not "
                 "contain enough information at this point. "
                 "Skipping this checkpoint")
             success = False
         else:
             Linv = np.linalg.inv(L)
             try:
                 eigvals = np.linalg.eigvalsh(
                     Linv.dot(corr_of_means).dot(Linv.T))
                 success = True
             except np.linalg.LinAlgError:
                 self.log.warning("Could not compute eigenvalues. "
                                  "Skipping this checkpoint.")
                 success = False
             if success:
                 Rminus1 = max(np.abs(eigvals))
                 # For real square matrices, a possible def of the cond number is:
                 condition_number = Rminus1 / min(np.abs(eigvals))
                 self.log.debug("Condition number = %g", condition_number)
                 self.log.debug("Eigenvalues = %r", eigvals)
                 self.log.info(
                     "Convergence of means: R-1 = %f after %d accepted steps"
                     % (Rminus1, (sum(Ns) if get_mpi() else self.n())) +
                     (" = sum(%r)" % list(Ns) if get_mpi() else ""))
                 # Have we converged in means?
                 # (criterion must be fulfilled twice in a row)
                 if max(Rminus1, self.Rminus1_last) < self.Rminus1_stop:
                     # Check the convergence of the bounds of the confidence intervals
                     # Same as R-1, but with the rms deviation from the mean bound
                     # in units of the mean standard deviation of the chains
                     if success_bounds:
                         Rminus1_cl = (np.std(bounds, axis=0).T /
                                       np.sqrt(np.diag(mean_of_covs)))
                         self.log.debug("normalized std's of bounds = %r",
                                        Rminus1_cl)
                         self.log.info(
                             "Convergence of bounds: R-1 = %f after %d " %
                             (np.max(Rminus1_cl),
                              (sum(Ns) if get_mpi() else self.n())) +
                             "accepted steps" +
                             (" = sum(%r)" % list(Ns) if get_mpi() else ""))
                         if np.max(Rminus1_cl) < self.Rminus1_cl_stop:
                             self.converged = True
                             self.log.info("The run has converged!")
                         self._Ns = Ns
                     else:
                         self.log.info(
                             "Computation of the bounds was not possible. "
                             "Waiting until the next checkpoint")
     if get_mpi():
         # Broadcast and save the convergence status and the last R-1 of means
         success = get_mpi_comm().bcast(
             (success if not get_mpi_rank() else None), root=0)
         if success:
             self.Rminus1_last = get_mpi_comm().bcast(
                 (Rminus1 if not get_mpi_rank() else None), root=0)
             self.converged = get_mpi_comm().bcast(
                 (self.converged if not get_mpi_rank() else None), root=0)
     else:
         if success:
             self.Rminus1_last = Rminus1
     # Do we want to learn a better proposal pdf?
     if self.learn_proposal and not self.converged and success:
         good_Rminus1 = (self.learn_proposal_Rminus1_max > self.Rminus1_last
                         > self.learn_proposal_Rminus1_min)
         if not good_Rminus1:
             if not get_mpi_rank():
                 self.log.info("Bad convergence statistics: "
                               "waiting until the next checkpoint.")
             return
         if get_mpi():
             if get_mpi_rank():
                 mean_of_covs = np.empty(
                     (self.model.prior.d(), self.model.prior.d()))
             get_mpi_comm().Bcast(mean_of_covs, root=0)
         elif not get_mpi():
             mean_of_covs = covs[0]
         try:
             self.proposer.set_covariance(mean_of_covs)
         except:
             self.log.debug(
                 "Updating covariance matrix failed unexpectedly. "
                 "waiting until next checkpoint.")
         if not get_mpi_rank():
             self.log.info("Updated covariance matrix of proposal pdf.")
             self.log.debug("%r", mean_of_covs)
     # Save checkpoint info
     self.write_checkpoint()
Beispiel #24
0
 def check_convergence_and_learn_proposal(self):
     """
     Checks the convergence of the sampling process (MPI only), and, if requested,
     learns a new covariance matrix for the proposal distribution from the covariance
     of the last samples.
     """
     # Compute and gather means, covs and CL intervals of last half of chains
     mean = self.collection.mean(first=int(self.n() / 2))
     cov = self.collection.cov(first=int(self.n() / 2))
     # No logging of warnings temporarily, so getdist won't complain innecessarily
     logging.disable(logging.WARNING)
     mcsamples = self.collection.sampled_to_getdist_mcsamples(
         first=int(self.n() / 2))
     logging.disable(logging.NOTSET)
     bound = np.array([[
         mcsamples.confidence(i,
                              limfrac=self.Rminus1_cl_level / 2.,
                              upper=which) for i in range(self.prior.d())
     ] for which in [False, True]]).T
     Ns, means, covs, bounds = map(
         lambda x: np.array((get_mpi_comm().gather(x)
                             if get_mpi() else [x])),
         [self.n(), mean, cov, bound])
     # Compute convergence diagnostics
     if get_mpi():
         if get_mpi_rank() == 0:
             # "Within" or "W" term -- our "units" for assessing convergence
             # and our prospective new covariance matrix
             mean_of_covs = np.average(covs, weights=Ns, axis=0)
             # "Between" or "B" term
             # We don't weight with the number of samples in the chains here:
             # shorter chains will likely be outliers, and we want to notice them
             cov_of_means = np.cov(means.T)  # , fweights=Ns)
             # For numerical stability, we turn mean_of_covs into correlation matrix:
             #   rho = (diag(Sigma))^(-1/2) * Sigma * (diag(Sigma))^(-1/2)
             # and apply the same transformation to the mean of covs (same eigenvals!)
             diagSinvsqrt = np.diag(np.power(np.diag(cov_of_means), -0.5))
             corr_of_means = diagSinvsqrt.dot(cov_of_means).dot(
                 diagSinvsqrt)
             norm_mean_of_covs = diagSinvsqrt.dot(mean_of_covs).dot(
                 diagSinvsqrt)
             # Cholesky of (normalized) mean of covs and eigvals of Linv*cov_of_means*L
             try:
                 L = np.linalg.cholesky(norm_mean_of_covs)
             except np.linalg.LinAlgError:
                 self.log.warning(
                     "Negative covariance eigenvectors. "
                     "This may mean that the covariance of the samples does not "
                     "contain enough information at this point. "
                     "Skipping this checkpoint")
                 success = False
             else:
                 Linv = np.linalg.inv(L)
                 eigvals = np.linalg.eigvalsh(
                     Linv.dot(corr_of_means).dot(Linv.T))
                 Rminus1 = max(np.abs(eigvals))
                 # For real square matrices, a possible def of the cond number is:
                 condition_number = Rminus1 / min(np.abs(eigvals))
                 self.log.debug("Condition number = %g", condition_number)
                 self.log.debug("Eigenvalues = %r", eigvals)
                 self.log.info(
                     "Convergence of means: R-1 = %f after %d samples",
                     Rminus1, self.n())
                 success = True
                 # Have we converged in means?
                 # (criterion must be fulfilled twice in a row)
                 if (max(Rminus1, getattr(self, "Rminus1_last", np.inf)) <
                         self.Rminus1_stop):
                     # Check the convergence of the bounds of the confidence intervals
                     # Same as R-1, but with the rms deviation from the mean bound
                     # in units of the mean standard deviation of the chains
                     Rminus1_cl = (np.std(bounds, axis=0).T /
                                   np.sqrt(np.diag(mean_of_covs)))
                     self.log.debug("normalized std's of bounds = %r",
                                    Rminus1_cl)
                     self.log.info(
                         "Convergence of bounds: R-1 = %f after %d samples",
                         np.max(Rminus1_cl), self.n())
                     if np.max(Rminus1_cl) < self.Rminus1_cl_stop:
                         self.converged = True
                         self.log.info("The run has converged!")
         # Broadcast and save the convergence status and the last R-1 of means
         success = get_mpi_comm().bcast(
             (success if not get_mpi_rank() else None), root=0)
         if success:
             self.Rminus1_last = get_mpi_comm().bcast(
                 (Rminus1 if not get_mpi_rank() else None), root=0)
             self.converged = get_mpi_comm().bcast(
                 (self.converged if not get_mpi_rank() else None), root=0)
     else:  # No MPI
         pass
     # Do we want to learn a better proposal pdf?
     if self.learn_proposal and not self.converged:
         # update iff (not MPI, or MPI and "good" Rminus1)
         if get_mpi():
             good_Rminus1 = (self.learn_proposal_Rminus1_max >
                             self.Rminus1_last >
                             self.learn_proposal_Rminus1_min)
             if not good_Rminus1:
                 if not get_mpi_rank():
                     self.log.info("Bad convergence statistics: "
                                   "waiting until the next checkpoint.")
                 return
         if get_mpi():
             if get_mpi_rank():
                 mean_of_covs = np.empty((self.prior.d(), self.prior.d()))
             get_mpi_comm().Bcast(mean_of_covs, root=0)
         elif not get_mpi():
             mean_of_covs = covs[0]
         self.proposer.set_covariance(mean_of_covs)
         if not get_mpi_rank():
             self.log.info("Updated covariance matrix of proposal pdf.")
             self.log.debug("%r", mean_of_covs)
Beispiel #25
0
def install_script():
    from cobaya.mpi import get_mpi_rank
    if not get_mpi_rank():
        # Configure the logger ASAP
        logger_setup()
        log = logging.getLogger(__name__.split(".")[-1])
        # Parse arguments
        import argparse
        parser = argparse.ArgumentParser(
            description="Cobaya's installation tool for external modules.")
        parser.add_argument("files",
                            action="store",
                            nargs="+",
                            metavar="input_file.yaml",
                            help="One or more input files.")
        parser.add_argument(
            "-p",
            "--path",
            action="store",
            nargs=1,
            required=True,
            metavar="/install/path",
            help="Desired path where to install external modules.")
        parser.add_argument(
            "-f",
            "--force",
            action="store_true",
            default=False,
            help="Force re-installation of apparently installed modules.")
        parser.add_argument(
            "--no-progress-bars",
            action="store_true",
            default=False,
            help="No progress bars shown. Shorter logs (used in Travis).")
        group_just = parser.add_mutually_exclusive_group(required=False)
        group_just.add_argument("-c",
                                "--just-code",
                                action="store_false",
                                default=True,
                                help="Install code of the modules.",
                                dest=_data)
        group_just.add_argument("-d",
                                "--just-data",
                                action="store_false",
                                default=True,
                                help="Install data of the modules.",
                                dest=_code)
        arguments = parser.parse_args()
        from cobaya.input import load_input
        try:
            infos = [load_input(f) for f in arguments.files]
        except HandledException:
            log.error("Maybe you meant to pass an installation path? "
                      "In that case, use '--path=/path/to/modules'.")
            raise HandledException
        # Launch installer
        install(*infos,
                path=arguments.path[0],
                **{
                    arg: getattr(arguments, arg)
                    for arg in ["force", _code, _data, "no_progress_bars"]
                })
Beispiel #26
0
    def close(self):
        """
        Loads the sample of live points from ``PolyChord``'s raw output and writes it
        (if ``txt`` output requested).
        """
        if not get_mpi_rank():  # process 0 or single (non-MPI process)
            self.log.info(
                "Loading PolyChord's results: samples and evidences.")
            self.n_sampled = len(self.parametrization.sampled_params())
            self.n_derived = len(self.parametrization.derived_params())
            self.n_liks = len(self.likelihood._likelihoods)
            prefix = os.path.join(self.pc_settings.base_dir,
                                  self.pc_settings.file_root)
            self.collection = self.save_sample(prefix + ".txt", "1")
            if self.pc_settings.do_clustering is not False:  # NB: "None" == "default"
                self.clusters = {}
                do_output = hasattr(self.output, "folder")
                for f in os.listdir(
                        os.path.join(self.pc_settings.base_dir, clusters)):
                    if not f.startswith(self.pc_settings.file_root):
                        continue
                    if do_output:
                        cluster_folder = os.path.join(
                            self.output.folder,
                            self.output.prefix + "_" + clusters)
                        if not os.path.exists(cluster_folder):
                            os.mkdir(cluster_folder)
                    try:
                        i = int(f[len(self.pc_settings.file_root) +
                                  1:-len(".txt")])
                    except ValueError:
                        continue
                    if do_output:
                        old_folder = self.output.folder
                        self.output.folder = cluster_folder
                    fname = os.path.join(self.pc_settings.base_dir, clusters,
                                         f)
                    self.clusters[i] = {
                        "sample": self.save_sample(fname, str(i))
                    }
                    if do_output:
                        self.output.folder = old_folder
            # Prepare the evidence
            pre = "log(Z"
            lines = []
            with open(prefix + ".stats", "r") as statsfile:
                lines = [l for l in statsfile.readlines() if l.startswith(pre)]
            for l in lines:
                logZ, logZstd = [
                    float(n.replace("(Still Active)", ""))
                    for n in l.split("=")[-1].split("+/-")
                ]
                component = l.split("=")[0].lstrip(pre + "_").rstrip(") ")
                if not component:
                    self.logZ, self.logZstd = logZ, logZstd
                elif self.pc_settings.do_clustering:
                    i = int(component)
                    self.clusters[i]["logZ"], self.clusters[i][
                        "logZstd"] = logZ, logZstd
#        if get_mpi():
#            bcast_from_0 = lambda attrname: setattr(self,
#                attrname, get_mpi_comm().bcast(getattr(self, attrname, None), root=0))
#            map(bcast_from_0, ["collection", "logZ", "logZstd", "clusters"])
        if not get_mpi_rank():  # process 0 or single (non-MPI process)
            self.log.info("Finished! Raw PolyChord output stored in '%s'.",
                          self.pc_settings.base_dir)
Beispiel #27
0
    def __init__(self,
                 info_sampler,
                 model,
                 output=None,
                 packages_path=None,
                 name=None):
        """
        Actual initialization of the class. Loads the default and input information and
        call the custom ``initialize`` method.

        [Do not modify this one.]
        """
        self.model = model
        self.output = output
        self._updated_info = deepcopy_where_possible(info_sampler)
        super().__init__(info_sampler,
                         packages_path=packages_path,
                         name=name,
                         initialize=False,
                         standalone=False)
        # Seed, if requested
        if getattr(self, "seed", None) is not None:
            if not isinstance(self.seed,
                              int) or not (0 <= self.seed <= 2**32 - 1):
                raise LoggedError(
                    self.log,
                    "Seeds must be a *positive integer* < 2**32 - 1, "
                    "but got %r with type %r", self.seed, type(self.seed))
            # MPI-awareness: sum the rank to the seed
            if more_than_one_process():
                self.seed += get_mpi_rank()
            self.mpi_warning("This run has been SEEDED with seed %d",
                             self.seed)
        # Load checkpoint info, if resuming
        if self.output.is_resuming() and not isinstance(self, Minimizer):
            try:
                checkpoint_info = yaml_load_file(self.checkpoint_filename())
                try:
                    for k, v in checkpoint_info[kinds.sampler][
                            self.get_name()].items():
                        setattr(self, k, v)
                    self.mpi_info("Resuming from previous sample!")
                except KeyError:
                    if is_main_process():
                        raise LoggedError(
                            self.log, "Checkpoint file found at '%s' "
                            "but it corresponds to a different sampler.",
                            self.checkpoint_filename())
            except (IOError, TypeError):
                pass
        else:
            try:
                os.remove(self.checkpoint_filename())
                os.remove(self.progress_filename())
            except (OSError, TypeError):
                pass
        self._set_rng()
        self.initialize()
        self._release_rng()
        self.model.set_cache_size(self._get_requested_cache_size())
        # Add to the updated info some values which are
        # only available after initialisation
        self._updated_info[_version] = self.get_version()
Beispiel #28
0
    def initialise(self):
        """Imports the PolyChord sampler and prepares its arguments."""
        if not get_mpi_rank():  # rank = 0 (MPI master) or None (no MPI)
            self.log.info("Initializing")
        # If path not given, try using general path to modules
        path_to_installation = get_path_to_installation()
        if not self.path and path_to_installation:
            self.path = os.path.join(path_to_installation, "code",
                                     pc_repo_name)
        if self.path:
            if not get_mpi_rank():
                self.log.info("Importing *local* PolyChord from " + self.path)
            pc_py_path = os.path.join(self.path, "PyPolyChord")
            pc_build_path = os.path.join(self.path, "build")
            post = next(d for d in os.listdir(pc_build_path)
                        if d.startswith("lib."))
            pc_build_path = os.path.join(pc_build_path, post)
            if not os.path.exists(pc_build_path):
                self.log.error(
                    "Either PolyChord is not in the given folder, "
                    "'%s', or you have not compiled it.", self.path)
                raise HandledException
            # Inserting the previously found path into the list of import folders
            sys.path.insert(0, pc_build_path)
            sys.path.insert(0, pc_py_path)
        else:
            self.log.info("Importing *global* PolyChord.")
        try:
            import PyPolyChord as PyPolyChord
            from PyPolyChord.settings import PolyChordSettings
        except ImportError:
            self.log.error(
                "Couldn't find the PolyChord python interface. "
                "Make sure that you have compiled it, and that you either\n"
                " (a) specify a path (you didn't) or\n"
                " (b) install the Python interface globally with\n"
                "     '/path/to/PolyChord/python setup.py install --user'")
            raise HandledException
        self.pc = PyPolyChord
        # Prepare arguments and settings
        self.nDims = self.prior.d()
        self.nDerived = (len(self.parametrization.derived_params()) + 1 +
                         len(self.likelihood._likelihoods))
        self.pc_settings = PolyChordSettings(self.nDims, self.nDerived)
        for p in [
                "nlive", "num_repeats", "nprior", "do_clustering",
                "precision_criterion", "max_ndead", "boost_posterior",
                "feedback", "update_files", "posteriors", "equals",
                "cluster_posteriors", "write_resume", "read_resume",
                "write_stats", "write_live", "write_dead", "base_dir",
                "grade_frac", "grade_dims"
        ]:
            v = getattr(self, p)
            if v is not None:
                setattr(self.pc_settings, p, v)
        # Fill the automatic ones
        if getattr(self, "feedback", None) is None:
            values = {
                logging.CRITICAL: 0,
                logging.ERROR: 0,
                logging.WARNING: 0,
                logging.INFO: 1,
                logging.DEBUG: 2
            }
            self.pc_settings.feedback = values[self.log.getEffectiveLevel()]
        try:
            output_folder = getattr(self.output, "folder")
            output_prefix = getattr(self.output, "prefix") or "pc"
        except AttributeError:
            # dummy output -- no resume!
            from tempfile import gettempdir
            output_folder = gettempdir()
            from random import random
            output_prefix = hex(int(random() * 16**6))[2:]
            self.pc_settings.read_resume = False
        self.pc_settings.base_dir = os.path.join(output_folder,
                                                 self.pc_settings.base_dir)
        self.pc_settings.file_root = output_prefix
        if not get_mpi_rank():
            # Creating output folder, if it does not exist (just one process)
            if not os.path.exists(self.pc_settings.base_dir):
                os.makedirs(self.pc_settings.base_dir)
            # Idem, a clusters folder if needed -- notice that PolyChord's default
            # is "True", here "None", hence the funny condition below
            if self.pc_settings.do_clustering is not False:  # None here means "default"
                try:
                    os.makedirs(
                        os.path.join(self.pc_settings.base_dir, clusters))
                except OSError:  # exists!
                    pass
            self.log.info("Storing raw PolyChord output in '%s'.",
                          self.pc_settings.base_dir)
        # explotining the speed hierarchy
        # sort blocks by paramters order and check contiguity (required by PolyChord!!!)
#        speeds, blocks = zip(*self.likelihood.speed_blocked_params(as_indices=True))
#        speeds, blocks = np.array(speeds), np.array(blocks)
# weird behaviour of np.argsort with there is only 1 block
#        if len(blocks) > 1:
#            sorting_indices = np.argsort(blocks, axis=0)
#        else:
#            sorting_indices = [0]
#        speeds, blocks = speeds[sorting_indices], blocks[sorting_indices]
#        if np.all([np.all(block==range(block[0], block[-1]+1)) for block in blocks]):
        self.log.warning("Speed hierarchy exploitation disabled for now!")
        #            self.pc_args["grade_frac"] = list(speeds)
        #            self.pc_args["grade_dims"] = [len(block) for block in blocks]
        #            self.log.info("Exploiting a speed hierarchy with speeds %r and blocks %r",
        #                     speeds, blocks)
        #        else:
        #            self.log.warning("Some speed blocks are not contiguous: PolyChord cannot deal "
        #                        "with the speed hierarchy. Not exploting it.")
        # prior conversion from the hypercube
        bounds = self.prior.bounds(
            confidence_for_unbounded=self.confidence_for_unbounded)
        # Check if priors are bounded (nan's to inf)
        inf = np.where(np.isinf(bounds))
        if len(inf[0]):
            params_names = self.prior.names()
            params = [params_names[i] for i in sorted(list(set(inf[0])))]
            self.log.error(
                "PolyChord needs bounded priors, but the parameter(s) '"
                "', '".join(params) + "' is(are) unbounded.")
            raise HandledException
        locs = bounds[:, 0]
        scales = bounds[:, 1] - bounds[:, 0]
        self.pc_prior = lambda x: (locs + np.array(x) * scales).tolist()
        # We will need the volume of the prior domain, since PolyChord divides by it
        self.logvolume = np.log(np.prod(scales))
        # Done!
        if not get_mpi_rank():
            self.log.info("Calling PolyChord with arguments:")
            for p, v in inspect.getmembers(self.pc_settings,
                                           lambda a: not (callable(a))):
                if not p.startswith("_"):
                    self.log.info("  %s: %s", p, v)
Beispiel #29
0
 def initialize(self):
     """Initializes the sampler:
     creates the proposal distribution and draws the initial sample."""
     if not self.model.prior.d():
         raise LoggedError(self.log,
                           "No parameters being varied for sampler")
     self.log.debug("Initializing")
     # MARKED FOR DEPRECATION IN v3.0
     if getattr(self, "oversample", None) is not None:
         self.log.warning(
             "*DEPRECATION*: `oversample` will be deprecated in the "
             "next version. Oversampling is now requested by setting "
             "`oversample_power` > 0.")
     # END OF DEPRECATION BLOCK
     # MARKED FOR DEPRECATION IN v3.0
     if getattr(self, "check_every", None) is not None:
         self.log.warning(
             "*DEPRECATION*: `check_every` will be deprecated in the "
             "next version. Please use `learn_every` instead.")
         # BEHAVIOUR TO BE REPLACED BY ERROR:
         self.learn_every = getattr(self, "check_every")
     # END OF DEPRECATION BLOCK
     if self.callback_every is None:
         self.callback_every = self.learn_every
     self._quants_d_units = []
     for q in ["max_tries", "learn_every", "callback_every", "burn_in"]:
         number = NumberWithUnits(getattr(self, q), "d", dtype=int)
         self._quants_d_units.append(number)
         setattr(self, q, number)
     self.output_every = NumberWithUnits(self.output_every, "s", dtype=int)
     if is_main_process():
         if self.output.is_resuming() and (max(self.mpi_size or 0, 1) !=
                                           max(get_mpi_size(), 1)):
             raise LoggedError(
                 self.log,
                 "Cannot resume a run with a different number of chains: "
                 "was %d and now is %d.", max(self.mpi_size, 1),
                 max(get_mpi_size(), 1))
         if more_than_one_process():
             if get_mpi().Get_version()[0] < 3:
                 raise LoggedError(
                     self.log, "MPI use requires MPI version 3.0 or "
                     "higher to support IALLGATHER.")
     sync_processes()
     # One collection per MPI process: `name` is the MPI rank + 1
     name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank()))
     self.collection = Collection(self.model,
                                  self.output,
                                  name=name,
                                  resuming=self.output.is_resuming())
     self.current_point = OneSamplePoint(self.model)
     # Use standard MH steps by default
     self.get_new_sample = self.get_new_sample_metropolis
     # Prepare callback function
     if self.callback_function is not None:
         self.callback_function_callable = (get_external_function(
             self.callback_function))
     # Useful for getting last points added inside callback function
     self.last_point_callback = 0
     # Monitoring/restore progress
     if is_main_process():
         cols = [
             "N", "timestamp", "acceptance_rate", "Rminus1", "Rminus1_cl"
         ]
         self.progress = DataFrame(columns=cols)
         self.i_learn = 1
         if self.output and not self.output.is_resuming():
             with open(self.progress_filename(), "w",
                       encoding="utf-8") as progress_file:
                 progress_file.write("# " +
                                     " ".join(self.progress.columns) + "\n")
     # Get first point, to be discarded -- not possible to determine its weight
     # Still, we need to compute derived parameters, since, as the proposal "blocked",
     # we may be saving the initial state of some block.
     # NB: if resuming but nothing was written (burn-in not finished): re-start
     if self.output.is_resuming() and len(self.collection):
         initial_point = (self.collection[
             self.collection.sampled_params].iloc[len(self.collection) -
                                                  1]).values.copy()
         logpost = -(self.collection[_minuslogpost].iloc[
             len(self.collection) - 1].copy())
         logpriors = -(self.collection[self.collection.minuslogprior_names].
                       iloc[len(self.collection) - 1].copy())
         loglikes = -0.5 * (self.collection[self.collection.chi2_names].
                            iloc[len(self.collection) - 1].copy())
         derived = (self.collection[self.collection.derived_params].iloc[
             len(self.collection) - 1].values.copy())
     else:
         # NB: max_tries adjusted to dim instead of #cycles (blocking not computed yet)
         self.max_tries.set_scale(self.model.prior.d())
         self.log.info(
             "Getting initial point... (this may take a few seconds)")
         initial_point, logpost, logpriors, loglikes, derived = \
             self.model.get_valid_point(max_tries=self.max_tries.value)
         # If resuming but no existing chain, assume failed run and ignore blocking
         # if speeds measurement requested
         if self.output.is_resuming() and not len(self.collection) \
            and self.measure_speeds:
             self.blocking = None
         if self.measure_speeds and self.blocking:
             self.log.warning(
                 "Parameter blocking manually fixed: speeds will not be measured."
             )
         elif self.measure_speeds:
             n = None if self.measure_speeds is True else int(
                 self.measure_speeds)
             self.model.measure_and_set_speeds(n=n, discard=0)
     self.set_proposer_blocking()
     self.set_proposer_covmat(load=True)
     self.current_point.add(initial_point,
                            derived=derived,
                            logpost=logpost,
                            logpriors=logpriors,
                            loglikes=loglikes)
     self.log.info("Initial point: %s", self.current_point)
     # Max #(learn+convergence checks) to wait,
     # in case one process dies without sending MPI_ABORT
     self.been_waiting = 0
     self.max_waiting = max(50, self.max_tries.unit_value)
     # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
     self.burn_in_left = self.burn_in.value * self.current_point.output_thin + 1
     # Initial dummy checkpoint
     # (needed when 1st "learn point" not reached in prev. run)
     self.write_checkpoint()
Beispiel #30
0
    def initialise(self):
        """Initialises the sampler:
        creates the proposal distribution and draws the initial sample."""
        self.log.info("Initializing")
        # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
        self.burn_in_left = self.burn_in + 1
        # One collection per MPI process: `name` is the MPI rank + 1
        name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank()))
        self.collection = Collection(self.parametrization,
                                     self.likelihood,
                                     self.output,
                                     name=name)
        self.current_point = OnePoint(self.parametrization,
                                      self.likelihood,
                                      self.output,
                                      name=name)
        # Use the standard steps by default
        self.get_new_sample = self.get_new_sample_metropolis
        # Prepare oversampling / fast-dragging if applicable
        self.effective_max_samples = self.max_samples
        if self.oversample and self.drag:
            self.log.error(
                "Choose either oversampling or fast-dragging, not both.")
            raise HandledException
#        if (self.oversample or self.drag) and len(set(factors)) == 1:
#            self.log.error("All block speeds are similar: "
#                           "no dragging or oversampling possible.")
#            raise HandledException
        if self.oversample:
            factors, blocks = self.likelihood.speeds_of_params(
                oversampling_factors=True)
            self.oversampling_factors = factors
            # WIP: actually, we would have to re-normalise to the dimension of the blocks.
            self.log.info("Oversampling with factors:\n" + "\n".join([
                "   %d : %r" % (f, b)
                for f, b in zip(self.oversampling_factors, blocks)
            ]))
            # WIP: useless until likelihoods have STATES!
            self.log.error("Sorry, oversampling is WIP")
            raise HandledException
        elif self.drag:
            # WIP: for now, can only separate between theory and likelihoods
            # until likelihoods have states
            if not self.likelihood.theory:
                self.log.error(
                    "WIP: dragging disabled for now when no theory code present."
                )
                raise HandledException
#            if self.max_speed_slow < min(speeds) or self.max_speed_slow >= max(speeds):
#                self.log.error("The maximum speed considered slow, `max_speed_slow`, must be "
#                          "%g <= `max_speed_slow < %g, and is %g",
#                          min(speeds), max(speeds), self.max_speed_slow)
#                raise HandledException
            speeds, blocks = self.likelihood.speeds_of_params(int_speeds=True,
                                                              fast_slow=True)
            if np.all(speeds == speeds[0]):
                self.log.error(
                    "All speeds are equal: cannot drag! Make sure to define, "
                    "especially, the speed of the fastest likelihoods.")
            self.i_last_slow_block = 0  # just theory can be slow for now
            fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
            self.n_slow = sum(
                len(blocks[i]) for i in range(1 + self.i_last_slow_block))
            self.drag_interp_steps = int(self.drag *
                                         np.round(min(speeds[1:]) / speeds[0]))
            self.log.info("Dragging with oversampling per step:\n" +
                          "\n".join([
                              "   %d : %r" % (f, b)
                              for f, b in zip([1, self.drag_interp_steps],
                                              [blocks[0], fast_params])
                          ]))
            self.get_new_sample = self.get_new_sample_dragging
        else:
            _, blocks = self.likelihood.speeds_of_params()
            self.oversampling_factors = [1 for b in blocks]
            self.n_slow = len(self.parametrization.sampled_params())
        # Turn parameter names into indices
        blocks = [[
            list(self.parametrization.sampled_params().keys()).index(p)
            for p in b
        ] for b in blocks]
        self.proposer = BlockedProposer(
            blocks,
            oversampling_factors=getattr(self, "oversampling_factors", None),
            i_last_slow_block=getattr(self, "i_last_slow_block", None),
            propose_scale=self.propose_scale)
        # Build the initial covariance matrix of the proposal
        covmat = self.initial_proposal_covmat()
        self.log.info("Sampling with covariance matrix:")
        self.log.info("%r", covmat)
        self.proposer.set_covariance(covmat)
        # Prepare callback function
        if self.callback_function is not None:
            self.callback_function_callable = (get_external_function(
                self.callback_function))