Esempio n. 1
0
    def load_collections(self,
                         model,
                         skip=0,
                         thin=1,
                         concatenate=False,
                         name=None,
                         extension=None):
        """
        Loads all collection files found which are compatible with this `Output`
        instance, including their path in their name.

        Use `name` for particular types of collections (default: any number).
        Pass `False` to mean there is nothing between the output prefix and the extension.
        """
        filenames = self.find_collections(name=name, extension=extension)
        collections = [
            Collection(model,
                       self,
                       name="%d" % (1 + i),
                       file_name=filename,
                       load=True,
                       onload_skip=skip,
                       onload_thin=thin)
            for i, filename in enumerate(filenames)
        ]
        if concatenate and collections:
            collection = collections[0]
            for collection_i in collections[1:]:
                collection.append(collection_i)
            collections = collection
        return collections
Esempio n. 2
0
 def initialise(self):
     """
     Creates a 1-point collection to store the point
     at which the posterior is evaluated.
     """
     self.one_point = Collection(self.parametrization, self.likelihood,
                                 self.output, initial_size=1, name="1")
     self.log.info("Initialised!")
Esempio n. 3
0
 def initialize(self):
     """
     Creates a 1-point collection to store the point
     at which the posterior is evaluated.
     """
     self.one_point = Collection(self.model,
                                 self.output,
                                 initial_size=1,
                                 name="1")
     self.log.info("Initialized!")
 def initialize(self):
     self.log.info("Initializing.")
     # Example for setting default options
     print("+++ For option '%s' got value '%s'. Add more options in fisher.yaml." % (
         "example_option", self.example_option))
     # Prepare the list of "samples" (posterior evaluations)
     self.collection = Collection(self.model, self.output)
     # Prepare vectors to store derivatives
     # ... up to you
     # Prepare the matrix
     self.fisher_matrix = np.eye(self.model.prior.d())
Esempio n. 5
0
 def load_collections(self, model, skip=0, thin=1, concatenate=False):
     filenames = self.find_collections()
     collections = [
         Collection(model, self, name="%d" % (1 + i), file_name=filename,
                    load=True, onload_skip=skip, onload_thin=thin)
         for i, filename in enumerate(filenames)]
     if concatenate and collections:
         collection = collections[0]
         for collection_i in collections[1:]:
             collection._append(collection_i)
         collections = collection
     return collections
Esempio n. 6
0
 def initialize(self):
     """
     Creates a 1-point collection to store the point
     at which the posterior is evaluated.
     """
     try:
         self.N = int(self.N)
     except:
         raise LoggedError(
             self.log,
             "Could not convert the number of samples to an integer: %r", self.N)
     self.one_point = Collection(
         self.model, self.output, initial_size=self.N, name="1")
     self.log.info("Initialized!")
Esempio n. 7
0
 def save_sample(self, fname, name):
     sample = np.atleast_2d(np.loadtxt(fname))
     if not sample.size:
         return None
     collection = Collection(self.model, self.output, name=str(name))
     for row in sample:
         collection.add(
             row[2:2 + self.n_sampled],
             derived=row[2 + self.n_sampled:2 + self.n_sampled + self.n_derived + 1],
             weight=row[0], logpost=-row[1],
             logpriors=row[-(self.n_priors + self.n_likes):-self.n_likes],
             loglikes=row[-self.n_likes:])
     # make sure that the points are written
     collection._out_update()
     return collection
Esempio n. 8
0
 def save_sample(self, fname, name):
     sample = np.atleast_2d(np.loadtxt(fname))
     collection = Collection(self.parametrization,
                             self.likelihood,
                             self.output,
                             name=str(name))
     for row in sample:
         collection.add(row[2:2 + self.n_sampled],
                        derived=row[2 + self.n_sampled:2 + self.n_sampled +
                                    self.n_derived + 1],
                        weight=row[0],
                        logpost=-row[1],
                        logprior=row[-(1 + self.n_liks)],
                        logliks=row[-self.n_liks:])
     # make sure that the points are written
     collection.out_update()
     return collection
Esempio n. 9
0
 def initialize(self):
     """Imports the PolyChord sampler and prepares its arguments."""
     # Allow global import if no direct path specification
     allow_global = not self.path
     if not self.path and self.packages_path:
         self.path = self.get_path(self.packages_path)
     self.pc = self.is_installed(path=self.path, allow_global=allow_global)
     if not self.pc:
         raise NotInstalledError(
             self.log,
             "Could not find PolyChord. Check error message above. "
             "To install it, run 'cobaya-install polychord --%s "
             "[packages_path]'", _packages_path_arg)
     # Prepare arguments and settings
     from pypolychord.settings import PolyChordSettings
     self.n_sampled = len(self.model.parameterization.sampled_params())
     self.n_derived = len(self.model.parameterization.derived_params())
     self.n_priors = len(self.model.prior)
     self.n_likes = len(self.model.likelihood)
     self.nDims = self.model.prior.d()
     self.nDerived = (self.n_derived + self.n_priors + self.n_likes)
     if self.logzero is None:
         self.logzero = np.nan_to_num(-np.inf)
     if self.max_ndead == np.inf:
         self.max_ndead = -1
     self._quants_d_units = ["nlive", "max_ndead"]
     for p in self._quants_d_units:
         if getattr(self, p) is not None:
             setattr(
                 self, p,
                 NumberWithUnits(getattr(self, p),
                                 "d",
                                 scale=self.nDims,
                                 dtype=int).value)
     self._quants_nlive_units = ["nprior"]
     for p in self._quants_nlive_units:
         if getattr(self, p) is not None:
             setattr(
                 self, p,
                 NumberWithUnits(getattr(self, p),
                                 "nlive",
                                 scale=self.nlive,
                                 dtype=int).value)
     # Fill the automatic ones
     if getattr(self, "feedback", None) is None:
         values = {
             logging.CRITICAL: 0,
             logging.ERROR: 0,
             logging.WARNING: 0,
             logging.INFO: 1,
             logging.DEBUG: 2
         }
         self.feedback = values[self.log.getEffectiveLevel()]
     # Prepare output folders and prefixes
     if self.output:
         self.file_root = self.output.prefix
         self.read_resume = self.output.is_resuming()
     else:
         output_prefix = share_mpi(
             hex(int(random() * 16**6))[2:] if is_main_process() else None)
         self.file_root = output_prefix
         # dummy output -- no resume!
         self.read_resume = False
     self.base_dir = self.get_base_dir(self.output)
     self.raw_clusters_dir = os.path.join(self.base_dir, self._clusters_dir)
     self.output.create_folder(self.base_dir)
     if self.do_clustering:
         self.clusters_folder = self.get_clusters_dir(self.output)
         self.output.create_folder(self.clusters_folder)
     self.mpi_info("Storing raw PolyChord output in '%s'.", self.base_dir)
     # Exploiting the speed hierarchy
     if self.blocking:
         blocks, oversampling_factors = self.model.check_blocking(
             self.blocking)
     else:
         if self.measure_speeds:
             self.model.measure_and_set_speeds(n=self.measure_speeds)
         blocks, oversampling_factors = self.model.get_param_blocking_for_sampler(
             oversample_power=self.oversample_power)
     self.mpi_info("Parameter blocks and their oversampling factors:")
     max_width = len(str(max(oversampling_factors)))
     for f, b in zip(oversampling_factors, blocks):
         self.mpi_info("* %" + "%d" % max_width + "d : %r", f, b)
     # Save blocking in updated info, in case we want to resume
     self._updated_info["blocking"] = list(zip(oversampling_factors,
                                               blocks))
     blocks_flat = list(chain(*blocks))
     self.ordering = [
         blocks_flat.index(p)
         for p in self.model.parameterization.sampled_params()
     ]
     self.grade_dims = [len(block) for block in blocks]
     # Steps per block
     # NB: num_repeats is ignored by PolyChord when int "grade_frac" given,
     # so needs to be applied by hand.
     # In num_repeats, `d` is interpreted as dimension of each block
     self.grade_frac = [
         int(o * read_dnumber(self.num_repeats, dim_block))
         for o, dim_block in zip(oversampling_factors, self.grade_dims)
     ]
     # Assign settings
     pc_args = [
         "nlive", "num_repeats", "nprior", "do_clustering",
         "precision_criterion", "max_ndead", "boost_posterior", "feedback",
         "logzero", "posteriors", "equals", "compression_factor",
         "cluster_posteriors", "write_resume", "read_resume", "write_stats",
         "write_live", "write_dead", "base_dir", "grade_frac", "grade_dims",
         "feedback", "read_resume", "base_dir", "file_root", "grade_frac",
         "grade_dims"
     ]
     # As stated above, num_repeats is ignored, so let's not pass it
     pc_args.pop(pc_args.index("num_repeats"))
     self.pc_settings = PolyChordSettings(
         self.nDims,
         self.nDerived,
         seed=(self.seed if self.seed is not None else -1),
         **{
             p: getattr(self, p)
             for p in pc_args if getattr(self, p) is not None
         })
     # prior conversion from the hypercube
     bounds = self.model.prior.bounds(
         confidence_for_unbounded=self.confidence_for_unbounded)
     # Check if priors are bounded (nan's to inf)
     inf = np.where(np.isinf(bounds))
     if len(inf[0]):
         params_names = self.model.parameterization.sampled_params()
         params = [params_names[i] for i in sorted(list(set(inf[0])))]
         raise LoggedError(
             self.log,
             "PolyChord needs bounded priors, but the parameter(s) '"
             "', '".join(params) + "' is(are) unbounded.")
     locs = bounds[:, 0]
     scales = bounds[:, 1] - bounds[:, 0]
     # This function re-scales the parameters AND puts them in the right order
     self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales
                                ).tolist()
     # We will need the volume of the prior domain, since PolyChord divides by it
     self.logvolume = np.log(np.prod(scales))
     # Prepare callback function
     if self.callback_function is not None:
         self.callback_function_callable = (get_external_function(
             self.callback_function))
     self.last_point_callback = 0
     # Prepare runtime live and dead points collections
     self.live = Collection(self.model,
                            None,
                            name="live",
                            initial_size=self.pc_settings.nlive)
     self.dead = Collection(self.model, self.output, name="dead")
     # Done!
     if is_main_process():
         self.log.debug("Calling PolyChord with arguments:")
         for p, v in inspect.getmembers(self.pc_settings,
                                        lambda a: not (callable(a))):
             if not p.startswith("_"):
                 self.log.debug("  %s: %s", p, v)
     self.mpi_info("Initialized!")
Esempio n. 10
0
def post(info, sample=None):
    logger_setup(info.get(_debug), info.get(_debug_file))
    log = logging.getLogger(__name__.split(".")[-1])
    # MARKED FOR DEPRECATION IN v3.0
    # BEHAVIOUR TO BE REPLACED BY ERROR:
    check_deprecated_modules_path(info)
    # END OF DEPRECATION BLOCK
    try:
        info_post = info[_post]
    except KeyError:
        raise LoggedError(log, "No 'post' block given. Nothing to do!")
    if get_mpi_rank():
        log.warning(
            "Post-processing is not yet MPI-aware. Doing nothing for rank > 1 processes.")
        return
    if info.get(_resume):
        log.warning("Resuming not implemented for post-processing. Re-starting.")
    # 1. Load existing sample
    output_in = get_output(output_prefix=info.get(_output_prefix))
    if output_in:
        try:
            info_in = output_in.reload_updated_info()
        except FileNotFoundError:
            raise LoggedError(log, "Error loading input model: "
                                   "could not find input info at %s",
                              output_in.file_updated)
    else:
        info_in = deepcopy_where_possible(info)
    dummy_model_in = DummyModel(info_in[_params], info_in[kinds.likelihood],
                                info_in.get(_prior, None))
    if output_in:
        if not output_in.find_collections():
            raise LoggedError(log, "No samples found for the input model with prefix %s",
                              os.path.join(output_in.folder, output_in.prefix))
        collection_in = output_in.load_collections(
            dummy_model_in, skip=info_post.get("skip", 0), thin=info_post.get("thin", 1),
            concatenate=True)
    elif sample:
        if isinstance(sample, Collection):
            sample = [sample]
        collection_in = deepcopy(sample[0])
        for s in sample[1:]:
            try:
                collection_in.append(s)
            except:
                raise LoggedError(log, "Failed to load some of the input samples.")
    else:
        raise LoggedError(log,
                          "Not output from where to load from or input collections given.")
    log.info("Will process %d samples.", len(collection_in))
    if len(collection_in) <= 1:
        raise LoggedError(
            log, "Not enough samples for post-processing. Try using a larger sample, "
                 "or skipping or thinning less.")
    # 2. Compare old and new info: determine what to do
    add = info_post.get(_post_add, {}) or {}
    remove = info_post.get(_post_remove, {})
    # Add a dummy 'one' likelihood, to absorb unused parameters
    if not add.get(kinds.likelihood):
        add[kinds.likelihood] = {}
    add[kinds.likelihood]["one"] = None
    # Expand the "add" info
    add = update_info(add)
    # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters
    out = {_params: deepcopy_where_possible(info_in[_params])}
    for p in remove.get(_params, {}):
        pinfo = info_in[_params].get(p)
        if pinfo is None or not is_derived_param(pinfo):
            raise LoggedError(
                log,
                "You tried to remove parameter '%s', which is not a derived parameter. "
                "Only derived parameters can be removed during post-processing.", p)
        out[_params].pop(p)
    # Force recomputation of aggregated chi2
    for p in list(out[_params]):
        if p.startswith(_get_chi2_name("")):
            out[_params].pop(p)
    mlprior_names_add = []
    for p, pinfo in add.get(_params, {}).items():
        pinfo_in = info_in[_params].get(p)
        if is_sampled_param(pinfo):
            if not is_sampled_param(pinfo_in):
                # No added sampled parameters (de-marginalisation not implemented)
                if pinfo_in is None:
                    raise LoggedError(
                        log, "You added a new sampled parameter %r (maybe accidentally "
                             "by adding a new likelihood that depends on it). "
                             "Adding new sampled parameters is not possible. Try fixing "
                             "it to some value.", p)
                else:
                    raise LoggedError(
                        log,
                        "You tried to change the prior of parameter '%s', "
                        "but it was not a sampled parameter. "
                        "To change that prior, you need to define as an external one.", p)
            if mlprior_names_add[:1] != _prior_1d_name:
                mlprior_names_add = ([_minuslogprior + _separator + _prior_1d_name]
                                     + mlprior_names_add)
        elif is_derived_param(pinfo):
            if p in out[_params]:
                raise LoggedError(
                    log, "You tried to add derived parameter '%s', which is already "
                         "present. To force its recomputation, 'remove' it too.", p)
        elif is_fixed_param(pinfo):
            # Only one possibility left "fixed" parameter that was not present before:
            # input of new likelihood, or just an argument for dynamical derived (dropped)
            if ((p in info_in[_params] and
                 pinfo[partag.value] != (pinfo_in or {}).get(partag.value, None))):
                raise LoggedError(
                    log,
                    "You tried to add a fixed parameter '%s: %r' that was already present"
                    " but had a different value or was not fixed. This is not allowed. "
                    "The old info of the parameter was '%s: %r'",
                    p, dict(pinfo), p, dict(pinfo_in))
        else:
            raise LoggedError(log, "This should not happen. Contact the developers.")
        out[_params][p] = pinfo
    # For the likelihood only, turn the rest of *derived* parameters into constants,
    # so that the likelihoods do not try to compute them)
    # But be careful to exclude *input* params that have a "derived: True" value
    # (which in "updated info" turns into "derived: 'lambda [x]: [x]'")
    out_params_like = deepcopy_where_possible(out[_params])
    for p, pinfo in out_params_like.items():
        if ((is_derived_param(pinfo) and not (partag.value in pinfo)
             and p not in add.get(_params, {}))):
            out_params_like[p] = {partag.value: np.nan, partag.drop: True}
    # 2.2 Manage adding/removing priors and likelihoods
    warn_remove = False
    for level in [_prior, kinds.likelihood]:
        out[level] = getattr(dummy_model_in, level)
        if level == _prior:
            out[level].remove(_prior_1d_name)
        for pdf in info_post.get(_post_remove, {}).get(level, []) or []:
            try:
                out[level].remove(pdf)
                warn_remove = True
            except ValueError:
                raise LoggedError(
                    log, "Trying to remove %s '%s', but it is not present. "
                         "Existing ones: %r", level, pdf, out[level])
    if warn_remove:
        log.warning("You are removing a prior or likelihood pdf. "
                    "Notice that if the resulting posterior is much wider "
                    "than the original one, or displaced enough, "
                    "it is probably safer to explore it directly.")
    if _prior in add:
        mlprior_names_add += [_minuslogprior + _separator + name for name in add[_prior]]
        out[_prior] += list(add[_prior])
    prior_recompute_1d = (
            mlprior_names_add[:1] == [_minuslogprior + _separator + _prior_1d_name])
    # Don't initialise the theory code if not adding/recomputing theory,
    # theory-derived params or likelihoods
    recompute_theory = info_in.get(kinds.theory) and not (
            list(add[kinds.likelihood]) == ["one"] and
            not any(is_derived_param(pinfo) for pinfo in add.get(_params, {}).values()))
    if recompute_theory:
        # Inherit from the original chain (needs input|output_params, renames, etc
        add_theory = add.get(kinds.theory)
        if add_theory:
            info_theory_out = {}
            if len(add_theory) > 1:
                log.warning('Importance sampling with more than one theory is '
                            'not really tested')
            add_theory = add_theory.copy()
            for theory, theory_info in info_in[kinds.theory].items():
                theory_copy = deepcopy_where_possible(theory_info)
                if theory in add_theory:
                    info_theory_out[theory] = \
                        recursive_update(theory_copy, add_theory.pop(theory))
                else:
                    info_theory_out[theory] = theory_copy
            info_theory_out.update(add_theory)
        else:
            info_theory_out = deepcopy_where_possible(info_in[kinds.theory])
    else:
        info_theory_out = None
    chi2_names_add = [
        _get_chi2_name(name) for name in add[kinds.likelihood] if name != "one"]
    out[kinds.likelihood] += [l for l in add[kinds.likelihood] if l != "one"]
    if recompute_theory:
        log.warning("You are recomputing the theory, but in the current version this does"
                    " not force recomputation of any likelihood or derived parameter, "
                    "unless explicitly removed+added.")
    for level in [_prior, kinds.likelihood]:
        for i, x_i in enumerate(out[level]):
            if x_i in list(out[level])[i + 1:]:
                raise LoggedError(
                    log, "You have added %s '%s', which was already present. If you "
                         "want to force its recomputation, you must also 'remove' it.",
                    level, x_i)
    # 3. Create output collection
    if _post_suffix not in info_post:
        raise LoggedError(log, "You need to provide a '%s' for your chains.",
                          _post_suffix)
    # Use default prefix if it exists. If it does not, produce no output by default.
    # {post: {output: None}} suppresses output, and if it's a string, updates it.
    out_prefix = info_post.get(_output_prefix, info.get(_output_prefix))
    if out_prefix not in [None, False]:
        out_prefix += _separator_files + _post + _separator_files + info_post[
            _post_suffix]
    output_out = get_output(output_prefix=out_prefix, force=info.get(_force))
    if output_out and not output_out.force and output_out.find_collections():
        raise LoggedError(log, "Found existing post-processing output with prefix %r. "
                               "Delete it manually or re-run with `force: True` "
                               "(or `-f`, `--force` from the shell).", out_prefix)
    elif output_out and output_out.force:
        output_out.delete_infos()
        for regexp in output_out.find_collections():
            output_out.delete_with_regexp(re.compile(regexp))
    info_out = deepcopy_where_possible(info)
    info_out[_post] = info_post
    # Updated with input info and extended (updated) add info
    info_out.update(info_in)
    info_out[_post][_post_add] = add
    dummy_model_out = DummyModel(out[_params], out[kinds.likelihood],
                                 info_prior=out[_prior])
    if recompute_theory:
        # TODO: May need updating for more than one, or maybe can be removed
        theory = list(info_theory_out)[0]
        if _input_params not in info_theory_out[theory]:
            raise LoggedError(
                log,
                "You appear to be post-processing a chain generated with an older "
                "version of Cobaya. For post-processing to work, please edit the "
                "'[root].updated.yaml' file of the original chain to add, inside the "
                "theory code block, the list of its input parameters. E.g.\n----\n"
                "theory:\n  %s:\n    input_params: [param1, param2, ...]\n"
                "----\nIf you get strange errors later, it is likely that you did not "
                "specify the correct set of theory parameters.\n"
                "The full set of input parameters are %s.",
                theory, list(dummy_model_out.parameterization.input_params()))
    # TODO: check allow_renames=False?
    # TODO: May well be simplifications here, this is v close to pre-refactor logic
    # Have not gone through or understood all the parameterization  stuff
    model_add = Model(out_params_like, add[kinds.likelihood], info_prior=add.get(_prior),
                      info_theory=info_theory_out, packages_path=info.get(_packages_path),
                      allow_renames=False, post=True,
                      prior_parameterization=dummy_model_out.parameterization)
    # Remove auxiliary "one" before dumping -- 'add' *is* info_out[_post][_post_add]
    add[kinds.likelihood].pop("one")
    collection_out = Collection(dummy_model_out, output_out, name="1")
    output_out.check_and_dump_info(None, info_out, check_compatible=False)
    # Prepare recomputation of aggregated chi2
    # (they need to be recomputed by hand, because its autocomputation won't pick up
    #  old likelihoods for a given type)
    all_types = {
        like: str_to_list(add[kinds.likelihood].get(
            like, info_in[kinds.likelihood].get(like)).get("type", []) or [])
        for like in out[kinds.likelihood]}
    types = set(chain(*list(all_types.values())))
    inv_types = {t: [like for like, like_types in all_types.items() if t in like_types]
                 for t in types}
    # 4. Main loop!
    log.info("Running post-processing...")
    last_percent = 0
    for i, point in collection_in.data.iterrows():
        log.debug("Point: %r", point)
        sampled = [point[param] for param in
                   dummy_model_in.parameterization.sampled_params()]
        derived = {param: point.get(param, None)
                   for param in dummy_model_out.parameterization.derived_params()}
        inputs = {param: point.get(
            param, dummy_model_in.parameterization.constant_params().get(
                param, dummy_model_out.parameterization.constant_params().get(
                    param, None)))
            for param in dummy_model_out.parameterization.input_params()}
        # Solve inputs that depend on a function and were not saved
        # (we don't use the Parameterization_to_input method in case there are references
        #  to functions that cannot be loaded at the moment)
        for p, value in inputs.items():
            if value is None:
                func = dummy_model_out.parameterization._input_funcs[p]
                args = dummy_model_out.parameterization._input_args[p]
                inputs[p] = func(*[point.get(arg) for arg in args])
        # Add/remove priors
        priors_add = model_add.prior.logps(sampled)
        if not prior_recompute_1d:
            priors_add = priors_add[1:]
        logpriors_add = dict(zip(mlprior_names_add, priors_add))
        logpriors_new = [logpriors_add.get(name, - point.get(name, 0))
                         for name in collection_out.minuslogprior_names]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug(
                "New set of priors: %r", dict(zip(dummy_model_out.prior, logpriors_new)))
        if -np.inf in logpriors_new:
            continue
        # Add/remove likelihoods
        output_like = []
        if add[kinds.likelihood]:
            # Notice "one" (last in likelihood_add) is ignored: not in chi2_names
            loglikes_add, output_like = model_add.logps(inputs, return_derived=True)
            loglikes_add = dict(zip(chi2_names_add, loglikes_add))
            output_like = dict(zip(model_add.output_params, output_like))
        else:
            loglikes_add = dict()
        loglikes_new = [loglikes_add.get(name, -0.5 * point.get(name, 0))
                        for name in collection_out.chi2_names]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug(
                "New set of likelihoods: %r",
                dict(zip(dummy_model_out.likelihood, loglikes_new)))
            if output_like:
                log.debug("New set of likelihood-derived parameters: %r", output_like)
        if -np.inf in loglikes_new:
            continue
        # Add/remove derived parameters and change priors of sampled parameters
        for p in add[_params]:
            if p in dummy_model_out.parameterization._directly_output:
                derived[p] = output_like[p]
            elif p in dummy_model_out.parameterization._derived_funcs:
                func = dummy_model_out.parameterization._derived_funcs[p]
                args = dummy_model_out.parameterization._derived_args[p]
                derived[p] = func(
                    *[point.get(arg, output_like.get(arg, None)) for arg in args])
        # We need to recompute the aggregated chi2 by hand
        for type_, likes in inv_types.items():
            derived[_get_chi2_name(type_)] = sum(
                [-2 * lvalue for lname, lvalue
                 in zip(collection_out.chi2_names, loglikes_new)
                 if _undo_chi2_name(lname) in likes])
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug("New derived parameters: %r",
                      dict([(p, derived[p])
                            for p in dummy_model_out.parameterization.derived_params()
                            if p in add[_params]]))
        # Save to the collection (keep old weight for now)
        collection_out.add(
            sampled, derived=derived.values(), weight=point.get(_weight),
            logpriors=logpriors_new, loglikes=loglikes_new)
        # Display progress
        percent = np.round(i / len(collection_in) * 100)
        if percent != last_percent and not percent % 5:
            last_percent = percent
            progress_bar(log, percent, " (%d/%d)" % (i, len(collection_in)))
    if not collection_out.data.last_valid_index():
        raise LoggedError(
            log, "No elements in the final sample. Possible causes: "
                 "added a prior or likelihood valued zero over the full sampled domain, "
                 "or the computation of the theory failed everywhere, etc.")
    # Reweight -- account for large dynamic range!
    #   Prefer to rescale +inf to finite, and ignore final points with -inf.
    #   Remove -inf's (0-weight), and correct indices
    difflogmax = max(collection_in[_minuslogpost] - collection_out[_minuslogpost])
    collection_out.data[_weight] *= np.exp(
        collection_in[_minuslogpost] - collection_out[_minuslogpost] - difflogmax)
    collection_out.data = (
        collection_out.data[collection_out.data.weight > 0].reset_index(drop=True))
    collection_out._n = collection_out.data.last_valid_index() + 1
    # Write!
    collection_out.out_update()
    log.info("Finished! Final number of samples: %d", len(collection_out))
    return info_out, {"sample": collection_out}
Esempio n. 11
0
    def initialize(self):
        self.mpi_info("Initializing")
        self.max_evals = read_dnumber(self.max_evals, self.model.prior.d())
        # Configure target
        method = self.model.loglike if self.ignore_prior else self.model.logpost
        kwargs = {"make_finite": True}
        if self.ignore_prior:
            kwargs["return_derived"] = False
        self.logp = lambda x: method(x, **kwargs)
        # Try to load info from previous samples.
        # If none, sample from reference (make sure that it has finite like/post)
        initial_point = None
        if self.output:
            files = self.output.find_collections()
            collection_in = None
            if files:
                if more_than_one_process():
                    if 1 + get_mpi_rank() <= len(files):
                        collection_in = Collection(self.model,
                                                   self.output,
                                                   name=str(1 +
                                                            get_mpi_rank()),
                                                   resuming=True)
                else:
                    collection_in = self.output.load_collections(
                        self.model, concatenate=True)
            if collection_in:
                initial_point = (collection_in.bestfit()
                                 if self.ignore_prior else collection_in.MAP())
                initial_point = initial_point[list(
                    self.model.parameterization.sampled_params())].values
                self.log.info("Starting from %s of previous chain:",
                              "best fit" if self.ignore_prior else "MAP")
        if initial_point is None:
            this_logp = -np.inf
            while not np.isfinite(this_logp):
                initial_point = self.model.prior.reference()
                this_logp = self.logp(initial_point)
            self.log.info("Starting from random initial point:")
        self.log.info(
            dict(
                zip(self.model.parameterization.sampled_params(),
                    initial_point)))

        self._bounds = self.model.prior.bounds(
            confidence_for_unbounded=self.confidence_for_unbounded)

        # TODO: if ignore_prior, one should use *like* covariance (this is *post*)
        covmat = self._load_covmat(self.output)[0]

        # scale by conditional parameter widths (since not using correlation structure)
        scales = np.minimum(1 / np.sqrt(np.diag(np.linalg.inv(covmat))),
                            (self._bounds[:, 1] - self._bounds[:, 0]) / 3)

        # Cov and affine transformation
        # Transform to space where initial point is at centre, and cov is normalised
        # Cannot do rotation, as supported minimization routines assume bounds aligned
        # with the parameter axes.
        self._affine_transform_matrix = np.diag(1 / scales)
        self._inv_affine_transform_matrix = np.diag(scales)
        self._scales = scales
        self._affine_transform_baseline = initial_point
        initial_point = self.affine_transform(initial_point)
        np.testing.assert_allclose(initial_point,
                                   np.zeros(initial_point.shape))
        bounds = np.array(
            [self.affine_transform(self._bounds[:, i]) for i in range(2)]).T
        # Configure method
        if self.method.lower() == "bobyqa":
            self.minimizer = pybobyqa.solve
            self.kwargs = {
                "objfun": (lambda x: -self.logp_transf(x)),
                "x0":
                initial_point,
                "bounds":
                np.array(list(zip(*bounds))),
                "seek_global_minimum":
                (True if get_mpi_size() in [0, 1] else False),
                "maxfun":
                int(self.max_evals)
            }
            self.kwargs = recursive_update(deepcopy(self.kwargs),
                                           self.override_bobyqa or {})
            self.log.debug(
                "Arguments for pybobyqa.solve:\n%r",
                {k: v
                 for k, v in self.kwargs.items() if k != "objfun"})
        elif self.method.lower() == "scipy":
            self.minimizer = scpminimize
            self.kwargs = {
                "fun": (lambda x: -self.logp_transf(x)),
                "x0": initial_point,
                "bounds": bounds,
                "options": {
                    "maxiter": self.max_evals,
                    "disp": (self.log.getEffectiveLevel() == logging.DEBUG)
                }
            }
            self.kwargs = recursive_update(deepcopy(self.kwargs),
                                           self.override_scipy or {})
            self.log.debug(
                "Arguments for scipy.optimize.minimize:\n%r",
                {k: v
                 for k, v in self.kwargs.items() if k != "fun"})
        else:
            methods = ["bobyqa", "scipy"]
            raise LoggedError(self.log,
                              "Method '%s' not recognized. Try one of %r.",
                              self.method, methods)
Esempio n. 12
0
 def initialize(self):
     """Initializes the sampler:
     creates the proposal distribution and draws the initial sample."""
     self.log.debug("Initializing")
     for p in [
             "burn_in", "max_tries", "output_every", "check_every",
             "callback_every"
     ]:
         setattr(
             self, p,
             read_dnumber(getattr(self, p), self.model.prior.d(),
                          dtype=int))
     if self.callback_every is None:
         self.callback_every = self.check_every
     # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
     self.burn_in_left = self.burn_in + 1
     # Max # checkpoints to wait, in case one process dies without sending MPI_ABORT
     self.been_waiting = 0
     self.max_waiting = max(50, self.max_tries / self.model.prior.d())
     if self.resuming and (max(self.mpi_size or 0, 1) != max(
             get_mpi_size(), 1)):
         self.log.error(
             "Cannot resume a sample with a different number of chains: "
             "was %d and now is %d.", max(self.mpi_size, 1),
             max(get_mpi_size(), 1))
         raise HandledException
     if not self.resuming and self.output:
         # Delete previous files (if not "forced", the run would have already failed)
         if ((os.path.abspath(self.covmat_filename()) != os.path.abspath(
                 str(self.covmat)))):
             try:
                 os.remove(self.covmat_filename())
             except OSError:
                 pass
         # There may be more that chains than expected,
         # if #ranks was bigger in a previous run
         i = 0
         while True:
             i += 1
             collection_filename, _ = self.output.prepare_collection(str(i))
             try:
                 os.remove(collection_filename)
             except OSError:
                 break
     # One collection per MPI process: `name` is the MPI rank + 1
     name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank()))
     self.collection = Collection(self.model,
                                  self.output,
                                  name=name,
                                  resuming=self.resuming)
     self.current_point = OnePoint(self.model, OutputDummy({}), name=name)
     # Use standard MH steps by default
     self.get_new_sample = self.get_new_sample_metropolis
     # Prepare oversampling / dragging if applicable
     self.effective_max_samples = self.max_samples
     if self.oversample and self.drag:
         self.log.error("Choose either oversampling or dragging, not both.")
         raise HandledException
     if self.oversample:
         factors, blocks = self.model.likelihood._speeds_of_params(
             int_speeds=True)
         self.oversampling_factors = factors
         self.log.info("Oversampling with factors:\n" + "\n".join([
             "   %d : %r" % (f, b)
             for f, b in zip(self.oversampling_factors, blocks)
         ]))
         self.i_last_slow_block = None
         # No way right now to separate slow and fast
         slow_params = list(self.model.parameterization.sampled_params())
     elif self.drag:
         speeds, blocks = self.model.likelihood._speeds_of_params(
             fast_slow=True, int_speeds=True)
         # For now, no blocking inside either fast or slow: just 2 blocks
         self.i_last_slow_block = 0
         if np.all(speeds == speeds[0]):
             self.log.error(
                 "All speeds are equal or too similar: cannot drag! "
                 "Make sure to define accurate likelihoods' speeds.")
             raise HandledException
         # Make the 1st factor 1:
         speeds = [1, speeds[1] / speeds[0]]
         # Target: dragging step taking as long as slow step
         self.drag_interp_steps = self.drag * speeds[1]
         # Per dragging step, the (fast) posterior is evaluated *twice*,
         self.drag_interp_steps /= 2
         self.drag_interp_steps = int(np.round(self.drag_interp_steps))
         fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
         # Not too much or too little dragging
         drag_limits = [(int(l) * len(fast_params) if l is not None else l)
                        for l in self.drag_limits]
         if drag_limits[
                 0] is not None and self.drag_interp_steps < drag_limits[0]:
             self.log.warning(
                 "Number of dragging steps clipped from below: was not "
                 "enough to efficiently explore the fast directions -- "
                 "avoid this limit by decreasing 'drag_limits[0]'.")
             self.drag_interp_steps = drag_limits[0]
         if drag_limits[
                 1] is not None and self.drag_interp_steps > drag_limits[1]:
             self.log.warning(
                 "Number of dragging steps clipped from above: "
                 "excessive, probably inefficient, exploration of the "
                 "fast directions -- "
                 "avoid this limit by increasing 'drag_limits[1]'.")
             self.drag_interp_steps = drag_limits[1]
         # Re-scale steps between checkpoint and callback to the slow dimensions only
         slow_params = list(chain(*blocks[:1 + self.i_last_slow_block]))
         self.n_slow = len(slow_params)
         for p in ["check_every", "callback_every"]:
             setattr(
                 self, p,
                 int(getattr(self, p) * self.n_slow / self.model.prior.d()))
         self.log.info("Dragging with oversampling per step:\n" +
                       "\n".join([
                           "   %d : %r" % (f, b)
                           for f, b in zip([1, self.drag_interp_steps],
                                           [blocks[0], fast_params])
                       ]))
         self.get_new_sample = self.get_new_sample_dragging
     else:
         _, blocks = self.model.likelihood._speeds_of_params()
         self.oversampling_factors = [1 for b in blocks]
         slow_params = list(self.model.parameterization.sampled_params())
         self.n_slow = len(slow_params)
     # Turn parameter names into indices
     self.blocks = [[
         list(self.model.parameterization.sampled_params()).index(p)
         for p in b
     ] for b in blocks]
     self.proposer = BlockedProposer(
         self.blocks,
         oversampling_factors=self.oversampling_factors,
         i_last_slow_block=self.i_last_slow_block,
         proposal_scale=self.proposal_scale)
     # Build the initial covariance matrix of the proposal, or load from checkpoint
     if self.resuming:
         covmat = np.loadtxt(self.covmat_filename())
         self.log.info("Covariance matrix from checkpoint.")
     else:
         covmat = self.initial_proposal_covmat(slow_params=slow_params)
         self.log.info("Initial covariance matrix.")
     self.log.debug(
         "Sampling with covmat:\n%s",
         DataFrame(
             covmat,
             columns=self.model.parameterization.sampled_params(),
             index=self.model.parameterization.sampled_params()).to_string(
                 line_width=_line_width))
     self.proposer.set_covariance(covmat)
     # Prepare callback function
     if self.callback_function is not None:
         self.callback_function_callable = (get_external_function(
             self.callback_function))
Esempio n. 13
0
 def initialize(self):
     """Initializes the sampler:
     creates the proposal distribution and draws the initial sample."""
     if not self.model.prior.d():
         raise LoggedError(self.log,
                           "No parameters being varied for sampler")
     self.log.debug("Initializing")
     # MARKED FOR DEPRECATION IN v3.0
     if getattr(self, "oversample", None) is not None:
         self.log.warning(
             "*DEPRECATION*: `oversample` will be deprecated in the "
             "next version. Oversampling is now requested by setting "
             "`oversample_power` > 0.")
     # END OF DEPRECATION BLOCK
     # MARKED FOR DEPRECATION IN v3.0
     if getattr(self, "check_every", None) is not None:
         self.log.warning(
             "*DEPRECATION*: `check_every` will be deprecated in the "
             "next version. Please use `learn_every` instead.")
         # BEHAVIOUR TO BE REPLACED BY ERROR:
         self.learn_every = getattr(self, "check_every")
     # END OF DEPRECATION BLOCK
     if self.callback_every is None:
         self.callback_every = self.learn_every
     self._quants_d_units = []
     for q in ["max_tries", "learn_every", "callback_every", "burn_in"]:
         number = NumberWithUnits(getattr(self, q), "d", dtype=int)
         self._quants_d_units.append(number)
         setattr(self, q, number)
     self.output_every = NumberWithUnits(self.output_every, "s", dtype=int)
     if is_main_process():
         if self.output.is_resuming() and (max(self.mpi_size or 0, 1) !=
                                           max(get_mpi_size(), 1)):
             raise LoggedError(
                 self.log,
                 "Cannot resume a run with a different number of chains: "
                 "was %d and now is %d.", max(self.mpi_size, 1),
                 max(get_mpi_size(), 1))
         if more_than_one_process():
             if get_mpi().Get_version()[0] < 3:
                 raise LoggedError(
                     self.log, "MPI use requires MPI version 3.0 or "
                     "higher to support IALLGATHER.")
     sync_processes()
     # One collection per MPI process: `name` is the MPI rank + 1
     name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank()))
     self.collection = Collection(self.model,
                                  self.output,
                                  name=name,
                                  resuming=self.output.is_resuming())
     self.current_point = OneSamplePoint(self.model)
     # Use standard MH steps by default
     self.get_new_sample = self.get_new_sample_metropolis
     # Prepare callback function
     if self.callback_function is not None:
         self.callback_function_callable = (get_external_function(
             self.callback_function))
     # Useful for getting last points added inside callback function
     self.last_point_callback = 0
     # Monitoring/restore progress
     if is_main_process():
         cols = [
             "N", "timestamp", "acceptance_rate", "Rminus1", "Rminus1_cl"
         ]
         self.progress = DataFrame(columns=cols)
         self.i_learn = 1
         if self.output and not self.output.is_resuming():
             with open(self.progress_filename(), "w",
                       encoding="utf-8") as progress_file:
                 progress_file.write("# " +
                                     " ".join(self.progress.columns) + "\n")
     # Get first point, to be discarded -- not possible to determine its weight
     # Still, we need to compute derived parameters, since, as the proposal "blocked",
     # we may be saving the initial state of some block.
     # NB: if resuming but nothing was written (burn-in not finished): re-start
     if self.output.is_resuming() and len(self.collection):
         initial_point = (self.collection[
             self.collection.sampled_params].iloc[len(self.collection) -
                                                  1]).values.copy()
         logpost = -(self.collection[_minuslogpost].iloc[
             len(self.collection) - 1].copy())
         logpriors = -(self.collection[self.collection.minuslogprior_names].
                       iloc[len(self.collection) - 1].copy())
         loglikes = -0.5 * (self.collection[self.collection.chi2_names].
                            iloc[len(self.collection) - 1].copy())
         derived = (self.collection[self.collection.derived_params].iloc[
             len(self.collection) - 1].values.copy())
     else:
         # NB: max_tries adjusted to dim instead of #cycles (blocking not computed yet)
         self.max_tries.set_scale(self.model.prior.d())
         self.log.info(
             "Getting initial point... (this may take a few seconds)")
         initial_point, logpost, logpriors, loglikes, derived = \
             self.model.get_valid_point(max_tries=self.max_tries.value)
         # If resuming but no existing chain, assume failed run and ignore blocking
         # if speeds measurement requested
         if self.output.is_resuming() and not len(self.collection) \
            and self.measure_speeds:
             self.blocking = None
         if self.measure_speeds and self.blocking:
             self.log.warning(
                 "Parameter blocking manually fixed: speeds will not be measured."
             )
         elif self.measure_speeds:
             n = None if self.measure_speeds is True else int(
                 self.measure_speeds)
             self.model.measure_and_set_speeds(n=n, discard=0)
     self.set_proposer_blocking()
     self.set_proposer_covmat(load=True)
     self.current_point.add(initial_point,
                            derived=derived,
                            logpost=logpost,
                            logpriors=logpriors,
                            loglikes=loglikes)
     self.log.info("Initial point: %s", self.current_point)
     # Max #(learn+convergence checks) to wait,
     # in case one process dies without sending MPI_ABORT
     self.been_waiting = 0
     self.max_waiting = max(50, self.max_tries.unit_value)
     # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
     self.burn_in_left = self.burn_in.value * self.current_point.output_thin + 1
     # Initial dummy checkpoint
     # (needed when 1st "learn point" not reached in prev. run)
     self.write_checkpoint()
Esempio n. 14
0
 def initialize(self):
     """Imports the PolyChord sampler and prepares its arguments."""
     if am_single_or_primary_process(
     ):  # rank = 0 (MPI master) or None (no MPI)
         self.log.info("Initializing")
     # If path not given, try using general path to modules
     if not self.path and self.path_install:
         self.path = get_path(self.path_install)
     if self.path:
         if am_single_or_primary_process():
             self.log.info("Importing *local* PolyChord from " + self.path)
             if not os.path.exists(os.path.realpath(self.path)):
                 raise LoggedError(
                     self.log, "The given path does not exist. "
                     "Try installing PolyChord with "
                     "'cobaya-install polychord -m [modules_path]")
         pc_build_path = get_build_path(self.path)
         if not pc_build_path:
             raise LoggedError(
                 self.log, "Either PolyChord is not in the given folder, "
                 "'%s', or you have not compiled it.", self.path)
         # Inserting the previously found path into the list of import folders
         sys.path.insert(0, pc_build_path)
     else:
         self.log.info("Importing *global* PolyChord.")
     try:
         import pypolychord
         from pypolychord.settings import PolyChordSettings
         self.pc = pypolychord
     except ImportError:
         raise LoggedError(
             self.log, "Couldn't find the PolyChord python interface. "
             "Make sure that you have compiled it, and that you either\n"
             " (a) specify a path (you didn't) or\n"
             " (b) install the Python interface globally with\n"
             "     '/path/to/PolyChord/python setup.py install --user'")
     # Prepare arguments and settings
     self.nDims = self.model.prior.d()
     self.nDerived = (len(self.model.parameterization.derived_params()) +
                      len(self.model.prior) +
                      len(self.model.likelihood._likelihoods))
     if self.logzero is None:
         self.logzero = np.nan_to_num(-np.inf)
     if self.max_ndead == np.inf:
         self.max_ndead = -1
     for p in ["nlive", "nprior", "max_ndead"]:
         setattr(self, p,
                 read_dnumber(getattr(self, p), self.nDims, dtype=int))
     # Fill the automatic ones
     if getattr(self, "feedback", None) is None:
         values = {
             logging.CRITICAL: 0,
             logging.ERROR: 0,
             logging.WARNING: 0,
             logging.INFO: 1,
             logging.DEBUG: 2
         }
         self.feedback = values[self.log.getEffectiveLevel()]
     try:
         output_folder = getattr(self.output, "folder")
         output_prefix = getattr(self.output, "prefix") or ""
         self.read_resume = self.resuming
     except AttributeError:
         # dummy output -- no resume!
         self.read_resume = False
         from tempfile import gettempdir
         output_folder = gettempdir()
         if am_single_or_primary_process():
             from random import random
             output_prefix = hex(int(random() * 16**6))[2:]
         else:
             output_prefix = None
         if more_than_one_process():
             output_prefix = get_mpi_comm().bcast(output_prefix, root=0)
     self.base_dir = os.path.join(output_folder, self.base_dir)
     self.file_root = output_prefix
     if am_single_or_primary_process():
         # Creating output folder, if it does not exist (just one process)
         if not os.path.exists(self.base_dir):
             os.makedirs(self.base_dir)
         # Idem, a clusters folder if needed -- notice that PolyChord's default
         # is "True", here "None", hence the funny condition below
         if self.do_clustering is not False:  # None here means "default"
             try:
                 os.makedirs(os.path.join(self.base_dir, clusters))
             except OSError:  # exists!
                 pass
         self.log.info("Storing raw PolyChord output in '%s'.",
                       self.base_dir)
     # Exploiting the speed hierarchy
     if self.blocking:
         speeds, blocks = self.model.likelihood._check_speeds_of_params(
             self.blocking)
     else:
         speeds, blocks = self.model.likelihood._speeds_of_params(
             int_speeds=True)
     blocks_flat = list(chain(*blocks))
     self.ordering = [
         blocks_flat.index(p)
         for p in self.model.parameterization.sampled_params()
     ]
     self.grade_dims = np.array([len(block) for block in blocks])
     # bugfix: pypolychord's C interface for Fortran does not like int numpy types
     self.grade_dims = [int(x) for x in self.grade_dims]
     # Steps per block
     # NB: num_repeats is ignored by PolyChord when int "grade_frac" given,
     # so needs to be applied by hand.
     # Make sure that speeds are integer, and that the slowest is 1,
     # for a straightforward application of num_repeats
     speeds = relative_to_int(speeds, 1)
     # In num_repeats, `d` is interpreted as dimension of each block
     self.grade_frac = [
         int(speed * read_dnumber(self.num_repeats, dim_block))
         for speed, dim_block in zip(speeds, self.grade_dims)
     ]
     # Assign settings
     pc_args = [
         "nlive", "num_repeats", "nprior", "do_clustering",
         "precision_criterion", "max_ndead", "boost_posterior", "feedback",
         "logzero", "posteriors", "equals", "compression_factor",
         "cluster_posteriors", "write_resume", "read_resume", "write_stats",
         "write_live", "write_dead", "base_dir", "grade_frac", "grade_dims",
         "feedback", "read_resume", "base_dir", "file_root", "grade_frac",
         "grade_dims"
     ]
     # As stated above, num_repeats is ignored, so let's not pass it
     pc_args.pop(pc_args.index("num_repeats"))
     self.pc_settings = PolyChordSettings(
         self.nDims,
         self.nDerived,
         seed=(self.seed if self.seed is not None else -1),
         **{
             p: getattr(self, p)
             for p in pc_args if getattr(self, p) is not None
         })
     # prior conversion from the hypercube
     bounds = self.model.prior.bounds(
         confidence_for_unbounded=self.confidence_for_unbounded)
     # Check if priors are bounded (nan's to inf)
     inf = np.where(np.isinf(bounds))
     if len(inf[0]):
         params_names = self.model.parameterization.sampled_params()
         params = [params_names[i] for i in sorted(list(set(inf[0])))]
         raise LoggedError(
             self.log,
             "PolyChord needs bounded priors, but the parameter(s) '"
             "', '".join(params) + "' is(are) unbounded.")
     locs = bounds[:, 0]
     scales = bounds[:, 1] - bounds[:, 0]
     # This function re-scales the parameters AND puts them in the right order
     self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales
                                ).tolist()
     # We will need the volume of the prior domain, since PolyChord divides by it
     self.logvolume = np.log(np.prod(scales))
     # Prepare callback function
     if self.callback_function is not None:
         self.callback_function_callable = (get_external_function(
             self.callback_function))
     self.last_point_callback = 0
     # Prepare runtime live and dead points collections
     self.live = Collection(self.model,
                            None,
                            name="live",
                            initial_size=self.pc_settings.nlive)
     self.dead = Collection(self.model, self.output, name="dead")
     self.n_sampled = len(self.model.parameterization.sampled_params())
     self.n_derived = len(self.model.parameterization.derived_params())
     self.n_priors = len(self.model.prior)
     self.n_likes = len(self.model.likelihood._likelihoods)
     # Done!
     if am_single_or_primary_process():
         self.log.info("Calling PolyChord with arguments:")
         for p, v in inspect.getmembers(self.pc_settings,
                                        lambda a: not (callable(a))):
             if not p.startswith("_"):
                 self.log.info("  %s: %s", p, v)
Esempio n. 15
0
    def initialise(self):
        """Initialises the sampler:
        creates the proposal distribution and draws the initial sample."""
        self.log.info("Initializing")
        # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
        self.burn_in_left = self.burn_in + 1
        # One collection per MPI process: `name` is the MPI rank + 1
        name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank()))
        self.collection = Collection(self.parametrization,
                                     self.likelihood,
                                     self.output,
                                     name=name)
        self.current_point = OnePoint(self.parametrization,
                                      self.likelihood,
                                      self.output,
                                      name=name)
        # Use the standard steps by default
        self.get_new_sample = self.get_new_sample_metropolis
        # Prepare oversampling / fast-dragging if applicable
        self.effective_max_samples = self.max_samples
        if self.oversample and self.drag:
            self.log.error(
                "Choose either oversampling or fast-dragging, not both.")
            raise HandledException
#        if (self.oversample or self.drag) and len(set(factors)) == 1:
#            self.log.error("All block speeds are similar: "
#                           "no dragging or oversampling possible.")
#            raise HandledException
        if self.oversample:
            factors, blocks = self.likelihood.speeds_of_params(
                oversampling_factors=True)
            self.oversampling_factors = factors
            # WIP: actually, we would have to re-normalise to the dimension of the blocks.
            self.log.info("Oversampling with factors:\n" + "\n".join([
                "   %d : %r" % (f, b)
                for f, b in zip(self.oversampling_factors, blocks)
            ]))
            # WIP: useless until likelihoods have STATES!
            self.log.error("Sorry, oversampling is WIP")
            raise HandledException
        elif self.drag:
            # WIP: for now, can only separate between theory and likelihoods
            # until likelihoods have states
            if not self.likelihood.theory:
                self.log.error(
                    "WIP: dragging disabled for now when no theory code present."
                )
                raise HandledException
#            if self.max_speed_slow < min(speeds) or self.max_speed_slow >= max(speeds):
#                self.log.error("The maximum speed considered slow, `max_speed_slow`, must be "
#                          "%g <= `max_speed_slow < %g, and is %g",
#                          min(speeds), max(speeds), self.max_speed_slow)
#                raise HandledException
            speeds, blocks = self.likelihood.speeds_of_params(int_speeds=True,
                                                              fast_slow=True)
            if np.all(speeds == speeds[0]):
                self.log.error(
                    "All speeds are equal: cannot drag! Make sure to define, "
                    "especially, the speed of the fastest likelihoods.")
            self.i_last_slow_block = 0  # just theory can be slow for now
            fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
            self.n_slow = sum(
                len(blocks[i]) for i in range(1 + self.i_last_slow_block))
            self.drag_interp_steps = int(self.drag *
                                         np.round(min(speeds[1:]) / speeds[0]))
            self.log.info("Dragging with oversampling per step:\n" +
                          "\n".join([
                              "   %d : %r" % (f, b)
                              for f, b in zip([1, self.drag_interp_steps],
                                              [blocks[0], fast_params])
                          ]))
            self.get_new_sample = self.get_new_sample_dragging
        else:
            _, blocks = self.likelihood.speeds_of_params()
            self.oversampling_factors = [1 for b in blocks]
            self.n_slow = len(self.parametrization.sampled_params())
        # Turn parameter names into indices
        blocks = [[
            list(self.parametrization.sampled_params().keys()).index(p)
            for p in b
        ] for b in blocks]
        self.proposer = BlockedProposer(
            blocks,
            oversampling_factors=getattr(self, "oversampling_factors", None),
            i_last_slow_block=getattr(self, "i_last_slow_block", None),
            propose_scale=self.propose_scale)
        # Build the initial covariance matrix of the proposal
        covmat = self.initial_proposal_covmat()
        self.log.info("Sampling with covariance matrix:")
        self.log.info("%r", covmat)
        self.proposer.set_covariance(covmat)
        # Prepare callback function
        if self.callback_function is not None:
            self.callback_function_callable = (get_external_function(
                self.callback_function))
Esempio n. 16
0
def post(info, sample=None):
    logger_setup(info.get(_debug), info.get(_debug_file))
    log = logging.getLogger(__name__.split(".")[-1])
    try:
        info_post = info[_post]
    except KeyError:
        log.error("No 'post' block given. Nothing to do!")
        raise HandledException
    if get_mpi_rank():
        log.warning(
            "Post-processing is not yet MPI-able. Doing nothing for rank > 1 processes."
        )
        return
    # 1. Load existing sample
    output_in = Output(output_prefix=info.get(_output_prefix), resume=True)
    info_in = load_input(output_in.file_full) if output_in else deepcopy(info)
    dummy_model_in = DummyModel(info_in[_params], info_in[_likelihood],
                                info_in.get(_prior, None),
                                info_in.get(_theory, None))
    if output_in:
        i = 0
        while True:
            try:
                collection = Collection(dummy_model_in,
                                        output_in,
                                        name="%d" % (1 + i),
                                        load=True,
                                        onload_skip=info_post.get("skip", 0),
                                        onload_thin=info_post.get("thin", 1))
                if i == 0:
                    collection_in = collection
                else:
                    collection_in._append(collection)
                i += 1
            except IOError:
                break
    elif sample:
        if isinstance(sample, Collection):
            sample = [sample]
        collection_in = deepcopy(sample[0])
        for s in sample[1:]:
            try:
                collection_in._append(s)
            except:
                log.error("Failed to load some of the input samples.")
                raise HandledException
        i = len(sample)
    else:
        log.error(
            "Not output from where to load from or input collections given.")
        raise HandledException
    log.info("Loaded %d chain%s. Will process %d samples.", i,
             "s" if i - 1 else "", collection_in.n())
    if collection_in.n() <= 1:
        log.error(
            "Not enough samples for post-processing. Try using a larger sample, "
            "or skipping or thinning less.")
        raise HandledException
    # 2. Compare old and new info: determine what to do
    add = info_post.get("add", {})
    remove = info_post.get("remove", {})
    # Add a dummy 'one' likelihood, to absorb unused parameters
    if not add.get(_likelihood):
        add[_likelihood] = odict()
    add[_likelihood].update({"one": None})
    # Expand the "add" info
    add = get_full_info(add)
    # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters
    out = {_params: deepcopy(info_in[_params])}
    for p in remove.get(_params, {}):
        pinfo = info_in[_params].get(p)
        if pinfo is None or not is_derived_param(pinfo):
            log.error(
                "You tried to remove parameter '%s', which is not a derived paramter. "
                "Only derived parameters can be removed during post-processing.",
                p)
            raise HandledException
        out[_params].pop(p)
    mlprior_names_add = []
    for p, pinfo in add.get(_params, {}).items():
        pinfo_in = info_in[_params].get(p)
        if is_sampled_param(pinfo):
            if not is_sampled_param(pinfo_in):
                # No added sampled parameters (de-marginalisation not implemented)
                if pinfo_in is None:
                    log.error(
                        "You added a new sampled parameter %r (maybe accidentaly "
                        "by adding a new likelihood that depends on it). "
                        "Adding new sampled parameters is not possible. Try fixing "
                        "it to some value.", p)
                    raise HandledException
                else:
                    log.error(
                        "You tried to change the prior of parameter '%s', "
                        "but it was not a sampled parameter. "
                        "To change that prior, you need to define as an external one.",
                        p)
                    raise HandledException
            if mlprior_names_add[:1] != _prior_1d_name:
                mlprior_names_add = (
                    [_minuslogprior + _separator + _prior_1d_name] +
                    mlprior_names_add)
        elif is_derived_param(pinfo):
            if p in out[_params]:
                log.error(
                    "You tried to add derived parameter '%s', which is already "
                    "present. To force its recomputation, 'remove' it too.", p)
                raise HandledException
        elif is_fixed_param(pinfo):
            # Only one possibility left "fixed" parameter that was not present before:
            # input of new likelihood, or just an argument for dynamical derived (dropped)
            if ((p in info_in[_params] and pinfo[_p_value] !=
                 (pinfo_in or {}).get(_p_value, None))):
                log.error(
                    "You tried to add a fixed parameter '%s: %r' that was already present"
                    " but had a different value or was not fixed. This is not allowed. "
                    "The old info of the parameter was '%s: %r'", p,
                    dict(pinfo), p, dict(pinfo_in))
                raise HandledException
        else:
            log.error("This should not happen. Contact the developers.")
            raise HandledException
        out[_params][p] = pinfo
    # For the likelihood only, turn the rest of *derived* parameters into constants,
    # so that the likelihoods do not try to compute them)
    # But be careful to exclude *input* params that have a "derived: True" value
    # (which in "full info" turns into "derived: 'lambda [x]: [x]'")
    out_params_like = deepcopy(out[_params])
    for p, pinfo in out_params_like.items():
        if ((is_derived_param(pinfo) and not (_p_value in pinfo)
             and p not in add.get(_params, {}))):
            out_params_like[p] = {_p_value: np.nan, _p_drop: True}
    parameterization_like = Parameterization(out_params_like,
                                             ignore_unused_sampled=True)
    # 2.2 Manage adding/removing priors and likelihoods
    warn_remove = False
    for level in [_prior, _likelihood]:
        out[level] = getattr(dummy_model_in, level)
        if level == _prior:
            out[level].remove(_prior_1d_name)
        for pdf in info_post.get("remove", {}).get(level, []) or []:
            try:
                out[level].remove(pdf)
                warn_remove = True
            except ValueError:
                log.error(
                    "Trying to remove %s '%s', but it is not present. "
                    "Existing ones: %r", level, pdf, out[level])
                raise HandledException
    if warn_remove:
        log.warning("You are removing a prior or likelihood pdf. "
                    "Notice that if the resulting posterior is much wider "
                    "than the original one, or displaced enough, "
                    "it is probably safer to explore it directly.")
    if _prior in add:
        mlprior_names_add += [
            _minuslogprior + _separator + name for name in add[_prior]
        ]
        out[_prior] += list(add[_prior])
    prior_recompute_1d = (mlprior_names_add[:1] == [
        _minuslogprior + _separator + _prior_1d_name
    ])
    # Don't initialise the theory code if not adding/recomputing theory,
    # theory-derived params or likelihoods
    recompute_theory = info_in.get(_theory) and not (list(
        add[_likelihood]) == ["one"] and not any([
            is_derived_param(pinfo) for pinfo in add.get(_params, {}).values()
        ]))
    if recompute_theory:
        # Inherit from the original chain (needs input|output_params, renames, etc
        theory = list(info_in[_theory].keys())[0]
        info_theory_out = odict([[
            theory,
            recursive_update(deepcopy(info_in[_theory][theory]),
                             add.get(_theory, {theory: {}})[theory])
        ]])
    else:
        info_theory_out = None
    chi2_names_add = [
        _chi2 + _separator + name for name in add[_likelihood]
        if name is not "one"
    ]
    out[_likelihood] += [l for l in add[_likelihood] if l is not "one"]
    if recompute_theory:
        log.warn(
            "You are recomputing the theory, but in the current version this does "
            "not force recomputation of any likelihood or derived parameter, "
            "unless explicitly removed+added.")
    for level in [_prior, _likelihood]:
        for i, x_i in enumerate(out[level]):
            if x_i in list(out[level])[i + 1:]:
                log.error(
                    "You have added %s '%s', which was already present. If you "
                    "want to force its recomputation, you must also 'remove' it.",
                    level, x_i)
                raise HandledException
    # 3. Create output collection
    if "suffix" not in info_post:
        log.error("You need to provide a 'suffix' for your chains.")
        raise HandledException
    # Use default prefix if it exists. If it does not, produce no output by default.
    # {post: {output: None}} suppresses output, and if it's a string, updates it.
    out_prefix = info_post.get(_output_prefix, info.get(_output_prefix))
    if out_prefix not in [None, False]:
        out_prefix += "_" + _post + "_" + info_post["suffix"]
    output_out = Output(output_prefix=out_prefix,
                        force_output=info.get(_force))
    info_out = deepcopy(info)
    info_out[_post] = info_post
    # Updated with input info and extended (full) add info
    info_out.update(info_in)
    info_out[_post]["add"] = add
    dummy_model_out = DummyModel(out[_params],
                                 out[_likelihood],
                                 info_prior=out[_prior])
    if recompute_theory:
        theory = list(info_theory_out.keys())[0]
        if _input_params not in info_theory_out[theory]:
            log.error(
                "You appear to be post-processing a chain generated with an older "
                "version of Cobaya. For post-processing to work, please edit the "
                "'[root]__full.info' file of the original chain to add, inside the "
                "theory code block, the list of its input parameters. E.g.\n----\n"
                "theory:\n  %s:\n    input_params: [param1, param2, ...]\n"
                "----\nIf you get strange errors later, it is likely that you did not "
                "specify the correct set of theory parameters.\n"
                "The full set of input parameters are %s.", theory,
                list(dummy_model_out.parameterization.input_params()))
            raise HandledException
    prior_add = Prior(dummy_model_out.parameterization, add.get(_prior))
    likelihood_add = Likelihood(add[_likelihood],
                                parameterization_like,
                                info_theory=info_theory_out,
                                modules=info.get(_path_install))
    # Remove auxiliary "one" before dumping -- 'add' *is* info_out[_post]["add"]
    add[_likelihood].pop("one")
    if likelihood_add.theory:
        # Make sure that theory.needs is called at least once, for adjustments
        likelihood_add.theory.needs()
    collection_out = Collection(dummy_model_out, output_out, name="1")
    output_out.dump_info({}, info_out)
    # 4. Main loop!
    log.info("Running post-processing...")
    last_percent = 0
    for i, point in enumerate(collection_in.data.itertuples()):
        log.debug("Point: %r", point)
        sampled = [
            getattr(point, param)
            for param in dummy_model_in.parameterization.sampled_params()
        ]
        derived = odict(
            [[param, getattr(point, param, None)]
             for param in dummy_model_out.parameterization.derived_params()])
        inputs = odict([[
            param,
            getattr(
                point, param,
                dummy_model_in.parameterization.constant_params().get(
                    param,
                    dummy_model_out.parameterization.constant_params().get(
                        param, None)))
        ] for param in dummy_model_out.parameterization.input_params()])
        # Solve inputs that depend on a function and were not saved
        # (we don't use the Parameterization_to_input method in case there are references
        #  to functions that cannot be loaded at the moment)
        for p, value in inputs.items():
            if value is None:
                func = dummy_model_out.parameterization._input_funcs[p]
                args = dummy_model_out.parameterization._input_args[p]
                inputs[p] = func(*[getattr(point, arg) for arg in args])
        # Add/remove priors
        priors_add = prior_add.logps(sampled)
        if not prior_recompute_1d:
            priors_add = priors_add[1:]
        logpriors_add = odict(zip(mlprior_names_add, priors_add))
        logpriors_new = [
            logpriors_add.get(name, -getattr(point, name, 0))
            for name in collection_out.minuslogprior_names
        ]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug("New set of priors: %r",
                      dict(zip(dummy_model_out.prior, logpriors_new)))
        if -np.inf in logpriors_new:
            continue
        # Add/remove likelihoods
        output_like = []
        if likelihood_add:
            # Notice "one" (last in likelihood_add) is ignored: not in chi2_names
            loglikes_add = odict(
                zip(chi2_names_add,
                    likelihood_add.logps(inputs, _derived=output_like)))
            output_like = dict(zip(likelihood_add.output_params, output_like))
        else:
            loglikes_add = dict()
        loglikes_new = [
            loglikes_add.get(name, -0.5 * getattr(point, name, 0))
            for name in collection_out.chi2_names
        ]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug("New set of likelihoods: %r",
                      dict(zip(dummy_model_out.likelihood, loglikes_new)))
            if output_like:
                log.debug("New set of likelihood-derived parameters: %r",
                          output_like)
        if -np.inf in loglikes_new:
            continue
        # Add/remove derived parameters and change priors of sampled parameters
        for p in add[_params]:
            if p in dummy_model_out.parameterization._directly_output:
                derived[p] = output_like[p]
            elif p in dummy_model_out.parameterization._derived_funcs:
                func = dummy_model_out.parameterization._derived_funcs[p]
                args = dummy_model_out.parameterization._derived_args[p]
                derived[p] = func(*[
                    getattr(point, arg, output_like.get(arg, None))
                    for arg in args
                ])
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug(
                "New derived parameters: %r",
                dict([[
                    p, derived[p]
                ] for p in dummy_model_out.parameterization.derived_params()
                      if p in add[_params]]))
        # Save to the collection (keep old weight for now)
        collection_out.add(sampled,
                           derived=derived.values(),
                           weight=getattr(point, _weight),
                           logpriors=logpriors_new,
                           loglikes=loglikes_new)
        # Display progress
        percent = np.round(i / collection_in.n() * 100)
        if percent != last_percent and not percent % 5:
            last_percent = percent
            progress_bar(log, percent, " (%d/%d)" % (i, collection_in.n()))
    if not collection_out.data.last_valid_index():
        log.error(
            "No elements in the final sample. Possible causes: "
            "added a prior or likelihood valued zero over the full sampled domain, "
            "or the computation of the theory failed everywhere, etc.")
        raise HandledException
    # Reweight -- account for large dynamic range!
    #   Prefer to rescale +inf to finite, and ignore final points with -inf.
    #   Remove -inf's (0-weight), and correct indices
    difflogmax = max(collection_in[_minuslogpost] -
                     collection_out[_minuslogpost])
    collection_out.data[_weight] *= np.exp(collection_in[_minuslogpost] -
                                           collection_out[_minuslogpost] -
                                           difflogmax)
    collection_out.data = (
        collection_out.data[collection_out.data.weight > 0].reset_index(
            drop=True))
    collection_out._n = collection_out.data.last_valid_index() + 1
    # Write!
    collection_out._out_update()
    log.info("Finished! Final number of samples: %d", collection_out.n())
    return info_out, {"sample": collection_out}