Example #1
0
 def __init__(self,
              info_params,
              info_likelihood,
              info_prior=None,
              info_theory=None,
              modules=None,
              timing=None,
              allow_renames=True):
     self.log = logging.getLogger(self.__class__.__name__)
     self._full_info = {
         _params: deepcopy(info_params),
         _likelihood: deepcopy(info_likelihood)
     }
     if not self._full_info[_likelihood]:
         self.log.error("No likelihood requested!")
         raise HandledException
     for like in self._full_info[_likelihood].values():
         like.pop(_params)
     for k, v in ((_prior, info_prior), (_theory, info_theory),
                  (_path_install, modules), (_timing, timing)):
         if v not in (None, {}):
             self._full_info[k] = deepcopy(v)
     self.parameterization = Parameterization(info_params,
                                              allow_renames=allow_renames)
     self.prior = Prior(self.parameterization, info_prior)
     self.likelihood = Likelihood(info_likelihood,
                                  self.parameterization,
                                  info_theory,
                                  modules=modules,
                                  timing=timing)
Example #2
0
 def __init__(self,
              info_params,
              info_likelihood,
              info_prior=None,
              info_theory=None,
              modules=None,
              timing=None,
              allow_renames=True):
     self.set_logger(lowercase=True)
     self._updated_info = {
         _params: deepcopy_where_possible(info_params),
         _likelihood: deepcopy_where_possible(info_likelihood)
     }
     if not self._updated_info[_likelihood]:
         raise LoggedError(self.log, "No likelihood requested!")
     for k, v in ((_prior, info_prior), (_theory, info_theory),
                  (_path_install, modules), (_timing, timing)):
         if v not in (None, {}):
             self._updated_info[k] = deepcopy_where_possible(v)
     self.parameterization = Parameterization(self._updated_info[_params],
                                              allow_renames=allow_renames)
     self.prior = Prior(self.parameterization,
                        self._updated_info.get(_prior, None))
     self.likelihood = Likelihood(self._updated_info[_likelihood],
                                  self.parameterization,
                                  self._updated_info.get(_theory),
                                  modules=modules,
                                  timing=timing)
Example #3
0
 def __init__(self,
              info_params,
              info_likelihood,
              info_prior=None,
              info_theory=None):
     self.parameterization = Parameterization(info_params,
                                              ignore_unused_sampled=True)
     self.prior = [_prior_1d_name] + list(info_prior or [])
     self.likelihood = list(info_likelihood)
Example #4
0
def post(info_or_yaml_or_file: Union[InputDict, str, os.PathLike],
         sample: Union[SampleCollection, List[SampleCollection], None] = None
         ) -> PostTuple:
    info = load_input_dict(info_or_yaml_or_file)
    logger_setup(info.get("debug"), info.get("debug_file"))
    log = get_logger(__name__)
    # MARKED FOR DEPRECATION IN v3.0
    if info.get("modules"):
        raise LoggedError(log, "The input field 'modules' has been deprecated."
                               "Please use instead %r", packages_path_input)
    # END OF DEPRECATION BLOCK
    info_post: PostDict = info.get("post") or {}
    if not info_post:
        raise LoggedError(log, "No 'post' block given. Nothing to do!")
    if mpi.is_main_process() and info.get("resume"):
        log.warning("Resuming not implemented for post-processing. Re-starting.")
    if not info.get("output") and info_post.get("output") \
            and not info.get("params"):
        raise LoggedError(log, "The input dictionary must have be a full option "
                               "dictionary, or have an existing 'output' root to load "
                               "previous settings from ('output' to read from is in the "
                               "main block not under 'post'). ")
    # 1. Load existing sample
    output_in = get_output(prefix=info.get("output"))
    if output_in:
        info_in = output_in.load_updated_info() or update_info(info)
    else:
        info_in = update_info(info)
    params_in: ExpandedParamsDict = info_in["params"]  # type: ignore
    dummy_model_in = DummyModel(params_in, info_in.get("likelihood", {}),
                                info_in.get("prior"))

    in_collections = []
    thin = info_post.get("thin", 1)
    skip = info_post.get("skip", 0)
    if info.get('thin') is not None or info.get('skip') is not None:  # type: ignore
        raise LoggedError(log, "'thin' and 'skip' should be "
                               "parameters of the 'post' block")

    if sample:
        # If MPI, assume each MPI process is passed the list of collections
        # that it should process (e.g. single-chain output from the sampler)
        if isinstance(sample, SampleCollection):
            in_collections = [sample]
        else:
            in_collections = sample
        for i, collection in enumerate(in_collections):
            if skip:
                if 0 < skip < 1:
                    skip = int(round(skip * len(collection)))
                collection = collection.filtered_copy(slice(skip, None))
            if thin != 1:
                collection = collection.thin_samples(thin)
            in_collections[i] = collection
    elif output_in:
        files = output_in.find_collections()
        numbered = files
        if not numbered:
            # look for un-numbered output files
            files = output_in.find_collections(name=False)
        if files:
            if mpi.size() > len(files):
                raise LoggedError(log, "Number of MPI processes (%s) is larger than "
                                       "the number of sample files (%s)",
                                  mpi.size(), len(files))
            for num in range(mpi.rank(), len(files), mpi.size()):
                in_collections += [SampleCollection(
                    dummy_model_in, output_in,
                    onload_thin=thin, onload_skip=skip, load=True, file_name=files[num],
                    name=str(num + 1) if numbered else "")]
        else:
            raise LoggedError(log, "No samples found for the input model with prefix %s",
                              os.path.join(output_in.folder, output_in.prefix))

    else:
        raise LoggedError(log, "No output from where to load from, "
                               "nor input collections given.")
    if any(len(c) <= 1 for c in in_collections):
        raise LoggedError(
            log, "Not enough samples for post-processing. Try using a larger sample, "
                 "or skipping or thinning less.")
    mpi.sync_processes()
    log.info("Will process %d sample points.", sum(len(c) for c in in_collections))

    # 2. Compare old and new info: determine what to do
    add = info_post.get("add") or {}
    if "remove" in add:
        raise LoggedError(log, "remove block should be under 'post', not 'add'")
    remove = info_post.get("remove") or {}
    # Add a dummy 'one' likelihood, to absorb unused parameters
    if not add.get("likelihood"):
        add["likelihood"] = {}
    add["likelihood"]["one"] = None
    # Expand the "add" info, but don't add new default sampled parameters
    orig_params = set(add.get("params") or [])
    add = update_info(add, add_aggr_chi2=False)
    add_params: ExpandedParamsDict = add["params"]  # type: ignore
    for p in set(add_params) - orig_params:
        if p in params_in:
            add_params.pop(p)

    # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters
    out_combined_params = deepcopy_where_possible(params_in)
    remove_params = list(str_to_list(remove.get("params")) or [])
    for p in remove_params:
        pinfo = params_in.get(p)
        if pinfo is None or not is_derived_param(pinfo):
            raise LoggedError(
                log,
                "You tried to remove parameter '%s', which is not a derived parameter. "
                "Only derived parameters can be removed during post-processing.", p)
        out_combined_params.pop(p)
    # Force recomputation of aggregated chi2
    for p in list(out_combined_params):
        if p.startswith(get_chi2_name("")):
            out_combined_params.pop(p)
    prior_recompute_1d = False
    for p, pinfo in add_params.items():
        pinfo_in = params_in.get(p)
        if is_sampled_param(pinfo):
            if not is_sampled_param(pinfo_in):
                # No added sampled parameters (de-marginalisation not implemented)
                if pinfo_in is None:
                    raise LoggedError(
                        log, "You added a new sampled parameter %r (maybe accidentally "
                             "by adding a new likelihood that depends on it). "
                             "Adding new sampled parameters is not possible. Try fixing "
                             "it to some value.", p)
                else:
                    raise LoggedError(
                        log,
                        "You tried to change the prior of parameter '%s', "
                        "but it was not a sampled parameter. "
                        "To change that prior, you need to define as an external one.", p)
            # recompute prior if potentially changed sampled parameter priors
            prior_recompute_1d = True
        elif is_derived_param(pinfo):
            if p in out_combined_params:
                raise LoggedError(
                    log, "You tried to add derived parameter '%s', which is already "
                         "present. To force its recomputation, 'remove' it too.", p)
        elif is_fixed_or_function_param(pinfo):
            # Only one possibility left: a "fixed" parameter that was not present before:
            # input of new likelihood, or just an argument for dynamical derived (dropped)
            if pinfo_in and p in params_in and pinfo["value"] != pinfo_in.get("value"):
                raise LoggedError(
                    log,
                    "You tried to add a fixed parameter '%s: %r' that was already present"
                    " but had a different value or was not fixed. This is not allowed. "
                    "The old info of the parameter was '%s: %r'",
                    p, dict(pinfo), p, dict(pinfo_in))
        elif not pinfo_in:  # OK as long as we have a known value for it
            raise LoggedError(log, "Parameter %s has no known value.", p)
        out_combined_params[p] = pinfo

    out_combined: InputDict = {"params": out_combined_params}  # type: ignore
    # Turn the rest of *derived* parameters into constants,
    # so that the likelihoods do not try to recompute them
    # But be careful to exclude *input* params that have a "derived: True" value
    # (which in "updated info" turns into "derived: 'lambda [x]: [x]'")
    # Don't assign to derived parameters to theories, only likelihoods, so they can be
    # recomputed if needed. If the theory does not need to be computed, it doesn't matter
    # if it is already assigned parameters in the usual way; likelihoods can get
    # the required derived parameters from the stored sample derived parameter inputs.
    out_params_with_computed = deepcopy_where_possible(out_combined_params)

    dropped_theory = set()
    for p, pinfo in out_params_with_computed.items():
        if (is_derived_param(pinfo) and "value" not in pinfo
                and p not in add_params):
            out_params_with_computed[p] = {"value": np.nan}
            dropped_theory.add(p)
    # 2.2 Manage adding/removing priors and likelihoods
    warn_remove = False
    kind: ModelBlock
    for kind in ("prior", "likelihood", "theory"):
        out_combined[kind] = deepcopy_where_possible(info_in.get(kind)) or {}
        for remove_item in str_to_list(remove.get(kind)) or []:
            try:
                out_combined[kind].pop(remove_item, None)
                if remove_item not in (add.get(kind) or []) and kind != "theory":
                    warn_remove = True
            except ValueError:
                raise LoggedError(
                    log, "Trying to remove %s '%s', but it is not present. "
                         "Existing ones: %r", kind, remove_item, list(out_combined[kind]))
        if kind != "theory" and kind in add:
            dups = set(add.get(kind) or []).intersection(out_combined[kind]) - {"one"}
            if dups:
                raise LoggedError(
                    log, "You have added %s '%s', which was already present. If you "
                         "want to force its recomputation, you must also 'remove' it.",
                    kind, dups)
            out_combined[kind].update(add[kind])

    if warn_remove and mpi.is_main_process():
        log.warning("You are removing a prior or likelihood pdf. "
                    "Notice that if the resulting posterior is much wider "
                    "than the original one, or displaced enough, "
                    "it is probably safer to explore it directly.")

    mlprior_names_add = minuslogprior_names(add.get("prior") or [])
    chi2_names_add = [get_chi2_name(name) for name in add["likelihood"] if
                      name != "one"]
    out_combined["likelihood"].pop("one", None)

    add_theory = add.get("theory")
    if add_theory:
        if len(add["likelihood"]) == 1 and not any(
                is_derived_param(pinfo) for pinfo in add_params.values()):
            log.warning("You are adding a theory, but this does not force recomputation "
                        "of any likelihood or derived parameters unless explicitly "
                        "removed+added.")
        # Inherit from the original chain (input|output_params, renames, etc)
        added_theory = add_theory.copy()
        for theory, theory_info in out_combined["theory"].items():
            if theory in list(added_theory):
                out_combined["theory"][theory] = \
                    recursive_update(theory_info, added_theory.pop(theory))
        out_combined["theory"].update(added_theory)

    # Prepare recomputation of aggregated chi2
    # (they need to be recomputed by hand, because auto-computation won't pick up
    #  old likelihoods for a given type)
    all_types = {like: str_to_list(opts.get("type") or [])
                 for like, opts in out_combined["likelihood"].items()}
    types = set(chain(*all_types.values()))
    inv_types = {t: [like for like, like_types in all_types.items() if t in like_types]
                 for t in sorted(types)}
    add_aggregated_chi2_params(out_combined_params, types)

    # 3. Create output collection
    # Use default prefix if it exists. If it does not, produce no output by default.
    # {post: {output: None}} suppresses output, and if it's a string, updates it.
    out_prefix = info_post.get("output", info.get("output"))
    if out_prefix:
        suffix = info_post.get("suffix")
        if not suffix:
            raise LoggedError(log, "You need to provide a '%s' for your output chains.",
                              "suffix")
        out_prefix += separator_files + "post" + separator_files + suffix
    output_out = get_output(prefix=out_prefix, force=info.get("force"))
    output_out.set_lock()

    if output_out and not output_out.force and output_out.find_collections():
        raise LoggedError(log, "Found existing post-processing output with prefix %r. "
                               "Delete it manually or re-run with `force: True` "
                               "(or `-f`, `--force` from the shell).", out_prefix)
    elif output_out and output_out.force and mpi.is_main_process():
        output_out.delete_infos()
        for _file in output_out.find_collections():
            output_out.delete_file_or_folder(_file)
    info_out = deepcopy_where_possible(info)
    info_post = info_post.copy()
    info_out["post"] = info_post
    # Updated with input info and extended (updated) add info
    info_out.update(info_in)  # type: ignore
    info_post["add"] = add

    dummy_model_out = DummyModel(out_combined_params, out_combined["likelihood"],
                                 info_prior=out_combined["prior"])
    out_func_parameterization = Parameterization(out_params_with_computed)

    # TODO: check allow_renames=False?
    model_add = Model(out_params_with_computed, add["likelihood"],
                      info_prior=add.get("prior"), info_theory=out_combined["theory"],
                      packages_path=(info_post.get(packages_path_input) or
                                     info.get(packages_path_input)),
                      allow_renames=False, post=True,
                      stop_at_error=info.get('stop_at_error', False),
                      skip_unused_theories=True, dropped_theory_params=dropped_theory)
    # Remove auxiliary "one" before dumping -- 'add' *is* info_out["post"]["add"]
    add["likelihood"].pop("one")
    out_collections = [SampleCollection(dummy_model_out, output_out, name=c.name,
                                        cache_size=OutputOptions.default_post_cache_size)
                       for c in in_collections]
    # TODO: should maybe add skip/thin to out_combined, so can tell post-processed?
    output_out.check_and_dump_info(info_out, out_combined, check_compatible=False)
    collection_in = in_collections[0]
    collection_out = out_collections[0]

    last_percent = None
    known_constants = dummy_model_out.parameterization.constant_params()
    known_constants.update(dummy_model_in.parameterization.constant_params())
    missing_params = dummy_model_in.parameterization.sampled_params().keys() - set(
        collection_in.columns)
    if missing_params:
        raise LoggedError(log, "Input samples do not contain expected sampled parameter "
                               "values: %s", missing_params)

    missing_priors = set(name for name in collection_out.minuslogprior_names if
                         name not in mlprior_names_add
                         and name not in collection_in.columns)
    if _minuslogprior_1d_name in missing_priors:
        prior_recompute_1d = True
    if prior_recompute_1d:
        missing_priors.discard(_minuslogprior_1d_name)
        mlprior_names_add.insert(0, _minuslogprior_1d_name)
    prior_regenerate: Optional[Prior]
    if missing_priors and "prior" in info_in:
        # in case there are input priors that are not stored in input samples
        # e.g. when postprocessing GetDist/CosmoMC-format chains
        in_names = minuslogprior_names(info_in["prior"])
        info_prior = {piname: inf for (piname, inf), in_name in
                      zip(info_in["prior"].items(), in_names) if
                      in_name in missing_priors}
        regenerated_prior_names = minuslogprior_names(info_prior)
        missing_priors.difference_update(regenerated_prior_names)
        prior_regenerate = Prior(dummy_model_in.parameterization, info_prior)
    else:
        prior_regenerate = None
        regenerated_prior_names = None
    if missing_priors:
        raise LoggedError(log, "Missing priors: %s", missing_priors)

    mpi.sync_processes()
    output_in.check_lock()

    # 4. Main loop! Loop over input samples and adjust as required.
    if mpi.is_main_process():
        log.info("Running post-processing...")
    difflogmax: Optional[float] = None
    to_do = sum(len(c) for c in in_collections)
    weights = []
    done = 0
    last_dump_time = time.time()
    for collection_in, collection_out in zip(in_collections, out_collections):
        importance_weights = []

        def set_difflogmax():
            nonlocal difflogmax
            difflog = (collection_in[OutPar.minuslogpost].to_numpy(
                dtype=np.float64)[:len(collection_out)]
                       - collection_out[OutPar.minuslogpost].to_numpy(dtype=np.float64))
            difflogmax = np.max(difflog)
            if abs(difflogmax) < 1:
                difflogmax = 0  # keep simple when e.g. very similar
            log.debug("difflogmax: %g", difflogmax)
            if mpi.more_than_one_process():
                difflogmax = max(mpi.allgather(difflogmax))
            if mpi.is_main_process():
                log.debug("Set difflogmax: %g", difflogmax)
            _weights = np.exp(difflog - difflogmax)
            importance_weights.extend(_weights)
            collection_out.reweight(_weights)

        for i, point in collection_in.data.iterrows():
            all_params = point.to_dict()
            for p in remove_params:
                all_params.pop(p, None)
            log.debug("Point: %r", point)
            sampled = np.array([all_params[param] for param in
                                dummy_model_in.parameterization.sampled_params()])
            all_params = out_func_parameterization.to_input(all_params).copy()

            # Add/remove priors
            if prior_recompute_1d:
                priors_add = [model_add.prior.logps_internal(sampled)]
                if priors_add[0] == -np.inf:
                    continue
            else:
                priors_add = []
            if model_add.prior.external:
                priors_add.extend(model_add.prior.logps_external(all_params))

            logpriors_add = dict(zip(mlprior_names_add, priors_add))
            logpriors_new = [logpriors_add.get(name, - point.get(name, 0))
                             for name in collection_out.minuslogprior_names]
            if prior_regenerate:
                regenerated = dict(zip(regenerated_prior_names,
                                       prior_regenerate.logps_external(all_params)))
                for _i, name in enumerate(collection_out.minuslogprior_names):
                    if name in regenerated_prior_names:
                        logpriors_new[_i] = regenerated[name]

            if is_debug(log):
                log.debug("New set of priors: %r",
                          dict(zip(dummy_model_out.prior, logpriors_new)))
            if -np.inf in logpriors_new:
                continue
            # Add/remove likelihoods and/or (re-)calculate derived parameters
            loglikes_add, output_derived = model_add._loglikes_input_params(
                all_params, return_output_params=True)
            loglikes_add = dict(zip(chi2_names_add, loglikes_add))
            output_derived = dict(zip(model_add.output_params, output_derived))
            loglikes_new = [loglikes_add.get(name, -0.5 * point.get(name, 0))
                            for name in collection_out.chi2_names]
            if is_debug(log):
                log.debug("New set of likelihoods: %r",
                          dict(zip(dummy_model_out.likelihood, loglikes_new)))
                if output_derived:
                    log.debug("New set of derived parameters: %r", output_derived)
            if -np.inf in loglikes_new:
                continue
            all_params.update(output_derived)

            all_params.update(out_func_parameterization.to_derived(all_params))
            derived = {param: all_params.get(param) for param in
                       dummy_model_out.parameterization.derived_params()}
            # We need to recompute the aggregated chi2 by hand
            for type_, likes in inv_types.items():
                derived[get_chi2_name(type_)] = sum(
                    -2 * lvalue for lname, lvalue
                    in zip(collection_out.chi2_names, loglikes_new)
                    if undo_chi2_name(lname) in likes)
            if is_debug(log):
                log.debug("New derived parameters: %r",
                          {p: derived[p]
                           for p in dummy_model_out.parameterization.derived_params()
                           if p in add["params"]})
            # Save to the collection (keep old weight for now)
            weight = point.get(OutPar.weight)
            mpi.check_errors()
            if difflogmax is None and i > OutputOptions.reweight_after and \
                    time.time() - last_dump_time > OutputOptions.output_inteveral_s / 2:
                set_difflogmax()
                collection_out.out_update()

            if difflogmax is not None:
                logpost_new = sum(logpriors_new) + sum(loglikes_new)
                importance_weight = np.exp(logpost_new + point.get(OutPar.minuslogpost)
                                           - difflogmax)
                weight = weight * importance_weight
                importance_weights.append(importance_weight)
                if time.time() - last_dump_time > OutputOptions.output_inteveral_s:
                    collection_out.out_update()
                    last_dump_time = time.time()

            if weight > 0:
                collection_out.add(sampled, derived=derived.values(), weight=weight,
                                   logpriors=logpriors_new, loglikes=loglikes_new)

            # Display progress
            percent = int(np.round((i + done) / to_do * 100))
            if percent != last_percent and not percent % 5:
                last_percent = percent
                progress_bar(log, percent, " (%d/%d)" % (i + done, to_do))

        if difflogmax is None:
            set_difflogmax()
        if not collection_out.data.last_valid_index():
            raise LoggedError(
                log, "No elements in the final sample. Possible causes: "
                     "added a prior or likelihood valued zero over the full sampled "
                     "domain, or the computation of the theory failed everywhere, etc.")
        collection_out.out_update()
        weights.append(np.array(importance_weights))
        done += len(collection_in)

    assert difflogmax is not None
    points = 0
    tot_weight = 0
    min_weight = np.inf
    max_weight = -np.inf
    max_output_weight = -np.inf
    sum_w2 = 0
    points_removed = 0
    for collection_in, collection_out, importance_weights in zip(in_collections,
                                                                 out_collections,
                                                                 weights):
        output_weights = collection_out[OutPar.weight]
        points += len(collection_out)
        tot_weight += np.sum(output_weights)
        points_removed += len(importance_weights) - len(output_weights)
        min_weight = min(min_weight, np.min(importance_weights))
        max_weight = max(max_weight, np.max(importance_weights))
        max_output_weight = max(max_output_weight, np.max(output_weights))
        sum_w2 += np.dot(output_weights, output_weights)

    (tot_weights, min_weights, max_weights, max_output_weights, sum_w2s, points_s,
     points_removed_s) = mpi.zip_gather(
        [tot_weight, min_weight, max_weight, max_output_weight, sum_w2,
         points, points_removed])

    if mpi.is_main_process():
        output_out.clear_lock()
        log.info("Finished! Final number of distinct sample points: %s", sum(points_s))
        log.info("Importance weight range: %.4g -- %.4g",
                 min(min_weights), max(max_weights))
        if sum(points_removed_s):
            log.info("Points deleted due to zero weight: %s", sum(points_removed_s))
        log.info("Effective number of single samples if independent (sum w)/max(w): %s",
                 int(sum(tot_weights) / max(max_output_weights)))
        log.info(
            "Effective number of weighted samples if independent (sum w)^2/sum(w^2): "
            "%s", int(sum(tot_weights) ** 2 / sum(sum_w2s)))
    products: PostResultDict = {"sample": value_or_list(out_collections),
                                "stats": {'min_importance_weight': (min(min_weights) /
                                                                    max(max_weights)),
                                          'points_removed': sum(points_removed_s),
                                          'tot_weight': sum(tot_weights),
                                          'max_weight': max(max_output_weights),
                                          'sum_w2': sum(sum_w2s),
                                          'points': sum(points_s)},
                                "logpost_weight_offset": difflogmax,
                                "weights": value_or_list(weights)}
    return PostTuple(info=out_combined, products=products)
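For reference, a minimal sketch of the kind of input dictionary that post() above consumes, using only the fields the function actually reads (output, and post: suffix/thin/skip/add/remove). The concrete names "chains/run1", "new_like" and "old_derived_param" are illustrative assumptions, not part of any real run:

# Illustrative only: an input dict for post(), built from the fields read above.
post_input = {
    "output": "chains/run1",    # existing output root to load the input sample from
    "post": {
        "suffix": "importance",                       # required: suffix for the output chains
        "thin": 1,                                    # thinning applied when loading
        "skip": 0.3,                                  # 0 < skip < 1 is a burn-in fraction; >= 1 is a number of points
        "add": {"likelihood": {"new_like": None}},    # hypothetical likelihood to add (None = default options)
        "remove": {"params": ["old_derived_param"]},  # derived parameters to drop (and optionally re-add)
    },
}
# updated_info, products = post(post_input)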
Example #5
0
    def __init__(self,
                 parameterization: Parameterization,
                 info_prior: Optional[PriorsDict] = None):
        """
        Initializes the prior and reference pdf's from the input information.
        """
        self.set_logger()
        self._parameterization = parameterization
        sampled_params_info = parameterization.sampled_params_info()
        # pdf: a list of independent components
        # in principle, separable: one per parameter
        self.params = []
        self.pdf = []
        self.ref_pdf = []
        self._ref_is_pointlike = True
        self._bounds = np.zeros((len(sampled_params_info), 2))
        for i, p in enumerate(sampled_params_info):
            self.params += [p]
            prior = sampled_params_info[p].get("prior")
            self.pdf += [get_scipy_1d_pdf({p: prior})]
            fast_logpdf = fast_logpdfs.get(self.pdf[-1].dist.name)
            if fast_logpdf:
                self.pdf[-1].logpdf = MethodType(fast_logpdf, self.pdf[-1])
            # Get the reference (1d) pdf
            ref = sampled_params_info[p].get("ref")
            # Cases: number, pdf (something, but not a number), nothing
            if isinstance(ref, Sequence) and len(ref) == 2 and all(
                    isinstance(n, numbers.Number) for n in ref):
                ref = {"dist": "norm", "loc": ref[0], "scale": ref[1]}
            if isinstance(ref, numbers.Real):
                self.ref_pdf += [float(ref)]
            elif isinstance(ref, Mapping):
                self.ref_pdf += [get_scipy_1d_pdf({p: ref})]
                self._ref_is_pointlike = False
            elif ref is None:
                self.ref_pdf += [np.nan]
                self._ref_is_pointlike = False
            else:
                raise LoggedError(
                    self.log,
                    "'ref' for starting position should be None or a number"
                    ", a list of two numbers for normal mean and deviation,"
                    "or a dict with parameters for a scipy distribution.")

            self._bounds[i] = [-np.inf, np.inf]
            try:
                self._bounds[i] = self.pdf[-1].interval(1)
            except AttributeError:
                raise LoggedError(
                    self.log, "No bounds defined for parameter '%s' "
                    "(maybe not a scipy 1d pdf).", p)
        self._uniform_indices = np.array([
            i for i, pdf in enumerate(self.pdf) if pdf.dist.name == 'uniform'
        ],
                                         dtype=int)
        self._non_uniform_indices = np.array([
            i for i in range(len(self.pdf)) if i not in self._uniform_indices
        ],
                                             dtype=int)
        self._non_uniform_logpdf = [
            self.pdf[i].logpdf for i in self._non_uniform_indices
        ]
        self._upper_limits = self._bounds[:, 1].copy()
        self._lower_limits = self._bounds[:, 0].copy()
        self._uniform_logp = -np.sum(
            np.log(self._upper_limits[self._uniform_indices] -
                   self._lower_limits[self._uniform_indices]))

        # Process the external prior(s):
        self.external = {}
        self.external_dependence = set()
        info_prior = info_prior or {}
        for name in info_prior:
            if name == prior_1d_name:
                raise LoggedError(
                    self.log, "The name '%s' is a reserved prior name. "
                    "Please use a different one.", prior_1d_name)
            self.log.debug("Loading external prior '%s' from: '%s'", name,
                           info_prior[name])
            logp = get_external_function(info_prior[name], name=name)

            argspec = getfullargspec(logp)
            known = set(parameterization.input_params())
            params = [p for p in argspec.args if p in known]
            params_without_default = set(
                argspec.args[:(len(argspec.args) -
                               len(argspec.defaults or []))])
            unknown = params_without_default - known
            if unknown:
                if unknown.intersection(parameterization.derived_params()):
                    err = (
                        "External prior '%s' has arguments %s that are output derived "
                        "parameters, Priors must be functions of input parameters. "
                        "Use a separate 'likelihood' for the prior if needed.")
                else:
                    err = (
                        "Some of the arguments of the external prior '%s' cannot be "
                        "found and don't have a default value either: %s")
                raise LoggedError(self.log, err, name, list(unknown))
            self.external_dependence.update(params)
            self.external[name] = ExternalPrior(logp=logp, params=params)
            self.mpi_warning(
                "External prior '%s' loaded. "
                "Mind that it might not be normalized!", name)

        parameterization.check_dropped(self.external_dependence)
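For reference, the forms of "ref" handled by the branching above (read directly from that logic; the parameter names and numbers are illustrative):

# Illustrative only: the "ref" specifications accepted by the loop above.
sampled_params_info = {
    "a": {"prior": {"min": 0, "max": 1}, "ref": 0.5},             # a number: point-like reference
    "b": {"prior": {"min": 0, "max": 1}, "ref": [0.5, 0.1]},      # two numbers: normal with mean 0.5, std 0.1
    "c": {"prior": {"min": 0, "max": 1},
          "ref": {"dist": "norm", "loc": 0.5, "scale": 0.1}},     # explicit scipy 1d pdf specification
    "d": {"prior": {"min": 0, "max": 1}},                         # no ref: stored as np.nan (not point-like)
}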
Example #6
0
class Model(object):
    """
    Class containing all the information necessary to compute the unnormalized posterior.

    Allows for low-level interaction with the theory code, prior and likelihood.

    **NB:** do not initialize this class directly; use :func:`~model.get_model` instead,
    with some info as input.
    """
    def __init__(self,
                 info_params,
                 info_likelihood,
                 info_prior=None,
                 info_theory=None,
                 modules=None,
                 timing=None,
                 allow_renames=True):
        self.log = logging.getLogger(self.__class__.__name__)
        self._full_info = {
            _params: deepcopy(info_params),
            _likelihood: deepcopy(info_likelihood)
        }
        if not self._full_info[_likelihood]:
            self.log.error("No likelihood requested!")
            raise HandledException
        for like in self._full_info[_likelihood].values():
            like.pop(_params)
        for k, v in ((_prior, info_prior), (_theory, info_theory),
                     (_path_install, modules), (_timing, timing)):
            if v not in (None, {}):
                self._full_info[k] = deepcopy(v)
        self.parameterization = Parameterization(info_params,
                                                 allow_renames=allow_renames)
        self.prior = Prior(self.parameterization, info_prior)
        self.likelihood = Likelihood(info_likelihood,
                                     self.parameterization,
                                     info_theory,
                                     modules=modules,
                                     timing=timing)

    def info(self):
        """
        Returns a copy of the information used to create the model, including defaults.
        """
        return deepcopy(self._full_info)

    def _to_sampled_array(self, params_values):
        """
        Internal method to interact with the prior.
        Needs correct (not renamed) parameter names.
        """
        if hasattr(params_values, "keys"):
            params_values_array = np.array(list(params_values.values()))
        else:
            params_values_array = np.atleast_1d(params_values)
            if params_values_array.shape[0] != self.prior.d():
                self.log.error(
                    "Wrong dimensionality: it's %d and it should be %d.",
                    len(params_values_array), self.prior.d())
                raise HandledException
        if len(params_values_array.shape) >= 2:
            self.log.error(
                "Cannot take arrays of points as inputs, just single points.")
            raise HandledException
        return params_values_array

    def logpriors(self, params_values, make_finite=False):
        """
        Takes an array or dictionary of sampled parameter values.
        If the argument is an array, parameters must have the same order as in the input.
        When in doubt, you can get the correct order as
        ``list([your_model].parameterization.sampled_params())``.

        Returns the log-values of the priors, in the same order as returned by
        ``list([your_model].prior)``. The first one, named ``0``, corresponds to the
        product of the 1-dimensional priors specified in the ``params`` block, and it's
        normalized (in general, the external prior densities aren't).

        If ``make_finite=True``, it will try to represent infinities as the largest real
        numbers allowed by machine precision.
        """
        if hasattr(params_values, "keys"):
            params_values = self.parameterization._check_sampled(
                **params_values)
        params_values_array = self._to_sampled_array(params_values)
        logpriors = self.prior.logps(params_values_array)
        if make_finite:
            return np.nan_to_num(logpriors)
        return logpriors

    def logprior(self, params_values, make_finite=False):
        """
        Takes an array or dictionary of sampled parameter values.
        If the argument is an array, parameters must have the same order as in the input.
        When in doubt, you can get the correct order as
        ``list([your_model].parameterization.sampled_params())``.

        Returns the log-value of the prior (in general, unnormalized, unless the only
        priors specified are the 1-dimensional ones in the ``params`` block).

        If ``make_finite=True``, it will try to represent infinities as the largest real
        numbers allowed by machine precision.
        """
        logprior = np.sum(self.logpriors(params_values))
        if make_finite:
            return np.nan_to_num(logprior)
        return logprior

    def loglikes(self,
                 params_values,
                 return_derived=True,
                 make_finite=False,
                 cached=True,
                 _no_check=False):
        """
        Takes an array or dictionary of sampled parameter values.
        If the argument is an array, parameters must have the same order as in the input.
        When in doubt, you can get the correct order as
        ``list([your_model].parameterization.sampled_params())``.

        Returns a tuple ``(loglikes, derived_params)``, where ``loglikes`` are the
        log-values of the likelihoods (unnormalized, in general), in the same order as
        returned by ``list([your_model].likelihood)``, and ``derived_params``
        are the values of the derived parameters in the order given by
        ``list([your_model].parameterization.derived_params())``.

        To return just the list of log-likelihood values, make ``return_derived=False``.

        If ``make_finite=True``, it will try to represent infinities as the largest real
        numbers allowed by machine precision.

        If ``cached=False`` (default: True), it ignores previously computed results that
        could be reused.
        """
        if hasattr(params_values, "keys") and not _no_check:
            params_values = self.parameterization._check_sampled(
                **params_values)
        _derived = [] if return_derived else None
        loglikes = self.likelihood.logps(
            self.parameterization._to_input(params_values),
            _derived=_derived,
            cached=cached)
        if make_finite:
            loglikes = np.nan_to_num(loglikes)
        if return_derived:
            derived_sampler = self.parameterization._to_derived(_derived)
            if self.log.getEffectiveLevel() <= logging.DEBUG:
                self.log.debug(
                    "Computed derived parameters: %s",
                    dict(
                        zip(self.parameterization.derived_params(),
                            derived_sampler)))
            return loglikes, derived_sampler
        return loglikes

    def loglike(self,
                params_values,
                return_derived=True,
                make_finite=False,
                cached=True):
        """
        Takes an array or dictionary of sampled parameter values.
        If the argument is an array, parameters must have the same order as in the input.
        When in doubt, you can get the correct order as
        ``list([your_model].parameterization.sampled_params())``.

        Returns a tuple ``(loglike, derived_params)``, where ``loglike`` is the log-value
        of the likelihood (unnormalized, in general), and ``derived_params``
        are the values of the derived parameters in the order given by
        ``list([your_model].parameterization.derived_params())``.

        To return just the list of log-likelihood values, make ``return_derived=False``.

        If ``make_finite=True``, it will try to represent infinities as the largest real
        numbers allowed by machine precision.

        If ``cached=False`` (default: True), it ignores previously computed results that
        could be reused.
        """
        ret_value = self.loglikes(params_values,
                                  return_derived=return_derived,
                                  cached=cached)
        if return_derived:
            loglike = np.sum(ret_value[0])
            if make_finite:
                return np.nan_to_num(loglike), ret_value[1]
            return loglike, ret_value[1]
        else:
            loglike = np.sum(ret_value)
            if make_finite:
                return np.nan_to_num(loglike)
            return loglike

    def logposterior(self,
                     params_values,
                     return_derived=True,
                     make_finite=False,
                     cached=True):
        """
        Takes an array or dictionary of sampled parameter values.
        If the argument is an array, parameters must have the same order as in the input.
        When in doubt, you can get the correct order as
        ``list([your_model].parameterization.sampled_params())``.

        Returns a ``logposterior`` ``NamedTuple`` with the following fields:

        - ``logpost``: log-value of the posterior.
        - ``logpriors``: log-values of the priors, in the same order as in
          ``list([your_model].prior)``. The first one corresponds to the
          product of the 1-dimensional priors specified in the ``params``
          block; except for the first one, the priors are unnormalized.
        - ``loglikes``: log-values of the likelihoods (unnormalized, in general),
          in the same order as in ``list([your_model].likelihood)``.
        - ``derived``: values of the derived parameters in the order given by
          ``list([your_model].parameterization.derived_params())``.

        Only computes the log-likelihood and the derived parameters if the prior is
        non-null (otherwise the fields ``loglikes`` and ``derived`` are empty lists).

        To ignore the derived parameters, make ``return_derived=False``.

        If ``make_finite=True``, it will try to represent infinities as the largest real
        numbers allowed by machine precision.

        If ``cached=False`` (default: True), it ignores previously computed results that
        could be reused.
        """
        if hasattr(params_values, "keys"):
            params_values = self.parameterization._check_sampled(
                **params_values)
        params_values_array = self._to_sampled_array(params_values)
        if self.log.getEffectiveLevel() <= logging.DEBUG:
            self.log.debug(
                "Posterior to be computed for parameters %s",
                dict(
                    zip(self.parameterization.sampled_params(),
                        params_values_array)))
        if not np.all(np.isfinite(params_values_array)):
            self.log.error(
                "Got non-finite parameter values: %r",
                dict(
                    zip(self.parameterization.sampled_params(),
                        params_values_array)))
            raise HandledException
        # Notice that we don't use the make_finite in the prior call,
        # to correctly check if we have to compute the likelihood
        logpriors = self.logpriors(params_values_array, make_finite=False)
        logpost = sum(logpriors)
        if -np.inf not in logpriors:
            l = self.loglikes(params_values,
                              return_derived=return_derived,
                              make_finite=make_finite,
                              cached=cached,
                              _no_check=True)
            loglikes, derived_sampler = l if return_derived else (l, [])
            logpost += sum(loglikes)
        else:
            loglikes = []
            derived_sampler = []
        if make_finite:
            logpriors = np.nan_to_num(logpriors)
            logpost = np.nan_to_num(logpost)
        return logposterior(logpost=logpost,
                            logpriors=logpriors,
                            loglikes=loglikes,
                            derived=derived_sampler)

    def logpost(self, params_values, make_finite=False, cached=True):
        """
        Takes an array or dictionary of sampled parameter values.
        If the argument is an array, parameters must have the same order as in the input.
        When in doubt, you can get the correct order as
        ``list([your_model].parameterization.sampled_params())``.

        Returns the log-value of the posterior.

        If ``make_finite=True``, it will try to represent infinities as the largest real
        numbers allowed by machine precision.

        If ``cached=False`` (default: True), it ignores previously computed results that
        could be reused.
        """
        return self.logposterior(params_values,
                                 make_finite=make_finite,
                                 return_derived=False,
                                 cached=cached)[0]

    def dump_timing(self):
        """
        Prints the average computation time of the theory code and likelihoods.

        It's more reliable the more times the likelihood has been evaluated.
        """
        self.likelihood.dump_timing()

    # Python magic for the "with" statement
    def __enter__(self):
        return self

    def __exit__(self,
                 exception_type=None,
                 exception_value=None,
                 traceback=None):
        self.likelihood.__exit__(exception_type, exception_value, traceback)

    def close(self):
        self.__exit__()
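As a usage note for the evaluation API documented above, a minimal sketch, assuming Cobaya is installed and using get_model as the entry point recommended by the class docstring; the parameter name "x" and the "gauss" likelihood are made up for illustration:

# Illustrative only: evaluating priors, likelihoods and the posterior at one point.
from cobaya.model import get_model

info = {
    "params": {"x": {"prior": {"min": -5, "max": 5}}},   # one sampled parameter
    "likelihood": {"gauss": lambda x: -0.5 * x ** 2},    # external log-likelihood of x
}
model = get_model(info)
point = {"x": 0.1}
logpriors = model.logpriors(point)         # one value per prior; the first ("0") is the normalized 1d product
loglikes, derived = model.loglikes(point)  # per-likelihood log-values and derived parameters
full = model.logposterior(point)           # NamedTuple: logpost, logpriors, loglikes, derived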
Example #7
0
    def __init__(self,
                 parameterization: Parameterization,
                 info_prior: Optional[PriorsDict] = None):
        """
        Initializes the prior and reference pdf's from the input information.
        """
        self.set_logger()
        self._parameterization = parameterization
        sampled_params_info = parameterization.sampled_params_info()
        # pdf: a list of independent components
        # in principle, separable: one per parameter
        self.params = []
        self.pdf = []
        self._bounds = np.zeros((len(sampled_params_info), 2))
        for i, p in enumerate(sampled_params_info):
            self.params += [p]
            prior = sampled_params_info[p].get("prior")
            self.pdf += [get_scipy_1d_pdf({p: prior})]
            fast_logpdf = fast_logpdfs.get(self.pdf[-1].dist.name)
            if fast_logpdf:
                self.pdf[-1].logpdf = MethodType(fast_logpdf, self.pdf[-1])
            self._bounds[i] = [-np.inf, np.inf]
            try:
                self._bounds[i] = self.pdf[-1].interval(1)
            except AttributeError:
                raise LoggedError(
                    self.log, "No bounds defined for parameter '%s' "
                    "(maybe not a scipy 1d pdf).", p)
        self._uniform_indices = np.array([
            i for i, pdf in enumerate(self.pdf) if pdf.dist.name == 'uniform'
        ],
                                         dtype=int)
        self._non_uniform_indices = np.array([
            i for i in range(len(self.pdf)) if i not in self._uniform_indices
        ],
                                             dtype=int)
        self._non_uniform_logpdf = [
            self.pdf[i].logpdf for i in self._non_uniform_indices
        ]
        self._upper_limits = self._bounds[:, 1].copy()
        self._lower_limits = self._bounds[:, 0].copy()
        self._uniform_logp = -np.sum(
            np.log(self._upper_limits[self._uniform_indices] -
                   self._lower_limits[self._uniform_indices]))
        # Set the reference pdf's
        self.set_reference(
            {p: v.get("ref")
             for p, v in sampled_params_info.items()})
        # Process the external prior(s):
        self.external = {}
        self.external_dependence = set()
        info_prior = info_prior or {}
        for name in info_prior:
            if name == prior_1d_name:
                raise LoggedError(
                    self.log, "The name '%s' is a reserved prior name. "
                    "Please use a different one.", prior_1d_name)
            self.log.debug("Loading external prior '%s' from: '%s'", name,
                           info_prior[name])
            logp = get_external_function(info_prior[name], name=name)
            argspec = getfullargspec(logp)
            known = set(parameterization.input_params())
            params = [p for p in argspec.args if p in known]
            params_without_default = set(
                argspec.args[:(len(argspec.args) -
                               len(argspec.defaults or []))])
            unknown = params_without_default - known
            if unknown:
                if unknown.intersection(parameterization.derived_params()):
                    err = (
                        "External prior '%s' has arguments %s that are output derived "
                        "parameters, Priors must be functions of input parameters. "
                        "Use a separate 'likelihood' for the prior if needed.")
                else:
                    err = (
                        "Some of the arguments of the external prior '%s' cannot be "
                        "found and don't have a default value either: %s")
                raise LoggedError(self.log, err, name, list(unknown))
            self.external_dependence.update(params)
            self.external[name] = ExternalPrior(logp=logp, params=params)
            self.mpi_warning(
                "External prior '%s' loaded. "
                "Mind that it might not be normalized!", name)

        parameterization.check_dropped(self.external_dependence)
Example #8
0
def post(info, sample=None):
    logger_setup(info.get(_debug), info.get(_debug_file))
    log = logging.getLogger(__name__.split(".")[-1])
    try:
        info_post = info[_post]
    except KeyError:
        log.error("No 'post' block given. Nothing to do!")
        raise HandledException
    if get_mpi_rank():
        log.warning(
            "Post-processing is not yet MPI-able. Doing nothing for rank > 1 processes."
        )
        return
    # 1. Load existing sample
    output_in = Output(output_prefix=info.get(_output_prefix), resume=True)
    info_in = load_input(output_in.file_full) if output_in else deepcopy(info)
    dummy_model_in = DummyModel(info_in[_params], info_in[_likelihood],
                                info_in.get(_prior, None),
                                info_in.get(_theory, None))
    if output_in:
        i = 0
        while True:
            try:
                collection = Collection(dummy_model_in,
                                        output_in,
                                        name="%d" % (1 + i),
                                        load=True,
                                        onload_skip=info_post.get("skip", 0),
                                        onload_thin=info_post.get("thin", 1))
                if i == 0:
                    collection_in = collection
                else:
                    collection_in._append(collection)
                i += 1
            except IOError:
                break
    elif sample:
        if isinstance(sample, Collection):
            sample = [sample]
        collection_in = deepcopy(sample[0])
        for s in sample[1:]:
            try:
                collection_in._append(s)
            except Exception:
                log.error("Failed to load some of the input samples.")
                raise HandledException
        i = len(sample)
    else:
        log.error(
            "Not output from where to load from or input collections given.")
        raise HandledException
    log.info("Loaded %d chain%s. Will process %d samples.", i,
             "s" if i - 1 else "", collection_in.n())
    if collection_in.n() <= 1:
        log.error(
            "Not enough samples for post-processing. Try using a larger sample, "
            "or skipping or thinning less.")
        raise HandledException
    # 2. Compare old and new info: determine what to do
    add = info_post.get("add", {})
    remove = info_post.get("remove", {})
    # Add a dummy 'one' likelihood, to absorb unused parameters
    if not add.get(_likelihood):
        add[_likelihood] = odict()
    add[_likelihood].update({"one": None})
    # Expand the "add" info
    add = get_full_info(add)
    # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters
    out = {_params: deepcopy(info_in[_params])}
    for p in remove.get(_params, {}):
        pinfo = info_in[_params].get(p)
        if pinfo is None or not is_derived_param(pinfo):
            log.error(
                "You tried to remove parameter '%s', which is not a derived paramter. "
                "Only derived parameters can be removed during post-processing.",
                p)
            raise HandledException
        out[_params].pop(p)
    mlprior_names_add = []
    for p, pinfo in add.get(_params, {}).items():
        pinfo_in = info_in[_params].get(p)
        if is_sampled_param(pinfo):
            if not is_sampled_param(pinfo_in):
                # No added sampled parameters (de-marginalisation not implemented)
                if pinfo_in is None:
                    log.error(
                        "You added a new sampled parameter %r (maybe accidentaly "
                        "by adding a new likelihood that depends on it). "
                        "Adding new sampled parameters is not possible. Try fixing "
                        "it to some value.", p)
                    raise HandledException
                else:
                    log.error(
                        "You tried to change the prior of parameter '%s', "
                        "but it was not a sampled parameter. "
                        "To change that prior, you need to define as an external one.",
                        p)
                    raise HandledException
            if mlprior_names_add[:1] != [_minuslogprior + _separator + _prior_1d_name]:
                mlprior_names_add = (
                    [_minuslogprior + _separator + _prior_1d_name] +
                    mlprior_names_add)
        elif is_derived_param(pinfo):
            if p in out[_params]:
                log.error(
                    "You tried to add derived parameter '%s', which is already "
                    "present. To force its recomputation, 'remove' it too.", p)
                raise HandledException
        elif is_fixed_param(pinfo):
            # Only one possibility left: a "fixed" parameter that was not present before:
            # input of new likelihood, or just an argument for dynamical derived (dropped)
            if ((p in info_in[_params] and pinfo[_p_value] !=
                 (pinfo_in or {}).get(_p_value, None))):
                log.error(
                    "You tried to add a fixed parameter '%s: %r' that was already present"
                    " but had a different value or was not fixed. This is not allowed. "
                    "The old info of the parameter was '%s: %r'", p,
                    dict(pinfo), p, dict(pinfo_in))
                raise HandledException
        else:
            log.error("This should not happen. Contact the developers.")
            raise HandledException
        out[_params][p] = pinfo
    # For the likelihood only, turn the rest of the *derived* parameters into
    # constants, so that the likelihoods do not try to compute them.
    # But be careful to exclude *input* params that have a "derived: True" value
    # (which in "full info" turns into "derived: 'lambda [x]: [x]'")
    out_params_like = deepcopy(out[_params])
    for p, pinfo in out_params_like.items():
        if ((is_derived_param(pinfo) and not (_p_value in pinfo)
             and p not in add.get(_params, {}))):
            out_params_like[p] = {_p_value: np.nan, _p_drop: True}
    parameterization_like = Parameterization(out_params_like,
                                             ignore_unused_sampled=True)
    # 2.2 Manage adding/removing priors and likelihoods
    warn_remove = False
    for level in [_prior, _likelihood]:
        out[level] = getattr(dummy_model_in, level)
        if level == _prior:
            out[level].remove(_prior_1d_name)
        for pdf in info_post.get("remove", {}).get(level, []) or []:
            try:
                out[level].remove(pdf)
                warn_remove = True
            except ValueError:
                log.error(
                    "Trying to remove %s '%s', but it is not present. "
                    "Existing ones: %r", level, pdf, out[level])
                raise HandledException
    if warn_remove:
        log.warning("You are removing a prior or likelihood pdf. "
                    "Notice that if the resulting posterior is much wider "
                    "than the original one, or displaced enough, "
                    "it is probably safer to explore it directly.")
    if _prior in add:
        mlprior_names_add += [
            _minuslogprior + _separator + name for name in add[_prior]
        ]
        out[_prior] += list(add[_prior])
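    # The 1d separable prior only needs recomputing if it was prepended above,
    # i.e. if the prior of some sampled parameter has changed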
    prior_recompute_1d = (mlprior_names_add[:1] == [
        _minuslogprior + _separator + _prior_1d_name
    ])
    # Don't initialise the theory code if not adding/recomputing theory,
    # theory-derived params or likelihoods
    recompute_theory = info_in.get(_theory) and not (list(
        add[_likelihood]) == ["one"] and not any([
            is_derived_param(pinfo) for pinfo in add.get(_params, {}).values()
        ]))
    if recompute_theory:
        # Inherit from the original chain (needs input|output_params, renames, etc.)
        theory = list(info_in[_theory].keys())[0]
        info_theory_out = odict([[
            theory,
            recursive_update(deepcopy(info_in[_theory][theory]),
                             add.get(_theory, {theory: {}})[theory])
        ]])
    else:
        info_theory_out = None
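    # Column names for the chi2's of the newly added likelihoods
    # (the dummy 'one' likelihood is skipped)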
    chi2_names_add = [
        _chi2 + _separator + name for name in add[_likelihood]
        if name is not "one"
    ]
    out[_likelihood] += [l for l in add[_likelihood] if l != "one"]
    if recompute_theory:
        log.warning(
            "You are recomputing the theory, but in the current version this does "
            "not force recomputation of any likelihood or derived parameter, "
            "unless explicitly removed+added.")
    for level in [_prior, _likelihood]:
        for i, x_i in enumerate(out[level]):
            if x_i in list(out[level])[i + 1:]:
                log.error(
                    "You have added %s '%s', which was already present. If you "
                    "want to force its recomputation, you must also 'remove' it.",
                    level, x_i)
                raise HandledException
    # 3. Create output collection
    if "suffix" not in info_post:
        log.error("You need to provide a 'suffix' for your chains.")
        raise HandledException
    # Use default prefix if it exists. If it does not, produce no output by default.
    # {post: {output: None}} suppresses output, and if it's a string, updates it.
    out_prefix = info_post.get(_output_prefix, info.get(_output_prefix))
    if out_prefix not in [None, False]:
        out_prefix += "_" + _post + "_" + info_post["suffix"]
    output_out = Output(output_prefix=out_prefix,
                        force_output=info.get(_force))
    info_out = deepcopy(info)
    info_out[_post] = info_post
    # Updated with input info and extended (full) add info
    info_out.update(info_in)
    info_out[_post]["add"] = add
    dummy_model_out = DummyModel(out[_params],
                                 out[_likelihood],
                                 info_prior=out[_prior])
    if recompute_theory:
        theory = list(info_theory_out.keys())[0]
        if _input_params not in info_theory_out[theory]:
            log.error(
                "You appear to be post-processing a chain generated with an older "
                "version of Cobaya. For post-processing to work, please edit the "
                "'[root]__full.info' file of the original chain to add, inside the "
                "theory code block, the list of its input parameters. E.g.\n----\n"
                "theory:\n  %s:\n    input_params: [param1, param2, ...]\n"
                "----\nIf you get strange errors later, it is likely that you did not "
                "specify the correct set of theory parameters.\n"
                "The full set of input parameters are %s.", theory,
                list(dummy_model_out.parameterization.input_params()))
            raise HandledException
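    # Instantiate only the priors and likelihoods that are being *added*;
    # the values of the unchanged ones are re-read from the input collection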
    prior_add = Prior(dummy_model_out.parameterization, add.get(_prior))
    likelihood_add = Likelihood(add[_likelihood],
                                parameterization_like,
                                info_theory=info_theory_out,
                                modules=info.get(_path_install))
    # Remove auxiliary "one" before dumping -- 'add' *is* info_out[_post]["add"]
    add[_likelihood].pop("one")
    if likelihood_add.theory:
        # Make sure that theory.needs is called at least once, for adjustments
        likelihood_add.theory.needs()
    collection_out = Collection(dummy_model_out, output_out, name="1")
    output_out.dump_info({}, info_out)
    # 4. Main loop!
    log.info("Running post-processing...")
    last_percent = 0
    for i, point in enumerate(collection_in.data.itertuples()):
        log.debug("Point: %r", point)
        sampled = [
            getattr(point, param)
            for param in dummy_model_in.parameterization.sampled_params()
        ]
        derived = odict(
            [[param, getattr(point, param, None)]
             for param in dummy_model_out.parameterization.derived_params()])
        inputs = odict([[
            param,
            getattr(
                point, param,
                dummy_model_in.parameterization.constant_params().get(
                    param,
                    dummy_model_out.parameterization.constant_params().get(
                        param, None)))
        ] for param in dummy_model_out.parameterization.input_params()])
        # Solve inputs that depend on a function and were not saved
        # (we don't use the Parameterization_to_input method in case there are references
        #  to functions that cannot be loaded at the moment)
        for p, value in inputs.items():
            if value is None:
                func = dummy_model_out.parameterization._input_funcs[p]
                args = dummy_model_out.parameterization._input_args[p]
                inputs[p] = func(*[getattr(point, arg) for arg in args])
        # Add/remove priors
        priors_add = prior_add.logps(sampled)
        if not prior_recompute_1d:
            priors_add = priors_add[1:]
        logpriors_add = odict(zip(mlprior_names_add, priors_add))
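        # Use the newly computed priors where available; otherwise reuse the old
        # ones (stored as minus-log-prior, hence the sign flip)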
        logpriors_new = [
            logpriors_add.get(name, -getattr(point, name, 0))
            for name in collection_out.minuslogprior_names
        ]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug("New set of priors: %r",
                      dict(zip(dummy_model_out.prior, logpriors_new)))
        if -np.inf in logpriors_new:
            continue
        # Add/remove likelihoods
        output_like = []
        if likelihood_add:
            # Notice "one" (last in likelihood_add) is ignored: not in chi2_names
            loglikes_add = odict(
                zip(chi2_names_add,
                    likelihood_add.logps(inputs, _derived=output_like)))
            output_like = dict(zip(likelihood_add.output_params, output_like))
        else:
            loglikes_add = dict()
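        # Same for the likelihoods: old chi2 columns store -2*loglike, hence the -0.5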
        loglikes_new = [
            loglikes_add.get(name, -0.5 * getattr(point, name, 0))
            for name in collection_out.chi2_names
        ]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug("New set of likelihoods: %r",
                      dict(zip(dummy_model_out.likelihood, loglikes_new)))
            if output_like:
                log.debug("New set of likelihood-derived parameters: %r",
                          output_like)
        if -np.inf in loglikes_new:
            continue
        # Add/remove derived parameters and change priors of sampled parameters
        for p in add[_params]:
            if p in dummy_model_out.parameterization._directly_output:
                derived[p] = output_like[p]
            elif p in dummy_model_out.parameterization._derived_funcs:
                func = dummy_model_out.parameterization._derived_funcs[p]
                args = dummy_model_out.parameterization._derived_args[p]
                derived[p] = func(*[
                    getattr(point, arg, output_like.get(arg, None))
                    for arg in args
                ])
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug(
                "New derived parameters: %r",
                dict([[
                    p, derived[p]
                ] for p in dummy_model_out.parameterization.derived_params()
                      if p in add[_params]]))
        # Save to the collection (keep old weight for now)
        collection_out.add(sampled,
                           derived=derived.values(),
                           weight=getattr(point, _weight),
                           logpriors=logpriors_new,
                           loglikes=loglikes_new)
        # Display progress
        percent = np.round(i / collection_in.n() * 100)
        if percent != last_percent and not percent % 5:
            last_percent = percent
            progress_bar(log, percent, " (%d/%d)" % (i, collection_in.n()))
    if collection_out.data.last_valid_index() is None:
        log.error(
            "No elements in the final sample. Possible causes: "
            "added a prior or likelihood valued zero over the full sampled domain, "
            "or the computation of the theory failed everywhere, etc.")
        raise HandledException
    # Reweight -- account for large dynamic range!
    #   Prefer to rescale +inf to finite, and ignore final points with -inf.
    #   Remove -inf's (0-weight), and correct indices
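    #   weight_new = weight_old * exp(logpost_new - logpost_old), shifted by the
    #   maximum difference so that the largest factor is exactly 1 (avoids overflow)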
    difflogmax = max(collection_in[_minuslogpost] -
                     collection_out[_minuslogpost])
    collection_out.data[_weight] *= np.exp(collection_in[_minuslogpost] -
                                           collection_out[_minuslogpost] -
                                           difflogmax)
    collection_out.data = (
        collection_out.data[collection_out.data.weight > 0].reset_index(
            drop=True))
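    # Keep the internal sample counter consistent with the filtered data frame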
    collection_out._n = collection_out.data.last_valid_index() + 1
    # Write!
    collection_out._out_update()
    log.info("Finished! Final number of samples: %d", collection_out.n())
    return info_out, {"sample": collection_out}