Example #1
    def run(self):
        """
        Prepares the posterior function and calls ``PolyChord``'s ``run`` function.
        """

        # Prepare the posterior
        # Don't forget to multiply by the volume of the physical hypercube,
        # since PolyChord divides by it
        def logpost(params_values):
            result = self.model.logposterior(params_values)
            loglikes = result.loglikes
            if len(loglikes) != self.n_likes:
                loglikes = np.full(self.n_likes, np.nan)
            derived = result.derived
            if len(derived) != self.n_derived:
                derived = np.full(self.n_derived, np.nan)
            derived = list(derived) + list(result.logpriors) + list(loglikes)
            return (max(result.logpost + self.logvolume, self.pc_settings.logzero),
                    derived)

        sync_processes()
        self.mpi_info("Calling PolyChord...")
        self.pc.run_polychord(logpost, self.nDims, self.nDerived, self.pc_settings,
                              self.pc_prior, self.dumper)
        self.process_raw_output()
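
The hypercube-volume correction mentioned in the comments above amounts to adding the log-volume of the physical prior box to the log-posterior before handing it to PolyChord, and clamping at the ``logzero`` sentinel. A minimal standalone sketch of that bookkeeping, assuming a uniform box prior (all names below are illustrative, not Cobaya API):

    import numpy as np

    # Hypothetical bounds of a 2D uniform box prior
    lower = np.array([0.0, -1.0])
    upper = np.array([2.0, 1.0])
    logvolume = np.sum(np.log(upper - lower))  # log-volume of the physical hypercube
    logzero = -1e30  # finite stand-in for "log of zero", as in PolyChord settings

    def clamp_logpost(logpost):
        # PolyChord divides the posterior by the hypercube volume, so we
        # pre-multiply by it (add in log space) and clamp at logzero
        return max(logpost + logvolume, logzero)

    print(clamp_logpost(-3.2))     # finite value, shifted by logvolume
    print(clamp_logpost(-np.inf))  # collapses to logzero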
Example #2
    def run(self):
        """
        Prepares the posterior function and calls ``PolyChord``'s ``run`` function.
        """

        # Prepare the posterior
        # Don't forget to multiply by the volume of the physical hypercube,
        # since PolyChord divides by it
        def logpost(params_values):
            logposterior, logpriors, loglikes, derived = (
                self.model.logposterior(params_values))
            if len(derived) != len(
                    self.model.parameterization.derived_params()):
                derived = np.full(
                    len(self.model.parameterization.derived_params()), np.nan)
            if len(loglikes) != len(self.model.likelihood._likelihoods):
                loglikes = np.full(len(self.model.likelihood._likelihoods),
                                   np.nan)
            derived = list(derived) + list(logpriors) + list(loglikes)
            return (max(logposterior + self.logvolume,
                        0.99 * self.pc_settings.logzero), derived)

        sync_processes()
        if am_single_or_primary_process():
            self.log.info("Sampling!")
        self.pc.run_polychord(logpost, self.nDims, self.nDerived,
                              self.pc_settings, self.pc_prior, self.dumper)
Example #3
    def run(self):
        """
        Prepares the posterior function and calls ``PolyChord``'s ``run`` function.
        """

        # Prepare the polychord likelihood
        def loglikelihood(params_values):
            result = self.model.logposterior(params_values)
            loglikes = result.loglikes
            if len(loglikes) != self.n_likes:
                loglikes = np.full(self.n_likes, np.nan)
            derived = result.derived
            if len(derived) != self.n_derived:
                derived = np.full(self.n_derived, np.nan)
            derived = list(derived) + list(result.logpriors) + list(loglikes)
            return max(loglikes.sum(), self.pc_settings.logzero), derived

        def prior(cube):
            theta = np.empty_like(cube)
            for i, xi in enumerate(np.array(cube)[self.ordering]):
                theta[i] = self.model.prior.pdf[i].ppf(xi)
            return theta

        if is_main_process():
            self.dump_paramnames(self.raw_prefix)
        sync_processes()
        self.mpi_info("Calling PolyChord...")
        self.pc.run_polychord(loglikelihood, self.nDims, self.nDerived,
                              self.pc_settings, prior, self.dumper)
        self.process_raw_output()
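
The ``prior(cube)`` callback in the example above implements the standard unit-hypercube transform used by nested samplers: each coordinate in [0, 1] is pushed through the inverse CDF (``ppf``) of the corresponding 1D prior. A standalone sketch with SciPy distributions (the two priors below are placeholders, not taken from the source):

    import numpy as np
    from scipy import stats

    # Hypothetical independent 1D priors: uniform on [0, 2] and standard normal
    priors_1d = [stats.uniform(loc=0, scale=2), stats.norm(loc=0, scale=1)]

    def prior_transform(cube):
        # Map a point of the unit hypercube to physical parameter space
        theta = np.empty_like(cube)
        for i, xi in enumerate(cube):
            theta[i] = priors_1d[i].ppf(xi)
        return theta

    print(prior_transform(np.array([0.5, 0.975])))  # -> [1.0, ~1.96]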
Example #4
 def check_all_ready(self):
     """
     Checks if the chain(s) is(/are) ready to check convergence and, if requested,
     learn a new covariance matrix for the proposal distribution.
     """
     msg_ready = ("Ready to check convergence" +
                  (" and learn a new proposal covmat"
                   if self.learn_proposal else ""))
     n = len(self.collection)
     # If *just* (weight==1) got ready to check+learn
     if not (n % self.learn_every.value) and n > 0:
         self.log.info("Learn + convergence test @ %d samples accepted.", n)
         if more_than_one_process():
             self.been_waiting += 1
             if self.been_waiting > self.max_waiting:
                 self.send_error_signal()
                 raise LoggedError(
                     self.log,
                     "Waiting for too long for all chains to be ready. "
                     "Maybe one of them is stuck or died unexpectedly?")
         self.model.dump_timing()
         # If not MPI size > 1, we are ready
         if not more_than_one_process():
             self.log.debug(msg_ready)
             return True
         # Error check in case any process already sent an error signal
         self.check_error_signal()
         # If MPI, tell the rest that we are ready -- we use a "gather"
         # ("reduce" was problematic), but we are in practice just pinging
         if not hasattr(self, "req"):  # just once!
             self.all_ready = np.empty(get_mpi_size())
             self.req = get_mpi_comm().Iallgather(np.array([1.]),
                                                  self.all_ready)
             self.log.info(msg_ready + " (waiting for the rest...)")
     # If all processes are ready to learn (= communication finished)
     if self.req.Test() if hasattr(self, "req") else False:
         # Sanity check: actually all processes have finished
         assert np.all(self.all_ready == 1), (
             "This should not happen! Notify the developers. (Got %r)" %
             self.all_ready)
         if more_than_one_process() and is_main_process():
             self.log.info("All chains are r" + msg_ready[1:])
         delattr(self, "req")
         self.been_waiting = 0
         # Another error check, in case the error occurred after sending "ready" signal
         self.check_error_signal()
         # Just in case, a barrier here
         sync_processes()
         return True
     return False
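
The ``Iallgather`` call above is used as a non-blocking readiness handshake: each chain announces it is ready, then keeps working while it polls ``req.Test()``. A minimal sketch of the same pattern with mpi4py (requires an MPI-3 implementation, hence the version check seen in other examples; run under ``mpirun``):

    import numpy as np
    from mpi4py import MPI

    comm = MPI.COMM_WORLD
    # Each process announces "I am ready" with a non-blocking all-gather...
    all_ready = np.empty(comm.Get_size())
    req = comm.Iallgather(np.array([1.]), all_ready)
    # ...and polls for completion instead of blocking, so it can keep working
    while not req.Test():
        pass  # e.g. keep accepting MCMC samples here
    assert np.all(all_ready == 1.)
    print("rank %d: all processes are ready" % comm.Get_rank())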
Example #5
 def check_all_ready(self):
     """
     Checks if the chain(s) is(/are) ready to check convergence and, if requested,
     learn a new covariance matrix for the proposal distribution.
     """
     msg_ready = ("Ready to check convergence" +
                  (" and learn a new proposal covmat"
                   if self.learn_proposal else ""))
     # If *just* (weight==1) got ready to check+learn
     if (self.n() > 0 and self.current_point[_weight] == 1
             and not (self.n() % self.check_every)):
         self.log.info("Checkpoint: %d samples accepted.", self.n())
         if more_than_one_process():
             self.been_waiting += 1
             if self.been_waiting > self.max_waiting:
                 self.log.error(
                     "Waiting for too long for all chains to be ready. "
                     "Maybe one of them is stuck or died unexpectedly?")
                 raise HandledException
         self.model.dump_timing()
         # If not MPI size > 1, we are ready
         if not more_than_one_process():
             if msg_ready:
                 self.log.info(msg_ready)
             return True
         # If MPI, tell the rest that we are ready -- we use a "gather"
         # ("reduce" was problematic), but we are in practice just pinging
         if not hasattr(self, "req"):  # just once!
             self.all_ready = np.empty(get_mpi_size())
             self.req = get_mpi_comm().Iallgather(np.array([1.]),
                                                  self.all_ready)
             self.log.info(msg_ready + " (waiting for the rest...)")
     # If all processes are ready to learn (= communication finished)
     if self.req.Test() if hasattr(self, "req") else False:
         # Sanity check: actually all processes have finished
         assert np.all(self.all_ready == 1), (
             "This should not happen! Notify the developers. (Got %r)" %
             self.all_ready)
         if more_than_one_process() and am_single_or_primary_process():
             self.log.info("All chains are r" + msg_ready[1:])
         delattr(self, "req")
         self.been_waiting = 0
         # Just in case, a barrier here
         sync_processes()
         return True
     return False
Example #6
    def initialize(self):
        """Initializes the sampler:
        creates the proposal distribution and draws the initial sample."""
        if not self.model.prior.d():
            raise LoggedError(self.log,
                              "No parameters being varied for sampler")
        self.log.debug("Initializing")
        # MARKED FOR DEPRECATION IN v3.0
        if getattr(self, "oversample", None) is not None:
            raise LoggedError(
                self.log, "`oversample` has been deprecated. "
                "Oversampling is now requested by setting "
                "`oversample_power` > 0.")
        # END OF DEPRECATION BLOCK
        # MARKED FOR DEPRECATION IN v3.0
        if getattr(self, "check_every", None) is not None:
            raise LoggedError(
                self.log, "`check_every` has been deprecated. "
                "Please use `learn_every` instead.")
        # END OF DEPRECATION BLOCK
        if self.callback_every is None:
            self.callback_every = self.learn_every
        self._quants_d_units = []
        for q in ["max_tries", "learn_every", "callback_every", "burn_in"]:
            number = NumberWithUnits(getattr(self, q), "d", dtype=int)
            self._quants_d_units.append(number)
            setattr(self, q, number)
        self.output_every = NumberWithUnits(self.output_every, "s", dtype=int)
        if is_main_process():
            if self.output.is_resuming() and (max(self.mpi_size or 0, 1) !=
                                              mpi.size()):
                raise LoggedError(
                    self.log,
                    "Cannot resume a run with a different number of chains: "
                    "was %d and now is %d.", max(self.mpi_size or 0, 1),
                    mpi.size())
        sync_processes()
        # One collection per MPI process: `name` is the MPI rank + 1
        name = str(1 + mpi.rank())
        self.collection = SampleCollection(self.model,
                                           self.output,
                                           name=name,
                                           resuming=self.output.is_resuming())
        self.current_point = OneSamplePoint(self.model)
        # Use standard MH steps by default
        self.get_new_sample = self.get_new_sample_metropolis
        # Prepare callback function
        if self.callback_function:
            self.callback_function_callable = (get_external_function(
                self.callback_function))
        # Useful for getting last points added inside callback function
        self.last_point_callback = 0
        self.i_learn = 1
        # Monitoring/restore progress
        if is_main_process():
            cols = [
                "N", "timestamp", "acceptance_rate", "Rminus1", "Rminus1_cl"
            ]
            self.progress = DataFrame(columns=cols)
            if self.output and not self.output.is_resuming():
                header_fmt = {
                    "N": 6 * " " + "N",
                    "timestamp": 17 * " " + "timestamp"
                }
                with open(self.progress_filename(), "w",
                          encoding="utf-8") as progress_file:
                    progress_file.write("# " + " ".join([
                        header_fmt.get(col, ((7 + 8) - len(col)) * " " + col)
                        for col in self.progress.columns
                    ]) + "\n")
        # Get first point, to be discarded -- not possible to determine its weight
        # Still, we need to compute derived parameters, since, as the proposal is
        # "blocked", we may be saving the initial state of some block.
        # NB: if resuming but nothing was written (burn-in not finished): re-start
        if self.output.is_resuming() and len(self.collection):
            last = len(self.collection) - 1
            initial_point = (self.collection[
                self.collection.sampled_params].iloc[last]).to_numpy(
                    dtype=np.float64, copy=True)
            results = LogPosterior(
                logpost=-self.collection[OutPar.minuslogpost].iloc[last],
                logpriors=-(self.collection[
                    self.collection.minuslogprior_names].iloc[last].to_numpy(
                        dtype=np.float64, copy=True)),
                loglikes=-0.5 *
                (self.collection[self.collection.chi2_names].iloc[last].
                 to_numpy(dtype=np.float64, copy=True)),
                derived=(self.collection[
                    self.collection.derived_params].iloc[last].to_numpy(
                        dtype=np.float64, copy=True)))
        else:
            # NB: max_tries adjusted to dim instead of #cycles (blocking not computed yet)
            self.max_tries.set_scale(self.model.prior.d())
            self.log.info(
                "Getting initial point... (this may take a few seconds)")
            initial_point, results = \
                self.model.get_valid_point(max_tries=self.max_tries.value,
                                           random_state=self._rng)
            # If resuming but no existing chain, assume failed run and ignore blocking
            # if speeds measurement requested
            if self.output.is_resuming() and not len(self.collection) \
                    and self.measure_speeds:
                self.blocking = None
            if self.measure_speeds and self.blocking:
                self.mpi_warning(
                    "Parameter blocking manually fixed: speeds will not be measured."
                )
            elif self.measure_speeds:
                n = None if self.measure_speeds is True else int(
                    self.measure_speeds)
                self.model.measure_and_set_speeds(n=n,
                                                  discard=0,
                                                  random_state=self._rng)
        self.set_proposer_blocking()
        self.set_proposer_initial_covmat(load=True)

        self.current_point.add(initial_point, results)
        self.log.info("Initial point: %s", self.current_point)
        # Max #(learn+convergence checks) to wait,
        # in case one process dies/hangs without raising error
        self.been_waiting = 0
        self.max_waiting = max(50, self.max_tries.unit_value)
        # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
        self.burn_in_left = self.burn_in.value * self.current_point.output_thin + 1
        self._msg_ready = ("Ready to check convergence" +
                           (" and learn a new proposal covmat"
                            if self.learn_proposal else ""))

        # Initial dummy checkpoint
        # (needed when 1st "learn point" not reached in prev. run)
        self.write_checkpoint()
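
Quantities such as ``max_tries`` and ``learn_every`` above are wrapped in ``NumberWithUnits``, which lets users give values in units of the problem dimension (e.g. "40d" for 40 times the number of sampled parameters) and resolves them later via ``set_scale``. A rough sketch of the idea (a toy stand-in, not Cobaya's actual implementation):

    class DimScaledNumber:
        # Toy stand-in for a number that may carry a 'd' (dimension) unit
        def __init__(self, spec, dtype=int):
            spec = str(spec)
            self.has_unit = spec.endswith("d")
            self.unit_value = dtype(spec[:-1] if self.has_unit else spec)
            self.value = None if self.has_unit else self.unit_value

        def set_scale(self, scale):
            # Resolve the unit against the actual scale (e.g. #dimensions)
            if self.has_unit:
                self.value = self.unit_value * scale

    max_tries = DimScaledNumber("40d")
    max_tries.set_scale(6)  # e.g. 6 sampled parameters
    print(max_tries.value)  # -> 240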
Example #7
def post(info_or_yaml_or_file: Union[InputDict, str, os.PathLike],
         sample: Union[SampleCollection, List[SampleCollection], None] = None
         ) -> PostTuple:
    info = load_input_dict(info_or_yaml_or_file)
    logger_setup(info.get("debug"), info.get("debug_file"))
    log = get_logger(__name__)
    # MARKED FOR DEPRECATION IN v3.0
    if info.get("modules"):
        raise LoggedError(log, "The input field 'modules' has been deprecated."
                               "Please use instead %r", packages_path_input)
    # END OF DEPRECATION BLOCK
    info_post: PostDict = info.get("post") or {}
    if not info_post:
        raise LoggedError(log, "No 'post' block given. Nothing to do!")
    if mpi.is_main_process() and info.get("resume"):
        log.warning("Resuming not implemented for post-processing. Re-starting.")
    if not info.get("output") and info_post.get("output") \
            and not info.get("params"):
        raise LoggedError(log, "The input dictionary must have be a full option "
                               "dictionary, or have an existing 'output' root to load "
                               "previous settings from ('output' to read from is in the "
                               "main block not under 'post'). ")
    # 1. Load existing sample
    output_in = get_output(prefix=info.get("output"))
    if output_in:
        info_in = output_in.load_updated_info() or update_info(info)
    else:
        info_in = update_info(info)
    params_in: ExpandedParamsDict = info_in["params"]  # type: ignore
    dummy_model_in = DummyModel(params_in, info_in.get("likelihood", {}),
                                info_in.get("prior"))

    in_collections = []
    thin = info_post.get("thin", 1)
    skip = info_post.get("skip", 0)
    if info.get('thin') is not None or info.get('skip') is not None:  # type: ignore
        raise LoggedError(log, "'thin' and 'skip' should be "
                               "parameters of the 'post' block")

    if sample:
        # If MPI, assume for each MPI process post is passed in the list of
        # collections that should be processed by that process
        # (e.g. single chain output from sampler)
        if isinstance(sample, SampleCollection):
            in_collections = [sample]
        else:
            in_collections = sample
        for i, collection in enumerate(in_collections):
            if skip:
                if 0 < skip < 1:
                    skip = int(round(skip * len(collection)))
                collection = collection.filtered_copy(slice(skip, None))
            if thin != 1:
                collection = collection.thin_samples(thin)
            in_collections[i] = collection
    elif output_in:
        files = output_in.find_collections()
        numbered = files
        if not numbered:
            # look for un-numbered output files
            files = output_in.find_collections(name=False)
        if files:
            if mpi.size() > len(files):
                raise LoggedError(log, "Number of MPI processes (%s) is larger than "
                                       "the number of sample files (%s)",
                                  mpi.size(), len(files))
            for num in range(mpi.rank(), len(files), mpi.size()):
                in_collections += [SampleCollection(
                    dummy_model_in, output_in,
                    onload_thin=thin, onload_skip=skip, load=True, file_name=files[num],
                    name=str(num + 1) if numbered else "")]
        else:
            raise LoggedError(log, "No samples found for the input model with prefix %s",
                              os.path.join(output_in.folder, output_in.prefix))

    else:
        raise LoggedError(log, "No output from where to load from, "
                               "nor input collections given.")
    if any(len(c) <= 1 for c in in_collections):
        raise LoggedError(
            log, "Not enough samples for post-processing. Try using a larger sample, "
                 "or skipping or thinning less.")
    mpi.sync_processes()
    log.info("Will process %d sample points.", sum(len(c) for c in in_collections))

    # 2. Compare old and new info: determine what to do
    add = info_post.get("add") or {}
    if "remove" in add:
        raise LoggedError(log, "remove block should be under 'post', not 'add'")
    remove = info_post.get("remove") or {}
    # Add a dummy 'one' likelihood, to absorb unused parameters
    if not add.get("likelihood"):
        add["likelihood"] = {}
    add["likelihood"]["one"] = None
    # Expand the "add" info, but don't add new default sampled parameters
    orig_params = set(add.get("params") or [])
    add = update_info(add, add_aggr_chi2=False)
    add_params: ExpandedParamsDict = add["params"]  # type: ignore
    for p in set(add_params) - orig_params:
        if p in params_in:
            add_params.pop(p)

    # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters
    out_combined_params = deepcopy_where_possible(params_in)
    remove_params = list(str_to_list(remove.get("params")) or [])
    for p in remove_params:
        pinfo = params_in.get(p)
        if pinfo is None or not is_derived_param(pinfo):
            raise LoggedError(
                log,
                "You tried to remove parameter '%s', which is not a derived parameter. "
                "Only derived parameters can be removed during post-processing.", p)
        out_combined_params.pop(p)
    # Force recomputation of aggregated chi2
    for p in list(out_combined_params):
        if p.startswith(get_chi2_name("")):
            out_combined_params.pop(p)
    prior_recompute_1d = False
    for p, pinfo in add_params.items():
        pinfo_in = params_in.get(p)
        if is_sampled_param(pinfo):
            if not is_sampled_param(pinfo_in):
                # No added sampled parameters (de-marginalisation not implemented)
                if pinfo_in is None:
                    raise LoggedError(
                        log, "You added a new sampled parameter %r (maybe accidentally "
                             "by adding a new likelihood that depends on it). "
                             "Adding new sampled parameters is not possible. Try fixing "
                             "it to some value.", p)
                else:
                    raise LoggedError(
                        log,
                        "You tried to change the prior of parameter '%s', "
                        "but it was not a sampled parameter. "
                        "To change that prior, you need to define as an external one.", p)
            # recompute prior if potentially changed sampled parameter priors
            prior_recompute_1d = True
        elif is_derived_param(pinfo):
            if p in out_combined_params:
                raise LoggedError(
                    log, "You tried to add derived parameter '%s', which is already "
                         "present. To force its recomputation, 'remove' it too.", p)
        elif is_fixed_or_function_param(pinfo):
            # Only one possibility left "fixed" parameter that was not present before:
            # input of new likelihood, or just an argument for dynamical derived (dropped)
            if pinfo_in and p in params_in and pinfo["value"] != pinfo_in.get("value"):
                raise LoggedError(
                    log,
                    "You tried to add a fixed parameter '%s: %r' that was already present"
                    " but had a different value or was not fixed. This is not allowed. "
                    "The old info of the parameter was '%s: %r'",
                    p, dict(pinfo), p, dict(pinfo_in))
        elif not pinfo_in:  # OK as long as we have known value for it
            raise LoggedError(log, "Parameter %s no known value. ", p)
        out_combined_params[p] = pinfo

    out_combined: InputDict = {"params": out_combined_params}  # type: ignore
    # Turn the rest of *derived* parameters into constants,
    # so that the likelihoods do not try to recompute them
    # But be careful to exclude *input* params that have a "derived: True" value
    # (which in "updated info" turns into "derived: 'lambda [x]: [x]'")
    # Don't assign to derived parameters to theories, only likelihoods, so they can be
    # recomputed if needed. If the theory does not need to be computed, it doesn't matter
    # if it is already assigned parameters in the usual way; likelihoods can get
    # the required derived parameters from the stored sample derived parameter inputs.
    out_params_with_computed = deepcopy_where_possible(out_combined_params)

    dropped_theory = set()
    for p, pinfo in out_params_with_computed.items():
        if (is_derived_param(pinfo) and "value" not in pinfo
                and p not in add_params):
            out_params_with_computed[p] = {"value": np.nan}
            dropped_theory.add(p)
    # 2.2 Manage adding/removing priors and likelihoods
    warn_remove = False
    kind: ModelBlock
    for kind in ("prior", "likelihood", "theory"):
        out_combined[kind] = deepcopy_where_possible(info_in.get(kind)) or {}
        for remove_item in str_to_list(remove.get(kind)) or []:
            try:
                out_combined[kind].pop(remove_item, None)
                if remove_item not in (add.get(kind) or []) and kind != "theory":
                    warn_remove = True
            except ValueError:
                raise LoggedError(
                    log, "Trying to remove %s '%s', but it is not present. "
                         "Existing ones: %r", kind, remove_item, list(out_combined[kind]))
        if kind != "theory" and kind in add:
            dups = set(add.get(kind) or []).intersection(out_combined[kind]) - {"one"}
            if dups:
                raise LoggedError(
                    log, "You have added %s '%s', which was already present. If you "
                         "want to force its recomputation, you must also 'remove' it.",
                    kind, dups)
            out_combined[kind].update(add[kind])

    if warn_remove and mpi.is_main_process():
        log.warning("You are removing a prior or likelihood pdf. "
                    "Notice that if the resulting posterior is much wider "
                    "than the original one, or displaced enough, "
                    "it is probably safer to explore it directly.")

    mlprior_names_add = minuslogprior_names(add.get("prior") or [])
    chi2_names_add = [get_chi2_name(name) for name in add["likelihood"] if
                      name != "one"]
    out_combined["likelihood"].pop("one", None)

    add_theory = add.get("theory")
    if add_theory:
        if len(add["likelihood"]) == 1 and not any(
                is_derived_param(pinfo) for pinfo in add_params.values()):
            log.warning("You are adding a theory, but this does not force recomputation "
                        "of any likelihood or derived parameters unless explicitly "
                        "removed+added.")
        # Inherit from the original chain (input|output_params, renames, etc)
        added_theory = add_theory.copy()
        for theory, theory_info in out_combined["theory"].items():
            if theory in list(added_theory):
                out_combined["theory"][theory] = \
                    recursive_update(theory_info, added_theory.pop(theory))
        out_combined["theory"].update(added_theory)

    # Prepare recomputation of aggregated chi2
    # (they need to be recomputed by hand, because auto-computation won't pick up
    #  old likelihoods for a given type)
    all_types = {like: str_to_list(opts.get("type") or [])
                 for like, opts in out_combined["likelihood"].items()}
    types = set(chain(*all_types.values()))
    inv_types = {t: [like for like, like_types in all_types.items() if t in like_types]
                 for t in sorted(types)}
    add_aggregated_chi2_params(out_combined_params, types)

    # 3. Create output collection
    # Use default prefix if it exists. If it does not, produce no output by default.
    # {post: {output: None}} suppresses output, and if it's a string, updates it.
    out_prefix = info_post.get("output", info.get("output"))
    if out_prefix:
        suffix = info_post.get("suffix")
        if not suffix:
            raise LoggedError(log, "You need to provide a '%s' for your output chains.",
                              "suffix")
        out_prefix += separator_files + "post" + separator_files + suffix
    output_out = get_output(prefix=out_prefix, force=info.get("force"))
    output_out.set_lock()

    if output_out and not output_out.force and output_out.find_collections():
        raise LoggedError(log, "Found existing post-processing output with prefix %r. "
                               "Delete it manually or re-run with `force: True` "
                               "(or `-f`, `--force` from the shell).", out_prefix)
    elif output_out and output_out.force and mpi.is_main_process():
        output_out.delete_infos()
        for _file in output_out.find_collections():
            output_out.delete_file_or_folder(_file)
    info_out = deepcopy_where_possible(info)
    info_post = info_post.copy()
    info_out["post"] = info_post
    # Updated with input info and extended (updated) add info
    info_out.update(info_in)  # type: ignore
    info_post["add"] = add

    dummy_model_out = DummyModel(out_combined_params, out_combined["likelihood"],
                                 info_prior=out_combined["prior"])
    out_func_parameterization = Parameterization(out_params_with_computed)

    # TODO: check allow_renames=False?
    model_add = Model(out_params_with_computed, add["likelihood"],
                      info_prior=add.get("prior"), info_theory=out_combined["theory"],
                      packages_path=(info_post.get(packages_path_input) or
                                     info.get(packages_path_input)),
                      allow_renames=False, post=True,
                      stop_at_error=info.get('stop_at_error', False),
                      skip_unused_theories=True, dropped_theory_params=dropped_theory)
    # Remove auxiliary "one" before dumping -- 'add' *is* info_out["post"]["add"]
    add["likelihood"].pop("one")
    out_collections = [SampleCollection(dummy_model_out, output_out, name=c.name,
                                        cache_size=OutputOptions.default_post_cache_size)
                       for c in in_collections]
    # TODO: should maybe add skip/thin to out_combined, so can tell post-processed?
    output_out.check_and_dump_info(info_out, out_combined, check_compatible=False)
    collection_in = in_collections[0]
    collection_out = out_collections[0]

    last_percent = None
    known_constants = dummy_model_out.parameterization.constant_params()
    known_constants.update(dummy_model_in.parameterization.constant_params())
    missing_params = dummy_model_in.parameterization.sampled_params().keys() - set(
        collection_in.columns)
    if missing_params:
        raise LoggedError(log, "Input samples do not contain expected sampled parameter "
                               "values: %s", missing_params)

    missing_priors = set(name for name in collection_out.minuslogprior_names if
                         name not in mlprior_names_add
                         and name not in collection_in.columns)
    if _minuslogprior_1d_name in missing_priors:
        prior_recompute_1d = True
    if prior_recompute_1d:
        missing_priors.discard(_minuslogprior_1d_name)
        mlprior_names_add.insert(0, _minuslogprior_1d_name)
    prior_regenerate: Optional[Prior]
    if missing_priors and "prior" in info_in:
        # in case there are input priors that are not stored in input samples
        # e.g. when postprocessing GetDist/CosmoMC-format chains
        in_names = minuslogprior_names(info_in["prior"])
        info_prior = {piname: inf for (piname, inf), in_name in
                      zip(info_in["prior"].items(), in_names) if
                      in_name in missing_priors}
        regenerated_prior_names = minuslogprior_names(info_prior)
        missing_priors.difference_update(regenerated_prior_names)
        prior_regenerate = Prior(dummy_model_in.parameterization, info_prior)
    else:
        prior_regenerate = None
        regenerated_prior_names = None
    if missing_priors:
        raise LoggedError(log, "Missing priors: %s", missing_priors)

    mpi.sync_processes()
    output_in.check_lock()

    # 4. Main loop! Loop over input samples and adjust as required.
    if mpi.is_main_process():
        log.info("Running post-processing...")
    difflogmax: Optional[float] = None
    to_do = sum(len(c) for c in in_collections)
    weights = []
    done = 0
    last_dump_time = time.time()
    for collection_in, collection_out in zip(in_collections, out_collections):
        importance_weights = []

        def set_difflogmax():
            nonlocal difflogmax
            difflog = (collection_in[OutPar.minuslogpost].to_numpy(
                dtype=np.float64)[:len(collection_out)]
                       - collection_out[OutPar.minuslogpost].to_numpy(dtype=np.float64))
            difflogmax = np.max(difflog)
            if abs(difflogmax) < 1:
                difflogmax = 0  # keep simple when e.g. very similar
            log.debug("difflogmax: %g", difflogmax)
            if mpi.more_than_one_process():
                difflogmax = max(mpi.allgather(difflogmax))
            if mpi.is_main_process():
                log.debug("Set difflogmax: %g", difflogmax)
            _weights = np.exp(difflog - difflogmax)
            importance_weights.extend(_weights)
            collection_out.reweight(_weights)

        for i, point in collection_in.data.iterrows():
            all_params = point.to_dict()
            for p in remove_params:
                all_params.pop(p, None)
            log.debug("Point: %r", point)
            sampled = np.array([all_params[param] for param in
                                dummy_model_in.parameterization.sampled_params()])
            all_params = out_func_parameterization.to_input(all_params).copy()

            # Add/remove priors
            if prior_recompute_1d:
                priors_add = [model_add.prior.logps_internal(sampled)]
                if priors_add[0] == -np.inf:
                    continue
            else:
                priors_add = []
            if model_add.prior.external:
                priors_add.extend(model_add.prior.logps_external(all_params))

            logpriors_add = dict(zip(mlprior_names_add, priors_add))
            logpriors_new = [logpriors_add.get(name, - point.get(name, 0))
                             for name in collection_out.minuslogprior_names]
            if prior_regenerate:
                regenerated = dict(zip(regenerated_prior_names,
                                       prior_regenerate.logps_external(all_params)))
                for _i, name in enumerate(collection_out.minuslogprior_names):
                    if name in regenerated_prior_names:
                        logpriors_new[_i] = regenerated[name]

            if is_debug(log):
                log.debug("New set of priors: %r",
                          dict(zip(dummy_model_out.prior, logpriors_new)))
            if -np.inf in logpriors_new:
                continue
            # Add/remove likelihoods and/or (re-)calculate derived parameters
            loglikes_add, output_derived = model_add._loglikes_input_params(
                all_params, return_output_params=True)
            loglikes_add = dict(zip(chi2_names_add, loglikes_add))
            output_derived = dict(zip(model_add.output_params, output_derived))
            loglikes_new = [loglikes_add.get(name, -0.5 * point.get(name, 0))
                            for name in collection_out.chi2_names]
            if is_debug(log):
                log.debug("New set of likelihoods: %r",
                          dict(zip(dummy_model_out.likelihood, loglikes_new)))
                if output_derived:
                    log.debug("New set of derived parameters: %r", output_derived)
            if -np.inf in loglikes_new:
                continue
            all_params.update(output_derived)

            all_params.update(out_func_parameterization.to_derived(all_params))
            derived = {param: all_params.get(param) for param in
                       dummy_model_out.parameterization.derived_params()}
            # We need to recompute the aggregated chi2 by hand
            for type_, likes in inv_types.items():
                derived[get_chi2_name(type_)] = sum(
                    -2 * lvalue for lname, lvalue
                    in zip(collection_out.chi2_names, loglikes_new)
                    if undo_chi2_name(lname) in likes)
            if is_debug(log):
                log.debug("New derived parameters: %r",
                          {p: derived[p]
                           for p in dummy_model_out.parameterization.derived_params()
                           if p in add["params"]})
            # Save to the collection (keep old weight for now)
            weight = point.get(OutPar.weight)
            mpi.check_errors()
            if difflogmax is None and i > OutputOptions.reweight_after and \
                    time.time() - last_dump_time > OutputOptions.output_inteveral_s / 2:
                set_difflogmax()
                collection_out.out_update()

            if difflogmax is not None:
                logpost_new = sum(logpriors_new) + sum(loglikes_new)
                importance_weight = np.exp(logpost_new + point.get(OutPar.minuslogpost)
                                           - difflogmax)
                weight = weight * importance_weight
                importance_weights.append(importance_weight)
                if time.time() - last_dump_time > OutputOptions.output_inteveral_s:
                    collection_out.out_update()
                    last_dump_time = time.time()

            if weight > 0:
                collection_out.add(sampled, derived=derived.values(), weight=weight,
                                   logpriors=logpriors_new, loglikes=loglikes_new)

            # Display progress
            percent = int(np.round((i + done) / to_do * 100))
            if percent != last_percent and not percent % 5:
                last_percent = percent
                progress_bar(log, percent, " (%d/%d)" % (i + done, to_do))

        if difflogmax is None:
            set_difflogmax()
        if not collection_out.data.last_valid_index():
            raise LoggedError(
                log, "No elements in the final sample. Possible causes: "
                     "added a prior or likelihood valued zero over the full sampled "
                     "domain, or the computation of the theory failed everywhere, etc.")
        collection_out.out_update()
        weights.append(np.array(importance_weights))
        done += len(collection_in)

    assert difflogmax is not None
    points = 0
    tot_weight = 0
    min_weight = np.inf
    max_weight = -np.inf
    max_output_weight = -np.inf
    sum_w2 = 0
    points_removed = 0
    for collection_in, collection_out, importance_weights in zip(in_collections,
                                                                 out_collections,
                                                                 weights):
        output_weights = collection_out[OutPar.weight]
        points += len(collection_out)
        tot_weight += np.sum(output_weights)
        points_removed += len(importance_weights) - len(output_weights)
        min_weight = min(min_weight, np.min(importance_weights))
        max_weight = max(max_weight, np.max(importance_weights))
        max_output_weight = max(max_output_weight, np.max(output_weights))
        sum_w2 += np.dot(output_weights, output_weights)

    (tot_weights, min_weights, max_weights, max_output_weights, sum_w2s, points_s,
     points_removed_s) = mpi.zip_gather(
        [tot_weight, min_weight, max_weight, max_output_weight, sum_w2,
         points, points_removed])

    if mpi.is_main_process():
        output_out.clear_lock()
        log.info("Finished! Final number of distinct sample points: %s", sum(points_s))
        log.info("Importance weight range: %.4g -- %.4g",
                 min(min_weights), max(max_weights))
        if sum(points_removed_s):
            log.info("Points deleted due to zero weight: %s", sum(points_removed_s))
        log.info("Effective number of single samples if independent (sum w)/max(w): %s",
                 int(sum(tot_weights) / max(max_output_weights)))
        log.info(
            "Effective number of weighted samples if independent (sum w)^2/sum(w^2): "
            "%s", int(sum(tot_weights) ** 2 / sum(sum_w2s)))
    products: PostResultDict = {"sample": value_or_list(out_collections),
                                "stats": {'min_importance_weight': (min(min_weights) /
                                                                    max(max_weights)),
                                          'points_removed': sum(points_removed_s),
                                          'tot_weight': sum(tot_weights),
                                          'max_weight': max(max_output_weights),
                                          'sum_w2': sum(sum_w2s),
                                          'points': sum(points_s)},
                                "logpost_weight_offset": difflogmax,
                                "weights": value_or_list(weights)}
    return PostTuple(info=out_combined, products=products)
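
The reweighting step buried in the loop above is a numerically stabilised importance-weight update: each new weight is w_old * exp(logp_new - logp_old - difflogmax), where difflogmax is the largest log-posterior difference across the sample, subtracted so that the biggest factor is exp(0) = 1 and nothing overflows. A standalone sketch with made-up numbers:

    import numpy as np

    # Hypothetical old/new log-posteriors and weights for four sample points
    logp_old = np.array([-10.0, -12.0, -11.5, -9.0])
    logp_new = np.array([-10.5, -11.0, -14.0, -9.2])
    old_weights = np.array([1.0, 2.0, 1.0, 3.0])

    difflog = logp_new - logp_old
    difflogmax = np.max(difflog)  # offset so the largest factor is exp(0) = 1
    importance_weights = np.exp(difflog - difflogmax)
    new_weights = old_weights * importance_weights
    print(new_weights)  # points with much lower new posterior get tiny weights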
Example #8
def run(
    info_or_yaml_or_file: Union[InputDict, str, os.PathLike],
    packages_path: Optional[str] = None,
    output: Union[str, LiteralFalse, None] = None,
    debug: Union[bool, int, None] = None,
    stop_at_error: Optional[bool] = None,
    resume: bool = False,
    force: bool = False,
    no_mpi: bool = False,
    test: bool = False,
    override: Optional[InputDict] = None,
) -> Union[InfoSamplerTuple, PostTuple]:
    """
    Run from an input dictionary, file name or yaml string, with optional arguments
    to override settings in the input as needed.

    :param info_or_yaml_or_file: input options dictionary, yaml file, or yaml text
    :param packages_path: path where external packages were installed
    :param output: path name prefix for output files, or False for no file output
    :param debug: true for verbose debug output, or a specific logging level
    :param stop_at_error: stop if an error is raised
    :param resume: continue an existing run
    :param force: overwrite existing output if it exists
    :param no_mpi: run without MPI
    :param test: only test initialization rather than actually running
    :param override: option dictionary to merge into the input one, overriding settings
       (but with lower precedence than the explicit keyword arguments)
    :return: (updated_info, sampler) tuple of options dictionary and Sampler instance,
              or (updated_info, results) if using "post" post-processing
    """

    # This function reproduces the model-->output-->sampler pipeline one would follow
    # when instantiating by hand, but alters the order to perform checks and dump info
    # as early as possible, e.g. to check if resuming possible or `force` needed.
    if no_mpi or test:
        mpi.set_mpi_disabled()

    with mpi.ProcessState("run"):
        info: InputDict = load_info_overrides(info_or_yaml_or_file, debug,
                                              stop_at_error, packages_path,
                                              override)

        if test:
            info["test"] = True
        # If any of resume|force given as cmd args, ignore those in the input file
        if resume or force:
            if resume and force:
                raise ValueError("'rename' and 'force' are exclusive options")
            info["resume"] = bool(resume)
            info["force"] = bool(force)
        if info.get("post"):
            if isinstance(output, str) or output is False:
                info["post"]["output"] = output or None
            return post(info)

        if isinstance(output, str) or output is False:
            info["output"] = output or None
        logger_setup(info.get("debug"), info.get("debug_file"))
        logger_run = get_logger(run.__name__)
        # MARKED FOR DEPRECATION IN v3.0
        # BEHAVIOUR TO BE REPLACED BY ERROR:
        check_deprecated_modules_path(info)
        # END OF DEPRECATION BLOCK
        # 1. Prepare output driver, if requested by defining an output_prefix
        # GetDist needs to know the original sampler, so don't overwrite if minimizer
        try:
            which_sampler = list(info["sampler"])[0]
        except (KeyError, TypeError):
            raise LoggedError(
                logger_run,
                "You need to specify a sampler using the 'sampler' key "
                "as e.g. `sampler: {mcmc: None}.`")
        infix = "minimize" if which_sampler == "minimize" else None
        with get_output(prefix=info.get("output"),
                        resume=info.get("resume"),
                        force=info.get("force"),
                        infix=infix) as out:
            # 2. Update the input info with the defaults for each component
            updated_info = update_info(info)
            if is_debug(logger_run):
                # Dump only if not doing output
                # (otherwise, the user can check the .updated file)
                if not out and mpi.is_main_process():
                    logger_run.info(
                        "Input info updated with defaults (dumped to YAML):\n%s",
                        yaml_dump(sort_cosmetic(updated_info)))
            # 3. If output requested, check compatibility if existing one, and dump.
            # 3.1 First: model only
            out.check_and_dump_info(info,
                                    updated_info,
                                    cache_old=True,
                                    ignore_blocks=["sampler"])
            # 3.2 Then sampler -- 1st get the last sampler mentioned in the updated.yaml
            # TODO: ideally, using Minimizer would *append* to the sampler block.
            #       Some code already in place, but not possible at the moment.
            try:
                last_sampler = list(updated_info["sampler"])[-1]
                last_sampler_info = {
                    last_sampler: updated_info["sampler"][last_sampler]
                }
            except (KeyError, TypeError):
                raise LoggedError(logger_run, "No sampler requested.")
            sampler_name, sampler_class = get_sampler_name_and_class(
                last_sampler_info)
            check_sampler_info((out.reload_updated_info(use_cache=True)
                                or {}).get("sampler"),
                               updated_info["sampler"],
                               is_resuming=out.is_resuming())
            # Dump again, now including sampler info
            out.check_and_dump_info(info, updated_info, check_compatible=False)
            # Check if resumable run
            sampler_class.check_force_resume(
                out, info=updated_info["sampler"][sampler_name])
            # 4. Initialize the posterior and the sampler
            with Model(updated_info["params"],
                       updated_info["likelihood"],
                       updated_info.get("prior"),
                       updated_info.get("theory"),
                       packages_path=info.get("packages_path"),
                       timing=updated_info.get("timing"),
                       allow_renames=False,
                       stop_at_error=info.get("stop_at_error",
                                              False)) as model:
                # Re-dump the updated info, now containing parameter routes and version
                updated_info = recursive_update(updated_info, model.info())
                out.check_and_dump_info(None,
                                        updated_info,
                                        check_compatible=False)
                sampler = sampler_class(
                    updated_info["sampler"][sampler_name],
                    model,
                    out,
                    name=sampler_name,
                    packages_path=info.get("packages_path"))
                # Re-dump updated info, now also containing updates from the sampler
                updated_info["sampler"][sampler_name] = \
                    recursive_update(updated_info["sampler"][sampler_name],
                                     sampler.info())
                out.check_and_dump_info(None,
                                        updated_info,
                                        check_compatible=False)
                mpi.sync_processes()
                if info.get("test", False):
                    logger_run.info(
                        "Test initialization successful! "
                        "You can probably run now without `--%s`.", "test")
                    return InfoSamplerTuple(updated_info, sampler)
                # Run the sampler
                sampler.run()

    return InfoSamplerTuple(updated_info, sampler)
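
For context, ``run`` is normally called with a full input dictionary. A minimal sketch using an external (callable) likelihood, which Cobaya supports by passing the function directly; the exact option set shown is illustrative and may vary between Cobaya versions:

    from cobaya.run import run

    def my_loglike(a):
        # Simple 1D Gaussian log-likelihood centred on 0.2 (illustrative)
        return -0.5 * (a - 0.2) ** 2 / 0.01

    info = {
        "likelihood": {"my_loglike": my_loglike},
        "params": {"a": {"prior": {"min": -1.0, "max": 1.0}, "proposal": 0.05}},
        "sampler": {"mcmc": {"max_samples": 100}},
    }
    updated_info, sampler = run(info)
    products = sampler.products()  # e.g. the resulting sample collection(s)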
Example #9
 def initialize(self):
     """Initializes the sampler:
     creates the proposal distribution and draws the initial sample."""
     self.log.debug("Initializing")
     for p in [
             "burn_in", "max_tries", "output_every", "check_every",
             "callback_every"
     ]:
         setattr(
             self, p,
             read_dnumber(getattr(self, p), self.model.prior.d(),
                          dtype=int))
     if self.callback_every is None:
         self.callback_every = self.check_every
     # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
     self.burn_in_left = self.burn_in + 1
     # Max # checkpoints to wait, in case one process dies without sending MPI_ABORT
     self.been_waiting = 0
     self.max_waiting = max(50, self.max_tries / self.model.prior.d())
     if am_single_or_primary_process():
         if self.resuming and (max(self.mpi_size or 0, 1) != max(
                 get_mpi_size(), 1)):
             self.log.error(
                 "Cannot resume a sample with a different number of chains: "
                 "was %d and now is %d.", max(self.mpi_size, 1),
                 max(get_mpi_size(), 1))
             raise HandledException
     sync_processes()
     if not self.resuming and self.output:
         # Delete previous files (if not "forced", the run would have already failed)
         if ((os.path.abspath(self.covmat_filename()) != os.path.abspath(
                 str(self.covmat)))):
             try:
                 os.remove(self.covmat_filename())
             except OSError:
                 pass
         # There may be more chains than expected,
         # if #ranks was bigger in a previous run
         i = 0
         while True:
             i += 1
             collection_filename, _ = self.output.prepare_collection(str(i))
             try:
                 os.remove(collection_filename)
             except OSError:
                 break
     # One collection per MPI process: `name` is the MPI rank + 1
     name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank()))
     self.collection = Collection(self.model,
                                  self.output,
                                  name=name,
                                  resuming=self.resuming)
     self.current_point = OnePoint(self.model, OutputDummy({}), name=name)
     # Use standard MH steps by default
     self.get_new_sample = self.get_new_sample_metropolis
     # Prepare oversampling / dragging if applicable
     self.effective_max_samples = self.max_samples
     if self.oversample and self.drag:
         self.log.error("Choose either oversampling or dragging, not both.")
         raise HandledException
     if self.blocking:
         try:
             speeds, blocks = zip(*list(self.blocking))
             speeds = np.array(speeds)
         except Exception:
             raise HandledException(
                 "Manual blocking not understood. Check documentation.")
         check = compare_params_lists(
             list(chain(*blocks)),
             list(self.model.parameterization.sampled_params()))
         duplicate = check.pop("duplicate_A", None)
         missing = check.pop("B_but_not_A", None)
         unknown = check.pop("A_but_not_B", None)
         if duplicate:
             self.log.error("Manual blocking: repeated parameters: %r",
                            duplicate)
             raise HandledException
         if missing:
             self.log.error("Manual blocking: missing parameters: %r",
                            missing)
             raise HandledException
         if unknown:
             self.log.error("Manual blocking: unkown parameters: %r",
                            unknown)
             raise HandledException
         int_speeds = self.oversample or self.drag
         if int_speeds:
             speeds = relative_to_int(speeds)
         if (speeds != np.sort(speeds)).any():
             self.log.warning(
                 "Manual blocking: speed-blocking non-optimal: sort by "
                 "ascending speed when possible")
     else:
         speeds, blocks = self.model.likelihood._speeds_of_params(
             int_speeds=self.oversample or self.drag, fast_slow=self.drag)
     if self.oversample:
         self.oversampling_factors = speeds
         self.log.info("Oversampling with factors:\n" + "\n".join([
             "   %d : %r" % (f, b)
             for f, b in zip(self.oversampling_factors, blocks)
         ]))
         self.i_last_slow_block = None
         # No way right now to separate slow and fast
         slow_params = list(self.model.parameterization.sampled_params())
     elif self.drag:
         # For now, no blocking inside either fast or slow: just 2 blocks
         self.i_last_slow_block = 0
         if np.all(speeds == speeds[0]):
             self.log.error(
                 "All speeds are equal or too similar: cannot drag! "
                 "Make sure to define accurate likelihoods' speeds.")
             raise HandledException
         # Make the 1st factor 1:
         speeds = [1, speeds[1] / speeds[0]]
         # Target: dragging step taking as long as slow step
         self.drag_interp_steps = self.drag * speeds[1]
         # Per dragging step, the (fast) posterior is evaluated *twice*,
         self.drag_interp_steps /= 2
         self.drag_interp_steps = int(np.round(self.drag_interp_steps))
         fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
         # Not too much or too little dragging
         drag_limits = [(int(l) * len(fast_params) if l is not None else l)
                        for l in self.drag_limits]
         if drag_limits[
                 0] is not None and self.drag_interp_steps < drag_limits[0]:
             self.log.warning(
                 "Number of dragging steps clipped from below: was not "
                 "enough to efficiently explore the fast directions -- "
                 "avoid this limit by decreasing 'drag_limits[0]'.")
             self.drag_interp_steps = drag_limits[0]
         if drag_limits[
                 1] is not None and self.drag_interp_steps > drag_limits[1]:
             self.log.warning(
                 "Number of dragging steps clipped from above: "
                 "excessive, probably inefficient, exploration of the "
                 "fast directions -- "
                 "avoid this limit by increasing 'drag_limits[1]'.")
             self.drag_interp_steps = drag_limits[1]
         # Re-scale steps between checkpoint and callback to the slow dimensions only
         slow_params = list(chain(*blocks[:1 + self.i_last_slow_block]))
         self.n_slow = len(slow_params)
         for p in ["check_every", "callback_every"]:
             setattr(
                 self, p,
                 int(getattr(self, p) * self.n_slow / self.model.prior.d()))
         self.log.info("Dragging with oversampling per step:\n" +
                       "\n".join([
                           "   %d : %r" % (f, b)
                           for f, b in zip([1, self.drag_interp_steps],
                                           [blocks[0], fast_params])
                       ]))
         self.get_new_sample = self.get_new_sample_dragging
     else:
         self.oversampling_factors = [1 for b in blocks]
         slow_params = list(self.model.parameterization.sampled_params())
         self.n_slow = len(slow_params)
     # Turn parameter names into indices
     self.blocks = [[
         list(self.model.parameterization.sampled_params()).index(p)
         for p in b
     ] for b in blocks]
     self.proposer = BlockedProposer(
         self.blocks,
         oversampling_factors=self.oversampling_factors,
         i_last_slow_block=self.i_last_slow_block,
         proposal_scale=self.proposal_scale)
     # Build the initial covariance matrix of the proposal, or load from checkpoint
     if self.resuming:
         covmat = np.loadtxt(self.covmat_filename())
         self.log.info("Covariance matrix from checkpoint.")
     else:
         covmat = self.initial_proposal_covmat(slow_params=slow_params)
         self.log.info("Initial covariance matrix.")
     self.log.debug(
         "Sampling with covmat:\n%s",
         DataFrame(
             covmat,
             columns=self.model.parameterization.sampled_params(),
             index=self.model.parameterization.sampled_params()).to_string(
                 line_width=_line_width))
     self.proposer.set_covariance(covmat)
     # Prepare callback function
     if self.callback_function is not None:
         self.callback_function_callable = (get_external_function(
             self.callback_function))
     # Useful for getting last points added inside callback function
     self.last_point_callback = 0
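The dragging setup above reduces to a small piece of arithmetic: choose the number of interpolation steps so that dragging along the fast directions takes about as long as one slow step, halve it because each dragging step evaluates the (fast) posterior twice, and clip the result to the configured limits. A minimal standalone sketch of that calculation (function and argument names are illustrative, not Cobaya's API):

import numpy as np

def dragging_steps(drag, slow_speed, fast_speed, n_fast, drag_limits=(None, None)):
    """Illustrative re-derivation of ``drag_interp_steps`` above."""
    # Normalize speeds so that the slow block has speed 1
    speed_ratio = fast_speed / slow_speed
    # Target: dragging should take about as long as one slow step...
    steps = drag * speed_ratio
    # ...but each dragging step evaluates the (fast) posterior twice
    steps = int(np.round(steps / 2))
    # Limits are specified per fast parameter
    low, high = ((lim * n_fast if lim is not None else None)
                 for lim in drag_limits)
    if low is not None and steps < low:
        steps = low   # clipped from below: ensure enough fast exploration
    if high is not None and steps > high:
        steps = high  # clipped from above: avoid wasteful fast exploration
    return steps

# E.g. a fast block 20x as fast as the slow one, drag=1, 5 fast params:
print(dragging_steps(drag=1, slow_speed=0.1, fast_speed=2.0, n_fast=5))  # -> 10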
Example #10
0
 def initialize(self):
     """Initializes the sampler:
     creates the proposal distribution and draws the initial sample."""
     if not self.model.prior.d():
         raise LoggedError(self.log,
                           "No parameters being varied for sampler")
     self.log.debug("Initializing")
     # MARKED FOR DEPRECATION IN v3.0
     if getattr(self, "oversample", None) is not None:
         self.log.warning(
             "*DEPRECATION*: `oversample` will be deprecated in the "
             "next version. Oversampling is now requested by setting "
             "`oversample_power` > 0.")
     # END OF DEPRECATION BLOCK
     # MARKED FOR DEPRECATION IN v3.0
     if getattr(self, "check_every", None) is not None:
         self.log.warning(
             "*DEPRECATION*: `check_every` will be deprecated in the "
             "next version. Please use `learn_every` instead.")
         # BEHAVIOUR TO BE REPLACED BY ERROR:
         self.learn_every = getattr(self, "check_every")
     # END OF DEPRECATION BLOCK
     if self.callback_every is None:
         self.callback_every = self.learn_every
     self._quants_d_units = []
     for q in ["max_tries", "learn_every", "callback_every", "burn_in"]:
         number = NumberWithUnits(getattr(self, q), "d", dtype=int)
         self._quants_d_units.append(number)
         setattr(self, q, number)
     self.output_every = NumberWithUnits(self.output_every, "s", dtype=int)
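     # Note: `output_every` is measured in seconds ("s"), whereas the quantities
     # above are in units of the parameter-space dimension ("d")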
     if is_main_process():
         if self.output.is_resuming() and (max(self.mpi_size or 0, 1) !=
                                           max(get_mpi_size() or 0, 1)):
             raise LoggedError(
                 self.log,
                 "Cannot resume a run with a different number of chains: "
                 "was %d and now is %d.", max(self.mpi_size or 0, 1),
                 max(get_mpi_size() or 0, 1))
         if more_than_one_process():
             if get_mpi().Get_version()[0] < 3:
                 raise LoggedError(
                     self.log, "MPI use requires MPI version 3.0 or "
                     "higher to support IALLGATHER.")
     sync_processes()
     # One collection per MPI process: `name` is the MPI rank + 1
     name = str(1 + (get_mpi_rank() or 0))
     self.collection = Collection(self.model,
                                  self.output,
                                  name=name,
                                  resuming=self.output.is_resuming())
     self.current_point = OneSamplePoint(self.model)
     # Use standard MH steps by default
     self.get_new_sample = self.get_new_sample_metropolis
     # Prepare callback function
     if self.callback_function is not None:
         self.callback_function_callable = (get_external_function(
             self.callback_function))
     # Useful for getting last points added inside callback function
     self.last_point_callback = 0
     # Monitor / restore progress
     if is_main_process():
         cols = [
             "N", "timestamp", "acceptance_rate", "Rminus1", "Rminus1_cl"
         ]
         self.progress = DataFrame(columns=cols)
         self.i_learn = 1
         if self.output and not self.output.is_resuming():
             with open(self.progress_filename(), "w",
                       encoding="utf-8") as progress_file:
                 progress_file.write("# " +
                                     " ".join(self.progress.columns) + "\n")
     # Get first point, to be discarded -- not possible to determine its weight
     # Still, we need to compute derived parameters, since, as the proposal is
     # "blocked", we may be saving the initial state of some block.
     # NB: if resuming but nothing was written (burn-in not finished): re-start
     if self.output.is_resuming() and len(self.collection):
         # Restore the last accepted point (stored with negative-log sign convention)
         last = len(self.collection) - 1
         initial_point = (self.collection[self.collection.sampled_params]
                          .iloc[last].values.copy())
         logpost = -self.collection[_minuslogpost].iloc[last].copy()
         logpriors = -(self.collection[self.collection.minuslogprior_names]
                       .iloc[last].copy())
         loglikes = -0.5 * (self.collection[self.collection.chi2_names]
                            .iloc[last].copy())
         derived = (self.collection[self.collection.derived_params]
                    .iloc[last].values.copy())
     else:
         # NB: max_tries adjusted to dim instead of #cycles (blocking not computed yet)
         self.max_tries.set_scale(self.model.prior.d())
         self.log.info(
             "Getting initial point... (this may take a few seconds)")
         initial_point, logpost, logpriors, loglikes, derived = \
             self.model.get_valid_point(max_tries=self.max_tries.value)
         # If resuming but no existing chain, assume failed run and ignore blocking
         # if speeds measurement requested
         if self.output.is_resuming() and not len(self.collection) \
            and self.measure_speeds:
             self.blocking = None
         if self.measure_speeds and self.blocking:
             self.log.warning(
                 "Parameter blocking manually fixed: speeds will not be measured."
             )
         elif self.measure_speeds:
             n = None if self.measure_speeds is True else int(
                 self.measure_speeds)
             self.model.measure_and_set_speeds(n=n, discard=0)
     self.set_proposer_blocking()
     self.set_proposer_covmat(load=True)
     self.current_point.add(initial_point,
                            derived=derived,
                            logpost=logpost,
                            logpriors=logpriors,
                            loglikes=loglikes)
     self.log.info("Initial point: %s", self.current_point)
     # Max #(learn+convergence checks) to wait,
     # in case one process dies without sending MPI_ABORT
     self.been_waiting = 0
     self.max_waiting = max(50, self.max_tries.unit_value)
     # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
     self.burn_in_left = self.burn_in.value * self.current_point.output_thin + 1
     # Initial dummy checkpoint
     # (needed when 1st "learn point" not reached in prev. run)
     self.write_checkpoint()
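Several of the quantities handled above (``max_tries``, ``learn_every``, ``callback_every``, ``burn_in``, ``output_every``) are numbers with units: the stored ``unit_value`` is a coefficient that only becomes a concrete ``value`` once ``set_scale`` is called, e.g. with the prior dimension for "d" units. A minimal sketch of that behaviour, assuming only the interface visible above (this is not Cobaya's actual ``NumberWithUnits`` implementation):

class NumberWithUnitsSketch:
    """Hypothetical mini-version of the NumberWithUnits interface used above."""

    def __init__(self, spec, unit, dtype=int):
        self.unit, self.dtype = unit, dtype
        if isinstance(spec, str) and spec.endswith(unit):
            # e.g. "40d" with unit "d": coefficient 40, resolved later by set_scale
            self.unit_value = float(spec[:-len(unit)] or 1)
            self.value = None
        else:
            # Plain number: no unit, nothing to rescale
            self.unit_value = self.value = dtype(spec)

    def set_scale(self, scale):
        # Resolve the unit, e.g. max_tries.set_scale(model.prior.d());
        # plain (unit-less) numbers are left unchanged
        if self.value is None:
            self.value = self.dtype(self.unit_value * scale)

# E.g. "40d" in a 6-dimensional parameter space:
max_tries = NumberWithUnitsSketch("40d", "d")
max_tries.set_scale(6)
print(max_tries.value)  # -> 240

This matches how the method consumes these objects: ``self.max_tries.value`` when drawing the initial point, and ``self.max_tries.unit_value`` when setting ``max_waiting``.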
Example #11
0
def test_cosmo_run_resume_post(tmpdir, packages_path=None):
    # Only vary As, so the chain is fast; it does not need to converge (tested elsewhere)
    info['output'] = os.path.join(tmpdir, 'testchain')
    if packages_path:
        info["packages_path"] = process_packages_path(packages_path)
    run(info, force=True)
    # Note that continuing from files incurs text-file precision at read-in, so the
    # SampleCollection returned from run contains a mix of precisions
    run(info, resume=True, override={'sampler': {'mcmc': {'Rminus1_stop': 0.2}}})
    updated_info, sampler = run(info['output'] + '.updated' + Extension.dill,
                                resume=True,
                                override={'sampler': {'mcmc': {'Rminus1_stop': 0.05}}})
    results = mpi.allgather(sampler.products()["sample"])
    samp = MCSamplesFromCobaya(updated_info, results, ignore_rows=0.2)
    assert np.isclose(samp.mean('As100'), 100 * samp.mean('As'))

    # post-processing
    info_post: PostDict = {'add': {'params': {'h': None},
                                   "likelihood": {"test_likelihood2": likelihood2}},
                           'remove': {'likelihood': ["test_likelihood"]},
                           'suffix': 'testpost',
                           'skip': 0.2, 'thin': 4
                           }

    output_info, products = run(updated_info, override={'post': info_post}, force=True)
    results2 = mpi.allgather(products["sample"])
    samp2 = MCSamplesFromCobaya(output_info, results2)
    samp_test = samp.copy()
    samp_test.weighted_thin(4)
    sigma8 = samp_test.getParams().sigma8
    samp_test.reweightAddingLogLikes(-(sigma8 - 0.7) ** 2 / 0.1 ** 2
                                     + (sigma8 - 0.75) ** 2 / 0.07 ** 2)
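    # The re-weighting above swaps the original sigma8 likelihood for
    # test_likelihood2 via importance weights, so it should match the post result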
    assert np.isclose(samp_test.mean('sigma8'), samp2.mean('sigma8'))

    # from getdist-format chain files
    root = os.path.join(tmpdir, 'getdist_format')
    if mpi.is_main_process():
        samp.saveChainsAsText(root)
    mpi.sync_processes()

    from_txt = dict(updated_info, output=root)
    post_from_text = dict(info_post, skip=0)  # getdist already skipped
    output_info, products = run(from_txt, override={'post': post_from_text}, force=True)
    samp_getdist = MCSamplesFromCobaya(output_info, mpi.allgather(products["sample"]))
    assert not products["stats"]["points_removed"]
    assert samp2.numrows == samp_getdist.numrows
    assert np.isclose(samp2.mean('sigma8'), samp_getdist.mean('sigma8'))

    # again with inferred-inputs for params
    info_conv = cosmomc_root_to_cobaya_info_dict(root)
    # have to manually add consistent likelihoods if re-computing
    info_conv['likelihood'] = info['likelihood']
    info_conv['theory'] = info['theory']
    post_from_text = dict(info_post, skip=0, suffix='getdist2')  # getdist already skipped
    output_info, products = run(info_conv, override={'post': post_from_text},
                                output=False)
    samp_getdist2 = MCSamplesFromCobaya(output_info, mpi.allgather(products["sample"]))
    assert np.isclose(samp2.mean('sigma8'), samp_getdist2.mean('sigma8'))

    # from save info, no output
    info_post['output'] = None
    output_info, products = run({'output': info['output'], 'post': info_post}, force=True)
    results3 = mpi.allgather(products["sample"])
    samp3 = MCSamplesFromCobaya(output_info, results3)
    assert np.isclose(samp3.mean("sigma8"), samp2.mean("sigma8"))
    assert np.isclose(samp3.mean("joint"), samp2.mean("joint"))
    samps4 = loadMCSamples(info['output'] + '.post.testpost')
    assert np.isclose(samp3.mean("joint"), samps4.mean("joint"))

    # Test recovering the original answer by swapping the likelihoods back
    info_revert = {'add': {'likelihood': info['likelihood']},
                   'remove': {'likelihood': ["test_likelihood2"]},
                   'suffix': 'revert',
                   'skip': 0, 'thin': 1,
                   'output': None
                   }
    output_info, products = run({'output': info['output'] + '.post.testpost',
                                 'post': info_revert}, force=True)
    results_revert = mpi.allgather(products["sample"])
    samp_revert = MCSamplesFromCobaya(output_info, results_revert)

    samp_thin = MCSamplesFromCobaya(updated_info, results, ignore_rows=0.2)
    samp_thin.weighted_thin(4)
    assert samp_thin.numrows == samp_revert.numrows + products["stats"]["points_removed"]
    if not products["stats"]["points_removed"]:
        assert np.isclose(samp_revert.mean("sigma8"), samp_thin.mean("sigma8"))
    else:
        assert abs(samp_revert.mean("sigma8") - samp_thin.mean("sigma8")) < 0.01
    # In this particular setup, no points should have been removed at all
    assert not products["stats"]["points_removed"]

    # no remove
    info_post = {
        'add': {'params': {'h': None}, "likelihood": {"test_likelihood2": likelihood2}},
        'suffix': 'test2', 'skip': 0.2, 'thin': 4}
    output_info, products = run(updated_info, override={'post': info_post}, force=True)
    results2 = mpi.allgather(products["sample"])
    samp2 = MCSamplesFromCobaya(output_info, results2)
    assert "chi2__type1" in samp2.paramNames.list()
    # check what has been saved to disk is consistent
    samps4 = loadMCSamples(updated_info['output'] + '.post.test2')
    assert samp2.paramNames.list() == samps4.paramNames.list()
    assert np.isclose(samp2.mean("sigma8"), samps4.mean("sigma8"))

    # Adding a new theory-derived parameter
    info_post['add']['theory'] = {'new_param_theory': BTheory}
    output_info, products = run(updated_info, override={'post': info_post}, output=False)
    results3 = mpi.allgather(products["sample"])
    samp3 = MCSamplesFromCobaya(output_info, results3)
    assert np.isclose(samp3.mean("sigma8"), samp2.mean("sigma8"))
    assert np.isclose(samp3.mean("As1000"), samp2.mean("As") * 1000)

    info_post['add']['theory'] = {'new_param_theory': CTheory}
    with pytest.raises(LoggedError) as e, NoLogging(logging.ERROR):
        run(updated_info, override={'post': info_post}, output=False)
    assert 'Parameter AsX no known value' in str(e)
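The pattern this test exercises throughout is Cobaya's post-processing interface: take a finished run, skip and thin the chain, remove one likelihood, add another, and check the importance-reweighted result. Stripped to its core, a run of that kind looks roughly like the sketch below; the likelihood names and ``my_updated_info`` are placeholders, not the test's fixtures:

from cobaya.run import run

# Hypothetical post block: the likelihood names below are placeholders
info_post = {
    'add': {'likelihood': {'new_like': 'some_module.new_like'}},
    'remove': {'likelihood': ['old_like']},
    'skip': 0.2,          # discard the first 20% of each chain as burn-in
    'thin': 4,            # keep every 4th (weighted) sample
    'suffix': 'mypost',   # products written as <output>.post.mypost.*
}

# `my_updated_info` stands for the updated info dict of an already-finished run
updated_info, products = run(my_updated_info, override={'post': info_post})
samples = products["sample"]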