Example #1
def test_post_prior(tmpdir):
    # Generate original chain
    info: InputDict = {
        "output": os.path.join(tmpdir, "gaussian"), "force": True,
        "params": info_params, "sampler": info_sampler,
        "likelihood": {"one": None}, "prior": {"gaussian": sampled_pdf}}
    info_post: InputDict = {
        "output": info["output"], "force": True,
        "post": {"suffix": "foo", 'skip': 0.1,
                 "remove": {"prior": {"gaussian": None}},
                 "add": {"prior": {"target": target_pdf_prior}}}}
    _, sampler = run(info)
    if mpi.is_main_process():
        mcsamples_in = loadMCSamples(info["output"], settings={'ignore_rows': 0.1})
        target_mean, target_cov = mpi.share(_get_targets(mcsamples_in))
    else:
        target_mean, target_cov = mpi.share()

    for mem in [False, True]:
        post(info_post, sample=sampler.products()["sample"] if mem else None)
        # Load with GetDist and compare
        if mpi.is_main_process():
            mcsamples = loadMCSamples(
                info_post["output"] + _post_ + info_post["post"]["suffix"])
            new_mean = mcsamples.mean(["a", "b"])
            new_cov = mcsamples.getCovMat().matrix
            mpi.share((new_mean, new_cov))
        else:
            new_mean, new_cov = mpi.share()
        assert np.allclose(new_mean, target_mean)
        assert np.allclose(new_cov, target_cov)
Example #2
 def __init__(self, *args, **kwargs):
     if is_main_process():
         Output.__init__(self, *args, **kwargs)
     if more_than_one_process():
         to_broadcast = (
             "folder", "prefix", "kind", "ext", "_resuming", "prefix_regexp_str")
         values = share_mpi([getattr(self, var) for var in to_broadcast]
                            if is_main_process() else None)
         for name, var in zip(to_broadcast, values):
             setattr(self, name, var)
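
The snippet above builds the `Output` object on the main process only and then shares the listed attributes with every other rank. A minimal sketch of the same broadcast-from-rank-0 idea written directly with mpi4py (the attribute values are made up, and this is not Cobaya's `share_mpi` itself):

    from mpi4py import MPI

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    # Only rank 0 computes the values; every other rank receives them.
    values = {"folder": "./chains", "prefix": "run1"} if rank == 0 else None
    values = comm.bcast(values, root=0)  # all ranks now hold the same dict
    print(rank, values)
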
Example #3
 def is_installed(cls, **kwargs):
     log = get_logger(cls.__name__)
     if not kwargs.get("code", True):
         return True
     check = kwargs.get("check", True)
     func = log.info if check else log.error
     path: Optional[str] = kwargs["path"]
     if path is not None and path.lower() == "global":
         path = None
     if path and not kwargs.get("allow_global"):
         if is_main_process():
             log.info("Importing *local* PolyChord from '%s'.", path)
         if not os.path.exists(path):
             if is_main_process():
                 func("The given folder does not exist: '%s'", path)
             return False
         poly_build_path = cls.get_import_path(path)
         if not poly_build_path:
             return False
     elif not path:
         if is_main_process():
             log.info("Importing *global* PolyChord.")
         poly_build_path = None
     else:
         if is_main_process():
             log.info(
                 "Importing *auto-installed* PolyChord (but defaulting to *global*)."
             )
         poly_build_path = cls.get_import_path(path)
     cls._poly_build_path = poly_build_path
     try:
         # TODO: add min_version when polychord module version available
         return load_module('pypolychord',
                            path=poly_build_path,
                            min_version=None)
     except ModuleNotFoundError:
         if path is not None and path.lower() != "global":
             log.error(
                 "Couldn't find the PolyChord python interface at '%s'. "
                 "Are you sure it has been installed there?", path)
         elif not check:
             log.error(
                 "Could not import global PolyChord installation. "
                 "Specify a Cobaya or PolyChord installation path, "
                 "or install the PolyChord Python interface globally with "
                 "'cd /path/to/polychord/ ; python setup.py install'")
         return False
     except ImportError as e:
         log.error(
             "Couldn't load the PolyChord python interface in %s:\n"
             "%s", poly_build_path or "global", e)
         return False
     except VersionCheckError as e:
         log.error(str(e))
         return False
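
`load_module` above is Cobaya's own helper. The underlying idea (import a Python package while letting an explicit folder take precedence over any global installation) can be sketched independently; `import_from` below is a hypothetical stand-in, not Cobaya's API:

    import importlib
    import sys

    def import_from(name, path=None):
        # Hypothetical helper: if a path is given, it temporarily takes
        # precedence on sys.path over any globally installed copy.
        if path:
            sys.path.insert(0, path)
        try:
            return importlib.import_module(name)
        finally:
            if path:
                sys.path.remove(path)
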
Example #4
    def __init__(self,
                 info_sampler: SamplerDict,
                 model: Model,
                 output: Optional[Output] = None,
                 packages_path: Optional[str] = None,
                 name: Optional[str] = None):
        """
        Actual initialization of the class. Loads the default and input information and
        calls the custom ``initialize`` method.

        [Do not modify this one.]
        """
        self._model = model
        self._output = output
        self._updated_info = deepcopy_where_possible(info_sampler)
        super().__init__(info_sampler,
                         packages_path=packages_path,
                         name=name,
                         initialize=False,
                         standalone=False)
        if not model.parameterization.sampled_params():
            self.mpi_warning("No sampled parameters requested! "
                             "This will fail for non-mock samplers.")
        # Load checkpoint info, if resuming
        if self.output.is_resuming() and not isinstance(self, Minimizer):
            checkpoint_info = None
            if mpi.is_main_process():
                try:
                    checkpoint_info = yaml_load_file(
                        self.checkpoint_filename())

                    if self.get_name() not in checkpoint_info["sampler"]:
                        raise LoggedError(
                            self.log, "Checkpoint file found at '%s' "
                            "but it corresponds to a different sampler.",
                            self.checkpoint_filename())
                except (IOError, TypeError):
                    pass
            checkpoint_info = mpi.share_mpi(checkpoint_info)
            if checkpoint_info:
                self.set_checkpoint_info(checkpoint_info)
                self.mpi_info("Resuming from previous sample!")
        elif not isinstance(self, Minimizer) and mpi.is_main_process():
            try:
                output.delete_file_or_folder(self.checkpoint_filename())
                output.delete_file_or_folder(self.progress_filename())
            except (OSError, TypeError):
                pass
        self._set_rng()
        self.initialize()
        model.set_cache_size(self._get_requested_cache_size())
        # Add to the updated info some values which are
        # only available after initialisation
        self._updated_info["version"] = self.get_version()
Example #5
 def _run(self):
     """
     Runs the sampler.
     """
     self.mpi_info(
         "Sampling!" +
         (" (NB: no accepted step will be saved until %d burn-in samples " %
          self.burn_in.value + "have been obtained)"
          if self.burn_in.value else ""))
     self.n_steps_raw = 0
     last_output = 0
     last_n = self.n()
     while last_n < self.max_samples and not self.converged:
         self.get_new_sample()
         self.n_steps_raw += 1
         if self.output_every.unit:
             # if output_every in sec, print some info and dump at fixed time intervals
             now = datetime.datetime.now()
             now_sec = now.timestamp()
             if now_sec >= last_output + self.output_every.value:
                 self.do_output(now)
                 last_output = now_sec
         if self.current_point.weight == 1:
             # have added new point
             # Callback function
             n = self.n()
             if n != last_n:
                 # and actually added
                 last_n = n
                 if (hasattr(self, "callback_function_callable") and
                         not (max(n, 1) % self.callback_every.value) and
                         self.current_point.weight == 1):
                     self.callback_function_callable(self)
                     self.last_point_callback = len(self.collection)
                 # Checking convergence and (optionally) learning
                 # the covmat of the proposal
                 if self.check_all_ready():
                     self.check_convergence_and_learn_proposal()
                     if is_main_process():
                         self.i_learn += 1
     if last_n == self.max_samples:
         self.log.info("Reached maximum number of accepted steps allowed. "
                       "Stopping.")
     # Make sure the last batch of samples (< output_every, when not in sec) is written
     self.collection.out_update()
     if more_than_one_process():
         Ns = (lambda x: np.array(get_mpi_comm().gather(x)))(self.n())
         if not is_main_process():
             Ns = []
     else:
         Ns = [self.n()]
     self.mpi_info("Sampling complete after %d accepted steps.", sum(Ns))
Example #6
 def _load_covmat(self, prefer_load_old, auto_params=None):
     if prefer_load_old and os.path.exists(self.covmat_filename()):
         if is_main_process():
             covmat = np.atleast_2d(np.loadtxt(self.covmat_filename()))
         else:
             covmat = None
         covmat = share_mpi(covmat)
         self.mpi_info("Covariance matrix from previous sample.")
         return covmat, []
     else:
         return share_mpi(
             self.initial_proposal_covmat(
                 auto_params=auto_params) if is_main_process() else None)
Example #7
 def lock_error(self):
     if not self.has_lock():
         assert self.lock_error_file
         try:
             # make lock_err so process holding lock can check
             # another process had an error
             with open(self.lock_error_file, 'wb'):
                 pass
         except OSError:
             pass
     if mpi.get_mpi():
         import mpi4py
     else:
         mpi4py = None
     if mpi.is_main_process() and use_portalocker() is None:
         self.log.warning('install "portalocker" for better file lock control.')
     raise LoggedError(self.log,
                       "File %s is locked.\nYou may be running multiple jobs with "
                       "the same output when you intended to run with MPI. "
                       "Check that mpi4py is correctly installed and "
                       "configured (using the same mpi as mpirun/mpiexec); "
                       "e.g. try the test at\n"
                       "https://cobaya.readthedocs.io/en/latest/installation."
                       "html#mpi-parallelization-optional-but-encouraged\n"
                       + ("Your current mpi4py config is:"
                          "\n %s" % mpi4py.get_config()
                          if mpi4py is not None else
                          "mpi4py is NOT currently installed."), self.lock_file)
Example #8
 def reload_updated_info(self,
                         cache=False,
                         use_cache=False) -> Optional[InputDict]:
     if mpi.is_main_process():
         if use_cache and hasattr(self, "_old_updated_info"):
             return self._old_updated_info
         try:
             if os.path.isfile(self.dump_file_updated):
                 loaded = load_info_dump(self.dump_file_updated)
             else:
                 loaded = yaml_load_file(self.file_updated)  # type: ignore
             if cache:
                 self._old_updated_info = deepcopy_where_possible(loaded)
             return loaded
         except IOError:
             if cache:
                 self._old_updated_info = None
             return None
     else:
         # Only the cached version is available when not the main process
         if not use_cache:
             raise LoggedError(
                 self.log, "Cannot call `reload_updated_info` from "
                 "non-main process unless cached version "
                 "(`use_cache=True`) requested.")
         return getattr(self, "_old_updated_info", None)
Example #9
    def products(self):
        r"""
        Returns a dictionary containing:

        - ``minimum``: :class:`OnePoint` that maximizes the posterior or likelihood
          (depending on ``ignore_prior``).

        - ``result_object``: instance of results class of
          `scipy <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html>`_
          or `pyBOBYQA
          <https://numericalalgorithmsgroup.github.io/pybobyqa/build/html/userguide.html>`_.

        - ``M``: inverse of the affine transform matrix (see below).
          ``None`` if no transformation applied.

        - ``X0``: offset of the affine transform matrix (see below).
          ``None`` if no transformation applied.

        If non-trivial ``M`` and ``X0`` are returned, this means that the minimizer has
        been working on an affine-transformed parameter space :math:`x^\prime`, from which
        the real space points can be obtained as :math:`x = M x^\prime + X_0`. This inverse
        transformation needs to be applied to the coordinates appearing inside the
        ``result_object``.
        """
        if is_main_process():
            return {
                "minimum": self.minimum,
                "result_object": self.result,
                "M": self._inv_affine_transform_matrix,
                "X0": self._affine_transform_baseline
            }
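
A short numpy sketch of the inverse transform described in the docstring, x = M x' + X_0, applied to a point from the transformed space (`M`, `X0` and `x_prime` are made-up values for illustration):

    import numpy as np

    # Stand-ins for products()["M"], products()["X0"] and a point taken from
    # the scipy / pyBOBYQA result object (all values hypothetical).
    M = np.array([[2.0, 0.0], [0.5, 1.0]])   # inverse affine transform matrix
    X0 = np.array([1.0, -3.0])               # affine offset
    x_prime = np.array([0.2, 0.7])           # point in transformed space

    x = M @ x_prime + X0                     # back to the original parameters
    print(x)
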
Example #10
 def check_force_resume(cls, output, info=None):
     """
     Performs the necessary checks on existing files if resuming or forcing
     (including deleting some output files when forcing).
     """
     if not output:
         return
     resuming: Optional[bool]
     if mpi.is_main_process():
         resuming = False
         if output.force:
             cls.delete_output_files(output, info=info)
         elif any(find_with_regexp(regexp, root or output.folder) for (regexp, root)
                  in cls.output_files_regexps(output=output, info=info, minimal=True)):
             if output.is_resuming():
                 output.log.info("Found an old sample. Resuming.")
                 resuming = True
             else:
                 raise LoggedError(
                     output.log, "Delete the previous output manually, automatically "
                                 "('-%s', '--%s', '%s: True')" % (
                                     "force"[0], "force", "force") +
                                 " or request resuming ('-%s', '--%s', '%s: True')" % (
                                     "resume"[0], "resume", "resume"))
         else:
             if output.is_resuming():
                 output.log.info(
                     "Did not find an old sample. Cleaning up and starting anew.")
             # Clean up old files, and set resuming=False,
             # regardless of requested value
             cls.delete_output_files(output, info=info)
     else:
         resuming = None
     output.set_resuming(resuming)
Example #11
def test_minimize_gaussian(tmpdir):
    maxloglik = 0
    for method in reversed(valid_methods):
        NoisyCovLike.noise = 0.005 if method == 'bobyqa' else 0
        info: InputDict = {
            'likelihood': {
                'like': NoisyCovLike
            },
            "sampler": {
                "minimize": {
                    "ignore_prior": True,
                    "method": method
                }
            }
        }
        products = run(info).sampler.products()
        error = abs(maxloglik - -products["minimum"]["minuslogpost"])
        assert error < 0.01

        info['output'] = os.path.join(tmpdir, 'testmin')
        products = run(info, force=True).sampler.products()
        if mpi.is_main_process():
            from getdist.types import BestFit
            res = BestFit(info['output'] + '.bestfit').getParamDict()
            assert np.isclose(res["loglike"],
                              products["minimum"]["minuslogpost"])
            for p, v in list(res.items())[:-2]:
                assert np.isclose(products["minimum"][p], v)
Example #12
def random_cov(ranges, O_std_min=1e-2, O_std_max=1, n_modes=1, mpi_warn=True):
    """
    Returns a random covariance matrix, with standard deviations sampled log-uniformly
    between the length of each parameter range times ``O_std_min`` and times ``O_std_max``,
    and a random correlation matrix drawn with ``scipy``'s ``random_correlation``.

    The output of this function can be used directly as the value of the option ``cov`` of
    the :class:`likelihoods.gaussian`.

    If ``n_modes>1``, returns a list of such matrices.
    """
    if not is_main_process() and mpi_warn:
        print("WARNING! "
              "Using with MPI: different process will produce different random results.")
    dim = len(ranges)
    scales = np.array([r[1] - r[0] for r in ranges])
    cov = []
    for i in range(n_modes):
        stds = scales * 10 ** (uniform.rvs(size=dim, loc=np.log10(O_std_min),
                                           scale=np.log10(O_std_max / O_std_min)))
        this_cov = np.diag(stds).dot(
            (random_correlation.rvs(dim * stds / sum(stds)) if dim > 1 else np.eye(1))
                .dot(np.diag(stds)))
        # Symmetrize (numerical noise is usually introduced in the last step)
        cov += [(this_cov + this_cov.T) / 2]
    if n_modes == 1:
        cov = cov[0]
    return cov
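
A self-contained sketch of the same construction on toy bounds: draw log-uniform standard deviations, combine them with a random correlation matrix from scipy, and symmetrize (the ranges below are illustrative only):

    import numpy as np
    from scipy.stats import random_correlation, uniform

    ranges = [[0.0, 1.0], [0.0, 2.0], [-1.0, 1.0]]   # toy parameter bounds
    o_std_min, o_std_max = 1e-2, 1.0
    dim = len(ranges)
    scales = np.array([r[1] - r[0] for r in ranges])

    # Standard deviations log-uniform between scale*o_std_min and scale*o_std_max
    stds = scales * 10 ** uniform.rvs(size=dim, loc=np.log10(o_std_min),
                                      scale=np.log10(o_std_max / o_std_min))
    # random_correlation expects eigenvalues that sum to the dimension
    corr = random_correlation.rvs(dim * stds / stds.sum())
    cov = np.diag(stds) @ corr @ np.diag(stds)
    cov = (cov + cov.T) / 2                          # symmetrize numerical noise
    print(np.all(np.linalg.eigvalsh(cov) > 0))       # should be positive definite
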
Example #13
 def write_checkpoint(self):
     if is_main_process() and self.output:
         checkpoint_filename = self.checkpoint_filename()
         self.dump_covmat(self.proposer.get_covariance())
         checkpoint_info = {
             "sampler": {
                 self.get_name():
                 dict([
                     ("converged", self.converged),
                     ("Rminus1_last", self.Rminus1_last),
                     (
                         "burn_in",
                         (
                             self.burn_in.
                             value  # initial: repeat burn-in if not finished
                             if not self.n() and self.burn_in_left else 0)
                     ),  # to avoid overweighting last point of prev. run
                     ("mpi_size", get_mpi_size())
                 ])
             }
         }
         yaml_dump_file(checkpoint_filename,
                        checkpoint_info,
                        error_if_exists=False)
         if not self.progress.empty:
             with open(self.progress_filename(), "a",
                       encoding="utf-8") as progress_file:
                 fmts = {"N": lambda x: "{:9d}".format(x)}
                 # TODO: next one is ignored when added to the dict
                 #        "acceptance_rate": lambda x: "{:15.8g}".format(x)}
                 progress_file.write(
                     self.progress.tail(1).to_string(
                         header=False, index=False, formatters=fmts) + "\n")
         self.log.debug(
             "Dumped checkpoint and progress info, and current covmat.")
Example #14
def bib_script():
    from cobaya.mpi import is_main_process
    if not is_main_process():
        return
    warn_deprecation()
    # Parse arguments and launch
    import argparse
    parser = argparse.ArgumentParser(
        prog="cobaya bib",
        description=
        "Prints bibliography to be cited for a component or input file.")
    parser.add_argument(
        "components_or_files",
        action="store",
        nargs="+",
        metavar="component_name or input_file.yaml",
        help="Component(s) or input file(s) whose bib info is requested.")
    kind_opt, kind_opt_ishort = "kind", 0
    parser.add_argument(
        "-" + kind_opt[kind_opt_ishort],
        "--" + kind_opt,
        action="store",
        nargs=1,
        default=None,
        metavar="component_kind",
        help=("If component name given, "
              "kind of component whose bib is requested: " +
              ", ".join(['%s' % kind for kind in kinds]) + ". " +
              "Use only when component name is not unique (it would fail)."))
    arguments = parser.parse_args()
    # Case of files
    are_yaml = [(os.path.splitext(f)[1] in _yaml_extensions)
                for f in arguments.components_or_files]
    if all(are_yaml):
        infos = [load_input(f) for f in arguments.components_or_files]
        print(prettyprint_bib(*get_bib_info(*infos)))
    elif not any(are_yaml):
        if arguments.kind:
            arguments.kind = arguments.kind[0].lower()
        for component in arguments.components_or_files:
            try:
                print(
                    create_banner(component,
                                  symbol=_default_symbol,
                                  length=_default_length))
                print(get_bib_component(component, arguments.kind))
                return
            except Exception:
                if not arguments.kind:
                    print(
                        "Specify its kind with '--%s [component_kind]'." %
                        kind_opt +
                        "(NB: all requested components must have the same kind, "
                        "or be requested separately).")
                print("")
    else:
        print("Give either a list of input yaml files, "
              "or of component names (not a mix of them).")
        return 1
    return
Example #15
    def run(self):
        """
        Prepares the posterior function and calls ``PolyChord``'s ``run`` function.
        """

        # Prepare the polychord likelihood
        def loglikelihood(params_values):
            result = self.model.logposterior(params_values)
            loglikes = result.loglikes
            if len(loglikes) != self.n_likes:
                loglikes = np.full(self.n_likes, np.nan)
            derived = result.derived
            if len(derived) != self.n_derived:
                derived = np.full(self.n_derived, np.nan)
            derived = list(derived) + list(result.logpriors) + list(loglikes)
            return max(loglikes.sum(), self.pc_settings.logzero), derived

        def prior(cube):
            theta = np.empty_like(cube)
            for i, xi in enumerate(np.array(cube)[self.ordering]):
                theta[i] = self.model.prior.pdf[i].ppf(xi)
            return theta

        if is_main_process():
            self.dump_paramnames(self.raw_prefix)
        sync_processes()
        self.mpi_info("Calling PolyChord...")
        self.pc.run_polychord(loglikelihood, self.nDims, self.nDerived,
                              self.pc_settings, prior, self.dumper)
        self.process_raw_output()
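
The `prior` function above maps PolyChord's unit-hypercube samples into parameter space through the inverse CDF (ppf) of each one-dimensional prior. A standalone sketch of that mapping with two arbitrary scipy distributions:

    import numpy as np
    from scipy.stats import norm, uniform

    # Arbitrary example priors: a standard normal and a uniform on [0, 10]
    priors_1d = [norm(loc=0.0, scale=1.0), uniform(loc=0.0, scale=10.0)]

    def hypercube_to_params(cube):
        # Push each unit-interval coordinate through the inverse CDF
        return np.array([pdf.ppf(x) for pdf, x in zip(priors_1d, cube)])

    print(hypercube_to_params([0.5, 0.5]))  # -> [0., 5.] (the prior medians)
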
Example #16
 def _load_covmat(self,
                  from_old_chain,
                  default_not_found=None,
                  auto_params=None):
     if from_old_chain and os.path.exists(self.covmat_filename()):
         if is_main_process():
             covmat = np.atleast_2d(np.loadtxt(self.covmat_filename()))
         else:
             covmat = None
         covmat = share_mpi(covmat)
         self.mpi_info("Covariance matrix from checkpoint.")
         return covmat, []
     elif default_not_found is not None:
         return default_not_found, []
     else:
         return share_mpi(
             self.initial_proposal_covmat(
                 auto_params=auto_params) if is_main_process() else None)
Example #17
def info_random_gaussian_mixture(
        ranges, n_modes=1, input_params_prefix="", output_params_prefix="",
        O_std_min=1e-2, O_std_max=1, derived=False, mpi_aware=True):
    """
    Wrapper around ``random_mean`` and ``random_cov`` to generate the likelihood and
    parameter info for a random Gaussian.

    If ``mpi_aware=True``, it draws the random stuff only once, and communicates it to
    the rest of the MPI processes.
    """
    if is_main_process() or not mpi_aware:
        cov = random_cov(ranges, n_modes=n_modes,
                         O_std_min=O_std_min, O_std_max=O_std_max, mpi_warn=False)
        if n_modes == 1:
            cov = [cov]
        # Make sure it stays away from the edges
        mean = [[]] * n_modes
        for i in range(n_modes):
            std = np.sqrt(cov[i].diagonal())
            factor = 3
            ranges_mean = [[l[0] + factor * s, l[1] - factor * s] for l, s in
                           zip(ranges, std)]
            # If this implies min>max, take the centre
            ranges_mean = [
                (l if l[0] <= l[1] else 2 * [(l[0] + l[1]) / 2]) for l in ranges_mean]
            mean[i] = random_mean(ranges_mean, n_modes=1, mpi_warn=False)
    if mpi_aware:
        mean, cov = share_mpi((mean, cov) if is_main_process() else None)
    dimension = len(ranges)
    info = {kinds.likelihood: {"gaussian_mixture": {
        "means": mean, "covs": cov, _input_params_prefix: input_params_prefix,
        _output_params_prefix: output_params_prefix, "derived": derived}}}
    info[_params] = dict(
        # sampled
        [(input_params_prefix + "_%d" % i,
          {"prior": {"min": ranges[i][0], "max": ranges[i][1]},
           "latex": r"\alpha_{%i}" % i})
         for i in range(dimension)] +
        # derived
        ([[output_params_prefix + "_%d" % i,
           {"min": -3, "max": 3, "latex": r"\beta_{%i}" % i}]
          for i in range(dimension * n_modes)] if derived else []))
    return info
Example #18
 def reload_updated_info(self, *args, **kwargs):
     if is_main_process():
         return Output.reload_updated_info(self, *args, **kwargs)
     else:
         # Only the cached version is available when not the main process
         if not kwargs.get("use_cache"):
             raise ValueError(
                 "Cannot call `reload_updated_info` from non-main process "
                 "unless cached version (`use_cache=True`) requested.")
         return self._old_updated_info
Example #19
    def products(self):
        """
        Auxiliary function to define what should be returned in a scripted call.

        Returns:
           The sample ``SampleCollection`` containing the accepted steps.
        """
        products = {"sample": self.collection}
        if is_main_process():
            products["progress"] = self.progress
        return products
Example #20
 def check_force_resume(cls, output, info=None):
     """
     Performs the necessary checks on existing files if resuming or forcing
     (including deleting some output files when forcing).
     """
     if output.is_resuming():
         if mpi.is_main_process():
             raise LoggedError(
                 output.log, "Minimizer does not support resuming. "
                             "If you want to start over, force "
                             "('-f', '--force', 'force: True')")
     super().check_force_resume(output, info=info)
Example #21
 def delete_output_files(cls, output, info=None):
     if output and is_main_process():
         for (regexp, root) in cls.output_files_regexps(output, info=info):
             # Special case: a CovmatSampler may have been given a covmat with the same
             # name as the output one. In that case, don't delete it!
             if issubclass(cls, CovmatSampler) and info:
                 if regexp.pattern.rstrip("$").endswith(_covmat_extension):
                     covmat_file = info.get("covmat", "")
                     if (isinstance(covmat_file, str)
                             and covmat_file == getattr(
                                 regexp.match(covmat_file), "group",
                                 lambda: None)()):
                         continue
             output.delete_with_regexp(regexp, root)
Example #22
 def check_all_ready(self):
     """
     Checks if the chain(s) is(/are) ready to check convergence and, if requested,
     learn a new covariance matrix for the proposal distribution.
     """
     msg_ready = ("Ready to check convergence" +
                  (" and learn a new proposal covmat"
                   if self.learn_proposal else ""))
     n = len(self.collection)
     # If *just* (weight==1) got ready to check+learn
     if not (n % self.learn_every.value) and n > 0:
         self.log.info("Learn + convergence test @ %d samples accepted.", n)
         if more_than_one_process():
             self.been_waiting += 1
             if self.been_waiting > self.max_waiting:
                 self.send_error_signal()
                 raise LoggedError(
                     self.log,
                     "Waiting for too long for all chains to be ready. "
                     "Maybe one of them is stuck or died unexpectedly?")
         self.model.dump_timing()
         # If not MPI size > 1, we are ready
         if not more_than_one_process():
             self.log.debug(msg_ready)
             return True
         # Error check in case any process already sent an error signal
         self.check_error_signal()
         # If MPI, tell the rest that we are ready -- we use a "gather"
         # ("reduce" was problematic), but we are in practice just pinging
         if not hasattr(self, "req"):  # just once!
             self.all_ready = np.empty(get_mpi_size())
             self.req = get_mpi_comm().Iallgather(np.array([1.]),
                                                  self.all_ready)
             self.log.info(msg_ready + " (waiting for the rest...)")
     # If all processes are ready to learn (= communication finished)
     if self.req.Test() if hasattr(self, "req") else False:
         # Sanity check: actually all processes have finished
         assert np.all(self.all_ready == 1), (
             "This should not happen! Notify the developers. (Got %r)",
             self.all_ready)
         if more_than_one_process() and is_main_process():
             self.log.info("All chains are r" + msg_ready[1:])
         delattr(self, "req")
         self.been_waiting = 0
         # Another error check, in case the error occurred after sending "ready" signal
         self.check_error_signal()
         # Just in case, a barrier here
         sync_processes()
         return True
     return False
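
The non-blocking ``Iallgather`` lets each chain announce that it is ready without stalling its own sampling; ``req.Test()`` then polls for completion on later iterations. A minimal mpi4py sketch of that pattern (assumes an MPI-3 capable mpi4py build):

    import numpy as np
    from mpi4py import MPI

    comm = MPI.COMM_WORLD
    all_ready = np.empty(comm.Get_size())

    # Post the non-blocking gather: contribute a 1 and keep working meanwhile
    req = comm.Iallgather(np.array([1.0]), all_ready)

    # ... more sampling would happen here ...

    if req.Test():            # True once every process has posted its signal
        assert np.all(all_ready == 1)
    else:
        req.Wait()            # or keep polling on later iterations
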
Example #23
def info_random_gaussian_mixture(ranges, n_modes=1, input_params_prefix="",
                                 output_params_prefix="", O_std_min=1e-2, O_std_max=1,
                                 derived=False, mpi_aware=True,
                                 random_state=None):
    """
    Wrapper around ``random_mean`` and ``random_cov`` to generate the likelihood and
    parameter info for a random Gaussian.

    If ``mpi_aware=True``, it draws the random stuff only once, and communicates it to
    the rest of the MPI processes.
    """
    cov: Any
    mean: Any
    if is_main_process() or not mpi_aware:
        cov = random_cov(ranges, n_modes=n_modes, O_std_min=O_std_min,
                         O_std_max=O_std_max, mpi_warn=False, random_state=random_state)
        if n_modes == 1:
            cov = [cov]
        # Make sure it stays away from the edges
        mean = [[]] * n_modes
        for i in range(n_modes):
            std = np.sqrt(cov[i].diagonal())
            factor = 3
            ranges_mean = [[r[0] + factor * s, r[1] - factor * s] for r, s in
                           zip(ranges, std)]
            # If this implies min>max, take the centre
            ranges_mean = [
                (r if r[0] <= r[1] else 2 * [(r[0] + r[1]) / 2]) for r in ranges_mean]
            mean[i] = random_mean(ranges_mean, n_modes=1, mpi_warn=False,
                                  random_state=random_state)
    else:
        mean, cov = None, None
    if mpi_aware:
        mean, cov = share_mpi((mean, cov))
    dimension = len(ranges)
    info: InputDict = {"likelihood": {"gaussian_mixture": {
        "means": mean, "covs": cov, "input_params_prefix": input_params_prefix,
        "output_params_prefix": output_params_prefix, "derived": derived}},
        "params": dict(
            # sampled
            tuple((input_params_prefix + "_%d" % i,
                   {"prior": {"min": ranges[i][0], "max": ranges[i][1]},
                    "latex": r"\alpha_{%i}" % i})
                  for i in range(dimension)) +
            # derived
            (tuple((output_params_prefix + "_%d" % i,
                    {"latex": r"\beta_{%i}" % i})
                   for i in range(dimension * n_modes)) if derived else ()))}
    return info
Example #24
    def products(self):
        """
        Auxiliary function to define what should be returned in a scripted call.

        Returns:
           The sample ``SampleCollection`` containing the sequentially
           discarded live points.
        """
        if is_main_process():
            products = {
                "sample": self.collection, "logZ": self.logZ, "logZstd": self.logZstd}
            if self.pc_settings.do_clustering:
                products.update({"clusters": self.clusters})
            return products
        else:
            return {}
Example #25
 def _set_rng(self):
     """
     Initialize random generator stream. For seeded runs, sets the state reproducibly.
     """
     # TODO: checkpointing save of self._rng.bit_generator.state per process
     if mpi.is_main_process():
         seed = getattr(self, "seed", None)
         if seed is not None:
             self.mpi_warning("This run has been SEEDED with seed %s", seed)
         ss = SeedSequence(seed)
         child_seeds = ss.spawn(mpi.size())
     else:
         child_seeds = None
     ss = mpi.scatter(child_seeds)
     self._entropy = ss.entropy  # for debugging store for reproducibility
     self._rng = default_rng(ss)
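
The seeding above spawns one independent child stream per MPI process from a single ``SeedSequence``, so a seeded run stays reproducible across ranks. The same mechanism without MPI, as a small numpy sketch (`n_processes` stands in for ``mpi.size()``):

    from numpy.random import SeedSequence, default_rng

    n_processes = 4                      # stand-in for mpi.size()
    ss = SeedSequence(12345)             # fixed seed -> reproducible streams
    child_seeds = ss.spawn(n_processes)  # one independent stream per process
    rngs = [default_rng(s) for s in child_seeds]
    print([rng.standard_normal() for rng in rngs])
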
Example #26
 def set_difflogmax():
     nonlocal difflogmax
     difflog = (collection_in[OutPar.minuslogpost].to_numpy(
         dtype=np.float64)[:len(collection_out)]
                - collection_out[OutPar.minuslogpost].to_numpy(dtype=np.float64))
     difflogmax = np.max(difflog)
     if abs(difflogmax) < 1:
         difflogmax = 0  # keep simple when e.g. very similar
     log.debug("difflogmax: %g", difflogmax)
     if mpi.more_than_one_process():
         difflogmax = max(mpi.allgather(difflogmax))
     if mpi.is_main_process():
         log.debug("Set difflogmax: %g", difflogmax)
     _weights = np.exp(difflog - difflogmax)
     importance_weights.extend(_weights)
     collection_out.reweight(_weights)
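
The reweighting above is a standard importance-weighting step, made numerically stable by subtracting the maximum log-difference before exponentiating so that the largest weight is exactly 1. A tiny numpy sketch with hypothetical -log(posterior) columns:

    import numpy as np

    # Hypothetical -log(posterior) values of the input and output collections
    minuslogpost_in = np.array([10.2, 11.5, 9.8])
    minuslogpost_out = np.array([12.0, 12.9, 11.1])

    difflog = minuslogpost_in - minuslogpost_out
    difflogmax = difflog.max()              # subtracted for numerical stability
    weights = np.exp(difflog - difflogmax)  # importance weights, max weight == 1
    print(weights)
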
Example #27
 def write_checkpoint(self):
     if is_main_process() and self.output:
         checkpoint_filename = self.checkpoint_filename()
         self.dump_covmat(self.proposer.get_covariance())
         checkpoint_info = {kinds.sampler: {self.get_name(): dict([
             ("converged", bool(self.converged)),
             ("Rminus1_last", self.Rminus1_last),
             ("burn_in", (self.burn_in.value  # initial: repeat burn-in if not finished
                          if not self.n() and self.burn_in_left else
                          0)),  # to avoid overweighting last point of prev. run
             ("mpi_size", get_mpi_size())])}}
         yaml_dump_file(checkpoint_filename, checkpoint_info, error_if_exists=False)
         if not self.progress.empty:
             with open(self.progress_filename(), "a",
                       encoding="utf-8") as progress_file:
                 progress_file.write(
                     self.progress.tail(1).to_string(header=False, index=False) + "\n")
         self.log.debug("Dumped checkpoint and progress info, and current covmat.")
Example #28
def random_mean(ranges, n_modes=1, mpi_warn=True):
    """
    Returns a uniformly sampled point (as an array) within a list of bounds ``ranges``.

    The output of this function can be used directly as the value of the option ``mean``
    of the :class:`likelihoods.gaussian`.

    If ``n_modes>1``, returns an array of such points.
    """
    if not is_main_process() and mpi_warn:
        print("WARNING! "
              "Using with MPI: different process will produce different random results.")
    mean = np.array([uniform.rvs(loc=r[0], scale=r[1] - r[0], size=n_modes)
                     for r in ranges])
    mean = mean.T
    if n_modes == 1:
        mean = mean[0]
    return mean
Example #29
 def initialize(self):
     """Imports the PolyChord sampler and prepares its arguments."""
     # Allow global import if no direct path specification
     allow_global = not self.path
     if not self.path and self.packages_path:
         self.path = self.get_path(self.packages_path)
     self.pc = self.is_installed(path=self.path, allow_global=allow_global)
     if not self.pc:
         raise NotInstalledError(
             self.log,
             "Could not find PolyChord. Check error message above. "
             "To install it, run 'cobaya-install polychord --%s "
             "[packages_path]'", _packages_path_arg)
     # Prepare arguments and settings
     from pypolychord.settings import PolyChordSettings
     self.n_sampled = len(self.model.parameterization.sampled_params())
     self.n_derived = len(self.model.parameterization.derived_params())
     self.n_priors = len(self.model.prior)
     self.n_likes = len(self.model.likelihood)
     self.nDims = self.model.prior.d()
     self.nDerived = (self.n_derived + self.n_priors + self.n_likes)
     if self.logzero is None:
         self.logzero = np.nan_to_num(-np.inf)
     if self.max_ndead == np.inf:
         self.max_ndead = -1
     self._quants_d_units = ["nlive", "max_ndead"]
     for p in self._quants_d_units:
         if getattr(self, p) is not None:
             setattr(
                 self, p,
                 NumberWithUnits(getattr(self, p),
                                 "d",
                                 scale=self.nDims,
                                 dtype=int).value)
     self._quants_nlive_units = ["nprior"]
     for p in self._quants_nlive_units:
         if getattr(self, p) is not None:
             setattr(
                 self, p,
                 NumberWithUnits(getattr(self, p),
                                 "nlive",
                                 scale=self.nlive,
                                 dtype=int).value)
     # Fill the automatic ones
     if getattr(self, "feedback", None) is None:
         values = {
             logging.CRITICAL: 0,
             logging.ERROR: 0,
             logging.WARNING: 0,
             logging.INFO: 1,
             logging.DEBUG: 2
         }
         self.feedback = values[self.log.getEffectiveLevel()]
     # Prepare output folders and prefixes
     if self.output:
         self.file_root = self.output.prefix
         self.read_resume = self.output.is_resuming()
     else:
         output_prefix = share_mpi(
             hex(int(random() * 16**6))[2:] if is_main_process() else None)
         self.file_root = output_prefix
         # dummy output -- no resume!
         self.read_resume = False
     self.base_dir = self.get_base_dir(self.output)
     self.raw_clusters_dir = os.path.join(self.base_dir, self._clusters_dir)
     self.output.create_folder(self.base_dir)
     if self.do_clustering:
         self.clusters_folder = self.get_clusters_dir(self.output)
         self.output.create_folder(self.clusters_folder)
     self.mpi_info("Storing raw PolyChord output in '%s'.", self.base_dir)
     # Exploiting the speed hierarchy
     if self.blocking:
         blocks, oversampling_factors = self.model.check_blocking(
             self.blocking)
     else:
         if self.measure_speeds:
             self.model.measure_and_set_speeds(n=self.measure_speeds)
         blocks, oversampling_factors = self.model.get_param_blocking_for_sampler(
             oversample_power=self.oversample_power)
     self.mpi_info("Parameter blocks and their oversampling factors:")
     max_width = len(str(max(oversampling_factors)))
     for f, b in zip(oversampling_factors, blocks):
         self.mpi_info("* %" + "%d" % max_width + "d : %r", f, b)
     # Save blocking in updated info, in case we want to resume
     self._updated_info["blocking"] = list(zip(oversampling_factors,
                                               blocks))
     blocks_flat = list(chain(*blocks))
     self.ordering = [
         blocks_flat.index(p)
         for p in self.model.parameterization.sampled_params()
     ]
     self.grade_dims = [len(block) for block in blocks]
     # Steps per block
     # NB: num_repeats is ignored by PolyChord when int "grade_frac" given,
     # so needs to be applied by hand.
     # In num_repeats, `d` is interpreted as dimension of each block
     self.grade_frac = [
         int(o * read_dnumber(self.num_repeats, dim_block))
         for o, dim_block in zip(oversampling_factors, self.grade_dims)
     ]
     # Assign settings
     pc_args = [
         "nlive", "num_repeats", "nprior", "do_clustering",
         "precision_criterion", "max_ndead", "boost_posterior", "feedback",
         "logzero", "posteriors", "equals", "compression_factor",
         "cluster_posteriors", "write_resume", "read_resume", "write_stats",
         "write_live", "write_dead", "base_dir", "file_root", "grade_frac",
         "grade_dims"
     ]
     # As stated above, num_repeats is ignored, so let's not pass it
     pc_args.pop(pc_args.index("num_repeats"))
     self.pc_settings = PolyChordSettings(
         self.nDims,
         self.nDerived,
         seed=(self.seed if self.seed is not None else -1),
         **{
             p: getattr(self, p)
             for p in pc_args if getattr(self, p) is not None
         })
     # prior conversion from the hypercube
     bounds = self.model.prior.bounds(
         confidence_for_unbounded=self.confidence_for_unbounded)
     # Check if priors are bounded (nan's to inf)
     inf = np.where(np.isinf(bounds))
     if len(inf[0]):
         params_names = self.model.parameterization.sampled_params()
         params = [params_names[i] for i in sorted(list(set(inf[0])))]
         raise LoggedError(
             self.log,
             "PolyChord needs bounded priors, but the parameter(s) '"
             "', '".join(params) + "' is(are) unbounded.")
     locs = bounds[:, 0]
     scales = bounds[:, 1] - bounds[:, 0]
     # This function re-scales the parameters AND puts them in the right order
     self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales
                                ).tolist()
     # We will need the volume of the prior domain, since PolyChord divides by it
     self.logvolume = np.log(np.prod(scales))
     # Prepare callback function
     if self.callback_function is not None:
         self.callback_function_callable = (get_external_function(
             self.callback_function))
     self.last_point_callback = 0
     # Prepare runtime live and dead points collections
     self.live = Collection(self.model,
                            None,
                            name="live",
                            initial_size=self.pc_settings.nlive)
     self.dead = Collection(self.model, self.output, name="dead")
     # Done!
     if is_main_process():
         self.log.debug("Calling PolyChord with arguments:")
         for p, v in inspect.getmembers(self.pc_settings,
                                        lambda a: not (callable(a))):
             if not p.startswith("_"):
                 self.log.debug("  %s: %s", p, v)
     self.mpi_info("Initialized!")
Example #30
 def process_raw_output(self):
     """
     Loads the sample of live points from ``PolyChord``'s raw output and writes it
     (if ``txt`` output requested).
     """
     if is_main_process():
         self.log.info(
             "Loading PolyChord's results: samples and evidences.")
         self.dump_paramnames(self.raw_prefix)
         self.collection = self.save_sample(self.raw_prefix + ".txt", "1")
         # Load clusters, and save if output
         if self.pc_settings.do_clustering:
             self.clusters = {}
             clusters_raw_regexp = re.compile(
                 re.escape(self.pc_settings.file_root + "_") + r"\d+\.txt")
             cluster_raw_files = sorted(
                 find_with_regexp(clusters_raw_regexp,
                                  os.path.join(self.pc_settings.base_dir,
                                               self._clusters_dir),
                                  walk_tree=True))
             for f in cluster_raw_files:
                 i = int(f[f.rfind("_") + 1:-len(".txt")])
                 if self.output:
                     old_folder = self.output.folder
                     self.output.folder = self.clusters_folder
                 sample = self.save_sample(f, str(i))
                 if self.output:
                     self.output.folder = old_folder
                 self.clusters[i] = {"sample": sample}
         # Prepare the evidence(s) and write to file
         pre = "log(Z"
         active = "(Still active)"
         with open(self.raw_prefix + ".stats", "r",
                   encoding="utf-8-sig") as statsfile:
             lines = [l for l in statsfile.readlines() if l.startswith(pre)]
         for l in lines:
             logZ, logZstd = [
                 float(n.replace(active, ""))
                 for n in l.split("=")[-1].split("+/-")
             ]
             component = l.split("=")[0].lstrip(pre + "_").rstrip(") ")
             if not component:
                 self.logZ, self.logZstd = logZ, logZstd
             elif self.pc_settings.do_clustering:
                 i = int(component)
                 self.clusters[i]["logZ"], self.clusters[i][
                     "logZstd"] = logZ, logZstd
         self.log.debug(
             "RAW log(Z) = %g +/- %g ; RAW Z in [%.8g, %.8g] (68%% C.L. log-gaussian)",
             self.logZ, self.logZstd,
             *[np.exp(self.logZ + n * self.logZstd) for n in [-1, 1]])
         self._correct_unphysical_fraction()
         if self.output:
             out_evidences = dict(logZ=self.logZ, logZstd=self.logZstd)
             if getattr(self, "clusters", None):
                 out_evidences["clusters"] = {}
                 for i in sorted(list(self.clusters)):
                     out_evidences["clusters"][i] = dict(
                         logZ=self.clusters[i]["logZ"],
                         logZstd=self.clusters[i]["logZstd"])
             fname = os.path.join(self.output.folder,
                                  self.output.prefix + _evidence_extension)
             yaml_dump_file(fname,
                            out_evidences,
                            comment="log-evidence",
                            error_if_exists=False)
     # TODO: try to broadcast the collections
     # if get_mpi():
     #     bcast_from_0 = lambda attrname: setattr(self,
     #         attrname, get_mpi_comm().bcast(getattr(self, attrname, None), root=0))
     #     map(bcast_from_0, ["collection", "logZ", "logZstd", "clusters"])
     if is_main_process():
         self.log.info(
             "Finished! Raw PolyChord output stored in '%s', "
             "with prefix '%s'", self.pc_settings.base_dir,
             self.pc_settings.file_root)
         self.log.info(
             "log(Z) = %g +/- %g ; Z in [%.8g, %.8g] (68%% C.L. log-gaussian)",
             self.logZ, self.logZstd,
             *[np.exp(self.logZ + n * self.logZstd) for n in [-1, 1]])
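
The evidence parsing above pulls `log(Z) = value +/- error` lines (including per-cluster entries) out of PolyChord's `.stats` file. A standalone sketch of that string handling on hypothetical file contents:

    stats_lines = [
        "log(Z)       =  -12.345 +/-   0.067",
        "log(Z_1)     =  -13.210 +/-   0.100 (Still active)",
    ]
    active = "(Still active)"
    for line in stats_lines:
        logZ, logZstd = [float(n.replace(active, ""))
                         for n in line.split("=")[-1].split("+/-")]
        component = line.split("=")[0].strip().lstrip("log(Z_").rstrip(") ")
        print(component or "total", logZ, logZstd)
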