Example #1
0
    def reference(self,
                  max_tries=np.inf,
                  warn_if_tries="10d",
                  ignore_fixed=False,
                  warn_if_no_ref=True):
        """
        Returns:
          One sample from the ref pdf. For those parameters that do not have a ref pdf
          defined, their value is sampled from the prior.

        Args:
          max_tries: maximum number of draws attempted before giving up with an
            error (default: no limit).
          warn_if_tries: number of failed draws after which a warning is logged
            (once). It is converted with `read_dnumber`, which also receives the
            dimension of the prior.
          ignore_fixed: see below.
          warn_if_no_ref: if True (default), log a message when some parameter has
            no reference defined (i.e. its entry in `self.ref_pdf` is `np.nan`).

        If `ignore_fixed=True` (default: `False`), fixed reference values will be ignored
        in favor of the full prior, ensuring some randomness for all parameters (useful
        e.g. to prevent caching when measuring speeds).

        Raises:
          LoggedError: if no point with finite prior log-density was drawn within
            `max_tries` tries.

        NB: The way this function works may be a little dangerous:
        if two parameters have an (external)
        joint prior defined and only one of them has a reference pdf, one should
        sample the other from the joint prior *conditioned* to the first one being
        drawn from the reference. Otherwise, one may end up with a point with null
        prior pdf. In any case, it should work for getting initial reference points,
        e.g. for an MCMC chain.
        """
        # `in` on a list checks identity first, so this finds the `np.nan` singleton
        if np.nan in self.ref_pdf and warn_if_no_ref:
            self.log.info(
                "Reference values or pdf's for some parameters were not provided. "
                "Sampling from the prior instead for those parameters.")
        # Mask of parameters whose value is taken from the prior: those without a
        # reference and, if requested, those whose reference is a fixed number.
        where_ignore_ref = [
            ref is np.nan or
            (isinstance(ref, numbers.Real) if ignore_fixed else False)
            for ref in self.ref_pdf
        ]
        # The mask does not change between tries: evaluate it only once
        use_prior_for_some = np.any(where_ignore_ref)
        tries = 0
        warn_if_tries = read_dnumber(warn_if_tries, self.d())
        while tries < max_tries:
            tries += 1
            # Objects with an `rvs` method are sampled; plain numbers (including
            # nan) are taken as they are through their `real` attribute.
            ref_sample = np.array([
                getattr(ref_pdf, "rvs", lambda: ref_pdf.real)()
                for ref_pdf in self.ref_pdf
            ])
            if use_prior_for_some:
                prior_sample = self.sample(ignore_external=True)[0]
                ref_sample[where_ignore_ref] = prior_sample[where_ignore_ref]
            if self.logp(ref_sample) > -np.inf:
                return ref_sample
            if tries == warn_if_tries:
                self.log.warning(
                    "If stuck here, maybe it is not possible to sample from the "
                    "reference pdf a point with non-null prior. Check that the reference "
                    "pdf and the prior are consistent.")
        if self.reference_is_pointlike():
            raise LoggedError(
                self.log, "The reference point provided has null prior. "
                "Set 'ref' to a different point or a pdf.")
        raise LoggedError(
            self.log,
            "Could not sample from the reference pdf a point with non-"
            "null prior density after %d tries. "
            "Maybe your prior is improper or your reference pdf is "
            "null-defined in the domain of the prior.", max_tries)
Example #2
0
    def reference(self, max_tries=np.inf, max_tries_warning="10d"):
        """
        Returns:
          One sample from the ref pdf. For those parameters that do not have a ref pdf
          defined, their value is sampled from the prior.

        Args:
          max_tries: maximum number of draws attempted before giving up with an
            error (default: no limit).
          max_tries_warning: number of failed draws after which a warning is logged
            (once). It is converted with `read_dnumber`, which also receives the
            dimension of the prior.

        Raises:
          HandledException: if no point with finite prior log-density was drawn
            within `max_tries` tries (an error is logged first).

        NB: The way this function works may be a little dangerous:
        if two parameters have an (external)
        joint prior defined and only one of them has a reference pdf, one should
        sample the other from the joint prior *conditioned* to the first one being
        drawn from the reference. Otherwise, one may end up with a point with null
        prior pdf. In any case, it should work for getting initial reference points,
        e.g. for an MCMC chain.
        """
        # `in` on a list checks identity first, so this finds the `np.nan` singleton
        if np.nan in self.ref_pdf:
            log.info(
                "Reference values or pdf's for some parameters were not provided. "
                "Sampling from the prior instead for those parameters.")
        tries = 0
        max_tries_warning = read_dnumber(max_tries_warning, self.d())
        while tries < max_tries:
            tries += 1
            # Objects with an `rvs` method are sampled; plain numbers (including
            # nan) are taken as they are through their `real` attribute.
            ref_sample = np.array([
                getattr(ref_pdf, "rvs", lambda: ref_pdf.real)()
                for ref_pdf in self.ref_pdf
            ])
            # Entries that are still nan have no reference: fill them from the prior
            where_no_ref = np.isnan(ref_sample)
            if np.any(where_no_ref):
                prior_sample = self.sample(ignore_external=True)[0]
                ref_sample[where_no_ref] = prior_sample[where_no_ref]
            if self.logp(ref_sample) > -np.inf:
                return ref_sample
            if tries == max_tries_warning:
                log.warning(
                    "If stuck here, maybe it is not possible to sample from the "
                    "reference pdf a point with non-null prior. Check that they "
                    "are consistent.")
        log.error(
            "Couldn't sample from the reference pdf a point with non-"
            "null prior density after '%d' tries. "
            "Maybe your prior is improper or your reference pdf is "
            "null-defined in the domain of the prior.", max_tries)
        raise HandledException
Example #3
0
 def initialize(self):
     """Imports the PolyChord sampler and prepares its arguments.

     In order: resolves the installation path and checks that PolyChord can be
     found; converts the settings given in units of the dimension (or of
     ``nlive``) to integers; prepares the output folders; sets up the parameter
     blocking and the number of steps per block; builds the
     ``PolyChordSettings`` object and the hypercube-to-prior transformation;
     and creates the live and dead point collections.

     Raises:
       NotInstalledError: if ``self.is_installed`` returns a false value.
       LoggedError: if any of the bounds returned by the prior is infinite.
     """
     # Allow global import if no direct path specification
     allow_global = not self.path
     if not self.path and self.packages_path:
         self.path = self.get_path(self.packages_path)
     self.pc = self.is_installed(path=self.path, allow_global=allow_global)
     if not self.pc:
         raise NotInstalledError(
             self.log,
             "Could not find PolyChord. Check error message above. "
             "To install it, run 'cobaya-install polychord --%s "
             "[packages_path]'", _packages_path_arg)
     # Prepare arguments and settings
     from pypolychord.settings import PolyChordSettings
     self.n_sampled = len(self.model.parameterization.sampled_params())
     self.n_derived = len(self.model.parameterization.derived_params())
     self.n_priors = len(self.model.prior)
     self.n_likes = len(self.model.likelihood)
     self.nDims = self.model.prior.d()
     self.nDerived = (self.n_derived + self.n_priors + self.n_likes)
     if self.logzero is None:
         self.logzero = np.nan_to_num(-np.inf)
     if self.max_ndead == np.inf:
         self.max_ndead = -1
     # Quantities that can be given in units of the dimension ("d")
     self._quants_d_units = ["nlive", "max_ndead"]
     for p in self._quants_d_units:
         if getattr(self, p) is not None:
             setattr(
                 self, p,
                 NumberWithUnits(getattr(self, p),
                                 "d",
                                 scale=self.nDims,
                                 dtype=int).value)
     # Quantities that can be given in units of "nlive" (already converted above)
     self._quants_nlive_units = ["nprior"]
     for p in self._quants_nlive_units:
         if getattr(self, p) is not None:
             setattr(
                 self, p,
                 NumberWithUnits(getattr(self, p),
                                 "nlive",
                                 scale=self.nlive,
                                 dtype=int).value)
     # Fill the automatic ones
     if getattr(self, "feedback", None) is None:
         values = {
             logging.CRITICAL: 0,
             logging.ERROR: 0,
             logging.WARNING: 0,
             logging.INFO: 1,
             logging.DEBUG: 2
         }
         self.feedback = values[self.log.getEffectiveLevel()]
     # Prepare output folders and prefixes
     if self.output:
         self.file_root = self.output.prefix
         self.read_resume = self.output.is_resuming()
     else:
         # Random prefix drawn by the main process and shared with the rest
         output_prefix = share_mpi(
             hex(int(random() * 16**6))[2:] if is_main_process() else None)
         self.file_root = output_prefix
         # dummy output -- no resume!
         self.read_resume = False
     # NOTE(review): `self.output` is used below even when it evaluated as false
     # above; presumably it is a dummy output object in that case -- confirm.
     self.base_dir = self.get_base_dir(self.output)
     self.raw_clusters_dir = os.path.join(self.base_dir, self._clusters_dir)
     self.output.create_folder(self.base_dir)
     if self.do_clustering:
         self.clusters_folder = self.get_clusters_dir(self.output)
         self.output.create_folder(self.clusters_folder)
     self.mpi_info("Storing raw PolyChord output in '%s'.", self.base_dir)
     # Exploiting the speed hierarchy
     if self.blocking:
         blocks, oversampling_factors = self.model.check_blocking(
             self.blocking)
     else:
         if self.measure_speeds:
             self.model.measure_and_set_speeds(n=self.measure_speeds)
         blocks, oversampling_factors = self.model.get_param_blocking_for_sampler(
             oversample_power=self.oversample_power)
     self.mpi_info("Parameter blocks and their oversampling factors:")
     # Right-align the factors: the format string becomes e.g. "* %3d : %r"
     max_width = len(str(max(oversampling_factors)))
     for f, b in zip(oversampling_factors, blocks):
         self.mpi_info("* %" + "%d" % max_width + "d : %r", f, b)
     # Save blocking in updated info, in case we want to resume
     self._updated_info["blocking"] = list(zip(oversampling_factors,
                                               blocks))
     blocks_flat = list(chain(*blocks))
     # Position of each sampled parameter in the flattened list of blocks
     self.ordering = [
         blocks_flat.index(p)
         for p in self.model.parameterization.sampled_params()
     ]
     self.grade_dims = [len(block) for block in blocks]
     # Steps per block
     # NB: num_repeats is ignored by PolyChord when int "grade_frac" given,
     # so needs to be applied by hand.
     # In num_repeats, `d` is interpreted as dimension of each block
     self.grade_frac = [
         int(o * read_dnumber(self.num_repeats, dim_block))
         for o, dim_block in zip(oversampling_factors, self.grade_dims)
     ]
     # Assign settings
     # As stated above, num_repeats is ignored, so it is not in the list
     pc_args = [
         "nlive", "nprior", "do_clustering", "precision_criterion",
         "max_ndead", "boost_posterior", "feedback", "logzero",
         "posteriors", "equals", "compression_factor",
         "cluster_posteriors", "write_resume", "read_resume",
         "write_stats", "write_live", "write_dead", "base_dir",
         "file_root", "grade_frac", "grade_dims"
     ]
     # Settings left as None are not passed
     self.pc_settings = PolyChordSettings(
         self.nDims,
         self.nDerived,
         seed=(self.seed if self.seed is not None else -1),
         **{
             p: getattr(self, p)
             for p in pc_args if getattr(self, p) is not None
         })
     # prior conversion from the hypercube
     bounds = self.model.prior.bounds(
         confidence_for_unbounded=self.confidence_for_unbounded)
     # Check if priors are bounded (nan's to inf)
     inf = np.where(np.isinf(bounds))
     if len(inf[0]):
         # `list(...)` allows positional indexing whatever container is returned
         params_names = list(self.model.parameterization.sampled_params())
         params = [params_names[i] for i in sorted(set(inf[0]))]
         # Explicit `+`: adjacent string literals are merged *before* `.join`
         # is applied, which would turn the whole prefix into the separator.
         raise LoggedError(
             self.log,
             "PolyChord needs bounded priors, but the parameter(s) '" +
             "', '".join(params) + "' is(are) unbounded.")
     locs = bounds[:, 0]
     scales = bounds[:, 1] - bounds[:, 0]
     # This function re-scales the parameters AND puts them in the right order
     self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales
                                ).tolist()
     # We will need the volume of the prior domain, since PolyChord divides by it
     self.logvolume = np.log(np.prod(scales))
     # Prepare callback function
     if self.callback_function is not None:
         self.callback_function_callable = (get_external_function(
             self.callback_function))
     self.last_point_callback = 0
     # Prepare runtime live and dead points collections
     self.live = Collection(self.model,
                            None,
                            name="live",
                            initial_size=self.pc_settings.nlive)
     self.dead = Collection(self.model, self.output, name="dead")
     # Done!
     if is_main_process():
         self.log.debug("Calling PolyChord with arguments:")
         # Non-callable, non-underscore members of the settings object
         for p, v in inspect.getmembers(self.pc_settings,
                                        lambda a: not (callable(a))):
             if not p.startswith("_"):
                 self.log.debug("  %s: %s", p, v)
     self.mpi_info("Initialized!")
Example #4
0
    def initialize(self):
        """Prepares the target function, the starting point and the minimizer call.

        The target is the log-likelihood if ``self.ignore_prior`` is set, and the
        log-posterior otherwise. The starting point is taken from previous
        samples found through ``self.output`` if there are any, and otherwise
        drawn from the reference of the prior until the target is finite there.
        Parameters are then shifted and re-scaled so that the starting point
        sits at the origin, and the arguments for the chosen minimizer
        ("bobyqa" or "scipy") are stored in ``self.kwargs``.

        Raises:
          LoggedError: if ``self.method`` is not one of the known methods.
        """
        self.mpi_info("Initializing")
        self.max_evals = read_dnumber(self.max_evals, self.model.prior.d())
        # Configure target
        if self.ignore_prior:
            target = self.model.loglike
            target_kwargs = {"make_finite": True, "return_derived": False}
        else:
            target = self.model.logpost
            target_kwargs = {"make_finite": True}
        self.logp = lambda x: target(x, **target_kwargs)
        # Try to load info from previous samples.
        # If none, sample from reference (make sure that it has finite like/post)
        initial_point = None
        if self.output:
            files = self.output.find_collections()
            collection_in = None
            if files:
                if not more_than_one_process():
                    collection_in = self.output.load_collections(
                        self.model, concatenate=True)
                elif 1 + get_mpi_rank() <= len(files):
                    # With several processes, each one loads the chain
                    # named after its own rank, if there is one
                    collection_in = Collection(self.model,
                                               self.output,
                                               name=str(1 + get_mpi_rank()),
                                               resuming=True)
            if collection_in:
                if self.ignore_prior:
                    best_row = collection_in.bestfit()
                else:
                    best_row = collection_in.MAP()
                sampled_names = list(
                    self.model.parameterization.sampled_params())
                initial_point = best_row[sampled_names].values
                self.log.info("Starting from %s of previous chain:",
                              "best fit" if self.ignore_prior else "MAP")
        if initial_point is None:
            # Keep drawing until the target is finite at the drawn point
            while True:
                initial_point = self.model.prior.reference()
                if np.isfinite(self.logp(initial_point)):
                    break
            self.log.info("Starting from random initial point:")
        names_and_values = zip(self.model.parameterization.sampled_params(),
                               initial_point)
        self.log.info(dict(names_and_values))

        self._bounds = self.model.prior.bounds(
            confidence_for_unbounded=self.confidence_for_unbounded)

        # TODO: if ignore_prior, one should use *like* covariance (this is *post*)
        covmat = self._load_covmat(self.output)[0]

        # scale by conditional parameter widths (since not using correlation structure)
        conditional_widths = 1 / np.sqrt(np.diag(np.linalg.inv(covmat)))
        third_of_range = (self._bounds[:, 1] - self._bounds[:, 0]) / 3
        widths = np.minimum(conditional_widths, third_of_range)

        # Cov and affine transformation
        # Transform to space where initial point is at centre, and cov is normalised
        # Cannot do rotation, as supported minimization routines assume bounds aligned
        # with the parameter axes.
        self._affine_transform_matrix = np.diag(1 / widths)
        self._inv_affine_transform_matrix = np.diag(widths)
        self._scales = widths
        self._affine_transform_baseline = initial_point
        initial_point = self.affine_transform(initial_point)
        # Sanity check: the starting point must map onto the origin
        np.testing.assert_allclose(initial_point,
                                   np.zeros(initial_point.shape))
        lower = self.affine_transform(self._bounds[:, 0])
        upper = self.affine_transform(self._bounds[:, 1])
        bounds = np.array([lower, upper]).T
        # Configure method
        if self.method.lower() == "bobyqa":
            self.minimizer = pybobyqa.solve
            single_process = get_mpi_size() in [0, 1]
            self.kwargs = {
                "objfun": (lambda x: -self.logp_transf(x)),
                "x0": initial_point,
                # Transposed back: one row of lower and one of upper bounds
                "bounds": np.array(list(zip(*bounds))),
                "seek_global_minimum": (True if single_process else False),
                "maxfun": int(self.max_evals)
            }
            self.kwargs = recursive_update(deepcopy(self.kwargs),
                                           self.override_bobyqa or {})
            printable = {
                k: v
                for k, v in self.kwargs.items() if k != "objfun"
            }
            self.log.debug("Arguments for pybobyqa.solve:\n%r", printable)
        elif self.method.lower() == "scipy":
            self.minimizer = scpminimize
            show_progress = (self.log.getEffectiveLevel() == logging.DEBUG)
            self.kwargs = {
                "fun": (lambda x: -self.logp_transf(x)),
                "x0": initial_point,
                "bounds": bounds,
                "options": {
                    "maxiter": self.max_evals,
                    "disp": show_progress
                }
            }
            self.kwargs = recursive_update(deepcopy(self.kwargs),
                                           self.override_scipy or {})
            printable = {
                k: v
                for k, v in self.kwargs.items() if k != "fun"
            }
            self.log.debug("Arguments for scipy.optimize.minimize:\n%r",
                           printable)
        else:
            methods = ["bobyqa", "scipy"]
            raise LoggedError(self.log,
                              "Method '%s' not recognized. Try one of %r.",
                              self.method, methods)
Example #5
0
 def initialize(self):
     """Initializes the sampler: prepares the chain storage and the proposal.

     In order: converts the settings given in units of the dimension to
     integers; checks that a resumed run uses the same number of chains;
     removes files left by a previous run when not resuming; creates the
     collection and current point for this process; configures oversampling
     or dragging; and builds the blocked proposer with its covariance matrix.

     Raises:
       HandledException: if resuming with a different number of chains, if
         both oversampling and dragging are requested, or if dragging is
         requested but all speeds are equal (an error is logged first).
     """
     self.log.debug("Initializing")
     for p in [
             "burn_in", "max_tries", "output_every", "check_every",
             "callback_every"
     ]:
         setattr(
             self, p,
             read_dnumber(getattr(self, p), self.model.prior.d(),
                          dtype=int))
     if self.callback_every is None:
         self.callback_every = self.check_every
     # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
     self.burn_in_left = self.burn_in + 1
     # Max # checkpoints to wait, in case one process dies without sending MPI_ABORT
     self.been_waiting = 0
     self.max_waiting = max(50, self.max_tries / self.model.prior.d())
     if self.resuming:
         # `mpi_size` may be None: apply the same fallback in the comparison
         # and in the message, since `max(None, 1)` raises in Python 3.
         previous_size = max(self.mpi_size or 0, 1)
         current_size = max(get_mpi_size(), 1)
         if previous_size != current_size:
             self.log.error(
                 "Cannot resume a sample with a different number of chains: "
                 "was %d and now is %d.", previous_size, current_size)
             raise HandledException
     if not self.resuming and self.output:
         # Delete previous files (if not "forced", the run would have already failed)
         if ((os.path.abspath(self.covmat_filename()) != os.path.abspath(
                 str(self.covmat)))):
             try:
                 os.remove(self.covmat_filename())
             except OSError:
                 # Best effort: there may be no previous file to delete
                 pass
         # There may be more that chains than expected,
         # if #ranks was bigger in a previous run
         i = 0
         while True:
             i += 1
             collection_filename, _ = self.output.prepare_collection(str(i))
             try:
                 os.remove(collection_filename)
             except OSError:
                 # First chain file that cannot be removed: stop looking
                 break
     # One collection per MPI process: `name` is the MPI rank + 1
     rank = get_mpi_rank()
     name = str(1 + (rank if rank is not None else 0))
     self.collection = Collection(self.model,
                                  self.output,
                                  name=name,
                                  resuming=self.resuming)
     self.current_point = OnePoint(self.model, OutputDummy({}), name=name)
     # Use standard MH steps by default
     self.get_new_sample = self.get_new_sample_metropolis
     # Prepare oversampling / dragging if applicable
     self.effective_max_samples = self.max_samples
     if self.oversample and self.drag:
         self.log.error("Choose either oversampling or dragging, not both.")
         raise HandledException
     # NOTE(review): not every branch below sets `oversampling_factors`,
     # `i_last_slow_block` and `n_slow`, although the first two are read when
     # building the proposer; presumably they have class-level defaults -- confirm.
     if self.oversample:
         factors, blocks = self.model.likelihood._speeds_of_params(
             int_speeds=True)
         self.oversampling_factors = factors
         self.log.info("Oversampling with factors:\n" + "\n".join([
             "   %d : %r" % (f, b)
             for f, b in zip(self.oversampling_factors, blocks)
         ]))
         self.i_last_slow_block = None
         # No way right now to separate slow and fast
         slow_params = list(self.model.parameterization.sampled_params())
     elif self.drag:
         speeds, blocks = self.model.likelihood._speeds_of_params(
             fast_slow=True, int_speeds=True)
         # For now, no blocking inside either fast or slow: just 2 blocks
         self.i_last_slow_block = 0
         if np.all(speeds == speeds[0]):
             self.log.error(
                 "All speeds are equal or too similar: cannot drag! "
                 "Make sure to define accurate likelihoods' speeds.")
             raise HandledException
         # Make the 1st factor 1:
         speeds = [1, speeds[1] / speeds[0]]
         # Target: dragging step taking as long as slow step
         self.drag_interp_steps = self.drag * speeds[1]
         # Per dragging step, the (fast) posterior is evaluated *twice*,
         self.drag_interp_steps /= 2
         self.drag_interp_steps = int(np.round(self.drag_interp_steps))
         fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
         # Not too much or too little dragging
         # (limits are given per fast parameter; None means no limit)
         drag_limits = [
             (int(limit) * len(fast_params) if limit is not None else limit)
             for limit in self.drag_limits
         ]
         if drag_limits[
                 0] is not None and self.drag_interp_steps < drag_limits[0]:
             self.log.warning(
                 "Number of dragging steps clipped from below: was not "
                 "enough to efficiently explore the fast directions -- "
                 "avoid this limit by decreasing 'drag_limits[0]'.")
             self.drag_interp_steps = drag_limits[0]
         if drag_limits[
                 1] is not None and self.drag_interp_steps > drag_limits[1]:
             self.log.warning(
                 "Number of dragging steps clipped from above: "
                 "excessive, probably inefficient, exploration of the "
                 "fast directions -- "
                 "avoid this limit by increasing 'drag_limits[1]'.")
             self.drag_interp_steps = drag_limits[1]
         # Re-scale steps between checkpoint and callback to the slow dimensions only
         slow_params = list(chain(*blocks[:1 + self.i_last_slow_block]))
         self.n_slow = len(slow_params)
         for p in ["check_every", "callback_every"]:
             setattr(
                 self, p,
                 int(getattr(self, p) * self.n_slow / self.model.prior.d()))
         self.log.info("Dragging with oversampling per step:\n" +
                       "\n".join([
                           "   %d : %r" % (f, b)
                           for f, b in zip([1, self.drag_interp_steps],
                                           [blocks[0], fast_params])
                       ]))
         self.get_new_sample = self.get_new_sample_dragging
     else:
         _, blocks = self.model.likelihood._speeds_of_params()
         self.oversampling_factors = [1 for _ in blocks]
         slow_params = list(self.model.parameterization.sampled_params())
         self.n_slow = len(slow_params)
     # Turn parameter names into indices
     sampled_names = list(self.model.parameterization.sampled_params())
     self.blocks = [[sampled_names.index(p) for p in b] for b in blocks]
     self.proposer = BlockedProposer(
         self.blocks,
         oversampling_factors=self.oversampling_factors,
         i_last_slow_block=self.i_last_slow_block,
         proposal_scale=self.proposal_scale)
     # Build the initial covariance matrix of the proposal, or load from checkpoint
     if self.resuming:
         covmat = np.loadtxt(self.covmat_filename())
         self.log.info("Covariance matrix from checkpoint.")
     else:
         covmat = self.initial_proposal_covmat(slow_params=slow_params)
         self.log.info("Initial covariance matrix.")
     self.log.debug(
         "Sampling with covmat:\n%s",
         DataFrame(
             covmat,
             columns=self.model.parameterization.sampled_params(),
             index=self.model.parameterization.sampled_params()).to_string(
                 line_width=_line_width))
     self.proposer.set_covariance(covmat)
     # Prepare callback function
     if self.callback_function is not None:
         self.callback_function_callable = (get_external_function(
             self.callback_function))
Example #6
0
 def initialize(self):
     """Imports the PolyChord sampler and prepares its arguments.

     In order: locates and imports ``pypolychord`` (from ``self.path`` if
     given, globally otherwise); converts the settings given in units of the
     dimension to integers; prepares the output folder and file prefix; sets
     up the parameter blocking and the number of steps per block; builds the
     ``PolyChordSettings`` object and the hypercube-to-prior transformation;
     and creates the live and dead point collections.

     Raises:
       LoggedError: if the given path does not exist or contains no build of
         PolyChord, if ``pypolychord`` cannot be imported, or if any of the
         bounds returned by the prior is infinite.
     """
     if am_single_or_primary_process(
     ):  # rank = 0 (MPI master) or None (no MPI)
         self.log.info("Initializing")
     # If path not given, try using general path to modules
     if not self.path and self.path_install:
         self.path = get_path(self.path_install)
     if self.path:
         if am_single_or_primary_process():
             self.log.info("Importing *local* PolyChord from " + self.path)
             if not os.path.exists(os.path.realpath(self.path)):
                 raise LoggedError(
                     self.log, "The given path does not exist. "
                     "Try installing PolyChord with "
                     "'cobaya-install polychord -m [modules_path]")
         pc_build_path = get_build_path(self.path)
         if not pc_build_path:
             raise LoggedError(
                 self.log, "Either PolyChord is not in the given folder, "
                 "'%s', or you have not compiled it.", self.path)
         # Inserting the previously found path into the list of import folders
         sys.path.insert(0, pc_build_path)
     else:
         self.log.info("Importing *global* PolyChord.")
     try:
         import pypolychord
         from pypolychord.settings import PolyChordSettings
         self.pc = pypolychord
     except ImportError:
         raise LoggedError(
             self.log, "Couldn't find the PolyChord python interface. "
             "Make sure that you have compiled it, and that you either\n"
             " (a) specify a path (you didn't) or\n"
             " (b) install the Python interface globally with\n"
             "     '/path/to/PolyChord/python setup.py install --user'")
     # Prepare arguments and settings
     self.nDims = self.model.prior.d()
     self.nDerived = (len(self.model.parameterization.derived_params()) +
                      len(self.model.prior) +
                      len(self.model.likelihood._likelihoods))
     if self.logzero is None:
         self.logzero = np.nan_to_num(-np.inf)
     if self.max_ndead == np.inf:
         self.max_ndead = -1
     for p in ["nlive", "nprior", "max_ndead"]:
         setattr(self, p,
                 read_dnumber(getattr(self, p), self.nDims, dtype=int))
     # Fill the automatic ones
     if getattr(self, "feedback", None) is None:
         values = {
             logging.CRITICAL: 0,
             logging.ERROR: 0,
             logging.WARNING: 0,
             logging.INFO: 1,
             logging.DEBUG: 2
         }
         self.feedback = values[self.log.getEffectiveLevel()]
     try:
         output_folder = getattr(self.output, "folder")
         output_prefix = getattr(self.output, "prefix") or ""
         self.read_resume = self.resuming
     except AttributeError:
         # dummy output -- no resume!
         self.read_resume = False
         from tempfile import gettempdir
         output_folder = gettempdir()
         # Random prefix drawn by the primary process and shared with the rest
         if am_single_or_primary_process():
             from random import random
             output_prefix = hex(int(random() * 16**6))[2:]
         else:
             output_prefix = None
         if more_than_one_process():
             output_prefix = get_mpi_comm().bcast(output_prefix, root=0)
     self.base_dir = os.path.join(output_folder, self.base_dir)
     self.file_root = output_prefix
     if am_single_or_primary_process():
         # Creating output folder, if it does not exist (just one process)
         if not os.path.exists(self.base_dir):
             os.makedirs(self.base_dir)
         # Idem, a clusters folder if needed -- notice that PolyChord's default
         # is "True", here "None", hence the funny condition below
         if self.do_clustering is not False:  # None here means "default"
             # NOTE(review): `clusters` is not defined in this method;
             # presumably a module-level name -- confirm.
             try:
                 os.makedirs(os.path.join(self.base_dir, clusters))
             except OSError:  # exists!
                 pass
         self.log.info("Storing raw PolyChord output in '%s'.",
                       self.base_dir)
     # Exploiting the speed hierarchy
     if self.blocking:
         speeds, blocks = self.model.likelihood._check_speeds_of_params(
             self.blocking)
     else:
         speeds, blocks = self.model.likelihood._speeds_of_params(
             int_speeds=True)
     blocks_flat = list(chain(*blocks))
     # Position of each sampled parameter in the flattened list of blocks
     self.ordering = [
         blocks_flat.index(p)
         for p in self.model.parameterization.sampled_params()
     ]
     # pypolychord's C interface for Fortran does not like int numpy types:
     # `len` already returns plain Python integers
     self.grade_dims = [len(block) for block in blocks]
     # Steps per block
     # NB: num_repeats is ignored by PolyChord when int "grade_frac" given,
     # so needs to be applied by hand.
     # Make sure that speeds are integer, and that the slowest is 1,
     # for a straightforward application of num_repeats
     speeds = relative_to_int(speeds, 1)
     # In num_repeats, `d` is interpreted as dimension of each block
     self.grade_frac = [
         int(speed * read_dnumber(self.num_repeats, dim_block))
         for speed, dim_block in zip(speeds, self.grade_dims)
     ]
     # Assign settings
     # As stated above, num_repeats is ignored, so it is not in the list
     pc_args = [
         "nlive", "nprior", "do_clustering", "precision_criterion",
         "max_ndead", "boost_posterior", "feedback", "logzero",
         "posteriors", "equals", "compression_factor",
         "cluster_posteriors", "write_resume", "read_resume",
         "write_stats", "write_live", "write_dead", "base_dir",
         "file_root", "grade_frac", "grade_dims"
     ]
     # Settings left as None are not passed
     self.pc_settings = PolyChordSettings(
         self.nDims,
         self.nDerived,
         seed=(self.seed if self.seed is not None else -1),
         **{
             p: getattr(self, p)
             for p in pc_args if getattr(self, p) is not None
         })
     # prior conversion from the hypercube
     bounds = self.model.prior.bounds(
         confidence_for_unbounded=self.confidence_for_unbounded)
     # Check if priors are bounded (nan's to inf)
     inf = np.where(np.isinf(bounds))
     if len(inf[0]):
         # `list(...)` allows positional indexing whatever container is returned
         params_names = list(self.model.parameterization.sampled_params())
         params = [params_names[i] for i in sorted(set(inf[0]))]
         # Explicit `+`: adjacent string literals are merged *before* `.join`
         # is applied, which would turn the whole prefix into the separator.
         raise LoggedError(
             self.log,
             "PolyChord needs bounded priors, but the parameter(s) '" +
             "', '".join(params) + "' is(are) unbounded.")
     locs = bounds[:, 0]
     scales = bounds[:, 1] - bounds[:, 0]
     # This function re-scales the parameters AND puts them in the right order
     self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales
                                ).tolist()
     # We will need the volume of the prior domain, since PolyChord divides by it
     self.logvolume = np.log(np.prod(scales))
     # Prepare callback function
     if self.callback_function is not None:
         self.callback_function_callable = (get_external_function(
             self.callback_function))
     self.last_point_callback = 0
     # Prepare runtime live and dead points collections
     self.live = Collection(self.model,
                            None,
                            name="live",
                            initial_size=self.pc_settings.nlive)
     self.dead = Collection(self.model, self.output, name="dead")
     self.n_sampled = len(self.model.parameterization.sampled_params())
     self.n_derived = len(self.model.parameterization.derived_params())
     self.n_priors = len(self.model.prior)
     self.n_likes = len(self.model.likelihood._likelihoods)
     # Done!
     if am_single_or_primary_process():
         self.log.info("Calling PolyChord with arguments:")
         # Non-callable, non-underscore members of the settings object
         for p, v in inspect.getmembers(self.pc_settings,
                                        lambda a: not (callable(a))):
             if not p.startswith("_"):
                 self.log.info("  %s: %s", p, v)
Example #7
0
 def initialize(self):
     """Prepares the arguments for the minimizer (`pybobyqa.solve` or `scipy.minimize`).

     Steps:
       1. Chooses the target: the log-likelihood if ``self.ignore_prior`` is set,
          otherwise the log-posterior.
       2. Picks a starting point: the best fit / MAP of previous samples found via
          ``self.output``, or else a draw from the prior's reference pdf that has a
          finite target value.
       3. Defines an affine transformation centred on the starting point and scaled
          using a covariance matrix (from the previous samples if available, otherwise
          from ``self.model.prior.reference_covmat()``).
       4. Stores the minimizer callable in ``self.minimizer`` and its keyword arguments
          in ``self.kwargs``, updated with ``self.override_bobyqa`` or
          ``self.override_scipy`` when given.

     Raises:
       LoggedError: if ``self.method`` is neither "bobyqa" nor "scipy"
         (case-insensitive).
     """
     if am_single_or_primary_process():
         self.log.info("Initializing")
     self.max_evals = read_dnumber(self.max_evals, self.model.prior.d())
     # Configure target
     method = self.model.loglike if self.ignore_prior else self.model.logpost
     kwargs = {"make_finite": True}
     if self.ignore_prior:
         kwargs["return_derived"] = False
     self.logp = lambda x: method(x, **kwargs)
     # Try to load info from previous samples.
     # If none, sample from reference (make sure that it has finite like/post)
     initial_point = None
     covmat = None
     if self.output:
         collection_in = self.output.load_collections(self.model,
                                                      skip=0,
                                                      thin=1,
                                                      concatenate=True)
         if collection_in:
             initial_point = (collection_in.bestfit()
                              if self.ignore_prior else collection_in.MAP())
             # Keep only the values of the sampled parameters, in their order
             initial_point = initial_point[list(
                 self.model.parameterization.sampled_params())].values
             self.log.info("Starting from %s of previous chain:",
                           "best fit" if self.ignore_prior else "MAP")
             # TODO: if ignore_prior, one should use *like* covariance (this is *post*)
             covmat = collection_in.cov()
     if initial_point is None:
         # NOTE(review): this loop has no upper bound on the number of tries; it
         # does not terminate if no reference point yields a finite target value.
         this_logp = -np.inf
         while not np.isfinite(this_logp):
             initial_point = self.model.prior.reference()
             this_logp = self.logp(initial_point)
         self.log.info("Starting from random initial point:")
     self.log.info(
         dict(
             zip(self.model.parameterization.sampled_params(),
                 initial_point)))
     # Cov and affine transformation
     self._affine_transform_matrix = None
     self._inv_affine_transform_matrix = None
     self._affine_transform_baseline = None
     if covmat is None:
         # Use as much info as we have from ref & prior
         covmat = self.model.prior.reference_covmat()
     # Transform to space where initial point is at centre, and cov is normalised.
     # Only the first element returned by `choleskyL` is used here.
     sigmas_diag, _ = choleskyL(covmat, return_scale_free=True)
     self._affine_transform_matrix = np.linalg.inv(sigmas_diag)
     self._inv_affine_transform_matrix = sigmas_diag
     self._affine_transform_baseline = initial_point
     self.affine_transform = lambda x: (self._affine_transform_matrix.dot(
         x - self._affine_transform_baseline))
     self.inv_affine_transform = lambda x: (
         self._inv_affine_transform_matrix.dot(
             x) + self._affine_transform_baseline)
     bounds = self.model.prior.bounds(
         confidence_for_unbounded=self.confidence_for_unbounded)
     # Re-scale: the minimizer works in the transformed space, so the target,
     # the starting point and both columns of the bounds are mapped into it.
     self.logp_transf = lambda x: self.logp(self.inv_affine_transform(x))
     initial_point = self.affine_transform(initial_point)
     bounds = np.array(
         [self.affine_transform(bounds[:, i]) for i in range(2)]).T
     # Configure method
     if self.method.lower() == "bobyqa":
         self.minimizer = pybobyqa.solve
         self.kwargs = {
             # Minimizers minimize, so the sign of the target is flipped
             "objfun": (lambda x: -self.logp_transf(x)),
             "x0":
             initial_point,
             # Here bounds are passed as (all lower values, all upper values)
             "bounds":
             np.array(list(zip(*bounds))),
             "seek_global_minimum":
             get_mpi_size() in [0, 1],
             "maxfun":
             int(self.max_evals)
         }
         self.kwargs = recursive_update(deepcopy(self.kwargs),
                                        self.override_bobyqa or {})
         self.log.debug(
             "Arguments for pybobyqa.solve:\n%r",
             {k: v
              for k, v in self.kwargs.items() if k != "objfun"})
     elif self.method.lower() == "scipy":
         self.minimizer = scpminimize
         self.kwargs = {
             # Minimizers minimize, so the sign of the target is flipped
             "fun": (lambda x: -self.logp_transf(x)),
             "x0": initial_point,
             # Here bounds are passed as one (lower, upper) row per parameter
             "bounds": bounds,
             "options": {
                 "maxiter": self.max_evals,
                 "disp": (self.log.getEffectiveLevel() == logging.DEBUG)
             }
         }
         self.kwargs = recursive_update(deepcopy(self.kwargs),
                                        self.override_scipy or {})
         self.log.debug(
             "Arguments for scipy.optimize.minimize:\n%r",
             {k: v
              for k, v in self.kwargs.items() if k != "fun"})
     else:
         methods = ["bobyqa", "scipy"]
         raise LoggedError(self.log,
                           "Method '%s' not recognized. Try one of %r.",
                           self.method, methods)
Example #8
0
    def initialize(self):
        """
        Checks the requested method and prepares starting points and scalings.

        Sets ``self.max_iter``, the target function ``self.logp`` (log-likelihood if
        ``self.ignore_prior``, log-posterior otherwise), the list
        ``self.initial_points`` (one per start handled by this process), the prior
        bounds ``self._bounds`` and the diagonal affine transformation matrices built
        from ``self._scales``. ``self.result`` is reset to ``None``.

        Raises:
          LoggedError: if ``self.method`` is not one of the keys of ``evals_attr``, or
            if no reference point with finite target value is found for some start.
        """
        if self.method not in evals_attr:
            raise LoggedError(self.log, "Method '%s' not recognized. Try one of %r.",
                              self.method, list(evals_attr))

        self.mpi_info("Initializing")
        self.max_iter = int(read_dnumber(self.max_evals, self.model.prior.d()))

        # Target function and the keyword arguments it is always called with
        if self.ignore_prior:
            target = self.model.loglike
            target_kwargs = {"make_finite": True, "return_derived": False}
        else:
            target = self.model.logpost
            target_kwargs = {"make_finite": True}
        self.logp = lambda x: target(x, **target_kwargs)

        # Starting points: taken from previous samples when there are any,
        # otherwise drawn from the reference pdf (requiring finite like/post)
        self.initial_points = []
        assert self.best_of > 0
        num_starts = int(np.ceil(self.best_of / mpi.size()))
        files = self.output.find_collections() if self.output else None
        for start in range(num_starts):
            initial_point = None
            if files:
                previous: Optional[SampleCollection] = None
                if mpi.more_than_one_process() or num_starts > 1:
                    # Several starts: each one uses its own numbered collection,
                    # if there are enough of them
                    index = 1 + mpi.rank() * num_starts + start
                    if index <= len(files):
                        previous = SampleCollection(
                            self.model, self.output, name=str(index), resuming=True)
                else:
                    # Single start: use all previous collections together
                    previous = self.output.load_collections(self.model,
                                                            concatenate=True)
                if previous:
                    best = previous.bestfit() if self.ignore_prior else previous.MAP()
                    sampled = list(self.model.parameterization.sampled_params())
                    initial_point = best[sampled].values
                    self.log.info("Starting %s/%s from %s of previous chain:", start + 1,
                                  num_starts, "best fit" if self.ignore_prior else "MAP")
                    # Compute covmat if input but no .covmat file (e.g. with PolyChord)
                    # Prefer old over `covmat` definition in yaml (same as MCMC)
                    self.covmat = previous.cov(derived=False)
                    self.covmat_params = list(
                        self.model.parameterization.sampled_params())
            if initial_point is None:
                max_tries = self.max_iter // 10 + 5
                for _ in range(max_tries):
                    candidate = self.model.prior.reference(random_state=self._rng)
                    if np.isfinite(self.logp(candidate)):
                        initial_point = candidate
                        break
                else:
                    # Every try gave a non-finite target value
                    raise LoggedError(self.log, "Could not find random starting point "
                                                "giving finite posterior")

                self.log.info("Starting %s/%s random initial point:",
                              start + 1, num_starts)
            self.log.info(
                dict(zip(self.model.parameterization.sampled_params(), initial_point)))
            self.initial_points.append(initial_point)

        self._bounds = self.model.prior.bounds(
            confidence_for_unbounded=self.confidence_for_unbounded)
        # TODO: if ignore_prior, one should use *like* covariance (this is *post*)
        covmat = self._load_covmat(prefer_load_old=self.output)[0]
        # Scale by conditional parameter widths (the correlation structure is not
        # used), capped at a third of the width of the bounds
        conditional_widths = 1 / np.sqrt(np.diag(np.linalg.inv(covmat)))
        bounds_widths = (self._bounds[:, 1] - self._bounds[:, 0]) / 3
        scales = np.minimum(conditional_widths, bounds_widths)
        # Affine transformation to a space where the covariance is normalised.
        # No rotation is applied, as supported minimization routines assume bounds
        # aligned with the parameter axes.
        self._affine_transform_matrix = np.diag(1 / scales)
        self._inv_affine_transform_matrix = np.diag(scales)
        self._scales = scales
        self.result = None
Example #9
0
 def initialize(self):
     """Imports the PolyChord sampler and prepares its arguments.

     Steps:
       1. Imports ``pypolychord``, from the build folder found under ``self.path``
          if a path is given (or derived from ``self.path_install``), otherwise
          from the global environment.
       2. Converts the dimension-dependent settings into integers and fills the
          automatic ones (``feedback``, ``logzero``, ``max_ndead``).
       3. Sets ``self.base_dir`` and ``self.file_root`` and creates the output
          folders (first process only).
       4. Derives the parameter ordering and block sizes from the speed hierarchy.
       5. Builds ``self.pc_settings`` and defines ``self.pc_prior``, the map from
          the unit hypercube to the prior bounds, together with ``self.logvolume``.

     Raises:
       HandledException: if the given path does not exist, if PolyChord cannot be
         found there or imported, or if some sampled parameter has an unbounded
         prior.
     """
     if not get_mpi_rank():  # rank = 0 (MPI master) or None (no MPI)
         self.log.info("Initializing")
     # If path not given, try using general path to modules
     if not self.path and self.path_install:
         self.path = get_path(self.path_install)
     if self.path:
         if not get_mpi_rank():
             self.log.info("Importing *local* PolyChord from " + self.path)
             if not os.path.exists(os.path.realpath(self.path)):
                 self.log.error("The given path does not exist.")
                 raise HandledException
         pc_build_path = get_build_path(self.path)
         if not pc_build_path:
             self.log.error("Either PolyChord is not in the given folder, "
                            "'%s', or you have not compiled it.", self.path)
             raise HandledException
         # Inserting the previously found path into the list of import folders
         sys.path.insert(0, pc_build_path)
     else:
         self.log.info("Importing *global* PolyChord.")
     try:
         import pypolychord
         from pypolychord.settings import PolyChordSettings
         self.pc = pypolychord
     except ImportError:
         self.log.error(
             "Couldn't find the PolyChord python interface. "
             "Make sure that you have compiled it, and that you either\n"
             " (a) specify a path (you didn't) or\n"
             " (b) install the Python interface globally with\n"
             "     '/path/to/PolyChord/python setup.py install --user'")
         raise HandledException
     # Prepare arguments and settings
     self.nDims = self.model.prior.d()
     # Derived quantities: derived parameters + one per prior + one per likelihood
     self.nDerived = (len(self.model.parameterization.derived_params()) +
                      len(self.model.prior) + len(self.model.likelihood._likelihoods))
     if self.logzero is None:
         self.logzero = np.nan_to_num(-np.inf)
     if self.max_ndead == np.inf:
         self.max_ndead = -1
     for p in ["nlive", "num_repeats", "nprior", "max_ndead"]:
         setattr(self, p, read_dnumber(getattr(self, p), self.nDims, dtype=int))
     # Fill the automatic ones
     if getattr(self, "feedback", None) is None:
         values = {logging.CRITICAL: 0, logging.ERROR: 0, logging.WARNING: 0,
                   logging.INFO: 1, logging.DEBUG: 2}
         self.feedback = values[self.log.getEffectiveLevel()]
     try:
         output_folder = self.output.folder
         output_prefix = self.output.prefix or ""
         self.read_resume = self.resuming
     except AttributeError:
         # dummy output -- no resume!
         self.read_resume = False
         from tempfile import gettempdir
         output_folder = gettempdir()
         # The first process draws a random prefix and shares it with the rest
         if not get_mpi_rank():
             from random import random
             output_prefix = hex(int(random() * 16 ** 6))[2:]
         else:
             output_prefix = None
         if get_mpi():
             output_prefix = get_mpi_comm().bcast(output_prefix, root=0)
     self.base_dir = os.path.join(output_folder, self.base_dir)
     self.file_root = output_prefix
     if not get_mpi_rank():
         # Creating output folder, if it does not exist (just one process)
         if not os.path.exists(self.base_dir):
             os.makedirs(self.base_dir)
         # Idem, a clusters folder if needed -- notice that PolyChord's default
         # is "True", here "None", hence the funny condition below
         if self.do_clustering is not False:  # None here means "default"
             try:
                 os.makedirs(os.path.join(self.base_dir, clusters))
             except OSError:  # exists!
                 pass
         self.log.info("Storing raw PolyChord output in '%s'.",
                       self.base_dir)
     # Exploiting the speed hierarchy
     speeds, blocks = self.model.likelihood._speeds_of_params(int_speeds=True)
     blocks_flat = list(chain(*blocks))
     # Position of each sampled parameter within the flattened blocks
     self.ordering = [
         blocks_flat.index(p) for p in self.model.parameterization.sampled_params()]
     self.grade_dims = [len(block) for block in blocks]
     #        self.grade_frac = np.array(
     #            [i*j for i,j in zip(self.grade_dims, speeds)])
     #        self.grade_frac = (
     #            self.grade_frac/sum(self.grade_frac))
     # Disabled for now. We need a way to override the "time" part of the meaning of grade_frac
     self.grade_frac = [1 / len(self.grade_dims) for _ in self.grade_dims]
     # Assign settings: attributes of this sampler passed on to PolyChord
     # (only those that are not None)
     pc_args = ["nlive", "num_repeats", "nprior", "do_clustering",
                "precision_criterion", "max_ndead", "boost_posterior", "feedback",
                "logzero", "update_files", "posteriors", "equals",
                "cluster_posteriors", "write_resume", "read_resume", "write_stats",
                "write_live", "write_dead", "base_dir", "grade_frac", "grade_dims",
                "file_root"]
     self.pc_settings = PolyChordSettings(
         self.nDims, self.nDerived, seed=(self.seed if self.seed is not None else -1),
         **{p: getattr(self, p) for p in pc_args if getattr(self, p) is not None})
     # prior conversion from the hypercube
     bounds = self.model.prior.bounds(
         confidence_for_unbounded=self.confidence_for_unbounded)
     # Check if priors are bounded (nan's to inf)
     inf = np.where(np.isinf(bounds))
     if len(inf[0]):
         # list(...) so that positional indexing works for a list or a mapping
         params_names = list(self.model.parameterization.sampled_params())
         params = [params_names[i] for i in sorted(set(inf[0]))]
         # The names are passed as a logging argument: calling `.join` directly
         # after the message literals would use the message itself as separator.
         self.log.error(
             "PolyChord needs bounded priors, but the parameter(s) '%s' "
             "is(are) unbounded.", "', '".join(params))
         raise HandledException
     locs = bounds[:, 0]
     scales = bounds[:, 1] - bounds[:, 0]
     # This function re-scales the parameters AND puts them in the right order
     self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales).tolist()
     # We will need the volume of the prior domain, since PolyChord divides by it
     self.logvolume = np.log(np.prod(scales))
     # Done!
     if not get_mpi_rank():
         self.log.info("Calling PolyChord with arguments:")
         for p, v in inspect.getmembers(self.pc_settings, lambda a: not (callable(a))):
             if not p.startswith("_"):
                 self.log.info("  %s: %s", p, v)