def close(self, *args):
     """
     Wraps up the maximization: chooses the best result among MPI processes (if
     running with MPI), validates it and stores it as ``self.maximum``.

     Returns silently if no ``result`` attribute was ever set (something failed
     earlier). Only the process for which ``get_mpi_rank()`` is falsy validates
     the result and updates the output.

     Raises ``HandledException`` if the optimizer reports failure, or if the
     value recomputed by the model at the optimum does not match the optimizer's.
     """
     if not hasattr(self, "result"):
         # Something failed before a result could be stored
         return
     if get_mpi_size():
         gathered = get_mpi_comm().gather(self.result, root=0)
         if not get_mpi_rank():
             # Keep the result with the smallest objective value
             i_best = np.argmin([res.fun for res in gathered])
             self.result = gathered[i_best]
     if get_mpi_rank():
         return
     if not self.result.success:
         self.log.error("Maximization failed! Here is the `scipy` raw result:\n%r",
                        self.result)
         raise HandledException
     target = "likelihood" if self.ignore_prior else "posterior"
     self.log.info("log%s maximized at %g", target, -self.result.fun)
     # Re-evaluate the model at the optimum as a consistency check
     post = self.model.logposterior(self.result.x)
     if self.ignore_prior:
         recomputed_max = sum(post.loglikes)
     else:
         recomputed_max = post.logpost
     if not np.allclose(-self.result.fun, recomputed_max):
         self.log.error("Cannot reproduce result. Something bad happened. "
                        "Recomputed max: %g at %r", recomputed_max, self.result.x)
         raise HandledException
     self.maximum = OnePoint(self.model, self.output, name="maximum",
                             extension=target)
     self.maximum.add(self.result.x, derived=post.derived, logpost=post.logpost,
                      logpriors=post.logpriors, loglikes=post.loglikes)
     table = self.maximum.data.to_string()
     self.log.info("Parameter values at maximum:\n%s" % table)
     self.maximum._out_update()
Beispiel #2
0
 def write_checkpoint(self):
     """
     Saves the current proposal covariance matrix as text and dumps the
     sampler's checkpoint info to a YAML file.

     Does nothing unless ``get_mpi_rank()`` is falsy and ``self.output`` is set.
     """
     if get_mpi_rank() or not self.output:
         return
     checkpoint_filename = self.checkpoint_filename()
     covmat_filename = self.covmat_filename()
     covmat = self.proposer.get_covariance()
     param_names = list(self.model.parameterization.sampled_params())
     # The header line lists the sampled parameters, separated by spaces
     np.savetxt(covmat_filename, covmat, header=" ".join(param_names))
     state = odict()
     state["converged"] = bool(self.converged)
     state["Rminus1_last"] = self.Rminus1_last
     state["proposal_scale"] = self.proposer.get_scale()
     state["blocks"] = self.blocks
     state["oversampling_factors"] = self.oversampling_factors
     state["i_last_slow_block"] = self.i_last_slow_block
     if not self.n() and self.burn_in_left:
         # initial: repeat burn-in if not finished
         state["burn_in"] = self.burn_in
     else:
         # to avoid overweighting last point of prev. run
         state["burn_in"] = "d"
     state["mpi_size"] = get_mpi_size()
     checkpoint_info = {_sampler: {self.name: state}}
     yaml_dump_file(checkpoint_filename, checkpoint_info, error_if_exists=False)
     self.log.debug("Dumped checkpoint info and current covmat.")
Beispiel #3
0
 def send_error_signal(self):
     """
     Notifies all the other MPI processes that this one has failed.

     Posts an ``isend`` of ``True``, tagged with ``_error_tag``, to every rank
     different from the current one.
     """
     for target_rank in range(get_mpi_size()):
         if target_rank == get_mpi_rank():
             # No need to signal ourselves
             continue
         get_mpi_comm().isend(True, dest=target_rank, tag=_error_tag)
Beispiel #4
0
def random_cov(ranges, O_std_min=1e-2, O_std_max=1, n_modes=1, mpi_warn=True):
    """
    Returns a random covariance matrix for parameters bounded by ``ranges``.

    Standard deviations are the lengths of the parameter ranges times a factor
    sampled log-uniformly between ``O_std_min`` and ``O_std_max``. For more than
    one dimension, the correlation matrix is drawn with
    ``scipy.stats.random_correlation``, using eigenvalues proportional to the
    sampled standard deviations (normalized to add up to the dimension); in one
    dimension it is the 1x1 identity.

    The output of this function can be used directly as the value of the option ``cov`` of
    the :class:`likelihoods.gaussian`.

    If ``n_modes>1``, returns a list of such matrices.

    If ``mpi_warn`` is true and ``get_mpi_size()`` is truthy, prints a warning
    about processes producing different random results.
    """
    if get_mpi_size() and mpi_warn:
        print("WARNING! "
              "Using with MPI: different process will produce different random results.")
    dim = len(ranges)
    widths = np.array([bounds[1] - bounds[0] for bounds in ranges])
    covs = []
    for _ in range(n_modes):
        log_lower = np.log10(O_std_min)
        log_span = np.log10(O_std_max / O_std_min)
        stds = widths * 10**(uniform.rvs(size=dim, loc=log_lower, scale=log_span))
        if dim > 1:
            corr = random_correlation.rvs(dim * stds / sum(stds))
        else:
            corr = np.eye(1)
        diag_stds = np.diag(stds)
        this_cov = diag_stds.dot(corr.dot(diag_stds))
        # Symmetrize (numerical noise is usually introduced in the last step)
        covs.append((this_cov + this_cov.T) / 2)
    return covs[0] if n_modes == 1 else covs
Beispiel #5
0
 def write_checkpoint(self):
     """
     Dumps the current proposal covariance matrix and the checkpoint info (as
     YAML), and appends the last row of the progress table to the progress file.

     Only acts when ``is_main_process()`` is true and ``self.output`` is set.
     """
     if not (is_main_process() and self.output):
         return
     checkpoint_filename = self.checkpoint_filename()
     self.dump_covmat(self.proposer.get_covariance())
     if not self.n() and self.burn_in_left:
         # initial: repeat burn-in if not finished
         burn_in = self.burn_in.value
     else:
         # to avoid overweighting last point of prev. run
         burn_in = 0
     sampler_state = {
         "converged": self.converged,
         "Rminus1_last": self.Rminus1_last,
         "burn_in": burn_in,
         "mpi_size": get_mpi_size(),
     }
     checkpoint_info = {"sampler": {self.get_name(): sampler_state}}
     yaml_dump_file(checkpoint_filename, checkpoint_info, error_if_exists=False)
     if not self.progress.empty:
         with open(self.progress_filename(), "a",
                   encoding="utf-8") as progress_file:
             fmts = {"N": "{:9d}".format}
             # TODO: next one is ignored when added to the dict
             #        "acceptance_rate": lambda x: "{:15.8g}".format(x)}
             last_row = self.progress.tail(1).to_string(
                 header=False, index=False, formatters=fmts)
             progress_file.write(last_row + "\n")
     self.log.debug(
         "Dumped checkpoint and progress info, and current covmat.")
Beispiel #6
0
 def close(self, *args):
     """
     Wraps up the minimization: chooses the best result among MPI processes (if
     running with MPI), validates it, stores it as ``self.minimum`` and writes
     the output.

     Returns silently if no ``result`` attribute was set (something failed
     earlier). Validation and output happen only where
     ``am_single_or_primary_process()`` is true.

     Raises ``LoggedError`` if ``self.success`` is false, or if the value
     recomputed by the model at the minimum does not match the minimizer's.
     """
     evals_attr_ = evals_attr[self.method.lower()]
     if not hasattr(self, "result"):
         # Something failed before a result could be stored
         return
     if get_mpi_size():
         # Gather results and their affine transforms, always in this order
         results = get_mpi_comm().gather(self.result, root=0)
         inv_matrices = get_mpi_comm().gather(
             self._inv_affine_transform_matrix, root=0)
         baselines = get_mpi_comm().gather(
             self._affine_transform_baseline, root=0)
         if am_single_or_primary_process():
             evals = [getattr(res, evals_attr_) for res in results]
             i_min = np.argmin(evals)
             self.result = results[i_min]
             self._inv_affine_transform_matrix = inv_matrices[i_min]
             self._affine_transform_baseline = baselines[i_min]
     if not am_single_or_primary_process():
         return
     if not self.success:
         raise LoggedError(
             self.log,
             "Minimization failed! Here is the raw result object:\n%s",
             str(self.result))
     logp_min = -np.array(getattr(self.result, evals_attr_))
     # Undo the affine transformation to get back the original parameters
     x_min = self.inv_affine_transform(self.result.x)
     target = "likelihood" if self.ignore_prior else "posterior"
     self.log.info("-log(%s) minimized to %g", target, -logp_min)
     recomputed_post_min = self.model.logposterior(x_min, cached=False)
     if self.ignore_prior:
         recomputed_logp_min = sum(recomputed_post_min.loglikes)
     else:
         recomputed_logp_min = recomputed_post_min.logpost
     if not np.allclose(logp_min, recomputed_logp_min):
         raise LoggedError(
             self.log,
             "Cannot reproduce result. Maybe yout likelihood is stochastic? "
             "Recomputed min: %g (was %g) at %r", recomputed_logp_min,
             logp_min, x_min)
     extension = "bestfit.txt" if self.ignore_prior else "minimum.txt"
     self.minimum = OnePoint(self.model, self.output, name="",
                             extension=extension)
     self.minimum.add(x_min,
                      derived=recomputed_post_min.derived,
                      logpost=recomputed_post_min.logpost,
                      logpriors=recomputed_post_min.logpriors,
                      loglikes=recomputed_post_min.loglikes)
     self.log.info("Parameter values at minimum:\n%s",
                   self.minimum.data.to_string())
     self.minimum._out_update()
     self.dump_getdist()
Beispiel #7
0
 def check_all_ready(self):
     """
     Checks if the chain(s) is(/are) ready to check convergence and, if requested,
     learn a new covariance matrix for the proposal distribution.

     Returns ``True`` if this is the only process and the number of collected
     samples is a positive multiple of ``learn_every``, or, with more than one
     process, once the pending "ready" communication between all processes has
     completed. Returns ``False`` otherwise.

     Raises ``LoggedError`` (after sending an error signal to the rest) if this
     process has hit a learn point more than ``max_waiting`` times without all
     chains becoming ready. ``check_error_signal`` may also raise if another
     process failed.
     """
     msg_ready = ("Ready to check convergence" +
                  (" and learn a new proposal covmat"
                   if self.learn_proposal else ""))
     n = len(self.collection)
     # If *just* (weight==1) got ready to check+learn
     if not (n % self.learn_every.value) and n > 0:
         self.log.info("Learn + convergence test @ %d samples accepted.", n)
         if more_than_one_process():
             self.been_waiting += 1
             if self.been_waiting > self.max_waiting:
                 self.send_error_signal()
                 raise LoggedError(
                     self.log,
                     "Waiting for too long for all chains to be ready. "
                     "Maybe one of them is stuck or died unexpectedly?")
         self.model.dump_timing()
         # If not MPI size > 1, we are ready
         if not more_than_one_process():
             self.log.debug(msg_ready)
             return True
         # Error check in case any process already sent an error signal
         self.check_error_signal()
         # If MPI, tell the rest that we are ready -- we use a "gather"
         # ("reduce" was problematic), but we are in practice just pinging
         if not hasattr(self, "req"):  # just once!
             self.all_ready = np.empty(get_mpi_size())
             self.req = get_mpi_comm().Iallgather(np.array([1.]),
                                                  self.all_ready)
             self.log.info(msg_ready + " (waiting for the rest...)")
     # If all processes are ready to learn (= communication finished)
     if hasattr(self, "req") and self.req.Test():
         # Sanity check: actually all processes have finished.
         # The message is %-formatted explicitly: a bare tuple after the comma
         # would be passed unformatted to the AssertionError.
         assert np.all(self.all_ready == 1), (
             "This should not happen! Notify the developers. (Got %r)" %
             (self.all_ready,))
         if more_than_one_process() and is_main_process():
             self.log.info("All chains are r" + msg_ready[1:])
         # Remove the request so that a new one is posted at the next learn point
         delattr(self, "req")
         self.been_waiting = 0
         # Another error check, in case the error occurred after sending "ready" signal
         self.check_error_signal()
         # Just in case, a barrier here
         sync_processes()
         return True
     return False
Beispiel #8
0
    def check_error_signal(self):
        """
        Probes every other MPI process for a pending error signal, and fails if
        one is found.

        Raises ``LoggedError`` if the status filled by the probe for some other
        rank carries the tag ``_error_tag``.

        NB: This behaviour only shows up when running this sampler inside a Python script,
            not when running with `cobaya run` (in that case, the process raising an error
            will call `MPI_ABORT` and kill the rest).
        """
        for other_rank in range(get_mpi_size()):
            if other_rank == get_mpi_rank():
                continue
            # Imported here so that mpi4py is needed only if there are other ranks
            from mpi4py import MPI
            probe_status = MPI.Status()
            get_mpi_comm().iprobe(other_rank, status=probe_status)
            if probe_status.tag != _error_tag:
                continue
            raise LoggedError(self.log, "Another process failed! Exiting.")
Beispiel #9
0
 def check_all_ready(self):
     """
     Checks if the chain(s) is(/are) ready to check convergence and, if requested,
     learn a new covariance matrix for the proposal distribution.

     Returns ``True`` if not running with MPI and a checkpoint has just been
     reached (a positive multiple of ``check_every`` accepted samples, with the
     current point having weight 1), or, with MPI, once the pending "ready"
     communication between all processes has completed. Returns ``False``
     otherwise.

     Raises ``HandledException`` if, with MPI, this process has hit a checkpoint
     more than ``max_waiting`` times without all chains becoming ready.
     """
     msg_ready = (
         ("Ready to" if get_mpi() or self.learn_proposal else "") +
         " check convergence" +
         (" and" if get_mpi() and self.learn_proposal else "") +
         (" learn a new proposal covmat" if self.learn_proposal else ""))
     # If *just* (weight==1) got ready to check+learn
     if (self.n() > 0 and self.current_point[_weight] == 1
             and not (self.n() % self.check_every)):
         self.log.info("Checkpoint: %d samples accepted.", self.n())
         if get_mpi():
             self.been_waiting += 1
             if self.been_waiting > self.max_waiting:
                 self.log.error(
                     "Waiting for too long for all chains to be ready. "
                     "Maybe one of them is stuck or died unexpectedly?")
                 raise HandledException
         self.model.dump_timing()
         # If not MPI, we are ready
         if not get_mpi():
             if msg_ready:
                 self.log.info(msg_ready)
             return True
         # If MPI, tell the rest that we are ready -- we use a "gather"
         # ("reduce" was problematic), but we are in practice just pinging
         if not hasattr(self, "req"):  # just once!
             self.all_ready = np.empty(get_mpi_size())
             self.req = get_mpi_comm().Iallgather(np.array([1.]),
                                                  self.all_ready)
             self.log.info(msg_ready + " (waiting for the rest...)")
     # If all processes are ready to learn (= communication finished)
     if hasattr(self, "req") and self.req.Test():
         # Sanity check: actually all processes have finished.
         # The message is %-formatted explicitly: a bare tuple after the comma
         # would be passed unformatted to the AssertionError.
         assert np.all(self.all_ready == 1), (
             "This should not happen! Notify the developers. (Got %r)" %
             (self.all_ready,))
         if get_mpi_rank() == 0:
             self.log.info("All chains are r" + msg_ready[1:])
         # Remove the request so that a new one is posted at the next checkpoint
         delattr(self, "req")
         self.been_waiting = 0
         # Just in case, a barrier here
         get_mpi_comm().barrier()
         return True
     return False
Beispiel #10
0
def random_mean(ranges, n_modes=1, mpi_warn=True):
    """
    Returns a point (as an array) sampled uniformly within the list of bounds
    ``ranges``, each given as a ``(lower, upper)`` pair.

    The output of this function can be used directly as the value of the option ``mean``
    of the :class:`likelihoods.gaussian`.

    If ``n_modes>1``, returns an array of such points, one per row.

    If ``mpi_warn`` is true and ``get_mpi_size()`` is truthy, prints a warning
    about processes producing different random results.
    """
    if get_mpi_size() and mpi_warn:
        print("WARNING! "
              "Using with MPI: different process will produce different random results.")
    draws = []
    for bounds in ranges:
        # One draw per mode for this parameter
        draws.append(uniform.rvs(loc=bounds[0], scale=bounds[1] - bounds[0],
                                 size=n_modes))
    points = np.array(draws).T
    return points[0] if n_modes == 1 else points
Beispiel #11
0
 def write_checkpoint(self):
     """
     Dumps the current proposal covariance matrix and the checkpoint info (as
     YAML), and appends the last row of the progress table to the progress file.

     Only acts when ``is_main_process()`` is true and ``self.output`` is set.
     """
     if not is_main_process() or not self.output:
         return
     checkpoint_filename = self.checkpoint_filename()
     self.dump_covmat(self.proposer.get_covariance())
     sampler_state = {
         "converged": bool(self.converged),
         "Rminus1_last": self.Rminus1_last,
         # initial: repeat burn-in if not finished;
         # otherwise 0, to avoid overweighting last point of prev. run
         "burn_in": (self.burn_in.value
                     if not self.n() and self.burn_in_left else 0),
         "mpi_size": get_mpi_size(),
     }
     checkpoint_info = {kinds.sampler: {self.get_name(): sampler_state}}
     yaml_dump_file(checkpoint_filename, checkpoint_info, error_if_exists=False)
     if not self.progress.empty:
         with open(self.progress_filename(), "a",
                   encoding="utf-8") as progress_file:
             last_row = self.progress.tail(1).to_string(header=False, index=False)
             progress_file.write(last_row + "\n")
     self.log.debug("Dumped checkpoint and progress info, and current covmat.")
Beispiel #12
0
 def write_checkpoint(self):
     """
     Saves the current proposal covariance matrix as text, dumps the sampler's
     checkpoint info to a YAML file, and appends the last row of the progress
     table to the progress file.

     Only acts when ``am_single_or_primary_process()`` is true and
     ``self.output`` is set.
     """
     if not (am_single_or_primary_process() and self.output):
         return
     checkpoint_filename = self.checkpoint_filename()
     covmat_filename = self.covmat_filename()
     covmat = self.proposer.get_covariance()
     # The header line lists the sampled parameters, separated by spaces
     header = " ".join(list(self.model.parameterization.sampled_params()))
     np.savetxt(covmat_filename, covmat, header=header)
     entries = [
         ("converged", bool(self.converged)),
         ("Rminus1_last", self.Rminus1_last),
         ("proposal_scale", self.proposer.get_scale()),
         ("blocks", self.blocks),
         ("oversampling_factors", self.oversampling_factors),
         ("i_last_slow_block", self.i_last_slow_block),
         # initial: repeat burn-in if not finished;
         # otherwise "d", to avoid overweighting last point of prev. run
         ("burn_in", (self.burn_in
                      if not self.n() and self.burn_in_left else "d")),
         ("mpi_size", get_mpi_size()),
     ]
     checkpoint_info = {_sampler: {self.name: odict(entries)}}
     yaml_dump_file(checkpoint_filename, checkpoint_info, error_if_exists=False)
     if not self.progress.empty:
         with open(self.progress_filename(), "a") as progress_file:
             last_row = self.progress.tail(1).to_string(header=False, index=False)
             progress_file.write(last_row + "\n")
     self.log.debug("Dumped checkpoint and progress info, and current covmat.")
Beispiel #13
0
 def initialize(self):
     """Initializes the sampler:
     creates the proposal distribution and draws the initial sample.

     Steps, in order: handles deprecated options, converts the options that
     accept units into ``NumberWithUnits``, checks that a resumed run uses the
     same number of chains, creates the per-process collection, prepares the
     callback and progress table, obtains the initial point (the last point of
     the previous chain when resuming, a new valid point otherwise), sets up
     the proposer and writes an initial checkpoint.

     Raises ``LoggedError`` if no parameters are being varied, if resuming with
     a different number of chains, or if running more than one process with an
     MPI version lower than 3.
     """
     if not self.model.prior.d():
         raise LoggedError(self.log,
                           "No parameters being varied for sampler")
     self.log.debug("Initializing")
     # MARKED FOR DEPRECATION IN v3.0
     if getattr(self, "oversample", None) is not None:
         self.log.warning(
             "*DEPRECATION*: `oversample` will be deprecated in the "
             "next version. Oversampling is now requested by setting "
             "`oversample_power` > 0.")
     # END OF DEPRECATION BLOCK
     # MARKED FOR DEPRECATION IN v3.0
     if getattr(self, "check_every", None) is not None:
         self.log.warning(
             "*DEPRECATION*: `check_every` will be deprecated in the "
             "next version. Please use `learn_every` instead.")
         # BEHAVIOUR TO BE REPLACED BY ERROR:
         self.learn_every = getattr(self, "check_every")
     # END OF DEPRECATION BLOCK
     if self.callback_every is None:
         self.callback_every = self.learn_every
     # Quantities that can be given in units of "d"
     self._quants_d_units = []
     for q in ["max_tries", "learn_every", "callback_every", "burn_in"]:
         number = NumberWithUnits(getattr(self, q), "d", dtype=int)
         self._quants_d_units.append(number)
         setattr(self, q, number)
     self.output_every = NumberWithUnits(self.output_every, "s", dtype=int)
     if is_main_process():
         if self.output.is_resuming() and (max(self.mpi_size or 0, 1) !=
                                           max(get_mpi_size(), 1)):
             # `mpi_size` may be None: guard it here as in the condition above,
             # otherwise `max(None, 1)` raises TypeError and hides this error
             raise LoggedError(
                 self.log,
                 "Cannot resume a run with a different number of chains: "
                 "was %d and now is %d.", max(self.mpi_size or 0, 1),
                 max(get_mpi_size(), 1))
         if more_than_one_process():
             if get_mpi().Get_version()[0] < 3:
                 raise LoggedError(
                     self.log, "MPI use requires MPI version 3.0 or "
                     "higher to support IALLGATHER.")
     sync_processes()
     # One collection per MPI process: `name` is the MPI rank + 1
     name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank()))
     self.collection = Collection(self.model,
                                  self.output,
                                  name=name,
                                  resuming=self.output.is_resuming())
     self.current_point = OneSamplePoint(self.model)
     # Use standard MH steps by default
     self.get_new_sample = self.get_new_sample_metropolis
     # Prepare callback function
     if self.callback_function is not None:
         self.callback_function_callable = (get_external_function(
             self.callback_function))
     # Useful for getting last points added inside callback function
     self.last_point_callback = 0
     # Monitoring/restore progress
     if is_main_process():
         cols = [
             "N", "timestamp", "acceptance_rate", "Rminus1", "Rminus1_cl"
         ]
         self.progress = DataFrame(columns=cols)
         self.i_learn = 1
         if self.output and not self.output.is_resuming():
             # New run: start the progress file with a header line
             with open(self.progress_filename(), "w",
                       encoding="utf-8") as progress_file:
                 progress_file.write("# " +
                                     " ".join(self.progress.columns) + "\n")
     # Get first point, to be discarded -- not possible to determine its weight
     # Still, we need to compute derived parameters, since, as the proposal "blocked",
     # we may be saving the initial state of some block.
     # NB: if resuming but nothing was written (burn-in not finished): re-start
     if self.output.is_resuming() and len(self.collection):
         # Resume from the last row of the existing collection
         initial_point = (self.collection[
             self.collection.sampled_params].iloc[len(self.collection) -
                                                  1]).values.copy()
         logpost = -(self.collection[_minuslogpost].iloc[
             len(self.collection) - 1].copy())
         logpriors = -(self.collection[self.collection.minuslogprior_names].
                       iloc[len(self.collection) - 1].copy())
         loglikes = -0.5 * (self.collection[self.collection.chi2_names].
                            iloc[len(self.collection) - 1].copy())
         derived = (self.collection[self.collection.derived_params].iloc[
             len(self.collection) - 1].values.copy())
     else:
         # NB: max_tries adjusted to dim instead of #cycles (blocking not computed yet)
         self.max_tries.set_scale(self.model.prior.d())
         self.log.info(
             "Getting initial point... (this may take a few seconds)")
         initial_point, logpost, logpriors, loglikes, derived = \
             self.model.get_valid_point(max_tries=self.max_tries.value)
         # If resuming but no existing chain, assume failed run and ignore blocking
         # if speeds measurement requested
         if self.output.is_resuming() and not len(self.collection) \
            and self.measure_speeds:
             self.blocking = None
         if self.measure_speeds and self.blocking:
             self.log.warning(
                 "Parameter blocking manually fixed: speeds will not be measured."
             )
         elif self.measure_speeds:
             n = None if self.measure_speeds is True else int(
                 self.measure_speeds)
             self.model.measure_and_set_speeds(n=n, discard=0)
     self.set_proposer_blocking()
     self.set_proposer_covmat(load=True)
     self.current_point.add(initial_point,
                            derived=derived,
                            logpost=logpost,
                            logpriors=logpriors,
                            loglikes=loglikes)
     self.log.info("Initial point: %s", self.current_point)
     # Max #(learn+convergence checks) to wait,
     # in case one process dies without sending MPI_ABORT
     self.been_waiting = 0
     self.max_waiting = max(50, self.max_tries.unit_value)
     # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
     self.burn_in_left = self.burn_in.value * self.current_point.output_thin + 1
     # Initial dummy checkpoint
     # (needed when 1st "learn point" not reached in prev. run)
     self.write_checkpoint()
Beispiel #14
0
def safe_exit():
    """
    Aborts all MPI processes if more than one is present.

    Calls ``Abort(1)`` on the MPI communicator when ``get_mpi_size()`` is larger
    than 1; does nothing otherwise.
    """
    if not (get_mpi_size() > 1):
        return
    get_mpi_comm().Abort(1)
Beispiel #15
0
    def initialize(self):
        """
        Prepares the minimizer: target function, initial point, parameter
        rescaling and the arguments for the chosen minimization routine.

        The target is the model's log-likelihood if ``self.ignore_prior``, and
        its log-posterior otherwise. The initial point is the best fit/MAP of
        previous samples found in the output, if any; otherwise it is drawn
        from the prior's reference until the target is finite there.

        Sets ``self.minimizer`` and ``self.kwargs`` for either ``pybobyqa.solve``
        (method "bobyqa") or ``scpminimize`` (method "scipy").

        Raises ``LoggedError`` if ``self.method`` is not one of those two.
        """
        self.mpi_info("Initializing")
        self.max_evals = read_dnumber(self.max_evals, self.model.prior.d())
        # Configure target
        method = self.model.loglike if self.ignore_prior else self.model.logpost
        kwargs = {"make_finite": True}
        if self.ignore_prior:
            kwargs["return_derived"] = False
        self.logp = lambda x: method(x, **kwargs)
        # Try to load info from previous samples.
        # If none, sample from reference (make sure that it has finite like/post)
        initial_point = None
        if self.output:
            files = self.output.find_collections()
            collection_in = None
            if files:
                if more_than_one_process():
                    # Each process loads the collection named after its rank + 1,
                    # as long as there are at least that many files
                    if 1 + get_mpi_rank() <= len(files):
                        collection_in = Collection(self.model,
                                                   self.output,
                                                   name=str(1 +
                                                            get_mpi_rank()),
                                                   resuming=True)
                else:
                    # Single process: use all collections, concatenated
                    collection_in = self.output.load_collections(
                        self.model, concatenate=True)
            if collection_in:
                initial_point = (collection_in.bestfit()
                                 if self.ignore_prior else collection_in.MAP())
                # Keep just the values of the sampled parameters
                initial_point = initial_point[list(
                    self.model.parameterization.sampled_params())].values
                self.log.info("Starting from %s of previous chain:",
                              "best fit" if self.ignore_prior else "MAP")
        if initial_point is None:
            this_logp = -np.inf
            # Draw from the reference until the target is finite
            while not np.isfinite(this_logp):
                initial_point = self.model.prior.reference()
                this_logp = self.logp(initial_point)
            self.log.info("Starting from random initial point:")
        # Log the initial point as a {parameter name: value} dictionary
        self.log.info(
            dict(
                zip(self.model.parameterization.sampled_params(),
                    initial_point)))

        self._bounds = self.model.prior.bounds(
            confidence_for_unbounded=self.confidence_for_unbounded)

        # TODO: if ignore_prior, one should use *like* covariance (this is *post*)
        covmat = self._load_covmat(self.output)[0]

        # scale by conditional parameter widths (since not using correlation structure)
        # (capped at a third of the length of each parameter's bounds)
        scales = np.minimum(1 / np.sqrt(np.diag(np.linalg.inv(covmat))),
                            (self._bounds[:, 1] - self._bounds[:, 0]) / 3)

        # Cov and affine transformation
        # Transform to space where initial point is at centre, and cov is normalised
        # Cannot do rotation, as supported minimization routines assume bounds aligned
        # with the parameter axes.
        self._affine_transform_matrix = np.diag(1 / scales)
        self._inv_affine_transform_matrix = np.diag(scales)
        self._scales = scales
        self._affine_transform_baseline = initial_point
        initial_point = self.affine_transform(initial_point)
        # Sanity check: the initial point must be the origin of the transformed space
        np.testing.assert_allclose(initial_point,
                                   np.zeros(initial_point.shape))
        # Transform lower and upper bounds; after `.T`: one (lower, upper) row per param
        bounds = np.array(
            [self.affine_transform(self._bounds[:, i]) for i in range(2)]).T
        # Configure method
        if self.method.lower() == "bobyqa":
            self.minimizer = pybobyqa.solve
            self.kwargs = {
                # Minimize minus the target
                "objfun": (lambda x: -self.logp_transf(x)),
                "x0":
                initial_point,
                # Here passed as (all lower bounds, all upper bounds)
                "bounds":
                np.array(list(zip(*bounds))),
                # Requested only if the MPI size is 0 or 1
                "seek_global_minimum":
                (True if get_mpi_size() in [0, 1] else False),
                "maxfun":
                int(self.max_evals)
            }
            # User-provided overrides take precedence over the defaults above
            self.kwargs = recursive_update(deepcopy(self.kwargs),
                                           self.override_bobyqa or {})
            self.log.debug(
                "Arguments for pybobyqa.solve:\n%r",
                {k: v
                 for k, v in self.kwargs.items() if k != "objfun"})
        elif self.method.lower() == "scipy":
            self.minimizer = scpminimize
            self.kwargs = {
                # Minimize minus the target
                "fun": (lambda x: -self.logp_transf(x)),
                "x0": initial_point,
                "bounds": bounds,
                "options": {
                    "maxiter": self.max_evals,
                    # Let the minimizer print messages only when debugging
                    "disp": (self.log.getEffectiveLevel() == logging.DEBUG)
                }
            }
            # User-provided overrides take precedence over the defaults above
            self.kwargs = recursive_update(deepcopy(self.kwargs),
                                           self.override_scipy or {})
            self.log.debug(
                "Arguments for scipy.optimize.minimize:\n%r",
                {k: v
                 for k, v in self.kwargs.items() if k != "fun"})
        else:
            methods = ["bobyqa", "scipy"]
            raise LoggedError(self.log,
                              "Method '%s' not recognized. Try one of %r.",
                              self.method, methods)
Beispiel #16
0
 def initialize(self):
     """Initializes the sampler:
     creates the proposal distribution and draws the initial sample."""
     self.log.debug("Initializing")
     for p in [
             "burn_in", "max_tries", "output_every", "check_every",
             "callback_every"
     ]:
         setattr(
             self, p,
             read_dnumber(getattr(self, p), self.model.prior.d(),
                          dtype=int))
     if self.callback_every is None:
         self.callback_every = self.check_every
     # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
     self.burn_in_left = self.burn_in + 1
     # Max # checkpoints to wait, in case one process dies without sending MPI_ABORT
     self.been_waiting = 0
     self.max_waiting = max(50, self.max_tries / self.model.prior.d())
     if self.resuming and (max(self.mpi_size or 0, 1) != max(
             get_mpi_size(), 1)):
         self.log.error(
             "Cannot resume a sample with a different number of chains: "
             "was %d and now is %d.", max(self.mpi_size, 1),
             max(get_mpi_size(), 1))
         raise HandledException
     if not self.resuming and self.output:
         # Delete previous files (if not "forced", the run would have already failed)
         if ((os.path.abspath(self.covmat_filename()) != os.path.abspath(
                 str(self.covmat)))):
             try:
                 os.remove(self.covmat_filename())
             except OSError:
                 pass
         # There may be more that chains than expected,
         # if #ranks was bigger in a previous run
         i = 0
         while True:
             i += 1
             collection_filename, _ = self.output.prepare_collection(str(i))
             try:
                 os.remove(collection_filename)
             except OSError:
                 break
     # One collection per MPI process: `name` is the MPI rank + 1
     name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank()))
     self.collection = Collection(self.model,
                                  self.output,
                                  name=name,
                                  resuming=self.resuming)
     self.current_point = OnePoint(self.model, OutputDummy({}), name=name)
     # Use standard MH steps by default
     self.get_new_sample = self.get_new_sample_metropolis
     # Prepare oversampling / dragging if applicable
     self.effective_max_samples = self.max_samples
     if self.oversample and self.drag:
         self.log.error("Choose either oversampling or dragging, not both.")
         raise HandledException
     if self.oversample:
         factors, blocks = self.model.likelihood._speeds_of_params(
             int_speeds=True)
         self.oversampling_factors = factors
         self.log.info("Oversampling with factors:\n" + "\n".join([
             "   %d : %r" % (f, b)
             for f, b in zip(self.oversampling_factors, blocks)
         ]))
         self.i_last_slow_block = None
         # No way right now to separate slow and fast
         slow_params = list(self.model.parameterization.sampled_params())
     elif self.drag:
         speeds, blocks = self.model.likelihood._speeds_of_params(
             fast_slow=True, int_speeds=True)
         # For now, no blocking inside either fast or slow: just 2 blocks
         self.i_last_slow_block = 0
         if np.all(speeds == speeds[0]):
             self.log.error(
                 "All speeds are equal or too similar: cannot drag! "
                 "Make sure to define accurate likelihoods' speeds.")
             raise HandledException
         # Make the 1st factor 1:
         speeds = [1, speeds[1] / speeds[0]]
         # Target: dragging step taking as long as slow step
         self.drag_interp_steps = self.drag * speeds[1]
         # Per dragging step, the (fast) posterior is evaluated *twice*,
         self.drag_interp_steps /= 2
         self.drag_interp_steps = int(np.round(self.drag_interp_steps))
         fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
         # Not too much or too little dragging
         drag_limits = [(int(l) * len(fast_params) if l is not None else l)
                        for l in self.drag_limits]
         if drag_limits[
                 0] is not None and self.drag_interp_steps < drag_limits[0]:
             self.log.warning(
                 "Number of dragging steps clipped from below: was not "
                 "enough to efficiently explore the fast directions -- "
                 "avoid this limit by decreasing 'drag_limits[0]'.")
             self.drag_interp_steps = drag_limits[0]
         if drag_limits[
                 1] is not None and self.drag_interp_steps > drag_limits[1]:
             self.log.warning(
                 "Number of dragging steps clipped from above: "
                 "excessive, probably inefficient, exploration of the "
                 "fast directions -- "
                 "avoid this limit by increasing 'drag_limits[1]'.")
             self.drag_interp_steps = drag_limits[1]
         # Re-scale steps between checkpoint and callback to the slow dimensions only
         slow_params = list(chain(*blocks[:1 + self.i_last_slow_block]))
         self.n_slow = len(slow_params)
         for p in ["check_every", "callback_every"]:
             setattr(
                 self, p,
                 int(getattr(self, p) * self.n_slow / self.model.prior.d()))
         self.log.info("Dragging with oversampling per step:\n" +
                       "\n".join([
                           "   %d : %r" % (f, b)
                           for f, b in zip([1, self.drag_interp_steps],
                                           [blocks[0], fast_params])
                       ]))
         self.get_new_sample = self.get_new_sample_dragging
     else:
         _, blocks = self.model.likelihood._speeds_of_params()
         self.oversampling_factors = [1 for b in blocks]
         slow_params = list(self.model.parameterization.sampled_params())
         self.n_slow = len(slow_params)
     # Turn parameter names into indices
     self.blocks = [[
         list(self.model.parameterization.sampled_params()).index(p)
         for p in b
     ] for b in blocks]
     self.proposer = BlockedProposer(
         self.blocks,
         oversampling_factors=self.oversampling_factors,
         i_last_slow_block=self.i_last_slow_block,
         proposal_scale=self.proposal_scale)
     # Build the initial covariance matrix of the proposal, or load from checkpoint
     if self.resuming:
         covmat = np.loadtxt(self.covmat_filename())
         self.log.info("Covariance matrix from checkpoint.")
     else:
         covmat = self.initial_proposal_covmat(slow_params=slow_params)
         self.log.info("Initial covariance matrix.")
     self.log.debug(
         "Sampling with covmat:\n%s",
         DataFrame(
             covmat,
             columns=self.model.parameterization.sampled_params(),
             index=self.model.parameterization.sampled_params()).to_string(
                 line_width=_line_width))
     self.proposer.set_covariance(covmat)
     # Prepare callback function
     if self.callback_function is not None:
         self.callback_function_callable = (get_external_function(
             self.callback_function))
Beispiel #17
0
 def initialize(self):
     """
     Prepares the minimizer callable and its keyword arguments.

     After this call, ``self.minimizer`` holds either ``pybobyqa.solve`` or
     ``scipy.optimize.minimize`` (depending on ``self.method``), and
     ``self.kwargs`` holds the arguments it will be called with.

     Steps:

     1. The target ``self.logp`` is the model's log-likelihood if
        ``self.ignore_prior`` is set, or its log-posterior otherwise, always
        called with ``make_finite=True``.
     2. The starting point is the best fit / MAP of the collections found in
        ``self.output`` (if any); otherwise, points are drawn from the prior's
        reference until one has a finite target value.
     3. An affine transformation ``x -> M (x - x0)`` is defined, where ``x0``
        is the starting point and ``M`` is the inverse of the scale matrix
        returned by ``choleskyL`` for the covariance matrix (taken from the
        previous sample if available, or from the prior's reference otherwise).
        The minimizer works in the transformed space.
     4. The prior bounds are mapped to the transformed space and passed to the
        minimizer together with the starting point and ``self.max_evals``.
        User overrides (``self.override_bobyqa`` / ``self.override_scipy``)
        are applied last.

     Raises:
         LoggedError: if ``self.method`` is neither "bobyqa" nor "scipy"
             (case-insensitive).
     """
     if am_single_or_primary_process():
         self.log.info("Initializing")
     self.max_evals = read_dnumber(self.max_evals, self.model.prior.d())
     # Configure target
     target = self.model.loglike if self.ignore_prior else self.model.logpost
     target_kwargs = {"make_finite": True}
     if self.ignore_prior:
         target_kwargs["return_derived"] = False
     self.logp = lambda x: target(x, **target_kwargs)
     # Try to load info from previous samples.
     # If none, sample from reference (make sure that it has finite like/post)
     initial_point = None
     covmat = None
     if self.output:
         collection_in = self.output.load_collections(self.model,
                                                      skip=0,
                                                      thin=1,
                                                      concatenate=True)
         if collection_in:
             initial_point = (collection_in.bestfit()
                              if self.ignore_prior else collection_in.MAP())
             # Keep the sampled parameters only, as a plain array of values
             initial_point = initial_point[list(
                 self.model.parameterization.sampled_params())].values
             self.log.info("Starting from %s of previous chain:",
                           "best fit" if self.ignore_prior else "MAP")
             # TODO: if ignore_prior, one should use *like* covariance (this is *post*)
             covmat = collection_in.cov()
     if initial_point is None:
         this_logp = -np.inf
         # NOTE(review): no cap on the number of tries -- this loops forever
         # if the reference never yields a finite value.
         while not np.isfinite(this_logp):
             initial_point = self.model.prior.reference()
             this_logp = self.logp(initial_point)
         self.log.info("Starting from random initial point:")
     self.log.info(
         dict(
             zip(self.model.parameterization.sampled_params(),
                 initial_point)))
     # Cov and affine transformation
     # (reset first, so that no stale transformation survives a failure below)
     self._affine_transform_matrix = None
     self._inv_affine_transform_matrix = None
     self._affine_transform_baseline = None
     if covmat is None:
         # Use as much info as we have from ref & prior
         covmat = self.model.prior.reference_covmat()
     # Transform to space where initial point is at centre, and cov is normalised
     # (only the scale matrix is used; the second return value is not needed)
     sigmas_diag, _ = choleskyL(covmat, return_scale_free=True)
     self._affine_transform_matrix = np.linalg.inv(sigmas_diag)
     self._inv_affine_transform_matrix = sigmas_diag
     self._affine_transform_baseline = initial_point
     self.affine_transform = lambda x: (self._affine_transform_matrix.dot(
         x - self._affine_transform_baseline))
     self.inv_affine_transform = lambda x: (
         self._inv_affine_transform_matrix.dot(
             x) + self._affine_transform_baseline)
     bounds = self.model.prior.bounds(
         confidence_for_unbounded=self.confidence_for_unbounded)
     # Re-scale
     self.logp_transf = lambda x: self.logp(self.inv_affine_transform(x))
     initial_point = self.affine_transform(initial_point)
     # Transform the lower and upper bounds (columns 0 and 1) separately,
     # then restore the original (parameter, lower/upper) layout
     bounds = np.array(
         [self.affine_transform(bounds[:, i]) for i in range(2)]).T
     # Configure method
     method_name = self.method.lower()
     if method_name == "bobyqa":
         self.minimizer = pybobyqa.solve
         self.kwargs = {
             # Minimizers minimize: flip the sign of the target
             "objfun": (lambda x: -self.logp_transf(x)),
             "x0": initial_point,
             # pybobyqa takes the bounds as (all lower, all upper)
             "bounds": np.array(list(zip(*bounds))),
             # Global search only when not running several MPI processes
             "seek_global_minimum": get_mpi_size() in [0, 1],
             "maxfun": int(self.max_evals)
         }
         self.kwargs = recursive_update(deepcopy(self.kwargs),
                                        self.override_bobyqa or {})
         self.log.debug(
             "Arguments for pybobyqa.solve:\n%r",
             {k: v
              for k, v in self.kwargs.items() if k != "objfun"})
     elif method_name == "scipy":
         self.minimizer = scpminimize
         self.kwargs = {
             # Minimizers minimize: flip the sign of the target
             "fun": (lambda x: -self.logp_transf(x)),
             "x0": initial_point,
             "bounds": bounds,
             "options": {
                 # Integer number of iterations, as in the bobyqa branch
                 "maxiter": int(self.max_evals),
                 "disp": (self.log.getEffectiveLevel() == logging.DEBUG)
             }
         }
         self.kwargs = recursive_update(deepcopy(self.kwargs),
                                        self.override_scipy or {})
         self.log.debug(
             "Arguments for scipy.optimize.minimize:\n%r",
             {k: v
              for k, v in self.kwargs.items() if k != "fun"})
     else:
         methods = ["bobyqa", "scipy"]
         raise LoggedError(self.log,
                           "Method '%s' not recognized. Try one of %r.",
                           self.method, methods)