Example no. 1
 def check_all_ready(self):
     """
     Checks if the chain(s) is(/are) ready to check convergence and, if requested,
     learn a new covariance matrix for the proposal distribution.
     """
     msg_ready = (
         ("Ready to" if get_mpi() or self.learn_proposal else "") +
         " check convergence" +
         (" and" if get_mpi() and self.learn_proposal else "") +
         (" learn a new proposal covmat" if self.learn_proposal else ""))
     # If *just* (weight==1) got ready to check+learn
     if (self.n() > 0 and self.current_point[_weight] == 1
             and not (self.n() % self.check_every)):
         self.log.info("Checkpoint: %d samples accepted.", self.n())
         if get_mpi():
             self.been_waiting += 1
             if self.been_waiting > self.max_waiting:
                 self.log.error(
                     "Waiting for too long for all chains to be ready. "
                     "Maybe one of them is stuck or died unexpectedly?")
                 raise HandledException
         self.model.dump_timing()
         # If not MPI, we are ready
         if not get_mpi():
             if msg_ready:
                 self.log.info(msg_ready)
             return True
         # If MPI, tell the rest that we are ready -- we use a "gather"
         # ("reduce" was problematic), but we are in practice just pinging
         if not hasattr(self, "req"):  # just once!
             self.all_ready = np.empty(get_mpi_size())
             self.req = get_mpi_comm().Iallgather(np.array([1.]),
                                                  self.all_ready)
             self.log.info(msg_ready + " (waiting for the rest...)")
     # If all processes are ready to learn (= communication finished)
     if hasattr(self, "req") and self.req.Test():
         # Sanity check: actually all processes have finished
         assert np.all(self.all_ready == 1), (
             "This should not happen! Notify the developers. (Got %r)" %
             self.all_ready)
         if get_mpi_rank() == 0:
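             # turns "Ready to ..." into "All chains are ready to ..."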
             self.log.info("All chains are r" + msg_ready[1:])
         delattr(self, "req")
         self.been_waiting = 0
         # Just in case, a barrier here
         get_mpi_comm().barrier()
         return True
     return False
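
A minimal standalone sketch of the non-blocking readiness "ping" above, assuming mpi4py is installed (run with e.g. `mpirun -n 2 python ping.py`; file name hypothetical): each process contributes a 1 via `Iallgather` and polls the request with `Test()` while it keeps working.

 import numpy as np
 from mpi4py import MPI

 comm = MPI.COMM_WORLD
 all_ready = np.empty(comm.Get_size())
 # Non-blocking: returns at once; completes when every rank has called it
 req = comm.Iallgather(np.array([1.]), all_ready)
 while not req.Test():
     pass  # keep doing useful work (e.g. drawing samples) while waiting
 assert np.all(all_ready == 1)
 comm.barrier()  # as above: a barrier once everybody is ready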
Example no. 2
 def format(self, record):
     self._fmt = (("[%d]" % get_mpi_rank() if get_mpi() else "") +
                  "[%(name)s] " + {
                      logging.ERROR: "*ERROR* ",
                      logging.WARNING: "*WARNING* "
                  }.get(record.levelno, "") + "%(message)s")
     # On Python 3, logging.Formatter reads the active format string from its
     # style object; mutating self._fmt alone only has effect on Python 2
     self._style._fmt = self._fmt
     return logging.Formatter.format(self, record)
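
A self-contained variant of the formatter above (class and logger names hypothetical, MPI bits dropped), showing the per-record format in action:

 import logging

 class TaggedFormatter(logging.Formatter):
     def format(self, record):
         tag = {logging.ERROR: "*ERROR* ",
                logging.WARNING: "*WARNING* "}.get(record.levelno, "")
         self._style._fmt = "[%(name)s] " + tag + "%(message)s"
         return logging.Formatter.format(self, record)

 handler = logging.StreamHandler()
 handler.setFormatter(TaggedFormatter())
 log = logging.getLogger("demo")
 log.addHandler(handler)
 log.warning("disk almost full")  # -> [demo] *WARNING* disk almost full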
Example no. 3
 def lock_error(self):
     if not self.has_lock():
         assert self.lock_error_file
         try:
             # make lock_err so process holding lock can check
             # another process had an error
             with open(self.lock_error_file, 'wb'):
                 pass
         except OSError:
             pass
     if mpi.get_mpi():
         import mpi4py
     else:
         mpi4py = None
     if mpi.is_main_process() and use_portalocker() is None:
         self.log.warning('install "portalocker" for better file lock control.')
     raise LoggedError(self.log,
                       "File %s is locked.\nYou may be running multiple jobs with "
                       "the same output when you intended to run with MPI. "
                       "Check that mpi4py is correctly installed and "
                       "configured (using the same mpi as mpirun/mpiexec); "
                       "e.g. try the test at\n"
                       "https://cobaya.readthedocs.io/en/latest/installation."
                       "html#mpi-parallelization-optional-but-encouraged\n"
                       + ("Your current mpi4py config is:"
                          "\n %s" % mpi4py.get_config()
                          if mpi4py is not None else
                          "mpi4py is NOT currently installed."), self.lock_file)
Example no. 4
    def run(self):
        """
        Prepares the posterior function and calls ``PolyChord``'s ``run`` function.
        """

        # Prepare the posterior
        # Don't forget to multiply by the volume of the physical hypercube,
        # since PolyChord divides by it
        def logpost(params_values):
            logposterior, logpriors, loglikes, derived = (
                self.model.logposterior(params_values))
            if len(derived) != len(self.model.parameterization.derived_params()):
                derived = np.full(
                    len(self.model.parameterization.derived_params()), np.nan)
            if len(loglikes) != len(self.model.likelihood._likelihoods):
                loglikes = np.full(
                    len(self.model.likelihood._likelihoods), np.nan)
            derived = list(derived) + list(logpriors) + list(loglikes)
            return (max(logposterior + self.logvolume,
                        0.99 * self.pc_settings.logzero),
                    derived)

        self.log.info("Sampling!")
        if get_mpi():
            get_mpi_comm().barrier()
        self.pc.run_polychord(logpost, self.nDims, self.nDerived,
                              self.pc_settings, self.pc_prior)
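
Why `logpost` adds `self.logvolume`: PolyChord samples in the unit hypercube, so the flat prior over the physical box contributes -log(volume) to the log-posterior, and the wrapper adds it back. A toy check (numbers hypothetical):

 import numpy as np

 bounds = np.array([[0., 2.], [-1., 3.]])  # two flat, bounded priors
 scales = bounds[:, 1] - bounds[:, 0]
 logvolume = np.log(np.prod(scales))       # log(2 * 4)
 # Flat prior density over the box is 1/volume, i.e. logprior = -logvolume,
 # so logposterior + logvolume reduces to the log-likelihood alone:
 loglike = -0.5
 logposterior = loglike - logvolume
 print(np.isclose(logposterior + logvolume, loglike))  # True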
Example no. 5
 def format(self, record):
     self._fmt = ("[" +
                  ("%d : " % get_mpi_rank() if get_mpi() else "") +
                  "%(name)s" + (" %(asctime)s " if debug else "") +
                  "] " + {
                      logging.ERROR: "*ERROR* ",
                      logging.WARNING: "*WARNING* "
                  }.get(record.levelno, "") + "%(message)s")
     # Keep the style object's format string in sync, as in Example no. 2
     self._style._fmt = self._fmt
     return logging.Formatter.format(self, record)
Example no. 6
 def check_convergence_and_learn_proposal(self):
     """
     Checks the convergence of the sampling process (MPI only), and, if requested,
     learns a new covariance matrix for the proposal distribution from the covariance
     of the last samples.
     """
     if get_mpi():
         # Compute and gather means, covs and CL intervals of last half of chains
         mean = self.collection.mean(first=int(self.n() / 2))
         cov = self.collection.cov(first=int(self.n() / 2))
         mcsamples = self.collection._sampled_to_getdist_mcsamples(
             first=int(self.n() / 2))
         try:
             bound = np.array([[
                 mcsamples.confidence(i,
                                      limfrac=self.Rminus1_cl_level / 2.,
                                      upper=which)
                 for i in range(self.model.prior.d())
             ] for which in [False, True]]).T
             success_bounds = True
         except Exception:
             bound = None
             success_bounds = False
         Ns, means, covs, bounds = map(
             lambda x: np.array(get_mpi_comm().gather(x)),
             [self.n(), mean, cov, bound])
     else:
         # Compute and gather means, covs and CL intervals of last m-1 chain fractions
         m = 1 + self.Rminus1_single_split
         cut = int(self.collection.n() / m)
         if cut <= 1:
             self.log.error(
                 "Not enough points in chain to check convergence. "
                 "Increase `check_every` or reduce `Rminus1_single_split`.")
             raise HandledException
         Ns = (m - 1) * [cut]
         means = np.array([
             self.collection.mean(first=i * cut, last=(i + 1) * cut - 1)
             for i in range(1, m)
         ])
         covs = np.array([
             self.collection.cov(first=i * cut, last=(i + 1) * cut - 1)
             for i in range(1, m)
         ])
         # No logging of warnings temporarily, so getdist won't complain unnecessarily
         logging.disable(logging.WARNING)
         mcsampleses = [
             self.collection._sampled_to_getdist_mcsamples(
                 first=i * cut, last=(i + 1) * cut - 1)
             for i in range(1, m)
         ]
         logging.disable(logging.NOTSET)
         try:
             bounds = [
                 np.array([[
                     mcs.confidence(i,
                                    limfrac=self.Rminus1_cl_level / 2.,
                                    upper=which)
                     for i in range(self.model.prior.d())
                 ] for which in [False, True]]).T for mcs in mcsampleses
             ]
             success_bounds = True
         except Exception:
             bounds = None
             success_bounds = False
     # Compute convergence diagnostics
     if not get_mpi_rank():
         # "Within" or "W" term -- our "units" for assessing convergence
         # and our prospective new covariance matrix
         mean_of_covs = np.average(covs, weights=Ns, axis=0)
         # "Between" or "B" term
         # We don't weight with the number of samples in the chains here:
         # shorter chains will likely be outliers, and we want to notice them
         cov_of_means = np.atleast_2d(np.cov(means.T))  # , fweights=Ns)
         # For numerical stability, we turn mean_of_covs into correlation matrix:
         #   rho = (diag(Sigma))^(-1/2) * Sigma * (diag(Sigma))^(-1/2)
         # and apply the same transformation to the mean of covs (same eigenvals!)
         diagSinvsqrt = np.diag(np.power(np.diag(cov_of_means), -0.5))
         corr_of_means = diagSinvsqrt.dot(cov_of_means).dot(diagSinvsqrt)
         norm_mean_of_covs = diagSinvsqrt.dot(mean_of_covs).dot(
             diagSinvsqrt)
         # Cholesky of (normalized) mean of covs and eigvals of Linv*cov_of_means*L
         try:
             L = np.linalg.cholesky(norm_mean_of_covs)
         except np.linalg.LinAlgError:
             self.log.warning(
                 "Negative covariance eigenvectors. "
                 "This may mean that the covariance of the samples does not "
                 "contain enough information at this point. "
                 "Skipping this checkpoint")
             success = False
         else:
             Linv = np.linalg.inv(L)
             try:
                 eigvals = np.linalg.eigvalsh(
                     Linv.dot(corr_of_means).dot(Linv.T))
                 success = True
             except np.linalg.LinAlgError:
                 self.log.warning("Could not compute eigenvalues. "
                                  "Skipping this checkpoint.")
                 success = False
             if success:
                 Rminus1 = max(np.abs(eigvals))
                 # For real square matrices, a possible def of the cond number is:
                 condition_number = Rminus1 / min(np.abs(eigvals))
                 self.log.debug("Condition number = %g", condition_number)
                 self.log.debug("Eigenvalues = %r", eigvals)
                 self.log.info(
                     "Convergence of means: R-1 = %f after %d accepted steps"
                     % (Rminus1, (sum(Ns) if get_mpi() else self.n())) +
                     (" = sum(%r)" % list(Ns) if get_mpi() else ""))
                 # Have we converged in means?
                 # (criterion must be fulfilled twice in a row)
                 if max(Rminus1, self.Rminus1_last) < self.Rminus1_stop:
                     # Check the convergence of the bounds of the confidence intervals
                     # Same as R-1, but with the rms deviation from the mean bound
                     # in units of the mean standard deviation of the chains
                     if success_bounds:
                         Rminus1_cl = (np.std(bounds, axis=0).T /
                                       np.sqrt(np.diag(mean_of_covs)))
                         self.log.debug("normalized std's of bounds = %r",
                                        Rminus1_cl)
                         self.log.info(
                             "Convergence of bounds: R-1 = %f after %d " %
                             (np.max(Rminus1_cl),
                              (sum(Ns) if get_mpi() else self.n())) +
                             "accepted steps" +
                             (" = sum(%r)" % list(Ns) if get_mpi() else ""))
                         if np.max(Rminus1_cl) < self.Rminus1_cl_stop:
                             self.converged = True
                             self.log.info("The run has converged!")
                         self._Ns = Ns
                     else:
                         self.log.info(
                             "Computation of the bounds was not possible. "
                             "Waiting until the next checkpoint")
     if get_mpi():
         # Broadcast and save the convergence status and the last R-1 of means
         success = get_mpi_comm().bcast(
             (success if not get_mpi_rank() else None), root=0)
         if success:
             self.Rminus1_last = get_mpi_comm().bcast(
                 (Rminus1 if not get_mpi_rank() else None), root=0)
             self.converged = get_mpi_comm().bcast(
                 (self.converged if not get_mpi_rank() else None), root=0)
     else:
         if success:
             self.Rminus1_last = Rminus1
     # Do we want to learn a better proposal pdf?
     if self.learn_proposal and not self.converged and success:
         good_Rminus1 = (self.learn_proposal_Rminus1_max > self.Rminus1_last
                         > self.learn_proposal_Rminus1_min)
         if not good_Rminus1:
             if not get_mpi_rank():
                 self.log.info("Bad convergence statistics: "
                               "waiting until the next checkpoint.")
             return
         if get_mpi():
             if get_mpi_rank():
                 mean_of_covs = np.empty(
                     (self.model.prior.d(), self.model.prior.d()))
             get_mpi_comm().Bcast(mean_of_covs, root=0)
         else:
             mean_of_covs = covs[0]
         try:
             self.proposer.set_covariance(mean_of_covs)
         except Exception:
             self.log.debug(
                 "Updating covariance matrix failed unexpectedly. "
                 "Waiting until next checkpoint.")
         else:
             # Only report success if set_covariance did not raise
             if not get_mpi_rank():
                 self.log.info("Updated covariance matrix of proposal pdf.")
                 self.log.debug("%r", mean_of_covs)
     # Save checkpoint info
     self.write_checkpoint()
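
The diagnostic computed above, reduced to a standalone sketch: normalize the mean within-chain covariance, whiten the between-chain covariance of the means with its Cholesky factor, and take the largest absolute eigenvalue as R-1 (toy data, no MPI):

 import numpy as np

 rng = np.random.default_rng(0)
 chains = [rng.normal(size=(1000, 3)) for _ in range(4)]  # toy chains
 means = np.array([c.mean(axis=0) for c in chains])
 covs = np.array([np.cov(c.T) for c in chains])
 mean_of_covs = covs.mean(axis=0)               # "within" (W) term
 cov_of_means = np.atleast_2d(np.cov(means.T))  # "between" (B) term
 # Same normalization trick as above, for numerical stability
 d = np.diag(np.diag(cov_of_means) ** -0.5)
 corr_of_means = d @ cov_of_means @ d
 norm_mean_of_covs = d @ mean_of_covs @ d
 Linv = np.linalg.inv(np.linalg.cholesky(norm_mean_of_covs))
 Rminus1 = max(abs(np.linalg.eigvalsh(Linv @ corr_of_means @ Linv.T)))
 print(Rminus1)  # small for these independent, well-mixed toy chains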
Example no. 7
 def run(self):
     """
     Runs the sampler.
     """
     # Get first point, to be discarded -- not possible to determine its weight
     # Still, we need to compute derived parameters, since, as the proposal "blocked",
     # we may be saving the initial state of some block.
     # NB: if resuming but nothing was written (burn-in not finished): re-start
     self.log.info("Initial point:")
     if self.resuming and self.collection.n():
         # .iloc: the old pandas .ix indexer has been removed
         initial_point = (self.collection[
             self.collection.sampled_params].iloc[self.collection.n() -
                                                  1]).values.copy()
         logpost = -(self.collection[_minuslogpost].iloc[self.collection.n() -
                                                         1].copy())
         logpriors = -(self.collection[self.collection.prior_names].iloc[
             self.collection.n() - 1].copy())
         loglikes = -0.5 * (self.collection[self.collection.chi2_names].iloc[
             self.collection.n() - 1].copy())
         derived = (self.collection[self.collection.derived_params].iloc[
             self.collection.n() - 1].values.copy())
     else:
         initial_point = self.model.prior.reference(
             max_tries=self.max_tries)
         logpost, logpriors, loglikes, derived = self.model.logposterior(
             initial_point)
     self.current_point.add(initial_point,
                            derived=derived,
                            logpost=logpost,
                            logpriors=logpriors,
                            loglikes=loglikes)
     self.log.info(
         "\n%s",
         self.current_point.data.to_string(index=False,
                                           line_width=_line_width))
     # Initial dummy checkpoint (needed when 1st checkpoint not reached in prev. run)
     self.write_checkpoint()
     # Main loop!
     self.log.info("Sampling!" + (
         " (NB: nothing will be printed until %d burn-in samples " %
         self.burn_in + "have been obtained)" if self.burn_in else ""))
     while self.n() < self.effective_max_samples and not self.converged:
         self.get_new_sample()
         # Callback function
         if (hasattr(self, "callback_function_callable")
                 and not (max(self.n(), 1) % self.callback_every)
                 and self.current_point[_weight] == 1):
             self.callback_function_callable(self)
         # Checking convergence and (optionally) learning the covmat of the proposal
         if self.check_all_ready():
             self.check_convergence_and_learn_proposal()
         if self.n() == self.effective_max_samples:
             self.log.info(
                 "Reached maximum number of accepted steps allowed. "
                 "Stopping.")
     # Make sure the last batch of samples ( < output_every ) are written
     self.collection._out_update()
     if get_mpi():
         Ns = (lambda x: np.array(get_mpi_comm().gather(x)))(self.n())
     else:
         Ns = [self.n()]
     if not get_mpi_rank():
         self.log.info("Sampling complete after %d accepted steps.",
                       sum(Ns))
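
A minimal callback of the kind invoked in the loop above (body hypothetical): as `self.callback_function_callable(self)` shows, it receives the sampler instance itself every `callback_every` accepted points.

 def callback(sampler):
     # Inspect whatever progress information the sampler exposes
     print("accepted points so far:", sampler.n())
     print("converged:", sampler.converged)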
Example no. 8
 def initialize(self):
     """Initializes the sampler:
     creates the proposal distribution and draws the initial sample."""
     if not self.model.prior.d():
         raise LoggedError(self.log,
                           "No parameters being varied for sampler")
     self.log.debug("Initializing")
     # MARKED FOR DEPRECATION IN v3.0
     if getattr(self, "oversample", None) is not None:
         self.log.warning(
             "*DEPRECATION*: `oversample` will be deprecated in the "
             "next version. Oversampling is now requested by setting "
             "`oversample_power` > 0.")
     # END OF DEPRECATION BLOCK
     # MARKED FOR DEPRECATION IN v3.0
     if getattr(self, "check_every", None) is not None:
         self.log.warning(
             "*DEPRECATION*: `check_every` will be deprecated in the "
             "next version. Please use `learn_every` instead.")
         # BEHAVIOUR TO BE REPLACED BY ERROR:
         self.learn_every = getattr(self, "check_every")
     # END OF DEPRECATION BLOCK
     if self.callback_every is None:
         self.callback_every = self.learn_every
     self._quants_d_units = []
     for q in ["max_tries", "learn_every", "callback_every", "burn_in"]:
         number = NumberWithUnits(getattr(self, q), "d", dtype=int)
         self._quants_d_units.append(number)
         setattr(self, q, number)
     self.output_every = NumberWithUnits(self.output_every, "s", dtype=int)
     if is_main_process():
         if self.output.is_resuming() and (max(self.mpi_size or 0, 1) !=
                                           max(get_mpi_size(), 1)):
             raise LoggedError(
                 self.log,
                 "Cannot resume a run with a different number of chains: "
                 "was %d and now is %d.", max(self.mpi_size, 1),
                 max(get_mpi_size(), 1))
         if more_than_one_process():
             if get_mpi().Get_version()[0] < 3:
                 raise LoggedError(
                     self.log, "MPI use requires MPI version 3.0 or "
                     "higher to support IALLGATHER.")
     sync_processes()
     # One collection per MPI process: `name` is the MPI rank + 1
     name = str(1 + (get_mpi_rank() or 0))
     self.collection = Collection(self.model,
                                  self.output,
                                  name=name,
                                  resuming=self.output.is_resuming())
     self.current_point = OneSamplePoint(self.model)
     # Use standard MH steps by default
     self.get_new_sample = self.get_new_sample_metropolis
     # Prepare callback function
     if self.callback_function is not None:
         self.callback_function_callable = (get_external_function(
             self.callback_function))
     # Useful for getting last points added inside callback function
     self.last_point_callback = 0
     # Monitoring/restore progress
     if is_main_process():
         cols = [
             "N", "timestamp", "acceptance_rate", "Rminus1", "Rminus1_cl"
         ]
         self.progress = DataFrame(columns=cols)
         self.i_learn = 1
         if self.output and not self.output.is_resuming():
             with open(self.progress_filename(), "w",
                       encoding="utf-8") as progress_file:
                 progress_file.write("# " +
                                     " ".join(self.progress.columns) + "\n")
     # Get first point, to be discarded -- not possible to determine its weight
     # Still, we need to compute derived parameters, since, as the proposal "blocked",
     # we may be saving the initial state of some block.
     # NB: if resuming but nothing was written (burn-in not finished): re-start
     if self.output.is_resuming() and len(self.collection):
         initial_point = (self.collection[
             self.collection.sampled_params].iloc[len(self.collection) -
                                                  1]).values.copy()
         logpost = -(self.collection[_minuslogpost].iloc[
             len(self.collection) - 1].copy())
         logpriors = -(self.collection[self.collection.minuslogprior_names].
                       iloc[len(self.collection) - 1].copy())
         loglikes = -0.5 * (self.collection[self.collection.chi2_names].
                            iloc[len(self.collection) - 1].copy())
         derived = (self.collection[self.collection.derived_params].iloc[
             len(self.collection) - 1].values.copy())
     else:
         # NB: max_tries adjusted to dim instead of #cycles (blocking not computed yet)
         self.max_tries.set_scale(self.model.prior.d())
         self.log.info(
             "Getting initial point... (this may take a few seconds)")
         initial_point, logpost, logpriors, loglikes, derived = \
             self.model.get_valid_point(max_tries=self.max_tries.value)
         # If resuming but no existing chain, assume failed run and ignore blocking
         # if speeds measurement requested
         if self.output.is_resuming() and not len(self.collection) \
            and self.measure_speeds:
             self.blocking = None
         if self.measure_speeds and self.blocking:
             self.log.warning(
                 "Parameter blocking manually fixed: speeds will not be measured."
             )
         elif self.measure_speeds:
             n = None if self.measure_speeds is True else int(
                 self.measure_speeds)
             self.model.measure_and_set_speeds(n=n, discard=0)
     self.set_proposer_blocking()
     self.set_proposer_covmat(load=True)
     self.current_point.add(initial_point,
                            derived=derived,
                            logpost=logpost,
                            logpriors=logpriors,
                            loglikes=loglikes)
     self.log.info("Initial point: %s", self.current_point)
     # Max #(learn+convergence checks) to wait,
     # in case one process dies without sending MPI_ABORT
     self.been_waiting = 0
     self.max_waiting = max(50, self.max_tries.unit_value)
     # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
     self.burn_in_left = self.burn_in.value * self.current_point.output_thin + 1
     # Initial dummy checkpoint
     # (needed when 1st "learn point" not reached in prev. run)
     self.write_checkpoint()
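
A sketch of the "d" unit convention that `NumberWithUnits` handles above (helper hypothetical, not cobaya's API): a value like "40d" scales with the number of sampled dimensions, while plain numbers pass through unchanged.

 def resolve_d_units(value, dimension):
     # "40d" -> 40 * dimension
     if isinstance(value, str) and value.endswith("d"):
         return int(float(value[:-1]) * dimension)
     return int(value)

 print(resolve_d_units("40d", 5))  # 200
 print(resolve_d_units(100, 5))    # 100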
Example no. 9
 def initialize(self):
     """Imports the PolyChord sampler and prepares its arguments."""
     if not get_mpi_rank():  # rank = 0 (MPI master) or None (no MPI)
         self.log.info("Initializing")
     # If path not given, try using general path to modules
     if not self.path and self.path_install:
         self.path = get_path(self.path_install)
     if self.path:
         if not get_mpi_rank():
             self.log.info("Importing *local* PolyChord from " + self.path)
             if not os.path.exists(os.path.realpath(self.path)):
                 self.log.error("The given path does not exist.")
                 raise HandledException
         pc_build_path = get_build_path(self.path)
         if not pc_build_path:
             self.log.error("Either PolyChord is not in the given folder, "
                            "'%s', or you have not compiled it.", self.path)
             raise HandledException
         # Inserting the previously found path into the list of import folders
         sys.path.insert(0, pc_build_path)
     else:
         self.log.info("Importing *global* PolyChord.")
     try:
         import pypolychord
         from pypolychord.settings import PolyChordSettings
         self.pc = pypolychord
     except ImportError:
         self.log.error(
             "Couldn't find the PolyChord python interface. "
             "Make sure that you have compiled it, and that you either\n"
             " (a) specify a path (you didn't) or\n"
             " (b) install the Python interface globally with\n"
             "     '/path/to/PolyChord/python setup.py install --user'")
         raise HandledException
     # Prepare arguments and settings
     self.nDims = self.model.prior.d()
     self.nDerived = (len(self.model.parameterization.derived_params()) +
                      len(self.model.prior) + len(self.model.likelihood._likelihoods))
     if self.logzero is None:
         self.logzero = np.nan_to_num(-np.inf)
     if self.max_ndead == np.inf:
         self.max_ndead = -1
     for p in ["nlive", "num_repeats", "nprior", "max_ndead"]:
         setattr(self, p, read_dnumber(getattr(self, p), self.nDims, dtype=int))
     # Fill the automatic ones
     if getattr(self, "feedback", None) is None:
         values = {logging.CRITICAL: 0, logging.ERROR: 0, logging.WARNING: 0,
                   logging.INFO: 1, logging.DEBUG: 2}
         self.feedback = values[self.log.getEffectiveLevel()]
     try:
         output_folder = getattr(self.output, "folder")
         output_prefix = getattr(self.output, "prefix") or ""
         self.read_resume = self.resuming
     except AttributeError:
         # dummy output -- no resume!
         self.read_resume = False
         from tempfile import gettempdir
         output_folder = gettempdir()
         if not get_mpi_rank():
             from random import random
             output_prefix = hex(int(random() * 16 ** 6))[2:]
         else:
             output_prefix = None
         if get_mpi():
             output_prefix = get_mpi_comm().bcast(output_prefix, root=0)
     self.base_dir = os.path.join(output_folder, self.base_dir)
     self.file_root = output_prefix
     if not get_mpi_rank():
         # Creating output folder, if it does not exist (just one process)
         if not os.path.exists(self.base_dir):
             os.makedirs(self.base_dir)
         # Idem, a clusters folder if needed -- notice that PolyChord's default
         # is "True", here "None", hence the funny condition below
         if self.do_clustering is not False:  # None here means "default"
             try:
                 os.makedirs(os.path.join(self.base_dir, "clusters"))
             except OSError:  # exists!
                 pass
         self.log.info("Storing raw PolyChord output in '%s'.",
                       self.base_dir)
     # Exploiting the speed hierarchy
     speeds, blocks = self.model.likelihood._speeds_of_params(int_speeds=True)
     blocks_flat = list(chain(*blocks))
     self.ordering = [
         blocks_flat.index(p) for p in self.model.parameterization.sampled_params()]
     self.grade_dims = [len(block) for block in blocks]
     #        self.grade_frac = np.array(
     #            [i*j for i,j in zip(self.grade_dims, speeds)])
     #        self.grade_frac = (
     #            self.grade_frac/sum(self.grade_frac))
     # Disabled for now. We need a way to override the "time" part of the meaning of grade_frac
     self.grade_frac = [1 / len(self.grade_dims) for _ in self.grade_dims]
     # Assign settings
     pc_args = ["nlive", "num_repeats", "nprior", "do_clustering",
                "precision_criterion", "max_ndead", "boost_posterior",
                "feedback", "logzero", "update_files", "posteriors", "equals",
                "cluster_posteriors", "write_resume", "read_resume",
                "write_stats", "write_live", "write_dead", "base_dir",
                "file_root", "grade_frac", "grade_dims"]
     self.pc_settings = PolyChordSettings(
         self.nDims, self.nDerived, seed=(self.seed if self.seed is not None else -1),
         **{p: getattr(self, p) for p in pc_args if getattr(self, p) is not None})
     # prior conversion from the hypercube
     bounds = self.model.prior.bounds(
         confidence_for_unbounded=self.confidence_for_unbounded)
     # Check if priors are bounded (nan's to inf)
     inf = np.where(np.isinf(bounds))
     if len(inf[0]):
         params_names = self.model.parameterization.sampled_params()
         params = [params_names[i] for i in sorted(list(set(inf[0])))]
         self.log.error("PolyChord needs bounded priors, but the parameter(s) '"
                        "', '".join(params) + "' is(are) unbounded.")
         raise HandledException
     locs = bounds[:, 0]
     scales = bounds[:, 1] - bounds[:, 0]
     # This function re-scales the parameters AND puts them in the right order
     self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales).tolist()
     # We will need the volume of the prior domain, since PolyChord divides by it
     self.logvolume = np.log(np.prod(scales))
     # Done!
     if not get_mpi_rank():
         self.log.info("Calling PolyChord with arguments:")
         for p, v in inspect.getmembers(self.pc_settings, lambda a: not (callable(a))):
             if not p.startswith("_"):
                 self.log.info("  %s: %s", p, v)
Example no. 10
 def check_convergence_and_learn_proposal(self):
     """
     Checks the convergence of the sampling process (MPI only), and, if requested,
     learns a new covariance matrix for the proposal distribution from the covariance
     of the last samples.
     """
     # Compute and gather means, covs and CL intervals of last half of chains
     mean = self.collection.mean(first=int(self.n() / 2))
     cov = self.collection.cov(first=int(self.n() / 2))
     # No logging of warnings temporarily, so getdist won't complain unnecessarily
     logging.disable(logging.WARNING)
     mcsamples = self.collection.sampled_to_getdist_mcsamples(
         first=int(self.n() / 2))
     logging.disable(logging.NOTSET)
     bound = np.array([[
         mcsamples.confidence(i,
                              limfrac=self.Rminus1_cl_level / 2.,
                              upper=which) for i in range(self.prior.d())
     ] for which in [False, True]]).T
     Ns, means, covs, bounds = map(
         lambda x: np.array((get_mpi_comm().gather(x)
                             if get_mpi() else [x])),
         [self.n(), mean, cov, bound])
     # Compute convergence diagnostics
     if get_mpi():
         if get_mpi_rank() == 0:
             # "Within" or "W" term -- our "units" for assessing convergence
             # and our prospective new covariance matrix
             mean_of_covs = np.average(covs, weights=Ns, axis=0)
             # "Between" or "B" term
             # We don't weight with the number of samples in the chains here:
             # shorter chains will likely be outliers, and we want to notice them
             cov_of_means = np.cov(means.T)  # , fweights=Ns)
             # For numerical stability, we turn mean_of_covs into correlation matrix:
             #   rho = (diag(Sigma))^(-1/2) * Sigma * (diag(Sigma))^(-1/2)
             # and apply the same transformation to the mean of covs (same eigenvals!)
             diagSinvsqrt = np.diag(np.power(np.diag(cov_of_means), -0.5))
             corr_of_means = diagSinvsqrt.dot(cov_of_means).dot(
                 diagSinvsqrt)
             norm_mean_of_covs = diagSinvsqrt.dot(mean_of_covs).dot(
                 diagSinvsqrt)
             # Cholesky of (normalized) mean of covs and eigvals of Linv*cov_of_means*L
             try:
                 L = np.linalg.cholesky(norm_mean_of_covs)
             except np.linalg.LinAlgError:
                 self.log.warning(
                     "Negative covariance eigenvectors. "
                     "This may mean that the covariance of the samples does not "
                     "contain enough information at this point. "
                     "Skipping this checkpoint")
                 success = False
             else:
                 Linv = np.linalg.inv(L)
                 eigvals = np.linalg.eigvalsh(
                     Linv.dot(corr_of_means).dot(Linv.T))
                 Rminus1 = max(np.abs(eigvals))
                 # For real square matrices, a possible def of the cond number is:
                 condition_number = Rminus1 / min(np.abs(eigvals))
                 self.log.debug("Condition number = %g", condition_number)
                 self.log.debug("Eigenvalues = %r", eigvals)
                 self.log.info(
                     "Convergence of means: R-1 = %f after %d samples",
                     Rminus1, self.n())
                 success = True
                 # Have we converged in means?
                 # (criterion must be fulfilled twice in a row)
                 if (max(Rminus1, getattr(self, "Rminus1_last", np.inf)) <
                         self.Rminus1_stop):
                     # Check the convergence of the bounds of the confidence intervals
                     # Same as R-1, but with the rms deviation from the mean bound
                     # in units of the mean standard deviation of the chains
                     Rminus1_cl = (np.std(bounds, axis=0).T /
                                   np.sqrt(np.diag(mean_of_covs)))
                     self.log.debug("normalized std's of bounds = %r",
                                    Rminus1_cl)
                     self.log.info(
                         "Convergence of bounds: R-1 = %f after %d samples",
                         np.max(Rminus1_cl), self.n())
                     if np.max(Rminus1_cl) < self.Rminus1_cl_stop:
                         self.converged = True
                         self.log.info("The run has converged!")
         # Broadcast and save the convergence status and the last R-1 of means
         success = get_mpi_comm().bcast(
             (success if not get_mpi_rank() else None), root=0)
         if success:
             self.Rminus1_last = get_mpi_comm().bcast(
                 (Rminus1 if not get_mpi_rank() else None), root=0)
             self.converged = get_mpi_comm().bcast(
                 (self.converged if not get_mpi_rank() else None), root=0)
     else:  # No MPI
         pass
     # Do we want to learn a better proposal pdf?
     if self.learn_proposal and not self.converged:
         # update iff (not MPI, or MPI and "good" Rminus1)
         if get_mpi():
             good_Rminus1 = (self.learn_proposal_Rminus1_max >
                             self.Rminus1_last >
                             self.learn_proposal_Rminus1_min)
             if not good_Rminus1:
                 if not get_mpi_rank():
                     self.log.info("Bad convergence statistics: "
                                   "waiting until the next checkpoint.")
                 return
         if get_mpi():
             if get_mpi_rank():
                 mean_of_covs = np.empty((self.prior.d(), self.prior.d()))
             get_mpi_comm().Bcast(mean_of_covs, root=0)
         else:
             mean_of_covs = covs[0]
         self.proposer.set_covariance(mean_of_covs)
         if not get_mpi_rank():
             self.log.info("Updated covariance matrix of proposal pdf.")
             self.log.debug("%r", mean_of_covs)