Ejemplo n.º 1
0
def get_sampler(info_sampler: SamplersDict,
                model: Model,
                output: Optional[Output] = None,
                packages_path: Optional[str] = None) -> 'Sampler':
    """Build and return the sampler described by ``info_sampler``.

    The sampler info is copied, sanity-checked and completed with
    ``update_info``; the sampler class is then resolved, checked against any
    previously stored info of ``output`` and instantiated. If ``output`` is
    truthy, the model info extended with the sampler info and the output
    prefix is dumped through it.

    :param info_sampler: mapping with the sampler info (not a file name nor
        a yaml string).
    :param model: model passed on to the sampler constructor.
    :param output: output driver; an ``OutputDummy`` is used when ``None``.
    :param packages_path: forwarded to the sampler constructor.
    :return: the sampler instance.
    :raises AssertionError: if ``info_sampler`` is not a mapping.
    """
    assert isinstance(info_sampler, Mapping), (
        "The first argument must be a dictionary with the info needed for the sampler. "
        "If you were trying to pass the name of an input file instead, "
        "load it first with 'cobaya.input.load_input', "
        "or, if you were passing a yaml string, load it with 'cobaya.yaml.yaml_load'."
    )
    log = get_logger(__name__)
    # Work on a copy, so that the caller's dictionary is left alone
    info_sampler = deepcopy_where_possible(info_sampler)
    output = OutputDummy() if output is None else output
    # Sanity check, then fill in the defaults
    check_sane_info_sampler(info_sampler)
    wrapped_info = {"sampler": info_sampler}
    updated_info = update_info(wrapped_info)["sampler"]  # type: ignore
    if is_debug(log):
        log.debug("Input info updated with defaults (dumped to YAML):\n%s",
                  yaml_dump(updated_info))
    # Resolve the sampler class and compare with previously stored info, if any
    sampler_name, sampler_class = get_sampler_name_and_class(updated_info,
                                                             logger=log)
    previous_info = output.reload_updated_info(use_cache=True) or {}
    check_sampler_info(previous_info.get("sampler"),
                       updated_info,
                       is_resuming=output.is_resuming())
    # Let the sampler class check the force/resume settings before instantiation
    sampler_class.check_force_resume(output, info=updated_info[sampler_name])
    sampler = sampler_class(updated_info[sampler_name],
                            model,
                            output,
                            packages_path=packages_path)
    if not output:
        return sampler
    # There is a real output: dump the updated info through it
    to_dump = model.info()
    to_dump["sampler"] = {sampler_name: sampler.info()}
    to_dump["output"] = os.path.join(output.folder, output.prefix)
    output.check_and_dump_info(None, to_dump, check_compatible=False)
    return sampler
Ejemplo n.º 2
0
def get_sampler(info_sampler, model, output=None, packages_path=None):
    """Build and return the sampler described by ``info_sampler``.

    The sampler info is copied, sanity-checked and completed with
    ``update_info``; the sampler class is then resolved, checked against any
    previously stored info of ``output`` and instantiated. If ``output`` is
    truthy, the model info extended with the sampler info and the output
    prefix is dumped through it.

    :param info_sampler: mapping with the sampler info (not a file name nor
        a yaml string).
    :param model: model passed on to the sampler constructor.
    :param output: output driver; an ``OutputDummy`` is used when ``None``.
    :param packages_path: forwarded to the sampler constructor.
    :return: the sampler instance.
    :raises AssertionError: if ``info_sampler`` is not a mapping.
    """
    assert isinstance(info_sampler, Mapping), (
        "The first argument must be a dictionary with the info needed for the sampler. "
        "If you were trying to pass the name of an input file instead, "
        "load it first with 'cobaya.input.load_input', "
        "or, if you were passing a yaml string, load it with 'cobaya.yaml.yaml_load'."
    )
    # The logger is named after the last component of the module path
    log = logging.getLogger(__name__.rsplit(".", 1)[-1])
    # Work on a copy, so that the caller's dictionary is left alone
    info_sampler = deepcopy_where_possible(info_sampler)
    output = OutputDummy() if output is None else output
    # Sanity check, then fill in the defaults
    check_sane_info_sampler(info_sampler)
    wrapped_info = {kinds.sampler: info_sampler}
    updated_info = update_info(wrapped_info)[kinds.sampler]
    debug_enabled = logging.root.getEffectiveLevel() <= logging.DEBUG
    if debug_enabled:
        log.debug("Input info updated with defaults (dumped to YAML):\n%s",
                  yaml_dump(updated_info))
    # Resolve the sampler class and compare with previously stored info, if any
    sampler_name, sampler_class = get_sampler_name_and_class(updated_info)
    previous_info = output.reload_updated_info(use_cache=True) or {}
    check_sampler_info(previous_info.get(kinds.sampler),
                       updated_info,
                       is_resuming=output.is_resuming())
    # Let the sampler class check the force/resume settings before instantiation
    sampler_class.check_force_resume(output, info=updated_info[sampler_name])
    sampler = sampler_class(updated_info[sampler_name],
                            model,
                            output,
                            packages_path=packages_path)
    if not output:
        return sampler
    # There is a real output: dump the updated info through it
    to_dump = model.info()
    to_dump[kinds.sampler] = {sampler_name: sampler.info()}
    to_dump[_output_prefix] = os.path.join(output.folder, output.prefix)
    output.check_and_dump_info(None, to_dump, check_compatible=False)
    return sampler
Ejemplo n.º 3
0
 def initialize(self):
     """Initialize the sampler: build the proposal and prepare the first point.

     In order, this method:

     * passes the options ``burn_in``, ``max_tries``, ``output_every``,
       ``check_every`` and ``callback_every`` through ``read_dnumber``,
       together with the prior dimensionality and ``dtype=int``;
     * refuses to resume with a different number of chains;
     * when not resuming and ``self.output`` is truthy, removes the covmat
       file of a previous run (unless it is the very file given as
       ``self.covmat``) and the collection files numbered 1, 2, ... up to
       the first one that cannot be removed;
     * creates this process' ``Collection`` and the current ``OnePoint``;
     * configures plain Metropolis steps, oversampling or dragging;
     * builds the ``BlockedProposer`` and sets its covariance matrix, loaded
       from ``self.covmat_filename()`` when resuming and obtained from
       ``self.initial_proposal_covmat`` otherwise;
     * resolves ``self.callback_function`` with ``get_external_function``
       when one was given.

     Raises:
         HandledException: when resuming with a different number of chains,
             when both ``oversample`` and ``drag`` are set, or when dragging
             is requested but all speeds are equal.
     """
     self.log.debug("Initializing")
     for p in [
             "burn_in", "max_tries", "output_every", "check_every",
             "callback_every"
     ]:
         setattr(
             self, p,
             read_dnumber(getattr(self, p), self.model.prior.d(),
                          dtype=int))
     if self.callback_every is None:
         self.callback_every = self.check_every
     # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
     self.burn_in_left = self.burn_in + 1
     # Max # checkpoints to wait, in case one process dies without sending MPI_ABORT
     self.been_waiting = 0
     self.max_waiting = max(50, self.max_tries / self.model.prior.d())
     if self.resuming and (max(self.mpi_size or 0, 1) != max(
             get_mpi_size(), 1)):
         # `self.mpi_size` may be None (the test above already allows for
         # that), so it needs the same `or 0` guard here: `max(None, 1)`
         # raises TypeError in Python 3 and would mask the intended error.
         self.log.error(
             "Cannot resume a sample with a different number of chains: "
             "was %d and now is %d.", max(self.mpi_size or 0, 1),
             max(get_mpi_size(), 1))
         raise HandledException
     if not self.resuming and self.output:
         # Delete previous files (if not "forced", the run would have already failed)
         if os.path.abspath(self.covmat_filename()) != os.path.abspath(
                 str(self.covmat)):
             try:
                 os.remove(self.covmat_filename())
             except OSError:
                 # Best effort: there may be no previous covmat file
                 pass
         # There may be more chains than expected,
         # if #ranks was bigger in a previous run
         i = 0
         while True:
             i += 1
             collection_filename, _ = self.output.prepare_collection(str(i))
             try:
                 os.remove(collection_filename)
             except OSError:
                 # Stop at the first collection file that cannot be removed
                 break
     # One collection per MPI process: `name` is the MPI rank + 1
     # (a rank of None is treated as 0)
     rank = get_mpi_rank()
     name = str(1 + (rank if rank is not None else 0))
     self.collection = Collection(self.model,
                                  self.output,
                                  name=name,
                                  resuming=self.resuming)
     self.current_point = OnePoint(self.model, OutputDummy({}), name=name)
     # Use standard MH steps by default
     self.get_new_sample = self.get_new_sample_metropolis
     # Prepare oversampling / dragging if applicable
     self.effective_max_samples = self.max_samples
     if self.oversample and self.drag:
         self.log.error("Choose either oversampling or dragging, not both.")
         raise HandledException
     if self.oversample:
         factors, blocks = self.model.likelihood._speeds_of_params(
             int_speeds=True)
         self.oversampling_factors = factors
         self.log.info("Oversampling with factors:\n" + "\n".join([
             "   %d : %r" % (f, b)
             for f, b in zip(self.oversampling_factors, blocks)
         ]))
         self.i_last_slow_block = None
         # No way right now to separate slow and fast
         slow_params = list(self.model.parameterization.sampled_params())
         # NOTE(review): unlike the other two branches, this one does not set
         # `self.n_slow` -- confirm it is not needed when oversampling.
     elif self.drag:
         speeds, blocks = self.model.likelihood._speeds_of_params(
             fast_slow=True, int_speeds=True)
         # For now, no blocking inside either fast or slow: just 2 blocks
         self.i_last_slow_block = 0
         if np.all(speeds == speeds[0]):
             self.log.error(
                 "All speeds are equal or too similar: cannot drag! "
                 "Make sure to define accurate likelihoods' speeds.")
             raise HandledException
         # Make the 1st factor 1:
         speeds = [1, speeds[1] / speeds[0]]
         # Target: dragging step taking as long as slow step
         self.drag_interp_steps = self.drag * speeds[1]
         # Per dragging step, the (fast) posterior is evaluated *twice*,
         self.drag_interp_steps /= 2
         self.drag_interp_steps = int(np.round(self.drag_interp_steps))
         fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
         # Not too much or too little dragging:
         # the limits are given per fast parameter; None means "no limit"
         drag_limits = [
             (int(limit) * len(fast_params) if limit is not None else limit)
             for limit in self.drag_limits
         ]
         if drag_limits[
                 0] is not None and self.drag_interp_steps < drag_limits[0]:
             self.log.warning(
                 "Number of dragging steps clipped from below: was not "
                 "enough to efficiently explore the fast directions -- "
                 "avoid this limit by decreasing 'drag_limits[0]'.")
             self.drag_interp_steps = drag_limits[0]
         if drag_limits[
                 1] is not None and self.drag_interp_steps > drag_limits[1]:
             self.log.warning(
                 "Number of dragging steps clipped from above: "
                 "excessive, probably inefficient, exploration of the "
                 "fast directions -- "
                 "avoid this limit by increasing 'drag_limits[1]'.")
             self.drag_interp_steps = drag_limits[1]
         # Re-scale steps between checkpoint and callback to the slow dimensions only
         slow_params = list(chain(*blocks[:1 + self.i_last_slow_block]))
         self.n_slow = len(slow_params)
         for p in ["check_every", "callback_every"]:
             setattr(
                 self, p,
                 int(getattr(self, p) * self.n_slow / self.model.prior.d()))
         self.log.info("Dragging with oversampling per step:\n" +
                       "\n".join([
                           "   %d : %r" % (f, b)
                           for f, b in zip([1, self.drag_interp_steps],
                                           [blocks[0], fast_params])
                       ]))
         self.get_new_sample = self.get_new_sample_dragging
         # NOTE(review): `self.oversampling_factors` is not assigned in this
         # branch but is read below -- presumably set elsewhere; confirm.
     else:
         _, blocks = self.model.likelihood._speeds_of_params()
         self.oversampling_factors = [1 for _ in blocks]
         slow_params = list(self.model.parameterization.sampled_params())
         self.n_slow = len(slow_params)
         # NOTE(review): `self.i_last_slow_block` is not assigned in this
         # branch but is read below -- presumably set elsewhere; confirm.
     # Turn parameter names into indices
     # (the list of sampled parameters is built once, not once per parameter)
     sampled_params = list(self.model.parameterization.sampled_params())
     self.blocks = [[sampled_params.index(p) for p in b] for b in blocks]
     self.proposer = BlockedProposer(
         self.blocks,
         oversampling_factors=self.oversampling_factors,
         i_last_slow_block=self.i_last_slow_block,
         proposal_scale=self.proposal_scale)
     # Build the initial covariance matrix of the proposal, or load from checkpoint
     if self.resuming:
         covmat = np.loadtxt(self.covmat_filename())
         self.log.info("Covariance matrix from checkpoint.")
     else:
         covmat = self.initial_proposal_covmat(slow_params=slow_params)
         self.log.info("Initial covariance matrix.")
     self.log.debug(
         "Sampling with covmat:\n%s",
         DataFrame(
             covmat,
             columns=self.model.parameterization.sampled_params(),
             index=self.model.parameterization.sampled_params()).to_string(
                 line_width=_line_width))
     self.proposer.set_covariance(covmat)
     # Prepare callback function
     if self.callback_function is not None:
         self.callback_function_callable = (get_external_function(
             self.callback_function))