def get_sampler(info_sampler: SamplersDict, model: Model,
                output: Optional[Output] = None,
                packages_path: Optional[str] = None) -> 'Sampler':
    """
    Instantiate the sampler described by ``info_sampler`` for the given model.

    The input info is validated, completed with defaults, checked against any
    previous run found in ``output`` (resume/force compatibility), and finally
    used to construct the sampler instance. If a real output driver is given,
    the fully-updated info is dumped next to the products.

    :param info_sampler: dict with the sampler block (name -> options).
    :param model: the :class:`Model` to sample from.
    :param output: output driver; a dummy one is used when ``None``.
    :param packages_path: path to external packages, passed to the sampler.
    :return: the initialized sampler instance.
    """
    assert isinstance(info_sampler, Mapping), (
        "The first argument must be a dictionary with the info needed for the sampler. "
        "If you were trying to pass the name of an input file instead, "
        "load it first with 'cobaya.input.load_input', "
        "or, if you were passing a yaml string, load it with 'cobaya.yaml.yaml_load'.")
    log = get_logger(__name__)
    # Work on a copy so the caller's dict is never mutated by the update below
    info_sampler = deepcopy_where_possible(info_sampler)
    if output is None:
        output = OutputDummy()
    # Validate the raw block, then fill in defaults
    check_sane_info_sampler(info_sampler)
    updated_info = update_info({"sampler": info_sampler})["sampler"]  # type: ignore
    if is_debug(log):
        log.debug("Input info updated with defaults (dumped to YAML):\n%s",
                  yaml_dump(updated_info))
    # Resolve the sampler class and check resume/force compatibility
    name, sampler_class = get_sampler_name_and_class(updated_info, logger=log)
    previous_info = (output.reload_updated_info(use_cache=True) or {}).get("sampler")
    check_sampler_info(previous_info, updated_info, is_resuming=output.is_resuming())
    # Refuse to overwrite / resume inconsistently
    sampler_class.check_force_resume(output, info=updated_info[name])
    # Build the sampler
    sampler = sampler_class(updated_info[name], model, output,
                            packages_path=packages_path)
    if output:
        # Dump the fully-updated info alongside the run products
        to_dump = model.info()
        to_dump["sampler"] = {name: sampler.info()}
        to_dump["output"] = os.path.join(output.folder, output.prefix)
        output.check_and_dump_info(None, to_dump, check_compatible=False)
    return sampler
def get_sampler(info_sampler, model, output=None, packages_path=None):
    """
    Instantiate the sampler described by ``info_sampler`` for the given model.

    Validates and completes the input info, checks it against any previous run
    stored in ``output`` (resume/force compatibility), constructs the sampler,
    and, for real outputs, dumps the fully-updated info to disk.

    :param info_sampler: dict with the sampler block (name -> options).
    :param model: the model to sample from.
    :param output: output driver; a dummy one is used when ``None``.
    :param packages_path: path to external packages, passed to the sampler.
    :return: the initialized sampler instance.
    """
    assert isinstance(info_sampler, Mapping), (
        "The first argument must be a dictionary with the info needed for the sampler. "
        "If you were trying to pass the name of an input file instead, "
        "load it first with 'cobaya.input.load_input', "
        "or, if you were passing a yaml string, load it with 'cobaya.yaml.yaml_load'.")
    log = logging.getLogger(__name__.split(".")[-1])
    # Copy so the caller's dict is never mutated by the update below
    info_sampler = deepcopy_where_possible(info_sampler)
    if output is None:
        output = OutputDummy()
    # Validate the raw block, then fill in defaults
    check_sane_info_sampler(info_sampler)
    updated_info = update_info({kinds.sampler: info_sampler})[kinds.sampler]
    if logging.root.getEffectiveLevel() <= logging.DEBUG:
        log.debug("Input info updated with defaults (dumped to YAML):\n%s",
                  yaml_dump(updated_info))
    # Resolve the sampler class and check resume/force compatibility
    name, sampler_class = get_sampler_name_and_class(updated_info)
    old_info = (output.reload_updated_info(use_cache=True) or {}).get(kinds.sampler)
    check_sampler_info(old_info, updated_info, is_resuming=output.is_resuming())
    # Refuse to overwrite / resume inconsistently
    sampler_class.check_force_resume(output, info=updated_info[name])
    # Build the sampler
    sampler = sampler_class(updated_info[name], model, output,
                            packages_path=packages_path)
    if output:
        # Dump the fully-updated info alongside the run products
        to_dump = model.info()
        to_dump[kinds.sampler] = {name: sampler.info()}
        to_dump[_output_prefix] = os.path.join(output.folder, output.prefix)
        output.check_and_dump_info(None, to_dump, check_compatible=False)
    return sampler
def initialize(self):
    """
    Initializes the sampler: creates the proposal distribution and draws the
    initial sample.

    Side effects: sets many attributes on ``self`` (burn-in counters, blocking,
    proposer, collection), and — on a fresh (non-resumed) run with real output —
    deletes leftover covmat and chain files from previous runs.
    """
    self.log.debug("Initializing")
    # Normalize per-run option values; read_dnumber presumably resolves values
    # given as fractions of the dimension into integers -- TODO confirm
    for p in ["burn_in", "max_tries", "output_every", "check_every",
              "callback_every"]:
        setattr(self, p,
                read_dnumber(getattr(self, p), self.model.prior.d(), dtype=int))
    if self.callback_every is None:
        self.callback_every = self.check_every
    # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
    self.burn_in_left = self.burn_in + 1
    # Max # checkpoints to wait, in case one process dies without sending MPI_ABORT
    self.been_waiting = 0
    self.max_waiting = max(50, self.max_tries / self.model.prior.d())
    # A resumed run must use the same number of MPI chains as the original one
    if self.resuming and (max(self.mpi_size or 0, 1) != max(get_mpi_size(), 1)):
        self.log.error(
            "Cannot resume a sample with a different number of chains: "
            "was %d and now is %d.", max(self.mpi_size, 1), max(get_mpi_size(), 1))
        raise HandledException
    if not self.resuming and self.output:
        # Delete previous files (if not "forced", the run would have already failed)
        # Only remove the checkpoint covmat, never a user-provided covmat file
        if ((os.path.abspath(self.covmat_filename()) !=
             os.path.abspath(str(self.covmat)))):
            try:
                os.remove(self.covmat_filename())
            except OSError:
                pass
        # There may be more chains than expected,
        # if #ranks was bigger in a previous run: keep deleting until one is missing
        i = 0
        while True:
            i += 1
            collection_filename, _ = self.output.prepare_collection(str(i))
            try:
                os.remove(collection_filename)
            except OSError:
                break
    # One collection per MPI process: `name` is the MPI rank + 1
    # (the lambda maps a None rank -- no MPI -- to 0, i.e. chain "1")
    name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank()))
    self.collection = Collection(
        self.model, self.output, name=name, resuming=self.resuming)
    self.current_point = OnePoint(self.model, OutputDummy({}), name=name)
    # Use standard MH steps by default
    self.get_new_sample = self.get_new_sample_metropolis
    # Prepare oversampling / dragging if applicable
    self.effective_max_samples = self.max_samples
    if self.oversample and self.drag:
        self.log.error("Choose either oversampling or dragging, not both.")
        raise HandledException
    if self.oversample:
        # Oversample fast blocks according to their (integer) relative speeds
        factors, blocks = self.model.likelihood._speeds_of_params(
            int_speeds=True)
        self.oversampling_factors = factors
        self.log.info("Oversampling with factors:\n" + "\n".join([
            " %d : %r" % (f, b)
            for f, b in zip(self.oversampling_factors, blocks)]))
        self.i_last_slow_block = None
        # No way right now to separate slow and fast
        slow_params = list(self.model.parameterization.sampled_params())
        # NOTE(review): unlike the drag/default branches, self.n_slow is not set
        # here -- confirm nothing downstream reads it on the oversampling path
    elif self.drag:
        speeds, blocks = self.model.likelihood._speeds_of_params(
            fast_slow=True, int_speeds=True)
        # For now, no blocking inside either fast or slow: just 2 blocks
        self.i_last_slow_block = 0
        if np.all(speeds == speeds[0]):
            self.log.error(
                "All speeds are equal or too similar: cannot drag! "
                "Make sure to define accurate likelihoods' speeds.")
            raise HandledException
        # Make the 1st factor 1:
        speeds = [1, speeds[1] / speeds[0]]
        # Target: dragging step taking as long as slow step
        self.drag_interp_steps = self.drag * speeds[1]
        # Per dragging step, the (fast) posterior is evaluated *twice*,
        # so halve the number of interpolation steps to keep the cost target
        self.drag_interp_steps /= 2
        self.drag_interp_steps = int(np.round(self.drag_interp_steps))
        fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
        # Not too much or too little dragging: clip to user-given limits,
        # which are expressed per fast parameter
        drag_limits = [(int(l) * len(fast_params) if l is not None else l)
                       for l in self.drag_limits]
        if drag_limits[0] is not None and self.drag_interp_steps < drag_limits[0]:
            self.log.warning(
                "Number of dragging steps clipped from below: was not "
                "enough to efficiently explore the fast directions -- "
                "avoid this limit by decreasing 'drag_limits[0]'.")
            self.drag_interp_steps = drag_limits[0]
        if drag_limits[1] is not None and self.drag_interp_steps > drag_limits[1]:
            self.log.warning(
                "Number of dragging steps clipped from above: "
                "excessive, probably inefficient, exploration of the "
                "fast directions -- "
                "avoid this limit by increasing 'drag_limits[1]'.")
            self.drag_interp_steps = drag_limits[1]
        # Re-scale steps between checkpoint and callback to the slow dimensions only
        slow_params = list(chain(*blocks[:1 + self.i_last_slow_block]))
        self.n_slow = len(slow_params)
        for p in ["check_every", "callback_every"]:
            setattr(self, p,
                    int(getattr(self, p) * self.n_slow / self.model.prior.d()))
        self.log.info("Dragging with oversampling per step:\n" + "\n".join([
            " %d : %r" % (f, b)
            for f, b in zip([1, self.drag_interp_steps],
                            [blocks[0], fast_params])]))
        self.get_new_sample = self.get_new_sample_dragging
    else:
        # Neither oversampling nor dragging: all blocks treated equally
        _, blocks = self.model.likelihood._speeds_of_params()
        self.oversampling_factors = [1 for b in blocks]
        slow_params = list(self.model.parameterization.sampled_params())
        self.n_slow = len(slow_params)
    # Turn parameter names into indices (positions within the sampled params)
    self.blocks = [[
        list(self.model.parameterization.sampled_params()).index(p)
        for p in b] for b in blocks]
    self.proposer = BlockedProposer(
        self.blocks, oversampling_factors=self.oversampling_factors,
        i_last_slow_block=self.i_last_slow_block,
        proposal_scale=self.proposal_scale)
    # Build the initial covariance matrix of the proposal, or load from checkpoint
    if self.resuming:
        covmat = np.loadtxt(self.covmat_filename())
        self.log.info("Covariance matrix from checkpoint.")
    else:
        covmat = self.initial_proposal_covmat(slow_params=slow_params)
        self.log.info("Initial covariance matrix.")
    self.log.debug(
        "Sampling with covmat:\n%s",
        DataFrame(
            covmat, columns=self.model.parameterization.sampled_params(),
            index=self.model.parameterization.sampled_params()).to_string(
                line_width=_line_width))
    self.proposer.set_covariance(covmat)
    # Prepare callback function (resolved from name/path to a callable)
    if self.callback_function is not None:
        self.callback_function_callable = (get_external_function(
            self.callback_function))