def __init__(self, info_params, info_likelihood, info_prior=None, info_theory=None,
             modules=None, timing=None, allow_renames=True):
    self.set_logger(lowercase=True)
    self._updated_info = {
        _params: deepcopy_where_possible(info_params),
        _likelihood: deepcopy_where_possible(info_likelihood)}
    if not self._updated_info[_likelihood]:
        raise LoggedError(self.log, "No likelihood requested!")
    for k, v in ((_prior, info_prior), (_theory, info_theory),
                 (_path_install, modules), (_timing, timing)):
        if v not in (None, {}):
            self._updated_info[k] = deepcopy_where_possible(v)
    self.parameterization = Parameterization(self._updated_info[_params],
                                             allow_renames=allow_renames)
    self.prior = Prior(self.parameterization, self._updated_info.get(_prior, None))
    self.likelihood = Likelihood(self._updated_info[_likelihood],
                                 self.parameterization,
                                 self._updated_info.get(_theory),
                                 modules=modules, timing=timing)
def test_cosmo_run_not_found():
    with NoLogging(logging.ERROR):
        inf = deepcopy_where_possible(info)
        inf["likelihood"]["H0.perfect"] = None
        with pytest.raises(ComponentNotFoundError):
            run(inf)
        inf = deepcopy_where_possible(info)
        inf["likelihood"]["none"] = None
        with pytest.raises(ComponentNotFoundError):
            run(inf)
        inf = deepcopy_where_possible(info)
        inf["likelihood"]["pandas.plotting.PlotAccessor"] = None
        with pytest.raises(LoggedError) as e:
            run(inf)
        assert "Failed to get defaults for component" in str(e)
def reload_updated_info(self, cache=False, use_cache=False) -> Optional[InputDict]:
    if mpi.is_main_process():
        if use_cache and hasattr(self, "_old_updated_info"):
            return self._old_updated_info
        try:
            if os.path.isfile(self.dump_file_updated):
                loaded = load_info_dump(self.dump_file_updated)
            else:
                loaded = yaml_load_file(self.file_updated)  # type: ignore
            if cache:
                self._old_updated_info = deepcopy_where_possible(loaded)
            return loaded
        except IOError:
            if cache:
                self._old_updated_info = None
            return None
    else:
        # Only the cached version is available for non-main processes
        if not use_cache:
            raise LoggedError(
                self.log, "Cannot call `reload_updated_info` from a "
                          "non-main process unless the cached version "
                          "(`use_cache=True`) is requested.")
        return getattr(self, "_old_updated_info", None)
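# Usage sketch for the method above (`output` stands for any instance of the
# class defining `reload_updated_info`; the call pattern follows the flags above):
#
#     info = output.reload_updated_info(cache=True)            # reads from disk, caches
#     info_again = output.reload_updated_info(use_cache=True)  # no disk access
#
# On non-main MPI processes only the cached version is available, so calling it
# there without `use_cache=True` raises a LoggedError.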
def get_model(info):
    assert hasattr(info, "keys"), (
        "The first argument must be a dictionary with the info needed for the model. "
        "If you were trying to pass the name of an input file instead, "
        "load it first with 'cobaya.input.load_input', "
        "or, if you were passing a yaml string, load it with 'cobaya.yaml.yaml_load'.")
    # Configure the logger ASAP
    # Just a dummy import before configuring the logger,
    # until root/individual levels are fixed
    import getdist
    logger_setup(info.pop(_debug, _debug_default), info.pop(_debug_file, None))
    # Create the updated input information, including defaults for each module
    info = deepcopy_where_possible(info)
    ignored_info = {}
    for k in list(info):
        if k not in [_params, _likelihood, _prior, _theory, _path_install, _timing]:
            ignored_info.update({k: info.pop(k)})
    import logging
    if ignored_info:
        logging.getLogger(__name__.split(".")[-1]).warning(
            "Ignored blocks/options: %r", list(ignored_info))
    updated_info = update_info(info)
    if logging.root.getEffectiveLevel() <= logging.DEBUG:
        logging.getLogger(__name__.split(".")[-1]).debug(
            "Input info updated with defaults (dumped to YAML):\n%s",
            yaml_dump(updated_info))
    # Initialize the posterior and the sampler
    return Model(updated_info[_params], updated_info[_likelihood],
                 updated_info.get(_prior), updated_info.get(_theory),
                 modules=info.get(_path_install),
                 timing=updated_info.get(_timing))
def info(self):
    """
    Returns a copy of the information used to initialise the sampler,
    including defaults and some new values that are only available after
    initialisation.
    """
    return deepcopy_where_possible(self._updated_info)
def expand_info_param(info_param: ParamInput, default_derived=True) -> ParamDict:
    """
    Expands the info of a parameter, from the user-friendly, shorter format
    to a more unambiguous one.
    """
    info_param = deepcopy_where_possible(info_param)
    if not isinstance(info_param, Mapping):
        if info_param is None:
            info_param = {}
        elif isinstance(info_param, Sequence) and not isinstance(info_param, str):
            values = list(info_param)
            allowed_lengths = [2, 4, 5]
            if len(values) not in allowed_lengths:
                logger = logging.getLogger(__name__.split(".")[-1])
                raise LoggedError(
                    logger, "Parameter info length not valid: %d. "
                            "The allowed lengths are %r. See documentation.",
                    len(values), allowed_lengths)
            info_param = {"prior": [values[0], values[1]]}
            if len(values) >= 4:
                info_param["ref"] = [values[2], values[3]]
            if len(values) == 5:
                info_param["proposal"] = values[4]
        else:
            info_param = {"value": info_param}
    if all(f not in info_param for f in ["prior", "value", "derived"]):
        info_param["derived"] = default_derived
    # Dynamical input parameters: save as derived by default
    value = info_param.get("value")
    if isinstance(value, str) or callable(value):
        info_param["derived"] = info_param.get("derived", True)
    return info_param
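# A few concrete expansions may make the rules above easier to see; these assume
# the function above is in scope and use made-up values, not actual Cobaya params:
#
#     expand_info_param(None)        # -> {"derived": True}
#     expand_info_param(3.14)        # -> {"value": 3.14}
#     expand_info_param([0, 1])      # -> {"prior": [0, 1]}
#     expand_info_param([0, 1, 0.5, 0.1])        # ... plus "ref": [0.5, 0.1]
#     expand_info_param([0, 1, 0.5, 0.1, 0.05])  # ... plus "proposal": 0.05
#     expand_info_param("lambda x: 2 * x")  # -> {"value": "lambda x: 2 * x",
#                                           #     "derived": True}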
def initialize(self):
    # Dict of named tuples to collect requirements and computation methods
    self.collectors = {}
    # Additional input parameters to pass to CAMB, and attributes to set manually
    self.extra_args = deepcopy_where_possible(self.extra_args) or {}
    self._must_provide = None
def initialize(self):
    """Importing CLASS from the correct path, if given, and if not, globally."""
    # If path not given, try using general path to modules
    if not self.path and self.path_install:
        self.path = self.get_path(self.path_install)
    if self.path:
        self.log.info("Importing *local* classy from " + self.path)
        classy_build_path = os.path.join(self.path, "python", "build")
        # Guard against a missing/uncompiled build folder before listing it
        post = (next((d for d in os.listdir(classy_build_path)
                      if d.startswith("lib.")), "")
                if os.path.isdir(classy_build_path) else "")
        classy_build_path = os.path.join(classy_build_path, post)
        if not post or not os.path.exists(classy_build_path):
            # If path was given as an install path, try to import the global one anyway
            if self.path_install:
                self.log.info("Importing *global* CLASS (because not installed).")
            else:
                raise LoggedError(
                    self.log, "Either CLASS is not in the given folder, "
                              "'%s', or you have not compiled it.", self.path)
        else:
            # Insert the previously found path into the list of import folders
            sys.path.insert(0, classy_build_path)
    else:
        self.log.info("Importing *global* CLASS.")
    # Propagate errors up (declared before the local import of the same names)
    global CosmoComputationError, CosmoSevereError
    try:
        from classy import Class, CosmoSevereError, CosmoComputationError
    except ImportError:
        raise LoggedError(
            self.log, "Couldn't find the CLASS python interface. "
                      "Make sure that you have compiled it, and that you either\n"
                      " (a) specify a path (you didn't) or\n"
                      " (b) install the Python interface globally with\n"
                      "     '/path/to/class/python/python setup.py install --user'")
    self.classy = Class()
    # Generate states, to avoid recomputing
    self.n_states = 3
    self.states = [{"params": None, "derived": None, "derived_extra": None, "last": 0}
                   for _ in range(self.n_states)]
    # Dict of named tuples to collect requirements and computation methods
    self.collectors = {}
    # Additional input parameters to pass to CLASS
    self.extra_args = deepcopy_where_possible(self.extra_args) or {}
    # Add general CLASS stuff
    self.extra_args["output"] = self.extra_args.get("output", "")
    if "sBBN file" in self.extra_args:
        self.extra_args["sBBN file"] = (
            self.extra_args["sBBN file"].format(classy=self.path))
    # Set aliases
    self.planck_to_classy = self.renames
    # Derived parameters that may not have been requested, but will be necessary later
    self.derived_extra = []
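# Hypothetical theory block exercising the '{classy}' placeholder substituted above
# (the path and file name are illustrative, not actual defaults):
#
#     theory:
#       classy:
#         path: /path/to/class
#         extra_args:
#           sBBN file: "{classy}/bbn/sBBN.dat"
#
# After initialize(), extra_args["sBBN file"] reads "/path/to/class/bbn/sBBN.dat".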
def __init__(self, info_sampler: SamplerDict, model: Model,
             output: Optional[Output] = None,
             packages_path: Optional[str] = None, name: Optional[str] = None):
    """
    Actual initialization of the class. Loads the default and input information
    and calls the custom ``initialize`` method. [Do not modify this one.]
    """
    self._model = model
    self._output = output
    self._updated_info = deepcopy_where_possible(info_sampler)
    super().__init__(info_sampler, packages_path=packages_path, name=name,
                     initialize=False, standalone=False)
    if not model.parameterization.sampled_params():
        self.mpi_warning("No sampled parameters requested! "
                         "This will fail for non-mock samplers.")
    # Load checkpoint info, if resuming
    if self.output.is_resuming() and not isinstance(self, Minimizer):
        checkpoint_info = None
        if mpi.is_main_process():
            try:
                checkpoint_info = yaml_load_file(self.checkpoint_filename())
                if self.get_name() not in checkpoint_info["sampler"]:
                    raise LoggedError(
                        self.log, "Checkpoint file found at '%s' "
                                  "but it corresponds to a different sampler.",
                        self.checkpoint_filename())
            except (IOError, TypeError):
                pass
        checkpoint_info = mpi.share_mpi(checkpoint_info)
        if checkpoint_info:
            self.set_checkpoint_info(checkpoint_info)
            self.mpi_info("Resuming from previous sample!")
    elif not isinstance(self, Minimizer) and mpi.is_main_process():
        try:
            output.delete_file_or_folder(self.checkpoint_filename())
            output.delete_file_or_folder(self.progress_filename())
        except (OSError, TypeError):
            pass
    self._set_rng()
    self.initialize()
    model.set_cache_size(self._get_requested_cache_size())
    # Add to the updated info some values which are
    # only available after initialisation
    self._updated_info["version"] = self.get_version()
def reload_updated_info(self, cache=False, use_cache=False):
    if use_cache and getattr(self, "_old_updated_info", None):
        return self._old_updated_info
    try:
        loaded = yaml_load_file(self.file_updated)
        if cache:
            self._old_updated_info = loaded
        return deepcopy_where_possible(loaded)
    except IOError:
        if cache:
            self._old_updated_info = None
        return None
def load_input_dict(info_or_yaml_or_file: Union[InputDict, str, os.PathLike]
                    ) -> InputDict:
    if isinstance(info_or_yaml_or_file, os.PathLike):
        return load_input_file(info_or_yaml_or_file)
    elif isinstance(info_or_yaml_or_file, str):
        if "\n" in info_or_yaml_or_file:
            return yaml_load(info_or_yaml_or_file)  # type: ignore
        else:
            return load_input_file(info_or_yaml_or_file)
    elif isinstance(info_or_yaml_or_file, (dict, Mapping)):
        return deepcopy_where_possible(info_or_yaml_or_file)
    else:
        raise ValueError("The first argument must be a dictionary, file name or "
                         "yaml string with the required input options.")
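# The accepted input forms, as dispatched above (file names are hypothetical):
#
#     load_input_dict(Path("run1.yaml"))              # os.PathLike -> load_input_file
#     load_input_dict("run1.yaml")                    # str without newline -> file name
#     load_input_dict("likelihood:\n  one:\n")        # str with newline -> yaml string
#     load_input_dict({"likelihood": {"one": None}})  # Mapping -> deep-copied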
def reduce_info_param(info_param):
    """
    Compresses the info of a parameter, suppressing default values.
    This is the opposite of :func:`~input.expand_info_param`.
    """
    info_param = deepcopy_where_possible(info_param)
    if not isinstance(info_param, dict):
        return
    # All parameters without a prior are derived parameters unless otherwise specified
    if info_param.get(partag.derived) is True:
        info_param.pop(partag.derived)
    # Fixed parameters with single "value" key
    if list(info_param) == [partag.value]:
        return info_param[partag.value]
    return info_param
def reduce_info_param(info_param: ParamDict) -> ParamInput:
    """
    Compresses the info of a parameter, suppressing default values.
    This is the opposite of :func:`~input.expand_info_param`.
    """
    info_param = deepcopy_where_possible(info_param)
    if not isinstance(info_param, dict):
        return None
    # All parameters without a prior are derived parameters unless otherwise specified
    if info_param.get("derived") is True:
        info_param.pop("derived")
    # Fixed parameters with single "value" key
    if list(info_param) == ["value"] and not callable(info_param["value"]):
        return info_param["value"]
    return info_param
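# Round-trip sketch with the expand/reduce pair defined in this file
# (values are illustrative):
#
#     reduce_info_param({"value": 3.14})             # -> 3.14
#     reduce_info_param({"derived": True})           # -> {} (default flag dropped)
#     reduce_info_param({"value": lambda x: 2 * x})  # dict kept: callable value
#     reduce_info_param(expand_info_param(3.14))     # -> 3.14 (round trip)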
def dump_info(self, input_info, updated_info, check_compatible=True):
    """
    Saves the info in the chain folder twice:

    - the input info.
    - idem, populated with the modules' defaults.

    If resuming a sample, checks first that old and new infos are consistent.
    """
    # Trim known params of each likelihood: for internal use only
    updated_info_trimmed = deepcopy_where_possible(updated_info)
    for lik_info in updated_info_trimmed.get(_likelihood, {}).values():
        if hasattr(lik_info, "pop"):
            lik_info.pop(_params, None)
    if check_compatible:
        try:
            # We will test the old info against the dumped+loaded new info.
            # This is because we can't actually check if python objects do change
            old_info = self.reload_updated_info()
            new_info = yaml_load(yaml_dump(updated_info_trimmed))
            ignore_blocks = []
            if list(new_info.get(_sampler, [None]))[0] == "minimize":
                ignore_blocks = [_sampler]
            if not is_equal_info(old_info, new_info, strict=False,
                                 ignore_blocks=ignore_blocks):
                # HACK!!! NEEDS TO BE FIXED
                if list(updated_info.get(_sampler, [None]))[0] == "minimize":
                    raise LoggedError(
                        self.log,
                        "Old and new sample information not compatible! "
                        "At this moment it is not possible to 'force' deletion of "
                        "an old 'minimize' run. Please delete it by hand. "
                        "We are working on fixing this very soon!")
                raise LoggedError(
                    self.log, "Old and new sample information not compatible! "
                              "Resuming not possible!")
        except IOError:
            # There was no previous chain
            pass
    # We write the new one anyway (maybe updated debug, resuming...)
    for f, info in [(self.file_input, input_info),
                    (self.file_updated, updated_info_trimmed)]:
        if not info:
            continue
        with open(f, "w") as f_out:
            try:
                f_out.write(yaml_dump(info))
            except OutputError as e:
                raise LoggedError(self.log, str(e))
def make_auto_params(auto_params, params_info):
    def replace(item, tag):
        if isinstance(item, dict):
            for key, val in list(item.items()):
                item[key] = replace(val, tag)
        elif isinstance(item, str) and '%s' in item:
            item = item % tag
        return item

    for k, v in auto_params.items():
        if '%s' not in k:
            raise LoggedError(
                log, 'auto_param parameter names must have a %s placeholder')
        replacements = v.pop('auto_range')
        if isinstance(replacements, str):
            replacements = eval(replacements)
        for value in replacements:
            params_info[k % value] = replace(deepcopy_where_possible(v), value)
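# Minimal sketch of the auto_params expansion above (all names are made up):
#
#     params_info = {}
#     make_auto_params({"amp_%s": {"prior": {"min": 0, "max": 1},
#                                  "latex": "A_{%s}",
#                                  "auto_range": "range(1, 4)"}}, params_info)
#     # params_info now contains amp_1, amp_2, amp_3 with latex A_{1}, A_{2}, A_{3}:
#     # the '%s' placeholder is filled both in the key and, recursively, in values.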
def expand_info_param(info_param, default_derived=True):
    """
    Expands the info of a parameter, from the user-friendly, shorter format
    to a more unambiguous one.
    """
    info_param = deepcopy_where_possible(info_param)
    if not isinstance(info_param, dict):
        if info_param is None:
            info_param = {}
        else:
            info_param = {partag.value: info_param}
    if all(f not in info_param
           for f in [partag.prior, partag.value, partag.derived]):
        info_param[partag.derived] = default_derived
    # Dynamical input parameters: save as derived by default
    value = info_param.get(partag.value, None)
    if isinstance(value, str) or callable(value):
        info_param[partag.derived] = info_param.get(partag.derived, True)
    return info_param
def get_sampler(info_sampler: SamplersDict, model: Model,
                output: Optional[Output] = None,
                packages_path: Optional[str] = None) -> 'Sampler':
    assert isinstance(info_sampler, Mapping), (
        "The first argument must be a dictionary with the info needed for the sampler. "
        "If you were trying to pass the name of an input file instead, "
        "load it first with 'cobaya.input.load_input', "
        "or, if you were passing a yaml string, load it with 'cobaya.yaml.yaml_load'.")
    logger_sampler = get_logger(__name__)
    info_sampler = deepcopy_where_possible(info_sampler)
    if output is None:
        output = OutputDummy()
    # Check and update info
    check_sane_info_sampler(info_sampler)
    updated_info_sampler = update_info(
        {"sampler": info_sampler})["sampler"]  # type: ignore
    if is_debug(logger_sampler):
        logger_sampler.debug(
            "Input info updated with defaults (dumped to YAML):\n%s",
            yaml_dump(updated_info_sampler))
    # Get sampler class & check resume/force compatibility
    sampler_name, sampler_class = get_sampler_name_and_class(
        updated_info_sampler, logger=logger_sampler)
    check_sampler_info(
        (output.reload_updated_info(use_cache=True) or {}).get("sampler"),
        updated_info_sampler, is_resuming=output.is_resuming())
    # Check if resumable run
    sampler_class.check_force_resume(output, info=updated_info_sampler[sampler_name])
    # Instantiate the sampler
    sampler_instance = sampler_class(updated_info_sampler[sampler_name], model,
                                     output, packages_path=packages_path)
    # If output, dump updated
    if output:
        to_dump = model.info()
        to_dump["sampler"] = {sampler_name: sampler_instance.info()}
        to_dump["output"] = os.path.join(output.folder, output.prefix)
        output.check_and_dump_info(None, to_dump, check_compatible=False)
    return sampler_instance
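# Hedged end-to-end sketch wiring a model to a sampler with the function above.
# It assumes Cobaya's public `get_model` and the built-in 'one' likelihood and
# 'evaluate' sampler; the parameter name is illustrative:
#
#     from cobaya.model import get_model
#     model = get_model({"likelihood": {"one": None},
#                        "params": {"x": {"prior": {"min": 0, "max": 1}}}})
#     sampler = get_sampler({"evaluate": None}, model)
#     sampler.run()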
def get_sampler(info_sampler, model, output=None, packages_path=None):
    assert isinstance(info_sampler, Mapping), (
        "The first argument must be a dictionary with the info needed for the sampler. "
        "If you were trying to pass the name of an input file instead, "
        "load it first with 'cobaya.input.load_input', "
        "or, if you were passing a yaml string, load it with 'cobaya.yaml.yaml_load'.")
    logger_sampler = logging.getLogger(__name__.split(".")[-1])
    info_sampler = deepcopy_where_possible(info_sampler)
    if output is None:
        output = OutputDummy()
    # Check and update info
    check_sane_info_sampler(info_sampler)
    updated_info_sampler = update_info({kinds.sampler: info_sampler})[kinds.sampler]
    if logging.root.getEffectiveLevel() <= logging.DEBUG:
        logger_sampler.debug(
            "Input info updated with defaults (dumped to YAML):\n%s",
            yaml_dump(updated_info_sampler))
    # Get sampler class & check resume/force compatibility
    sampler_name, sampler_class = get_sampler_name_and_class(updated_info_sampler)
    check_sampler_info(
        (output.reload_updated_info(use_cache=True) or {}).get(kinds.sampler),
        updated_info_sampler, is_resuming=output.is_resuming())
    # Check if resumable run
    sampler_class.check_force_resume(output, info=updated_info_sampler[sampler_name])
    # Instantiate the sampler
    sampler_instance = sampler_class(updated_info_sampler[sampler_name], model,
                                     output, packages_path=packages_path)
    # If output, dump updated
    if output:
        to_dump = model.info()
        to_dump[kinds.sampler] = {sampler_name: sampler_instance.info()}
        to_dump[_output_prefix] = os.path.join(output.folder, output.prefix)
        output.check_and_dump_info(None, to_dump, check_compatible=False)
    return sampler_instance
def initial_proposal_covmat(self, auto_params=None):
    """
    Build the initial covariance matrix, using the data provided, in descending order
    of priority:

    1. "covmat" field in the sampler block (including `auto` search).
    2. "proposal" field for each parameter.
    3. variance of the reference pdf.
    4. variance of the prior pdf.

    The covariances between parameters when both are present in a covariance matrix
    provided through option 1 are preserved. All other covariances are assumed 0.

    If `covmat: auto`, use the keyword `auto_params` to restrict the parameters for
    which a covariance matrix is searched (default: None, meaning all sampled params).
    """
    params_infos = self.model.parameterization.sampled_params_info()
    covmat = np.diag([np.nan] * len(params_infos))
    # Try to generate it automatically
    self.covmat = getattr(self, 'covmat', None)
    if isinstance(self.covmat, str) and self.covmat.lower() == "auto":
        params_infos_covmat = deepcopy_where_possible(params_infos)
        for p in list(params_infos_covmat):
            if p not in (auto_params or []):
                params_infos_covmat.pop(p, None)
        auto_covmat = self.model.get_auto_covmat(params_infos_covmat,
                                                 random_state=self._rng)
        if auto_covmat:
            self.covmat = os.path.join(auto_covmat["folder"], auto_covmat["name"])
            self.log.info("Covariance matrix selected automatically: %s", self.covmat)
        else:
            self.covmat = None
            self.log.info("Could not automatically find a good covmat. "
                          "Will generate from parameter info (proposal and prior).")
    # If given, load and test the covariance matrix
    loaded_params: Sequence[str]
    if isinstance(self.covmat, str):
        covmat_pre = "{%s}" % packages_path_input
        if self.covmat.startswith(covmat_pre):
            self.covmat = self.covmat.format(
                **{packages_path_input: self.packages_path}).replace("/", os.sep)
        try:
            with open(self.covmat, "r", encoding="utf-8-sig") as file_covmat:
                header = file_covmat.readline()
            loaded_covmat = np.loadtxt(self.covmat)
            self.log.debug(f"Loaded a covariance matrix from '{self.covmat}'")
        except TypeError:
            raise LoggedError(self.log, "The property 'covmat' must be a file name, "
                                        "but it's '%s'.", str(self.covmat))
        except IOError:
            raise LoggedError(self.log, "Can't open covmat file '%s'.", self.covmat)
        if header[0] != "#":
            raise LoggedError(
                self.log, "The first line of the covmat file '%s' "
                          "must be one list of parameter names separated by spaces "
                          "and starting with '#', and the rest must be a square "
                          "matrix, with one row per line.", self.covmat)
        loaded_params = header.strip("#").strip().split()
    elif hasattr(self.covmat, "__getitem__"):
        if not self.covmat_params:
            raise LoggedError(
                self.log, "If a covariance matrix is passed as a numpy array, "
                          "you also need to pass the parameters it corresponds to "
                          "via 'covmat_params: [name1, name2, ...]'.")
        loaded_params = self.covmat_params
        loaded_covmat = np.array(self.covmat)
    elif self.covmat:
        raise LoggedError(self.log, "Invalid covmat")
    if self.covmat is not None:
        str_msg = "the `covmat_params` list"
        if isinstance(self.covmat, str):
            str_msg = "the header of the covmat file %r" % self.covmat
        if len(loaded_params) != len(set(loaded_params)):
            duplicated = list(set(p for p in loaded_params
                                  if list(loaded_params).count(p) > 1))
            raise LoggedError(self.log, "Parameter(s) %r appear more than once in %s",
                              duplicated, str_msg)
        if len(loaded_params) != loaded_covmat.shape[0]:
            raise LoggedError(
                self.log, "The number of parameters in %s and the "
                          "dimensions of the matrix do not agree: %d vs %r",
                str_msg, len(loaded_params), loaded_covmat.shape)
        loaded_covmat = np.atleast_2d(loaded_covmat)
        is_square_symmetric = (len(loaded_covmat.shape) == 2 and
                               loaded_covmat.shape[0] == loaded_covmat.shape[1] and
                               np.allclose(loaded_covmat.T, loaded_covmat))
        # Not checking for positive-definiteness yet: may contain highly degenerate
        # derived parameters that would spoil it now, but will later be dropped.
        if not is_square_symmetric:
            from_msg = (f"loaded from '{self.covmat}'"
                        if isinstance(self.covmat, str) else "passed")
            raise LoggedError(
                self.log,
                f"The covariance matrix {from_msg} is not a symmetric square matrix.")
        # Fill with parameters in the loaded covmat
        renames = {p: [p] + str_to_list(v.get("renames") or [])
                   for p, v in params_infos.items()}
        indices_used, indices_sampler = zip(*[
            [loaded_params.index(p),
             [list(params_infos).index(q) for q, a in renames.items() if p in a]]
            for p in loaded_params])
        if not any(indices_sampler):
            raise LoggedError(
                self.log,
                "A proposal covariance matrix has been loaded, but none of its "
                "parameters are actually sampled here. Maybe a mismatch between "
                "parameter names in the covariance matrix and the input file?")
        indices_used, indices_sampler = zip(
            *[[i, j] for i, j in zip(indices_used, indices_sampler) if j])
        if any(len(j) - 1 for j in indices_sampler):
            first = next(j for j in indices_sampler if len(j) > 1)
            raise LoggedError(
                self.log,
                "The parameters %s have duplicated aliases. Can't assign them an "
                "element of the covariance matrix unambiguously.",
                ", ".join([list(params_infos)[i] for i in first]))
        indices_sampler = tuple(chain(*indices_sampler))
        covmat[np.ix_(indices_sampler, indices_sampler)] = (
            loaded_covmat[np.ix_(indices_used, indices_used)])
        self.log.info("Covariance matrix loaded for params %r",
                      [list(params_infos)[i] for i in indices_sampler])
        missing_params = set(params_infos).difference(
            list(params_infos)[i] for i in indices_sampler)
        if missing_params:
            self.log.info("Missing proposal covariance for params %r",
                          [p for p in self.model.parameterization.sampled_params()
                           if p in missing_params])
        else:
            self.log.info("All parameters' covariance loaded from given covmat.")
    # Fill gaps with "proposal" property, if present, otherwise ref (or prior)
    where_nan = np.isnan(covmat.diagonal())
    if np.any(where_nan):
        covmat[where_nan, where_nan] = np.array(
            [(info.get("proposal", np.nan) or np.nan) ** 2
             for info in params_infos.values()])[where_nan]
    where_nan2 = np.isnan(covmat.diagonal())
    if np.any(where_nan2):
        # The variances are likely too large for a good proposal, e.g. conditional
        # widths may be much smaller than the marginalized ones.
        # Divide by 4: better to be too small than too large.
        covmat[where_nan2, where_nan2] = (
            self.model.prior.reference_variances()[where_nan2]
            / self.fallback_covmat_scale)
    assert not np.any(np.isnan(covmat))
    return covmat, where_nan
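# The covmat file format expected above: a '#'-prefixed header of parameter names,
# then a square symmetric matrix, one row per line (np.loadtxt-compatible).
# A hypothetical two-parameter file:
#
#     # param_a param_b
#      1.0e-02  1.0e-03
#      1.0e-03  4.0e-02
#
# Equivalently, pass the matrix directly plus 'covmat_params: [param_a, param_b]'.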
def get_defaults(cls, return_yaml=False, yaml_expand_defaults=True,
                 input_options=empty_dict):
    """
    Return defaults for this component_or_class, with syntax:

    .. code::

       option: value
       [...]

       params:
         [...]  # if required

       prior:
         [...]  # if required

    If keyword `return_yaml` is set to True, it returns literally that,
    whereas if False (default), it returns the corresponding Python dict.

    Note that in external components installed as zip_safe=True packages, files
    cannot be accessed directly. In this case, using `!default` .yaml includes
    currently does not work.

    Also note that if you return a dictionary it may be modified (return a deep copy
    if you want to keep it).

    If `yaml_expand_defaults` is True, `!default: file` includes are expanded.

    `input_options` may be a dictionary of input options, e.g. in case default
    params are dynamically dependent on an input variable.
    """
    if 'class_options' in cls.__dict__:
        raise LoggedError(log, "class_options (in %s) should now be replaced by "
                               "public attributes defined directly in the class" %
                          cls.get_qualified_class_name())
    yaml_text = cls.get_associated_file_content('.yaml')
    options = cls.get_class_options(input_options=input_options)
    if options and yaml_text:
        raise LoggedError(log,
                          "%s: any class can either have .yaml or class variables "
                          "but not both (type declarations without values are fine "
                          "also with yaml file). You have class attributes: %s",
                          cls.get_qualified_class_name(), list(options))
    if return_yaml and not yaml_expand_defaults:
        return yaml_text or ""
    this_defaults = yaml_load_file(cls.get_yaml_file(), yaml_text) \
        if yaml_text else deepcopy_where_possible(options)
    # Start with this one to keep the order such that the most recent class options
    # are near the top. Update below to actually override parameters with these.
    defaults = this_defaults.copy()
    if not return_yaml:
        for base in cls.__bases__:
            if issubclass(base, HasDefaults) and base is not HasDefaults:
                defaults.update(base.get_defaults(input_options=input_options))
        defaults.update(this_defaults)
    if return_yaml:
        return yaml_dump(defaults)
    else:
        return defaults
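# Minimal sketch of class-attribute defaults picked up by get_defaults() above
# (assumes a HasDefaults subclass, e.g. Cobaya's CobayaComponent; names made up):
#
#     class MyLike(CobayaComponent):
#         speed: float = 1.0   # typed class attribute with a default value
#         option_x = "abc"     # plain class option (no .yaml allowed alongside)
#
#     MyLike.get_defaults()    # -> {'speed': 1.0, 'option_x': 'abc'}, merged over
#                              #    any defaults inherited from base classes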
def update_info(info):
    """
    Creates an updated info starting from the defaults for each module and updating it
    with the input info.
    """
    # Don't modify the original input!
    input_info = deepcopy_where_possible(info)
    # Create an equivalent info using only the defaults
    updated_info = odict()
    default_params_info = odict()
    default_prior_info = odict()
    modules = get_used_modules(input_info)
    for block in modules:
        updated_info[block] = odict()
        for module in modules[block]:
            # Preprocess "no options" and "external function" in input
            try:
                input_info[block][module] = input_info[block][module] or {}
            except TypeError:
                raise LoggedError(
                    log, "Your input info is not well formatted at the '%s' block. "
                         "It must be a dictionary {'%s':{options}, ...}. ",
                    block, block)
            if not hasattr(input_info[block][module], "get"):
                input_info[block][module] = {_external: input_info[block][module]}
            # Get default class options
            updated_info[block][module] = deepcopy(getattr(
                import_module(_package + "." + block, package=_package),
                "class_options", {}))
            default_module_info = get_default_info(module, block)
            # TODO: check -- get_default_info was ignoring this extra arg:
            #   input_info[block][module]
            updated_info[block][module].update(
                default_module_info[block][module] or {})
            # Update default options with input info
            # Consistency is checked only up to first level! (i.e. subkeys may not match)
            ignore = set([_external, _p_renames, _input_params, _output_params])
            options_not_recognized = (
                set(input_info[block][module]).difference(ignore)
                .difference(set(updated_info[block][module])))
            if options_not_recognized:
                alternatives = odict()
                available = (set([_external, _p_renames])
                             .union(updated_info[block][module]))
                while options_not_recognized:
                    option = options_not_recognized.pop()
                    alternatives[option] = fuzzy_match(option, available, n=3)
                did_you_mean = ", ".join(
                    [("'%s' (did you mean %s?)" %
                      (o, "|".join(["'%s'" % _ for _ in a])) if a else "'%s'" % o)
                     for o, a in alternatives.items()])
                if default_module_info[block][module]:
                    # Internal module
                    raise LoggedError(
                        log, "'%s' does not recognize some options: %s. "
                             "To see the allowed options, check out the "
                             "documentation of this module.", module, did_you_mean)
                else:
                    # External module
                    raise LoggedError(
                        log, "External %s '%s' does not recognize some options: %s. "
                             "Check the documentation for 'external %s'.",
                        block, module, did_you_mean, block)
            updated_info[block][module].update(input_info[block][module])
            # Store default parameters and priors of class, and save to combine later
            if block == _likelihood:
                params_info = default_module_info.get(_params, {})
                updated_info[block][module].update({_params: list(params_info or [])})
                default_params_info[module] = params_info
                default_prior_info[module] = default_module_info.get(_prior, {})
    # Add priors info, after the necessary checks
    if _prior in input_info or any(default_prior_info.values()):
        updated_info[_prior] = input_info.get(_prior, odict())
        for prior_info in default_prior_info.values():
            for name, prior in prior_info.items():
                if updated_info[_prior].get(name, prior) != prior:
                    raise LoggedError(
                        log, "Two different priors cannot have the same name: '%s'.",
                        name)
                updated_info[_prior][name] = prior
    # Add parameters info, after the necessary updates and checks
    defaults_merged = merge_default_params_info(default_params_info)
    updated_info[_params] = merge_params_info(defaults_merged,
                                              input_info.get(_params, {}))
    # Add aliases for theory params (after merging!)
    if _theory in updated_info:
        renames = list(updated_info[_theory].values())[0].get(_p_renames)
        str_to_list = lambda x: ([x] if isinstance(x, string_types) else x)
        renames_flat = [set([k] + str_to_list(v)) for k, v in (renames or {}).items()]
        for p in updated_info.get(_params, {}):
            # Probably could be made faster by inverting the renames dicts *just once*
            renames_pairs = [a for a in renames_flat if p in a]
            if renames_pairs:
                this_renames = reduce(lambda x, y: x.union(y),
                                      [a for a in renames_flat if p in a])
                updated_info[_params][p][_p_renames] = list(
                    set(this_renames).union(
                        set(str_to_list(
                            updated_info[_params][p].get(_p_renames, [])))
                    ).difference(set([p])))
    # Rest of the options
    for k, v in input_info.items():
        if k not in updated_info:
            updated_info[k] = v
    return updated_info
def post(info, sample=None):
    logger_setup(info.get(_debug), info.get(_debug_file))
    log = logging.getLogger(__name__.split(".")[-1])
    # MARKED FOR DEPRECATION IN v3.0
    # BEHAVIOUR TO BE REPLACED BY ERROR:
    check_deprecated_modules_path(info)
    # END OF DEPRECATION BLOCK
    try:
        info_post = info[_post]
    except KeyError:
        raise LoggedError(log, "No 'post' block given. Nothing to do!")
    if get_mpi_rank():
        log.warning("Post-processing is not yet MPI-aware. "
                    "Doing nothing for rank > 0 processes.")
        return
    if info.get(_resume):
        log.warning("Resuming not implemented for post-processing. Re-starting.")
    # 1. Load existing sample
    output_in = get_output(output_prefix=info.get(_output_prefix))
    if output_in:
        try:
            info_in = output_in.reload_updated_info()
        except FileNotFoundError:
            raise LoggedError(log, "Error loading input model: "
                                   "could not find input info at %s",
                              output_in.file_updated)
    else:
        info_in = deepcopy_where_possible(info)
    dummy_model_in = DummyModel(info_in[_params], info_in[kinds.likelihood],
                                info_in.get(_prior, None))
    if output_in:
        if not output_in.find_collections():
            raise LoggedError(log,
                              "No samples found for the input model with prefix %s",
                              os.path.join(output_in.folder, output_in.prefix))
        collection_in = output_in.load_collections(
            dummy_model_in, skip=info_post.get("skip", 0),
            thin=info_post.get("thin", 1), concatenate=True)
    elif sample:
        if isinstance(sample, Collection):
            sample = [sample]
        collection_in = deepcopy(sample[0])
        for s in sample[1:]:
            try:
                collection_in.append(s)
            except:
                raise LoggedError(log, "Failed to load some of the input samples.")
    else:
        raise LoggedError(log, "No output from where to load from, "
                               "nor input collections given.")
    log.info("Will process %d samples.", len(collection_in))
    if len(collection_in) <= 1:
        raise LoggedError(
            log, "Not enough samples for post-processing. Try using a larger sample, "
                 "or skipping or thinning less.")
    # 2. Compare old and new info: determine what to do
    add = info_post.get(_post_add, {}) or {}
    remove = info_post.get(_post_remove, {})
    # Add a dummy 'one' likelihood, to absorb unused parameters
    if not add.get(kinds.likelihood):
        add[kinds.likelihood] = {}
    add[kinds.likelihood]["one"] = None
    # Expand the "add" info
    add = update_info(add)
    # 2.1 Adding/removing derived parameters and changes in priors of sampled params
    out = {_params: deepcopy_where_possible(info_in[_params])}
    for p in remove.get(_params, {}):
        pinfo = info_in[_params].get(p)
        if pinfo is None or not is_derived_param(pinfo):
            raise LoggedError(
                log,
                "You tried to remove parameter '%s', which is not a derived "
                "parameter. Only derived parameters can be removed during "
                "post-processing.", p)
        out[_params].pop(p)
    # Force recomputation of aggregated chi2
    for p in list(out[_params]):
        if p.startswith(_get_chi2_name("")):
            out[_params].pop(p)
    mlprior_names_add = []
    for p, pinfo in add.get(_params, {}).items():
        pinfo_in = info_in[_params].get(p)
        if is_sampled_param(pinfo):
            if not is_sampled_param(pinfo_in):
                # No added sampled parameters (de-marginalisation not implemented)
                if pinfo_in is None:
                    raise LoggedError(
                        log, "You added a new sampled parameter %r (maybe "
                             "accidentally by adding a new likelihood that depends "
                             "on it). Adding new sampled parameters is not possible. "
                             "Try fixing it to some value.", p)
                else:
                    raise LoggedError(
                        log, "You tried to change the prior of parameter '%s', "
                             "but it was not a sampled parameter. "
                             "To change that prior, you need to define it as an "
                             "external one.", p)
            if mlprior_names_add[:1] != [_minuslogprior + _separator + _prior_1d_name]:
                mlprior_names_add = ([_minuslogprior + _separator + _prior_1d_name]
                                     + mlprior_names_add)
        elif is_derived_param(pinfo):
            if p in out[_params]:
                raise LoggedError(
                    log, "You tried to add derived parameter '%s', which is already "
                         "present. To force its recomputation, 'remove' it too.", p)
        elif is_fixed_param(pinfo):
            # Only one possibility left: a "fixed" parameter that was not present
            # before: input of new likelihood, or just an argument for dynamical
            # derived (dropped)
            if (p in info_in[_params] and
                    pinfo[partag.value] != (pinfo_in or {}).get(partag.value, None)):
                raise LoggedError(
                    log,
                    "You tried to add a fixed parameter '%s: %r' that was already "
                    "present but had a different value or was not fixed. This is "
                    "not allowed. The old info of the parameter was '%s: %r'",
                    p, dict(pinfo), p, dict(pinfo_in))
        else:
            raise LoggedError(log, "This should not happen. Contact the developers.")
        out[_params][p] = pinfo
    # For the likelihood only, turn the rest of *derived* parameters into constants,
    # so that the likelihoods do not try to compute them.
    # But be careful to exclude *input* params that have a "derived: True" value
    # (which in "updated info" turns into "derived: 'lambda [x]: [x]'")
    out_params_like = deepcopy_where_possible(out[_params])
    for p, pinfo in out_params_like.items():
        if (is_derived_param(pinfo) and partag.value not in pinfo
                and p not in add.get(_params, {})):
            out_params_like[p] = {partag.value: np.nan, partag.drop: True}
    # 2.2 Manage adding/removing priors and likelihoods
    warn_remove = False
    for level in [_prior, kinds.likelihood]:
        out[level] = getattr(dummy_model_in, level)
        if level == _prior:
            out[level].remove(_prior_1d_name)
        for pdf in info_post.get(_post_remove, {}).get(level, []) or []:
            try:
                out[level].remove(pdf)
                warn_remove = True
            except ValueError:
                raise LoggedError(
                    log, "Trying to remove %s '%s', but it is not present. "
                         "Existing ones: %r", level, pdf, out[level])
    if warn_remove:
        log.warning("You are removing a prior or likelihood pdf. "
                    "Notice that if the resulting posterior is much wider "
                    "than the original one, or displaced enough, "
                    "it is probably safer to explore it directly.")
    if _prior in add:
        mlprior_names_add += [_minuslogprior + _separator + name
                              for name in add[_prior]]
        out[_prior] += list(add[_prior])
    prior_recompute_1d = (
        mlprior_names_add[:1] == [_minuslogprior + _separator + _prior_1d_name])
    # Don't initialise the theory code if not adding/recomputing theory,
    # theory-derived params or likelihoods
    recompute_theory = info_in.get(kinds.theory) and not (
        list(add[kinds.likelihood]) == ["one"] and
        not any(is_derived_param(pinfo) for pinfo in add.get(_params, {}).values()))
    if recompute_theory:
        # Inherit from the original chain (needs input|output_params, renames, etc.)
        add_theory = add.get(kinds.theory)
        if add_theory:
            info_theory_out = {}
            if len(add_theory) > 1:
                log.warning('Importance sampling with more than one theory is '
                            'not really tested')
            add_theory = add_theory.copy()
            for theory, theory_info in info_in[kinds.theory].items():
                theory_copy = deepcopy_where_possible(theory_info)
                if theory in add_theory:
                    info_theory_out[theory] = \
                        recursive_update(theory_copy, add_theory.pop(theory))
                else:
                    info_theory_out[theory] = theory_copy
            info_theory_out.update(add_theory)
        else:
            info_theory_out = deepcopy_where_possible(info_in[kinds.theory])
    else:
        info_theory_out = None
    chi2_names_add = [_get_chi2_name(name) for name in add[kinds.likelihood]
                      if name != "one"]
    out[kinds.likelihood] += [l for l in add[kinds.likelihood] if l != "one"]
    if recompute_theory:
        log.warning("You are recomputing the theory, but in the current version "
                    "this does not force recomputation of any likelihood or derived "
                    "parameter, unless explicitly removed+added.")
    for level in [_prior, kinds.likelihood]:
        for i, x_i in enumerate(out[level]):
            if x_i in list(out[level])[i + 1:]:
                raise LoggedError(
                    log, "You have added %s '%s', which was already present. If you "
                         "want to force its recomputation, you must also 'remove' "
                         "it.", level, x_i)
    # 3. Create output collection
    if _post_suffix not in info_post:
        raise LoggedError(log, "You need to provide a '%s' for your chains.",
                          _post_suffix)
    # Use default prefix if it exists. If it does not, produce no output by default.
    # {post: {output: None}} suppresses output, and if it's a string, updates it.
    out_prefix = info_post.get(_output_prefix, info.get(_output_prefix))
    if out_prefix not in [None, False]:
        out_prefix += (_separator_files + _post + _separator_files +
                       info_post[_post_suffix])
    output_out = get_output(output_prefix=out_prefix, force=info.get(_force))
    if output_out and not output_out.force and output_out.find_collections():
        raise LoggedError(log,
                          "Found existing post-processing output with prefix %r. "
                          "Delete it manually or re-run with `force: True` "
                          "(or `-f`, `--force` from the shell).", out_prefix)
    elif output_out and output_out.force:
        output_out.delete_infos()
        for regexp in output_out.find_collections():
            output_out.delete_with_regexp(re.compile(regexp))
    info_out = deepcopy_where_possible(info)
    info_out[_post] = info_post
    # Updated with input info and extended (updated) add info
    info_out.update(info_in)
    info_out[_post][_post_add] = add
    dummy_model_out = DummyModel(out[_params], out[kinds.likelihood],
                                 info_prior=out[_prior])
    if recompute_theory:
        # TODO: May need updating for more than one, or maybe can be removed
        theory = list(info_theory_out)[0]
        if _input_params not in info_theory_out[theory]:
            raise LoggedError(
                log,
                "You appear to be post-processing a chain generated with an older "
                "version of Cobaya. For post-processing to work, please edit the "
                "'[root].updated.yaml' file of the original chain to add, inside the "
                "theory code block, the list of its input parameters. E.g.\n----\n"
                "theory:\n %s:\n input_params: [param1, param2, ...]\n"
                "----\nIf you get strange errors later, it is likely that you did "
                "not specify the correct set of theory parameters.\n"
                "The full set of input parameters are %s.",
                theory, list(dummy_model_out.parameterization.input_params()))
    # TODO: check allow_renames=False?
    # TODO: May well be simplifications here, this is v close to pre-refactor logic
    # Have not gone through or understood all the parameterization stuff
    model_add = Model(out_params_like, add[kinds.likelihood],
                      info_prior=add.get(_prior), info_theory=info_theory_out,
                      packages_path=info.get(_packages_path),
                      allow_renames=False, post=True,
                      prior_parameterization=dummy_model_out.parameterization)
    # Remove auxiliary "one" before dumping -- 'add' *is* info_out[_post][_post_add]
    add[kinds.likelihood].pop("one")
    collection_out = Collection(dummy_model_out, output_out, name="1")
    output_out.check_and_dump_info(None, info_out, check_compatible=False)
    # Prepare recomputation of aggregated chi2
    # (they need to be recomputed by hand, because its autocomputation won't pick up
    # old likelihoods for a given type)
    all_types = {
        like: str_to_list(add[kinds.likelihood].get(
            like, info_in[kinds.likelihood].get(like)).get("type", []) or [])
        for like in out[kinds.likelihood]}
    types = set(chain(*list(all_types.values())))
    inv_types = {t: [like for like, like_types in all_types.items()
                     if t in like_types] for t in types}
    # 4. Main loop!
log.info("Running post-processing...") last_percent = 0 for i, point in collection_in.data.iterrows(): log.debug("Point: %r", point) sampled = [point[param] for param in dummy_model_in.parameterization.sampled_params()] derived = {param: point.get(param, None) for param in dummy_model_out.parameterization.derived_params()} inputs = {param: point.get( param, dummy_model_in.parameterization.constant_params().get( param, dummy_model_out.parameterization.constant_params().get( param, None))) for param in dummy_model_out.parameterization.input_params()} # Solve inputs that depend on a function and were not saved # (we don't use the Parameterization_to_input method in case there are references # to functions that cannot be loaded at the moment) for p, value in inputs.items(): if value is None: func = dummy_model_out.parameterization._input_funcs[p] args = dummy_model_out.parameterization._input_args[p] inputs[p] = func(*[point.get(arg) for arg in args]) # Add/remove priors priors_add = model_add.prior.logps(sampled) if not prior_recompute_1d: priors_add = priors_add[1:] logpriors_add = dict(zip(mlprior_names_add, priors_add)) logpriors_new = [logpriors_add.get(name, - point.get(name, 0)) for name in collection_out.minuslogprior_names] if log.getEffectiveLevel() <= logging.DEBUG: log.debug( "New set of priors: %r", dict(zip(dummy_model_out.prior, logpriors_new))) if -np.inf in logpriors_new: continue # Add/remove likelihoods output_like = [] if add[kinds.likelihood]: # Notice "one" (last in likelihood_add) is ignored: not in chi2_names loglikes_add, output_like = model_add.logps(inputs, return_derived=True) loglikes_add = dict(zip(chi2_names_add, loglikes_add)) output_like = dict(zip(model_add.output_params, output_like)) else: loglikes_add = dict() loglikes_new = [loglikes_add.get(name, -0.5 * point.get(name, 0)) for name in collection_out.chi2_names] if log.getEffectiveLevel() <= logging.DEBUG: log.debug( "New set of likelihoods: %r", dict(zip(dummy_model_out.likelihood, loglikes_new))) if output_like: log.debug("New set of likelihood-derived parameters: %r", output_like) if -np.inf in loglikes_new: continue # Add/remove derived parameters and change priors of sampled parameters for p in add[_params]: if p in dummy_model_out.parameterization._directly_output: derived[p] = output_like[p] elif p in dummy_model_out.parameterization._derived_funcs: func = dummy_model_out.parameterization._derived_funcs[p] args = dummy_model_out.parameterization._derived_args[p] derived[p] = func( *[point.get(arg, output_like.get(arg, None)) for arg in args]) # We need to recompute the aggregated chi2 by hand for type_, likes in inv_types.items(): derived[_get_chi2_name(type_)] = sum( [-2 * lvalue for lname, lvalue in zip(collection_out.chi2_names, loglikes_new) if _undo_chi2_name(lname) in likes]) if log.getEffectiveLevel() <= logging.DEBUG: log.debug("New derived parameters: %r", dict([(p, derived[p]) for p in dummy_model_out.parameterization.derived_params() if p in add[_params]])) # Save to the collection (keep old weight for now) collection_out.add( sampled, derived=derived.values(), weight=point.get(_weight), logpriors=logpriors_new, loglikes=loglikes_new) # Display progress percent = np.round(i / len(collection_in) * 100) if percent != last_percent and not percent % 5: last_percent = percent progress_bar(log, percent, " (%d/%d)" % (i, len(collection_in))) if not collection_out.data.last_valid_index(): raise LoggedError( log, "No elements in the final sample. 
Possible causes: " "added a prior or likelihood valued zero over the full sampled domain, " "or the computation of the theory failed everywhere, etc.") # Reweight -- account for large dynamic range! # Prefer to rescale +inf to finite, and ignore final points with -inf. # Remove -inf's (0-weight), and correct indices difflogmax = max(collection_in[_minuslogpost] - collection_out[_minuslogpost]) collection_out.data[_weight] *= np.exp( collection_in[_minuslogpost] - collection_out[_minuslogpost] - difflogmax) collection_out.data = ( collection_out.data[collection_out.data.weight > 0].reset_index(drop=True)) collection_out._n = collection_out.data.last_valid_index() + 1 # Write! collection_out.out_update() log.info("Finished! Final number of samples: %d", len(collection_out)) return info_out, {"sample": collection_out}
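# A hypothetical input dict for this post() (block and key names as read above;
# the likelihood names are made up):
#
#     post_input = {
#         "output": "chains/run1",     # where the original sample lives
#         "post": {
#             "suffix": "importance",  # required; appended to the output prefix as
#                                      # '<prefix><sep>post<sep><suffix>'
#             "skip": 100, "thin": 2,  # burn-in removal and thinning
#             "add": {"likelihood": {"new_like": None}},
#             "remove": {"likelihood": ["old_like"]},
#         },
#     }
#     post(post_input)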
def __init__(self, info_sampler, model, output=None, packages_path=None, name=None):
    """
    Actual initialization of the class. Loads the default and input information
    and calls the custom ``initialize`` method. [Do not modify this one.]
    """
    self.model = model
    self.output = output
    self._updated_info = deepcopy_where_possible(info_sampler)
    super().__init__(info_sampler, packages_path=packages_path, name=name,
                     initialize=False, standalone=False)
    # Seed, if requested
    if getattr(self, "seed", None) is not None:
        if not isinstance(self.seed, int) or not (0 <= self.seed <= 2 ** 32 - 1):
            raise LoggedError(
                self.log, "Seeds must be a *non-negative integer* <= 2**32 - 1, "
                          "but got %r with type %r", self.seed, type(self.seed))
        # MPI-awareness: sum the rank to the seed
        if more_than_one_process():
            self.seed += get_mpi_rank()
        self.mpi_warning("This run has been SEEDED with seed %d", self.seed)
    # Load checkpoint info, if resuming
    if self.output.is_resuming() and not isinstance(self, Minimizer):
        try:
            checkpoint_info = yaml_load_file(self.checkpoint_filename())
            try:
                for k, v in checkpoint_info[kinds.sampler][self.get_name()].items():
                    setattr(self, k, v)
                self.mpi_info("Resuming from previous sample!")
            except KeyError:
                if is_main_process():
                    raise LoggedError(
                        self.log, "Checkpoint file found at '%s' "
                                  "but it corresponds to a different sampler.",
                        self.checkpoint_filename())
        except (IOError, TypeError):
            pass
    else:
        try:
            os.remove(self.checkpoint_filename())
            os.remove(self.progress_filename())
        except (OSError, TypeError):
            pass
    self._set_rng()
    self.initialize()
    self._release_rng()
    self.model.set_cache_size(self._get_requested_cache_size())
    # Add to the updated info some values which are
    # only available after initialisation
    self._updated_info[_version] = self.get_version()
def is_equal_info(info_old, info_new, strict=True, print_not_log=False,
                  ignore_blocks=()):
    """
    Compares two information dictionaries, an old one versus a new one, and updates
    the new one for selected values of the old one.

    Set ``strict=False`` (default: ``True``) to ignore options that would not affect
    the statistics of a posterior sample, including the order of
    params/priors/likelihoods.
    """
    if print_not_log:
        myprint = print
        myprint_debug = lambda x: x
    else:
        myprint = log.info
        myprint_debug = log.debug
    myname = inspect.stack()[0][3]
    ignore = set() if strict else \
        {_debug, _debug_file, _resume, _force, _packages_path, _test_run, _version}
    ignore = ignore.union(set(ignore_blocks or []))
    if set(info for info in info_old
           if info_old[info] is not None).difference(ignore) != \
            set(info for info in info_new
                if info_new[info] is not None).difference(ignore):
        myprint(myname + ": different blocks or options: %r (old) vs %r (new)" % (
            set(info_old).difference(ignore), set(info_new).difference(ignore)))
        return False
    for block_name in info_old:
        if block_name in ignore or block_name not in info_new:
            continue
        block1 = deepcopy_where_possible(info_old[block_name])
        block2 = deepcopy_where_possible(info_new[block_name])
        # First, deal with root-level options (force, output, ...)
        if not isinstance(block1, dict):
            if block1 != block2:
                myprint(myname + ": different option '%s'" % block_name)
                return False
            continue
        # Now let's do components and params
        # 1. Check order (it DOES matter, but just up to 1st level)
        f = list if strict else set
        if f(block1) != f(block2):
            myprint(myname + ": different [%s] or different order of them: "
                             "%r vs %r" % (block_name, list(block1), list(block2)))
            return False
        # 2. Gather general options to be ignored
        if not strict:
            ignore_k = set()
            if block_name in [kinds.theory, kinds.likelihood]:
                ignore_k = ignore_k.union({_input_params, _output_params})
            elif block_name == _params:
                for param in block1:
                    # Unify notation
                    block1[param] = expand_info_param(block1[param])
                    block2[param] = expand_info_param(block2[param])
                    ignore_k = ignore_k.union({partag.latex, partag.renames,
                                               partag.ref, partag.proposal,
                                               "min", "max"})
                    # Fixed params: it doesn't matter if they are saved as derived
                    if partag.value in block1[param]:
                        block1[param].pop(partag.derived, None)
                    if partag.value in block2[param]:
                        block2[param].pop(partag.derived, None)
                    # Renames: order does not matter
                    block1[param][partag.renames] = set(
                        block1[param].get(partag.renames, []))
                    block2[param][partag.renames] = set(
                        block2[param].get(partag.renames, []))
        # 3. Now check components/parameters one by one
        for k in block1:
            if not strict:
                # Options to be ignored for this particular component/parameter
                ignore_k_this = ignore_k.copy()
                if block_name in kinds:
                    if _external not in block1[k]:
                        try:
                            component_path = (block1[k].pop(_component_path, None)
                                              if isinstance(block1[k], dict) else None)
                            class_name = (block1[k] or {}).get(_class_name) or k
                            cls = get_class(class_name, block_name,
                                            component_path=component_path)
                            ignore_k_this = ignore_k_this.union(
                                set(getattr(cls, "_at_resume_prefer_new", {})))
                        except ImportError:
                            pass
                # Pop ignored and kept options
                for j in list(ignore_k_this):
                    block1[k].pop(j, None)
                    block2[k].pop(j, None)
            if block1[k] != block2[k]:
                # For clarity, pop common stuff before printing
                to_pop = [j for j in block1[k]
                          if block1[k].get(j) == block2[k].get(j)]
                [(block1[k].pop(j, None), block2[k].pop(j, None)) for j in to_pop]
                myprint(myname + ": different content of [%s:%s]" % (block_name, k) +
                        " -- (re-run with `debug: True` for more info)")
                myprint_debug("%r (old) vs %r (new)" % (block1[k], block2[k]))
                return False
    return True
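# Runnable sketch of the strict/non-strict distinction above (print_not_log=True
# avoids needing a configured logger; the infos are illustrative):
#
#     info_a = {"params": {"x": {"prior": {"min": 0, "max": 1}}}, "debug": True}
#     info_b = {"params": {"x": {"prior": {"min": 0, "max": 1}}}}
#     is_equal_info(info_a, info_b, strict=False, print_not_log=True)  # True:
#     # 'debug' is ignored, since it cannot affect the posterior's statistics
#     is_equal_info(info_a, info_b, strict=True, print_not_log=True)   # False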
def post(info_or_yaml_or_file: Union[InputDict, str, os.PathLike], sample: Union[SampleCollection, List[SampleCollection], None] = None ) -> PostTuple: info = load_input_dict(info_or_yaml_or_file) logger_setup(info.get("debug"), info.get("debug_file")) log = get_logger(__name__) # MARKED FOR DEPRECATION IN v3.0 if info.get("modules"): raise LoggedError(log, "The input field 'modules' has been deprecated." "Please use instead %r", packages_path_input) # END OF DEPRECATION BLOCK info_post: PostDict = info.get("post") or {} if not info_post: raise LoggedError(log, "No 'post' block given. Nothing to do!") if mpi.is_main_process() and info.get("resume"): log.warning("Resuming not implemented for post-processing. Re-starting.") if not info.get("output") and info_post.get("output") \ and not info.get("params"): raise LoggedError(log, "The input dictionary must have be a full option " "dictionary, or have an existing 'output' root to load " "previous settings from ('output' to read from is in the " "main block not under 'post'). ") # 1. Load existing sample output_in = get_output(prefix=info.get("output")) if output_in: info_in = output_in.load_updated_info() or update_info(info) else: info_in = update_info(info) params_in: ExpandedParamsDict = info_in["params"] # type: ignore dummy_model_in = DummyModel(params_in, info_in.get("likelihood", {}), info_in.get("prior")) in_collections = [] thin = info_post.get("thin", 1) skip = info_post.get("skip", 0) if info.get('thin') is not None or info.get('skip') is not None: # type: ignore raise LoggedError(log, "'thin' and 'skip' should be " "parameters of the 'post' block") if sample: # If MPI, assume for each MPI process post is passed in the list of # collections that should be processed by that process # (e.g. single chain output from sampler) if isinstance(sample, SampleCollection): in_collections = [sample] else: in_collections = sample for i, collection in enumerate(in_collections): if skip: if 0 < skip < 1: skip = int(round(skip * len(collection))) collection = collection.filtered_copy(slice(skip, None)) if thin != 1: collection = collection.thin_samples(thin) in_collections[i] = collection elif output_in: files = output_in.find_collections() numbered = files if not numbered: # look for un-numbered output files files = output_in.find_collections(name=False) if files: if mpi.size() > len(files): raise LoggedError(log, "Number of MPI processes (%s) is larger than " "the number of sample files (%s)", mpi.size(), len(files)) for num in range(mpi.rank(), len(files), mpi.size()): in_collections += [SampleCollection( dummy_model_in, output_in, onload_thin=thin, onload_skip=skip, load=True, file_name=files[num], name=str(num + 1) if numbered else "")] else: raise LoggedError(log, "No samples found for the input model with prefix %s", os.path.join(output_in.folder, output_in.prefix)) else: raise LoggedError(log, "No output from where to load from, " "nor input collections given.") if any(len(c) <= 1 for c in in_collections): raise LoggedError( log, "Not enough samples for post-processing. Try using a larger sample, " "or skipping or thinning less.") mpi.sync_processes() log.info("Will process %d sample points.", sum(len(c) for c in in_collections)) # 2. 
Compare old and new info: determine what to do add = info_post.get("add") or {} if "remove" in add: raise LoggedError(log, "remove block should be under 'post', not 'add'") remove = info_post.get("remove") or {} # Add a dummy 'one' likelihood, to absorb unused parameters if not add.get("likelihood"): add["likelihood"] = {} add["likelihood"]["one"] = None # Expand the "add" info, but don't add new default sampled parameters orig_params = set(add.get("params") or []) add = update_info(add, add_aggr_chi2=False) add_params: ExpandedParamsDict = add["params"] # type: ignore for p in set(add_params) - orig_params: if p in params_in: add_params.pop(p) # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters out_combined_params = deepcopy_where_possible(params_in) remove_params = list(str_to_list(remove.get("params")) or []) for p in remove_params: pinfo = params_in.get(p) if pinfo is None or not is_derived_param(pinfo): raise LoggedError( log, "You tried to remove parameter '%s', which is not a derived parameter. " "Only derived parameters can be removed during post-processing.", p) out_combined_params.pop(p) # Force recomputation of aggregated chi2 for p in list(out_combined_params): if p.startswith(get_chi2_name("")): out_combined_params.pop(p) prior_recompute_1d = False for p, pinfo in add_params.items(): pinfo_in = params_in.get(p) if is_sampled_param(pinfo): if not is_sampled_param(pinfo_in): # No added sampled parameters (de-marginalisation not implemented) if pinfo_in is None: raise LoggedError( log, "You added a new sampled parameter %r (maybe accidentally " "by adding a new likelihood that depends on it). " "Adding new sampled parameters is not possible. Try fixing " "it to some value.", p) else: raise LoggedError( log, "You tried to change the prior of parameter '%s', " "but it was not a sampled parameter. " "To change that prior, you need to define as an external one.", p) # recompute prior if potentially changed sampled parameter priors prior_recompute_1d = True elif is_derived_param(pinfo): if p in out_combined_params: raise LoggedError( log, "You tried to add derived parameter '%s', which is already " "present. To force its recomputation, 'remove' it too.", p) elif is_fixed_or_function_param(pinfo): # Only one possibility left "fixed" parameter that was not present before: # input of new likelihood, or just an argument for dynamical derived (dropped) if pinfo_in and p in params_in and pinfo["value"] != pinfo_in.get("value"): raise LoggedError( log, "You tried to add a fixed parameter '%s: %r' that was already present" " but had a different value or was not fixed. This is not allowed. " "The old info of the parameter was '%s: %r'", p, dict(pinfo), p, dict(pinfo_in)) elif not pinfo_in: # OK as long as we have known value for it raise LoggedError(log, "Parameter %s no known value. ", p) out_combined_params[p] = pinfo out_combined: InputDict = {"params": out_combined_params} # type: ignore # Turn the rest of *derived* parameters into constants, # so that the likelihoods do not try to recompute them # But be careful to exclude *input* params that have a "derived: True" value # (which in "updated info" turns into "derived: 'lambda [x]: [x]'") # Don't assign to derived parameters to theories, only likelihoods, so they can be # recomputed if needed. 
    # if it is already assigned parameters in the usual way; likelihoods can get
    # the required derived parameters from the stored sample derived parameter inputs.
    out_params_with_computed = deepcopy_where_possible(out_combined_params)
    dropped_theory = set()
    for p, pinfo in out_params_with_computed.items():
        if (is_derived_param(pinfo) and "value" not in pinfo
                and p not in add_params):
            out_params_with_computed[p] = {"value": np.nan}
            dropped_theory.add(p)
    # 2.2 Manage adding/removing priors and likelihoods
    warn_remove = False
    kind: ModelBlock
    for kind in ("prior", "likelihood", "theory"):
        out_combined[kind] = deepcopy_where_possible(info_in.get(kind)) or {}
        for remove_item in str_to_list(remove.get(kind)) or []:
            try:
                out_combined[kind].pop(remove_item, None)
                if remove_item not in (add.get(kind) or []) and kind != "theory":
                    warn_remove = True
            except ValueError:
                raise LoggedError(
                    log, "Trying to remove %s '%s', but it is not present. "
                         "Existing ones: %r",
                    kind, remove_item, list(out_combined[kind]))
        if kind != "theory" and kind in add:
            dups = set(add.get(kind) or []).intersection(out_combined[kind]) - {"one"}
            if dups:
                raise LoggedError(
                    log, "You have added %s '%s', which was already present. If you "
                         "want to force its recomputation, you must also 'remove' it.",
                    kind, dups)
            out_combined[kind].update(add[kind])
    if warn_remove and mpi.is_main_process():
        log.warning("You are removing a prior or likelihood pdf. "
                    "Notice that if the resulting posterior is much wider "
                    "than the original one, or displaced enough, "
                    "it is probably safer to explore it directly.")
    mlprior_names_add = minuslogprior_names(add.get("prior") or [])
    chi2_names_add = [get_chi2_name(name) for name in add["likelihood"]
                      if name != "one"]
    out_combined["likelihood"].pop("one", None)
    add_theory = add.get("theory")
    if add_theory:
        if len(add["likelihood"]) == 1 and not any(
                is_derived_param(pinfo) for pinfo in add_params.values()):
            log.warning("You are adding a theory, but this does not force recomputation "
                        "of any likelihood or derived parameters unless explicitly "
                        "removed+added.")
        # Inherit from the original chain (input|output_params, renames, etc.)
        added_theory = add_theory.copy()
        for theory, theory_info in out_combined["theory"].items():
            if theory in list(added_theory):
                out_combined["theory"][theory] = \
                    recursive_update(theory_info, added_theory.pop(theory))
        out_combined["theory"].update(added_theory)
    # Prepare recomputation of aggregated chi2
    # (they need to be recomputed by hand, because auto-computation won't pick up
    #  old likelihoods for a given type)
    all_types = {like: str_to_list(opts.get("type") or [])
                 for like, opts in out_combined["likelihood"].items()}
    types = set(chain(*all_types.values()))
    inv_types = {t: [like for like, like_types in all_types.items() if t in like_types]
                 for t in sorted(types)}
    add_aggregated_chi2_params(out_combined_params, types)

    # 3. Create output collection
    # Use default prefix if it exists. If it does not, produce no output by default.
    # {post: {output: None}} suppresses output, and if it's a string, updates it.
    out_prefix = info_post.get("output", info.get("output"))
    if out_prefix:
        suffix = info_post.get("suffix")
        if not suffix:
            raise LoggedError(log, "You need to provide a '%s' for your output chains.",
                              "suffix")
        out_prefix += separator_files + "post" + separator_files + suffix
    output_out = get_output(prefix=out_prefix, force=info.get("force"))
    output_out.set_lock()
    if output_out and not output_out.force and output_out.find_collections():
        raise LoggedError(log, "Found existing post-processing output with prefix %r. "
                               "Delete it manually or re-run with `force: True` "
                               "(or `-f`, `--force` from the shell).", out_prefix)
    elif output_out and output_out.force and mpi.is_main_process():
        output_out.delete_infos()
        for _file in output_out.find_collections():
            output_out.delete_file_or_folder(_file)
    info_out = deepcopy_where_possible(info)
    info_post = info_post.copy()
    info_out["post"] = info_post
    # Updated with input info and extended (updated) add info
    info_out.update(info_in)  # type: ignore
    info_post["add"] = add

    dummy_model_out = DummyModel(out_combined_params, out_combined["likelihood"],
                                 info_prior=out_combined["prior"])
    out_func_parameterization = Parameterization(out_params_with_computed)

    # TODO: check allow_renames=False?
    model_add = Model(out_params_with_computed, add["likelihood"],
                      info_prior=add.get("prior"),
                      info_theory=out_combined["theory"],
                      packages_path=(info_post.get(packages_path_input) or
                                     info.get(packages_path_input)),
                      allow_renames=False, post=True,
                      stop_at_error=info.get('stop_at_error', False),
                      skip_unused_theories=True,
                      dropped_theory_params=dropped_theory)
    # Remove auxiliary "one" before dumping -- 'add' *is* info_out["post"]["add"]
    add["likelihood"].pop("one")
    out_collections = [SampleCollection(dummy_model_out, output_out, name=c.name,
                                        cache_size=OutputOptions.default_post_cache_size)
                       for c in in_collections]
    # TODO: should maybe add skip/thin to out_combined, so can tell post-processed?
    output_out.check_and_dump_info(info_out, out_combined, check_compatible=False)
    collection_in = in_collections[0]
    collection_out = out_collections[0]
    last_percent = None
    known_constants = dummy_model_out.parameterization.constant_params()
    known_constants.update(dummy_model_in.parameterization.constant_params())
    missing_params = dummy_model_in.parameterization.sampled_params().keys() - set(
        collection_in.columns)
    if missing_params:
        raise LoggedError(log, "Input samples do not contain expected sampled parameter "
                               "values: %s", missing_params)
    missing_priors = set(name for name in collection_out.minuslogprior_names
                         if name not in mlprior_names_add
                         and name not in collection_in.columns)
    if _minuslogprior_1d_name in missing_priors:
        prior_recompute_1d = True
    if prior_recompute_1d:
        missing_priors.discard(_minuslogprior_1d_name)
        mlprior_names_add.insert(0, _minuslogprior_1d_name)
    prior_regenerate: Optional[Prior]
    if missing_priors and "prior" in info_in:
        # in case there are input priors that are not stored in input samples,
        # e.g. when post-processing GetDist/CosmoMC-format chains
        in_names = minuslogprior_names(info_in["prior"])
        info_prior = {piname: inf for (piname, inf), in_name in
                      zip(info_in["prior"].items(), in_names)
                      if in_name in missing_priors}
        regenerated_prior_names = minuslogprior_names(info_prior)
        missing_priors.difference_update(regenerated_prior_names)
        prior_regenerate = Prior(dummy_model_in.parameterization, info_prior)
    else:
        prior_regenerate = None
        regenerated_prior_names = None
    if missing_priors:
        raise LoggedError(log, "Missing priors: %s", missing_priors)
    mpi.sync_processes()
    output_in.check_lock()

    # 4. Main loop! Loop over input samples and adjust as required.
    if mpi.is_main_process():
        log.info("Running post-processing...")
    difflogmax: Optional[float] = None
    to_do = sum(len(c) for c in in_collections)
    weights = []
    done = 0
    last_dump_time = time.time()
    for collection_in, collection_out in zip(in_collections, out_collections):
        importance_weights = []

        def set_difflogmax():
            nonlocal difflogmax
            difflog = (collection_in[OutPar.minuslogpost].to_numpy(
                dtype=np.float64)[:len(collection_out)]
                       - collection_out[OutPar.minuslogpost].to_numpy(dtype=np.float64))
            difflogmax = np.max(difflog)
            if abs(difflogmax) < 1:
                difflogmax = 0  # keep simple when e.g. very similar
            log.debug("difflogmax: %g", difflogmax)
            if mpi.more_than_one_process():
                difflogmax = max(mpi.allgather(difflogmax))
            if mpi.is_main_process():
                log.debug("Set difflogmax: %g", difflogmax)
            _weights = np.exp(difflog - difflogmax)
            importance_weights.extend(_weights)
            collection_out.reweight(_weights)

        for i, point in collection_in.data.iterrows():
            all_params = point.to_dict()
            for p in remove_params:
                all_params.pop(p, None)
            log.debug("Point: %r", point)
            sampled = np.array([all_params[param] for param in
                                dummy_model_in.parameterization.sampled_params()])
            all_params = out_func_parameterization.to_input(all_params).copy()
            # Add/remove priors
            if prior_recompute_1d:
                priors_add = [model_add.prior.logps_internal(sampled)]
                if priors_add[0] == -np.inf:
                    continue
            else:
                priors_add = []
            if model_add.prior.external:
                priors_add.extend(model_add.prior.logps_external(all_params))
            logpriors_add = dict(zip(mlprior_names_add, priors_add))
            logpriors_new = [logpriors_add.get(name, - point.get(name, 0))
                             for name in collection_out.minuslogprior_names]
            if prior_regenerate:
                regenerated = dict(zip(regenerated_prior_names,
                                       prior_regenerate.logps_external(all_params)))
                for _i, name in enumerate(collection_out.minuslogprior_names):
                    if name in regenerated_prior_names:
                        logpriors_new[_i] = regenerated[name]
            if is_debug(log):
                log.debug("New set of priors: %r",
                          dict(zip(dummy_model_out.prior, logpriors_new)))
            if -np.inf in logpriors_new:
                continue
            # Add/remove likelihoods and/or (re-)calculate derived parameters
            loglikes_add, output_derived = model_add._loglikes_input_params(
                all_params, return_output_params=True)
            loglikes_add = dict(zip(chi2_names_add, loglikes_add))
            output_derived = dict(zip(model_add.output_params, output_derived))
            loglikes_new = [loglikes_add.get(name, -0.5 * point.get(name, 0))
                            for name in collection_out.chi2_names]
            if is_debug(log):
                log.debug("New set of likelihoods: %r",
                          dict(zip(dummy_model_out.likelihood, loglikes_new)))
                if output_derived:
                    log.debug("New set of derived parameters: %r", output_derived)
            if -np.inf in loglikes_new:
                continue
            all_params.update(output_derived)
            all_params.update(out_func_parameterization.to_derived(all_params))
            derived = {param: all_params.get(param)
                       for param in dummy_model_out.parameterization.derived_params()}
            # We need to recompute the aggregated chi2 by hand
            for type_, likes in inv_types.items():
                derived[get_chi2_name(type_)] = sum(
                    -2 * lvalue for lname, lvalue
                    in zip(collection_out.chi2_names, loglikes_new)
                    if undo_chi2_name(lname) in likes)
            if is_debug(log):
                log.debug("New derived parameters: %r",
                          {p: derived[p]
                           for p in dummy_model_out.parameterization.derived_params()
                           if p in add["params"]})
            # Save to the collection (keep old weight for now)
            weight = point.get(OutPar.weight)
            mpi.check_errors()
            if difflogmax is None and i > OutputOptions.reweight_after and \
                    time.time() - last_dump_time > OutputOptions.output_inteveral_s / 2:
                set_difflogmax()
                collection_out.out_update()
            if difflogmax is not None:
                logpost_new = sum(logpriors_new) + sum(loglikes_new)
                importance_weight = np.exp(logpost_new + point.get(OutPar.minuslogpost)
                                           - difflogmax)
                weight = weight * importance_weight
                importance_weights.append(importance_weight)
                if time.time() - last_dump_time > OutputOptions.output_inteveral_s:
                    collection_out.out_update()
                    last_dump_time = time.time()
            if weight > 0:
                collection_out.add(sampled, derived=derived.values(), weight=weight,
                                   logpriors=logpriors_new, loglikes=loglikes_new)
            # Display progress
            percent = int(np.round((i + done) / to_do * 100))
            if percent != last_percent and not percent % 5:
                last_percent = percent
                progress_bar(log, percent, " (%d/%d)" % (i + done, to_do))

        if difflogmax is None:
            set_difflogmax()
        if not collection_out.data.last_valid_index():
            raise LoggedError(
                log, "No elements in the final sample. Possible causes: "
                     "added a prior or likelihood valued zero over the full sampled "
                     "domain, or the computation of the theory failed everywhere, etc.")
        collection_out.out_update()
        weights.append(np.array(importance_weights))
        done += len(collection_in)

    assert difflogmax is not None
    points = 0
    tot_weight = 0
    min_weight = np.inf
    max_weight = -np.inf
    max_output_weight = -np.inf
    sum_w2 = 0
    points_removed = 0
    for collection_in, collection_out, importance_weights in zip(in_collections,
                                                                 out_collections,
                                                                 weights):
        output_weights = collection_out[OutPar.weight]
        points += len(collection_out)
        tot_weight += np.sum(output_weights)
        points_removed += len(importance_weights) - len(output_weights)
        min_weight = min(min_weight, np.min(importance_weights))
        max_weight = max(max_weight, np.max(importance_weights))
        max_output_weight = max(max_output_weight, np.max(output_weights))
        sum_w2 += np.dot(output_weights, output_weights)
    (tot_weights, min_weights, max_weights, max_output_weights, sum_w2s, points_s,
     points_removed_s) = mpi.zip_gather(
        [tot_weight, min_weight, max_weight, max_output_weight,
         sum_w2, points, points_removed])
    if mpi.is_main_process():
        output_out.clear_lock()
        log.info("Finished! Final number of distinct sample points: %s", sum(points_s))
        log.info("Importance weight range: %.4g -- %.4g",
                 min(min_weights), max(max_weights))
        if sum(points_removed_s):
            log.info("Points deleted due to zero weight: %s", sum(points_removed_s))
        log.info("Effective number of single samples if independent (sum w)/max(w): %s",
                 int(sum(tot_weights) / max(max_output_weights)))
        log.info(
            "Effective number of weighted samples if independent (sum w)^2/sum(w^2): "
            "%s", int(sum(tot_weights) ** 2 / sum(sum_w2s)))
    products: PostResultDict = {"sample": value_or_list(out_collections),
                                "stats": {'min_importance_weight': (min(min_weights) /
                                                                    max(max_weights)),
                                          'points_removed': sum(points_removed_s),
                                          'tot_weight': sum(tot_weights),
                                          'max_weight': max(max_output_weights),
                                          'sum_w2': sum(sum_w2s),
                                          'points': sum(points_s)},
                                "logpost_weight_offset": difflogmax,
                                "weights": value_or_list(weights)}
    return PostTuple(info=out_combined, products=products)
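
# Illustrative sketch (not part of the original module): a minimal call to `post`,
# reweighting an existing chain by an extra likelihood. The output prefix
# "chains/run1" and the "gaussian_mixture" settings below are hypothetical
# placeholders. Internally, each point's new weight is
#     w_new = w_old * exp(logpost_new - logpost_old - difflogmax),
# where difflogmax is a common offset (the maximum of logpost_new - logpost_old)
# that guards against overflow in the exponential.
#
#     from cobaya.post import post
#     post_info = {"output": "chains/run1",
#                  "post": {"suffix": "weighted",
#                           "add": {"likelihood": {"gaussian_mixture": {
#                               "means": [[0.5]], "covs": [[[0.1]]]}}}}}
#     updated_info, products = post(post_info)
#     # products["sample"] holds the reweighted SampleCollection(s)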

def info(self):
    """
    Returns a copy of the information used to create the model, including defaults.
    """
    return deepcopy_where_possible(self._updated_info)

def check_and_dump_info(self, input_info, updated_info, check_compatible=True,
                        cache_old=False, use_cache_old=False, ignore_blocks=()):
    """
    Saves the info in the chain folder twice:
       - the input info.
       - idem, populated with the components' defaults.

    If resuming a sample, checks first that old and new infos and versions are
    consistent.
    """
    # trim known params of each likelihood: for internal use only
    self.check_lock()
    updated_info_trimmed = deepcopy_where_possible(updated_info)
    updated_info_trimmed["version"] = get_version()
    for like_info in updated_info_trimmed.get("likelihood", {}).values():
        (like_info or {}).pop("params", None)
    if check_compatible:
        # We will test the old info against the dumped+loaded new info.
        # This is because we can't actually check if python objects do change
        try:
            old_info = self.reload_updated_info(cache=cache_old,
                                                use_cache=use_cache_old)
        except InputImportError:
            # for example, when there's a dynamically generated class that cannot
            # be found by the yaml loader (could use a yaml loader that ignores them)
            old_info = None
        if old_info:
            # use consistent yaml read-in types
            # TODO: could probably just compare full infos here, with externals?
            #       for the moment cautiously keeping old behaviour
            old_info = yaml_load(yaml_dump(old_info))  # type: ignore
            new_info = yaml_load(yaml_dump(updated_info_trimmed))
            if not is_equal_info(old_info, new_info, strict=False,
                                 ignore_blocks=list(ignore_blocks) + ["output"]):
                raise LoggedError(
                    self.log, "Old and new run information not compatible! "
                              "Resuming not possible!")
            # Deal with version comparison separately:
            # - If not specified now, take the one used in the resume info
            # - If specified both now and before, check that the new one is not
            #   older than the old one
            #   (For Cobaya's own version, prefer the new one always)
            old_version = old_info.get("version")
            new_version = new_info.get("version")
            if isinstance(old_version, str) and isinstance(new_version, str):
                if version.parse(old_version) > version.parse(new_version):
                    raise LoggedError(
                        self.log,
                        "You are trying to resume a run performed with a "
                        "newer version of Cobaya: %r (you are using %r). "
" "Please, update your Cobaya installation.", old_version, new_version) for k in set(kinds).intersection(updated_info): if k in ignore_blocks or updated_info[k] is None: continue for c in updated_info[k]: new_version = updated_info[k][c].get("version") old_version = old_info[k][c].get("version") # type: ignore if new_version is None: updated_info[k][c]["version"] = old_version updated_info_trimmed[k][c]["version"] = old_version elif old_version is not None: cls = get_resolved_class( c, k, None_if_not_found=True, class_name=updated_info[k][c].get("class")) if cls and cls.compare_versions( old_version, new_version, equal=False): raise LoggedError( self.log, "You have requested version %r for " "%s:%s, but you are trying to resume a " "run that used a newer version: %r.", new_version, k, c, old_version) # If resuming, we don't want to to *partial* dumps if ignore_blocks and self.is_resuming(): return # Work on a copy of the input info, since we are updating the prefix # (the updated one is already a copy) if input_info is not None: input_info = deepcopy_where_possible(input_info) # Write the new one for f, info in [(self.file_input, input_info), (self.file_updated, updated_info_trimmed)]: if info: for k in ignore_blocks: info.pop(k, None) info.pop("debug", None) info.pop("force", None) info.pop("resume", None) # make sure the dumped output_prefix does only contain the file prefix, # not the folder, since it's already been placed inside it info["output"] = self.updated_prefix() with open(f, "w", encoding="utf-8") as f_out: try: f_out.write(yaml_dump(sort_cosmetic(info))) except OutputError as e: raise LoggedError(self.log, str(e)) if updated_info_trimmed and has_non_yaml_reproducible(updated_info_trimmed): try: import dill except ImportError: self.mpi_info('Install "dill" to save reproducible options file.') else: import pickle try: with open(self.dump_file_updated, 'wb') as f: dill.dump(sort_cosmetic(updated_info_trimmed), f, pickle.HIGHEST_PROTOCOL) except pickle.PicklingError as e: os.remove(self.dump_file_updated) self.mpi_info('Options file cannot be pickled %s', e)

def __init__(self, info_params, allow_renames=True, ignore_unused_sampled=False):
    self.set_logger(lowercase=True)
    self.allow_renames = allow_renames
    # First, we load the parameters,
    # not caring about whether they are understood by any likelihood.
    # `input` contains the parameters (expected to be) understood by the likelihood,
    # with their fixed value, their fixing function, or None if their value is given
    # directly by the sampler.
    self._infos = {}
    self._input = {}
    self._input_funcs = {}
    self._input_args = {}
    self._output = {}
    self._constant = {}
    self._sampled = {}
    self._sampled_renames = {}
    self._derived = {}
    self._derived_funcs = {}
    self._derived_args = {}
    # Notice here that expand_info_param *always* adds a partag.derived:True tag
    # to infos without _prior or partag.value, and a partag.value field
    # to fixed params
    for p, info in info_params.items():
        self._infos[p] = deepcopy_where_possible(info)
        if is_fixed_param(info):
            if isinstance(info[partag.value], Number):
                self._constant[p] = info[partag.value]
                if not info.get(partag.drop, False):
                    self._input[p] = self._constant[p]
            else:
                self._input[p] = None
                self._input_funcs[p] = get_external_function(info[partag.value])
                self._input_args[p] = getfullargspec(self._input_funcs[p]).args
        if is_sampled_param(info):
            self._sampled[p] = None
            if not info.get(partag.drop, False):
                self._input[p] = None
            self._sampled_renames[p] = (
                (lambda x: [x] if isinstance(x, str) else x)
                (info.get(partag.renames, [])))
        if is_derived_param(info):
            self._derived[p] = deepcopy_where_possible(info)
            # Dynamical parameters whose value we want to save
            if info[partag.derived] is True and is_fixed_param(info):
                info[partag.derived] = "lambda %s: %s" % (p, p)
            if info[partag.derived] is True:
                self._output[p] = None
            else:
                self._derived_funcs[p] = get_external_function(info[partag.derived])
                self._derived_args[p] = getfullargspec(self._derived_funcs[p]).args
    # Check that the sampled and derived params are all valid python variable names
    for p in chain(self._sampled, self._derived):
        if not is_valid_variable_name(p):
            is_in = p in self._sampled
            eg_in = ("  p_prime:\n    prior: ...\n"
                     "  %s: 'lambda p_prime: p_prime'\n" % p)
            eg_out = "  p_prime: 'lambda %s: %s'\n" % (p, p)
            raise LoggedError(
                self.log,
                "Parameter name '%s' is not a valid Python variable name "
                "(it needs to start with a letter or '_').\n"
                "If this is an %s parameter of a likelihood or theory, "
                "whose name you cannot change,%s define an associated "
                "%s one with a valid name 'p_prime' as: \n\n%s",
                p, "input" if is_in else "output",
                "" if is_in else " remove it and",
                "sampled" if is_in else "derived",
                eg_in if is_in else eg_out)
    # Assume that the *un*known function arguments are likelihood/theory
    # output parameters
    for arg in (set(chain(*self._input_args.values()))
                .union(chain(*self._derived_args.values()))
                - set(self._constant) - set(self._input)
                - set(self._sampled) - set(self._derived)):
        self._output[arg] = None
    # Useful sets: directly-sampled input parameters and directly "output-ed" derived
    self._directly_sampled = [p for p in self._input if p in self._sampled]
    self._directly_output = [p for p in self._derived if p in self._output]
    # Useful mapping: input params that vary if each sampled param is varied
    self._sampled_input_dependence = {s: [i for i in self._input
                                          if s in self._input_args.get(i, {})]
                                      for s in self._sampled}
    # From here on, some error control.
    dropped_but_never_used = (
        set(p for p, v in self._sampled_input_dependence.items() if not v)
        .difference(set(self._directly_sampled)))
    if dropped_but_never_used and not ignore_unused_sampled:
        raise LoggedError(
            self.log,
            "Parameters %r are sampled but not passed to a likelihood or theory "
            "code, and never used as arguments for any parameter functions. "
            "Check that you are not using the '%s' tag unintentionally.",
            list(dropped_but_never_used), partag.drop)
    # input params depend on input and sampled only, never on output/derived
    all_input_arguments = set(chain(*self._input_args.values()))
    bad_input_dependencies = all_input_arguments.difference(
        set(self.input_params()).union(set(self.sampled_params())).union(
            set(self.constant_params())))
    if bad_input_dependencies:
        raise LoggedError(
            self.log,
            "Input parameters defined as functions can only depend on other "
            "input parameters that are not defined as functions. "
            "In particular, an input parameter cannot depend on %r. "
            "Use an explicit Theory calculator for more complex dependencies.",
            list(bad_input_dependencies))
    self._wrapped_input_funcs, self._wrapped_derived_funcs = \
        self._get_wrapped_functions_evaluation_order()
    # warn if repeated labels
    labels_inv_repeated = invert_dict(self.labels())
    for k in list(labels_inv_repeated):
        if len(labels_inv_repeated[k]) == 1:
            labels_inv_repeated.pop(k)
    if labels_inv_repeated:
        self.log.warning("There are repeated parameter labels: %r",
                         labels_inv_repeated)
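
# Illustrative sketch (hypothetical parameter names): how a params info dict maps
# onto the internal containers built in __init__ above, following the conventions
# this module uses (a "prior" entry marks a sampled parameter, a numeric "value"
# a constant, a string "value" an input function, and "derived" a derived one):
#
#     info_params = {
#         "a": {"prior": {"min": 0, "max": 1}},   # -> self._sampled
#         "b": {"value": 0.5},                    # -> self._constant and self._input
#         "c": {"value": "lambda a, b: a * b"},   # -> self._input_funcs
#         "d": {"derived": "lambda a: 2 * a"},    # -> self._derived_funcs
#     }
#     parameterization = Parameterization(info_params)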

def sampled_params_info(self):
    return {p: deepcopy_where_possible(info)
            for p, info in self._infos.items() if p in self._sampled}

def update_info(info):
    """
    Creates an updated info starting from the defaults for each component and
    updating it with the input info.
    """
    component_base_classes = get_base_classes()
    # Don't modify the original input, and convert all Mappings to consistent dicts
    input_info = deepcopy_where_possible(info)
    # Creates an equivalent info using only the defaults
    updated_info = {}
    default_params_info = {}
    default_prior_info = {}
    components = get_used_components(input_info)
    from cobaya.component import CobayaComponent
    for block in components:
        updated = {}
        updated_info[block] = updated
        input_block = input_info[block]
        for component in components[block]:
            # Preprocess "no options" and "external function" in input
            try:
                input_block[component] = input_block[component] or {}
            except TypeError:
                raise LoggedError(
                    log, "Your input info is not well formatted at the '%s' block. "
                         "It must be a dictionary {'%s_i': {options}, ...}. ",
                    block, block)
            if isinstance(component, CobayaComponent) or \
                    isinstance(input_block[component], CobayaComponent):
                raise LoggedError(log, "Input for %s:%s should specify a class, not "
                                       "an instance", block, component)
                # TODO: allow instance passing?
                #       could allow this, but would have to sort out deepcopy
                # if input_block[component]:
                #     raise LoggedError(log, "Instances should be passed a dictionary "
                #                            "entry of the form 'instance: None'")
                # change_key(input_block, component, component.get_name(),
                #            {_external: component})
                # updated[component.get_name()] = \
                #     input_block[component.get_name()].copy()
                # continue
            if inspect.isclass(input_block[component]) or \
                    not isinstance(input_block[component], dict):
                input_block[component] = {_external: input_block[component]}
            ext = input_block[component].get(_external)
            if ext:
                if inspect.isclass(ext):
                    default_class_info = get_default_info(
                        ext, block, input_options=input_block[component])
                else:
                    default_class_info = deepcopy_where_possible(
                        component_base_classes[block].get_defaults())
            else:
                component_path = input_block[component].get(_component_path, None)
                default_class_info = get_default_info(
                    component, block,
                    class_name=input_block[component].get(_class_name),
                    component_path=component_path,
                    input_options=input_block[component])
            updated[component] = default_class_info or {}
            # Update default options with input info
            # Consistency is checked only up to first level! (i.e. subkeys may not match)
            # Reserved attributes not necessarily already in default info:
            reserved = {_external, _class_name, _provides, _requires, partag.renames,
                        _input_params, _output_params, _component_path, _aliases}
            options_not_recognized = (set(input_block[component])
                                      .difference(reserved)
                                      .difference(set(updated[component])))
            if options_not_recognized:
                alternatives = {}
                available = ({_external, _class_name, _requires, partag.renames}
                             .union(updated_info[block][component]))
                while options_not_recognized:
                    option = options_not_recognized.pop()
                    alternatives[option] = fuzzy_match(option, available, n=3)
                did_you_mean = ", ".join(
                    [("'%s' (did you mean %s?)" % (o, "|".join(["'%s'" % _ for _ in a]))
                      if a else "'%s'" % o)
                     for o, a in alternatives.items()])
                raise LoggedError(
                    log, "%s '%s' does not recognize some options: %s. "
" "Check the documentation for '%s'.", block, component, did_you_mean, block) updated[component].update(input_block[component]) # save params and priors of class to combine later default_params_info[component] = default_class_info.get( _params, {}) default_prior_info[component] = default_class_info.get(_prior, {}) # Add priors info, after the necessary checks if _prior in input_info or any(default_prior_info.values()): updated_info[_prior] = input_info.get(_prior, {}) for prior_info in default_prior_info.values(): for name, prior in prior_info.items(): if updated_info[_prior].get(name, prior) != prior: raise LoggedError( log, "Two different priors cannot have the same name: '%s'.", name) updated_info[_prior][name] = prior # Add parameters info, after the necessary updates and checks defaults_merged = merge_default_params_info(default_params_info) updated_info[_params] = merge_params_info( [defaults_merged, input_info.get(_params, {})], default_derived=False) # Add aggregated chi2 params if kinds.likelihood in info: all_types = set( chain(*[ str_to_list(like_info.get("type", []) or []) for like_info in updated_info[kinds.likelihood].values() ])) for t in all_types: updated_info[_params][_get_chi2_name(t)] = { partag.latex: _get_chi2_label(t), partag.derived: True } # Add automatically-defined parameters if _auto_params in updated_info: make_auto_params(updated_info.pop(_auto_params), updated_info[_params]) # Add aliases for theory params (after merging!) for kind in [ k for k in [kinds.theory, kinds.likelihood] if k in updated_info ]: for item in updated_info[kind].values(): renames = item.get(partag.renames) if renames: if not isinstance(renames, Mapping): raise LoggedError( log, "'renames' should be a dictionary of name mappings " "(or you meant to use 'aliases')") renames_flat = [ set([k] + str_to_list(v)) for k, v in renames.items() ] for p in updated_info[_params]: # Probably could be made faster by inverting the renames dicts *once* renames_pairs = [a for a in renames_flat if p in a] if renames_pairs: this_renames = reduce( lambda x, y: x.union(y), [a for a in renames_flat if p in a]) updated_info[_params][p][partag.renames] = \ list(set(this_renames).union(set(str_to_list( updated_info[_params][p].get(partag.renames, [])))) .difference({p})) # Rest of the options for k, v in input_info.items(): if k not in updated_info: updated_info[k] = v return updated_info