Example #1
 def __init__(self,
              info_params,
              info_likelihood,
              info_prior=None,
              info_theory=None,
              modules=None,
              timing=None,
              allow_renames=True):
     self.set_logger(lowercase=True)
     self._updated_info = {
         _params: deepcopy_where_possible(info_params),
         _likelihood: deepcopy_where_possible(info_likelihood)
     }
     if not self._updated_info[_likelihood]:
         raise LoggedError(self.log, "No likelihood requested!")
     for k, v in ((_prior, info_prior), (_theory, info_theory),
                  (_path_install, modules), (_timing, timing)):
         if v not in (None, {}):
             self._updated_info[k] = deepcopy_where_possible(v)
     self.parameterization = Parameterization(self._updated_info[_params],
                                              allow_renames=allow_renames)
     self.prior = Prior(self.parameterization,
                        self._updated_info.get(_prior, None))
     self.likelihood = Likelihood(self._updated_info[_likelihood],
                                  self.parameterization,
                                  self._updated_info.get(_theory),
                                  modules=modules,
                                  timing=timing)
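All of these snippets lean on `deepcopy_where_possible`, which deep-copies plain containers but degrades gracefully to returning a reference for anything that refuses to be copied (modules, external likelihood functions, etc.). A minimal sketch of such a helper, inferred from how it is used in these examples rather than taken from cobaya's actual implementation:

from copy import deepcopy
from typing import Any


def deepcopy_where_possible(base: Any) -> Any:
    """Deep-copy dicts entry by entry; for anything else, attempt a real
    deepcopy and fall back to the original reference if copying fails."""
    if isinstance(base, dict):
        # Copy per entry, so a single uncopyable value (e.g. an external
        # likelihood function) does not prevent copying the rest.
        return {key: deepcopy_where_possible(value) for key, value in base.items()}
    try:
        return deepcopy(base)
    except Exception:
        # Uncopyable object: keep a reference instead of failing.
        return base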
Example #2
def test_cosmo_run_not_found():
    with NoLogging(logging.ERROR):
        inf = deepcopy_where_possible(info)
        inf["likelihood"]["H0.perfect"] = None
        with pytest.raises(ComponentNotFoundError):
            run(inf)
        inf = deepcopy_where_possible(info)
        inf["likelihood"]["none"] = None
        with pytest.raises(ComponentNotFoundError):
            run(inf)
        inf = deepcopy_where_possible(info)
        inf["likelihood"]["pandas.plotting.PlotAccessor"] = None
        with pytest.raises(LoggedError) as e:
            run(inf)
        assert "Failed to get defaults for component" in str(e)
Example #3
 def reload_updated_info(self,
                         cache=False,
                         use_cache=False) -> Optional[InputDict]:
     if mpi.is_main_process():
         if use_cache and hasattr(self, "_old_updated_info"):
             return self._old_updated_info
         try:
             if os.path.isfile(self.dump_file_updated):
                 loaded = load_info_dump(self.dump_file_updated)
             else:
                 loaded = yaml_load_file(self.file_updated)  # type: ignore
             if cache:
                 self._old_updated_info = deepcopy_where_possible(loaded)
             return loaded
         except IOError:
             if cache:
                 self._old_updated_info = None
             return None
     else:
         # Only the cached version is available for non-main processes
         if not use_cache:
             raise LoggedError(
                 self.log, "Cannot call `reload_updated_info` from "
                 "non-main process unless cached version "
                 "(`use_cache=True`) requested.")
         return getattr(self, "_old_updated_info", None)
Example #4
def get_model(info):
    assert hasattr(info, "keys"), (
        "The first argument must be a dictionary with the info needed for the model. "
        "If you were trying to pass the name of an input file instead, "
        "load it first with 'cobaya.input.load_input', "
        "or, if you were passing a yaml string, load it with 'cobaya.yaml.yaml_load'."
    )
    # Configure the logger ASAP
    # Just a dummy import before configuring the logger, until I fix root/individual level
    import getdist
    logger_setup(info.pop(_debug, _debug_default), info.pop(_debug_file, None))
    # Create the updated input information, including defaults for each module.
    info = deepcopy_where_possible(info)
    ignored_info = {}
    for k in list(info):
        if k not in [
                _params, _likelihood, _prior, _theory, _path_install, _timing
        ]:
            ignored_info.update({k: info.pop(k)})
    import logging
    if ignored_info:
        logging.getLogger(__name__.split(".")[-1]).warning(
            "Ignored blocks/options: %r", list(ignored_info))
    updated_info = update_info(info)
    if logging.root.getEffectiveLevel() <= logging.DEBUG:
        logging.getLogger(__name__.split(".")[-1]).debug(
            "Input info updated with defaults (dumped to YAML):\n%s",
            yaml_dump(updated_info))
    # Initialize the posterior and the sampler
    return Model(updated_info[_params],
                 updated_info[_likelihood],
                 updated_info.get(_prior),
                 updated_info.get(_theory),
                 modules=info.get(_path_install),
                 timing=updated_info.get(_timing))
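A minimal call to the `get_model` above, using the external-likelihood shorthand that `update_info` expands (see Example #21), might look like this; the parameter and likelihood names are made up for illustration:

# Hypothetical one-parameter setup: a flat prior and an external Gaussian likelihood.
info = {
    "params": {"x": {"prior": {"min": -5, "max": 5}}},
    "likelihood": {"gauss": lambda x: -0.5 * x ** 2},
}
model = get_model(info)
# The returned Model exposes the posterior, e.g. model.logposterior([0.5])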
Example #5
 def info(self):
     """
     Returns a copy of the information used to initialise the sampler,
     including defaults and some new values that are only available after
     initialisation.
     """
     return deepcopy_where_possible(self._updated_info)
Example #6
def expand_info_param(info_param: ParamInput,
                      default_derived=True) -> ParamDict:
    """
    Expands the info of a parameter, from the user friendly, shorter format
    to a more unambiguous one.
    """
    info_param = deepcopy_where_possible(info_param)
    if not isinstance(info_param, Mapping):
        if info_param is None:
            info_param = {}
        elif isinstance(info_param,
                        Sequence) and not isinstance(info_param, str):
            values = list(info_param)
            allowed_lengths = [2, 4, 5]
            if len(values) not in allowed_lengths:
                logger = logging.getLogger(__name__.split(".")[-1])
                raise LoggedError(
                    logger, "Parameter info length not valid: %d. "
                    "The allowed lengths are %r. See documentation.",
                    len(values), allowed_lengths)
            info_param = {"prior": [values[0], values[1]]}
            if len(values) >= 4:
                info_param["ref"] = [values[2], values[3]]
                if len(values) == 5:
                    info_param["proposal"] = values[4]
        else:
            info_param = {"value": info_param}
    if all(f not in info_param for f in ["prior", "value", "derived"]):
        info_param["derived"] = default_derived
    # Dynamical input parameters: save as derived by default
    value = info_param.get("value")
    if isinstance(value, str) or callable(value):
        info_param["derived"] = info_param.get("derived", True)
    return info_param
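Worked through the branches above, the positional shorthand maps onto prior, reference and proposal (return values shown as comments):

expand_info_param([0, 1])
# -> {"prior": [0, 1]}
expand_info_param([0, 1, 0.5, 0.1, 0.05])
# -> {"prior": [0, 1], "ref": [0.5, 0.1], "proposal": 0.05}
expand_info_param(3.14)
# -> {"value": 3.14}
expand_info_param(None)
# -> {"derived": True}   (no prior/value given, so derived by default)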
Example #7
    def initialize(self):

        # Dict of named tuples to collect requirements and computation methods
        self.collectors = {}
        # Additional input parameters to pass to CAMB, and attributes to set_ manually
        self.extra_args = deepcopy_where_possible(self.extra_args) or {}
        self._must_provide = None
Example #8
 def initialize(self):
     """Importing CLASS from the correct path, if given, and if not, globally."""
     # If path not given, try using general path to modules
     if not self.path and self.path_install:
         self.path = self.get_path(self.path_install)
     if self.path:
         self.log.info("Importing *local* classy from " + self.path)
         classy_build_path = os.path.join(self.path, "python", "build")
         post = next(d for d in os.listdir(classy_build_path)
                     if d.startswith("lib."))
         classy_build_path = os.path.join(classy_build_path, post)
         if not os.path.exists(classy_build_path):
              # If path was given as an install path, try importing the global one anyway
             if self.path_install:
                 self.log.info(
                     "Importing *global* CLASS (because not installed).")
             else:
                 raise LoggedError(
                     self.log, "Either CLASS is not in the given folder, "
                     "'%s', or you have not compiled it.", self.path)
         else:
             # Inserting the previously found path into the list of import folders
             sys.path.insert(0, classy_build_path)
     else:
         self.log.info("Importing *global* CLASS.")
     try:
         from classy import Class, CosmoSevereError, CosmoComputationError
     except ImportError:
         raise LoggedError(
             self.log, "Couldn't find the CLASS python interface. "
             "Make sure that you have compiled it, and that you either\n"
             " (a) specify a path (you didn't) or\n"
             " (b) install the Python interface globally with\n"
             "     '/path/to/class/python/python setup.py install --user'")
     self.classy = Class()
     # Propagate errors up
     global CosmoComputationError, CosmoSevereError
     # Generate states, to avoid recomputing
     self.n_states = 3
     self.states = [{
         "params": None,
         "derived": None,
         "derived_extra": None,
         "last": 0
     } for i in range(self.n_states)]
     # Dict of named tuples to collect requirements and computation methods
     self.collectors = {}
     # Additional input parameters to pass to CLASS
     self.extra_args = deepcopy_where_possible(self.extra_args) or {}
     # Add general CLASS stuff
     self.extra_args["output"] = self.extra_args.get("output", "")
     if "sBBN file" in self.extra_args:
         self.extra_args["sBBN file"] = (
             self.extra_args["sBBN file"].format(classy=self.path))
     # Set aliases
     self.planck_to_classy = self.renames
     # Derived parameters that may not have been requested, but will be necessary later
     self.derived_extra = []
Example #9
    def __init__(self,
                 info_sampler: SamplerDict,
                 model: Model,
                 output: Optional[Output] = None,
                 packages_path: Optional[str] = None,
                 name: Optional[str] = None):
        """
        Actual initialization of the class. Loads the default and input information and
        calls the custom ``initialize`` method.

        [Do not modify this one.]
        """
        self._model = model
        self._output = output
        self._updated_info = deepcopy_where_possible(info_sampler)
        super().__init__(info_sampler,
                         packages_path=packages_path,
                         name=name,
                         initialize=False,
                         standalone=False)
        if not model.parameterization.sampled_params():
            self.mpi_warning("No sampled parameters requested! "
                             "This will fail for non-mock samplers.")
        # Load checkpoint info, if resuming
        if self.output.is_resuming() and not isinstance(self, Minimizer):
            checkpoint_info = None
            if mpi.is_main_process():
                try:
                    checkpoint_info = yaml_load_file(
                        self.checkpoint_filename())

                    if self.get_name() not in checkpoint_info["sampler"]:
                        raise LoggedError(
                            self.log, "Checkpoint file found at '%s' "
                            "but it corresponds to a different sampler.",
                            self.checkpoint_filename())
                except (IOError, TypeError):
                    pass
            checkpoint_info = mpi.share_mpi(checkpoint_info)
            if checkpoint_info:
                self.set_checkpoint_info(checkpoint_info)
                self.mpi_info("Resuming from previous sample!")
        elif not isinstance(self, Minimizer) and mpi.is_main_process():
            try:
                output.delete_file_or_folder(self.checkpoint_filename())
                output.delete_file_or_folder(self.progress_filename())
            except (OSError, TypeError):
                pass
        self._set_rng()
        self.initialize()
        model.set_cache_size(self._get_requested_cache_size())
        # Add to the updated info some values which are
        # only available after initialisation
        self._updated_info["version"] = self.get_version()
Example #10
File: output.py (project: yufdu/cobaya)
 def reload_updated_info(self, cache=False, use_cache=False):
     if use_cache and getattr(self, "_old_updated_info", None):
         return self._old_updated_info
     try:
         loaded = yaml_load_file(self.file_updated)
         if cache:
             self._old_updated_info = loaded
         return deepcopy_where_possible(loaded)
     except IOError:
         if cache:
             self._old_updated_info = None
         return None
Example #11
def load_input_dict(info_or_yaml_or_file: Union[InputDict, str, os.PathLike]
                    ) -> InputDict:
    if isinstance(info_or_yaml_or_file, os.PathLike):
        return load_input_file(info_or_yaml_or_file)
    elif isinstance(info_or_yaml_or_file, str):
        if "\n" in info_or_yaml_or_file:
            return yaml_load(info_or_yaml_or_file)  # type: ignore
        else:
            return load_input_file(info_or_yaml_or_file)
    elif isinstance(info_or_yaml_or_file, (dict, Mapping)):
        return deepcopy_where_possible(info_or_yaml_or_file)
    else:
        raise ValueError("The first argument must be a dictionary, file name or "
                         "yaml string with the required input options.")
Example #12
def reduce_info_param(info_param):
    """
    Compresses the info of a parameter, suppressing default values.
    This is the opposite of :func:`~input.expand_info_param`.
    """
    info_param = deepcopy_where_possible(info_param)
    if not isinstance(info_param, dict):
        return
    # All parameters without a prior are derived parameters unless otherwise specified
    if info_param.get(partag.derived) is True:
        info_param.pop(partag.derived)
    # Fixed parameters with single "value" key
    if list(info_param) == [partag.value]:
        return info_param[partag.value]
    return info_param
Example #13
def reduce_info_param(info_param: ParamDict) -> ParamInput:
    """
    Compresses the info of a parameter, suppressing default values.
    This is the opposite of :func:`~input.expand_info_param`.
    """
    info_param = deepcopy_where_possible(info_param)
    if not isinstance(info_param, dict):
        return None
    # All parameters without a prior are derived parameters unless otherwise specified
    if info_param.get("derived") is True:
        info_param.pop("derived")
    # Fixed parameters with single "value" key
    if list(info_param) == ["value"] and not callable(info_param["value"]):
        return info_param["value"]
    return info_param
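Combined with `expand_info_param` (Examples #6 and #16), this gives a round trip in which the defaults added during expansion are stripped again (results shown as comments):

expanded = expand_info_param(3.14)   # {"value": 3.14}
reduce_info_param(expanded)          # -> 3.14: single non-callable "value" key
expanded = expand_info_param(None)   # {"derived": True}
reduce_info_param(expanded)          # -> {}: "derived: True" is the default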
Example #14
File: output.py (project: rancesol/cobaya)
    def dump_info(self, input_info, updated_info, check_compatible=True):
        """
        Saves the info in the chain folder twice:
           - the input info.
           - idem, populated with the modules' defaults.

        If resuming a sample, checks first that old and new infos are consistent.
        """
        # trim known params of each likelihood: for internal use only
        updated_info_trimmed = deepcopy_where_possible(updated_info)
        for lik_info in updated_info_trimmed.get(_likelihood, {}).values():
            if hasattr(lik_info, "pop"):
                lik_info.pop(_params, None)
        if check_compatible:
            try:
                # We will test the old info against the dumped+loaded new info.
                # This is because we can't actually check if python objects do change
                old_info = self.reload_updated_info()
                new_info = yaml_load(yaml_dump(updated_info_trimmed))
                ignore_blocks = []
                if list(new_info.get(_sampler, [None]))[0] == "minimize":
                    ignore_blocks = [_sampler]
                if not is_equal_info(old_info, new_info, strict=False,
                                     ignore_blocks=ignore_blocks):
                    # HACK!!! NEEDS TO BE FIXED
                    if list(updated_info.get(_sampler, [None]))[0] == "minimize":
                        raise LoggedError(
                            self.log, "Old and new sample information not compatible! "
                            "At this moment it is not possible to 'force' deletion of "
                            "and old 'minimize' run. Please delete it by hand. "
                            "We are working on fixing this very soon!")
                    raise LoggedError(
                        self.log, "Old and new sample information not compatible! "
                        "Resuming not possible!")
            except IOError:
                # There was no previous chain
                pass
        # We write the new one anyway (maybe updated debug, resuming...)
        for f, info in [(self.file_input, input_info),
                        (self.file_updated, updated_info_trimmed)]:
            if not info:
                # Nothing to write for this file
                continue
            with open(f, "w") as f_out:
                try:
                    f_out.write(yaml_dump(info))
                except OutputError as e:
                    raise LoggedError(self.log, str(e))
Example #15
def make_auto_params(auto_params, params_info):
    def replace(item, tag):
        if isinstance(item, dict):
            for key, val in list(item.items()):
                item[key] = replace(val, tag)
        elif isinstance(item, str) and '%s' in item:
            item = item % tag
        return item

    for k, v in auto_params.items():
        if '%s' not in k:
            raise LoggedError(
                log, 'auto_param parameter names must have %s placeholder')
        replacements = v.pop('auto_range')
        if isinstance(replacements, str):
            replacements = eval(replacements)
        for value in replacements:
            params_info[k % value] = replace(deepcopy_where_possible(v), value)
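Each `auto_params` key must contain a '%s' placeholder, which is substituted with every element of `auto_range` in both the parameter name and any string values. A worked call (the parameter names are made up):

params_info = {}
auto_params = {
    "amp_%s": {"prior": [0, 1], "latex": "A_{%s}", "auto_range": "range(2)"},
}
make_auto_params(auto_params, params_info)
# params_info is now:
# {"amp_0": {"prior": [0, 1], "latex": "A_{0}"},
#  "amp_1": {"prior": [0, 1], "latex": "A_{1}"}}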
Example #16
def expand_info_param(info_param, default_derived=True):
    """
    Expands the info of a parameter, from the user friendly, shorter format
    to a more unambiguous one.
    """
    info_param = deepcopy_where_possible(info_param)
    if not isinstance(info_param, dict):
        if info_param is None:
            info_param = {}
        else:
            info_param = {partag.value: info_param}
    if all(f not in info_param for f in [partag.prior, partag.value, partag.derived]):
        info_param[partag.derived] = default_derived
    # Dynamical input parameters: save as derived by default
    value = info_param.get(partag.value, None)
    if isinstance(value, str) or callable(value):
        info_param[partag.derived] = info_param.get(partag.derived, True)
    return info_param
Example #17
def get_sampler(info_sampler: SamplersDict,
                model: Model,
                output: Optional[Output] = None,
                packages_path: Optional[str] = None) -> 'Sampler':
    assert isinstance(info_sampler, Mapping), (
        "The first argument must be a dictionary with the info needed for the sampler. "
        "If you were trying to pass the name of an input file instead, "
        "load it first with 'cobaya.input.load_input', "
        "or, if you were passing a yaml string, load it with 'cobaya.yaml.yaml_load'."
    )
    logger_sampler = get_logger(__name__)
    info_sampler = deepcopy_where_possible(info_sampler)
    if output is None:
        output = OutputDummy()
    # Check and update info
    check_sane_info_sampler(info_sampler)
    updated_info_sampler = update_info({"sampler": info_sampler})["sampler"]  # type: ignore
    if is_debug(logger_sampler):
        logger_sampler.debug(
            "Input info updated with defaults (dumped to YAML):\n%s",
            yaml_dump(updated_info_sampler))
    # Get sampler class & check resume/force compatibility
    sampler_name, sampler_class = get_sampler_name_and_class(
        updated_info_sampler, logger=logger_sampler)
    check_sampler_info((output.reload_updated_info(use_cache=True)
                        or {}).get("sampler"),
                       updated_info_sampler,
                       is_resuming=output.is_resuming())
    # Check if resumable run
    sampler_class.check_force_resume(output,
                                     info=updated_info_sampler[sampler_name])
    # Instantiate the sampler
    sampler_instance = sampler_class(updated_info_sampler[sampler_name],
                                     model,
                                     output,
                                     packages_path=packages_path)
    # If output, dump updated
    if output:
        to_dump = model.info()
        to_dump["sampler"] = {sampler_name: sampler_instance.info()}
        to_dump["output"] = os.path.join(output.folder, output.prefix)
        output.check_and_dump_info(None, to_dump, check_compatible=False)
    return sampler_instance
Example #18
File: sampler.py (project: yufdu/cobaya)
def get_sampler(info_sampler, model, output=None, packages_path=None):
    assert isinstance(info_sampler, Mapping), (
        "The first argument must be a dictionary with the info needed for the sampler. "
        "If you were trying to pass the name of an input file instead, "
        "load it first with 'cobaya.input.load_input', "
        "or, if you were passing a yaml string, load it with 'cobaya.yaml.yaml_load'."
    )
    logger_sampler = logging.getLogger(__name__.split(".")[-1])
    info_sampler = deepcopy_where_possible(info_sampler)
    if output is None:
        output = OutputDummy()
    # Check and update info
    check_sane_info_sampler(info_sampler)
    updated_info_sampler = update_info({kinds.sampler:
                                        info_sampler})[kinds.sampler]
    if logging.root.getEffectiveLevel() <= logging.DEBUG:
        logger_sampler.debug(
            "Input info updated with defaults (dumped to YAML):\n%s",
            yaml_dump(updated_info_sampler))
    # Get sampler class & check resume/force compatibility
    sampler_name, sampler_class = get_sampler_name_and_class(
        updated_info_sampler)
    check_sampler_info((output.reload_updated_info(use_cache=True)
                        or {}).get(kinds.sampler),
                       updated_info_sampler,
                       is_resuming=output.is_resuming())
    # Check if resumable run
    sampler_class.check_force_resume(output,
                                     info=updated_info_sampler[sampler_name])
    # Instantiate the sampler
    sampler_instance = sampler_class(updated_info_sampler[sampler_name],
                                     model,
                                     output,
                                     packages_path=packages_path)
    # If output, dump updated
    if output:
        to_dump = model.info()
        to_dump[kinds.sampler] = {sampler_name: sampler_instance.info()}
        to_dump[_output_prefix] = os.path.join(output.folder, output.prefix)
        output.check_and_dump_info(None, to_dump, check_compatible=False)
    return sampler_instance
Example #19
    def initial_proposal_covmat(self, auto_params=None):
        """
        Build the initial covariance matrix, using the data provided, in descending order
        of priority:
        1. "covmat" field in the sampler block (including `auto` search).
        2. "proposal" field for each parameter.
        3. variance of the reference pdf.
        4. variance of the prior pdf.

        Covariances between pairs of parameters are preserved when both are present in
        a covariance matrix provided through option 1; all other covariances are
        assumed to be 0.

        If `covmat: auto`, use the keyword `auto_params` to restrict the parameters for
        which a covariance matrix is searched (default: None, meaning all sampled params).
        """
        params_infos = self.model.parameterization.sampled_params_info()
        covmat = np.diag([np.nan] * len(params_infos))
        # Try to generate it automatically
        self.covmat = getattr(self, 'covmat', None)
        if isinstance(self.covmat, str) and self.covmat.lower() == "auto":
            params_infos_covmat = deepcopy_where_possible(params_infos)
            # Restrict only if `auto_params` was given; the default (None) keeps
            # all sampled parameters.
            if auto_params is not None:
                for p in list(params_infos_covmat):
                    if p not in auto_params:
                        params_infos_covmat.pop(p, None)
            auto_covmat = self.model.get_auto_covmat(params_infos_covmat,
                                                     random_state=self._rng)
            if auto_covmat:
                self.covmat = os.path.join(auto_covmat["folder"],
                                           auto_covmat["name"])
                self.log.info("Covariance matrix selected automatically: %s",
                              self.covmat)
            else:
                self.covmat = None
                self.log.info(
                    "Could not automatically find a good covmat. "
                    "Will generate from parameter info (proposal and prior).")
        # If given, load and test the covariance matrix
        loaded_params: Sequence[str]
        if isinstance(self.covmat, str):
            covmat_pre = "{%s}" % packages_path_input
            if self.covmat.startswith(covmat_pre):
                self.covmat = self.covmat.format(
                    **{
                        packages_path_input: self.packages_path
                    }).replace("/", os.sep)
            try:
                with open(self.covmat, "r",
                          encoding="utf-8-sig") as file_covmat:
                    header = file_covmat.readline()
                loaded_covmat = np.loadtxt(self.covmat)
                self.log.debug(
                    f"Loaded a covariance matrix from '{self.covmat}'")
            except TypeError:
                raise LoggedError(
                    self.log, "The property 'covmat' must be a file name,"
                    "but it's '%s'.", str(self.covmat))
            except IOError:
                raise LoggedError(self.log, "Can't open covmat file '%s'.",
                                  self.covmat)
            if header[0] != "#":
                raise LoggedError(
                    self.log, "The first line of the covmat file '%s' "
                    "must be one list of parameter names separated by spaces "
                    "and staring with '#', and the rest must be a square "
                    "matrix, with one row per line.", self.covmat)
            loaded_params = header.strip("#").strip().split()
        elif hasattr(self.covmat, "__getitem__"):
            if not self.covmat_params:
                raise LoggedError(
                    self.log,
                    "If a covariance matrix is passed as a numpy array, "
                    "you also need to pass the parameters it corresponds to "
                    "via 'covmat_params: [name1, name2, ...]'.")
            loaded_params = self.covmat_params
            loaded_covmat = np.array(self.covmat)
        elif self.covmat:
            raise LoggedError(self.log, "Invalid covmat")
        if self.covmat is not None:
            str_msg = "the `covmat_params` list"
            if isinstance(self.covmat, str):
                str_msg = "the header of the covmat file %r" % self.covmat
            if len(loaded_params) != len(set(loaded_params)):
                duplicated = list(
                    set(p for p in loaded_params
                        if list(loaded_params).count(p) > 1))
                raise LoggedError(
                    self.log, "Parameter(s) %r appear more than once in %s",
                    duplicated, str_msg)
            if len(loaded_params) != loaded_covmat.shape[0]:
                raise LoggedError(
                    self.log, "The number of parameters in %s and the "
                    "dimensions of the matrix do not agree: %d vs %r", str_msg,
                    len(loaded_params), loaded_covmat.shape)
            loaded_covmat = np.atleast_2d(loaded_covmat)
            is_square_symmetric = (
                len(loaded_covmat.shape) == 2
                and loaded_covmat.shape[0] == loaded_covmat.shape[1]
                and np.allclose(loaded_covmat.T, loaded_covmat))
            # Not checking for positive-definiteness yet: may contain highly degenerate
            # derived parameters that would spoil it now, but will later be dropped.
            if not is_square_symmetric:
                from_msg = (f"loaded from '{self.covmat}'" if isinstance(
                    self.covmat, str) else "passed")
                raise LoggedError(
                    self.log,
                    f"The covariance matrix {from_msg} is not a symmetric square matrix."
                )
            # Fill with parameters in the loaded covmat
            renames = {
                p: [p] + str_to_list(v.get("renames") or [])
                for p, v in params_infos.items()
            }
            indices_used, indices_sampler = zip(*[[
                loaded_params.index(p),
                [
                    list(params_infos).index(q) for q, a in renames.items()
                    if p in a
                ]
            ] for p in loaded_params])
            if not any(indices_sampler):
                raise LoggedError(
                    self.log,
                    "A proposal covariance matrix has been loaded, but none of its "
                    "parameters are actually sampled here. Maybe a mismatch between"
                    " parameter names in the covariance matrix and the input file?"
                )
            indices_used, indices_sampler = zip(
                *[[i, j] for i, j in zip(indices_used, indices_sampler) if j])
            if any(len(j) - 1 for j in indices_sampler):
                first = next(j for j in indices_sampler if len(j) > 1)
                raise LoggedError(
                    self.log,
                    "The parameters %s have duplicated aliases. Can't assign them an "
                    "element of the covariance matrix unambiguously.",
                    ", ".join([list(params_infos)[i] for i in first]))
            indices_sampler = tuple(chain(*indices_sampler))
            covmat[np.ix_(indices_sampler,
                          indices_sampler)] = (loaded_covmat[np.ix_(
                              indices_used, indices_used)])
            self.log.info("Covariance matrix loaded for params %r",
                          [list(params_infos)[i] for i in indices_sampler])
            missing_params = set(params_infos).difference(
                list(params_infos)[i] for i in indices_sampler)
            if missing_params:
                self.log.info("Missing proposal covariance for params %r", [
                    p for p in self.model.parameterization.sampled_params()
                    if p in missing_params
                ])
            else:
                self.log.info(
                    "All parameters' covariance loaded from given covmat.")
        # Fill gaps with "proposal" property, if present, otherwise ref (or prior)
        where_nan = np.isnan(covmat.diagonal())
        if np.any(where_nan):
            covmat[where_nan, where_nan] = np.array([
                (info.get("proposal", np.nan) or np.nan)**2
                for info in params_infos.values()
            ])[where_nan]
        where_nan2 = np.isnan(covmat.diagonal())
        if np.any(where_nan2):
            # the variances are likely too large for a good proposal, e.g. conditional
            # widths may be much smaller than the marginalized ones.
            # Divide by 4, better to be too small than too large.
            covmat[where_nan2, where_nan2] = (
                self.model.prior.reference_variances()[where_nan2] /
                self.fallback_covmat_scale)
        assert not np.any(np.isnan(covmat))
        return covmat, where_nan
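The checks above pin down the expected covmat file layout: a header of parameter names starting with '#', followed by a square, symmetric matrix with one row per line. For example (hypothetical parameters):

# param_a param_b
0.04   0.001
0.001  0.09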
Example #20
    def get_defaults(cls,
                     return_yaml=False,
                     yaml_expand_defaults=True,
                     input_options=empty_dict):
        """
        Return defaults for this component_or_class, with syntax:

        .. code::

           option: value
           [...]

           params:
             [...]  # if required

           prior:
             [...]  # if required

        If keyword `return_yaml` is set to True, it returns literally that,
        whereas if False (default), it returns the corresponding Python dict.

        Note that in external components installed as zip_safe=True packages, files
        cannot be accessed directly, so '!default' .yaml includes currently do not
        work in that case.

        Also note that if you return a dictionary it may be modified (return a deep copy
        if you want to keep it).

        If ``yaml_expand_defaults`` is True, '!default' file includes will be expanded.

        ``input_options`` may be a dictionary of input options, e.g. in case default
        params depend dynamically on an input variable.
        """
        if 'class_options' in cls.__dict__:
            raise LoggedError(
                log, "class_options (in %s) should now be replaced by "
                "public attributes defined directly in the class" %
                cls.get_qualified_class_name())
        yaml_text = cls.get_associated_file_content('.yaml')
        options = cls.get_class_options(input_options=input_options)
        if options and yaml_text:
            raise LoggedError(
                log, "%s: any class can either have .yaml or class variables "
                "but not both (type declarations without values are fine "
                "also with yaml file). You have class attributes: %s",
                cls.get_qualified_class_name(), list(options))
        if return_yaml and not yaml_expand_defaults:
            return yaml_text or ""
        this_defaults = yaml_load_file(cls.get_yaml_file(), yaml_text) \
            if yaml_text else deepcopy_where_possible(options)
        # start with this one to keep the order such that most recent class options
        # near the top. Update below to actually override parameters with these.
        defaults = this_defaults.copy()
        if not return_yaml:
            for base in cls.__bases__:
                if issubclass(base, HasDefaults) and base is not HasDefaults:
                    defaults.update(
                        base.get_defaults(input_options=input_options))
        defaults.update(this_defaults)
        if return_yaml:
            return yaml_dump(defaults)
        else:
            return defaults
Example #21
File: input.py (project: rancesol/cobaya)
def update_info(info):
    """
    Creates an updated info starting from the defaults for each module and updating it
    with the input info.
    """
    # Don't modify the original input!
    input_info = deepcopy_where_possible(info)
    # Creates an equivalent info using only the defaults
    updated_info = odict()
    default_params_info = odict()
    default_prior_info = odict()
    modules = get_used_modules(input_info)
    for block in modules:
        updated_info[block] = odict()
        for module in modules[block]:
            # Preprocess "no options" and "external function" in input
            try:
                input_info[block][module] = input_info[block][module] or {}
            except TypeError:
                raise LoggedError(
                    log,
                    "Your input info is not well formatted at the '%s' block. "
                    "It must be a dictionary {'%s':{options}, ...}. ", block,
                    block)
            if not hasattr(input_info[block][module], "get"):
                input_info[block][module] = {
                    _external: input_info[block][module]
                }
            # Get default class options
            updated_info[block][module] = deepcopy(
                getattr(
                    import_module(_package + "." + block, package=_package),
                    "class_options", {}))
            default_module_info = get_default_info(module, block)
            # TODO: check - get_default_info was ignoring this extra arg: input_info[block][module])
            updated_info[block][module].update(
                default_module_info[block][module] or {})
            # Update default options with input info
            # Consistency is checked only up to first level! (i.e. subkeys may not match)
            ignore = set(
                [_external, _p_renames, _input_params, _output_params])
            options_not_recognized = (set(
                input_info[block][module]).difference(ignore).difference(
                    set(updated_info[block][module])))
            if options_not_recognized:
                alternatives = odict()
                available = (set([_external, _p_renames
                                  ]).union(updated_info[block][module]))
                while options_not_recognized:
                    option = options_not_recognized.pop()
                    alternatives[option] = fuzzy_match(option, available, n=3)
                did_you_mean = ", ".join([
                    ("'%s' (did you mean %s?)" %
                     (o, "|".join(["'%s'" % _
                                   for _ in a])) if a else "'%s'" % o)
                    for o, a in alternatives.items()
                ])
                if default_module_info[block][module]:
                    # Internal module
                    raise LoggedError(
                        log, "'%s' does not recognize some options: %s. "
                        "To see the allowed options, check out the documentation of"
                        " this module.", module, did_you_mean)
                else:
                    # External module
                    raise LoggedError(
                        log,
                        "External %s '%s' does not recognize some options: %s. "
                        "Check the documentation for 'external %s'.", block,
                        module, did_you_mean, block)
            updated_info[block][module].update(input_info[block][module])
            # Store default parameters and priors of class, and save to combine later
            if block == _likelihood:
                params_info = default_module_info.get(_params, {})
                updated_info[block][module].update(
                    {_params: list(params_info or [])})
                default_params_info[module] = params_info
                default_prior_info[module] = default_module_info.get(
                    _prior, {})
    # Add priors info, after the necessary checks
    if _prior in input_info or any(default_prior_info.values()):
        updated_info[_prior] = input_info.get(_prior, odict())
    for prior_info in default_prior_info.values():
        for name, prior in prior_info.items():
            if updated_info[_prior].get(name, prior) != prior:
                raise LoggedError(
                    log,
                    "Two different priors cannot have the same name: '%s'.",
                    name)
            updated_info[_prior][name] = prior
    # Add parameters info, after the necessary updates and checks
    defaults_merged = merge_default_params_info(default_params_info)
    updated_info[_params] = merge_params_info(defaults_merged,
                                              input_info.get(_params, {}))
    # Add aliases for theory params (after merging!)
    if _theory in updated_info:
        renames = list(updated_info[_theory].values())[0].get(_p_renames)
        str_to_list = lambda x: ([x] if isinstance(x, string_types) else x)
        renames_flat = [
            set([k] + str_to_list(v)) for k, v in (renames or {}).items()
        ]
        for p in updated_info.get(_params, {}):
            # Probably could be made faster by inverting the renames dicts *just once*
            renames_pairs = [a for a in renames_flat if p in a]
            if renames_pairs:
                this_renames = reduce(lambda x, y: x.union(y),
                                      [a for a in renames_flat if p in a])
                updated_info[_params][p][_p_renames] = list(
                    set(this_renames).union(
                        set(
                            str_to_list(updated_info[_params][p].get(
                                _p_renames, [])))).difference(set([p])))
    # Rest of the options
    for k, v in input_info.items():
        if k not in updated_info:
            updated_info[k] = v
    return updated_info
Example #22
def post(info, sample=None):
    logger_setup(info.get(_debug), info.get(_debug_file))
    log = logging.getLogger(__name__.split(".")[-1])
    # MARKED FOR DEPRECATION IN v3.0
    # BEHAVIOUR TO BE REPLACED BY ERROR:
    check_deprecated_modules_path(info)
    # END OF DEPRECATION BLOCK
    try:
        info_post = info[_post]
    except KeyError:
        raise LoggedError(log, "No 'post' block given. Nothing to do!")
    if get_mpi_rank():
        log.warning(
            "Post-processing is not yet MPI-aware. Doing nothing for rank > 0 processes.")
        return
    if info.get(_resume):
        log.warning("Resuming not implemented for post-processing. Re-starting.")
    # 1. Load existing sample
    output_in = get_output(output_prefix=info.get(_output_prefix))
    if output_in:
        try:
            info_in = output_in.reload_updated_info()
        except FileNotFoundError:
            raise LoggedError(log, "Error loading input model: "
                                   "could not find input info at %s",
                              output_in.file_updated)
    else:
        info_in = deepcopy_where_possible(info)
    dummy_model_in = DummyModel(info_in[_params], info_in[kinds.likelihood],
                                info_in.get(_prior, None))
    if output_in:
        if not output_in.find_collections():
            raise LoggedError(log, "No samples found for the input model with prefix %s",
                              os.path.join(output_in.folder, output_in.prefix))
        collection_in = output_in.load_collections(
            dummy_model_in, skip=info_post.get("skip", 0), thin=info_post.get("thin", 1),
            concatenate=True)
    elif sample:
        if isinstance(sample, Collection):
            sample = [sample]
        collection_in = deepcopy(sample[0])
        for s in sample[1:]:
            try:
                collection_in.append(s)
            except Exception:
                raise LoggedError(log, "Failed to load some of the input samples.")
    else:
        raise LoggedError(log,
                          "No output to load from, nor input collections given.")
    log.info("Will process %d samples.", len(collection_in))
    if len(collection_in) <= 1:
        raise LoggedError(
            log, "Not enough samples for post-processing. Try using a larger sample, "
                 "or skipping or thinning less.")
    # 2. Compare old and new info: determine what to do
    add = info_post.get(_post_add, {}) or {}
    remove = info_post.get(_post_remove, {})
    # Add a dummy 'one' likelihood, to absorb unused parameters
    if not add.get(kinds.likelihood):
        add[kinds.likelihood] = {}
    add[kinds.likelihood]["one"] = None
    # Expand the "add" info
    add = update_info(add)
    # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters
    out = {_params: deepcopy_where_possible(info_in[_params])}
    for p in remove.get(_params, {}):
        pinfo = info_in[_params].get(p)
        if pinfo is None or not is_derived_param(pinfo):
            raise LoggedError(
                log,
                "You tried to remove parameter '%s', which is not a derived parameter. "
                "Only derived parameters can be removed during post-processing.", p)
        out[_params].pop(p)
    # Force recomputation of aggregated chi2
    for p in list(out[_params]):
        if p.startswith(_get_chi2_name("")):
            out[_params].pop(p)
    mlprior_names_add = []
    for p, pinfo in add.get(_params, {}).items():
        pinfo_in = info_in[_params].get(p)
        if is_sampled_param(pinfo):
            if not is_sampled_param(pinfo_in):
                # No added sampled parameters (de-marginalisation not implemented)
                if pinfo_in is None:
                    raise LoggedError(
                        log, "You added a new sampled parameter %r (maybe accidentally "
                             "by adding a new likelihood that depends on it). "
                             "Adding new sampled parameters is not possible. Try fixing "
                             "it to some value.", p)
                else:
                    raise LoggedError(
                        log,
                        "You tried to change the prior of parameter '%s', "
                        "but it was not a sampled parameter. "
                        "To change that prior, you need to define as an external one.", p)
            # Make sure the 1d-prior name is prepended exactly once
            if mlprior_names_add[:1] != [_minuslogprior + _separator + _prior_1d_name]:
                mlprior_names_add = ([_minuslogprior + _separator + _prior_1d_name]
                                     + mlprior_names_add)
        elif is_derived_param(pinfo):
            if p in out[_params]:
                raise LoggedError(
                    log, "You tried to add derived parameter '%s', which is already "
                         "present. To force its recomputation, 'remove' it too.", p)
        elif is_fixed_param(pinfo):
            # Only one possibility left "fixed" parameter that was not present before:
            # input of new likelihood, or just an argument for dynamical derived (dropped)
            if ((p in info_in[_params] and
                 pinfo[partag.value] != (pinfo_in or {}).get(partag.value, None))):
                raise LoggedError(
                    log,
                    "You tried to add a fixed parameter '%s: %r' that was already present"
                    " but had a different value or was not fixed. This is not allowed. "
                    "The old info of the parameter was '%s: %r'",
                    p, dict(pinfo), p, dict(pinfo_in))
        else:
            raise LoggedError(log, "This should not happen. Contact the developers.")
        out[_params][p] = pinfo
    # For the likelihood only, turn the rest of *derived* parameters into constants,
    # so that the likelihoods do not try to compute them.
    # But be careful to exclude *input* params that have a "derived: True" value
    # (which in "updated info" turns into "derived: 'lambda [x]: [x]'")
    out_params_like = deepcopy_where_possible(out[_params])
    for p, pinfo in out_params_like.items():
        if ((is_derived_param(pinfo) and not (partag.value in pinfo)
             and p not in add.get(_params, {}))):
            out_params_like[p] = {partag.value: np.nan, partag.drop: True}
    # 2.2 Manage adding/removing priors and likelihoods
    warn_remove = False
    for level in [_prior, kinds.likelihood]:
        out[level] = getattr(dummy_model_in, level)
        if level == _prior:
            out[level].remove(_prior_1d_name)
        for pdf in info_post.get(_post_remove, {}).get(level, []) or []:
            try:
                out[level].remove(pdf)
                warn_remove = True
            except ValueError:
                raise LoggedError(
                    log, "Trying to remove %s '%s', but it is not present. "
                         "Existing ones: %r", level, pdf, out[level])
    if warn_remove:
        log.warning("You are removing a prior or likelihood pdf. "
                    "Notice that if the resulting posterior is much wider "
                    "than the original one, or displaced enough, "
                    "it is probably safer to explore it directly.")
    if _prior in add:
        mlprior_names_add += [_minuslogprior + _separator + name for name in add[_prior]]
        out[_prior] += list(add[_prior])
    prior_recompute_1d = (
            mlprior_names_add[:1] == [_minuslogprior + _separator + _prior_1d_name])
    # Don't initialise the theory code if not adding/recomputing theory,
    # theory-derived params or likelihoods
    recompute_theory = info_in.get(kinds.theory) and not (
            list(add[kinds.likelihood]) == ["one"] and
            not any(is_derived_param(pinfo) for pinfo in add.get(_params, {}).values()))
    if recompute_theory:
        # Inherit from the original chain (needs input|output_params, renames, etc
        add_theory = add.get(kinds.theory)
        if add_theory:
            info_theory_out = {}
            if len(add_theory) > 1:
                log.warning('Importance sampling with more than one theory is '
                            'not really tested')
            add_theory = add_theory.copy()
            for theory, theory_info in info_in[kinds.theory].items():
                theory_copy = deepcopy_where_possible(theory_info)
                if theory in add_theory:
                    info_theory_out[theory] = \
                        recursive_update(theory_copy, add_theory.pop(theory))
                else:
                    info_theory_out[theory] = theory_copy
            info_theory_out.update(add_theory)
        else:
            info_theory_out = deepcopy_where_possible(info_in[kinds.theory])
    else:
        info_theory_out = None
    chi2_names_add = [
        _get_chi2_name(name) for name in add[kinds.likelihood] if name != "one"]
    out[kinds.likelihood] += [l for l in add[kinds.likelihood] if l != "one"]
    if recompute_theory:
        log.warning("You are recomputing the theory, but in the current version this does"
                    " not force recomputation of any likelihood or derived parameter, "
                    "unless explicitly removed+added.")
    for level in [_prior, kinds.likelihood]:
        for i, x_i in enumerate(out[level]):
            if x_i in list(out[level])[i + 1:]:
                raise LoggedError(
                    log, "You have added %s '%s', which was already present. If you "
                         "want to force its recomputation, you must also 'remove' it.",
                    level, x_i)
    # 3. Create output collection
    if _post_suffix not in info_post:
        raise LoggedError(log, "You need to provide a '%s' for your chains.",
                          _post_suffix)
    # Use default prefix if it exists. If it does not, produce no output by default.
    # {post: {output: None}} suppresses output, and if it's a string, updates it.
    out_prefix = info_post.get(_output_prefix, info.get(_output_prefix))
    if out_prefix not in [None, False]:
        out_prefix += _separator_files + _post + _separator_files + info_post[
            _post_suffix]
    output_out = get_output(output_prefix=out_prefix, force=info.get(_force))
    if output_out and not output_out.force and output_out.find_collections():
        raise LoggedError(log, "Found existing post-processing output with prefix %r. "
                               "Delete it manually or re-run with `force: True` "
                               "(or `-f`, `--force` from the shell).", out_prefix)
    elif output_out and output_out.force:
        output_out.delete_infos()
        for regexp in output_out.find_collections():
            output_out.delete_with_regexp(re.compile(regexp))
    info_out = deepcopy_where_possible(info)
    info_out[_post] = info_post
    # Updated with input info and extended (updated) add info
    info_out.update(info_in)
    info_out[_post][_post_add] = add
    dummy_model_out = DummyModel(out[_params], out[kinds.likelihood],
                                 info_prior=out[_prior])
    if recompute_theory:
        # TODO: May need updating for more than one, or maybe can be removed
        theory = list(info_theory_out)[0]
        if _input_params not in info_theory_out[theory]:
            raise LoggedError(
                log,
                "You appear to be post-processing a chain generated with an older "
                "version of Cobaya. For post-processing to work, please edit the "
                "'[root].updated.yaml' file of the original chain to add, inside the "
                "theory code block, the list of its input parameters. E.g.\n----\n"
                "theory:\n  %s:\n    input_params: [param1, param2, ...]\n"
                "----\nIf you get strange errors later, it is likely that you did not "
                "specify the correct set of theory parameters.\n"
                "The full set of input parameters are %s.",
                theory, list(dummy_model_out.parameterization.input_params()))
    # TODO: check allow_renames=False?
    # TODO: May well be simplifications here; this is very close to pre-refactor logic.
    # Have not gone through or understood all the parameterization stuff.
    model_add = Model(out_params_like, add[kinds.likelihood], info_prior=add.get(_prior),
                      info_theory=info_theory_out, packages_path=info.get(_packages_path),
                      allow_renames=False, post=True,
                      prior_parameterization=dummy_model_out.parameterization)
    # Remove auxiliary "one" before dumping -- 'add' *is* info_out[_post][_post_add]
    add[kinds.likelihood].pop("one")
    collection_out = Collection(dummy_model_out, output_out, name="1")
    output_out.check_and_dump_info(None, info_out, check_compatible=False)
    # Prepare recomputation of aggregated chi2
    # (they need to be recomputed by hand, because its autocomputation won't pick up
    #  old likelihoods for a given type)
    all_types = {
        like: str_to_list(add[kinds.likelihood].get(
            like, info_in[kinds.likelihood].get(like)).get("type", []) or [])
        for like in out[kinds.likelihood]}
    types = set(chain(*list(all_types.values())))
    inv_types = {t: [like for like, like_types in all_types.items() if t in like_types]
                 for t in types}
    # 4. Main loop!
    log.info("Running post-processing...")
    last_percent = 0
    for i, point in collection_in.data.iterrows():
        log.debug("Point: %r", point)
        sampled = [point[param] for param in
                   dummy_model_in.parameterization.sampled_params()]
        derived = {param: point.get(param, None)
                   for param in dummy_model_out.parameterization.derived_params()}
        inputs = {param: point.get(
            param, dummy_model_in.parameterization.constant_params().get(
                param, dummy_model_out.parameterization.constant_params().get(
                    param, None)))
            for param in dummy_model_out.parameterization.input_params()}
        # Solve inputs that depend on a function and were not saved
        # (we don't use the Parameterization_to_input method in case there are references
        #  to functions that cannot be loaded at the moment)
        for p, value in inputs.items():
            if value is None:
                func = dummy_model_out.parameterization._input_funcs[p]
                args = dummy_model_out.parameterization._input_args[p]
                inputs[p] = func(*[point.get(arg) for arg in args])
        # Add/remove priors
        priors_add = model_add.prior.logps(sampled)
        if not prior_recompute_1d:
            priors_add = priors_add[1:]
        logpriors_add = dict(zip(mlprior_names_add, priors_add))
        logpriors_new = [logpriors_add.get(name, - point.get(name, 0))
                         for name in collection_out.minuslogprior_names]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug(
                "New set of priors: %r", dict(zip(dummy_model_out.prior, logpriors_new)))
        if -np.inf in logpriors_new:
            continue
        # Add/remove likelihoods
        output_like = []
        if add[kinds.likelihood]:
            # Notice "one" (last in likelihood_add) is ignored: not in chi2_names
            loglikes_add, output_like = model_add.logps(inputs, return_derived=True)
            loglikes_add = dict(zip(chi2_names_add, loglikes_add))
            output_like = dict(zip(model_add.output_params, output_like))
        else:
            loglikes_add = dict()
        loglikes_new = [loglikes_add.get(name, -0.5 * point.get(name, 0))
                        for name in collection_out.chi2_names]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug(
                "New set of likelihoods: %r",
                dict(zip(dummy_model_out.likelihood, loglikes_new)))
            if output_like:
                log.debug("New set of likelihood-derived parameters: %r", output_like)
        if -np.inf in loglikes_new:
            continue
        # Add/remove derived parameters and change priors of sampled parameters
        for p in add[_params]:
            if p in dummy_model_out.parameterization._directly_output:
                derived[p] = output_like[p]
            elif p in dummy_model_out.parameterization._derived_funcs:
                func = dummy_model_out.parameterization._derived_funcs[p]
                args = dummy_model_out.parameterization._derived_args[p]
                derived[p] = func(
                    *[point.get(arg, output_like.get(arg, None)) for arg in args])
        # We need to recompute the aggregated chi2 by hand
        for type_, likes in inv_types.items():
            derived[_get_chi2_name(type_)] = sum(
                [-2 * lvalue for lname, lvalue
                 in zip(collection_out.chi2_names, loglikes_new)
                 if _undo_chi2_name(lname) in likes])
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug("New derived parameters: %r",
                      dict([(p, derived[p])
                            for p in dummy_model_out.parameterization.derived_params()
                            if p in add[_params]]))
        # Save to the collection (keep old weight for now)
        collection_out.add(
            sampled, derived=derived.values(), weight=point.get(_weight),
            logpriors=logpriors_new, loglikes=loglikes_new)
        # Display progress
        percent = np.round(i / len(collection_in) * 100)
        if percent != last_percent and not percent % 5:
            last_percent = percent
            progress_bar(log, percent, " (%d/%d)" % (i, len(collection_in)))
    if not collection_out.data.last_valid_index():
        raise LoggedError(
            log, "No elements in the final sample. Possible causes: "
                 "added a prior or likelihood valued zero over the full sampled domain, "
                 "or the computation of the theory failed everywhere, etc.")
    # Reweight -- account for large dynamic range!
    #   Prefer to rescale +inf to finite, and ignore final points with -inf.
    #   Remove -inf's (0-weight), and correct indices
    difflogmax = max(collection_in[_minuslogpost] - collection_out[_minuslogpost])
    collection_out.data[_weight] *= np.exp(
        collection_in[_minuslogpost] - collection_out[_minuslogpost] - difflogmax)
    collection_out.data = (
        collection_out.data[collection_out.data.weight > 0].reset_index(drop=True))
    collection_out._n = collection_out.data.last_valid_index() + 1
    # Write!
    collection_out.out_update()
    log.info("Finished! Final number of samples: %d", len(collection_out))
    return info_out, {"sample": collection_out}
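
A minimal, self-contained sketch of the reweighting trick used above: the maximum
log-posterior difference is subtracted before exponentiating, so the largest
importance ratio maps to exp(0) = 1 and never overflows (names are illustrative,
not cobaya's API):

import numpy as np

def importance_reweight(weights, minuslogpost_old, minuslogpost_new):
    # log of the importance ratio p_new / p_old for each sample point
    difflog = minuslogpost_old - minuslogpost_new
    # subtract the maximum so np.exp never overflows for large dynamic ranges
    return weights * np.exp(difflog - np.max(difflog))

old_w = np.array([1.0, 2.0, 1.0])
old_mlp = np.array([10.0, 12.0, 11.0])   # -log(post) under the old model
new_mlp = np.array([9.0, 13.0, 11.5])    # -log(post) under the new model
print(importance_reweight(old_w, old_mlp, new_mlp))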
Example #23
File: sampler.py  Project: yufdu/cobaya
    def __init__(self,
                 info_sampler,
                 model,
                 output=None,
                 packages_path=None,
                 name=None):
        """
        Actual initialization of the class. Loads the default and input information and
        calls the custom ``initialize`` method.

        [Do not modify this one.]
        """
        self.model = model
        self.output = output
        self._updated_info = deepcopy_where_possible(info_sampler)
        super().__init__(info_sampler,
                         packages_path=packages_path,
                         name=name,
                         initialize=False,
                         standalone=False)
        # Seed, if requested
        if getattr(self, "seed", None) is not None:
            if not isinstance(self.seed,
                              int) or not (0 <= self.seed <= 2**32 - 1):
                raise LoggedError(
                    self.log,
                    "Seeds must be a *positive integer* < 2**32 - 1, "
                    "but got %r with type %r", self.seed, type(self.seed))
            # MPI-awareness: add the rank to the seed
            if more_than_one_process():
                self.seed += get_mpi_rank()
            self.mpi_warning("This run has been SEEDED with seed %d",
                             self.seed)
        # Load checkpoint info, if resuming
        if self.output.is_resuming() and not isinstance(self, Minimizer):
            try:
                checkpoint_info = yaml_load_file(self.checkpoint_filename())
                try:
                    for k, v in checkpoint_info[kinds.sampler][
                            self.get_name()].items():
                        setattr(self, k, v)
                    self.mpi_info("Resuming from previous sample!")
                except KeyError:
                    if is_main_process():
                        raise LoggedError(
                            self.log, "Checkpoint file found at '%s' "
                            "but it corresponds to a different sampler.",
                            self.checkpoint_filename())
            except (IOError, TypeError):
                pass
        else:
            try:
                os.remove(self.checkpoint_filename())
                os.remove(self.progress_filename())
            except (OSError, TypeError):
                pass
        self._set_rng()
        self.initialize()
        self._release_rng()
        self.model.set_cache_size(self._get_requested_cache_size())
        # Add to the updated info some values which are
        # only available after initialisation
        self._updated_info[_version] = self.get_version()
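
A minimal sketch of the rank-offset seeding pattern above, using numpy's Generator
rather than cobaya's internal RNG handling (the helper name `seeded_rng` is
hypothetical):

import numpy as np

def seeded_rng(seed, rank=0):
    # Mirror the validity check above: an integer in [0, 2**32 - 1]
    if not isinstance(seed, int) or not (0 <= seed <= 2**32 - 1):
        raise ValueError("Seed must be a non-negative integer <= 2**32 - 1, "
                         "got %r" % (seed,))
    # MPI-awareness: add the process rank, so chains are reproducible but distinct
    return np.random.default_rng(seed + rank)

print(seeded_rng(42, rank=0).integers(100), seeded_rng(42, rank=1).integers(100))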
Example #24
def is_equal_info(info_old,
                  info_new,
                  strict=True,
                  print_not_log=False,
                  ignore_blocks=()):
    """
    Compares two information dictionaries, an old one versus a new one, and updates the
    new one with selected values from the old one.

    Set ``strict=False`` (default: ``True``) to ignore options that would not affect
    the statistics of a posterior sample, including order of params/priors/likelihoods.
    """
    if print_not_log:
        myprint = print
        myprint_debug = lambda x: x
    else:
        myprint = log.info
        myprint_debug = log.debug
    myname = inspect.stack()[0][3]
    ignore = set() if strict else \
        {_debug, _debug_file, _resume, _force, _packages_path, _test_run, _version}
    ignore = ignore.union(set(ignore_blocks or []))
    if set(info for info in info_old if info_old[info] is not None).difference(ignore) \
            != set(info for info in info_new if info_new[info] is not None).difference(
        ignore):
        myprint(myname +
                ": different blocks or options: %r (old) vs %r (new)" %
                (set(info_old).difference(ignore),
                 set(info_new).difference(ignore)))
        return False
    for block_name in info_old:
        if block_name in ignore or block_name not in info_new:
            continue
        block1 = deepcopy_where_possible(info_old[block_name])
        block2 = deepcopy_where_possible(info_new[block_name])
        # First, deal with root-level options (force, output, ...)
        if not isinstance(block1, dict):
            if block1 != block2:
                myprint(myname + ": different option '%s'" % block_name)
                return False
            continue
        # Now let's do components and params
        # 1. check order (it DOES matter, but just up to 1st level)
        f = list if strict else set
        if f(block1) != f(block2):
            myprint(myname +
                    ": different [%s] or different order of them: %r vs %r" %
                    (block_name, list(block1), list(block2)))
            return False
        # 2. Gather general options to be ignored
        if not strict:
            ignore_k = set()
            if block_name in [kinds.theory, kinds.likelihood]:
                ignore_k = ignore_k.union({_input_params, _output_params})
            elif block_name == _params:
                for param in block1:
                    # Unify notation
                    block1[param] = expand_info_param(block1[param])
                    block2[param] = expand_info_param(block2[param])
                    ignore_k = ignore_k.union({
                        partag.latex, partag.renames, partag.ref,
                        partag.proposal, "min", "max"
                    })
                    # Fixed params, it doesn't matter if they are saved as derived
                    if partag.value in block1[param]:
                        block1[param].pop(partag.derived, None)
                    if partag.value in block2[param]:
                        block2[param].pop(partag.derived, None)
                    # Renames: order does not matter
                    block1[param][partag.renames] = set(block1[param].get(
                        partag.renames, []))
                    block2[param][partag.renames] = set(block2[param].get(
                        partag.renames, []))
        # 3. Now check component/parameters one-by-one
        for k in block1:
            if not strict:
                # Add component-specific options to be ignored
                if block_name in kinds:
                    ignore_k_this = ignore_k.copy()
                    if _external not in block1[k]:
                        try:
                            component_path = block1[k].pop(_component_path, None) \
                                if isinstance(block1[k], dict) else None
                            class_name = (block1[k]
                                          or {}).get(_class_name) or k
                            cls = get_class(class_name,
                                            block_name,
                                            component_path=component_path)
                            ignore_k_this = ignore_k_this.union(
                                set(getattr(cls, "_at_resume_prefer_new", {})))
                        except ImportError:
                            pass
                    # Pop ignored and kept options
                    for j in list(ignore_k_this):
                        block1[k].pop(j, None)
                        block2[k].pop(j, None)
            if block1[k] != block2[k]:
                # For clarity, pop common stuff before printing
                to_pop = [
                    j for j in block1[k]
                    if (block1[k].get(j) == block2[k].get(j))
                ]
                [(block1[k].pop(j, None), block2[k].pop(j, None))
                 for j in to_pop]
                myprint(myname + ": different content of [%s:%s]" %
                        (block_name, k) +
                        " -- (re-run with `debug: True` for more info)")
                myprint_debug("%r (old) vs %r (new)" % (block1[k], block2[k]))
                return False
    return True
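
A self-contained sketch of the non-strict comparison idea above: options that do
not affect the statistics of the posterior (debug flags, paths, versions) are
stripped before comparing. This mirrors the `ignore` set logic at the top level
only, not cobaya's full recursive behaviour:

NON_STATISTICAL = {"debug", "debug_file", "resume", "force", "packages_path",
                   "test", "version"}

def strip_cosmetics(info):
    return {k: v for k, v in info.items()
            if k not in NON_STATISTICAL and v is not None}

old = {"likelihood": {"gaussian": None}, "debug": True, "version": "3.0"}
new = {"likelihood": {"gaussian": None}, "version": "3.1"}
print(strip_cosmetics(old) == strip_cosmetics(new))  # True: only cosmetic keys differ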
Example #25
def post(info_or_yaml_or_file: Union[InputDict, str, os.PathLike],
         sample: Union[SampleCollection, List[SampleCollection], None] = None
         ) -> PostTuple:
    info = load_input_dict(info_or_yaml_or_file)
    logger_setup(info.get("debug"), info.get("debug_file"))
    log = get_logger(__name__)
    # MARKED FOR DEPRECATION IN v3.0
    if info.get("modules"):
        raise LoggedError(log, "The input field 'modules' has been deprecated."
                               "Please use instead %r", packages_path_input)
    # END OF DEPRECATION BLOCK
    info_post: PostDict = info.get("post") or {}
    if not info_post:
        raise LoggedError(log, "No 'post' block given. Nothing to do!")
    if mpi.is_main_process() and info.get("resume"):
        log.warning("Resuming not implemented for post-processing. Re-starting.")
    if not info.get("output") and info_post.get("output") \
            and not info.get("params"):
        raise LoggedError(log, "The input dictionary must have be a full option "
                               "dictionary, or have an existing 'output' root to load "
                               "previous settings from ('output' to read from is in the "
                               "main block not under 'post'). ")
    # 1. Load existing sample
    output_in = get_output(prefix=info.get("output"))
    if output_in:
        info_in = output_in.load_updated_info() or update_info(info)
    else:
        info_in = update_info(info)
    params_in: ExpandedParamsDict = info_in["params"]  # type: ignore
    dummy_model_in = DummyModel(params_in, info_in.get("likelihood", {}),
                                info_in.get("prior"))

    in_collections = []
    thin = info_post.get("thin", 1)
    skip = info_post.get("skip", 0)
    if info.get('thin') is not None or info.get('skip') is not None:  # type: ignore
        raise LoggedError(log, "'thin' and 'skip' should be "
                               "parameters of the 'post' block")

    if sample:
        # If MPI, each process is assumed to be passed the list of collections
        # that it should process (e.g. the single-chain output from a sampler)
        if isinstance(sample, SampleCollection):
            in_collections = [sample]
        else:
            in_collections = sample
        for i, collection in enumerate(in_collections):
            if skip:
                if 0 < skip < 1:
                    skip = int(round(skip * len(collection)))
                collection = collection.filtered_copy(slice(skip, None))
            if thin != 1:
                collection = collection.thin_samples(thin)
            in_collections[i] = collection
    elif output_in:
        files = output_in.find_collections()
        numbered = files
        if not numbered:
            # look for un-numbered output files
            files = output_in.find_collections(name=False)
        if files:
            if mpi.size() > len(files):
                raise LoggedError(log, "Number of MPI processes (%s) is larger than "
                                       "the number of sample files (%s)",
                                  mpi.size(), len(files))
            for num in range(mpi.rank(), len(files), mpi.size()):
                in_collections += [SampleCollection(
                    dummy_model_in, output_in,
                    onload_thin=thin, onload_skip=skip, load=True, file_name=files[num],
                    name=str(num + 1) if numbered else "")]
        else:
            raise LoggedError(log, "No samples found for the input model with prefix %s",
                              os.path.join(output_in.folder, output_in.prefix))

    else:
        raise LoggedError(log, "No output from where to load from, "
                               "nor input collections given.")
    if any(len(c) <= 1 for c in in_collections):
        raise LoggedError(
            log, "Not enough samples for post-processing. Try using a larger sample, "
                 "or skipping or thinning less.")
    mpi.sync_processes()
    log.info("Will process %d sample points.", sum(len(c) for c in in_collections))

    # 2. Compare old and new info: determine what to do
    add = info_post.get("add") or {}
    if "remove" in add:
        raise LoggedError(log, "remove block should be under 'post', not 'add'")
    remove = info_post.get("remove") or {}
    # Add a dummy 'one' likelihood, to absorb unused parameters
    if not add.get("likelihood"):
        add["likelihood"] = {}
    add["likelihood"]["one"] = None
    # Expand the "add" info, but don't add new default sampled parameters
    orig_params = set(add.get("params") or [])
    add = update_info(add, add_aggr_chi2=False)
    add_params: ExpandedParamsDict = add["params"]  # type: ignore
    for p in set(add_params) - orig_params:
        if p in params_in:
            add_params.pop(p)

    # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters
    out_combined_params = deepcopy_where_possible(params_in)
    remove_params = list(str_to_list(remove.get("params")) or [])
    for p in remove_params:
        pinfo = params_in.get(p)
        if pinfo is None or not is_derived_param(pinfo):
            raise LoggedError(
                log,
                "You tried to remove parameter '%s', which is not a derived parameter. "
                "Only derived parameters can be removed during post-processing.", p)
        out_combined_params.pop(p)
    # Force recomputation of aggregated chi2
    for p in list(out_combined_params):
        if p.startswith(get_chi2_name("")):
            out_combined_params.pop(p)
    prior_recompute_1d = False
    for p, pinfo in add_params.items():
        pinfo_in = params_in.get(p)
        if is_sampled_param(pinfo):
            if not is_sampled_param(pinfo_in):
                # No added sampled parameters (de-marginalisation not implemented)
                if pinfo_in is None:
                    raise LoggedError(
                        log, "You added a new sampled parameter %r (maybe accidentally "
                             "by adding a new likelihood that depends on it). "
                             "Adding new sampled parameters is not possible. Try fixing "
                             "it to some value.", p)
                else:
                    raise LoggedError(
                        log,
                        "You tried to change the prior of parameter '%s', "
                        "but it was not a sampled parameter. "
                        "To change that prior, you need to define as an external one.", p)
            # recompute prior if potentially changed sampled parameter priors
            prior_recompute_1d = True
        elif is_derived_param(pinfo):
            if p in out_combined_params:
                raise LoggedError(
                    log, "You tried to add derived parameter '%s', which is already "
                         "present. To force its recomputation, 'remove' it too.", p)
        elif is_fixed_or_function_param(pinfo):
            # Only one possibility left "fixed" parameter that was not present before:
            # input of new likelihood, or just an argument for dynamical derived (dropped)
            if pinfo_in and p in params_in and pinfo["value"] != pinfo_in.get("value"):
                raise LoggedError(
                    log,
                    "You tried to add a fixed parameter '%s: %r' that was already present"
                    " but had a different value or was not fixed. This is not allowed. "
                    "The old info of the parameter was '%s: %r'",
                    p, dict(pinfo), p, dict(pinfo_in))
        elif not pinfo_in:  # not present before, and no known value for it
            raise LoggedError(log, "Parameter %s has no known value.", p)
        out_combined_params[p] = pinfo

    out_combined: InputDict = {"params": out_combined_params}  # type: ignore
    # Turn the rest of *derived* parameters into constants,
    # so that the likelihoods do not try to recompute them
    # But be careful to exclude *input* params that have a "derived: True" value
    # (which in "updated info" turns into "derived: 'lambda [x]: [x]'")
    # Don't assign derived parameters to theories, only to likelihoods, so they can be
    # recomputed if needed. If the theory does not need to be computed, it doesn't matter
    # if it is already assigned parameters in the usual way; likelihoods can get
    # the required derived parameters from the stored sample derived parameter inputs.
    out_params_with_computed = deepcopy_where_possible(out_combined_params)

    dropped_theory = set()
    for p, pinfo in out_params_with_computed.items():
        if (is_derived_param(pinfo) and "value" not in pinfo
                and p not in add_params):
            out_params_with_computed[p] = {"value": np.nan}
            dropped_theory.add(p)
    # 2.2 Manage adding/removing priors and likelihoods
    warn_remove = False
    kind: ModelBlock
    for kind in ("prior", "likelihood", "theory"):
        out_combined[kind] = deepcopy_where_possible(info_in.get(kind)) or {}
        for remove_item in str_to_list(remove.get(kind)) or []:
            try:
                out_combined[kind].pop(remove_item, None)
                if remove_item not in (add.get(kind) or []) and kind != "theory":
                    warn_remove = True
            except ValueError:
                raise LoggedError(
                    log, "Trying to remove %s '%s', but it is not present. "
                         "Existing ones: %r", kind, remove_item, list(out_combined[kind]))
        if kind != "theory" and kind in add:
            dups = set(add.get(kind) or []).intersection(out_combined[kind]) - {"one"}
            if dups:
                raise LoggedError(
                    log, "You have added %s '%s', which was already present. If you "
                         "want to force its recomputation, you must also 'remove' it.",
                    kind, dups)
            out_combined[kind].update(add[kind])

    if warn_remove and mpi.is_main_process():
        log.warning("You are removing a prior or likelihood pdf. "
                    "Notice that if the resulting posterior is much wider "
                    "than the original one, or displaced enough, "
                    "it is probably safer to explore it directly.")

    mlprior_names_add = minuslogprior_names(add.get("prior") or [])
    chi2_names_add = [get_chi2_name(name) for name in add["likelihood"] if
                      name != "one"]
    out_combined["likelihood"].pop("one", None)

    add_theory = add.get("theory")
    if add_theory:
        if len(add["likelihood"]) == 1 and not any(
                is_derived_param(pinfo) for pinfo in add_params.values()):
            log.warning("You are adding a theory, but this does not force recomputation "
                        "of any likelihood or derived parameters unless explicitly "
                        "removed+added.")
        # Inherit from the original chain (input|output_params, renames, etc)
        added_theory = add_theory.copy()
        for theory, theory_info in out_combined["theory"].items():
            if theory in list(added_theory):
                out_combined["theory"][theory] = \
                    recursive_update(theory_info, added_theory.pop(theory))
        out_combined["theory"].update(added_theory)

    # Prepare recomputation of aggregated chi2
    # (they need to be recomputed by hand, because auto-computation won't pick up
    #  old likelihoods for a given type)
    all_types = {like: str_to_list(opts.get("type") or [])
                 for like, opts in out_combined["likelihood"].items()}
    types = set(chain(*all_types.values()))
    inv_types = {t: [like for like, like_types in all_types.items() if t in like_types]
                 for t in sorted(types)}
    add_aggregated_chi2_params(out_combined_params, types)

    # 3. Create output collection
    # Use default prefix if it exists. If it does not, produce no output by default.
    # {post: {output: None}} suppresses output, and if it's a string, updates it.
    out_prefix = info_post.get("output", info.get("output"))
    if out_prefix:
        suffix = info_post.get("suffix")
        if not suffix:
            raise LoggedError(log, "You need to provide a '%s' for your output chains.",
                              "suffix")
        out_prefix += separator_files + "post" + separator_files + suffix
    output_out = get_output(prefix=out_prefix, force=info.get("force"))
    output_out.set_lock()

    if output_out and not output_out.force and output_out.find_collections():
        raise LoggedError(log, "Found existing post-processing output with prefix %r. "
                               "Delete it manually or re-run with `force: True` "
                               "(or `-f`, `--force` from the shell).", out_prefix)
    elif output_out and output_out.force and mpi.is_main_process():
        output_out.delete_infos()
        for _file in output_out.find_collections():
            output_out.delete_file_or_folder(_file)
    info_out = deepcopy_where_possible(info)
    info_post = info_post.copy()
    info_out["post"] = info_post
    # Updated with input info and extended (updated) add info
    info_out.update(info_in)  # type: ignore
    info_post["add"] = add

    dummy_model_out = DummyModel(out_combined_params, out_combined["likelihood"],
                                 info_prior=out_combined["prior"])
    out_func_parameterization = Parameterization(out_params_with_computed)

    # TODO: check allow_renames=False?
    model_add = Model(out_params_with_computed, add["likelihood"],
                      info_prior=add.get("prior"), info_theory=out_combined["theory"],
                      packages_path=(info_post.get(packages_path_input) or
                                     info.get(packages_path_input)),
                      allow_renames=False, post=True,
                      stop_at_error=info.get('stop_at_error', False),
                      skip_unused_theories=True, dropped_theory_params=dropped_theory)
    # Remove auxiliary "one" before dumping -- 'add' *is* info_out["post"]["add"]
    add["likelihood"].pop("one")
    out_collections = [SampleCollection(dummy_model_out, output_out, name=c.name,
                                        cache_size=OutputOptions.default_post_cache_size)
                       for c in in_collections]
    # TODO: should maybe add skip/thin to out_combined, so can tell post-processed?
    output_out.check_and_dump_info(info_out, out_combined, check_compatible=False)
    collection_in = in_collections[0]
    collection_out = out_collections[0]

    last_percent = None
    known_constants = dummy_model_out.parameterization.constant_params()
    known_constants.update(dummy_model_in.parameterization.constant_params())
    missing_params = dummy_model_in.parameterization.sampled_params().keys() - set(
        collection_in.columns)
    if missing_params:
        raise LoggedError(log, "Input samples do not contain expected sampled parameter "
                               "values: %s", missing_params)

    missing_priors = set(name for name in collection_out.minuslogprior_names if
                         name not in mlprior_names_add
                         and name not in collection_in.columns)
    if _minuslogprior_1d_name in missing_priors:
        prior_recompute_1d = True
    if prior_recompute_1d:
        missing_priors.discard(_minuslogprior_1d_name)
        mlprior_names_add.insert(0, _minuslogprior_1d_name)
    prior_regenerate: Optional[Prior]
    if missing_priors and "prior" in info_in:
        # in case there are input priors that are not stored in input samples
        # e.g. when postprocessing GetDist/CosmoMC-format chains
        in_names = minuslogprior_names(info_in["prior"])
        info_prior = {piname: inf for (piname, inf), in_name in
                      zip(info_in["prior"].items(), in_names) if
                      in_name in missing_priors}
        regenerated_prior_names = minuslogprior_names(info_prior)
        missing_priors.difference_update(regenerated_prior_names)
        prior_regenerate = Prior(dummy_model_in.parameterization, info_prior)
    else:
        prior_regenerate = None
        regenerated_prior_names = None
    if missing_priors:
        raise LoggedError(log, "Missing priors: %s", missing_priors)

    mpi.sync_processes()
    output_in.check_lock()

    # 4. Main loop! Loop over input samples and adjust as required.
    if mpi.is_main_process():
        log.info("Running post-processing...")
    difflogmax: Optional[float] = None
    to_do = sum(len(c) for c in in_collections)
    weights = []
    done = 0
    last_dump_time = time.time()
    for collection_in, collection_out in zip(in_collections, out_collections):
        importance_weights = []

        def set_difflogmax():
            nonlocal difflogmax
            difflog = (collection_in[OutPar.minuslogpost].to_numpy(
                dtype=np.float64)[:len(collection_out)]
                       - collection_out[OutPar.minuslogpost].to_numpy(dtype=np.float64))
            difflogmax = np.max(difflog)
            if abs(difflogmax) < 1:
                difflogmax = 0  # keep simple when e.g. very similar
            log.debug("difflogmax: %g", difflogmax)
            if mpi.more_than_one_process():
                difflogmax = max(mpi.allgather(difflogmax))
            if mpi.is_main_process():
                log.debug("Set difflogmax: %g", difflogmax)
            _weights = np.exp(difflog - difflogmax)
            importance_weights.extend(_weights)
            collection_out.reweight(_weights)

        for i, point in collection_in.data.iterrows():
            all_params = point.to_dict()
            for p in remove_params:
                all_params.pop(p, None)
            log.debug("Point: %r", point)
            sampled = np.array([all_params[param] for param in
                                dummy_model_in.parameterization.sampled_params()])
            all_params = out_func_parameterization.to_input(all_params).copy()

            # Add/remove priors
            if prior_recompute_1d:
                priors_add = [model_add.prior.logps_internal(sampled)]
                if priors_add[0] == -np.inf:
                    continue
            else:
                priors_add = []
            if model_add.prior.external:
                priors_add.extend(model_add.prior.logps_external(all_params))

            logpriors_add = dict(zip(mlprior_names_add, priors_add))
            logpriors_new = [logpriors_add.get(name, - point.get(name, 0))
                             for name in collection_out.minuslogprior_names]
            if prior_regenerate:
                regenerated = dict(zip(regenerated_prior_names,
                                       prior_regenerate.logps_external(all_params)))
                for _i, name in enumerate(collection_out.minuslogprior_names):
                    if name in regenerated_prior_names:
                        logpriors_new[_i] = regenerated[name]

            if is_debug(log):
                log.debug("New set of priors: %r",
                          dict(zip(dummy_model_out.prior, logpriors_new)))
            if -np.inf in logpriors_new:
                continue
            # Add/remove likelihoods and/or (re-)calculate derived parameters
            loglikes_add, output_derived = model_add._loglikes_input_params(
                all_params, return_output_params=True)
            loglikes_add = dict(zip(chi2_names_add, loglikes_add))
            output_derived = dict(zip(model_add.output_params, output_derived))
            loglikes_new = [loglikes_add.get(name, -0.5 * point.get(name, 0))
                            for name in collection_out.chi2_names]
            if is_debug(log):
                log.debug("New set of likelihoods: %r",
                          dict(zip(dummy_model_out.likelihood, loglikes_new)))
                if output_derived:
                    log.debug("New set of derived parameters: %r", output_derived)
            if -np.inf in loglikes_new:
                continue
            all_params.update(output_derived)

            all_params.update(out_func_parameterization.to_derived(all_params))
            derived = {param: all_params.get(param) for param in
                       dummy_model_out.parameterization.derived_params()}
            # We need to recompute the aggregated chi2 by hand
            for type_, likes in inv_types.items():
                derived[get_chi2_name(type_)] = sum(
                    -2 * lvalue for lname, lvalue
                    in zip(collection_out.chi2_names, loglikes_new)
                    if undo_chi2_name(lname) in likes)
            if is_debug(log):
                log.debug("New derived parameters: %r",
                          {p: derived[p]
                           for p in dummy_model_out.parameterization.derived_params()
                           if p in add["params"]})
            # Save to the collection (keep old weight for now)
            weight = point.get(OutPar.weight)
            mpi.check_errors()
            if difflogmax is None and i > OutputOptions.reweight_after and \
                    time.time() - last_dump_time > OutputOptions.output_inteveral_s / 2:
                set_difflogmax()
                collection_out.out_update()

            if difflogmax is not None:
                logpost_new = sum(logpriors_new) + sum(loglikes_new)
                importance_weight = np.exp(logpost_new + point.get(OutPar.minuslogpost)
                                           - difflogmax)
                weight = weight * importance_weight
                importance_weights.append(importance_weight)
                if time.time() - last_dump_time > OutputOptions.output_inteveral_s:
                    collection_out.out_update()
                    last_dump_time = time.time()

            if weight > 0:
                collection_out.add(sampled, derived=derived.values(), weight=weight,
                                   logpriors=logpriors_new, loglikes=loglikes_new)

            # Display progress
            percent = int(np.round((i + done) / to_do * 100))
            if percent != last_percent and not percent % 5:
                last_percent = percent
                progress_bar(log, percent, " (%d/%d)" % (i + done, to_do))

        if difflogmax is None:
            set_difflogmax()
        if not collection_out.data.last_valid_index():
            raise LoggedError(
                log, "No elements in the final sample. Possible causes: "
                     "added a prior or likelihood valued zero over the full sampled "
                     "domain, or the computation of the theory failed everywhere, etc.")
        collection_out.out_update()
        weights.append(np.array(importance_weights))
        done += len(collection_in)

    assert difflogmax is not None
    points = 0
    tot_weight = 0
    min_weight = np.inf
    max_weight = -np.inf
    max_output_weight = -np.inf
    sum_w2 = 0
    points_removed = 0
    for collection_in, collection_out, importance_weights in zip(in_collections,
                                                                 out_collections,
                                                                 weights):
        output_weights = collection_out[OutPar.weight]
        points += len(collection_out)
        tot_weight += np.sum(output_weights)
        points_removed += len(importance_weights) - len(output_weights)
        min_weight = min(min_weight, np.min(importance_weights))
        max_weight = max(max_weight, np.max(importance_weights))
        max_output_weight = max(max_output_weight, np.max(output_weights))
        sum_w2 += np.dot(output_weights, output_weights)

    (tot_weights, min_weights, max_weights, max_output_weights, sum_w2s, points_s,
     points_removed_s) = mpi.zip_gather(
        [tot_weight, min_weight, max_weight, max_output_weight, sum_w2,
         points, points_removed])

    if mpi.is_main_process():
        output_out.clear_lock()
        log.info("Finished! Final number of distinct sample points: %s", sum(points_s))
        log.info("Importance weight range: %.4g -- %.4g",
                 min(min_weights), max(max_weights))
        if sum(points_removed_s):
            log.info("Points deleted due to zero weight: %s", sum(points_removed_s))
        log.info("Effective number of single samples if independent (sum w)/max(w): %s",
                 int(sum(tot_weights) / max(max_output_weights)))
        log.info(
            "Effective number of weighted samples if independent (sum w)^2/sum(w^2): "
            "%s", int(sum(tot_weights) ** 2 / sum(sum_w2s)))
    products: PostResultDict = {"sample": value_or_list(out_collections),
                                "stats": {'min_importance_weight': (min(min_weights) /
                                                                    max(max_weights)),
                                          'points_removed': sum(points_removed_s),
                                          'tot_weight': sum(tot_weights),
                                          'max_weight': max(max_output_weights),
                                          'sum_w2': sum(sum_w2s),
                                          'points': sum(points_s)},
                                "logpost_weight_offset": difflogmax,
                                "weights": value_or_list(weights)}
    return PostTuple(info=out_combined, products=products)
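
A hedged usage sketch for the `post` entry point above, assuming a finished run
whose chains live under the prefix "chains/run1"; the likelihood names are
placeholders. The 'suffix', 'skip', 'add' and 'remove' options follow the checks
performed in the code:

from cobaya.post import post

post_info = {
    "output": "chains/run1",          # where the original samples were written
    "post": {
        "suffix": "newlike",          # required: names the post-processed chain
        "skip": 0.3,                  # drop the first 30% of each chain as burn-in
        "remove": {"likelihood": ["old_like"]},          # hypothetical names
        "add": {"likelihood": {"new_like": None}},
    },
}
updated_info, products = post(post_info)
print(products["sample"])             # the re-weighted collection(s)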
Example #26
    def info(self):
        """
        Returns a copy of the information used to create the model, including defaults.
        """
        return deepcopy_where_possible(self._updated_info)
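
Example #26 only makes sense given what `deepcopy_where_possible` does. A
plausible self-contained reconstruction (an assumption, not cobaya's exact
implementation): deep-copy everything that supports it, and keep a plain
reference for objects that cannot be deep-copied:

import sys
from copy import deepcopy

def deepcopy_where_possible(obj):
    # Recurse into dicts; deep-copy leaves when possible, else keep a reference
    if isinstance(obj, dict):
        return {k: deepcopy_where_possible(v) for k, v in obj.items()}
    try:
        return deepcopy(obj)
    except Exception:
        return obj  # e.g. modules and some dynamic classes cannot be deep-copied

info = {"params": {"x": 1}, "external": sys}   # `sys` is not deep-copyable
copied = deepcopy_where_possible(info)
print(copied["params"] is not info["params"], copied["external"] is sys)  # True True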
Example #27
    def check_and_dump_info(self, input_info, updated_info, check_compatible=True,
                            cache_old=False, use_cache_old=False, ignore_blocks=()):
        """
        Saves the info in the chain folder twice:
           - the input info.
           - idem, populated with the components' defaults.

        If resuming a sample, checks first that old and new infos and versions are
        consistent.
        """
        # trim known params of each likelihood: for internal use only
        self.check_lock()
        updated_info_trimmed = deepcopy_where_possible(updated_info)
        updated_info_trimmed["version"] = get_version()
        for like_info in updated_info_trimmed.get("likelihood", {}).values():
            (like_info or {}).pop("params", None)
        if check_compatible:
            # We will test the old info against the dumped+loaded new info.
            # This is because we can't actually check if python objects do change
            try:
                old_info = self.reload_updated_info(cache=cache_old,
                                                    use_cache=use_cache_old)
            except InputImportError:
                # for example, when there's a dynamically generated class that cannot
                # be found by the yaml loader (could use yaml loader that ignores them)
                old_info = None
            if old_info:
                # use consistent yaml read-in types
                # TODO: could probably just compare full infos here, with externals?
                #  for the moment cautiously keeping old behaviour
                old_info = yaml_load(yaml_dump(old_info))  # type: ignore
                new_info = yaml_load(yaml_dump(updated_info_trimmed))
                if not is_equal_info(old_info, new_info, strict=False,
                                     ignore_blocks=list(ignore_blocks) + [
                                         "output"]):
                    raise LoggedError(
                        self.log, "Old and new run information not compatible! "
                                  "Resuming not possible!")
                # Deal with version comparison separately:
                # - If not specified now, take the one used in resume info
                # - If specified both now and before, check new older than old one
                # (For Cobaya's own version, prefer new one always)
                old_version = old_info.get("version")
                new_version = new_info.get("version")
                if isinstance(old_version, str) and isinstance(new_version, str):
                    if version.parse(old_version) > version.parse(new_version):
                        raise LoggedError(
                            self.log, "You are trying to resume a run performed with a "
                                      "newer version of Cobaya: %r (you are using %r). "
                                      "Please, update your Cobaya installation.",
                            old_version, new_version)
                for k in set(kinds).intersection(updated_info):
                    if k in ignore_blocks or updated_info[k] is None:
                        continue
                    for c in updated_info[k]:
                        new_version = updated_info[k][c].get("version")
                        old_version = old_info[k][c].get("version")  # type: ignore
                        if new_version is None:
                            updated_info[k][c]["version"] = old_version
                            updated_info_trimmed[k][c]["version"] = old_version
                        elif old_version is not None:
                            cls = get_resolved_class(
                                c, k, None_if_not_found=True,
                                class_name=updated_info[k][c].get("class"))
                            if cls and cls.compare_versions(
                                    old_version, new_version, equal=False):
                                raise LoggedError(
                                    self.log, "You have requested version %r for "
                                              "%s:%s, but you are trying to resume a "
                                              "run that used a newer version: %r.",
                                    new_version, k, c, old_version)
        # If resuming, we don't want to do *partial* dumps
        if ignore_blocks and self.is_resuming():
            return
        # Work on a copy of the input info, since we are updating the prefix
        # (the updated one is already a copy)
        if input_info is not None:
            input_info = deepcopy_where_possible(input_info)
        # Write the new one
        for f, info in [(self.file_input, input_info),
                        (self.file_updated, updated_info_trimmed)]:
            if info:
                for k in ignore_blocks:
                    info.pop(k, None)
                info.pop("debug", None)
                info.pop("force", None)
                info.pop("resume", None)
                # make sure the dumped output_prefix does only contain the file prefix,
                # not the folder, since it's already been placed inside it
                info["output"] = self.updated_prefix()
                with open(f, "w", encoding="utf-8") as f_out:
                    try:
                        f_out.write(yaml_dump(sort_cosmetic(info)))
                    except OutputError as e:
                        raise LoggedError(self.log, str(e))
        if updated_info_trimmed and has_non_yaml_reproducible(updated_info_trimmed):
            try:
                import dill
            except ImportError:
                self.mpi_info('Install "dill" to save a reproducible options file.')
            else:
                import pickle
                try:
                    with open(self.dump_file_updated, 'wb') as f:
                        dill.dump(sort_cosmetic(updated_info_trimmed), f,
                                  pickle.HIGHEST_PROTOCOL)
                except pickle.PicklingError as e:
                    os.remove(self.dump_file_updated)
                    self.mpi_info('Options file cannot be pickled %s', e)
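
A minimal sketch of the dill fallback at the end of the method: try dill (which,
unlike pickle, can serialize lambdas and dynamically created classes), warn if it
is missing, and clean up the partial file if pickling still fails. The file name
is illustrative:

import os
import pickle

def dump_options(info, path="updated_info.dill_pickle"):
    try:
        import dill
    except ImportError:
        print('Install "dill" to save a reproducible options file.')
        return
    try:
        with open(path, "wb") as f:
            dill.dump(info, f, pickle.HIGHEST_PROTOCOL)
    except pickle.PicklingError as e:
        os.remove(path)  # do not leave a half-written, unloadable file behind
        print("Options file cannot be pickled:", e)

dump_options({"likelihood": {"f": lambda x: -0.5 * x ** 2}})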
Example #28
    def __init__(self, info_params, allow_renames=True, ignore_unused_sampled=False):
        self.set_logger(lowercase=True)
        self.allow_renames = allow_renames
        # First, we load the parameters,
        # not caring about whether they are understood by any likelihood.
        # `input` contains the parameters (expected to be) understood by the likelihood,
        #   with its fixed value, its fixing function, or None if their value is given
        #   directly by the sampler.
        self._infos = {}
        self._input = {}
        self._input_funcs = {}
        self._input_args = {}
        self._output = {}
        self._constant = {}
        self._sampled = {}
        self._sampled_renames = {}
        self._derived = {}
        self._derived_funcs = {}
        self._derived_args = {}
        # Notice here that expand_info_param *always* adds a partag.derived:True tag
        # to infos without _prior or partag.value, and a partag.value field
        # to fixed params
        for p, info in info_params.items():
            self._infos[p] = deepcopy_where_possible(info)
            if is_fixed_param(info):
                if isinstance(info[partag.value], Number):
                    self._constant[p] = info[partag.value]
                    if not info.get(partag.drop, False):
                        self._input[p] = self._constant[p]
                else:
                    self._input[p] = None
                    self._input_funcs[p] = get_external_function(info[partag.value])
                    self._input_args[p] = getfullargspec(self._input_funcs[p]).args
            if is_sampled_param(info):
                self._sampled[p] = None
                if not info.get(partag.drop, False):
                    self._input[p] = None
                self._sampled_renames[p] = (
                    (lambda x: [x] if isinstance(x, str) else x)
                    (info.get(partag.renames, [])))
            if is_derived_param(info):
                self._derived[p] = deepcopy_where_possible(info)
                # Dynamical parameters whose value we want to save
                if info[partag.derived] is True and is_fixed_param(info):
                    info[partag.derived] = "lambda %s: %s" % (p, p)
                if info[partag.derived] is True:
                    self._output[p] = None
                else:
                    self._derived_funcs[p] = get_external_function(info[partag.derived])
                    self._derived_args[p] = getfullargspec(self._derived_funcs[p]).args
        # Check that the sampled and derived params are all valid python variable names
        for p in chain(self._sampled, self._derived):
            if not is_valid_variable_name(p):
                is_in = p in self._sampled
                eg_in = "  p_prime:\n    prior: ...\n  %s: 'lambda p_prime: p_prime'\n" % p
                eg_out = "  p_prime: 'lambda %s: %s'\n" % (p, p)
                raise LoggedError(
                    self.log, "Parameter name '%s' is not a valid Python variable name "
                              "(it needs to start with a letter or '_').\n"
                              "If this is an %s parameter of a likelihood or theory, "
                              "whose name you cannot change,%s define an associated "
                              "%s one with a valid name 'p_prime' as: \n\n%s",
                    p, "input" if is_in else "output",
                    "" if is_in else " remove it and",
                    "sampled" if is_in else "derived",
                    eg_in if is_in else eg_out)
        # Assume that the *un*known function arguments are likelihood/theory
        # output parameters
        for arg in (set(chain(*self._input_args.values()))
                            .union(chain(*self._derived_args.values()))
                    - set(self._constant) - set(self._input)
                    - set(self._sampled) - set(self._derived)):
            self._output[arg] = None

        # Useful sets: directly-sampled input parameters and directly "output-ed" derived
        self._directly_sampled = [p for p in self._input if p in self._sampled]
        self._directly_output = [p for p in self._derived if p in self._output]
        # Useful mapping: input params that vary if each sample is varied
        self._sampled_input_dependence = {s: [i for i in self._input
                                              if s in self._input_args.get(i, {})]
                                          for s in self._sampled}
        # From here on, some error control.
        dropped_but_never_used = (
            set(p for p, v in self._sampled_input_dependence.items() if not v)
                .difference(set(self._directly_sampled)))
        if dropped_but_never_used and not ignore_unused_sampled:
            raise LoggedError(
                self.log,
                "Parameters %r are sampled but not passed to a likelihood or theory "
                "code, and never used as arguments for any parameter functions. "
                "Check that you are not using the '%s' tag unintentionally.",
                list(dropped_but_never_used), partag.drop)
        # input params depend on input and sampled only, never on output/derived
        all_input_arguments = set(chain(*self._input_args.values()))
        bad_input_dependencies = all_input_arguments.difference(
            set(self.input_params()).union(set(self.sampled_params())).union(
                set(self.constant_params())))
        if bad_input_dependencies:
            raise LoggedError(
                self.log,
                "Input parameters defined as functions can only depend on other "
                "input parameters that are not defined as functions. "
                "In particular, an input parameter cannot depend on %r."
                "Use an explicit Theory calculator for more complex dependencies.",
                list(bad_input_dependencies))
        self._wrapped_input_funcs, self._wrapped_derived_funcs = \
            self._get_wrapped_functions_evaluation_order()
        # warn if repeated labels
        labels_inv_repeated = invert_dict(self.labels())
        for k in list(labels_inv_repeated):
            if len(labels_inv_repeated[k]) == 1:
                labels_inv_repeated.pop(k)
        if labels_inv_repeated:
            self.log.warning("There are repeated parameter labels: %r",
                             labels_inv_repeated)
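
A minimal sketch of the dynamic-parameter machinery above: a string such as
"lambda a, b: a + b" is turned into a callable, and `getfullargspec` recovers
the argument names that must be looked up at evaluation time
(`get_external_function` in the snippet presumably does something similar):

from inspect import getfullargspec

def load_param_function(expr):
    func = eval(expr)  # the snippet delegates this to get_external_function
    return func, getfullargspec(func).args

func, args = load_param_function("lambda omega_m, h: omega_m * h ** 2")
point = {"omega_m": 0.31, "h": 0.67}
print(args, "->", func(*[point[a] for a in args]))  # ['omega_m', 'h'] -> 0.1391...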
Example #29
    def sampled_params_info(self):
        return {p: deepcopy_where_possible(info) for p, info
                in self._infos.items() if p in self._sampled}
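
The filtered-copy pattern above in isolation: return per-parameter info only for
the sampled subset, copying so that callers cannot mutate internal state (a
sketch with plain `deepcopy` standing in for `deepcopy_where_possible`):

from copy import deepcopy

infos = {"x": {"prior": {"min": 0, "max": 1}}, "y": {"value": 3.0}}
sampled = {"x"}
print({p: deepcopy(i) for p, i in infos.items() if p in sampled})
# {'x': {'prior': {'min': 0, 'max': 1}}}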
Example #30
def update_info(info):
    """
    Creates an updated info starting from the defaults for each component and updating it
    with the input info.
    """
    component_base_classes = get_base_classes()
    # Don't modify the original input, and convert all Mapping to consistent dict
    input_info = deepcopy_where_possible(info)
    # Creates an equivalent info using only the defaults
    updated_info = {}
    default_params_info = {}
    default_prior_info = {}
    components = get_used_components(input_info)
    from cobaya.component import CobayaComponent
    for block in components:
        updated = {}
        updated_info[block] = updated
        input_block = input_info[block]
        for component in components[block]:
            # Preprocess "no options" and "external function" in input
            try:
                input_block[component] = input_block[component] or {}
            except TypeError:
                raise LoggedError(
                    log,
                    "Your input info is not well formatted at the '%s' block. "
                    "It must be a dictionary {'%s_i':{options}, ...}. ", block,
                    block)
            if isinstance(component, CobayaComponent) or \
                    isinstance(input_block[component], CobayaComponent):
                raise LoggedError(
                    log, "Input for %s:%s should specify a class not "
                    "an instance", block, component)
                # TODO: allow instance passing?
                #       could allow this, but would have to sort out deepcopy
                # if input_block[component]:
                #   raise LoggedError(log, "Instances should be passed a dictionary "
                #                           "entry of the form 'instance: None'")
                # change_key(input_block, component, component.get_name(),
                #           {_external: component})
                # updated[component.get_name()] = input_block[component.get_name()].copy()
                # continue
            if inspect.isclass(input_block[component]) or \
                    not isinstance(input_block[component], dict):
                input_block[component] = {_external: input_block[component]}
            ext = input_block[component].get(_external)
            if ext:
                if inspect.isclass(ext):
                    default_class_info = get_default_info(
                        ext, block, input_options=input_block[component])
                else:
                    default_class_info = deepcopy_where_possible(
                        component_base_classes[block].get_defaults())
            else:
                component_path = input_block[component].get(
                    _component_path, None)
                default_class_info = get_default_info(
                    component,
                    block,
                    class_name=input_block[component].get(_class_name),
                    component_path=component_path,
                    input_options=input_block[component])
            updated[component] = default_class_info or {}
            # Update default options with input info
            # Consistency is checked only up to first level! (i.e. subkeys may not match)
            # Reserved attributes not necessarily already in default info:
            reserved = {
                _external, _class_name, _provides, _requires, partag.renames,
                _input_params, _output_params, _component_path, _aliases
            }
            options_not_recognized = (set(
                input_block[component]).difference(reserved).difference(
                    set(updated[component])))
            if options_not_recognized:
                alternatives = {}
                available = ({
                    _external, _class_name, _requires, partag.renames
                }.union(updated_info[block][component]))
                while options_not_recognized:
                    option = options_not_recognized.pop()
                    alternatives[option] = fuzzy_match(option, available, n=3)
                did_you_mean = ", ".join([
                    ("'%s' (did you mean %s?)" %
                     (o, "|".join(["'%s'" % _
                                   for _ in a])) if a else "'%s'" % o)
                    for o, a in alternatives.items()
                ])
                raise LoggedError(
                    log, "%s '%s' does not recognize some options: %s. "
                    "Check the documentation for '%s'.", block, component,
                    did_you_mean, block)
            updated[component].update(input_block[component])
            # save params and priors of class to combine later
            default_params_info[component] = default_class_info.get(
                _params, {})
            default_prior_info[component] = default_class_info.get(_prior, {})
    # Add priors info, after the necessary checks
    if _prior in input_info or any(default_prior_info.values()):
        updated_info[_prior] = input_info.get(_prior, {})
    for prior_info in default_prior_info.values():
        for name, prior in prior_info.items():
            if updated_info[_prior].get(name, prior) != prior:
                raise LoggedError(
                    log,
                    "Two different priors cannot have the same name: '%s'.",
                    name)
            updated_info[_prior][name] = prior
    # Add parameters info, after the necessary updates and checks
    defaults_merged = merge_default_params_info(default_params_info)
    updated_info[_params] = merge_params_info(
        [defaults_merged, input_info.get(_params, {})], default_derived=False)
    # Add aggregated chi2 params
    if kinds.likelihood in info:
        all_types = set(
            chain(*[
                str_to_list(like_info.get("type", []) or [])
                for like_info in updated_info[kinds.likelihood].values()
            ]))
        for t in all_types:
            updated_info[_params][_get_chi2_name(t)] = {
                partag.latex: _get_chi2_label(t),
                partag.derived: True
            }
    # Add automatically-defined parameters
    if _auto_params in updated_info:
        make_auto_params(updated_info.pop(_auto_params), updated_info[_params])
    # Add aliases for theory params (after merging!)
    for kind in [
            k for k in [kinds.theory, kinds.likelihood] if k in updated_info
    ]:
        for item in updated_info[kind].values():
            renames = item.get(partag.renames)
            if renames:
                if not isinstance(renames, Mapping):
                    raise LoggedError(
                        log,
                        "'renames' should be a dictionary of name mappings "
                        "(or you meant to use 'aliases')")
                renames_flat = [
                    set([k] + str_to_list(v)) for k, v in renames.items()
                ]
                for p in updated_info[_params]:
                    # Probably could be made faster by inverting the renames dicts *once*
                    renames_pairs = [a for a in renames_flat if p in a]
                    if renames_pairs:
                        this_renames = reduce(
                            lambda x, y: x.union(y),
                            [a for a in renames_flat if p in a])
                        updated_info[_params][p][partag.renames] = \
                            list(set(this_renames).union(set(str_to_list(
                                updated_info[_params][p].get(partag.renames, []))))
                                 .difference({p}))
    # Rest of the options
    for k, v in input_info.items():
        if k not in updated_info:
            updated_info[k] = v
    return updated_info
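
A self-contained sketch of the defaults-then-input overlay used in `update_info`
above: start from a component's defaults and update with the user input. As the
comment in the snippet notes, consistency is checked only up to the first level,
so nested dicts are replaced wholesale:

def overlay(defaults, user_input):
    merged = dict(defaults)
    merged.update(user_input or {})  # first level only: nested dicts are replaced
    return merged

defaults = {"speed": 1, "params": {"x": {"prior": {"min": 0, "max": 1}}}}
user = {"speed": 10, "params": {"x": {"value": 0.5}}}
print(overlay(defaults, user))
# {'speed': 10, 'params': {'x': {'value': 0.5}}}  -- the default 'prior' is gone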