# Import block added to make this fragment self-contained. Module paths follow
# the cobaya 1.x layout, in which the likelihood collection is exported as
# `LikelihoodCollection` (aliased to `Likelihood` below).
import logging
from copy import deepcopy
from collections import namedtuple

import numpy as np

from cobaya.conventions import (
    _likelihood, _params, _path_install, _prior, _theory, _timing)
from cobaya.parameterization import Parameterization
from cobaya.prior import Prior
from cobaya.likelihood import LikelihoodCollection as Likelihood
from cobaya.log import HandledException

# Return type of Model.logposterior
logposterior = namedtuple("logposterior",
                          ["logpost", "logpriors", "loglikes", "derived"])


class Model(object):
    """
    Class containing all the information necessary to compute the unnormalized
    posterior.

    Allows for low-level interaction with the theory code, prior and likelihood.

    **NB:** do not initialize this class directly; use :func:`~model.get_model`
    instead, with some info as input.
    """

    def __init__(self, info_params, info_likelihood, info_prior=None,
                 info_theory=None, modules=None, timing=None, allow_renames=True):
        self.log = logging.getLogger(self.__class__.__name__)
        self._full_info = {
            _params: deepcopy(info_params),
            _likelihood: deepcopy(info_likelihood)}
        if not self._full_info[_likelihood]:
            self.log.error("No likelihood requested!")
            raise HandledException
        for like in self._full_info[_likelihood].values():
            like.pop(_params)
        for k, v in ((_prior, info_prior), (_theory, info_theory),
                     (_path_install, modules), (_timing, timing)):
            if v not in (None, {}):
                self._full_info[k] = deepcopy(v)
        self.parameterization = Parameterization(info_params,
                                                 allow_renames=allow_renames)
        self.prior = Prior(self.parameterization, info_prior)
        self.likelihood = Likelihood(info_likelihood, self.parameterization,
                                     info_theory, modules=modules, timing=timing)

    def info(self):
        """
        Returns a copy of the information used to create the model,
        including defaults.
        """
        return deepcopy(self._full_info)

    def _to_sampled_array(self, params_values):
        """
        Internal method to interact with the prior.
        Needs correct (not renamed) parameter names.
        """
        if hasattr(params_values, "keys"):
            params_values_array = np.array(list(params_values.values()))
        else:
            params_values_array = np.atleast_1d(params_values)
        if params_values_array.shape[0] != self.prior.d():
            self.log.error("Wrong dimensionality: it's %d and it should be %d.",
                           len(params_values_array), self.prior.d())
            raise HandledException
        if len(params_values_array.shape) >= 2:
            self.log.error(
                "Cannot take arrays of points as inputs, just single points.")
            raise HandledException
        return params_values_array

    def logpriors(self, params_values, make_finite=False):
        """
        Takes an array or dictionary of sampled parameter values.
        If the argument is an array, parameters must have the same order as in
        the input. When in doubt, you can get the correct order as
        ``list([your_model].parameterization.sampled_params())``.

        Returns the log-values of the priors, in the same order as returned by
        ``list([your_model].prior)``. The first one, named ``0``, corresponds
        to the product of the 1-dimensional priors specified in the ``params``
        block, and it's normalized (in general, the external prior densities
        aren't).

        If ``make_finite=True``, it will try to represent infinities as the
        largest real numbers allowed by machine precision.
        """
        if hasattr(params_values, "keys"):
            params_values = self.parameterization._check_sampled(**params_values)
        params_values_array = self._to_sampled_array(params_values)
        logpriors = self.prior.logps(params_values_array)
        if make_finite:
            return np.nan_to_num(logpriors)
        return logpriors

    def logprior(self, params_values, make_finite=False):
        """
        Takes an array or dictionary of sampled parameter values.
        If the argument is an array, parameters must have the same order as in
        the input. When in doubt, you can get the correct order as
        ``list([your_model].parameterization.sampled_params())``.

        Returns the log-value of the prior (in general, unnormalized, unless
        the only priors specified are the 1-dimensional ones in the ``params``
        block).

        If ``make_finite=True``, it will try to represent infinities as the
        largest real numbers allowed by machine precision.
        """
        logprior = np.sum(self.logpriors(params_values))
        if make_finite:
            return np.nan_to_num(logprior)
        return logprior

    def loglikes(self, params_values, return_derived=True, make_finite=False,
                 cached=True, _no_check=False):
        """
        Takes an array or dictionary of sampled parameter values.
        If the argument is an array, parameters must have the same order as in
        the input. When in doubt, you can get the correct order as
        ``list([your_model].parameterization.sampled_params())``.

        Returns a tuple ``(loglikes, derived_params)``, where ``loglikes`` are
        the log-values of the likelihoods (unnormalized, in general) in the
        same order as returned by ``list([your_model].likelihood)``, and
        ``derived_params`` are the values of the derived parameters in the
        order given by ``list([your_model].parameterization.derived_params())``.

        To return just the list of log-likelihood values, make
        ``return_derived=False``.

        If ``make_finite=True``, it will try to represent infinities as the
        largest real numbers allowed by machine precision.

        If ``cached=False`` (default: True), it ignores previously computed
        results that could be reused.
        """
        if hasattr(params_values, "keys") and not _no_check:
            params_values = self.parameterization._check_sampled(**params_values)
        _derived = [] if return_derived else None
        loglikes = self.likelihood.logps(
            self.parameterization._to_input(params_values),
            _derived=_derived, cached=cached)
        if make_finite:
            loglikes = np.nan_to_num(loglikes)
        if return_derived:
            derived_sampler = self.parameterization._to_derived(_derived)
            if self.log.getEffectiveLevel() <= logging.DEBUG:
                self.log.debug(
                    "Computed derived parameters: %s",
                    dict(zip(self.parameterization.derived_params(),
                             derived_sampler)))
            return loglikes, derived_sampler
        return loglikes

    def loglike(self, params_values, return_derived=True, make_finite=False,
                cached=True):
        """
        Takes an array or dictionary of sampled parameter values.
        If the argument is an array, parameters must have the same order as in
        the input. When in doubt, you can get the correct order as
        ``list([your_model].parameterization.sampled_params())``.

        Returns a tuple ``(loglike, derived_params)``, where ``loglike`` is the
        log-value of the likelihood (unnormalized, in general), and
        ``derived_params`` are the values of the derived parameters in the
        order given by ``list([your_model].parameterization.derived_params())``.

        To return just the log-likelihood value, make ``return_derived=False``.

        If ``make_finite=True``, it will try to represent infinities as the
        largest real numbers allowed by machine precision.

        If ``cached=False`` (default: True), it ignores previously computed
        results that could be reused.
        """
        ret_value = self.loglikes(params_values, return_derived=return_derived,
                                  cached=cached)
        if return_derived:
            loglike = np.sum(ret_value[0])
            if make_finite:
                return np.nan_to_num(loglike), ret_value[1]
            return loglike, ret_value[1]
        else:
            loglike = np.sum(ret_value)
            if make_finite:
                return np.nan_to_num(loglike)
            return loglike

    def logposterior(self, params_values, return_derived=True,
                     make_finite=False, cached=True):
        """
        Takes an array or dictionary of sampled parameter values.
        If the argument is an array, parameters must have the same order as in
        the input. When in doubt, you can get the correct order as
        ``list([your_model].parameterization.sampled_params())``.

        Returns a ``logposterior`` ``NamedTuple``, with the following fields:

        - ``logpost``: log-value of the posterior.
        - ``logpriors``: log-values of the priors, in the same order as in
          ``list([your_model].prior)``. The first one corresponds to the
          product of the 1-dimensional priors specified in the ``params``
          block. Except for the first one, the priors are unnormalized.
        - ``loglikes``: log-values of the likelihoods (unnormalized, in
          general), in the same order as in ``list([your_model].likelihood)``.
        - ``derived``: values of the derived parameters in the order given by
          ``list([your_model].parameterization.derived_params())``.

        Only computes the log-likelihood and the derived parameters if the
        prior is non-null (otherwise the fields ``loglikes`` and ``derived``
        are empty lists).

        To ignore the derived parameters, make ``return_derived=False``.

        If ``make_finite=True``, it will try to represent infinities as the
        largest real numbers allowed by machine precision.

        If ``cached=False`` (default: True), it ignores previously computed
        results that could be reused.
        """
        if hasattr(params_values, "keys"):
            params_values = self.parameterization._check_sampled(**params_values)
        params_values_array = self._to_sampled_array(params_values)
        if self.log.getEffectiveLevel() <= logging.DEBUG:
            self.log.debug(
                "Posterior to be computed for parameters %s",
                dict(zip(self.parameterization.sampled_params(),
                         params_values_array)))
        if not np.all(np.isfinite(params_values_array)):
            self.log.error(
                "Got non-finite parameter values: %r",
                dict(zip(self.parameterization.sampled_params(),
                         params_values_array)))
            raise HandledException
        # Notice that we don't use make_finite in the prior call,
        # to correctly check whether we have to compute the likelihood
        logpriors = self.logpriors(params_values_array, make_finite=False)
        logpost = sum(logpriors)
        if -np.inf not in logpriors:
            like = self.loglikes(params_values, return_derived=return_derived,
                                 make_finite=make_finite, cached=cached,
                                 _no_check=True)
            loglikes, derived_sampler = like if return_derived else (like, [])
            logpost += sum(loglikes)
        else:
            loglikes = []
            derived_sampler = []
        if make_finite:
            logpriors = np.nan_to_num(logpriors)
            logpost = np.nan_to_num(logpost)
        return logposterior(logpost=logpost, logpriors=logpriors,
                            loglikes=loglikes, derived=derived_sampler)

    def logpost(self, params_values, make_finite=False, cached=True):
        """
        Takes an array or dictionary of sampled parameter values.
        If the argument is an array, parameters must have the same order as in
        the input. When in doubt, you can get the correct order as
        ``list([your_model].parameterization.sampled_params())``.

        Returns the log-value of the posterior.

        If ``make_finite=True``, it will try to represent infinities as the
        largest real numbers allowed by machine precision.

        If ``cached=False`` (default: True), it ignores previously computed
        results that could be reused.
        """
        return self.logposterior(params_values, make_finite=make_finite,
                                 return_derived=False, cached=cached)[0]

    def dump_timing(self):
        """
        Prints the average computation time of the theory code and likelihoods.

        It's more reliable the more times the likelihood has been evaluated.
        """
        self.likelihood.dump_timing()

    # Python magic for the "with" statement
    def __enter__(self):
        return self

    def __exit__(self, exception_type=None, exception_value=None,
                 traceback=None):
        self.likelihood.__exit__(exception_type, exception_value, traceback)

    def close(self):
        self.__exit__()
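# Usage sketch (added for illustration; not part of the original module).
# It builds a model through cobaya's `get_model` entry point -- recommended over
# instantiating `Model` directly (see the class docstring above). The
# one-parameter setup and the external (callable) likelihood `gauss` are
# hypothetical.
def _example_model_usage():
    from cobaya.model import get_model
    info = {"params": {"x": {"prior": {"min": -5, "max": 5}}},
            "likelihood": {"gauss": lambda x: -0.5 * x ** 2}}
    model = get_model(info)
    # Order of the sampled parameters, needed for array-like inputs:
    print(list(model.parameterization.sampled_params()))  # ['x']
    # Log-prior(s), log-likelihood and full log-posterior at a single point:
    print(model.logpriors([1.0]))
    print(model.loglike([1.0], return_derived=False))
    print(model.logposterior([1.0], return_derived=False))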
# The post-processing entry point (post.py in the same cobaya 1.x layout).
# Additional imports, again added for self-containedness; this fragment also
# reuses `logging`, `deepcopy`, `np` and the cobaya names imported above.
from collections import OrderedDict as odict

from cobaya.conventions import (
    _chi2, _debug, _debug_file, _force, _input_params, _minuslogpost,
    _minuslogprior, _output_prefix, _p_drop, _p_value, _post, _prior_1d_name,
    _separator, _weight)
from cobaya.collection import Collection
from cobaya.input import get_full_info, load_input
from cobaya.log import logger_setup
from cobaya.mpi import get_mpi_rank
from cobaya.output import Output
from cobaya.parameterization import (
    is_derived_param, is_fixed_param, is_sampled_param)
from cobaya.tools import progress_bar, recursive_update


# Lightweight stand-in for Model used during post-processing. Reconstructed
# here so that the fragment is self-contained, consistent with how it is used
# below; the original definition may differ in detail.
class DummyModel(object):

    def __init__(self, info_params, info_likelihood, info_prior=None,
                 info_theory=None):
        self.parameterization = Parameterization(info_params,
                                                 ignore_unused_sampled=True)
        self.prior = [_prior_1d_name] + list(info_prior or [])
        self.likelihood = list(info_likelihood)


def post(info, sample=None):
    logger_setup(info.get(_debug), info.get(_debug_file))
    log = logging.getLogger(__name__.split(".")[-1])
    try:
        info_post = info[_post]
    except KeyError:
        log.error("No 'post' block given. Nothing to do!")
        raise HandledException
    if get_mpi_rank():
        log.warning("Post-processing is not yet MPI-parallelized. "
                    "Doing nothing for processes of rank > 0.")
        return
    # 1. Load existing sample
    output_in = Output(output_prefix=info.get(_output_prefix), resume=True)
    info_in = load_input(output_in.file_full) if output_in else deepcopy(info)
    dummy_model_in = DummyModel(info_in[_params], info_in[_likelihood],
                                info_in.get(_prior, None),
                                info_in.get(_theory, None))
    if output_in:
        i = 0
        while True:
            try:
                collection = Collection(
                    dummy_model_in, output_in, name="%d" % (1 + i), load=True,
                    onload_skip=info_post.get("skip", 0),
                    onload_thin=info_post.get("thin", 1))
                if i == 0:
                    collection_in = collection
                else:
                    collection_in._append(collection)
                i += 1
            except IOError:
                break
    elif sample:
        if isinstance(sample, Collection):
            sample = [sample]
        collection_in = deepcopy(sample[0])
        for s in sample[1:]:
            try:
                collection_in._append(s)
            except Exception:
                log.error("Failed to load some of the input samples.")
                raise HandledException
        i = len(sample)
    else:
        log.error("No output to load from, and no input collections given.")
        raise HandledException
    log.info("Loaded %d chain%s. Will process %d samples.",
             i, "s" if i - 1 else "", collection_in.n())
    if collection_in.n() <= 1:
        log.error("Not enough samples for post-processing. Try using a larger "
                  "sample, or skipping or thinning less.")
        raise HandledException
    # 2. Compare old and new info: determine what to do
    add = info_post.get("add", {})
    remove = info_post.get("remove", {})
    # Add a dummy 'one' likelihood, to absorb unused parameters
    if not add.get(_likelihood):
        add[_likelihood] = odict()
    add[_likelihood].update({"one": None})
    # Expand the "add" info
    add = get_full_info(add)
    # 2.1 Adding/removing derived parameters and changes in priors of sampled
    #     parameters
    out = {_params: deepcopy(info_in[_params])}
    for p in remove.get(_params, {}):
        pinfo = info_in[_params].get(p)
        if pinfo is None or not is_derived_param(pinfo):
            log.error(
                "You tried to remove parameter '%s', which is not a derived "
                "parameter. Only derived parameters can be removed during "
                "post-processing.", p)
            raise HandledException
        out[_params].pop(p)
    mlprior_names_add = []
    for p, pinfo in add.get(_params, {}).items():
        pinfo_in = info_in[_params].get(p)
        if is_sampled_param(pinfo):
            if not is_sampled_param(pinfo_in):
                # No added sampled parameters (de-marginalisation not implemented)
                if pinfo_in is None:
                    log.error(
                        "You added a new sampled parameter %r (maybe "
                        "accidentally, by adding a new likelihood that depends "
                        "on it). Adding new sampled parameters is not possible. "
                        "Try fixing it to some value.", p)
                    raise HandledException
                else:
                    log.error(
                        "You tried to change the prior of parameter '%s', but "
                        "it was not a sampled parameter. To change that prior, "
                        "you need to define it as an external one.", p)
                    raise HandledException
            # Recompute the product of 1-d priors (prepend its name only once)
            if mlprior_names_add[:1] != [_minuslogprior + _separator +
                                         _prior_1d_name]:
                mlprior_names_add = (
                    [_minuslogprior + _separator + _prior_1d_name] +
                    mlprior_names_add)
        elif is_derived_param(pinfo):
            if p in out[_params]:
                log.error(
                    "You tried to add derived parameter '%s', which is already "
                    "present. To force its recomputation, 'remove' it too.", p)
                raise HandledException
        elif is_fixed_param(pinfo):
            # Only one possibility left: a "fixed" parameter that was not
            # present before -- input of a new likelihood, or just an argument
            # for a dynamical derived parameter (dropped)
            if (p in info_in[_params] and
                    pinfo[_p_value] != (pinfo_in or {}).get(_p_value, None)):
                log.error(
                    "You tried to add a fixed parameter '%s: %r' that was "
                    "already present but had a different value or was not "
                    "fixed. This is not allowed. The old info of the parameter "
                    "was '%s: %r'", p, dict(pinfo), p, dict(pinfo_in))
                raise HandledException
        else:
            log.error("This should not happen. Contact the developers.")
            raise HandledException
        out[_params][p] = pinfo
    # For the likelihood only, turn the rest of the *derived* parameters into
    # constants, so that the likelihoods do not try to compute them.
    # But be careful to exclude *input* params that have a "derived: True" value
    # (which in "full info" turns into "derived: 'lambda [x]: [x]'")
    out_params_like = deepcopy(out[_params])
    for p, pinfo in out_params_like.items():
        if (is_derived_param(pinfo) and _p_value not in pinfo and
                p not in add.get(_params, {})):
            out_params_like[p] = {_p_value: np.nan, _p_drop: True}
    parameterization_like = Parameterization(out_params_like,
                                             ignore_unused_sampled=True)
    # 2.2 Manage adding/removing priors and likelihoods
    warn_remove = False
    for level in [_prior, _likelihood]:
        out[level] = getattr(dummy_model_in, level)
        if level == _prior:
            out[level].remove(_prior_1d_name)
        for pdf in info_post.get("remove", {}).get(level, []) or []:
            try:
                out[level].remove(pdf)
                warn_remove = True
            except ValueError:
                log.error("Trying to remove %s '%s', but it is not present. "
                          "Existing ones: %r", level, pdf, out[level])
                raise HandledException
    if warn_remove:
        log.warning("You are removing a prior or likelihood pdf. "
                    "Notice that if the resulting posterior is much wider "
                    "than the original one, or displaced enough, "
                    "it is probably safer to explore it directly.")
    if _prior in add:
        mlprior_names_add += [_minuslogprior + _separator + name
                              for name in add[_prior]]
        out[_prior] += list(add[_prior])
    prior_recompute_1d = (
        mlprior_names_add[:1] == [_minuslogprior + _separator + _prior_1d_name])
    # Don't initialise the theory code if not adding/recomputing theory,
    # theory-derived params or likelihoods
    recompute_theory = info_in.get(_theory) and not (
        list(add[_likelihood]) == ["one"] and
        not any([is_derived_param(pinfo)
                 for pinfo in add.get(_params, {}).values()]))
    if recompute_theory:
        # Inherit from the original chain (needs input|output_params,
        # renames, etc.)
        theory = list(info_in[_theory].keys())[0]
        info_theory_out = odict([
            [theory, recursive_update(deepcopy(info_in[_theory][theory]),
                                      add.get(_theory, {theory: {}})[theory])]])
    else:
        info_theory_out = None
    chi2_names_add = [_chi2 + _separator + name
                      for name in add[_likelihood] if name != "one"]
    out[_likelihood] += [like for like in add[_likelihood] if like != "one"]
    if recompute_theory:
        log.warning(
            "You are recomputing the theory, but in the current version this "
            "does not force recomputation of any likelihood or derived "
            "parameter, unless explicitly removed+added.")
    for level in [_prior, _likelihood]:
        for i, x_i in enumerate(out[level]):
            if x_i in list(out[level])[i + 1:]:
                log.error(
                    "You have added %s '%s', which was already present. If you "
                    "want to force its recomputation, you must also 'remove' "
                    "it.", level, x_i)
                raise HandledException
    # 3. Create output collection
    if "suffix" not in info_post:
        log.error("You need to provide a 'suffix' for your chains.")
        raise HandledException
    # Use default prefix if it exists. If it does not, produce no output by
    # default. {post: {output: None}} suppresses output, and if it's a string,
    # it updates the prefix.
    out_prefix = info_post.get(_output_prefix, info.get(_output_prefix))
    if out_prefix not in [None, False]:
        out_prefix += "_" + _post + "_" + info_post["suffix"]
    output_out = Output(output_prefix=out_prefix, force_output=info.get(_force))
    info_out = deepcopy(info)
    info_out[_post] = info_post
    # Updated with input info and extended (full) "add" info
    info_out.update(info_in)
    info_out[_post]["add"] = add
    dummy_model_out = DummyModel(out[_params], out[_likelihood],
                                 info_prior=out[_prior])
    if recompute_theory:
        theory = list(info_theory_out.keys())[0]
        if _input_params not in info_theory_out[theory]:
            log.error(
                "You appear to be post-processing a chain generated with an "
                "older version of Cobaya. For post-processing to work, please "
                "edit the '[root]__full.info' file of the original chain to "
                "add, inside the theory code block, the list of its input "
                "parameters. E.g.\n----\n"
                "theory:\n %s:\n  input_params: [param1, param2, ...]\n"
                "----\nIf you get strange errors later, it is likely that you "
                "did not specify the correct set of theory parameters.\n"
                "The full set of input parameters are %s.",
                theory, list(dummy_model_out.parameterization.input_params()))
            raise HandledException
    prior_add = Prior(dummy_model_out.parameterization, add.get(_prior))
    likelihood_add = Likelihood(add[_likelihood], parameterization_like,
                                info_theory=info_theory_out,
                                modules=info.get(_path_install))
    # Remove auxiliary "one" before dumping -- 'add' *is* info_out[_post]["add"]
    add[_likelihood].pop("one")
    if likelihood_add.theory:
        # Make sure that theory.needs is called at least once, for adjustments
        likelihood_add.theory.needs()
    collection_out = Collection(dummy_model_out, output_out, name="1")
    output_out.dump_info({}, info_out)
    # 4. Main loop!
log.info("Running post-processing...") last_percent = 0 for i, point in enumerate(collection_in.data.itertuples()): log.debug("Point: %r", point) sampled = [ getattr(point, param) for param in dummy_model_in.parameterization.sampled_params() ] derived = odict( [[param, getattr(point, param, None)] for param in dummy_model_out.parameterization.derived_params()]) inputs = odict([[ param, getattr( point, param, dummy_model_in.parameterization.constant_params().get( param, dummy_model_out.parameterization.constant_params().get( param, None))) ] for param in dummy_model_out.parameterization.input_params()]) # Solve inputs that depend on a function and were not saved # (we don't use the Parameterization_to_input method in case there are references # to functions that cannot be loaded at the moment) for p, value in inputs.items(): if value is None: func = dummy_model_out.parameterization._input_funcs[p] args = dummy_model_out.parameterization._input_args[p] inputs[p] = func(*[getattr(point, arg) for arg in args]) # Add/remove priors priors_add = prior_add.logps(sampled) if not prior_recompute_1d: priors_add = priors_add[1:] logpriors_add = odict(zip(mlprior_names_add, priors_add)) logpriors_new = [ logpriors_add.get(name, -getattr(point, name, 0)) for name in collection_out.minuslogprior_names ] if log.getEffectiveLevel() <= logging.DEBUG: log.debug("New set of priors: %r", dict(zip(dummy_model_out.prior, logpriors_new))) if -np.inf in logpriors_new: continue # Add/remove likelihoods output_like = [] if likelihood_add: # Notice "one" (last in likelihood_add) is ignored: not in chi2_names loglikes_add = odict( zip(chi2_names_add, likelihood_add.logps(inputs, _derived=output_like))) output_like = dict(zip(likelihood_add.output_params, output_like)) else: loglikes_add = dict() loglikes_new = [ loglikes_add.get(name, -0.5 * getattr(point, name, 0)) for name in collection_out.chi2_names ] if log.getEffectiveLevel() <= logging.DEBUG: log.debug("New set of likelihoods: %r", dict(zip(dummy_model_out.likelihood, loglikes_new))) if output_like: log.debug("New set of likelihood-derived parameters: %r", output_like) if -np.inf in loglikes_new: continue # Add/remove derived parameters and change priors of sampled parameters for p in add[_params]: if p in dummy_model_out.parameterization._directly_output: derived[p] = output_like[p] elif p in dummy_model_out.parameterization._derived_funcs: func = dummy_model_out.parameterization._derived_funcs[p] args = dummy_model_out.parameterization._derived_args[p] derived[p] = func(*[ getattr(point, arg, output_like.get(arg, None)) for arg in args ]) if log.getEffectiveLevel() <= logging.DEBUG: log.debug( "New derived parameters: %r", dict([[ p, derived[p] ] for p in dummy_model_out.parameterization.derived_params() if p in add[_params]])) # Save to the collection (keep old weight for now) collection_out.add(sampled, derived=derived.values(), weight=getattr(point, _weight), logpriors=logpriors_new, loglikes=loglikes_new) # Display progress percent = np.round(i / collection_in.n() * 100) if percent != last_percent and not percent % 5: last_percent = percent progress_bar(log, percent, " (%d/%d)" % (i, collection_in.n())) if not collection_out.data.last_valid_index(): log.error( "No elements in the final sample. Possible causes: " "added a prior or likelihood valued zero over the full sampled domain, " "or the computation of the theory failed everywhere, etc.") raise HandledException # Reweight -- account for large dynamic range! 
    # Reweight -- account for large dynamic range!
    # Prefer to rescale +inf to finite, and ignore final points with -inf.
    # Remove -inf's (0-weight), and correct indices
    difflogmax = max(collection_in[_minuslogpost] -
                     collection_out[_minuslogpost])
    collection_out.data[_weight] *= np.exp(
        collection_in[_minuslogpost] - collection_out[_minuslogpost] -
        difflogmax)
    collection_out.data = (
        collection_out.data[collection_out.data.weight > 0].reset_index(
            drop=True))
    collection_out._n = collection_out.data.last_valid_index() + 1
    # Write!
    collection_out._out_update()
    log.info("Finished! Final number of samples: %d", collection_out.n())
    return info_out, {"sample": collection_out}
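# Usage sketch (added for illustration; not part of the original module).
# Reprocesses an existing chain. All names and values here are hypothetical:
# `chains/gaussian` stands for the output prefix of the original run, and
# `my_new_like` for an extra external likelihood to fold into the posterior.
def _example_post_usage():
    info = {"output": "chains/gaussian",
            "post": {
                # Required: names the output chain,
                # e.g. chains/gaussian_post_new.*
                "suffix": "new",
                # Discard initial samples (a fraction if < 1)
                "skip": 0.3,
                # Add an extra external likelihood to the posterior
                "add": {"likelihood": {
                    "my_new_like": lambda x: -0.5 * (x - 1) ** 2}}}}
    updated_info, products = post(info)
    print(products["sample"])  # the re-weighted output Collection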