def get_best_covmat_ext(packages_path, params_info, likelihoods_info, random_state,
                        cached=True) -> Optional[dict]:
    """
    Covmat finder on which `get_best_covmat` relies; call it directly when finer
    control over the inputs is needed.

    The candidates in the covmat database are filtered in successive rounds, each one
    keeping only the best-scoring entries of the previous round:

    1. largest number of parameters in common with ``params_info`` (incl. renames),
    2. largest number of likelihood names/aliases found in the covmat name,
    3. fewest parameters in the covmat,
    4. fewest words in the covmat name.

    If more than one candidate survives, one of them is drawn using ``random_state``.

    Returns the same dict as `get_best_covmat`, except for the covariance matrix
    itself, or ``None`` if the first round leaves no candidates.
    """
    global _loaded_covmats_database
    # Re-use the module-level database if there is one already; otherwise get it now.
    if not _loaded_covmats_database:
        _loaded_covmats_database = get_covmat_database(packages_path, cached=cached)
    database = _loaded_covmats_database

    # All names by which the parameters and likelihoods may be known
    known_params = set(chain.from_iterable(
        [name] + str_to_list(p_info.get("renames", []))
        for name, p_info in params_info.items()))
    known_likes = set(chain.from_iterable(
        [name] + str_to_list((l_info or {}).get("aliases", []))
        for name, l_info in likelihoods_info.items()))
    # A likelihood name only counts if it is delimited on both sides in the covmat name
    sep = r"[_\.]"
    like_patterns = [re.compile(sep + re.escape(name) + sep) for name in known_likes]

    def debug_names(header, subset):
        # Prints the names of the surviving candidates, one per line (debug mode only)
        if is_debug(log):
            log.debug(header + "\n - ".join([c["name"] for c in subset]))

    # Round 1: match number of params
    def n_common_params(covmat):
        return len(known_params.intersection(covmat["params"]))

    by_params = get_best_score(database, n_common_params)
    if not by_params:
        log.warning(
            "No covariance matrix found including at least one of the given parameters")
        return None
    # No debug print here: way too many!

    # Round 2: match likelihood names / keywords
    def n_matched_likes(covmat):
        return sum(1 for pattern in like_patterns if pattern.search(covmat["name"]))

    by_likes = get_best_score(by_params, n_matched_likes)
    debug_names("Subset based on params + likes:\n - ", by_likes)

    # Finally, in case there is more than one, select shortest #params and name
    # (simpler!) -- #params first, to avoid extended models with shorter covmat name
    by_fewest_params = get_best_score(by_likes, lambda covmat: -len(covmat["params"]))
    debug_names("Subset based on params + likes + fewest params:\n - ",
                by_fewest_params)

    def name_brevity(covmat):
        words = covmat["name"].replace("_", " ").replace("-", " ").split()
        return -len(words)

    finalists = get_best_score(by_fewest_params, name_brevity)
    debug_names("Subset based on params + likes + fewest params + shortest name:\n - ",
                finalists)

    # If there is more than one (unlikely), just pick one at random
    if len(finalists) > 1:
        log.warning("WARNING: >1 possible best covmats: %r",
                    [c["name"] for c in finalists])
    rng = np.random.default_rng(random_state)
    chosen = rng.choice(range(len(finalists)))
    return finalists[chosen].copy()
def get_sampler(info_sampler: SamplersDict, model: Model,
                output: Optional[Output] = None,
                packages_path: Optional[str] = None) -> 'Sampler':
    """
    Instantiates the sampler described by ``info_sampler`` for the given ``model``.

    :param info_sampler: dictionary with the sampler info (it is copied, not modified)
    :param model: model that the sampler will explore
    :param output: output driver; if ``None``, an ``OutputDummy`` is used instead
    :param packages_path: passed on to the sampler class as ``packages_path``
    :return: the sampler instance
    """
    assert isinstance(info_sampler, Mapping), (
        "The first argument must be a dictionary with the info needed for the sampler. "
        "If you were trying to pass the name of an input file instead, "
        "load it first with 'cobaya.input.load_input', "
        "or, if you were passing a yaml string, load it with 'cobaya.yaml.yaml_load'.")
    sampler_log = get_logger(__name__)
    # Work on a copy, so that the caller's dictionary is left untouched
    info_sampler = deepcopy_where_possible(info_sampler)
    output = OutputDummy() if output is None else output

    # Sanity check + fill in the defaults
    check_sane_info_sampler(info_sampler)
    full_info = update_info({"sampler": info_sampler})["sampler"]  # type: ignore
    if is_debug(sampler_log):
        sampler_log.debug("Input info updated with defaults (dumped to YAML):\n%s",
                          yaml_dump(full_info))

    # Find out the sampler class, and check compatibility with a previous run (if any)
    sampler_name, sampler_class = get_sampler_name_and_class(
        full_info, logger=sampler_log)
    previous_info = (output.reload_updated_info(use_cache=True) or {}).get("sampler")
    resuming = output.is_resuming()
    check_sampler_info(previous_info, full_info, is_resuming=resuming)
    sampler_options = full_info[sampler_name]
    # Check if resumable run
    sampler_class.check_force_resume(output, info=sampler_options)

    # Create the sampler
    the_sampler = sampler_class(
        sampler_options, model, output, packages_path=packages_path)

    # If there is an actual output, dump the updated info
    if output:
        info_to_dump = model.info()
        info_to_dump["sampler"] = {sampler_name: the_sampler.info()}
        info_to_dump["output"] = os.path.join(output.folder, output.prefix)
        output.check_and_dump_info(None, info_to_dump, check_compatible=False)
    return the_sampler
def post(info_or_yaml_or_file: Union[InputDict, str, os.PathLike],
         sample: Union[SampleCollection, List[SampleCollection], None] = None
         ) -> PostTuple:
    """
    Post-processes an existing sample as instructed by the ``post`` block of the input:
    priors, likelihoods, theories and derived parameters can be added and removed, and
    the sample points are importance-reweighted accordingly.

    :param info_or_yaml_or_file: input options in any form accepted by
       ``load_input_dict``; it must contain a non-empty ``post`` block.
    :param sample: collection, or list of collections, to be post-processed. If not
       given (or empty), the input collections are loaded from the files found under
       the ``output`` prefix of the main block. When running with MPI, each process is
       assumed to be passed the collections that it should process. A list passed here
       is not modified.
    :return: ``PostTuple`` whose ``info`` is the combined (original + added - removed)
       info, and whose ``products`` is a dict with keys ``sample`` (output
       collection(s)), ``stats``, ``logpost_weight_offset`` and ``weights``.
    :raises LoggedError: if the input is inconsistent (e.g. no ``post`` block, no
       samples to load, disallowed changes to parameters, pre-existing output without
       ``force``) or if no points are left in the output sample.
    """
    info = load_input_dict(info_or_yaml_or_file)
    logger_setup(info.get("debug"), info.get("debug_file"))
    log = get_logger(__name__)
    # MARKED FOR DEPRECATION IN v3.0
    if info.get("modules"):
        raise LoggedError(log, "The input field 'modules' has been deprecated. "
                               "Please use instead %r", packages_path_input)
    # END OF DEPRECATION BLOCK
    info_post: PostDict = info.get("post") or {}
    if not info_post:
        raise LoggedError(log, "No 'post' block given. Nothing to do!")
    if mpi.is_main_process() and info.get("resume"):
        log.warning("Resuming not implemented for post-processing. Re-starting.")
    if not info.get("output") and info_post.get("output") \
            and not info.get("params"):
        raise LoggedError(log, "The input dictionary must be a full option "
                               "dictionary, or have an existing 'output' root to load "
                               "previous settings from ('output' to read from is in the "
                               "main block not under 'post'). ")
    # 1. Load existing sample
    output_in = get_output(prefix=info.get("output"))
    if output_in:
        info_in = output_in.load_updated_info() or update_info(info)
    else:
        info_in = update_info(info)
    params_in: ExpandedParamsDict = info_in["params"]  # type: ignore
    dummy_model_in = DummyModel(params_in, info_in.get("likelihood", {}),
                                info_in.get("prior"))

    in_collections = []
    thin = info_post.get("thin", 1)
    skip = info_post.get("skip", 0)
    if info.get('thin') is not None or info.get('skip') is not None:  # type: ignore
        raise LoggedError(log, "'thin' and 'skip' should be "
                               "parameters of the 'post' block")

    if sample:
        # If MPI, assume for each MPI process post is passed in the list of
        # collections that should be processed by that process
        # (e.g. single chain output from sampler)
        if isinstance(sample, SampleCollection):
            in_collections = [sample]
        else:
            # Copy the container, so that the caller's list is not modified below
            in_collections = list(sample)
        for i, collection in enumerate(in_collections):
            if skip:
                # A fractional skip is relative to the length of *each* collection,
                # so it must not overwrite the value used for the next ones.
                if 0 < skip < 1:
                    n_skip = int(round(skip * len(collection)))
                else:
                    n_skip = skip
                collection = collection.filtered_copy(slice(n_skip, None))
            if thin != 1:
                collection = collection.thin_samples(thin)
            in_collections[i] = collection
    elif output_in:
        files = output_in.find_collections()
        numbered = files
        if not numbered:
            # look for un-numbered output files
            files = output_in.find_collections(name=False)
        if files:
            if mpi.size() > len(files):
                raise LoggedError(log, "Number of MPI processes (%s) is larger than "
                                       "the number of sample files (%s)",
                                  mpi.size(), len(files))
            # Files are distributed round-robin among the MPI processes
            for num in range(mpi.rank(), len(files), mpi.size()):
                in_collections += [SampleCollection(
                    dummy_model_in, output_in,
                    onload_thin=thin, onload_skip=skip, load=True,
                    file_name=files[num], name=str(num + 1) if numbered else "")]
        else:
            raise LoggedError(log, "No samples found for the input model with prefix %s",
                              os.path.join(output_in.folder, output_in.prefix))
    else:
        raise LoggedError(log, "No output from where to load from, "
                               "nor input collections given.")
    if any(len(c) <= 1 for c in in_collections):
        raise LoggedError(
            log, "Not enough samples for post-processing. Try using a larger sample, "
                 "or skipping or thinning less.")
    mpi.sync_processes()
    log.info("Will process %d sample points.", sum(len(c) for c in in_collections))

    # 2. Compare old and new info: determine what to do
    add = info_post.get("add") or {}
    if "remove" in add:
        raise LoggedError(log, "remove block should be under 'post', not 'add'")
    remove = info_post.get("remove") or {}
    # Add a dummy 'one' likelihood, to absorb unused parameters
    if not add.get("likelihood"):
        add["likelihood"] = {}
    add["likelihood"]["one"] = None
    # Expand the "add" info, but don't add new default sampled parameters
    orig_params = set(add.get("params") or [])
    add = update_info(add, add_aggr_chi2=False)
    add_params: ExpandedParamsDict = add["params"]  # type: ignore
    for p in set(add_params) - orig_params:
        if p in params_in:
            add_params.pop(p)

    # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters
    out_combined_params = deepcopy_where_possible(params_in)
    remove_params = list(str_to_list(remove.get("params")) or [])
    for p in remove_params:
        pinfo = params_in.get(p)
        if pinfo is None or not is_derived_param(pinfo):
            raise LoggedError(
                log,
                "You tried to remove parameter '%s', which is not a derived parameter. "
                "Only derived parameters can be removed during post-processing.", p)
        out_combined_params.pop(p)
    # Force recomputation of aggregated chi2
    for p in list(out_combined_params):
        if p.startswith(get_chi2_name("")):
            out_combined_params.pop(p)
    prior_recompute_1d = False
    for p, pinfo in add_params.items():
        pinfo_in = params_in.get(p)
        if is_sampled_param(pinfo):
            if not is_sampled_param(pinfo_in):
                # No added sampled parameters (de-marginalisation not implemented)
                if pinfo_in is None:
                    raise LoggedError(
                        log, "You added a new sampled parameter %r (maybe accidentally "
                             "by adding a new likelihood that depends on it). "
                             "Adding new sampled parameters is not possible. Try fixing "
                             "it to some value.", p)
                else:
                    raise LoggedError(
                        log,
                        "You tried to change the prior of parameter '%s', "
                        "but it was not a sampled parameter. "
                        "To change that prior, you need to define as an external one.", p)
            # recompute prior if potentially changed sampled parameter priors
            prior_recompute_1d = True
        elif is_derived_param(pinfo):
            if p in out_combined_params:
                raise LoggedError(
                    log, "You tried to add derived parameter '%s', which is already "
                         "present. To force its recomputation, 'remove' it too.", p)
        elif is_fixed_or_function_param(pinfo):
            # Only one possibility left "fixed" parameter that was not present before:
            # input of new likelihood, or just an argument for dynamical derived (dropped)
            if pinfo_in and p in params_in and pinfo["value"] != pinfo_in.get("value"):
                raise LoggedError(
                    log,
                    "You tried to add a fixed parameter '%s: %r' that was already present"
                    " but had a different value or was not fixed. This is not allowed. "
                    "The old info of the parameter was '%s: %r'",
                    p, dict(pinfo), p, dict(pinfo_in))
        elif not pinfo_in:  # OK as long as we have known value for it
            raise LoggedError(log, "Parameter %s no known value. ", p)
        out_combined_params[p] = pinfo

    out_combined: InputDict = {"params": out_combined_params}  # type: ignore
    # Turn the rest of *derived* parameters into constants,
    # so that the likelihoods do not try to recompute them
    # But be careful to exclude *input* params that have a "derived: True" value
    # (which in "updated info" turns into "derived: 'lambda [x]: [x]'")
    # Don't assign to derived parameters to theories, only likelihoods, so they can be
    # recomputed if needed. If the theory does not need to be computed, it doesn't matter
    # if it is already assigned parameters in the usual way; likelihoods can get
    # the required derived parameters from the stored sample derived parameter inputs.
    out_params_with_computed = deepcopy_where_possible(out_combined_params)

    dropped_theory = set()
    for p, pinfo in out_params_with_computed.items():
        if (is_derived_param(pinfo) and "value" not in pinfo
                and p not in add_params):
            # Placeholder value: the actual one is taken from each input sample point
            out_params_with_computed[p] = {"value": np.nan}
            dropped_theory.add(p)
    # 2.2 Manage adding/removing priors and likelihoods
    warn_remove = False
    kind: ModelBlock
    for kind in ("prior", "likelihood", "theory"):
        out_combined[kind] = deepcopy_where_possible(info_in.get(kind)) or {}
        for remove_item in str_to_list(remove.get(kind)) or []:
            # NOTE(review): `pop` with a default does not raise if the item is missing,
            # so the error branch below looks unreachable -- confirm whether removing a
            # non-existent item should really be an error before changing this.
            try:
                out_combined[kind].pop(remove_item, None)
                if remove_item not in (add.get(kind) or []) and kind != "theory":
                    warn_remove = True
            except ValueError:
                raise LoggedError(
                    log, "Trying to remove %s '%s', but it is not present. "
                         "Existing ones: %r", kind, remove_item, list(out_combined[kind]))
        if kind != "theory" and kind in add:
            dups = set(add.get(kind) or []).intersection(out_combined[kind]) - {"one"}
            if dups:
                raise LoggedError(
                    log, "You have added %s '%s', which was already present. If you "
                         "want to force its recomputation, you must also 'remove' it.",
                    kind, dups)
            out_combined[kind].update(add[kind])

    if warn_remove and mpi.is_main_process():
        log.warning("You are removing a prior or likelihood pdf. "
                    "Notice that if the resulting posterior is much wider "
                    "than the original one, or displaced enough, "
                    "it is probably safer to explore it directly.")

    mlprior_names_add = minuslogprior_names(add.get("prior") or [])
    chi2_names_add = [get_chi2_name(name) for name in add["likelihood"] if
                      name != "one"]
    out_combined["likelihood"].pop("one", None)

    add_theory = add.get("theory")
    if add_theory:
        # Only the dummy "one" likelihood is being added, and no derived parameters
        if len(add["likelihood"]) == 1 and not any(
                is_derived_param(pinfo) for pinfo in add_params.values()):
            log.warning("You are adding a theory, but this does not force recomputation "
                        "of any likelihood or derived parameters unless explicitly "
                        "removed+added.")
        # Inherit from the original chain (input|output_params, renames, etc)
        added_theory = add_theory.copy()
        for theory, theory_info in out_combined["theory"].items():
            if theory in list(added_theory):
                out_combined["theory"][theory] = \
                    recursive_update(theory_info, added_theory.pop(theory))
        out_combined["theory"].update(added_theory)

    # Prepare recomputation of aggregated chi2
    # (they need to be recomputed by hand, because auto-computation won't pick up
    #  old likelihoods for a given type)
    all_types = {like: str_to_list(opts.get("type") or [])
                 for like, opts in out_combined["likelihood"].items()}
    types = set(chain(*all_types.values()))
    inv_types = {t: [like for like, like_types in all_types.items() if t in like_types]
                 for t in sorted(types)}
    add_aggregated_chi2_params(out_combined_params, types)

    # 3. Create output collection
    # Use default prefix if it exists. If it does not, produce no output by default.
    # {post: {output: None}} suppresses output, and if it's a string, updates it.
    out_prefix = info_post.get("output", info.get("output"))
    if out_prefix:
        suffix = info_post.get("suffix")
        if not suffix:
            raise LoggedError(log, "You need to provide a '%s' for your output chains.",
                              "suffix")
        out_prefix += separator_files + "post" + separator_files + suffix
    output_out = get_output(prefix=out_prefix, force=info.get("force"))
    output_out.set_lock()

    if output_out and not output_out.force and output_out.find_collections():
        raise LoggedError(log, "Found existing post-processing output with prefix %r. "
                               "Delete it manually or re-run with `force: True` "
                               "(or `-f`, `--force` from the shell).", out_prefix)
    elif output_out and output_out.force and mpi.is_main_process():
        output_out.delete_infos()
        for _file in output_out.find_collections():
            output_out.delete_file_or_folder(_file)
    info_out = deepcopy_where_possible(info)
    info_post = info_post.copy()
    info_out["post"] = info_post
    # Updated with input info and extended (updated) add info
    info_out.update(info_in)  # type: ignore
    info_post["add"] = add

    dummy_model_out = DummyModel(out_combined_params, out_combined["likelihood"],
                                 info_prior=out_combined["prior"])
    out_func_parameterization = Parameterization(out_params_with_computed)

    # TODO: check allow_renames=False?
    model_add = Model(out_params_with_computed, add["likelihood"],
                      info_prior=add.get("prior"), info_theory=out_combined["theory"],
                      packages_path=(info_post.get(packages_path_input) or
                                     info.get(packages_path_input)),
                      allow_renames=False, post=True,
                      stop_at_error=info.get('stop_at_error', False),
                      skip_unused_theories=True, dropped_theory_params=dropped_theory)
    # Remove auxiliary "one" before dumping -- 'add' *is* info_out["post"]["add"]
    add["likelihood"].pop("one")
    out_collections = [SampleCollection(dummy_model_out, output_out, name=c.name,
                                        cache_size=OutputOptions.default_post_cache_size)
                       for c in in_collections]
    # TODO: should maybe add skip/thin to out_combined, so can tell post-processed?
    output_out.check_and_dump_info(info_out, out_combined, check_compatible=False)
    collection_in = in_collections[0]
    collection_out = out_collections[0]

    last_percent = None
    known_constants = dummy_model_out.parameterization.constant_params()
    known_constants.update(dummy_model_in.parameterization.constant_params())
    missing_params = dummy_model_in.parameterization.sampled_params().keys() - set(
        collection_in.columns)
    if missing_params:
        raise LoggedError(log, "Input samples do not contain expected sampled parameter "
                               "values: %s", missing_params)

    # Priors expected in the output that are neither added now nor stored in the input
    missing_priors = set(name for name in collection_out.minuslogprior_names if
                         name not in mlprior_names_add
                         and name not in collection_in.columns)
    if _minuslogprior_1d_name in missing_priors:
        prior_recompute_1d = True
    if prior_recompute_1d:
        missing_priors.discard(_minuslogprior_1d_name)
        mlprior_names_add.insert(0, _minuslogprior_1d_name)
    prior_regenerate: Optional[Prior]
    if missing_priors and "prior" in info_in:
        # in case there are input priors that are not stored in input samples
        # e.g. when postprocessing GetDist/CosmoMC-format chains
        in_names = minuslogprior_names(info_in["prior"])
        info_prior = {piname: inf for (piname, inf), in_name in
                      zip(info_in["prior"].items(), in_names) if
                      in_name in missing_priors}
        regenerated_prior_names = minuslogprior_names(info_prior)
        missing_priors.difference_update(regenerated_prior_names)
        prior_regenerate = Prior(dummy_model_in.parameterization, info_prior)
    else:
        prior_regenerate = None
        regenerated_prior_names = None
    if missing_priors:
        raise LoggedError(log, "Missing priors: %s", missing_priors)

    mpi.sync_processes()
    output_in.check_lock()

    # 4. Main loop! Loop over input samples and adjust as required.
    if mpi.is_main_process():
        log.info("Running post-processing...")
    # Offset subtracted from the log-posterior differences before exponentiating;
    # it is fixed once (see `set_difflogmax`) and shared by all collections.
    difflogmax: Optional[float] = None
    to_do = sum(len(c) for c in in_collections)
    weights = []
    done = 0
    last_dump_time = time.time()
    for collection_in, collection_out in zip(in_collections, out_collections):
        importance_weights = []

        def set_difflogmax():
            # Fixes the offset from the points processed so far, and re-weights
            # the points already stored in the output collection accordingly.
            nonlocal difflogmax
            difflog = (collection_in[OutPar.minuslogpost].to_numpy(
                dtype=np.float64)[:len(collection_out)]
                       - collection_out[OutPar.minuslogpost].to_numpy(dtype=np.float64))
            difflogmax = np.max(difflog)
            if abs(difflogmax) < 1:
                difflogmax = 0  # keep simple when e.g. very similar
            log.debug("difflogmax: %g", difflogmax)
            if mpi.more_than_one_process():
                difflogmax = max(mpi.allgather(difflogmax))
            if mpi.is_main_process():
                log.debug("Set difflogmax: %g", difflogmax)
            _weights = np.exp(difflog - difflogmax)
            importance_weights.extend(_weights)
            collection_out.reweight(_weights)

        for i, point in collection_in.data.iterrows():
            all_params = point.to_dict()
            for p in remove_params:
                all_params.pop(p, None)
            log.debug("Point: %r", point)
            sampled = np.array([all_params[param] for param in
                                dummy_model_in.parameterization.sampled_params()])
            all_params = out_func_parameterization.to_input(all_params).copy()

            # Add/remove priors
            if prior_recompute_1d:
                priors_add = [model_add.prior.logps_internal(sampled)]
                if priors_add[0] == -np.inf:
                    continue
            else:
                priors_add = []
            if model_add.prior.external:
                priors_add.extend(model_add.prior.logps_external(all_params))

            logpriors_add = dict(zip(mlprior_names_add, priors_add))
            # For priors not added now, undo the minus sign of the stored value
            logpriors_new = [logpriors_add.get(name, - point.get(name, 0))
                             for name in collection_out.minuslogprior_names]
            if prior_regenerate:
                regenerated = dict(zip(regenerated_prior_names,
                                       prior_regenerate.logps_external(all_params)))
                for _i, name in enumerate(collection_out.minuslogprior_names):
                    if name in regenerated_prior_names:
                        logpriors_new[_i] = regenerated[name]

            if is_debug(log):
                log.debug("New set of priors: %r",
                          dict(zip(dummy_model_out.prior, logpriors_new)))
            if -np.inf in logpriors_new:
                continue
            # Add/remove likelihoods and/or (re-)calculate derived parameters
            loglikes_add, output_derived = model_add._loglikes_input_params(
                all_params, return_output_params=True)
            loglikes_add = dict(zip(chi2_names_add, loglikes_add))
            output_derived = dict(zip(model_add.output_params, output_derived))
            # For likelihoods not added now, convert the stored chi2 into a log-like
            loglikes_new = [loglikes_add.get(name, -0.5 * point.get(name, 0))
                            for name in collection_out.chi2_names]
            if is_debug(log):
                log.debug("New set of likelihoods: %r",
                          dict(zip(dummy_model_out.likelihood, loglikes_new)))
                if output_derived:
                    log.debug("New set of derived parameters: %r", output_derived)
            if -np.inf in loglikes_new:
                continue
            all_params.update(output_derived)

            all_params.update(out_func_parameterization.to_derived(all_params))
            derived = {param: all_params.get(param) for param in
                       dummy_model_out.parameterization.derived_params()}
            # We need to recompute the aggregated chi2 by hand
            for type_, likes in inv_types.items():
                derived[get_chi2_name(type_)] = sum(
                    -2 * lvalue for lname, lvalue
                    in zip(collection_out.chi2_names, loglikes_new)
                    if undo_chi2_name(lname) in likes)
            if is_debug(log):
                log.debug("New derived parameters: %r",
                          {p: derived[p]
                           for p in dummy_model_out.parameterization.derived_params()
                           if p in add["params"]})
            # Save to the collection (keep old weight for now)
            weight = point.get(OutPar.weight)
            mpi.check_errors()
            if difflogmax is None and i > OutputOptions.reweight_after and \
                    time.time() - last_dump_time > OutputOptions.output_inteveral_s / 2:
                set_difflogmax()
                collection_out.out_update()

            if difflogmax is not None:
                logpost_new = sum(logpriors_new) + sum(loglikes_new)
                importance_weight = np.exp(logpost_new + point.get(OutPar.minuslogpost)
                                           - difflogmax)
                weight = weight * importance_weight
                importance_weights.append(importance_weight)
                if time.time() - last_dump_time > OutputOptions.output_inteveral_s:
                    collection_out.out_update()
                    last_dump_time = time.time()

            if weight > 0:
                collection_out.add(sampled, derived=derived.values(), weight=weight,
                                   logpriors=logpriors_new, loglikes=loglikes_new)

            # Display progress
            percent = int(np.round((i + done) / to_do * 100))
            if percent != last_percent and not percent % 5:
                last_percent = percent
                progress_bar(log, percent, " (%d/%d)" % (i + done, to_do))

        if difflogmax is None:
            set_difflogmax()
        # NOTE(review): a last valid index equal to 0 is falsy too -- confirm that an
        # output with a single point at index 0 cannot reach this check.
        if not collection_out.data.last_valid_index():
            raise LoggedError(
                log, "No elements in the final sample. Possible causes: "
                     "added a prior or likelihood valued zero over the full sampled "
                     "domain, or the computation of the theory failed everywhere, etc.")
        collection_out.out_update()
        weights.append(np.array(importance_weights))
        done += len(collection_in)

    assert difflogmax is not None
    # Summary statistics of this process...
    points = 0
    tot_weight = 0
    min_weight = np.inf
    max_weight = -np.inf
    max_output_weight = -np.inf
    sum_w2 = 0
    points_removed = 0
    for collection_in, collection_out, importance_weights in zip(in_collections,
                                                                 out_collections,
                                                                 weights):
        output_weights = collection_out[OutPar.weight]
        points += len(collection_out)
        tot_weight += np.sum(output_weights)
        points_removed += len(importance_weights) - len(output_weights)
        min_weight = min(min_weight, np.min(importance_weights))
        max_weight = max(max_weight, np.max(importance_weights))
        max_output_weight = max(max_output_weight, np.max(output_weights))
        sum_w2 += np.dot(output_weights, output_weights)

    # ...gathered over all MPI processes
    (tot_weights, min_weights, max_weights, max_output_weights, sum_w2s, points_s,
     points_removed_s) = mpi.zip_gather(
        [tot_weight, min_weight, max_weight, max_output_weight, sum_w2,
         points, points_removed])

    if mpi.is_main_process():
        output_out.clear_lock()
        log.info("Finished! Final number of distinct sample points: %s", sum(points_s))
        log.info("Importance weight range: %.4g -- %.4g",
                 min(min_weights), max(max_weights))
        if sum(points_removed_s):
            log.info("Points deleted due to zero weight: %s", sum(points_removed_s))
        log.info("Effective number of single samples if independent (sum w)/max(w): %s",
                 int(sum(tot_weights) / max(max_output_weights)))
        log.info(
            "Effective number of weighted samples if independent (sum w)^2/sum(w^2): "
            "%s", int(sum(tot_weights) ** 2 / sum(sum_w2s)))
    products: PostResultDict = {
        "sample": value_or_list(out_collections),
        "stats": {'min_importance_weight': (min(min_weights) / max(max_weights)),
                  'points_removed': sum(points_removed_s),
                  'tot_weight': sum(tot_weights),
                  'max_weight': max(max_output_weights),
                  'sum_w2': sum(sum_w2s),
                  'points': sum(points_s)},
        "logpost_weight_offset": difflogmax,
        "weights": value_or_list(weights)}
    return PostTuple(info=out_combined, products=products)
def run(info_or_yaml_or_file: Union[InputDict, str, os.PathLike],
        packages_path: Optional[str] = None,
        output: Union[str, LiteralFalse, None] = None,
        debug: Union[bool, int, None] = None,
        stop_at_error: Optional[bool] = None,
        resume: bool = False, force: bool = False,
        no_mpi: bool = False, test: bool = False,
        override: Optional[InputDict] = None,
        ) -> Union[InfoSamplerTuple, PostTuple]:
    """
    Run from an input dictionary, file name or yaml string, with optional arguments
    to override settings in the input as needed.

    :param info_or_yaml_or_file: input options dictionary, yaml file, or yaml text
    :param packages_path: path where external packages were installed
    :param output: path name prefix for output files, or False for no file output
    :param debug: true for verbose debug output, or a specific logging level
    :param stop_at_error: stop if an error is raised
    :param resume: continue an existing run
    :param force: overwrite existing output if it exists
    :param no_mpi: run without MPI
    :param test: only test initialization rather than actually running
    :param override: option dictionary to merge into the input one, overriding settings
       (but with lower precedence than the explicit keyword arguments)
    :return: (updated_info, sampler) tuple of options dictionary and Sampler instance,
       or (updated_info, results) if using "post" post-processing
    :raises ValueError: if both ``resume`` and ``force`` are requested
    :raises LoggedError: if no sampler is specified in the input
    """

    # This function reproduces the model-->output-->sampler pipeline one would follow
    # when instantiating by hand, but alters the order to performs checks and dump info
    # as early as possible, e.g. to check if resuming possible or `force` needed.
    if no_mpi or test:
        mpi.set_mpi_disabled()

    with mpi.ProcessState("run"):
        info: InputDict = load_info_overrides(info_or_yaml_or_file, debug, stop_at_error,
                                              packages_path, override)

        if test:
            info["test"] = True
        # If any of resume|force given as cmd args, ignore those in the input file
        if resume or force:
            if resume and force:
                raise ValueError("'resume' and 'force' are exclusive options")
            info["resume"] = bool(resume)
            info["force"] = bool(force)
        if info.get("post"):
            # Post-processing: the output given here overrides that of the 'post' block
            if isinstance(output, str) or output is False:
                info["post"]["output"] = output or None
            return post(info)

        if isinstance(output, str) or output is False:
            info["output"] = output or None
        logger_setup(info.get("debug"), info.get("debug_file"))
        logger_run = get_logger(run.__name__)
        # MARKED FOR DEPRECATION IN v3.0
        # BEHAVIOUR TO BE REPLACED BY ERROR:
        check_deprecated_modules_path(info)
        # END OF DEPRECATION BLOCK
        # 1. Prepare output driver, if requested by defining an output_prefix
        # GetDist needs to know the original sampler, so don't overwrite if minimizer
        try:
            which_sampler = list(info["sampler"])[0]
        except (KeyError, TypeError, IndexError):
            # Missing, null or empty 'sampler' block
            raise LoggedError(
                logger_run, "You need to specify a sampler using the 'sampler' key "
                            "as e.g. `sampler: {mcmc: None}.`")
        infix = "minimize" if which_sampler == "minimize" else None
        with get_output(prefix=info.get("output"), resume=info.get("resume"),
                        force=info.get("force"), infix=infix) as out:
            # 2. Update the input info with the defaults for each component
            updated_info = update_info(info)
            if is_debug(logger_run):
                # Dump only if not doing output
                # (otherwise, the user can check the .updated file)
                if not out and mpi.is_main_process():
                    logger_run.info(
                        "Input info updated with defaults (dumped to YAML):\n%s",
                        yaml_dump(sort_cosmetic(updated_info)))
            # 3. If output requested, check compatibility if existing one, and dump.
            # 3.1 First: model only
            out.check_and_dump_info(info, updated_info, cache_old=True,
                                    ignore_blocks=["sampler"])
            # 3.2 Then sampler -- 1st get the last sampler mentioned in the updated.yaml
            # TODO: ideally, using Minimizer would *append* to the sampler block.
            #       Some code already in place, but not possible at the moment.
            try:
                last_sampler = list(updated_info["sampler"])[-1]
                last_sampler_info = {
                    last_sampler: updated_info["sampler"][last_sampler]}
            except (KeyError, TypeError, IndexError):
                raise LoggedError(logger_run, "No sampler requested.")
            sampler_name, sampler_class = get_sampler_name_and_class(
                last_sampler_info)
            check_sampler_info(
                (out.reload_updated_info(use_cache=True) or {}).get("sampler"),
                updated_info["sampler"], is_resuming=out.is_resuming())
            # Dump again, now including sampler info
            out.check_and_dump_info(info, updated_info, check_compatible=False)
            # Check if resumable run
            sampler_class.check_force_resume(
                out, info=updated_info["sampler"][sampler_name])
            # 4. Initialize the posterior and the sampler
            with Model(updated_info["params"], updated_info["likelihood"],
                       updated_info.get("prior"), updated_info.get("theory"),
                       packages_path=info.get("packages_path"),
                       timing=updated_info.get("timing"),
                       allow_renames=False,
                       stop_at_error=info.get("stop_at_error", False)) as model:
                # Re-dump the updated info, now containing parameter routes and version
                updated_info = recursive_update(updated_info, model.info())
                out.check_and_dump_info(None, updated_info, check_compatible=False)
                sampler = sampler_class(
                    updated_info["sampler"][sampler_name], model, out,
                    name=sampler_name, packages_path=info.get("packages_path"))
                # Re-dump updated info, now also containing updates from the sampler
                updated_info["sampler"][sampler_name] = \
                    recursive_update(updated_info["sampler"][sampler_name],
                                     sampler.info())
                out.check_and_dump_info(None, updated_info, check_compatible=False)
                mpi.sync_processes()
                if info.get("test", False):
                    logger_run.info(
                        "Test initialization successful! "
                        "You can probably run now without `--%s`.", "test")
                    return InfoSamplerTuple(updated_info, sampler)
                # Run the sampler
                sampler.run()

    return InfoSamplerTuple(updated_info, sampler)