# Global imports
import os
import re
import sys
import inspect
import logging
from itertools import chain
from tempfile import gettempdir
from typing import Any, Callable, Mapping

import numpy as np

# Cobaya-internal imports (module locations assumed from the usual cobaya layout)
from cobaya.collection import SampleCollection
from cobaya.component import ComponentNotInstalledError, load_external_module
from cobaya.conventions import Extension, derived_par_name_separator
from cobaya.install import download_github_release
from cobaya.log import LoggedError, NoLogging, get_logger
from cobaya.mpi import is_main_process, share_mpi, sync_processes
from cobaya.sampler import Sampler
from cobaya.tools import (NumberWithUnits, find_with_regexp,
                          get_compiled_import_path, get_external_function,
                          read_dnumber)
from cobaya.yaml import yaml_dump_file


class polychord(Sampler):
    r"""
    PolyChord sampler \cite{Handley:2015fda,2015MNRAS.453.4384H}, a nested sampler
    tailored for high-dimensional parameter spaces with a speed hierarchy.
    """
    # Name of the PolyChord repo and version to download
    _pc_repo_name = "PolyChord/PolyChordLite"
    _pc_repo_version = "1.20.1"
    _base_dir_suffix = "polychord_raw"
    _clusters_dir = "clusters"
    _at_resume_prefer_old = Sampler._at_resume_prefer_old + ["blocking"]
    _at_resume_prefer_new = Sampler._at_resume_prefer_new + ["callback_function"]
    pypolychord: Any

    # Variables from the yaml
    do_clustering: bool
    num_repeats: int
    confidence_for_unbounded: float
    callback_function: Callable
    blocking: Any
    measure_speeds: bool
    oversample_power: float
    nlive: NumberWithUnits
    path: str
    logzero: float
    max_ndead: int

    def initialize(self):
        """Imports the PolyChord sampler and prepares its arguments."""
        try:
            install_path = (lambda _p: self.get_path(_p) if _p else None)(
                self.packages_path)
            self.pc = load_external_module(
                "pypolychord", path=self.path, install_path=install_path,
                min_version=self._pc_repo_version,
                get_import_path=get_compiled_import_path,
                logger=self.log, not_installed_level="debug")
        except ComponentNotInstalledError as excpt:
            raise ComponentNotInstalledError(
                self.log, (f"Could not find PolyChord: {excpt}. "
                           "To install it, run `cobaya-install polychord`"))
        with NoLogging(logging.CRITICAL):
            settings = load_external_module(
                "pypolychord.settings", path=self.path, install_path=install_path,
                get_import_path=get_compiled_import_path, logger=self.log)
        # Prepare arguments and settings
        self.n_sampled = len(self.model.parameterization.sampled_params())
        self.n_derived = len(self.model.parameterization.derived_params())
        self.n_priors = len(self.model.prior)
        self.n_likes = len(self.model.likelihood)
        self.nDims = self.model.prior.d()
        self.nDerived = self.n_derived + self.n_priors + self.n_likes
        if self.logzero is None:
            self.logzero = np.nan_to_num(-np.inf)
        if self.max_ndead == np.inf:
            self.max_ndead = -1
        self._quants_d_units = ["nlive", "max_ndead"]
        for p in self._quants_d_units:
            if getattr(self, p) is not None:
                setattr(self, p, NumberWithUnits(
                    getattr(self, p), "d", scale=self.nDims, dtype=int).value)
        self._quants_nlive_units = ["nprior", "nfail"]
        for p in self._quants_nlive_units:
            if getattr(self, p) is not None:
                setattr(self, p, NumberWithUnits(
                    getattr(self, p), "nlive", scale=self.nlive, dtype=int).value)
        # Fill the automatic ones
        if getattr(self, "feedback", None) is None:
            values = {logging.CRITICAL: 0, logging.ERROR: 0, logging.WARNING: 0,
                      logging.INFO: 1, logging.DEBUG: 2}
            self.feedback = values[self.log.getEffectiveLevel()]
        # Prepare output folders and prefixes
        if self.output:
            self.file_root = self.output.prefix
            self.read_resume = self.output.is_resuming()
        else:
            output_prefix = share_mpi(hex(int(self._rng.random() * 16 ** 6))[2:]
                                      if is_main_process() else None)
            self.file_root = output_prefix
            # Dummy output -- no resume!
            self.read_resume = False
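        # Note on the unit-suffixed options resolved above (illustrative values):
        # the "d" unit scales with the number of sampled dimensions, so with
        # nDims = 20 an nlive of "25d" resolves to 25 * 20 = 500 live points;
        # nprior and nfail scale analogously with the resolved nlive.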
        self.base_dir = self.get_base_dir(self.output)
        self.raw_clusters_dir = os.path.join(self.base_dir, self._clusters_dir)
        self.output.create_folder(self.base_dir)
        if self.do_clustering:
            self.clusters_folder = self.get_clusters_dir(self.output)
            self.output.create_folder(self.clusters_folder)
        self.mpi_info("Storing raw PolyChord output in '%s'.", self.base_dir)
        # Exploiting the speed hierarchy
        if self.blocking:
            blocks, oversampling_factors = self.model.check_blocking(self.blocking)
        else:
            if self.measure_speeds:
                self.model.measure_and_set_speeds(n=self.measure_speeds,
                                                  random_state=self._rng)
            blocks, oversampling_factors = self.model.get_param_blocking_for_sampler(
                oversample_power=self.oversample_power)
        self.mpi_info("Parameter blocks and their oversampling factors:")
        max_width = len(str(max(oversampling_factors)))
        for f, b in zip(oversampling_factors, blocks):
            self.mpi_info("* %" + "%d" % max_width + "d : %r", f, b)
        # Save blocking in updated info, in case we want to resume
        self._updated_info["blocking"] = list(zip(oversampling_factors, blocks))
        blocks_flat = list(chain(*blocks))
        self.ordering = [blocks_flat.index(p)
                         for p in self.model.parameterization.sampled_params()]
        self.grade_dims = [len(block) for block in blocks]
        # Steps per block
        # NB: num_repeats is ignored by PolyChord when int "grade_frac" given,
        # so it needs to be applied by hand.
        # In num_repeats, `d` is interpreted as the dimension of each block.
        self.grade_frac = [int(o * read_dnumber(self.num_repeats, dim_block))
                           for o, dim_block in zip(oversampling_factors,
                                                   self.grade_dims)]
        # Assign settings
        pc_args = ["nlive", "num_repeats", "nprior", "nfail", "do_clustering",
                   "feedback", "precision_criterion", "logzero", "max_ndead",
                   "boost_posterior", "posteriors", "equals", "cluster_posteriors",
                   "write_resume", "read_resume", "write_stats", "write_live",
                   "write_dead", "write_prior", "maximise", "compression_factor",
                   "synchronous", "base_dir", "file_root", "grade_dims",
                   "grade_frac", "nlives"]
        # As stated above, num_repeats is ignored, so let's not pass it
        pc_args.pop(pc_args.index("num_repeats"))
        self.pc_settings = settings.PolyChordSettings(
            self.nDims, self.nDerived,
            seed=(self.seed if self.seed is not None else -1),
            **{p: getattr(self, p) for p in pc_args
               if getattr(self, p) is not None})
        # Prior conversion from the hypercube
        bounds = self.model.prior.bounds(
            confidence_for_unbounded=self.confidence_for_unbounded)
        # Check if priors are bounded (nan's to inf)
        inf = np.where(np.isinf(bounds))
        if len(inf[0]):
            params_names = list(self.model.parameterization.sampled_params())
            params = [params_names[i] for i in sorted(set(inf[0]))]
            raise LoggedError(
                self.log, "PolyChord needs bounded priors, but the parameter(s) '" +
                          "', '".join(params) + "' is(are) unbounded.")
        locs = bounds[:, 0]
        scales = bounds[:, 1] - bounds[:, 0]
        # This function re-scales the parameters AND puts them in the right order
        self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales).tolist()
        # We will need the volume of the prior domain, since PolyChord divides by it
        self.logvolume = np.log(np.prod(scales))
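        # A worked example of the hypercube transform above (illustrative bounds):
        # with bounds [[0, 2], [5, 9]] one gets locs = [0, 5] and scales = [2, 4],
        # so a unit-cube point x = [0.5, 0.25] (in PolyChord's blocked ordering)
        # maps to [0 + 0.5 * 2, 5 + 0.25 * 4] = [1.0, 6.0]; the prior volume that
        # PolyChord divides out is prod(scales) = 8, i.e. logvolume = log(8).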
        # Prepare callback function
        if self.callback_function is not None:
            self.callback_function_callable = (
                get_external_function(self.callback_function))
            self.last_point_callback = 0
        # Prepare runtime live and dead points collections
        self.live = SampleCollection(self.model, None, name="live")
        self.dead = SampleCollection(self.model, self.output, name="dead")
        # Done!
        if is_main_process():
            self.log.debug("Calling PolyChord with arguments:")
            for p, v in inspect.getmembers(self.pc_settings,
                                           lambda a: not callable(a)):
                if not p.startswith("_"):
                    self.log.debug("  %s: %s", p, v)

    def dumper(self, live_points, dead_points, logweights, logZ, logZstd):
        if self.callback_function is None:
            return
        # Store live and dead points and evidence computed so far
        self.live.reset()
        for point in live_points:
            self.live.add(
                point[:self.n_sampled],
                derived=point[self.n_sampled:self.n_sampled + self.n_derived],
                weight=np.nan,
                logpriors=point[self.n_sampled + self.n_derived:
                                self.n_sampled + self.n_derived + self.n_priors],
                loglikes=point[self.n_sampled + self.n_derived + self.n_priors:
                               self.n_sampled + self.n_derived + self.n_priors +
                               self.n_likes])
        for logweight, point in zip(logweights[self.last_point_callback:],
                                    dead_points[self.last_point_callback:]):
            self.dead.add(
                point[:self.n_sampled],
                derived=point[self.n_sampled:self.n_sampled + self.n_derived],
                weight=np.exp(logweight),
                logpriors=point[self.n_sampled + self.n_derived:
                                self.n_sampled + self.n_derived + self.n_priors],
                loglikes=point[self.n_sampled + self.n_derived + self.n_priors:
                               self.n_sampled + self.n_derived + self.n_priors +
                               self.n_likes])
        self.logZ, self.logZstd = logZ, logZstd
        self._correct_unphysical_fraction()
        # Callback function
        if self.callback_function is not None:
            try:
                self.callback_function_callable(self)
            except Exception as e:
                self.log.error("The callback function produced an error: %r", str(e))
            self.last_point_callback = len(self.dead)

    def run(self):
        """
        Prepares the posterior function and calls ``PolyChord``'s ``run`` function.
        """
        # Prepare the posterior.
        # Don't forget to multiply by the volume of the physical hypercube,
        # since PolyChord divides by it.
        def logpost(params_values):
            result = self.model.logposterior(params_values)
            loglikes = result.loglikes
            if len(loglikes) != self.n_likes:
                loglikes = np.full(self.n_likes, np.nan)
            derived = result.derived
            if len(derived) != self.n_derived:
                derived = np.full(self.n_derived, np.nan)
            derived = list(derived) + list(result.logpriors) + list(loglikes)
            return (max(result.logpost + self.logvolume, self.pc_settings.logzero),
                    derived)

        sync_processes()
        self.mpi_info("Calling PolyChord...")
        self.pc.run_polychord(logpost, self.nDims, self.nDerived, self.pc_settings,
                              self.pc_prior, self.dumper)
        self.process_raw_output()

    @property
    def raw_prefix(self):
        return os.path.join(self.pc_settings.base_dir, self.pc_settings.file_root)

    def dump_paramnames(self, prefix):
        labels = self.model.parameterization.labels()
        with open(prefix + ".paramnames", "w") as f_paramnames:
            for p in self.model.parameterization.sampled_params():
                f_paramnames.write("%s\t%s\n" % (p, labels.get(p, "")))
            for p in self.model.parameterization.derived_params():
                f_paramnames.write("%s*\t%s\n" % (p, labels.get(p, "")))
            for p in self.model.prior:
                f_paramnames.write("%s*\t%s\n" % (
                    "logprior" + derived_par_name_separator + p,
                    r"\pi_\mathrm{" + p.replace("_", r"\ ") + r"}"))
            for p in self.model.likelihood:
                f_paramnames.write("%s*\t%s\n" % (
                    "loglike" + derived_par_name_separator + p,
                    r"\log\mathcal{L}_\mathrm{" + p.replace("_", r"\ ") + r"}"))
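    # Layout of each row of PolyChord's raw "<root>.txt" file, as sliced by
    # save_sample below (getdist convention; column 1 holds -2*loglike and is
    # not used here), with the extra derived parameters appended by logpost:
    #   weight  -2*loglike  sampled...  derived...  logpriors...  loglikes...
    # Hence the sampled parameters start at column 2, and the log-priors and
    # log-likelihoods are taken from the end of the row.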
    def save_sample(self, fname, name):
        sample = np.atleast_2d(np.loadtxt(fname))
        if not sample.size:
            return None
        collection = SampleCollection(self.model, self.output, name=str(name))
        for row in sample:
            collection.add(
                row[2:2 + self.n_sampled],
                derived=row[2 + self.n_sampled:2 + self.n_sampled + self.n_derived],
                weight=row[0],
                logpriors=row[-(self.n_priors + self.n_likes):-self.n_likes],
                loglikes=row[-self.n_likes:])
        # Make sure that the points are written
        collection.out_update()
        return collection

    def _correct_unphysical_fraction(self):
        """
        Correction for the fraction of the prior that is unphysical -- see issue #77.
        """
        if not hasattr(self, "_frac_unphysical"):
            with open(self.raw_prefix + ".prior_info", "r",
                      encoding="utf-8-sig") as pf:
                lines = list(pf.readlines())
            get_value_str = lambda line: line[line.find("=") + 1:]
            get_value_str_var = lambda var: get_value_str(
                next(line for line in lines if line.lstrip().startswith(var)))
            nprior = int(get_value_str_var("nprior"))
            ndiscarded = int(get_value_str_var("ndiscarded"))
            self._frac_unphysical = nprior / ndiscarded
        if self._frac_unphysical != 1:
            self.log.debug("Correcting for unphysical region fraction: %g",
                           self._frac_unphysical)
            self.logZ += np.log(self._frac_unphysical)
            if hasattr(self, "clusters"):
                for cluster in self.clusters.values():
                    cluster["logZ"] += np.log(self._frac_unphysical)

    def process_raw_output(self):
        """
        Loads the sample of live points from ``PolyChord``'s raw output and writes it
        (if ``txt`` output requested).
        """
        if is_main_process():
            self.log.info("Loading PolyChord's results: samples and evidences.")
            self.dump_paramnames(self.raw_prefix)
            self.collection = self.save_sample(self.raw_prefix + ".txt", "1")
            # Load clusters, and save if output
            if self.pc_settings.do_clustering:
                self.clusters = {}
                clusters_raw_regexp = re.compile(
                    re.escape(self.pc_settings.file_root + "_") + r"\d+\.txt")
                cluster_raw_files = sorted(find_with_regexp(
                    clusters_raw_regexp, os.path.join(
                        self.pc_settings.base_dir, self._clusters_dir),
                    walk_tree=True))
                for f in cluster_raw_files:
                    i = int(f[f.rfind("_") + 1:-len(".txt")])
                    if self.output:
                        old_folder = self.output.folder
                        self.output.folder = self.clusters_folder
                    sample = self.save_sample(f, str(i))
                    if self.output:
                        # noinspection PyUnboundLocalVariable
                        self.output.folder = old_folder
                    self.clusters[i] = {"sample": sample}
            # Prepare the evidence(s) and write to file
            pre = "log(Z"
            active = "(Still active)"
            with open(self.raw_prefix + ".stats", "r",
                      encoding="utf-8-sig") as statsfile:
                lines = [line for line in statsfile.readlines()
                         if line.startswith(pre)]
            for line in lines:
                logZ, logZstd = [float(n.replace(active, ""))
                                 for n in line.split("=")[-1].split("+/-")]
                component = line.split("=")[0].lstrip(pre + "_").rstrip(") ")
                if not component:
                    self.logZ, self.logZstd = logZ, logZstd
                elif self.pc_settings.do_clustering:
                    i = int(component)
                    self.clusters[i]["logZ"], self.clusters[i]["logZstd"] = \
                        logZ, logZstd
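            # The evidence lines parsed above look like, e.g. (made-up values):
            #   log(Z)   =  -5.337 +/-  0.081
            #   log(Z_1) =  -5.401 +/-  0.089 (Still active)
            # where a "_i" suffix labels the evidence of the i-th cluster.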
            self.log.debug(
                "RAW log(Z) = %g +/- %g ; "
                "RAW Z in [%.8g, %.8g] (68%% C.L. log-gaussian)",
                self.logZ, self.logZstd,
                *[np.exp(self.logZ + n * self.logZstd) for n in [-1, 1]])
            self._correct_unphysical_fraction()
            if self.output:
                out_evidences = dict(logZ=self.logZ, logZstd=self.logZstd)
                if getattr(self, "clusters", None):
                    out_evidences["clusters"] = {}
                    for i in sorted(list(self.clusters)):
                        out_evidences["clusters"][i] = dict(
                            logZ=self.clusters[i]["logZ"],
                            logZstd=self.clusters[i]["logZstd"])
                fname = os.path.join(self.output.folder,
                                     self.output.prefix + Extension.evidence)
                yaml_dump_file(fname, out_evidences, comment="log-evidence",
                               error_if_exists=False)
        # TODO: try to broadcast the collections
        # if get_mpi():
        #     bcast_from_0 = lambda attrname: setattr(self,
        #         attrname, get_mpi_comm().bcast(getattr(self, attrname, None),
        #                                        root=0))
        #     map(bcast_from_0, ["collection", "logZ", "logZstd", "clusters"])
        if is_main_process():
            self.log.info("Finished! Raw PolyChord output stored in '%s', "
                          "with prefix '%s'",
                          self.pc_settings.base_dir, self.pc_settings.file_root)
            self.log.info(
                "log(Z) = %g +/- %g ; Z in [%.8g, %.8g] (68%% C.L. log-gaussian)",
                self.logZ, self.logZstd,
                *[np.exp(self.logZ + n * self.logZstd) for n in [-1, 1]])

    def products(self):
        """
        Auxiliary function to define what should be returned in a scripted call.

        Returns:
           The sample ``SampleCollection`` containing the sequentially discarded
           live points, the evidence ``logZ`` (with uncertainty ``logZstd``) and,
           if clustering was requested, the per-cluster samples and evidences.
        """
        if is_main_process():
            products = {
                "sample": self.collection, "logZ": self.logZ,
                "logZstd": self.logZstd}
            if self.pc_settings.do_clustering:
                products.update({"clusters": self.clusters})
            return products
        else:
            return {}

    @classmethod
    def get_base_dir(cls, output):
        if output:
            return output.add_suffix(cls._base_dir_suffix, separator="_")
        return os.path.join(gettempdir(), cls._base_dir_suffix)

    @classmethod
    def get_clusters_dir(cls, output):
        if output:
            return output.add_suffix(cls._clusters_dir, separator="_")

    @classmethod
    def output_files_regexps(cls, output, info=None, minimal=False):
        # Resume file
        regexps_tuples = [
            (re.compile(re.escape(output.prefix + ".resume")),
             cls.get_base_dir(output))]
        if minimal:
            return regexps_tuples
        return regexps_tuples + [
            # Raw products base dir
            (None, cls.get_base_dir(output)),
            # Main sample
            (output.collection_regexp(name=None), None),
            # Evidence
            (re.compile(re.escape(output.prefix + Extension.evidence)), None),
            # Clusters
            (None, cls.get_clusters_dir(output))]

    @classmethod
    def get_version(cls):
        return None

    @classmethod
    def get_path(cls, path):
        return os.path.realpath(os.path.join(
            path, "code", cls._pc_repo_name[cls._pc_repo_name.find("/") + 1:]))

    @classmethod
    def is_compatible(cls):
        import platform
        if platform.system() == "Windows":
            return False
        return True

    @classmethod
    def is_installed(cls, reload=False, **kwargs):
        if not kwargs.get("code", True):
            return True
        try:
            return bool(load_external_module(
                "pypolychord", path=kwargs["path"],
                get_import_path=get_compiled_import_path,
                min_version=cls._pc_repo_version, reload=reload,
                logger=get_logger(cls.__name__), not_installed_level="debug"))
        except ComponentNotInstalledError:
            return False

    @classmethod
    def install(cls, path=None, force=False, code=False, data=False,
                no_progress_bars=False):
        if not code:
            return True
        log = get_logger(__name__)
        log.info("Downloading PolyChord...")
        success = download_github_release(
            os.path.join(path, "code"), cls._pc_repo_name, cls._pc_repo_version,
            no_progress_bars=no_progress_bars, logger=log)
        if not success:
            log.error("Could not download PolyChord.")
            return False
        log.info("Compiling (Py)PolyChord...")
        from subprocess import Popen, PIPE
        # Needs to re-define os' PWD,
        # because MakeFile calls it and is not affected by the cwd of Popen
        cwd = os.path.join(path, "code",
                           cls._pc_repo_name[cls._pc_repo_name.find("/") + 1:])
        my_env = os.environ.copy()
        my_env.update({"PWD": cwd})
        if "CC" not in my_env:
            my_env["CC"] = "mpicc"
        if "CXX" not in my_env:
            my_env["CXX"] = "mpicxx"
        process_make = Popen([sys.executable, "setup.py", "build"],
                             cwd=cwd, env=my_env, stdout=PIPE, stderr=PIPE)
        out, err = process_make.communicate()
        if process_make.returncode:
            log.info(out.decode("utf-8"))
            log.info(err.decode("utf-8"))
            log.error("Python build failed!")
            return False
        return True
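
# A minimal usage sketch (illustrative, not part of the module): running this
# sampler through cobaya's top-level interface, with a toy Gaussian likelihood
# and bounded flat priors, as PolyChord requires:
#
#     from cobaya.run import run
#
#     info = {
#         "likelihood": {"gauss": "lambda x, y: -0.5 * (x ** 2 + y ** 2)"},
#         "params": {"x": {"prior": {"min": -5, "max": 5}},
#                    "y": {"prior": {"min": -5, "max": 5}}},
#         "sampler": {"polychord": {"nlive": "25d"}}}
#     updated_info, sampler = run(info)
#     # sampler.products() then contains "sample", "logZ" and "logZstd".
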

class Evaluate(Sampler):
    file_base_name = 'evaluate'

    override: Mapping[str, float]
    N: int

    def initialize(self):
        """
        Creates a 1-point collection to store the point(s) at which the posterior
        is evaluated.
        """
        try:
            self.N = int(self.N)
        except ValueError:
            raise LoggedError(
                self.log,
                "Could not convert the number of samples to an integer: %r", self.N)
        self.one_point = SampleCollection(self.model, self.output, name="1")
        self.log.info("Initialized!")

    def run(self):
        """
        First gets a reference point. If a single reference point is not given,
        the point is sampled from the reference pdf. If that one is not defined
        either, the point is sampled from the prior.

        Then it evaluates the prior and likelihood(s) and stores them in the
        sample collection, one point per requested evaluation.
        """
        for i in range(self.N):
            if self.N > 1:
                self.log.info(
                    "Evaluating sample #%d ------------------------------", i + 1)
            self.log.info("Looking for a reference point with non-zero prior.")
            reference_values = self.model.prior.reference(random_state=self._rng)
            reference_point = dict(zip(
                self.model.parameterization.sampled_params(), reference_values))
            for p, v in (self.override or {}).items():
                if p not in reference_point:
                    raise LoggedError(
                        self.log, "Parameter '%s' used in override not known. "
                                  "Known parameter names are %r.",
                        p, self.model.parameterization.sampled_params())
                reference_point[p] = v
            self.log.info("Reference point:\n   " + "\n   ".join(
                ["%s = %g" % pv for pv in reference_point.items()]))
            self.log.info("Evaluating prior and likelihoods...")
            self.logposterior = self.model.logposterior(reference_point)
            self.one_point.add(
                list(reference_point.values()),
                derived=self.logposterior.derived,
                logpost=self.logposterior.logpost,
                logpriors=self.logposterior.logpriors,
                loglikes=self.logposterior.loglikes)
            self.log.info("log-posterior = %g", self.logposterior.logpost)
            self.log.info("log-prior = %g", self.logposterior.logprior)
            for j, name in enumerate(self.model.prior):
                self.log.info("   logprior_" + name + " = %g",
                              self.logposterior.logpriors[j])
            if self.logposterior.logprior > -np.inf:
                self.log.info("log-likelihood = %g", self.logposterior.loglike)
                for j, name in enumerate(self.model.likelihood):
                    self.log.info("   chi2_" + name + " = %g",
                                  -2 * self.logposterior.loglikes[j])
                self.log.info("Derived params:")
                for name, value in zip(
                        self.model.parameterization.derived_params(),
                        self.logposterior.derived):
                    self.log.info("   " + name + " = %g", value)
            else:
                self.log.info("Likelihoods and derived parameters not computed, "
                              "since the prior is null.")
        # Write the output: the point(s) and their prior, posterior and likelihood
        self.one_point.out_update()

    def products(self):
        """
        Auxiliary function to define what should be returned in a scripted call.

        Returns:
           The sample ``SampleCollection`` containing the evaluated point(s).
        """
        return {"sample": self.one_point}
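
# A minimal usage sketch (illustrative): evaluating the posterior at a fixed
# point through cobaya's top-level interface, pinning a parameter via "override":
#
#     from cobaya.run import run
#
#     info = {
#         "likelihood": {"gauss": "lambda x: -0.5 * x ** 2"},
#         "params": {"x": {"prior": {"min": -5, "max": 5}, "ref": 1.0}},
#         "sampler": {"evaluate": {"N": 1, "override": {"x": 0.5}}}}
#     updated_info, sampler = run(info)
#     print(sampler.products()["sample"])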