def write_checkpoint(self):
    if is_main_process() and self.output:
        checkpoint_filename = self.checkpoint_filename()
        self.dump_covmat(self.proposer.get_covariance())
        checkpoint_info = {"sampler": {self.get_name(): dict([
            ("converged", self.converged),
            ("Rminus1_last", self.Rminus1_last),
            ("burn_in", (self.burn_in.value  # initial: repeat burn-in if not finished
                         if not self.n() and self.burn_in_left else
                         0)),  # to avoid overweighting last point of prev. run
            ("mpi_size", get_mpi_size())])}}
        yaml_dump_file(checkpoint_filename, checkpoint_info, error_if_exists=False)
        if not self.progress.empty:
            with open(self.progress_filename(), "a",
                      encoding="utf-8") as progress_file:
                fmts = {"N": lambda x: "{:9d}".format(x)}
                # TODO: next one is ignored when added to the dict
                # "acceptance_rate": lambda x: "{:15.8g}".format(x)}
                progress_file.write(self.progress.tail(1).to_string(
                    header=False, index=False, formatters=fmts) + "\n")
        self.log.debug("Dumped checkpoint and progress info, and current covmat.")
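# For reference, a sketch of the "<prefix>.checkpoint" YAML that the method
# above dumps. Key names follow the dict built in write_checkpoint; the sampler
# name and all values are purely illustrative:
#
#   sampler:
#     mcmc:
#       converged: false
#       Rminus1_last: 0.0123
#       burn_in: 0
#       mpi_size: 4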
def write_checkpoint(self):
    if not get_mpi_rank() and self.output:
        checkpoint_filename = self.checkpoint_filename()
        covmat_filename = self.covmat_filename()
        np.savetxt(covmat_filename, self.proposer.get_covariance(),
                   header=" ".join(
                       list(self.model.parameterization.sampled_params())))
        checkpoint_info = {_sampler: {self.name: odict([
            ["converged", bool(self.converged)],
            ["Rminus1_last", self.Rminus1_last],
            ["proposal_scale", self.proposer.get_scale()],
            ["blocks", self.blocks],
            ["oversampling_factors", self.oversampling_factors],
            ["i_last_slow_block", self.i_last_slow_block],
            ["burn_in", (self.burn_in  # initial: repeat burn-in if not finished
                         if not self.n() and self.burn_in_left else
                         "d")],  # to avoid overweighting last point of prev. run
            ["mpi_size", get_mpi_size()]])}}
        yaml_dump_file(checkpoint_filename, checkpoint_info, error_if_exists=False)
        self.log.debug("Dumped checkpoint info and current covmat.")
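# The older variant above stores more per-sampler state in the checkpoint. A
# sketch of the resulting YAML, assuming the _sampler constant resolves to the
# key "sampler" (key names from the odict above; values illustrative):
#
#   sampler:
#     mcmc:
#       converged: false
#       Rminus1_last: 0.05
#       proposal_scale: 2.4
#       blocks: [...]
#       oversampling_factors: [...]
#       i_last_slow_block: 0
#       burn_in: 'd'
#       mpi_size: 4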
def test_run_file(tmpdir):
    input_file = os.path.join(tmpdir, 'pars.yaml')
    root = os.path.join(tmpdir, 'test')
    yaml_dump_file(input_file, dict(test_info_common, output=root))
    run_script([input_file, '--force'])
    likname = list(test_info_common["likelihood"])[0]
    default_info = get_default_info(likname, "likelihood")
    updated_info = yaml_load_file(root + '.updated.yaml')
    assert updated_info["prior"] == default_info["prior"]
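# The test above relies on an externally defined test_info_common fixture. A
# minimal hypothetical shape it assumes (a likelihood block plus whatever else
# the run needs); names and values here are illustrative, not the real fixture:
#
# test_info_common = {
#     "likelihood": {"gaussian_mixture": {"means": [0.2], "covs": [[0.1]]}},
#     "params": {"a_0": {"prior": {"min": 0, "max": 1}}},
#     "sampler": {"evaluate": None},
# }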
def write_config_file(config_info, append=True):
    """
    Writes the given info into the config file.
    """
    # Just-in-time import to avoid recursion
    from cobaya.yaml import yaml_dump_file
    try:
        info = {}
        if append:
            info.update(load_config_file())
        info.update(config_info)
        yaml_dump_file(os.path.join(get_config_path(), _packages_path_config_file),
                       info, error_if_exists=False)
    except Exception as e:
        log.error("Could not write the external packages installation path into the "
                  "config file. Reason: %r", str(e))
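# Usage sketch for the helper above (the dict key and path are illustrative,
# not a documented API):
#
# write_config_file({"packages_path": "/scratch/cobaya_packages"})
# # With append=True this merges the key into the existing config and rewrites
# # <get_config_path()>/<_packages_path_config_file>; errors are only logged.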
def write_checkpoint(self):
    if is_main_process() and self.output:
        checkpoint_filename = self.checkpoint_filename()
        self.dump_covmat(self.proposer.get_covariance())
        checkpoint_info = {kinds.sampler: {self.get_name(): dict([
            ("converged", bool(self.converged)),
            ("Rminus1_last", self.Rminus1_last),
            ("burn_in", (self.burn_in.value  # initial: repeat burn-in if not finished
                         if not self.n() and self.burn_in_left else
                         0)),  # to avoid overweighting last point of prev. run
            ("mpi_size", get_mpi_size())])}}
        yaml_dump_file(checkpoint_filename, checkpoint_info, error_if_exists=False)
        if not self.progress.empty:
            with open(self.progress_filename(), "a",
                      encoding="utf-8") as progress_file:
                progress_file.write(self.progress.tail(1).to_string(
                    header=False, index=False) + "\n")
        self.log.debug("Dumped checkpoint and progress info, and current covmat.")
def get_covmat_database(modules, cached=True):
    # Get folders with corresponding modules installed
    installed_folders = [folder for folder in covmat_folders
                         if os.path.exists(folder.format(**{_path_install: modules}))]
    covmats_database_fullpath = os.path.join(modules, _covmats_file)
    # Check if there is a usable cached one
    if cached:
        try:
            covmat_database = yaml_load_file(covmats_database_fullpath)
            assert set(covmat_database) == set(installed_folders)
            return covmat_database
        except Exception:
            log.info("No cached covmat database present, not usable or not "
                     "up-to-date. Will be re-created and cached.")
    # Create it (again)
    covmat_database = odict()
    for folder in installed_folders:
        covmat_database[folder] = []
        folder_full = folder.format(**{_path_install: modules}).replace("/", os.sep)
        for filename in os.listdir(folder_full):
            try:
                with open(os.path.join(folder_full, filename)) as covmat:
                    header = covmat.readline()
                assert header.strip().startswith("#")
                params = header.strip().lstrip("#").split()
            except Exception:
                # Not a covmat file (e.g. no parameter names in the header)
                continue
            covmat_database[folder].append({"name": filename, "params": params})
    if cached:
        yaml_dump_file(covmats_database_fullpath, covmat_database,
                       error_if_exists=False)
    return covmat_database
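# Usage sketch for get_covmat_database (the modules path and parameter name are
# illustrative): build or load the cached database, which maps each installed
# folder to a list of {"name": ..., "params": [...]} entries, then pick out the
# covmats that declare a given parameter.
#
# database = get_covmat_database("/path/to/modules", cached=True)
# with_omegam = [entry["name"] for covmats in database.values()
#                for entry in covmats if "omegam" in entry["params"]]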
def write_checkpoint(self):
    if am_single_or_primary_process() and self.output:
        checkpoint_filename = self.checkpoint_filename()
        covmat_filename = self.covmat_filename()
        np.savetxt(covmat_filename, self.proposer.get_covariance(),
                   header=" ".join(
                       list(self.model.parameterization.sampled_params())))
        checkpoint_info = {_sampler: {self.name: odict([
            ["converged", bool(self.converged)],
            ["Rminus1_last", self.Rminus1_last],
            ["proposal_scale", self.proposer.get_scale()],
            ["blocks", self.blocks],
            ["oversampling_factors", self.oversampling_factors],
            ["i_last_slow_block", self.i_last_slow_block],
            ["burn_in", (self.burn_in  # initial: repeat burn-in if not finished
                         if not self.n() and self.burn_in_left else
                         "d")],  # to avoid overweighting last point of prev. run
            ["mpi_size", get_mpi_size()]])}}
        yaml_dump_file(checkpoint_filename, checkpoint_info, error_if_exists=False)
        if not self.progress.empty:
            with open(self.progress_filename(), "a") as progress_file:
                progress_file.write(self.progress.tail(1).to_string(
                    header=False, index=False) + "\n")
        self.log.debug("Dumped checkpoint and progress info, and current covmat.")
def process_raw_output(self):
    """
    Loads the sample of live points from ``PolyChord``'s raw output and writes it
    (if ``txt`` output requested).
    """
    if is_main_process():
        self.log.info("Loading PolyChord's results: samples and evidences.")
        self.dump_paramnames(self.raw_prefix)
        self.collection = self.save_sample(self.raw_prefix + ".txt", "1")
        # Load clusters, and save if output
        if self.pc_settings.do_clustering:
            self.clusters = {}
            clusters_raw_regexp = re.compile(
                re.escape(self.pc_settings.file_root + "_") + r"\d+\.txt")
            cluster_raw_files = sorted(find_with_regexp(
                clusters_raw_regexp, os.path.join(
                    self.pc_settings.base_dir, self._clusters_dir), walk_tree=True))
            for f in cluster_raw_files:
                i = int(f[f.rfind("_") + 1:-len(".txt")])
                if self.output:
                    old_folder = self.output.folder
                    self.output.folder = self.clusters_folder
                sample = self.save_sample(f, str(i))
                if self.output:
                    self.output.folder = old_folder
                self.clusters[i] = {"sample": sample}
        # Prepare the evidence(s) and write to file
        pre = "log(Z"
        active = "(Still active)"
        with open(self.raw_prefix + ".stats", "r",
                  encoding="utf-8-sig") as statsfile:
            lines = [line for line in statsfile.readlines() if line.startswith(pre)]
        for line in lines:
            logZ, logZstd = [float(n.replace(active, ""))
                             for n in line.split("=")[-1].split("+/-")]
            component = line.split("=")[0].lstrip(pre + "_").rstrip(") ")
            if not component:
                self.logZ, self.logZstd = logZ, logZstd
            elif self.pc_settings.do_clustering:
                i = int(component)
                self.clusters[i]["logZ"], self.clusters[i]["logZstd"] = logZ, logZstd
        self.log.debug(
            "RAW log(Z) = %g +/- %g ; RAW Z in [%.8g, %.8g] (68%% C.L. log-gaussian)",
            self.logZ, self.logZstd,
            *[np.exp(self.logZ + n * self.logZstd) for n in [-1, 1]])
        self._correct_unphysical_fraction()
        if self.output:
            out_evidences = dict(logZ=self.logZ, logZstd=self.logZstd)
            if getattr(self, "clusters", None):
                out_evidences["clusters"] = {}
                for i in sorted(list(self.clusters)):
                    out_evidences["clusters"][i] = dict(
                        logZ=self.clusters[i]["logZ"],
                        logZstd=self.clusters[i]["logZstd"])
            fname = os.path.join(self.output.folder,
                                 self.output.prefix + _evidence_extension)
            yaml_dump_file(fname, out_evidences, comment="log-evidence",
                           error_if_exists=False)
    # TODO: try to broadcast the collections
    # if get_mpi():
    #     bcast_from_0 = lambda attrname: setattr(self, attrname,
    #         get_mpi_comm().bcast(getattr(self, attrname, None), root=0))
    #     map(bcast_from_0, ["collection", "logZ", "logZstd", "clusters"])
    if is_main_process():
        self.log.info("Finished! Raw PolyChord output stored in '%s', "
                      "with prefix '%s'",
                      self.pc_settings.base_dir, self.pc_settings.file_root)
        self.log.info(
            "log(Z) = %g +/- %g ; Z in [%.8g, %.8g] (68%% C.L. log-gaussian)",
            self.logZ, self.logZstd,
            *[np.exp(self.logZ + n * self.logZstd) for n in [-1, 1]])
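# Example of the "<raw_prefix>.stats" lines parsed above (format inferred from
# the string handling in this method; numbers illustrative). The unlabelled
# "log(Z)" line carries the global evidence; numbered components ("log(Z_1)",
# etc.) belong to clusters, possibly flagged "(Still active)":
#
#   log(Z)       =  -12.345678 +/-    0.123456
#   log(Z_1)     =  -13.456789 +/-    0.234567 (Still active)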
def close(self, exception_type=None, exception_value=None, traceback=None):
    """
    Loads the sample of live points from ``PolyChord``'s raw output and writes it
    (if ``txt`` output requested).
    """
    if exception_type:
        raise
    if am_single_or_primary_process():
        self.log.info("Loading PolyChord's results: samples and evidences.")
        prefix = os.path.join(self.pc_settings.base_dir, self.pc_settings.file_root)
        self.collection = self.save_sample(prefix + ".txt", "1")
        # (defined up front: also needed when writing the evidences below)
        do_output = hasattr(self.output, "folder")
        if self.pc_settings.do_clustering is not False:  # NB: "None" == "default"
            self.clusters = {}
            for f in os.listdir(os.path.join(self.pc_settings.base_dir, clusters)):
                if not f.startswith(self.pc_settings.file_root):
                    continue
                if do_output:
                    cluster_folder = os.path.join(
                        self.output.folder, self.output.prefix +
                        ("_" if self.output.prefix else "") + clusters)
                    if not os.path.exists(cluster_folder):
                        os.mkdir(cluster_folder)
                try:
                    i = int(f[len(self.pc_settings.file_root) + 1:-len(".txt")])
                except ValueError:
                    continue
                if do_output:
                    old_folder = self.output.folder
                    self.output.folder = cluster_folder
                fname = os.path.join(self.pc_settings.base_dir, clusters, f)
                sample = self.save_sample(fname, str(i))
                self.clusters[i] = {"sample": sample}
                if do_output:
                    self.output.folder = old_folder
        # Prepare the evidence(s) and write to file
        pre = "log(Z"
        active = "(Still active)"
        with open(prefix + ".stats", "r") as statsfile:
            lines = [line for line in statsfile.readlines() if line.startswith(pre)]
        for line in lines:
            logZ, logZstd = [float(n.replace(active, ""))
                             for n in line.split("=")[-1].split("+/-")]
            component = line.split("=")[0].lstrip(pre + "_").rstrip(") ")
            if not component:
                self.logZ, self.logZstd = logZ, logZstd
            elif self.pc_settings.do_clustering:
                i = int(component)
                self.clusters[i]["logZ"], self.clusters[i]["logZstd"] = logZ, logZstd
        if do_output:
            out_evidences = odict([["logZ", self.logZ], ["logZstd", self.logZstd]])
            if getattr(self, "clusters", None):
                out_evidences["clusters"] = odict()
                for i in sorted(list(self.clusters.keys())):
                    out_evidences["clusters"][i] = odict(
                        [["logZ", self.clusters[i]["logZ"]],
                         ["logZstd", self.clusters[i]["logZstd"]]])
            fname = os.path.join(self.output.folder, self.output.prefix + ".logZ")
            yaml_dump_file(fname, out_evidences, comment="log-evidence",
                           error_if_exists=False)
    # TODO: try to broadcast the collections
    # if get_mpi():
    #     bcast_from_0 = lambda attrname: setattr(self, attrname,
    #         get_mpi_comm().bcast(getattr(self, attrname, None), root=0))
    #     map(bcast_from_0, ["collection", "logZ", "logZstd", "clusters"])
    if am_single_or_primary_process():
        self.log.info("Finished! Raw PolyChord output stored in '%s', "
                      "with prefix '%s'",
                      self.pc_settings.base_dir, self.pc_settings.file_root)
def makeGrid(batchPath, settingName=None, settings=None, read_only=False,
             interactive=False, install_reqs_at=None, install_reqs_force=None):
    print("Generating grid...")
    batchPath = os.path.abspath(batchPath) + os.sep
    if not settings:
        if not settingName:
            raise NotImplementedError("Re-using previous batch is work in progress...")
            # if not pathIsGrid(batchPath):
            #     raise Exception('Need to give name of setting file if batchPath/config '
            #                     'does not exist')
            # read_only = True
            # sys.path.insert(0, batchPath + 'config')
            # settings = __import__(IniFile(batchPath + 'config/config.ini')
            #                       .params['setting_file'].replace('.py', ''))
        elif os.path.splitext(settingName)[-1].lower() in _yaml_extensions:
            settings = yaml_load_file(settingName)
        else:
            raise NotImplementedError("Using a python script is work in progress...")
            # In this case, info-as-dict would be passed
            # settings = __import__(settingName, fromlist=['dummy'])
    batch = batchjob.BatchJob(batchPath)
    # batch.skip = settings.get("skip", False)
    batch.makeItems(settings, messages=not read_only)
    if read_only:
        for jobItem in [b for b in batch.jobItems]:
            if not jobItem.chainExists():
                batch.jobItems.remove(jobItem)
        batch.save()
        print('OK, configured grid with %u existing chains' % len(batch.jobItems))
        return batch
    else:
        batch.makeDirectories(setting_file=None)
        batch.save()
    infos = {}
    components_used = {}
    # Default info
    defaults = copy.deepcopy(settings)
    grid_definition = defaults.pop("grid")
    models_definitions = grid_definition["models"]
    datasets_definitions = grid_definition["datasets"]
    for jobItem in batch.items(wantSubItems=False):
        # Model info
        jobItem.makeChainPath()
        try:
            model_info = copy.deepcopy(models_definitions[jobItem.param_set] or {})
        except KeyError:
            raise ValueError("Model '%s' must be defined." % jobItem.param_set)
        model_info = merge_info(defaults, model_info)
        # Dataset info
        try:
            dataset_info = copy.deepcopy(datasets_definitions[jobItem.data_set.tag])
        except KeyError:
            raise ValueError("Data set '%s' must be defined." % jobItem.data_set.tag)
        # Combined info
        combined_info = merge_info(defaults, model_info, dataset_info)
        if "preset" in combined_info:
            preset = combined_info.pop("preset")
            combined_info = merge_info(create_input(**preset), combined_info)
        combined_info[_output_prefix] = jobItem.chainRoot
        # Requisites
        components_used = get_used_components(components_used, combined_info)
        if install_reqs_at:
            combined_info[_packages_path] = os.path.abspath(install_reqs_at)
        # Save the info (we will write it after installation:
        # we need to install to add auto covmats)
        if jobItem.param_set not in infos:
            infos[jobItem.param_set] = {}
        infos[jobItem.param_set][jobItem.data_set.tag] = combined_info
    # Installing requisites
    if install_reqs_at:
        print("Installing required code and data for the grid.")
        from cobaya.log import logger_setup
        logger_setup()
        install_reqs(components_used, path=install_reqs_at, force=install_reqs_force)
    print("Adding covmats (if necessary) and writing input files")
    for jobItem in batch.items(wantSubItems=False):
        info = infos[jobItem.param_set][jobItem.data_set.tag]
        # Covariance matrices
        # We try to find them now, instead of at run time,
        # to check if correctly selected
        try:
            sampler = list(info[kinds.sampler])[0]
        except KeyError:
            raise ValueError("No sampler has been chosen")
        if sampler == "mcmc" and info[kinds.sampler][sampler].get("covmat", "auto"):
            packages_path = install_reqs_at or info.get(_packages_path, None)
            if not packages_path:
                raise ValueError("Cannot assign automatic covariance matrices "
                                 "because no external packages path has been defined.")
            # Need updated info for covmats: includes renames
            updated_info = update_info(info)
            # Ideally, we use slow+sampled parameters to look for the covariance
            # matrix, but since for that we'd need to initialise a model, we
            # approximate that set as theory+sampled
            from itertools import chain
            like_params = set(chain(*[
                list(like[_params])
                for like in updated_info[kinds.likelihood].values()]))
            params_info = {p: v for p, v in updated_info[_params].items()
                           if is_sampled_param(v) and p not in like_params}
            best_covmat = _get_best_covmat(os.path.abspath(packages_path),
                                           params_info, updated_info[kinds.likelihood])
            info[kinds.sampler][sampler]["covmat"] = os.path.join(
                best_covmat["folder"], best_covmat["name"])
        # Write the info for this job
        # Allow overwrite, since often will want to regenerate grid with tweaks
        yaml_dump_file(jobItem.iniFile(), sort_cosmetic(info), error_if_exists=False)
        # Non-translated old code
        # if not start_at_bestfit:
        #     setMinimize(jobItem, ini)
        #     variant = '_minimize'
        #     ini.saveFile(jobItem.iniFile(variant))
        ## NOT IMPLEMENTED: start at best fit
        ## ini.params['start_at_bestfit'] = start_at_bestfit
        # ---
        # for deffile in settings.defaults:
        #     ini.defaults.append(batch.commonPath + deffile)
        # if hasattr(settings, 'override_defaults'):
        #     ini.defaults = [batch.commonPath + deffile
        #                     for deffile in settings.override_defaults] + ini.defaults
        # ---
        # # add ini files for importance sampling runs
        # for imp in jobItem.importanceJobs():
        #     if getattr(imp, 'importanceFilter', None): continue
        #     if batch.hasName(imp.name.replace('_post', '')):
        #         raise Exception('importance sampling something you already have?')
        #     for minimize in (False, True):
        #         if minimize and not getattr(imp, 'want_minimize', True): continue
        #         ini = IniFile()
        #         updateIniParams(ini, imp.importanceSettings, batch.commonPath)
        #         if cosmomcAction == 0 and not minimize:
        #             for deffile in settings.importanceDefaults:
        #                 ini.defaults.append(batch.commonPath + deffile)
        #             ini.params['redo_outroot'] = imp.chainRoot
        #             ini.params['action'] = 1
        #         else:
        #             ini.params['file_root'] = imp.chainRoot
        #         if minimize:
        #             setMinimize(jobItem, ini)
        #             variant = '_minimize'
        #         else:
        #             variant = ''
        #         ini.defaults.append(jobItem.iniFile())
        #         ini.saveFile(imp.iniFile(variant))
        #     if cosmomcAction != 0: break
    if not interactive:
        return batch
    print('Done... to run do: cobaya-grid-run %s' % batchPath)
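# A sketch of the settings dict/YAML that makeGrid consumes, inferred from the
# keys accessed above ("grid" with "models" and "datasets" sub-blocks, optional
# per-job "preset", everything else treated as defaults merged into each job).
# Model/dataset names and block contents are illustrative:
#
#   grid:
#     models:
#       lcdm: {params: {...}}
#     datasets:
#       planck: {likelihood: {...}}
#   sampler:
#     mcmc: {covmat: auto}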
def makeGrid(batchPath, settingName=None, settings=None, read_only=False,
             interactive=False, install_reqs_at=None, install_reqs_force=None):
    batchPath = os.path.abspath(batchPath) + os.sep
    # # 0: chains, 1: importance sampling, 2: best-fit, 3: best-fit and Hessian
    # cosmomcAction = 0
    if not settings:
        if not settingName:
            raise NotImplementedError("Re-using previous batch is work in progress...")
            # if not pathIsGrid(batchPath):
            #     raise Exception('Need to give name of setting file if batchPath/config '
            #                     'does not exist')
            # read_only = True
            # sys.path.insert(0, batchPath + 'config')
            # sys.modules['batchJob'] = batchjob  # old name
            # settings = __import__(IniFile(batchPath + 'config/config.ini')
            #                       .params['setting_file'].replace('.py', ''))
        elif os.path.splitext(settingName)[-1].lower() in (".yml", ".yaml"):
            settings = yaml_load_file(settingName)
        else:
            # ACTUALLY, in the scripted case a DICT or a YAML FILE NAME
            # should be passed
            raise NotImplementedError("Using a python script is work in progress...")
            # settings = __import__(settingName, fromlist=['dummy'])
    from cobaya.grid_tools import batchjob
    batch = batchjob.batchJob(batchPath, settings.get("yaml_dir", None))
    # batch.skip = settings.get("skip", False)
    if "skip" in settings:
        raise NotImplementedError("Skipping not implemented yet.")
    batch.makeItems(settings, messages=not read_only)
    if read_only:
        for jobItem in [b for b in batch.jobItems]:
            if not jobItem.chainExists():
                batch.jobItems.remove(jobItem)
        batch.save()
        print('OK, configured grid with %u existing chains' % len(batch.jobItems))
        return batch
    else:
        # WAS batch.makeDirectories(settings.__file__)
        # WHY THE DIR OF settings AND NOT THE GRID DIR GIVEN???
        batch.makeDirectories(setting_file=None)
        batch.save()
    # NOT IMPLEMENTED YET: start at best fit!!!
    # start_at_bestfit = getattr(settings, 'start_at_bestfit', False)
    defaults = copy.deepcopy(settings)
    modules_used = {}
    grid_definition = defaults.pop("grid")
    models_definitions = grid_definition["models"]
    datasets_definitions = grid_definition["datasets"]
    for jobItem in batch.items(wantSubItems=False):
        jobItem.makeChainPath()
        base_info = copy.deepcopy(defaults)
        try:
            model_info = models_definitions[jobItem.param_set] or {}
        except KeyError:
            raise ValueError("Model '%s' must be defined." % jobItem.param_set)
        # COVMATS NOT IMPLEMENTED YET!!!
        # cov_dir_name = getattr(settings, 'cov_dir', 'planck_covmats')
        # covdir = os.path.join(batch.basePath, cov_dir_name)
        # covmat = os.path.join(covdir, jobItem.name + '.covmat')
        # if not os.path.exists(covmat):
        #     covNameMappings = getattr(settings, 'covNameMappings', None)
        #     mapped_name_norm = jobItem.makeNormedName(covNameMappings)[0]
        #     covmat_normed = os.path.join(covdir, mapped_name_norm + '.covmat')
        #     covmat = covmat_normed
        #     if not os.path.exists(covmat) and hasattr(jobItem.data_set, 'covmat'):
        #         covmat = batch.basePath + jobItem.data_set.covmat
        #     if not os.path.exists(covmat) and hasattr(settings, 'covmat'):
        #         covmat = batch.basePath + settings.covmat
        # else:
        #     covNameMappings = None
        # if os.path.exists(covmat):
        #     ini.params['propose_matrix'] = covmat
        #     if getattr(settings, 'newCovmats', True):
        #         ini.params['MPI_Max_R_ProposeUpdate'] = 20
        # else:
        #     hasCov = False
        #     ini.params['MPI_Max_R_ProposeUpdate'] = 20
        #     covmat_try = []
        #     if 'covRenamer' in dir(settings):
        #         covmat_try += settings.covRenamer(jobItem.name)
        #         covmat_try += settings.covRenamer(mapped_name_norm)
        #     if hasattr(settings, 'covrenames'):
        #         for aname in [jobItem.name, mapped_name_norm]:
        #             covmat_try += [aname.replace(old, new, 1)
        #                            for old, new in settings.covrenames if old in aname]
        #             for new1, old1 in settings.covrenames:
        #                 if old1 in aname:
        #                     name = aname.replace(old1, new1, 1)
        #                     covmat_try += [name.replace(old, new, 1)
        #                                    for old, new in settings.covrenames
        #                                    if old in name]
        #     if 'covWithoutNameOrder' in dir(settings):
        #         if covNameMappings:
        #             removes = copy.deepcopy(covNameMappings)
        #         else:
        #             removes = dict()
        #         for name in settings.covWithoutNameOrder:
        #             if name in jobItem.data_set.names:
        #                 removes[name] = ''
        #         covmat_try += [jobItem.makeNormedName(removes)[0]]
        #     covdir2 = os.path.join(batch.basePath,
        #                            getattr(settings, 'cov_dir_fallback', cov_dir_name))
        #     for name in covmat_try:
        #         covmat = os.path.join(batch.basePath, covdir2, name + '.covmat')
        #         if os.path.exists(covmat):
        #             ini.params['propose_matrix'] = covmat
        #             print('covmat ' + jobItem.name + ' -> ' + name)
        #             hasCov = True
        #             break
        #     if not hasCov:
        #         print('WARNING: no matching specific covmat for ' + jobItem.name)
        ## NOT IMPLEMENTED: start at best fit
        ## ini.params['start_at_bestfit'] = start_at_bestfit
        try:
            dataset_info = datasets_definitions[jobItem.data_set.tag]
        except KeyError:
            raise ValueError("Data set '%s' must be defined." % jobItem.data_set.tag)
        combined_info = merge_info(base_info, model_info, dataset_info)
        combined_info[_output_prefix] = jobItem.chainRoot
        # ???
        # for deffile in settings.defaults:
        #     ini.defaults.append(batch.commonPath + deffile)
        # if hasattr(settings, 'override_defaults'):
        #     ini.defaults = [batch.commonPath + deffile
        #                     for deffile in settings.override_defaults] + ini.defaults
        # Requisites
        modules_used = get_modules(modules_used, combined_info)
        if install_reqs_at:
            combined_info[_path_install] = os.path.abspath(install_reqs_at)
        # Write the info for this job
        # (file name first: same argument order as the other yaml_dump_file calls)
        yaml_dump_file(jobItem.iniFile(), combined_info)
        # if not start_at_bestfit:
        #     setMinimize(jobItem, ini)
        #     variant = '_minimize'
        #     ini.saveFile(jobItem.iniFile(variant))
        # # add ini files for importance sampling runs
        # for imp in jobItem.importanceJobs():
        #     if getattr(imp, 'importanceFilter', None): continue
        #     if batch.hasName(imp.name.replace('_post', '')):
        #         raise Exception('importance sampling something you already have?')
        #     for minimize in (False, True):
        #         if minimize and not getattr(imp, 'want_minimize', True): continue
        #         ini = IniFile()
        #         updateIniParams(ini, imp.importanceSettings, batch.commonPath)
        #         if cosmomcAction == 0 and not minimize:
        #             for deffile in settings.importanceDefaults:
        #                 ini.defaults.append(batch.commonPath + deffile)
        #             ini.params['redo_outroot'] = imp.chainRoot
        #             ini.params['action'] = 1
        #         else:
        #             ini.params['file_root'] = imp.chainRoot
        #         if minimize:
        #             setMinimize(jobItem, ini)
        #             variant = '_minimize'
        #         else:
        #             variant = ''
        #         ini.defaults.append(jobItem.iniFile())
        #         ini.saveFile(imp.iniFile(variant))
        #     if cosmomcAction != 0: break
    # Installing requisites
    if install_reqs_at:
        print("Installing required code and data for the grid.")
        install_reqs(modules_used, path=install_reqs_at, force=install_reqs_force)
    if not interactive:
        return batch
    print('Done... to run do: cobaya-grid-run %s' % batchPath)