def load_all_yaml_files(dataset_, dir_):
    """Open every yaml file found directly inside *dir_* and load it into *dataset_*."""
    yaml_paths = (dir_ + "/" + entry for entry in os.listdir(dir_) if is_yaml_file(entry))
    for yaml_path in yaml_paths:
        msg.info("HEP_data_utils.helpers.load_all_yaml_files",
                 "opening yaml file {0}".format(yaml_path),
                 _verbose_level=0)
        load_yaml_file(dataset_, yaml_path)
def open_yaml_file(path_):
    """Parse every yaml document in the file at *path_* and return them as a list.

    On a yaml parsing error the exception is printed and msg.fatal is invoked
    (which is expected to abort); otherwise the list of parsed documents is
    returned.
    """
    data = []
    try:
        # FIX: use a context manager so the file handle is always closed.
        # The original called open() with no close(), leaking the handle —
        # permanently so on the exception path.
        with open(path_, 'r') as yaml_file:
            # safe_load_all is lazy, so it must be consumed inside the "with".
            for datum in yaml.safe_load_all(yaml_file):
                msg.info("HEP_data_utils.helpers.open_yaml_file",
                         "yaml file opened with entries:", _verbose_level=1)
                msg.check_verbosity_and_print(yaml.safe_dump(datum), _verbose_level=1)
                data.append(datum)
    except yaml.YAMLError as exc:
        print(exc)
        msg.fatal(
            "HEP_data_utils.helpers.open_yaml_file",
            "Exception thrown when opening the yaml file (see previous messages)")
    return data
def load_submission_file(dataset_, path_, fname_=""):
    """Steer dataset loading from a HEPData submission file.

    Reads the submission yaml at *path_* (joined with *fname_* when given),
    records the submission-level metadata on *dataset_*, then loads every
    data file the submission references.
    """
    if len(fname_) > 0:
        path_ = path_ + "/" + fname_
    documents = open_yaml_file(path_)
    submission_metadata = documents[0]
    msg.info("HEP_data_utils.helpers.load_submission_file",
             "submission file with the following metadata:", _verbose_level=1)
    msg.check_verbosity_and_print(yaml.safe_dump(submission_metadata), _verbose_level=1)
    first_resource = submission_metadata["additional_resources"][0]
    dataset_._description = first_resource["description"]
    dataset_._location = first_resource["location"]
    dataset_._comment = submission_metadata["comment"]
    dataset_._hepdata_doi = submission_metadata["hepdata_doi"]
    # Every document after the first describes one data table.
    for table_entry in documents[1:]:
        msg.info("HEP_data_utils.helpers.load_submission_file",
                 "submission file entry with the following definitions:", _verbose_level=1)
        msg.check_verbosity_and_print(yaml.safe_dump(table_entry), _verbose_level=1)
        filename = hlp.get_directory(path_) + "/" + table_entry["data_file"]
        if not os.path.isfile(filename):
            msg.fatal(
                "HEP_data_utils.helpers.load_submission_file",
                "submission file asks for a yaml file called {0} but none exists".format(filename))
        msg.info("HEP_data_utils.helpers.load_submission_file",
                 "opening yaml file {0}".format(filename), _verbose_level=0)
        load_yaml_file(dataset_, filename, metadata_global_=table_entry)
def get_error_from_yaml_map(distribution_, error_, pt_idx_, err_idx_=0):
    """Record one yaml error entry on *distribution_* at point index *pt_idx_*.

    Symmetric errors are stored in _symm_errors; asymmetric ones in
    _asymm_errors_up/_asymm_errors_down.  Sources with no "label" fall back
    to the auto-generated key "err<err_idx_>".  Returns the storage key.
    Calls msg.fatal (expected to abort) on malformed entries.
    """
    key = error_.get("label", "err{0}".format(err_idx_))
    if "symerror" in error_:
        if key not in distribution_._symm_errors:
            msg.info("HEP_data_utils.helpers.get_error_from_yaml_map",
                     "Creating symmetric error {0} with length {1}".format(
                         key, len(distribution_)),
                     _verbose_level=1)
            # One slot per point; only pt_idx_ is filled in this call.
            distribution_._symm_errors[key] = np.zeros(shape=(len(distribution_)))
        distribution_._symm_errors[key][pt_idx_] = error_["symerror"]
    elif "asymerror" in error_:
        err_asymm = error_["asymerror"]
        if key not in distribution_._asymm_errors_up:
            msg.info("HEP_data_utils.helpers.get_error_from_yaml_map",
                     "Creating asymmetric error {0} with length {1}".format(
                         key, len(distribution_)),
                     _verbose_level=1)
            distribution_._asymm_errors_up[key] = np.zeros(shape=(len(distribution_)))
            distribution_._asymm_errors_down[key] = np.zeros(shape=(len(distribution_)))
        if "plus" not in err_asymm:
            msg.fatal("HEP_data_utils.helpers.get_error_from_yaml_map",
                      "No entry named \"plus\" for error \"asymerror\"")
        else:
            distribution_._asymm_errors_up[key][pt_idx_] = err_asymm["plus"]
        # FIX: "is None" instead of "== None" (same semantics, correct idiom);
        # note this branch, unlike "plus", also treats an explicit None value
        # as missing — preserved from the original.
        if err_asymm.get("minus", None) is None:
            msg.fatal("HEP_data_utils.helpers.get_error_from_yaml_map",
                      "No entry named \"minus\" for error \"asymerror\"")
        else:
            distribution_._asymm_errors_down[key][pt_idx_] = err_asymm["minus"]
    else:
        print(error_)
        msg.fatal("HEP_data_utils.helpers.get_error_from_yaml_map",
                  "map does not have an entry called symerror or asymerror")
    return key
def load_dataset(dataset_, path_):
    """Load HEPData yaml content from *path_* into *dataset_*.

    *path_* may be: a directory (steered by its submission.yaml when present,
    otherwise all yaml files are opened), a submission.yaml file, or a single
    data yaml file.  Anything else is fatal.
    """
    path_ = hlp.remove_subleading(path_, "/")
    if os.path.isdir(path_):
        msg.info(
            "HEP_data_utils.helpers.load_dataset",
            "{0} is a directory... I am expanding the entries (but will only go one directory deep!)"
            .format(path_))
        if os.path.isfile(path_ + "/submission.yaml"):
            msg.info(
                "HEP_data_utils.helpers.load_dataset",
                "submission.yaml file found in directory {0}... I will use this to steer the directory"
                .format(path_))
            load_submission_file(dataset_, path_, "submission.yaml")
        else:
            msg.info(
                "HEP_data_utils.helpers.load_dataset",
                "no submission.yaml file found in directory {0}... I will open all available yaml files"
                .format(path_))
            load_all_yaml_files(dataset_, path_)
        return
    # Single-file input: it must at least look like a yaml file.
    # FIX: "not ..." instead of the "== False" anti-idiom.
    if not is_yaml_file(path_):
        msg.fatal(
            "HEP_data_utils.helpers.load_dataset",
            "{0} doesn't seem to be a yaml file or a directory... I don't know what to do with it"
            .format(path_))
    if is_submission_file(path_):
        msg.info(
            "HEP_data_utils.helpers.load_dataset",
            "{0} is a submission.yaml file... I will use this to steer the directory"
            .format(path_))
        load_submission_file(dataset_, path_)
    else:
        msg.info(
            "HEP_data_utils.helpers.load_dataset",
            "Interpreting {0} as a yaml file... I will use it as my only input"
            .format(path_))
        load_yaml_file(dataset_, path_)
def print_keys(self):
    """Print every stored 1D and 2D distribution key, plus the local-keys
    (with their indices) of each 2D distribution."""
    msg.info("Distribution_store.print_keys", "keys for _distributions_1D are:")
    for dist_key in self._distributions_1D:
        print(dist_key)
    msg.info("Distribution_store.print_keys", "keys for _distributions_2D are:")
    for dist_key in self._distributions_2D:
        print(dist_key)
    for dist_key, dist in self._distributions_2D.items():
        local_key_summary = [
            "{0}@{1}".format(local_key, dist._local_key_indices[local_key])
            for local_key in dist._local_keys
        ]
        msg.info("Distribution_store.print_keys",
                 "2D distribution [key={0}] with local-keys: {1}".format(
                     dist_key, local_key_summary))
    msg.info("Distribution_store.print_keys",
             "N.B. you can rename these keys using obj.rename(<old-key>,<new-key>)",
             _verbose_level=0)
def rename(self, old_key_, new_key_):
    """Rename distribution key *old_key_* to *new_key_*.

    Applies to 1D keys, 2D keys, and the local-keys stored inside each 2D
    distribution; warns if nothing matched anywhere.
    """
    something_done = False
    # "\text" in a non-raw Python literal is <TAB> + "ext"; map it back to the
    # LaTeX macro "\\text" so user-typed keys match the stored ones.
    old_key_ = old_key_.replace("\text", "\\text")
    # FIX: iterate over snapshots (list(...)) because the loop bodies pop and
    # insert keys in the very dicts being iterated.
    for key in list(self._distributions_1D):
        if old_key_ != key:
            continue
        self._distributions_1D[new_key_] = self._distributions_1D.pop(old_key_)
        msg.info(
            "Distribution_store.rename",
            "Store \"{0}\" renaming 1D distribution key {1} to {2}".format(
                self._name, old_key_, new_key_),
            _verbose_level=0)
        something_done = True
    for key in list(self._distributions_2D):
        dist_key = key
        if old_key_ == key:
            self._distributions_2D[new_key_] = self._distributions_2D.pop(old_key_)
            msg.info(
                "Distribution_store.rename",
                "Store \"{0}\" renaming 2D distribution key {1} to {2}".
                format(self._name, old_key_, new_key_),
                _verbose_level=0)
            dist_key = new_key_
            something_done = True
        indices = [
            idx for idx, k2 in enumerate(
                self._distributions_2D[dist_key]._local_keys)
            if k2 == old_key_
        ]
        # BUG FIX: the original tested "old_key_ not in indices" — a string
        # against a list of ints — which is always True, so the local-key
        # rename below could never execute.  Skip only when nothing matched.
        if not indices:
            continue
        for idx in indices:
            self._distributions_2D[dist_key]._local_keys[idx] = new_key_
            msg.info(
                "Distribution_store.rename",
                "Store \"{0}\" using 2D distribution key {1}... renaming subkey {2} to {3}"
                .format(self._name, dist_key, old_key_, new_key_),
                _verbose_level=0)
            something_done = True
    if not something_done:
        msg.warning(
            "Distribution_store.rename",
            "Store \"{0}\" with nothing done for old_key_={1}, new_key_={2}"
            .format(self._name, old_key_, new_key_),
            _verbose_level=0)
def print_meta(self):
    """Dump every metadata key/value pair attached to this distribution."""
    msg.info("Distribution.print_meta",
             "printing all metadata for object " + self._name)
    for meta_key, meta_value in self._meta.items():
        print("{0} : {1}".format(meta_key, meta_value))
def print_help():
    """Print command-line usage for study_DM_paper.py."""
    usage = "Usage is: python study_DM_paper.py <single yaml-file OR directory with yaml-files OR submission.yaml steering file>"
    msg.info("study_DM_paper.py:print_help", usage)
    msg.info("study_DM_paper.py:print_help",
             "I'm currently configured specifically for the DM paper format")
import os, sys, yaml

import general_utils.messaging as msg
import general_utils.helpers as hlp
import HEP_data_utils.helpers as HEPData_hlp
from HEP_data_utils.data_structures import *


def print_help():
    """Print command-line usage for this script."""
    msg.info("study_DM_paper.py:print_help", "Usage is: python study_DM_paper.py <single yaml-file OR directory with yaml-files OR submission.yaml steering file>")
    msg.info("study_DM_paper.py:print_help", "I'm currently configured specifically for the DM paper format")


if __name__ == "__main__":
    # Welcome, check arguments and set verbosity (to be a config in future)
    msg.info("study_DM_paper.py", "Running program")
    msg.VERBOSE_LEVEL = -1
    if len(sys.argv) != 2:
        print_help()
        msg.fatal("study_DM_paper.py",
                  "{0} argument(s) provided where 2 were expected".format(len(sys.argv)))
    if sys.argv[1] in ("--help", "-h"):
        print_help()
        exit()
    # Create dataset container and load from yaml file(s)
    dataset = Distribution_store("Darren DM paper")
    HEPData_hlp.load_dataset(dataset, sys.argv[1])
    msg.info("study_DM_paper.py", "Dataset loaded with the following entries")
    dataset.print_keys()
    # Set a table key to something intelligible. If I don't know what info is
    # contained within a file, I will print it's metadata to find out. This
    # will help me set a nicely informative key.
    dataset.print_meta("|10.17182/hepdata.78366.v2/t1|Table1.yaml|measured $R^\\text{miss}$|")
    dataset.rename("|10.17182/hepdata.78366.v2/t1|Table1.yaml|measured $R^\\text{miss}$|", "R_pT_miss_geq1j_meas")
    # Do for the rest of the tables. I put them in a file to make our life easier.
    dataset.load_keys("DM_paper_keys.dat")
def load_distributions_from_yaml(dataset_, dep_vars_, indep_vars_, path_, **argv):
    """Build one distribution per dependent variable and register it on *dataset_*.

    Keyword arguments (via **argv):
      n_dim_           -- number of independent-variable dimensions (1 or 2, default 1)
      metadata_global_ -- dict of submission-level metadata, stored under "GLOBAL::" keys

    NOTE(review): both extra_info_global and extra_info_local below read the
    same "metadata_global_" key; the local one presumably should read a
    "metadata_local_" argument instead -- confirm against the callers.
    """
    n_dim_ = argv.get("n_dim_", 1)
    extra_info_global = argv.get("metadata_global_", {})
    extra_info_local = argv.get("metadata_global_", {})
    for var_idx in range(0, len(dep_vars_)):
        dep_var = dep_vars_[var_idx]
        distribution = Distribution()
        # Build a "|"-separated storage key from the table doi and the data
        # file (falling back to the input path when no data file is known).
        dist_key = "|"
        if extra_info_global.get("table_doi", None) != None:
            dist_key = dist_key + str(extra_info_global["table_doi"]) + "|"
        if extra_info_global.get("data_file", None) != None:
            dist_key = dist_key + str(extra_info_global["data_file"]) + "|"
        else:
            dist_key = dist_key + path_ + "|"
        # Replace the generic container with the dimension-specific type.
        if n_dim_ == 1:
            distribution = Distribution_1D()
        if n_dim_ == 2:
            distribution = Distribution_2D()
        distribution._description = dep_var["header"].get("name", "unknown")
        distribution._name = distribution._description
        distribution._dep_var = dep_var["header"].get("name", "unknown")
        # NOTE(review): only the first independent variable's name is recorded,
        # even when n_dim_ == 2 -- confirm this is intended.
        distribution._indep_var = indep_vars_[0]["header"].get(
            "name", "unknown")
        distribution._units = dep_var["header"].get("units", "unknown")
        # Copy metadata through, skipping the bulky variable lists themselves.
        for key in extra_info_local:
            if key == "dependent_variables" or key == "independent_variables":
                continue
            distribution._meta["LOCAL::" + key] = extra_info_local[key]
        for key in extra_info_global:
            distribution._meta["GLOBAL::" + key] = extra_info_global[key]
        for key in dep_var:
            if key == "values":
                continue
            if key == "errors":
                continue
            distribution._meta["LOCAL::DEP_VARS::" + key] = dep_var[key]
        # First pass over the points: collect the central values.  A point
        # with no "value" entry is fatal.
        pt_idx = 0
        for entry in dep_var["values"]:
            try:
                distribution._values = np.append(distribution._values,
                                                 entry["value"])
            except KeyError as exc:
                msg.error(
                    "HEP_data_utils.helpers.load_distributions_from_yaml",
                    "KeyError: {0}".format(exc), _verbose_level=-1)
                msg.fatal(
                    "HEP_data_utils.helpers.load_distributions_from_yaml",
                    "Entry with no \"value\" when trying to create distribution {0} in file {1}"
                    .format(distribution._description, path_))
        # Second pass: collect per-point errors.  Points without an "errors"
        # entry are tolerated (assumed to have none).
        for entry in dep_var["values"]:
            try:
                errors = entry["errors"]
            except KeyError as exc:
                msg.error(
                    "HEP_data_utils.helpers.load_distributions_from_yaml",
                    "KeyError: {0}".format(exc), _verbose_level=1)
                msg.warning(
                    "HEP_data_utils.helpers.load_distributions_from_yaml",
                    "Entry with no \"errors\" when trying to create distribution {0} in file {1}... Assuming there are none"
                    .format(distribution._description, path_),
                    _verbose_level=1)
                errors = []
            err_idx = 0
            for error in errors:
                get_error_from_yaml_map(distribution, error, pt_idx, err_idx)
                err_idx = err_idx + 1
            pt_idx = pt_idx + 1
        # Record independent-variable metadata (everything but the values).
        for var_idx in range(0, len(indep_vars_)):
            indep_var = indep_vars_[var_idx]
            for key in indep_var:
                if key == "values":
                    continue
                distribution._meta["LOCAL::INDEP_VARS::" + key] = indep_var[key]
        expected_size = len(distribution._values)
        if n_dim_ == 1:
            set_1D_bins(distribution, indep_vars_)
        elif n_dim_ == 2:
            set_2D_bins(distribution, indep_vars_)
            regularise_bins(distribution)
        else:
            msg.fatal(
                "HEP_data_utils.helpers.load_distributions_from_yaml",
                "number of bin dimensions is {0} but I can only handle 1 or 2".
                format(n_dim_))
        # Sanity check: every error source must carry one entry per point.
        for error in [
                distribution._symm_errors, distribution._asymm_errors_up,
                distribution._asymm_errors_down
        ]:
            for key in error:
                this_err_size = len(error[key])
                if this_err_size == expected_size:
                    continue
                msg.fatal(
                    "HEP_data_utils.helpers.load_distributions_from_yaml",
                    "error source {0} has length {1} for distribution [{2}] where {3} was expected"
                    .format(key, this_err_size, distribution._description,
                            expected_size))
        msg.info("HEP_data_utils.helpers.load_distributions_from_yaml",
                 "yaml file loaded with the following entries",
                 _verbose_level=0)
        dist_key = dist_key + str(distribution._name) + "|"
        if msg.VERBOSE_LEVEL >= 0:
            print(distribution)
        # Store the distribution, de-duplicating a taken key by appending a
        # "-duplicated-auto-key;1" suffix and bumping the trailing counter
        # until the key is free.
        if n_dim_ == 1:
            if dataset_._distributions_1D.get(dist_key, None) != None:
                dist_key = dist_key + "-duplicated-auto-key;1"
                while dataset_._distributions_1D.get(dist_key, None) != None:
                    dist_key = dist_key[:-1] + str(1 + int(dist_key[-1:]))
            dataset_._distributions_1D[dist_key] = distribution
        if n_dim_ == 2:
            if dataset_._distributions_2D.get(dist_key, None) != None:
                dist_key = dist_key + "-duplicated-auto-key;1"
                while dataset_._distributions_2D.get(dist_key, None) != None:
                    dist_key = dist_key[:-1] + str(1 + int(dist_key[-1:]))
            dataset_._distributions_2D[dist_key] = distribution
import os, sys, yaml import general_utils.messaging as msg import general_utils.helpers as hlp import HEP_data_utils.helpers as HEPData_hlp from HEP_data_utils.data_structures import * if __name__ == "__main__": msg.info("study_yaml.py", "Running program") msg.VERBOSE_LEVEL = 0 if len(sys.argv) != 2: msg.fatal( "study_yaml.py", "{0} argument(s) provided where 2 were expected".format( len(sys.argv))) in_str = sys.argv[1] in_str = hlp.remove_subleading(in_str, "/") dataset = Dataset() if os.path.isdir(in_str): msg.info( "study_yaml.py", "{0} is a directory... I am expanding the entries (but will only go one directory deep!)" .format(in_str)) if os.path.isfile(in_str + "/submission.yaml"): msg.info( "study_yaml.py", "submission.yaml file found in directory {0}... I will use this to steer the directory" .format(in_str)) HEPData_hlp.load_submission_file(dataset, in_str, "submission.yaml") else: msg.info(