Ejemplo n.º 1
0
def load_all_yaml_files(dataset_, dir_):
    """Load every yaml file found directly inside ``dir_`` into ``dataset_``.

    The entries returned by ``os.listdir`` are filtered with ``is_yaml_file``
    (applied to the bare entry name) and each match is passed to
    ``load_yaml_file`` as ``dir_ + "/" + name``. The scan is not recursive.

    Args:
        dataset_: container handed through unchanged to ``load_yaml_file``.
        dir_: path of the directory to scan.
    """
    # Resolve the full list of candidates before loading anything.
    yaml_paths = []
    for entry in os.listdir(dir_):
        if is_yaml_file(entry):
            yaml_paths.append(dir_ + "/" + entry)
    for filename in yaml_paths:
        msg.info("HEP_data_utils.helpers.load_all_yaml_files",
                 "opening yaml file {0}".format(filename),
                 _verbose_level=0)
        load_yaml_file(dataset_, filename)
Ejemplo n.º 2
0
def open_yaml_file(path_):
    """Parse every document of a (possibly multi-document) yaml file.

    Each document yielded by ``yaml.safe_load_all`` is echoed through the
    messaging module at verbosity level 1 and appended to the result.

    Args:
        path_: path of the yaml file to read.

    Returns:
        list: the parsed documents, in file order. If a ``yaml.YAMLError`` is
        raised part-way through, the exception is printed, ``msg.fatal`` is
        called, and (should ``msg.fatal`` return) the documents parsed so far
        are returned.
    """
    data = []
    # The context manager closes the handle on every path, including a parse
    # failure; previously the file was opened and never closed.
    with open(path_, 'r') as yaml_file:
        try:
            for datum in yaml.safe_load_all(yaml_file):
                msg.info("HEP_data_utils.helpers.open_yaml_file",
                         "yaml file opened with entries:",
                         _verbose_level=1)
                msg.check_verbosity_and_print(yaml.safe_dump(datum),
                                              _verbose_level=1)
                data.append(datum)
        except yaml.YAMLError as exc:
            print(exc)
            msg.fatal(
                "HEP_data_utils.helpers.open_yaml_file",
                "Exception thrown when opening the yaml file (see previous messages)"
            )
    return data
Ejemplo n.º 3
0
def load_submission_file(dataset_, path_, fname_=""):
    """Load a dataset by following a submission (steering) yaml file.

    The first yaml document supplies dataset-level metadata that is copied
    onto ``dataset_``; every later document must carry a ``data_file`` entry
    naming a yaml file in the same directory, which is loaded with the
    document itself passed as ``metadata_global_``.

    Args:
        dataset_: container that receives the metadata and the distributions.
        path_: path of the submission file, or of its directory when
            ``fname_`` is given.
        fname_: optional file name appended to ``path_`` with a "/".
    """
    source = "HEP_data_utils.helpers.load_submission_file"
    if len(fname_) > 0:
        path_ = path_ + "/" + fname_
    documents = open_yaml_file(path_)

    # Document 0: dataset-level metadata.
    header = documents[0]
    msg.info(source,
             "submission file with the following metadata:",
             _verbose_level=1)
    msg.check_verbosity_and_print(yaml.safe_dump(header), _verbose_level=1)
    first_resource = header["additional_resources"][0]
    dataset_._description = first_resource["description"]
    dataset_._location = first_resource["location"]
    dataset_._comment = header["comment"]
    dataset_._hepdata_doi = header["hepdata_doi"]

    # Remaining documents: one table definition each.
    for datum in documents[1:]:
        msg.info(source,
                 "submission file entry with the following definitions:",
                 _verbose_level=1)
        msg.check_verbosity_and_print(yaml.safe_dump(datum), _verbose_level=1)
        filename = hlp.get_directory(path_) + "/" + datum["data_file"]
        if not os.path.isfile(filename):
            msg.fatal(
                source,
                "submission file asks for a yaml file called {0} but none exists"
                .format(filename))
        msg.info(source,
                 "opening yaml file {0}".format(filename),
                 _verbose_level=0)
        load_yaml_file(dataset_, filename, metadata_global_=datum)
Ejemplo n.º 4
0
def get_error_from_yaml_map(distribution_, error_, pt_idx_, err_idx_=0):
    """Store one yaml error entry on a distribution at a given point index.

    The error arrays are created lazily (zero-filled, one slot per point of
    ``distribution_``) the first time a given error key is seen.

    Args:
        distribution_: object exposing ``_symm_errors``, ``_asymm_errors_up``
            and ``_asymm_errors_down`` mappings and supporting ``len()``.
        error_: mapping holding either a ``symerror`` value or an
            ``asymerror`` mapping with ``plus`` and ``minus`` entries, and
            optionally a ``label``.
        pt_idx_: index of the point the error belongs to.
        err_idx_: position of this error within the point's error list; used
            to build the fallback key ``"err<idx>"`` when no label is given.

    Returns:
        The key under which the error was stored (label or fallback key).
    """
    source = "HEP_data_utils.helpers.get_error_from_yaml_map"
    key = error_.get("label", "err{0}".format(err_idx_))
    if "symerror" in error_:
        if key not in distribution_._symm_errors:
            msg.info(source,
                     "Creating symmetric error {0} with length {1}".format(
                         key, len(distribution_)),
                     _verbose_level=1)
            distribution_._symm_errors[key] = np.zeros(
                shape=(len(distribution_)))
        distribution_._symm_errors[key][pt_idx_] = error_["symerror"]
    elif "asymerror" in error_:
        err_asymm = error_["asymerror"]
        if key not in distribution_._asymm_errors_up:
            msg.info(source,
                     "Creating asymmetric error {0} with length {1}".format(
                         key, len(distribution_)),
                     _verbose_level=1)
            distribution_._asymm_errors_up[key] = np.zeros(
                shape=(len(distribution_)))
            distribution_._asymm_errors_down[key] = np.zeros(
                shape=(len(distribution_)))
        # "plus" and "minus" are validated the same way: a missing entry and
        # an explicit null are both reported, instead of letting a null
        # "plus" fail later inside the numpy assignment.
        err_plus = err_asymm.get("plus", None)
        if err_plus is None:
            msg.fatal(source,
                      "No entry named \"plus\" for error \"asymerror\"")
        else:
            distribution_._asymm_errors_up[key][pt_idx_] = err_plus
        err_minus = err_asymm.get("minus", None)
        if err_minus is None:
            msg.fatal(source,
                      "No entry named \"minus\" for error \"asymerror\"")
        else:
            distribution_._asymm_errors_down[key][pt_idx_] = err_minus
    else:
        # Show the offending map before reporting the failure.
        print(error_)
        msg.fatal(source,
                  "map does not have an entry called symerror or asymerror")
    return key
Ejemplo n.º 5
0
def load_dataset(dataset_, path_):
    """Populate ``dataset_`` from a directory, a submission file or a yaml file.

    Dispatch, after normalising ``path_`` with ``hlp.remove_subleading``:
      * directory containing ``submission.yaml`` -> ``load_submission_file``
      * directory without one                    -> ``load_all_yaml_files``
      * submission file                          -> ``load_submission_file``
      * any other yaml file                      -> ``load_yaml_file``
    A non-directory path that ``is_yaml_file`` rejects is reported through
    ``msg.fatal``.

    Args:
        dataset_: container passed on to the selected loader.
        path_: file or directory to load from.
    """
    source = "HEP_data_utils.helpers.load_dataset"
    path_ = hlp.remove_subleading(path_, "/")

    if not os.path.isdir(path_):
        # Single-file input.
        if is_yaml_file(path_) == False:
            msg.fatal(
                source,
                "{0} doesn't seem to be a yaml file or a directory... I don't know what to do with it"
                .format(path_))
        if is_submission_file(path_):
            msg.info(
                source,
                "{0} is a submission.yaml file... I will use this to steer the directory"
                .format(path_))
            load_submission_file(dataset_, path_)
            return
        msg.info(
            source,
            "Interpreting {0} as a yaml file... I will use it as my only input"
            .format(path_))
        load_yaml_file(dataset_, path_)
        return

    # Directory input.
    msg.info(
        source,
        "{0} is a directory... I am expanding the entries (but will only go one directory deep!)"
        .format(path_))
    if os.path.isfile(path_ + "/submission.yaml"):
        msg.info(
            source,
            "submission.yaml file found in directory {0}... I will use this to steer the directory"
            .format(path_))
        load_submission_file(dataset_, path_, "submission.yaml")
        return
    msg.info(
        source,
        "no submission.yaml file found in directory {0}... I will open all available yaml files"
        .format(path_))
    load_all_yaml_files(dataset_, path_)
Ejemplo n.º 6
0
 def print_keys(self):
     """Print the keys of all stored 1D and 2D distributions.

     After listing both sets of keys, each 2D distribution is reported
     again together with its local keys, formatted as ``<local-key>@<index>``
     using the distribution's ``_local_key_indices`` lookup.
     """
     origin = "Distribution_store.print_keys"

     msg.info(origin, "keys for _distributions_1D are:")
     for key_1d in self._distributions_1D:
         print(key_1d)

     msg.info(origin, "keys for _distributions_2D are:")
     for key_2d in self._distributions_2D:
         print(key_2d)

     for key_2d in self._distributions_2D:
         dist_2d = self._distributions_2D[key_2d]
         labelled_keys = []
         for little_key in dist_2d._local_keys:
             labelled_keys.append("{0}@{1}".format(
                 little_key, dist_2d._local_key_indices[little_key]))
         msg.info(
             origin,
             "2D distribution [key={0}] with local-keys: {1}".format(
                 key_2d, labelled_keys))

     msg.info(
         origin,
         "N.B. you can rename these keys using obj.rename(<old-key>,<new-key>)",
         _verbose_level=0)
Ejemplo n.º 7
0
 def rename(self, old_key_, new_key_):
     """Rename a distribution key, and matching 2D local keys, in this store.

     The key is replaced wherever it occurs: as a key of
     ``_distributions_1D``, as a key of ``_distributions_2D``, and as an
     entry of any 2D distribution's ``_local_keys`` list. A warning is
     emitted when nothing matched.

     Args:
         old_key_: key to look for.
         new_key_: replacement key.
     """
     something_done = False
     # "\text" in the first argument is a TAB followed by "ext": it undoes the
     # escape that happens when a caller writes "\text" in a non-raw string.
     old_key_ = old_key_.replace("\text", "\\text")
     # A direct membership test replaces the loop that popped from and
     # inserted into the dict while iterating over it.
     if old_key_ in self._distributions_1D:
         self._distributions_1D[new_key_] = self._distributions_1D.pop(
             old_key_)
         msg.info(
             "Distribution_store.rename",
             "Store \"{0}\" renaming 1D distribution key {1} to {2}".format(
                 self._name, old_key_, new_key_),
             _verbose_level=0)
         something_done = True
     # Iterate over a snapshot of the keys because the dict may be modified
     # inside the loop body.
     for key in list(self._distributions_2D):
         dist_key = key
         if old_key_ == key:
             self._distributions_2D[new_key_] = self._distributions_2D.pop(
                 old_key_)
             msg.info(
                 "Distribution_store.rename",
                 "Store \"{0}\" renaming 2D distribution key {1} to {2}".
                 format(self._name, old_key_, new_key_),
                 _verbose_level=0)
             dist_key = new_key_
             something_done = True
         distribution = self._distributions_2D[dist_key]
         local_keys = distribution._local_keys
         indices = [
             idx for idx, k2 in enumerate(local_keys) if k2 == old_key_
         ]
         # The previous guard compared the string key against these integer
         # positions, so it always skipped and no sub-key was ever renamed.
         if not indices: continue
         for idx in indices:
             local_keys[idx] = new_key_
             msg.info(
                 "Distribution_store.rename",
                 "Store \"{0}\" using 2D distribution key {1}... renaming subkey {2} to {3}"
                 .format(self._name, dist_key, old_key_, new_key_),
                 _verbose_level=0)
             something_done = True
         # NOTE(review): print_keys looks local keys up in _local_key_indices,
         # so that mapping is kept in step here when it is a plain dict --
         # confirm against the 2D distribution class.
         index_map = getattr(distribution, "_local_key_indices", None)
         if isinstance(index_map, dict) and old_key_ in index_map:
             index_map[new_key_] = index_map.pop(old_key_)
     if not something_done:
         msg.warning(
             "Distribution_store.rename",
             "Store \"{0}\" with nothing done for old_key_={1}, new_key_={2}"
             .format(self._name, old_key_, new_key_),
             _verbose_level=0)
Ejemplo n.º 8
0
 def print_meta(self):
     """Print every metadata entry of this object as ``<key>   :   <value>``."""
     msg.info("Distribution.print_meta",
              "printing all metadata for object " + self._name)
     line_template = "{0}   :   {1}"
     for meta_key in self._meta:
         meta_value = self._meta[meta_key]
         print(line_template.format(meta_key, meta_value))
Ejemplo n.º 9
0
def print_help () :
	"""Emit the command-line usage of study_DM_paper.py through msg.info."""
	source = "study_DM_paper.py:print_help"
	help_lines = (
		"Usage is: python study_DM_paper.py <single yaml-file OR directory with yaml-files OR submission.yaml steering file>",
		"I'm currently configured specifically for the DM paper format",
	)
	for line in help_lines :
		msg.info(source, line)
Ejemplo n.º 10
0
import os, sys, yaml
import general_utils.messaging as msg
import general_utils.helpers as hlp
import HEP_data_utils.helpers as HEPData_hlp
from HEP_data_utils.data_structures import *


def print_help () :
	"""Emit the command-line usage of study_DM_paper.py through msg.info."""
	source = "study_DM_paper.py:print_help"
	help_lines = (
		"Usage is: python study_DM_paper.py <single yaml-file OR directory with yaml-files OR submission.yaml steering file>",
		"I'm currently configured specifically for the DM paper format",
	)
	for line in help_lines :
		msg.info(source, line)

if __name__ == "__main__" :
	#  Script entry point: load a HEPData dataset from the single command-line
	#  argument, list its keys, then rename them to readable names.
	#  Welcome, check arguments and set verbosity (to be a config in future)
	msg.info("study_DM_paper.py","Running program")
	msg.VERBOSE_LEVEL = -1
	#  sys.argv includes the program name, so exactly one user argument is expected
	if len(sys.argv) != 2 :
		print_help()
		msg.fatal("study_DM_paper.py","{0} argument(s) provided where 2 were expected".format(len(sys.argv)))
	if sys.argv[1] == "--help" or sys.argv[1] == "-h" :
		print_help()
		#  sys.exit() rather than the site-provided exit(), which is not guaranteed to exist (e.g. under python -S)
		sys.exit()
	#  Create dataset container and load from yaml file(s)
	dataset = Distribution_store("Darren DM paper")
	HEPData_hlp.load_dataset ( dataset , sys.argv[1] )
	msg.info("study_DM_paper.py","Dataset loaded with the following entries")
	dataset.print_keys()
	#  Set a table key to something intelligible. If I don't know what info is contained within a file, I will print its metadata to find out. This will help me set a nicely informative key.
	dataset.print_meta("|10.17182/hepdata.78366.v2/t1|Table1.yaml|measured $R^\\text{miss}$|")
	dataset.rename("|10.17182/hepdata.78366.v2/t1|Table1.yaml|measured $R^\\text{miss}$|","R_pT_miss_geq1j_meas")
	#  Do for the rest of the tables. I put them in a file to make our life easier.
	dataset.load_keys("DM_paper_keys.dat")
Ejemplo n.º 11
0
def _unique_distribution_key(store_, key_):
    """Return ``key_``, or ``key_`` plus a numbered suffix if it is taken.

    The suffix is ``-duplicated-auto-key;N`` with N counting up from 1 until
    ``store_.get`` reports no entry for the candidate key.
    """
    if store_.get(key_, None) is None:
        return key_
    counter = 1
    candidate = "{0}-duplicated-auto-key;{1}".format(key_, counter)
    while store_.get(candidate, None) is not None:
        counter += 1
        candidate = "{0}-duplicated-auto-key;{1}".format(key_, counter)
    return candidate


def load_distributions_from_yaml(dataset_, dep_vars_, indep_vars_, path_,
                                 **argv):
    """Build one distribution per dependent variable and store it in a dataset.

    For every entry of ``dep_vars_`` a ``Distribution_1D`` or
    ``Distribution_2D`` is created, filled with values, errors, metadata and
    bins, and stored in ``dataset_._distributions_1D`` or
    ``dataset_._distributions_2D`` under the key
    ``|<table_doi>|<data_file or path_>|<name>|`` (``table_doi`` only when
    present in the global metadata). A key that is already taken receives a
    ``-duplicated-auto-key;N`` suffix.

    Args:
        dataset_: store exposing ``_distributions_1D`` / ``_distributions_2D``.
        dep_vars_: sequence of dependent-variable mappings, each with
            ``header`` and ``values`` entries.
        indep_vars_: sequence of independent-variable mappings, each with a
            ``header`` entry; passed on to the bin-setting helpers.
        path_: path of the yaml file being loaded (used in keys and messages).
        **argv: optional settings:
            n_dim_ (default 1): number of bin dimensions, 1 or 2.
            metadata_global_ (default {}): stored under ``GLOBAL::<key>``.
            metadata_local_ (default {}): stored under ``LOCAL::<key>``,
                skipping ``dependent_variables`` / ``independent_variables``.
    """
    source = "HEP_data_utils.helpers.load_distributions_from_yaml"
    n_dim_ = argv.get("n_dim_", 1)
    extra_info_global = argv.get("metadata_global_", {})
    # Previously read from "metadata_global_" as well (copy-paste slip), which
    # duplicated the global metadata under the LOCAL:: prefix.
    extra_info_local = argv.get("metadata_local_", {})
    for dep_var in dep_vars_:
        # --- key prefix ---
        dist_key = "|"
        if extra_info_global.get("table_doi", None) is not None:
            dist_key = dist_key + str(extra_info_global["table_doi"]) + "|"
        if extra_info_global.get("data_file", None) is not None:
            dist_key = dist_key + str(extra_info_global["data_file"]) + "|"
        else:
            dist_key = dist_key + path_ + "|"
        # --- container; an unsupported n_dim_ is reported when the bins are set ---
        if n_dim_ == 1:
            distribution = Distribution_1D()
        elif n_dim_ == 2:
            distribution = Distribution_2D()
        else:
            distribution = Distribution()
        # --- descriptive fields and metadata ---
        distribution._description = dep_var["header"].get("name", "unknown")
        distribution._name = distribution._description
        distribution._dep_var = dep_var["header"].get("name", "unknown")
        distribution._indep_var = indep_vars_[0]["header"].get(
            "name", "unknown")
        distribution._units = dep_var["header"].get("units", "unknown")
        for key in extra_info_local:
            if key == "dependent_variables" or key == "independent_variables":
                continue
            distribution._meta["LOCAL::" + key] = extra_info_local[key]
        for key in extra_info_global:
            distribution._meta["GLOBAL::" + key] = extra_info_global[key]
        for key in dep_var:
            if key == "values" or key == "errors":
                continue
            distribution._meta["LOCAL::DEP_VARS::" + key] = dep_var[key]
        # --- values: filled first, because the error arrays are sized from
        # len(distribution) when they are created ---
        for entry in dep_var["values"]:
            try:
                distribution._values = np.append(distribution._values,
                                                 entry["value"])
            except KeyError as exc:
                msg.error(source,
                          "KeyError: {0}".format(exc),
                          _verbose_level=-1)
                msg.fatal(
                    source,
                    "Entry with no \"value\" when trying to create distribution {0} in file {1}"
                    .format(distribution._description, path_))
        # --- errors: a point without an "errors" entry has none ---
        for pt_idx, entry in enumerate(dep_var["values"]):
            try:
                errors = entry["errors"]
            except KeyError as exc:
                msg.error(source,
                          "KeyError: {0}".format(exc),
                          _verbose_level=1)
                msg.warning(
                    source,
                    "Entry with no \"errors\" when trying to create distribution {0} in file {1}... Assuming there are none"
                    .format(distribution._description, path_),
                    _verbose_level=1)
                errors = []
            for err_idx, error in enumerate(errors):
                get_error_from_yaml_map(distribution, error, pt_idx, err_idx)
        for indep_var in indep_vars_:
            for key in indep_var:
                if key == "values": continue
                distribution._meta["LOCAL::INDEP_VARS::" +
                                   key] = indep_var[key]
        # --- bins ---
        expected_size = len(distribution._values)
        if n_dim_ == 1:
            set_1D_bins(distribution, indep_vars_)
        elif n_dim_ == 2:
            set_2D_bins(distribution, indep_vars_)
            regularise_bins(distribution)
        else:
            msg.fatal(
                source,
                "number of bin dimensions is {0} but I can only handle 1 or 2".
                format(n_dim_))
        # --- every error source must have one entry per value ---
        for error in [
                distribution._symm_errors, distribution._asymm_errors_up,
                distribution._asymm_errors_down
        ]:
            for key in error:
                this_err_size = len(error[key])
                if this_err_size == expected_size: continue
                msg.fatal(
                    source,
                    "error source {0} has length {1} for distribution [{2}] where {3} was expected"
                    .format(key, this_err_size, distribution._description,
                            expected_size))
        msg.info(source,
                 "yaml file loaded with the following entries",
                 _verbose_level=0)
        dist_key = dist_key + str(distribution._name) + "|"
        if msg.VERBOSE_LEVEL >= 0: print(distribution)
        # --- store under a unique key; the counter is a real integer, so the
        # suffix goes ;9 -> ;10 -> ;11 ... -> ;19 -> ;20 ---
        if n_dim_ == 1:
            dist_key = _unique_distribution_key(dataset_._distributions_1D,
                                                dist_key)
            dataset_._distributions_1D[dist_key] = distribution
        if n_dim_ == 2:
            dist_key = _unique_distribution_key(dataset_._distributions_2D,
                                                dist_key)
            dataset_._distributions_2D[dist_key] = distribution
import os, sys, yaml
import general_utils.messaging as msg
import general_utils.helpers as hlp
import HEP_data_utils.helpers as HEPData_hlp
from HEP_data_utils.data_structures import *

if __name__ == "__main__":
    msg.info("study_yaml.py", "Running program")
    msg.VERBOSE_LEVEL = 0
    if len(sys.argv) != 2:
        msg.fatal(
            "study_yaml.py",
            "{0} argument(s) provided where 2 were expected".format(
                len(sys.argv)))
    in_str = sys.argv[1]
    in_str = hlp.remove_subleading(in_str, "/")
    dataset = Dataset()
    if os.path.isdir(in_str):
        msg.info(
            "study_yaml.py",
            "{0} is a directory... I am expanding the entries (but will only go one directory deep!)"
            .format(in_str))
        if os.path.isfile(in_str + "/submission.yaml"):
            msg.info(
                "study_yaml.py",
                "submission.yaml file found in directory {0}... I will use this to steer the directory"
                .format(in_str))
            HEPData_hlp.load_submission_file(dataset, in_str,
                                             "submission.yaml")
        else:
            msg.info(