def load_yaml_file(dataset_, path_, **kwargs):
    """Load a single-table HEPData yaml file at path_ into dataset_.

    The file must contain exactly one entry with at most two
    independent_variables; anything else is reported via msg and aborts.
    Optional kwarg "metadata_global_" is forwarded to the loader.
    """
    contents = open_yaml_file(path_)
    if len(contents) != 1:
        msg.fatal(
            "HEP_data_utils.helpers.load_yaml_file",
            "{0} contains {1} entries, but I am only configured for 1... is this really a single distribution/matrix?"
            .format(path_, len(contents)))
    entry = contents[0]
    dep_vars = entry["dependent_variables"]
    indep_vars = entry["independent_variables"]
    n_indep = len(indep_vars)
    if n_indep >= 3:
        # Too many axes for the 1D/2D machinery downstream.
        msg.error(
            "HEP_data_utils.helpers.load_yaml_file",
            "file {0} has {1} independent_variables... I don't know what to do. I'm such a failure, I knew I wasn't cut out for this :(. The problematic entries are as follows:"
            .format(path_, n_indep))
        print(indep_vars)
        msg.fatal("HEP_data_utils.helpers.load_yaml_file",
                  "could not interpret number of independent_variables")
        return
    load_distributions_from_yaml(
        dataset_, dep_vars, indep_vars, path_,
        n_dim_=n_indep,
        metadata_global_=kwargs.get("metadata_global_", {}),
        metadata_local_=entry)
def regularise_bins(dist_2D_):
    """Reshape the flat value list of a Distribution_2D into a 2D matrix.

    Two input layouts are recognised:
      * one (x-label, y-label, value) triplet per entry
        (n_vals == n_bins_x == n_bins_y): values are scattered into a
        square matrix indexed by the sorted unique labels — but only when
        the x and y axes share the same label set;
      * a flattened dense matrix (n_vals == n_bins_x * n_bins_y):
        values are unflattened with the x index varying fastest.
    Any other layout is reported via msg.error and left untouched.
    """
    if not isinstance(dist_2D_, Distribution_2D):
        msg.fatal("HEP_data_utils.helpers.regularise_bins",
                  "argument must be of type Distribution_2D")
    n_vals = len(dist_2D_._values)
    n_bins_x = len(dist_2D_._bin_labels_x)
    n_bins_y = len(dist_2D_._bin_labels_y)
    if n_vals == n_bins_x == n_bins_y:
        # Sorted unique labels define the matrix axes.
        bin_labels_x = sorted(set(dist_2D_._bin_labels_x))
        bin_labels_y = sorted(set(dist_2D_._bin_labels_y))
        if bin_labels_x != bin_labels_y:
            return
        new_values = np.zeros(shape=(len(bin_labels_x), len(bin_labels_y)))
        for x, y, v in zip(dist_2D_._bin_labels_x, dist_2D_._bin_labels_y,
                           dist_2D_._values):
            new_values[bin_labels_x.index(x), bin_labels_y.index(y)] = v
        dist_2D_._values = new_values
        dist_2D_._bin_labels_x = bin_labels_x
        dist_2D_._bin_labels_y = bin_labels_y
    elif n_vals == n_bins_x * n_bins_y:
        new_values = np.zeros(shape=(n_bins_x, n_bins_y))
        # BUG FIX: the original wrote "for x_idx in enumerate(...)", which
        # yields (index, label) tuples; using such a tuple in
        # "x_idx + n_bins_x * y_idx" raises a TypeError.  Only the integer
        # indices are needed here.
        for x_idx in range(n_bins_x):
            for y_idx in range(n_bins_y):
                new_values[x_idx, y_idx] = dist_2D_._values[x_idx + n_bins_x * y_idx]
        dist_2D_._values = new_values
    else:
        msg.error("HEP_data_utils.helpers.regularise_bins",
                  "function not implemented for this type of matrix",
                  _verbose_level=0)
def remove_local_key(self, key_):
    """Delete key_ and its index range from this distribution's local keys.

    An unknown key is reported through msg.error and otherwise ignored.
    """
    if key_ in self._local_keys:
        self._local_keys.remove(key_)
        del self._local_key_indices[key_]
        return
    msg.error(
        "HEP_data_utils.data_structures.Distribution_2D.remove_local_key",
        "key {0} does not exist... returning with nothing done".format(key_),
        _verbose_level=0)
def set_local_key(self, key_, key_idx_lower_, key_idx_upper_):
    """Register key_ as a local key spanning [key_idx_lower_, key_idx_upper_].

    An inverted range (lower > upper) is rejected via msg.error and leaves
    the object unchanged.
    """
    if key_idx_lower_ > key_idx_upper_:
        # BUG FIX: validate the range *before* registering the key -- the
        # original appended key_ to _local_keys first, leaving a dangling
        # key with no index entry when the range was invalid.  The message
        # also printed the two indices swapped relative to its wording.
        msg.error(
            "HEP_data_utils.data_structures.Distribution_2D.set_local_key",
            "lower index {0} cannot be greater than upper index {1}... returning with nothing done"
            .format(key_idx_lower_, key_idx_upper_),
            _verbose_level=0)
        return
    if key_ not in self._local_keys:
        self._local_keys.append(key_)
    self._local_key_indices[key_] = [key_idx_lower_, key_idx_upper_]
def copy_2D_local_keys(self, from_key_, *args):
    """Copy the local-key index mapping of 2D distribution from_key_ onto
    every 2D distribution named in args, replacing their existing keys.

    An unknown from_key_ is reported via msg.error and nothing is done.
    """
    if from_key_ not in self._distributions_2D:
        msg.error(
            "HEP_data_utils.data_structures.Distribution_store.copy_2D_local_keys",
            "key {0} does not exist... returning with nothing done".format(
                from_key_),
            _verbose_level=0)
        return
    from_dist = self._distributions_2D[from_key_]
    for to_key in args:
        to_dist = self._distributions_2D[to_key]
        # BUG FIX: iterate over a snapshot -- remove_local_key mutates
        # _local_keys, and removing from a list while iterating it skips
        # every other entry, leaving stale keys behind.
        for local_key in list(to_dist._local_keys):
            to_dist.remove_local_key(local_key)
        for local_key in from_dist._local_keys:
            to_dist.set_local_key(
                local_key,
                from_dist._local_key_indices[local_key][0],
                from_dist._local_key_indices[local_key][1])
def change_local_key(self, old_key_, new_key_):
    """Rename local key old_key_ to new_key_, preserving its index range.

    Reports via msg.error (and does nothing) when old_key_ is missing or
    new_key_ is already registered.
    """
    where = "HEP_data_utils.data_structures.Distribution_2D.change_local_key"
    if old_key_ not in self._local_keys:
        msg.error(
            where,
            "key {0} does not exist... returning with nothing done".format(
                old_key_),
            _verbose_level=0)
        return
    if new_key_ in self._local_keys:
        msg.error(
            where,
            "key {0} already exists... returning with nothing done".format(
                new_key_),
            _verbose_level=0)
        return
    # Move the index range over, then swap the key in the key list.
    idx_range = self._local_key_indices.pop(old_key_)
    self._local_keys.remove(old_key_)
    self._local_keys.append(new_key_)
    self._local_key_indices[new_key_] = idx_range
def plot_all(self):
    """Plot every stored 1D distribution and 2D matrix.

    Each entry is plotted independently; a failure is printed and reported
    through msg.error, then the remaining entries are still attempted.
    """
    for key in self._distributions_1D:
        try:
            self.plot_1D_distribution(key, label=key)
        except Exception as exc:
            print(exc)
            msg.error(
                "Distribution_store.plot_all",
                "Error when plotting 1D distribution with key {0}... skipping"
                .format(key))
    for key in self._distributions_2D:
        try:
            self.plot_matrix(key, label=key)
        except Exception as exc:
            print(exc)
            msg.error(
                "Distribution_store.plot_all",
                "Error when plotting 2D distribution with key {0}... skipping"
                .format(key))
def load_keys(self, filename_):
    """Rename stored distributions according to an ini-style config file.

    The file must contain a [KEYS] section whose entries map old key ->
    new key; each pair is forwarded to self.rename and the resulting key
    list is printed.  Parse failures and a missing [KEYS] section are
    reported via msg.error and leave the store untouched.
    """
    config = configparser.ConfigParser()
    config.optionxform = str  # keep option names case-sensitive
    try:
        config.read(filename_)
    # BUG FIX: was a bare "except:", which also swallowed SystemExit and
    # KeyboardInterrupt; only genuine errors should be caught here.
    except Exception:
        msg.check_verbosity_and_print(str(sys.exc_info()[0]),
                                      _verbose_level=-1)
        msg.error(
            "HEP_data_utils.data_structures.Distribution_store",
            "an exception occured when parsing the config file... Continuing with nothing done"
        )
        return
    if "KEYS" not in config.sections():
        msg.error(
            "HEP_data_utils.data_structures.Distribution_store",
            "no section titled \"KEYS\" in file {0}".format(filename_))
        return
    keys = config["KEYS"]
    for old_key in keys:
        self.rename(old_key, keys[old_key])
    self.print_keys()
def set_1D_bins(distribution_, indep_vars_):
    """Fill a 1D distribution's bin edges and labels from HEPData metadata.

    Each entry of indep_vars_[0]["values"] carries either a "value" (a bin
    label) or a "low"/"high" edge pair; edge pairs are stitched into the
    contiguous edge array distribution_._bin_values (length len+1), labels
    go into distribution_._bin_labels.  Non-contiguous or uninterpretable
    bins set distribution_._has_errors and are reported via msg.error.

    Fatal unless exactly one independent variable is supplied.
    """
    if len(indep_vars_) != 1:
        msg.fatal(
            "HEP_data_utils.helpers.set_1D_bins",
            "distribution {0} has {1} independent_variables but I am only configured to deal with 1"
            .format(distribution_._description, len(indep_vars_)))
    n_bins = len(distribution_)
    distribution_._bin_values = np.zeros(shape=(1 + n_bins))
    distribution_._bin_labels = ["unlabeled" for _ in range(n_bins)]
    # FIXES vs original: "!= None" -> "is not None"; loop variable no
    # longer shadows the builtin "bin"; the msg.error keyword is now
    # "_verbose_level" to match every other call site in this module (the
    # original passed "verbose_level", which does not match).
    for i, bin_entry in enumerate(indep_vars_[0]["values"]):
        if bin_entry.get("value", None) is not None:
            distribution_._bin_labels[i] = bin_entry["value"]
        elif (bin_entry.get("high", None) is not None
              and bin_entry.get("low", None) is not None):
            bin_lo, bin_hi = bin_entry["low"], bin_entry["high"]
            if i == 0:
                distribution_._bin_values[0] = bin_lo
                distribution_._bin_values[1] = bin_hi
                continue
            if bin_hi == distribution_._bin_values[i]:
                # Edges given high-to-low: next stored edge is the low one.
                distribution_._bin_values[i + 1] = bin_lo
            elif bin_lo == distribution_._bin_values[i]:
                distribution_._bin_values[i + 1] = bin_hi
            else:
                distribution_._has_errors = True
                msg.error(
                    "HEP_data_utils.helpers.set_1D_bins",
                    "Bin entry {0} for distribution {1} is not continuous from the previous bin which ended at {2}"
                    .format(bin_entry, distribution_._description,
                            distribution_._bin_values[i]),
                    _verbose_level=-1)
        else:
            distribution_._has_errors = True
            msg.error(
                "HEP_data_utils.helpers.set_1D_bins",
                "Could not interpret bin entry {0} for distribution {1}".
                format(bin_entry, distribution_._description),
                _verbose_level=-1)
def plot_1D_distribution(self, key_, **kwargs):
    """Plot the stored 1D distribution key_ with total/sys/stat error bands.

    kwargs: label, legend_loc, xlabel, ylabel, title, xlim, ylim,
    logx, logy.  Refuses to plot a distribution flagged with _has_errors.
    """
    if self._distributions_1D[key_]._has_errors:
        msg.error(
            "Distribution_store.plot_1D_distribution",
            "Key {0} had some errors when loading. Please clear them before plotting."
            .format(key_), -1)
        return
    # Fetch the same distribution three times: total errors, then only the
    # "sys" sources, then only the "stat" sources.
    x, y, [ey_lo, ey_hi], ex, labels, keys = HEPData_plt.get_1D_distribution(
        self, key_)
    x, y, [ey_lo_sys, ey_hi_sys], ex, labels, sys_keys = HEPData_plt.get_1D_distribution(
        self, key_, "sys")
    x, y, [ey_lo_stat, ey_hi_stat], ex, labels, stat_keys = HEPData_plt.get_1D_distribution(
        self, key_, "stat")
    fig = plt.figure(figsize=(15, 5))
    ax = fig.add_subplot(111)
    # Legend strings list the contributing error sources, wrapped at 120
    # characters per line.
    str_tot_legend = kwargs.get(
        "label", "distribution") + " ( " + " + ".join(keys) + " )"
    str_tot_legend = "\n".join([
        str_tot_legend[120 * i:min(len(str_tot_legend), 120 * (i + 1))]
        for i in range(int(len(str_tot_legend) / 120) + 1)
    ])
    str_sys_legend = kwargs.get(
        "label", "distribution") + " ( " + " + ".join(sys_keys) + " )"
    str_sys_legend = "\n".join([
        str_sys_legend[120 * i:min(len(str_sys_legend), 120 * (i + 1))]
        for i in range(int(len(str_sys_legend) / 120) + 1)
    ])
    # NOTE(review): str_tot_legend (built from the *total* error keys) is
    # attached to the sys band below, and str_sys_legend to the total
    # errorbar -- looks like the two labels are swapped; confirm intent.
    if sum([np.fabs(x) for x in ey_hi_sys + ey_lo_sys]) > 0:
        # Wide translucent band: systematic errors only.
        ax.errorbar(x, y, yerr=[ey_lo_sys, ey_hi_sys], c='royalblue',
                    linewidth=18, linestyle='None', marker='None',
                    alpha=0.4, label=str_tot_legend)
    if sum([np.fabs(x) for x in ey_hi_stat + ey_lo_stat]) > 0:
        # Narrower band: statistical errors only.
        ax.errorbar(x, y, yerr=[ey_lo_stat, ey_hi_stat], c='indianred',
                    linewidth=6, linestyle='None', marker='None',
                    alpha=0.6,
                    label=kwargs.get("label", "distribution") + " ( stat )")
    # Markers with total errors on top.
    ax.errorbar(x, y, yerr=[ey_lo, ey_hi], xerr=ex, c='k', linewidth=2,
                linestyle='None', marker='+', alpha=1, label=str_sys_legend)
    if labels:
        # Labeled (non-numeric) bins: show each label as a tick.
        ax.set_xticks(x)
        ax.set_xticklabels(self._distributions_1D[key_]._bin_labels,
                           rotation=45)
    # Shrink the axes to leave room for the legend placed outside.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.4, box.height])
    if "legend_loc" in kwargs:
        ax.legend(loc=kwargs.get("legend_loc", "best"))
    else:
        ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.xlabel(
        kwargs.get("xlabel", self._distributions_1D[key_]._indep_var))
    plt.ylabel(kwargs.get("ylabel", self._distributions_1D[key_]._dep_var))
    plt.title(kwargs.get("title", ""))
    ax.axis(xlim=kwargs.get("xlim", [x[0], x[len(x) - 1]]))
    # NOTE(review): the ylim default is computed from x values, not y --
    # presumably a copy-paste slip; confirm against intended axis ranges.
    ax.axis(ylim=kwargs.get("ylim", [x[0], x[len(x) - 1]]))
    if kwargs.get("logy", False) is True:
        plt.yscale("log")
    if kwargs.get("logx", False) is True:
        plt.xscale("log")
    plt.grid()
    plt.show()
def _unique_dist_key(dist_key, existing):
    """Return dist_key, suffixed with '-duplicated-auto-key;N' as needed so
    that it is not already present in the mapping *existing*."""
    if existing.get(dist_key, None) is not None:
        dist_key = dist_key + "-duplicated-auto-key;1"
        while existing.get(dist_key, None) is not None:
            dist_key = dist_key[:-1] + str(1 + int(dist_key[-1:]))
    return dist_key


def load_distributions_from_yaml(dataset_, dep_vars_, indep_vars_, path_,
                                 **argv):
    """Build Distribution_1D/2D objects from parsed HEPData yaml content
    and register them in dataset_ under a unique '|'-separated key.

    Keyword args: n_dim_ (1 or 2, default 1), metadata_global_ (dict of
    submission-level metadata), metadata_local_ (the raw per-table yaml
    entry, stored as LOCAL:: metadata).  Fatal on entries without a
    "value", on unsupported n_dim_, and on error sources whose length
    disagrees with the number of values.
    """
    n_dim_ = argv.get("n_dim_", 1)
    extra_info_global = argv.get("metadata_global_", {})
    # BUG FIX: this read "metadata_global_" again (copy-paste slip), so the
    # per-table metadata passed by load_yaml_file as "metadata_local_" was
    # silently dropped and the global metadata stored twice.
    extra_info_local = argv.get("metadata_local_", {})
    for dep_var in dep_vars_:
        distribution = Distribution()
        # Key prefix: table doi if available, then data file or yaml path.
        dist_key = "|"
        if extra_info_global.get("table_doi", None) is not None:
            dist_key = dist_key + str(extra_info_global["table_doi"]) + "|"
        if extra_info_global.get("data_file", None) is not None:
            dist_key = dist_key + str(extra_info_global["data_file"]) + "|"
        else:
            dist_key = dist_key + path_ + "|"
        if n_dim_ == 1:
            distribution = Distribution_1D()
        if n_dim_ == 2:
            distribution = Distribution_2D()
        distribution._description = dep_var["header"].get("name", "unknown")
        distribution._name = distribution._description
        distribution._dep_var = dep_var["header"].get("name", "unknown")
        distribution._indep_var = indep_vars_[0]["header"].get(
            "name", "unknown")
        distribution._units = dep_var["header"].get("units", "unknown")
        # Stash all metadata; the variable tables themselves are skipped.
        for key in extra_info_local:
            if key == "dependent_variables" or key == "independent_variables":
                continue
            distribution._meta["LOCAL::" + key] = extra_info_local[key]
        for key in extra_info_global:
            distribution._meta["GLOBAL::" + key] = extra_info_global[key]
        for key in dep_var:
            if key == "values" or key == "errors":
                continue
            distribution._meta["LOCAL::DEP_VARS::" + key] = dep_var[key]
        # First pass: collect the central values (every entry must have one).
        for entry in dep_var["values"]:
            try:
                distribution._values = np.append(distribution._values,
                                                 entry["value"])
            except KeyError as exc:
                msg.error(
                    "HEP_data_utils.helpers.load_distributions_from_yaml",
                    "KeyError: {0}".format(exc), _verbose_level=-1)
                msg.fatal(
                    "HEP_data_utils.helpers.load_distributions_from_yaml",
                    "Entry with no \"value\" when trying to create distribution {0} in file {1}"
                    .format(distribution._description, path_))
        # Second pass: collect the per-point error sources (optional).
        pt_idx = 0
        for entry in dep_var["values"]:
            try:
                errors = entry["errors"]
            except KeyError as exc:
                msg.error(
                    "HEP_data_utils.helpers.load_distributions_from_yaml",
                    "KeyError: {0}".format(exc), _verbose_level=1)
                msg.warning(
                    "HEP_data_utils.helpers.load_distributions_from_yaml",
                    "Entry with no \"errors\" when trying to create distribution {0} in file {1}... Assuming there are none"
                    .format(distribution._description, path_),
                    _verbose_level=1)
                errors = []
            err_idx = 0
            for error in errors:
                get_error_from_yaml_map(distribution, error, pt_idx, err_idx)
                err_idx = err_idx + 1
            pt_idx = pt_idx + 1
        for indep_var in indep_vars_:
            for key in indep_var:
                if key == "values":
                    continue
                distribution._meta["LOCAL::INDEP_VARS::" + key] = indep_var[key]
        # Record the flat length before any 2D reshaping for the size check.
        expected_size = len(distribution._values)
        if n_dim_ == 1:
            set_1D_bins(distribution, indep_vars_)
        elif n_dim_ == 2:
            set_2D_bins(distribution, indep_vars_)
            regularise_bins(distribution)
        else:
            msg.fatal(
                "HEP_data_utils.helpers.load_distributions_from_yaml",
                "number of bin dimensions is {0} but I can only handle 1 or 2".
                format(n_dim_))
        # Every error source must cover every point.
        for error in [
                distribution._symm_errors, distribution._asymm_errors_up,
                distribution._asymm_errors_down
        ]:
            for key in error:
                this_err_size = len(error[key])
                if this_err_size == expected_size:
                    continue
                msg.fatal(
                    "HEP_data_utils.helpers.load_distributions_from_yaml",
                    "error source {0} has length {1} for distribution [{2}] where {3} was expected"
                    .format(key, this_err_size, distribution._description,
                            expected_size))
        msg.info("HEP_data_utils.helpers.load_distributions_from_yaml",
                 "yaml file loaded with the following entries",
                 _verbose_level=0)
        dist_key = dist_key + str(distribution._name) + "|"
        if msg.VERBOSE_LEVEL >= 0:
            print(distribution)
        if n_dim_ == 1:
            dist_key = _unique_dist_key(dist_key, dataset_._distributions_1D)
            dataset_._distributions_1D[dist_key] = distribution
        if n_dim_ == 2:
            dist_key = _unique_dist_key(dist_key, dataset_._distributions_2D)
            dataset_._distributions_2D[dist_key] = distribution