def get_series(eventWise, fixed_params, vary_param, tag_index, name_start=""):
    """Pick the jets that match ``fixed_params`` and order them by ``vary_param``.

    The score table from ``CompareClusters.tabulate_scores`` is reduced to
    the rows whose jet name starts with ``name_start`` and that pass
    ``CompareClusters.filter_matching`` with ``approx=fixed_params``; the
    surviving rows are then ordered by their ``vary_param`` column using
    ``CompareClusters.generic_sort``.

    Parameters
    ----------
    eventWise :
        Forwarded to ``CompareClusters.tabulate_scores`` and
        ``get_jet_index``.
    fixed_params :
        Forwarded as the ``approx`` argument of
        ``CompareClusters.filter_matching``.
    vary_param : str
        Name of the column (an entry of the table's column list) whose
        values define the series.
    tag_index :
        Forwarded to ``get_jet_index``.
    name_start : str
        Prefix that a jet name must have to be considered
        (Default value = "", which accepts every name).

    Returns
    -------
    jet_names :
        The "jet_name" entries of the selected rows, ordered by
        ``vary_param``.
    jet_indices :
        Whatever ``get_jet_index`` returns for those names.
    labels : list of str
        One ``"<vary_param>=<value>"`` string per selected row,
        in the same order.

    Raises
    ------
    ValueError
        If two of the selected rows share the same ``vary_param`` value.
    """
    all_cols, _, _, table = CompareClusters.tabulate_scores(eventWise)
    name_col = all_cols.index("jet_name")
    # keep only the jets whose name has the requested prefix
    name_mask = [TypeTools.restring(name).startswith(name_start)
                 for name in table[:, name_col]]
    table = table[name_mask]
    # then only the rows that agree with the fixed parameters
    mask = CompareClusters.filter_matching(all_cols, table,
                                           approx=fixed_params)
    table = table[mask]
    vary_values = table[:, all_cols.index(vary_param)]
    # The value -> row lookup below keeps only the last row for a repeated
    # value, so a duplicate would silently drop jets from the series;
    # refuse early with an explanation instead.
    if len(set(vary_values)) != len(vary_values):
        raise ValueError(
            f"Cannot build a series in {vary_param!r}: the selected jets "
            f"do not have unique values for it; found {list(vary_values)}")
    values_dict = {v: i for i, v in enumerate(vary_values)}
    sorted_values = CompareClusters.generic_sort(vary_values)
    order = [values_dict[v] for v in sorted_values]
    jet_names = table[order, name_col]
    jet_indices = get_jet_index(eventWise, jet_names, tag_index)
    labels = []
    for value in sorted_values:
        if isinstance(value, float):
            # NOTE(review): ".1" with no type code means one significant
            # digit, and floats with magnitude >= 1 come out in scientific
            # notation (2.0 -> "2e+00"); confirm this is the wanted label.
            labels.append(f"{vary_param}={value:.1}")
        else:
            labels.append(f"{vary_param}={value}")
    return jet_names, jet_indices, labels
def test_tabulate_scores():
    """Check ``CompareClusters.tabulate_scores`` on two single-jet files.

    One file holds a "DogJet" with two hyperparameters, the other a
    "CatJet" with one.  The combined table must have one row per jet, and
    every parameter or score that a jet lacks must read "Undefined".
    """
    dog_params = {"DogJet_QualityWidth": 4, "DogJet_DeltaR": .5}
    cat_params = {"CatJet_QualityWidth": np.inf}
    with TempTestDir("tst") as dir_name:
        dog_path = os.path.join(dir_name, "test1.parquet")
        cat_path = os.path.join(dir_name, "test2.parquet")
        # this will raise a value error if given an empty eventWise,
        # so each file also gets a label column
        dog_ew = Components.EventWise(dog_path)
        dog_ew.append_hyperparameters(**dog_params)
        dog_ew.append(DogJet_Label=ak.from_iter([[]]))
        cat_ew = Components.EventWise(cat_path)
        cat_ew.append_hyperparameters(**cat_params)
        cat_ew.append(CatJet_Label=ak.from_iter([[]]))
        tabulated = CompareClusters.tabulate_scores([dog_path, cat_path])
        all_cols, variable_cols, score_cols, table = tabulated
        # table shape must agree with the column lists
        assert len(all_cols) == len(table[0])
        assert len(all_cols) == len(variable_cols) + len(score_cols) + 3
        assert len(table) == 2
        found_names = table[:, all_cols.index("jet_name")]

        def row_of(jet_name):
            # index of the first row that belongs to the named jet
            return next(row for row, found in enumerate(found_names)
                        if found == jet_name)

        dog_row = row_of("DogJet")
        cat_row = row_of("CatJet")
        checked = ("QualityWidth", "DeltaR", "AveBGMassRatio")
        use_cols = [all_cols.index(col_name) for col_name in checked]
        dog_expected = [4, .5, "Undefined"]
        cat_expected = [np.inf, "Undefined", "Undefined"]
        assert np.all(
            TypeTools.soft_equality(table[dog_row, use_cols], dog_expected))
        assert np.all(
            TypeTools.soft_equality(table[cat_row, use_cols], cat_expected))