def test_readme():
    """Check the conversions and expression algebra shown in the README.

    Covers ROOT <-> numexpr round trips, the ``variables`` /
    ``named_constants`` / ``unnamed_constants`` properties, and evaluating
    a combined selection with numexpr.
    """
    momentum = from_root('TMath::Sqrt(X_PX**2 + X_PY**2 + X_PZ**2)')
    assert momentum.to_numexpr() == (
        'sqrt(((X_PX ** 2) + (X_PY ** 2) + (X_PZ ** 2)))')
    assert momentum.to_root() == (
        'TMath::Sqrt(((X_PX ** 2) + (X_PY ** 2) + (X_PZ ** 2)))')

    my_selection = from_numexpr('X_PT > 5 & (Mu_NHits > 3 | Mu_PT > 10)')
    assert my_selection.to_root() == (
        '(X_PT > 5) && ((Mu_NHits > 3) || (Mu_PT > 10))')
    assert my_selection.to_numexpr() == (
        '(X_PT > 5) & ((Mu_NHits > 3) | (Mu_PT > 10))')

    my_sum = from_auto('True + False')
    assert my_sum.to_root() == 'true + false'
    assert my_sum.to_numexpr() == 'True + False'

    my_check = from_auto('(X_THETA*TMath::DegToRad() > pi/4) && D_PE > 9.2')
    assert my_check.variables == {'D_PE', 'X_THETA'}
    assert my_check.named_constants == {'DEG2RAD', 'PI'}
    assert my_check.unnamed_constants == {'4', '9.2'}

    # NOTE(review): Python's ``and`` / ``or`` short-circuit on truthiness and
    # cannot be overloaded, so if the expression objects are truthy this is
    # simply ``my_check`` -- which is what the expected string below encodes.
    new_selection = (momentum > 100) and (my_check or (np.sqrt(my_sum) < 1))

    def numexpr_eval(string):
        return numexpr.evaluate(string, local_dict=dict(X_THETA=1234, D_PE=678))

    expected = (
        '((X_THETA * 0.017453292519943295) > (3.141592653589793 / 4)) & (D_PE > 9.2)'
    )
    # The previous ``assert pytest.approx(actual, expected)`` never compared
    # the two values: the second positional argument of ``approx`` is the
    # relative tolerance. Both sides are boolean results of a comparison, so
    # an exact comparison is the right check.
    assert np.array_equal(
        numexpr_eval(new_selection.to_numexpr()),
        numexpr_eval(expected),
    )
def extract_branch_names(string):
    """Return the variables referenced by an expression string.

    ``string`` may be text or a bytes-like object; when it has a working
    ``decode()`` the decoded text is parsed, otherwise the value is handed
    to ``formulate.from_auto`` unchanged.
    """
    try:
        expression_text = string.decode()
    except (UnicodeDecodeError, AttributeError):
        # Not decodable (e.g. already text): parse the input as given.
        expression_text = string
    parsed = formulate.from_auto(expression_text)
    return parsed.variables
def selection_branches(selection: str) -> set[str]:
    """Construct the minimal set of branches required for a selection.

    Parameters
    ----------
    selection : str
        Selection string in ROOT or numexpr format.

    Returns
    -------
    set(str)
        Names of the branches/variables the selection refers to.

    Examples
    --------
    >>> from tdub.data import selection_branches
    >>> selection = "(reg1j1b == True) & (OS == True) & (mass_lep1lep2 > 100)"
    >>> sorted(selection_branches(selection))
    ['OS', 'mass_lep1lep2', 'reg1j1b']
    >>> selection = "reg2j1b == true && OS == true && (mass_lep1jet1 < 155)"
    >>> sorted(selection_branches(selection))
    ['OS', 'mass_lep1jet1', 'reg2j1b']

    """
    parsed = formulate.from_auto(selection)
    return parsed.variables
def selection_as_root(selection: str) -> str:
    """Express an arbitrary selection in ROOT syntax.

    Parameters
    ----------
    selection : str
        Selection string written in either ROOT or numexpr format.

    Returns
    -------
    str
        The same selection rendered in ROOT format.

    Examples
    --------
    >>> selection = "(reg1j1b == True) & (OS == True) & (mass_lep1jet1 < 155)"
    >>> from tdub.data import selection_as_root
    >>> selection_as_root(selection)
    '(reg1j1b == true) && (OS == true) && (mass_lep1jet1 < 155)'

    """
    parsed = formulate.from_auto(selection)
    return parsed.to_root()
def selection_as_numexpr(selection: str) -> str:
    """Express an arbitrary selection in numexpr syntax.

    Parameters
    ----------
    selection : str
        Selection string written in either ROOT or numexpr format.

    Returns
    -------
    str
        The same selection rendered in numexpr format.

    Examples
    --------
    >>> selection = "reg1j1b == true && OS == true && mass_lep1jet1 < 155"
    >>> from tdub.data import selection_as_numexpr
    >>> selection_as_numexpr(selection)
    '(reg1j1b == True) & (OS == True) & (mass_lep1jet1 < 155)'

    """
    parsed = formulate.from_auto(selection)
    return parsed.to_numexpr()
def do_cut(
    tree_name,
    files,
    supercuts,
    proposedBranches,
    output_directory,
    eventWeightBranch,
    pids,
):
    """Apply every cut combination to one tree and write the counts as JSON.

    Resolves which branches are needed (expanding tree aliases), iterates
    over the events in ``files``, accumulates raw and weighted event counts
    per cut hash, and writes them to ``<output_directory>/<tree_name>.json``.

    Parameters
    ----------
    tree_name
        Name of the tree to read; ``.decode('utf-8')`` is called on it, so
        it is presumably a bytes object -- TODO confirm against callers.
    files
        Input files, opened one by one with ``uproot.open`` and then
        iterated together with ``uproot.iterate``.
    supercuts
        Supercut definitions; alias-expanded via ``expand_supercuts`` and
        fed to ``get_cut`` / ``get_n_cuts``.
    proposedBranches
        Branch (or alias) names that must be available in the tree.
    output_directory
        Directory receiving the JSON output file.
    eventWeightBranch
        Event-weight expression; alias-expanded via ``expand_selection``.
    pids
        ``None`` to disable progress bars, otherwise an array in which each
        worker registers its PID in the first slot equal to 0 (presumably a
        shared numpy array -- verify against the caller). The slot index
        selects the progress-bar position.

    Returns
    -------
    tuple
        ``(result, elapsed)`` where ``result`` is ``True`` on success and
        ``False`` if anything went wrong, and ``elapsed`` is the difference
        between two ``clock()`` readings taken at entry and exit.
    """
    position = -1
    if pids is not None:
        # handle pid registration
        if os.getpid() not in pids:
            pids[np.argmax(pids == 0)] = os.getpid()
        # this gives us the position of this particular process in our list of processes
        position = np.where(pids == os.getpid())[0][0]

    start = clock()
    try:
        branches = []
        aliases = {}
        missingBranches = False
        for fname in files:
            with uproot.open(fname) as f:
                tree = f[tree_name]
                for branch in proposedBranches:
                    if branch in tree:
                        branches.append(branch)
                    else:
                        if branch in tree.aliases:
                            # Remember the alias definition and pull in the
                            # branches that definition itself refers to.
                            aliases[branch.decode()] = formulate.from_auto(
                                tree.aliases[branch].decode())
                            branches.extend(
                                extract_branch_names(tree.aliases[branch]))
                        else:
                            logger.error(
                                'branch {} not found in {} for {}'.format(
                                    branch, tree_name, fname))
                            missingBranches |= True
        if missingBranches:
            sys.exit(1)

        # Only values of existing keys are replaced here, so the dict size is
        # unchanged while it is being iterated.
        for alias, alias_expr in aliases.items():
            alias_expr = expand_definition(alias_expr, aliases)
            branches.extend(extract_branch_names(alias_expr.to_numexpr()))
            aliases[alias] = alias_expr

        branches = set(branches)
        eventWeightBranch = expand_selection(eventWeightBranch, aliases)
        supercuts = expand_supercuts(supercuts, aliases)

        # iterate over the cuts available
        cuts = defaultdict(lambda: {'raw': 0, 'weighted': 0})

        events_tqdm = tqdm(
            total=uproot.numentries(files, tree_name),
            disable=(position == -1),
            position=2 * position + 1,
            leave=False,
            mininterval=5,
            maxinterval=10,
            unit="events",
            dynamic_ncols=True,
        )
        # The entry range is unpacked into its own names: reusing ``start``
        # here would overwrite the timestamp taken above and corrupt the
        # elapsed time returned at the end.
        for file, entry_start, entry_stop, events in uproot.iterate(
                files,
                tree_name,
                branches=branches,
                namedecode='utf-8',
                reportfile=True,
                reportentries=True,
        ):
            events_tqdm.set_description("({1:d}) Working on {0:s}".format(
                tree_name.decode('utf-8'), 2 * position + 1))
            for cut in tqdm(
                    get_cut(copy.deepcopy(supercuts)),
                    desc="({1:d}) Applying cuts to {0:s}".format(
                        file.name.decode('utf-8'), 2 * position + 2),
                    total=get_n_cuts(supercuts),
                    disable=(position == -1),
                    position=2 * position + 2,
                    leave=False,
                    unit="cuts",
                    miniters=10,
                    dynamic_ncols=True,
            ):
                cut_hash = get_cut_hash(cut)
                rawEvents, weightedEvents = apply_cuts(events, cut,
                                                       eventWeightBranch)
                cuts[cut_hash]['raw'] += rawEvents
                cuts[cut_hash]['weighted'] += weightedEvents
            events_tqdm.update(entry_stop - entry_start)

        with open(
                "{0:s}/{1:s}.json".format(output_directory,
                                          tree_name.decode('utf-8')),
                "w+") as f:
            f.write(json.dumps(cuts, sort_keys=True, indent=4))
        result = True
    except:
        # NOTE(review): the bare ``except`` also traps the SystemExit raised
        # by ``sys.exit(1)`` above, so a tree with missing branches is logged
        # and reported as a failure instead of terminating the process.
        # Narrowing this clause would change that behavior.
        logger.exception("Caught an error - skipping {0:s}".format(
            tree_name.decode('utf-8')))
        result = False
    end = clock()
    return (result, end - start)
def expand_selection(selection, aliases):
    """Return ``selection`` as a numexpr string with ``aliases`` expanded.

    The selection is parsed with ``formulate.from_auto``, passed through
    ``expand_definition`` together with ``aliases``, and the result is
    rendered via ``to_numexpr()``.
    """
    parsed = formulate.from_auto(selection)
    expanded = expand_definition(parsed, aliases)
    return expanded.to_numexpr()