def test_readme():
    """Check the README examples: parsing, conversion and expression metadata.

    Covers ROOT -> numexpr/ROOT round trips, numexpr -> ROOT conversion,
    automatic dialect detection, the ``variables`` / ``named_constants`` /
    ``unnamed_constants`` attributes, and finally evaluates a composed
    selection with numexpr.
    """
    momentum = from_root('TMath::Sqrt(X_PX**2 + X_PY**2 + X_PZ**2)')
    assert momentum.to_numexpr(
    ) == 'sqrt(((X_PX ** 2) + (X_PY ** 2) + (X_PZ ** 2)))'
    assert momentum.to_root(
    ) == 'TMath::Sqrt(((X_PX ** 2) + (X_PY ** 2) + (X_PZ ** 2)))'

    my_selection = from_numexpr('X_PT > 5 & (Mu_NHits > 3 | Mu_PT > 10)')
    assert my_selection.to_root(
    ) == '(X_PT > 5) && ((Mu_NHits > 3) || (Mu_PT > 10))'
    assert my_selection.to_numexpr(
    ) == '(X_PT > 5) & ((Mu_NHits > 3) | (Mu_PT > 10))'

    my_sum = from_auto('True + False')
    assert my_sum.to_root() == 'true + false'
    assert my_sum.to_numexpr() == 'True + False'

    my_check = from_auto('(X_THETA*TMath::DegToRad() > pi/4) && D_PE > 9.2')
    assert my_check.variables == {'D_PE', 'X_THETA'}
    assert my_check.named_constants == {'DEG2RAD', 'PI'}
    assert my_check.unnamed_constants == {'4', '9.2'}

    # NOTE(review): Python's ``and``/``or`` cannot be overloaded, so this line
    # is resolved through object truthiness rather than building a combined
    # expression -- presumably it yields ``my_check``, which is what the
    # expected string below encodes. Confirm this is the intended example.
    new_selection = (momentum > 100) and (my_check or (np.sqrt(my_sum) < 1))

    def numexpr_eval(string):
        # Only the two variables used by ``my_check`` are supplied.
        return numexpr.evaluate(string,
                                local_dict=dict(X_THETA=1234, D_PE=678))

    actual = numexpr_eval(new_selection.to_numexpr())
    expected = numexpr_eval(
        '((X_THETA * 0.017453292519943295) > (3.141592653589793 / 4)) & (D_PE > 9.2)'
    )
    # The previous form, ``assert pytest.approx(actual, expected)``, passed
    # ``expected`` as the relative tolerance and asserted the truthiness of
    # the approx object itself, so it could never fail. Compare the two
    # evaluated results directly instead.
    assert np.array_equal(actual, expected)
Exemple #2
0
def extract_branch_names(string):
    """Return the variable names referenced by an expression.

    ``string`` may be text or a bytes-like object. Decoding is attempted
    first; if the object has no usable ``decode`` or decoding fails, the
    value is handed to ``formulate.from_auto`` unchanged.
    """
    try:
        decoded = string.decode()
    except (UnicodeDecodeError, AttributeError):
        # Best effort only: fall through with the value exactly as given.
        expression_text = string
    else:
        expression_text = decoded

    parsed = formulate.from_auto(expression_text)
    return parsed.variables
Exemple #3
0
def selection_branches(selection: str) -> set[str]:
    """Collect the variable (branch) names that a selection refers to.

    The selection is parsed with ``formulate.from_auto`` and the
    ``variables`` attribute of the parsed expression is returned.

    Parameters
    ----------
    selection : str
        Selection string in ROOT or numexpr

    Returns
    -------
    set(str)
        Necessary branches/variables

    Examples
    --------
    >>> from tdub.data import selection_branches
    >>> selection = "(reg1j1b == True) & (OS == True) & (mass_lep1lep2 > 100)"
    >>> sorted(selection_branches(selection))
    ['OS', 'mass_lep1lep2', 'reg1j1b']
    >>> selection = "reg2j1b == true && OS == true && (mass_lep1jet1 < 155)"
    >>> sorted(selection_branches(selection))
    ['OS', 'mass_lep1jet1', 'reg2j1b']

    """
    parsed = formulate.from_auto(selection)
    return parsed.variables
Exemple #4
0
def selection_as_root(selection: str) -> str:
    """Render a selection string in ROOT syntax.

    The input is parsed with ``formulate.from_auto`` and written back out
    with ``to_root()``.

    Parameters
    ----------
    selection : str
        The selection string in ROOT or numexpr

    Returns
    -------
    str
        The same selection in ROOT format.

    Examples
    --------
    >>> selection = "(reg1j1b == True) & (OS == True) & (mass_lep1jet1 < 155)"
    >>> from tdub.data import selection_as_root
    >>> selection_as_root(selection)
    '(reg1j1b == true) && (OS == true) && (mass_lep1jet1 < 155)'

    """
    parsed = formulate.from_auto(selection)
    root_form = parsed.to_root()
    return root_form
Exemple #5
0
def selection_as_numexpr(selection: str) -> str:
    """Render a selection string in numexpr syntax.

    The input is parsed with ``formulate.from_auto`` and written back out
    with ``to_numexpr()``.

    Parameters
    ----------
    selection : str
        Selection string in ROOT or numexpr

    Returns
    -------
    str
        Selection in numexpr format.

    Examples
    --------
    >>> selection = "reg1j1b == true && OS == true && mass_lep1jet1 < 155"
    >>> from tdub.data import selection_as_numexpr
    >>> selection_as_numexpr(selection)
    '(reg1j1b == True) & (OS == True) & (mass_lep1jet1 < 155)'

    """
    parsed = formulate.from_auto(selection)
    numexpr_form = parsed.to_numexpr()
    return numexpr_form
Exemple #6
0
def do_cut(
    tree_name,
    files,
    supercuts,
    proposedBranches,
    output_directory,
    eventWeightBranch,
    pids,
):
    """Apply every cut derived from ``supercuts`` to one tree and dump the counts.

    For each file the proposed branches are resolved against the tree (real
    branches are used directly, aliases are parsed and expanded). The events
    are then iterated in chunks, every cut is applied to every chunk, and
    the raw/weighted event counts per cut hash are written to
    ``<output_directory>/<tree_name>.json``.

    Parameters
    ----------
    tree_name
        Name of the tree to read. ``.decode('utf-8')`` is called on it, so it
        is presumably a bytes object -- TODO confirm against the caller.
    files
        Files to open with ``uproot``.
    supercuts
        Cut definitions; aliases are expanded via ``expand_supercuts``.
    proposedBranches
        Branch or alias names that must be available in the tree.
    output_directory
        Directory receiving the JSON result.
    eventWeightBranch
        Selection used as the event weight; expanded via ``expand_selection``.
    pids
        ``None`` to disable progress bars, otherwise an array-like of process
        ids in which ``0`` appears to mark a free slot (assumption based on
        the ``pids == 0`` lookup below -- verify against the caller).

    Returns
    -------
    tuple
        ``(result, elapsed)`` where ``result`` is ``True`` on success and
        ``False`` if anything went wrong, and ``elapsed`` is the difference
        of the two ``clock()`` readings taken at entry and exit.
    """

    position = -1
    if pids is not None:
        # handle pid registration
        if os.getpid() not in pids:
            pids[np.argmax(pids == 0)] = os.getpid()
        # this gives us the position of this particular process in our list of processes
        position = np.where(pids == os.getpid())[0][0]

    # Dedicated name for the timer: the chunk loop below unpacks entry
    # ranges, and reusing ``start`` there used to overwrite this timestamp
    # and corrupt the elapsed time that is returned.
    start_time = clock()
    try:
        branches = []
        aliases = {}
        missingBranches = False
        for fname in files:
            with uproot.open(fname) as f:
                tree = f[tree_name]
                for branch in proposedBranches:
                    if branch in tree:
                        branches.append(branch)
                    else:
                        if branch in tree.aliases:
                            # Keep the parsed alias for later expansion and
                            # read the branches its definition depends on.
                            aliases[branch.decode()] = formulate.from_auto(
                                tree.aliases[branch].decode())
                            branches.extend(
                                extract_branch_names(tree.aliases[branch]))
                        else:
                            logger.error(
                                'branch {} not found in {} for {}'.format(
                                    branch, tree_name, fname))
                            missingBranches = True
        if missingBranches:
            # Raises SystemExit, which the bare ``except`` below traps.
            sys.exit(1)

        # Only values of existing keys are replaced, so mutating the dict
        # while iterating over it is safe here.
        for alias, alias_expr in aliases.items():
            alias_expr = expand_definition(alias_expr, aliases)
            branches.extend(extract_branch_names(alias_expr.to_numexpr()))
            aliases[alias] = alias_expr

        branches = set(branches)
        eventWeightBranch = expand_selection(eventWeightBranch, aliases)
        supercuts = expand_supercuts(supercuts, aliases)

        # iterate over the cuts available
        cuts = defaultdict(lambda: {'raw': 0, 'weighted': 0})

        events_tqdm = tqdm(
            total=uproot.numentries(files, tree_name),
            disable=(position == -1),
            position=2 * position + 1,
            leave=False,
            mininterval=5,
            maxinterval=10,
            unit="events",
            dynamic_ncols=True,
        )
        for file, entry_start, entry_stop, events in uproot.iterate(
                files,
                tree_name,
                branches=branches,
                namedecode='utf-8',
                reportfile=True,
                reportentries=True,
        ):
            events_tqdm.set_description("({1:d}) Working on {0:s}".format(
                tree_name.decode('utf-8'), 2 * position + 1))
            for cut in tqdm(
                    # deep copy so the generator cannot mutate ``supercuts``
                    get_cut(copy.deepcopy(supercuts)),
                    desc="({1:d}) Applying cuts to {0:s}".format(
                        file.name.decode('utf-8'), 2 * position + 2),
                    total=get_n_cuts(supercuts),
                    disable=(position == -1),
                    position=2 * position + 2,
                    leave=False,
                    unit="cuts",
                    miniters=10,
                    dynamic_ncols=True,
            ):
                cut_hash = get_cut_hash(cut)
                rawEvents, weightedEvents = apply_cuts(events, cut,
                                                       eventWeightBranch)
                cuts[cut_hash]['raw'] += rawEvents
                cuts[cut_hash]['weighted'] += weightedEvents

            events_tqdm.update(entry_stop - entry_start)

        with open(
                "{0:s}/{1:s}.json".format(output_directory,
                                          tree_name.decode('utf-8')),
                "w+") as f:
            f.write(json.dumps(cuts, sort_keys=True, indent=4))
            result = True
    except:  # noqa: E722
        # Kept as a bare ``except``: besides ordinary errors it also traps
        # the SystemExit raised by ``sys.exit(1)`` above, so a missing
        # branch yields ``result = False`` instead of ending the process.
        logger.exception("Caught an error - skipping {0:s}".format(
            tree_name.decode('utf-8')))
        result = False
    end = clock()
    return (result, end - start_time)
Exemple #7
0
def expand_selection(selection, aliases):
    """Parse ``selection``, expand it against ``aliases``, return numexpr text.

    The selection is parsed with ``formulate.from_auto``, passed through
    ``expand_definition`` together with ``aliases``, and the result is
    rendered with ``to_numexpr()``.
    """
    parsed = formulate.from_auto(selection)
    expanded = expand_definition(parsed, aliases)
    return expanded.to_numexpr()