Beispiel #1
0
def test_lazy_dataframe_getattr():
    """Check attribute-style column access on ``LazyDataFrame``.

    Opens the ``Events`` tree of the ``nano_dy.root`` sample, wraps it in a
    ``LazyDataFrame`` (entrystart 0, entrystop 100, ``nMuon`` preloaded) and
    asserts that reading ``Muon_pt`` as an attribute adds it to the
    materialized columns, that an unknown attribute raises
    ``AttributeError``, and that a shallow copy behaves the same way.
    """
    import copy

    import uproot
    from coffea.processor import LazyDataFrame

    tree = uproot.open(osp.abspath('tests/samples/nano_dy.root'))['Events']
    entrystart = 0
    entrystop = 100

    df = LazyDataFrame(tree, entrystart, entrystop, preload_items=['nMuon'])

    # Only the preloaded column is expected at this point.
    assert len(df) == 1

    # The attribute is read purely for its side effect on the dataframe;
    # the value itself is not needed by any later assertion.
    df.Muon_pt
    assert len(df) == 2
    assert 'Muon_pt' in df.materialized

    assert 'Muon_eta' in df.available

    assert df.size == tree.num_entries

    with pytest.raises(AttributeError):
        df.notthere

    # Repeat the attribute checks on a shallow copy of the dataframe.
    df2 = copy.copy(df)
    df2.Muon_pt
    with pytest.raises(AttributeError):
        df2.notthere
Beispiel #2
0
def test_lazy_dataframe_getattr():
    """Exercise attribute-style column access on a ``LazyDataFrame``.

    The dataframe wraps the ``Events`` tree of the ``nano_dy.root`` sample
    with ``nMuon`` preloaded; the same attribute checks are then repeated
    on a shallow copy.
    """
    import copy

    import uproot
    from coffea.processor import LazyDataFrame

    sample_path = osp.abspath("tests/samples/nano_dy.root")
    events = uproot.open(sample_path)["Events"]
    first_entry, last_entry = 0, 100

    frame = LazyDataFrame(events, first_entry, last_entry, preload_items=["nMuon"])

    # Before any access only the preloaded column is expected.
    assert len(frame) == 1

    # Attribute read is performed only for its effect on the dataframe.
    frame.Muon_pt
    assert len(frame) == 2
    assert "Muon_pt" in frame.materialized

    assert "Muon_eta" in frame.available
    assert frame.size == events.num_entries

    with pytest.raises(AttributeError):
        frame.notthere

    # The same expectations are applied to a shallow copy.
    clone = copy.copy(frame)
    clone.Muon_pt

    with pytest.raises(AttributeError):
        clone.notthere
Beispiel #3
0
def test_lazy_dataframe():
    """Check item-style column access and assignment on ``LazyDataFrame``.

    Opens the ``Events`` tree of the ``nano_dy.root`` sample, wraps it in a
    ``LazyDataFrame`` (chunksize 20, index 0, ``nMuon`` preloaded) and
    asserts that reading ``Muon_pt`` and storing a derived ``Muon_pt_up``
    column grow the dataframe, and that looking up an unknown column raises
    ``KeyError``.
    """
    import uproot
    from coffea.processor import LazyDataFrame

    tree = uproot.open(osp.abspath('tests/samples/nano_dy.root'))['Events']
    chunksize = 20
    index = 0

    df = LazyDataFrame(tree, chunksize, index, preload_items=['nMuon'])

    # Only the preloaded column is expected at this point.
    assert len(df) == 1

    pt = df['Muon_pt']
    df['Muon_pt_up'] = pt * 1.05
    assert len(df) == 3
    assert 'Muon_pt' in df.materialized

    assert b'Muon_eta' in df.available

    assert df.size == tree.numentries

    # A missing column must raise KeyError.  The ``else`` branch makes the
    # test fail when no exception is raised; a bare ``except: pass`` would
    # let that case go unnoticed.
    try:
        df['notthere']
    except KeyError:
        pass
    else:
        raise AssertionError("df['notthere'] did not raise KeyError")
Beispiel #4
0
def test_lazy_dataframe_getattr():
    """Check attribute-style column access on ``LazyDataFrame``.

    Opens the ``Events`` tree of the ``nano_dy.root`` sample, wraps it in a
    ``LazyDataFrame`` (chunksize 20, index 0, ``nMuon`` preloaded) and
    asserts that reading ``Muon_pt`` as an attribute adds it to the
    materialized columns and that an unknown attribute raises
    ``AttributeError``.
    """
    import uproot
    from coffea.processor import LazyDataFrame

    tree = uproot.open(osp.abspath('tests/samples/nano_dy.root'))['Events']
    chunksize = 20
    index = 0

    df = LazyDataFrame(tree, chunksize, index, preload_items=['nMuon'])

    # Only the preloaded column is expected at this point.
    assert len(df) == 1

    # The attribute is read purely for its side effect on the dataframe;
    # the value itself is not used by any later assertion.
    df.Muon_pt
    assert len(df) == 2
    assert 'Muon_pt' in df.materialized

    assert b'Muon_eta' in df.available

    assert df.size == tree.numentries

    with pytest.raises(AttributeError):
        df.notthere
Beispiel #5
0
def test_lazy_dataframe():
    """Exercise item-style column access and assignment on ``LazyDataFrame``.

    The dataframe wraps the ``Events`` tree of the ``nano_dy.root`` sample
    with ``nMuon`` preloaded; a column is read, a derived column is stored,
    and a lookup of an unknown column is expected to raise ``KeyError``.
    """
    import uproot
    from coffea.processor import LazyDataFrame

    sample_path = osp.abspath("tests/samples/nano_dy.root")
    events = uproot.open(sample_path)["Events"]
    first_entry, last_entry = 0, 100

    frame = LazyDataFrame(events, first_entry, last_entry, preload_items=["nMuon"])

    # Before any access only the preloaded column is expected.
    assert len(frame) == 1

    muon_pt = frame["Muon_pt"]
    assert len(frame) == 2

    # Storing a derived column is expected to add one more entry.
    frame["Muon_pt_up"] = muon_pt * 1.05
    assert len(frame) == 3
    assert "Muon_pt" in frame.materialized

    assert "Muon_eta" in frame.available
    assert frame.size == events.num_entries

    with pytest.raises(KeyError):
        frame["notthere"]
Beispiel #6
0
def test_lazy_dataframe():
    """Exercise item-style column access and assignment on ``LazyDataFrame``.

    The dataframe wraps the ``Events`` tree of the ``nano_dy.root`` sample
    with ``nMuon`` preloaded; a column is read, a derived column is stored,
    and a lookup of an unknown column is expected to raise ``KeyError``.
    """
    import uproot
    from coffea.processor import LazyDataFrame

    root_file = uproot.open(osp.abspath('tests/samples/nano_dy.root'))
    events = root_file['Events']
    start, stop = 0, 100

    frame = LazyDataFrame(events, start, stop, preload_items=['nMuon'])

    # Before any access only the preloaded column is expected.
    assert len(frame) == 1

    muon_pt = frame['Muon_pt']
    assert len(frame) == 2

    # Storing a derived column is expected to add one more entry.
    frame['Muon_pt_up'] = muon_pt * 1.05
    assert len(frame) == 3
    assert 'Muon_pt' in frame.materialized

    assert 'Muon_eta' in frame.available
    assert frame.size == events.num_entries

    with pytest.raises(KeyError):
        frame['notthere']
Beispiel #7
0
def _work_function_nanoaod(item, flatten=False, savemetrics=False, mmap=False, **_):
    """Run a processor over one chunk of a ROOT file and wrap its output.

    Parameters
    ----------
    item : tuple
        ``(dataset, fn, treename, chunksize, index, processor_instance)``.
    flatten : bool
        Forwarded to ``LazyDataFrame``.
    savemetrics : bool
        When true, the returned ``metrics`` accumulator is filled with the
        materialized columns, the dataframe size and the processing time
        (plus bytes read and data server for XRootD sources).
    mmap : bool
        When true, an empty options dict is passed as ``localsource`` to
        ``uproot.open``; otherwise a ``uproot.FileSource`` factory with
        ``parallel`` set to ``None`` is used.
    **_
        Extra keyword arguments are accepted and ignored.

    Returns
    -------
    dict_accumulator
        Accumulator with keys ``'out'`` (the value returned by
        ``processor_instance.process``) and ``'metrics'``.
    """
    dataset, fn, treename, chunksize, index, processor_instance = item
    if mmap:
        localsource = {}
    else:
        opts = dict(uproot.FileSource.defaults)
        opts.update({'parallel': None})

        def localsource(path):
            return uproot.FileSource(path, **opts)

    file = uproot.open(fn, localsource=localsource)
    # Everything that touches the open file runs inside try/finally so the
    # source is closed even when the processor (or a tree lookup) raises.
    try:
        tree = file[treename]
        df = LazyDataFrame(tree, chunksize, index, flatten=flatten)
        runs = file['Runs']
        for name in runs.keys():
            name = name.decode('utf-8')
            values = runs[name].array()
            # Only the first chunk carries the real 'Runs' values; other
            # chunks get zeros of the same shape.
            # NOTE(review): presumably this avoids double counting when
            # chunk outputs are summed -- confirm with the caller.
            df[name] = values if index == 0 else 0 * values
        df['dataset'] = dataset
        tic = time.time()
        out = processor_instance.process(df)
        toc = time.time()
        metrics = dict_accumulator()
        if savemetrics:
            if isinstance(file.source, uproot.source.xrootd.XRootDSource):
                metrics['bytesread'] = value_accumulator(int, file.source.bytesread)
                metrics['dataservers'] = set_accumulator({file.source._source.get_property('DataServer')})
            metrics['columns'] = set_accumulator(df.materialized)
            metrics['entries'] = value_accumulator(int, df.size)
            metrics['processtime'] = value_accumulator(float, toc - tic)
        wrapped_out = dict_accumulator({'out': out, 'metrics': metrics})
    finally:
        file.source.close()
    return wrapped_out
from awkward import JaggedArray
import numpy as np
from bucoffea.helpers import object_overlap
from bucoffea.helpers.paths import bucoffea_path
from bucoffea.helpers.gen import find_first_parent

# JaggedCandidateArray is used below to build the jet and muon collections;
# without this import the script fails with NameError.
from coffea.analysis_objects import JaggedCandidateArray
from coffea.processor import LazyDataFrame

import uproot

fn = 'root://cmsxrootd.fnal.gov/eos/cms/store/group/phys_exotica/monojet/aalbert/nanopost/16Jul19/DYJetsToLL_M-50_HT-400to600_TuneCP5_13TeV-madgraphMLM-pythia8/DYJetsToLL_M-50_HT-400to600-MLM_2017/190717_212115/0000/tree_6.root'

file = uproot.open(fn)

tree = file['Events']
df = LazyDataFrame(tree, flatten=True)

# Jet candidates built from the per-event jet count and the Jet_* columns.
ak4 = JaggedCandidateArray.candidatesfromcounts(
    df['nJet'],
    pt=df['Jet_pt'],
    eta=df['Jet_eta'],
    phi=df['Jet_phi'],
    mass=df['Jet_mass'],
)

# Muon candidates; the mass is a zero array with the same layout as Muon_pt.
muon_pt = df['Muon_pt']
muons = JaggedCandidateArray.candidatesfromcounts(
    df['nMuon'],
    pt=muon_pt,
    eta=df['Muon_eta'],
    phi=df['Muon_phi'],
    mass=0 * muon_pt,
)