예제 #1
0
 def events(self, runtime_cache=None):
     """Materialize the NanoEvents array for this partition.

     Parameters
     ----------
     runtime_cache : optional
         Cache object forwarded to ``NanoEventsFactory`` for runtime use.

     Returns
     -------
     The events array produced by the factory.
     """
     # Wrap the uproot-backed source in the shared storage cache.
     source = CachedMapping(self._cc.storage,
                            UprootSourceMapping(self._cc))
     # Partition identity: file uuid, tree name, and covered entry range.
     partition = (
         self._data["uuid"],
         self._data["tree_name"],
         "{0}-{1}".format(self._data["start"], self._data["stop"]),
     )
     return NanoEventsFactory(self._schema,
                              source,
                              tuple_to_key(partition),
                              cache=runtime_cache).events()
예제 #2
0
    def load(cls, path, *args, **kwargs):
        """Open *path* and return its loaded contents.

        Dispatches on the file extension: ``.root`` and ``.parquet`` go
        through ``NanoEventsFactory``; anything else is assumed to be a
        ``.coffea`` file and is handed to ``coffea.util.load``.
        """
        path = get_path(path)

        is_root = path.endswith(".root")
        is_parquet = path.endswith(".parquet")

        if is_root or is_parquet:
            from coffea.nanoevents import NanoEventsFactory
            opener = (NanoEventsFactory.from_root
                      if is_root else NanoEventsFactory.from_parquet)
            return opener(path, *args, **kwargs)

        # Fallback: treat as a .coffea file.
        from coffea.util import load
        return load(path, *args, **kwargs)
예제 #3
0
def test_preloaded_nanoevents():
    """Run NanoEventsProcessor on columns preloaded from the test file."""
    wanted = [
        "nMuon", "Muon_pt", "Muon_eta", "Muon_phi",
        "Muon_mass", "Muon_charge", "nJet", "Jet_eta",
    ]
    proc = NanoEventsProcessor(columns=wanted)

    opened = uproot.open(os.path.abspath("tests/samples/nano_dy.root"))
    events_tree = opened["Events"]
    preloaded = events_tree.arrays(wanted, how=dict)
    source = SimplePreloadedColumnSource(
        preloaded, opened.file.uuid, events_tree.num_entries,
        object_path="/Events",
    )
    print(preloaded)

    factory = NanoEventsFactory.from_preloaded(
        source, metadata={"dataset": "ZJets"}
    )
    events = factory.events()
    out = proc.process(events)

    print(out)
    assert out["cutflow"]["ZJets_pt"] == 18
    assert out["cutflow"]["ZJets_mass"] == 6

    # Preloaded columns carry no jet cross-reference, so this must fail.
    with pytest.raises(AttributeError):
        print(events.Muon.matched_jet)
예제 #4
0
def test_read_nanomc():
    """Read the nano_dy MC sample and check gen matching and cross references."""
    factory = NanoEventsFactory.from_file(
        os.path.abspath('tests/samples/nano_dy.root'))
    events = factory.events()

    # test after views first
    genroundtrips(events.GenPart.mask[events.GenPart.eta > 0])
    genroundtrips(events.mask[ak.any(events.Electron.pt > 50, axis=1)].GenPart)
    genroundtrips(events.GenPart)

    # same round trips on sliced (rather than masked) views
    genroundtrips(events.GenPart[events.GenPart.eta > 0])
    genroundtrips(events[ak.any(events.Electron.pt > 50, axis=1)].GenPart)

    # sane gen matching (note for electrons gen match may be photon(22))
    assert ak.all((abs(events.Electron.matched_gen.pdgId) == 11)
                  | (events.Electron.matched_gen.pdgId == 22))
    assert ak.all(abs(events.Muon.matched_gen.pdgId) == 13)

    genroundtrips(events.Electron.matched_gen)

    # cross-reference integrity on a sliced view and on the full array
    crossref(events[ak.num(events.Jet) > 2])
    crossref(events)

    # expected per-event Photon.isTight pattern for the first events in the file
    assert ak.any(events.Photon.isTight, axis=1).tolist()[:9] == [
        False, True, True, True, False, False, False, False, False
    ]
예제 #5
0
def test_preloaded_nanoevents():
    """Same preloaded-column check as above, exercising from_preloaded."""
    branches = [
        'nMuon', 'Muon_pt', 'Muon_eta', 'Muon_phi', 'Muon_mass',
        'Muon_charge', 'nJet', 'Jet_eta',
    ]
    processor_instance = NanoEventsProcessor(columns=branches)

    opened = uproot.open(os.path.abspath('tests/samples/nano_dy.root'))
    events_tree = opened['Events']
    loaded = events_tree.arrays(branches, how=dict)
    column_source = SimplePreloadedColumnSource(
        loaded,
        opened.file.uuid,
        events_tree.num_entries,
        object_path='/Events',
    )
    print(loaded)

    events = NanoEventsFactory.from_preloaded(
        column_source,
        metadata={'dataset': 'ZJets'},
    ).events()
    hists = processor_instance.process(events)

    print(hists)
    assert hists['cutflow']['ZJets_pt'] == 18
    assert hists['cutflow']['ZJets_mass'] == 6

    # No jet cross-reference exists for preloaded columns.
    with pytest.raises(AttributeError):
        print(events.Muon.matched_jet)
예제 #6
0
 def run():
     """Apply jet corrections to the test sample, propagate to MET, print variations."""
     events = NanoEventsFactory.from_root(
         os.path.abspath("tests/samples/nano_dy.root"),
         persistent_cache=array_log,
     ).events()
     jets = events.Jet
     met = events.MET
     # Undo the stored correction factor to recover raw jet kinematics.
     jets["pt_raw"] = (1 - jets["rawFactor"]) * jets["pt"]
     jets["mass_raw"] = (1 - jets["rawFactor"]) * jets["mass"]
     # Matched gen pt, 0.0 where unmatched, as float32.
     jets["pt_gen"] = ak.values_astype(
         ak.fill_none(jets.matched_gen.pt, 0.0), np.float32)
     # Broadcast the per-event rho to per-jet shape.
     jets["rho"] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll,
                                       jets.pt)[0]
     jec_cache = cachetools.Cache(np.inf)
     # Fire jec_finalized when the cache object is garbage collected.
     weakref.finalize(jec_cache, jec_finalized.set)
     corrected_jets = jet_factory.build(jets, lazy_cache=jec_cache)
     corrected_met = met_factory.build(met,
                                       corrected_jets,
                                       lazy_cache=jec_cache)
     print(corrected_met.pt_orig)
     print(corrected_met.pt)
     for unc in jet_factory.uncertainties() + met_factory.uncertainties():
         print(unc, corrected_met[unc].up.pt)
         print(unc, corrected_met[unc].down.pt)
     for unc in jet_factory.uncertainties():
         print(unc, corrected_jets[unc].up.pt)
     print("Finalized:", array_log.finalized)
예제 #7
0
def test_read_nanodata():
    """Read the data (dimuon) sample and verify cross references."""
    events = NanoEventsFactory.from_file(
        os.path.abspath('tests/samples/nano_dimuon.root')
    ).events()

    # Full array first, then a >2-jet subset.
    crossref(events)
    crossref(events[ak.num(events.Jet) > 2])
예제 #8
0
파일: validate.py 프로젝트: fleble/PFNano
 def nano_evts(fname):
     """Return NanoEvents for the first 10000 entries of *fname*."""
     return NanoEventsFactory.from_file(
         fname,
         entry_start=0,
         entry_stop=10000,
         metadata={"dataset": ""},
     ).events()
예제 #9
0
def get_sum_wgts(file):
    """Return ``(file, n_events)`` for the Delphes tree; ``(file, 0)`` on failure."""
    try:
        evts = NanoEventsFactory.from_root(
            file, "Delphes", schemaclass=DelphesSchema).events()
        return (file, ak.count(evts.Event.Number))
    except Exception:
        # Best effort: an unreadable file contributes a zero count.
        return (file, 0)
예제 #10
0
파일: executor.py 프로젝트: yimuchen/coffea
def run_coffea_processor(
    events_url: str, tree_name: Optional[str], proc, data_type, meta_data
):
    """
    Process a single file from a tree via a coffea processor on the remote node.
    :param events_url:
        a URL to a ROOT file that uproot4 can open
    :param tree_name:
        The tree in the ROOT file to use for our data. Can be null if the data isn't a root
        tree!
    :param proc:
        Analysis function to execute; called as ``proc(events)``.
    :param data_type:
        What datatype is the data (root, parquet?)
    :param meta_data:
        Extra metadata merged into the events' metadata (a ``filename`` key is added).
    :return:
        Whatever ``proc`` returns for the loaded events.
    """
    # Since we execute remotely, explicitly include everything we need.
    from coffea.nanoevents import NanoEventsFactory
    from coffea.nanoevents.schemas.schema import auto_schema

    if data_type == "root":
        # Use NanoEvents to build a 4-vector
        assert tree_name is not None
        events = NanoEventsFactory.from_root(
            file=str(events_url),
            treepath=f"/{tree_name}",
            schemaclass=auto_schema,
            metadata=dict(meta_data, filename=str(events_url)),
        ).events()
    elif data_type == "parquet":
        events = NanoEventsFactory.from_parquet(
            file=str(events_url),
            treepath="/",
            schemaclass=auto_schema,
            metadata=dict(meta_data, filename=str(events_url)),
        ).events()
    else:
        raise Exception(f"Unknown stream data type of {data_type} - cannot process.")

    return proc(events)
def run_coffea_processor(events_url: str,
                         tree_name: str,
                         accumulator,
                         proc,
                         explicit_func_pickle=False):
    """
    Process a single file from a tree via a coffea processor on the remote node.
    :param events_url:
        a URL to a ROOT file that uproot4 can open
    :param tree_name:
        The tree in the ROOT file to use for our data
    :param accumulator:
        Accumulator to store the results
    :param proc:
        Analysis function to execute; called as ``proc(output, events)``.
    :param explicit_func_pickle: bool
        Do we need to use dill to explicitly pickle the process function, or can we
        rely on the remote execution framework to handle it correctly?
    :return:
        Populated accumulator
    """
    # Since we execute remotely, explicitly include everything we need.
    import awkward1 as ak
    from coffea.nanoevents import NanoEventsFactory
    from sx_multi.schema import auto_schema

    # This is amazingly important - the invar mass will fail silently without it.
    # And must be done in here as this function is shipped off to the funcx processor
    # on a remote machine/remote python environment.
    from coffea.nanoevents.methods import candidate
    ak.behavior.update(candidate.behavior)

    # Use NanoEvents to build a 4-vector
    events = NanoEventsFactory.from_file(file=str(events_url),
                                         treepath=f'/{tree_name}',
                                         schemaclass=auto_schema,
                                         metadata={
                                             'dataset': 'mc15x',
                                             'filename': str(events_url)
                                         }).events()

    output = accumulator.identity()
    if explicit_func_pickle:
        import dill as pickle
        f = pickle.loads(proc)
        return f(output, events)
    else:
        return proc(output, events)
예제 #12
0
파일: executor.py 프로젝트: uccross/coffea
def run_coffea_processor(events_url: str,
                         tree_name: str,
                         proc,
                         explicit_func_pickle=False):
    """
    Process a single file from a tree via a coffea processor on the remote node.
    :param events_url:
        a URL to a ROOT file that uproot4 can open
    :param tree_name:
        The tree in the ROOT file to use for our data
    :param proc:
        Analysis function to execute; called as ``proc(events)``.
    :param explicit_func_pickle: bool
        Do we need to use dill to explicitly pickle the process function, or can we
        rely on the remote execution framework to handle it correctly?
    :return:
        Whatever the analysis function returns for the loaded events.
    """
    # Since we execute remotely, explicitly include everything we need.
    from coffea.nanoevents import NanoEventsFactory
    from coffea.nanoevents.schemas.schema import auto_schema

    # Use NanoEvents to build a 4-vector
    events = NanoEventsFactory.from_root(
        file=str(events_url),
        treepath=f"/{tree_name}",
        schemaclass=auto_schema,
        metadata={
            "dataset": "mc15x",
            "filename": str(events_url)
        },
    ).events()

    if explicit_func_pickle:
        import dill as pickle

        f = pickle.loads(proc)
        return f(events)
    else:
        return proc(events)
예제 #13
0
            dataset="Pythia8 CUETP8M1",
            nTracks=nTracks,
        )

        return output

    def postprocess(self, accumulator):
        """Return the accumulator unchanged; no post-processing is required."""
        return accumulator


if __name__ == '__main__':
    pythiaFilename = "qcd_CUETP8M1.root"
    pythiaFile = uproot.open(pythiaFilename)
    pythiaEvents = NanoEventsFactory.from_root(
        pythiaFile,
        treepath='tree',
        metadata={"dataset": "standalone CUETP8M1"},
        schemaclass=BaseSchema,
    ).events()
    pythiaP = PythiaProcessor()
    pythiaOut = pythiaP.process(pythiaEvents)

    tmFileset = {
        'CMSSW CUETPM81': [
            '/Users/chrispap/QCD/new/Autumn18.QCD_HT1000to1500_TuneCP5_13TeV-madgraphMLM-pythia8_0_RA2AnalysisTree.root',
            '/Users/chrispap/QCD/new/Autumn18.QCD_HT1500to2000_TuneCP5_13TeV-madgraphMLM-pythia8_0_RA2AnalysisTree.root',
            '/Users/chrispap/QCD/new/Autumn18.QCD_HT2000toInf_TuneCP5_13TeV-madgraphMLM-pythia8_0_RA2AnalysisTree.root',
        ],
    }

    tmOut = processor.run_uproot_job(
        tmFileset,
예제 #14
0
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
from coffea.analysis_tools import Weights, PackedSelection
from coffea import processor, hist
import pandas as pd
import numpy as np

# the below command will change to .from_root in coffea v0.7.0
# events = NanoEventsFactory.from_root('/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/topW_v0.2.3/ProjectMetis_TTWJetsToLNuEWK_5f_NLO_RunIIAutumn18_NANO_v2/nanoSkim_1.root', schemaclass=NanoAODSchema).events()

# events = NanoEventsFactory.from_root('root://xcache-redirector.t2.ucsd.edu:2040//store/mc/RunIIAutumn18NanoAODv7/QCD_Pt-120to170_MuEnrichedPt5_TuneCP5_13TeV_pythia8/NANOAODSIM/Nano02Apr2020_102X_upgrade2018_realistic_v21-v1/70000/DE335891-829A-B943-99BE-E5A179F5F3EB.root', schemaclass=NanoAODSchema).events()
events = NanoEventsFactory.from_root('/hadoop/cms/store/user/ksalyer/FCNC_NanoSkim/fcnc_v3/TTJets_TuneCUETP8M2T4_13TeV-amcatnloFXFX-pythia8_RunIISummer16NanoAODv7-PUMoriond17_Nano02Apr2020_102X_mcRun2_asymptotic_v8-v1_NANOAODSIM_fcnc_v3/output_40.root', schemaclass=NanoAODSchema).events()
#
from Tools.objects import *
from Tools.basic_objects import *
from Tools.cutflow import *
from Tools.config_helpers import *
from Tools.triggers import *
from Tools.btag_scalefactors import *
from Tools.lepton_scalefactors import *
from Tools.helpers import mt
from Tools.SS_selection import SS_selection
from Tools.fake_rate import fake_rate
from processor.default_accumulators import desired_output, add_processes_to_output, dataset_axis, pt_axis, eta_axis

#electron     = Collections(events, "Electron", "tightSSTTH").get()

## now do whatever you would have done in the processor

def SS_fill_weighted(output, mumu_sel, ee_sel, mue_sel, emu_sel, mu_weights=None, e_weights=None, **kwargs):
    if len(kwargs.keys())==3: #dataset, axis_1, axis_2
        vals_1 = np.array([])
예제 #15
0
from coffea.nanoevents import NanoEventsFactory, BaseSchema

fileName = "test.root"
events = NanoEventsFactory.from_root(fileName, schemaclass=BaseSchema).events()

# Dump the kinematics of every event that has more than 3 fat jets.
# Iterate the jagged branches in lockstep instead of the original
# `for i in range(len(...))` + repeated positional indexing anti-pattern.
for ievt, (etas, phis, pts, masses) in enumerate(
        zip(events.FatJet_eta, events.FatJet_phi,
            events.FatJet_pt, events.FatJet_mass)):
    njets = len(pts)
    if njets > 3:
        print("\n%d jets in event %d:" % (njets, ievt))
        print("eta")
        for value in etas:
            print(value)
        print("phi")
        for value in phis:
            print(value)
        print("pt")
        for value in pts:
            print(value)
        print("mass")
        for value in masses:
            print(value)
    else:
        # Bug fix: the original always printed "0 jet" even for 1-3 jets;
        # report the actual jet count instead.
        print("\n%d jet(s) in event %d, skipping." % (njets, ievt))
예제 #16
0
import numpy as np
import awkward as ak
from coffea.nanoevents import NanoEventsFactory, TreeMakerSchema
import coffea.hist as hist
import matplotlib.pyplot as plt
import mplhep
plt.style.use(mplhep.style.ROOT)

# TreeMaker QCD ntuple; path is machine-specific.
fname = "/Users/chrispap/QCD/new/Autumn18.QCD_HT1500to2000_TuneCP5_13TeV-madgraphMLM-pythia8_0_RA2AnalysisTree.root"
events = NanoEventsFactory.from_root(fname,
                                     treepath='TreeMaker2/PreSelection',
                                     schemaclass=TreeMakerSchema).events()

ht = events.HT
GenParticles = events.GenParticles
# Stable (Status == 1), charged gen particles with pt > 1 inside |eta| < 2.5.
finalParticles = (GenParticles.Status == 1) & (GenParticles.pt > 1) & (abs(
    GenParticles.eta) < 2.5) & (GenParticles.Charge != 0)
# Gen-level multiplicity per event, restricted to HT > 1200 events.
multiplicity_gen = ak.sum(finalParticles[ht > 1200], axis=1)

tracks = events.Tracks
# Track pt/eta reconstructed from Cartesian momentum components.
tracks_pt = np.sqrt(tracks.x**2 + tracks.y**2)
tracks_eta = np.arcsinh(tracks.z / tracks_pt)
track_cut = (tracks_pt > 1.) & (abs(tracks_eta) < 2.5) & (
    tracks.fromPV0 >= 2) & tracks.matchedToPFCandidate
# Reco track multiplicity per event for the same HT > 1200 selection.
multiplicity = ak.to_numpy(ak.sum(track_cut[ht > 1200], axis=1))

histo = hist.Hist(
    "Counts",
    hist.Cat("sample", "samples"),
    hist.Bin("nTracks", "nTracks", 50, 0, 250),
)
예제 #17
0
def events():
    """Fixture: NanoEvents read from the bundled Delphes sample."""
    sample_path = os.path.abspath("tests/samples/delphes.root")
    delphes_factory = NanoEventsFactory.from_root(
        sample_path,
        treepath="Delphes",
        schemaclass=DelphesSchema,
    )
    return delphes_factory.events()
예제 #18
0
def events():
    """Fixture: NanoEvents for the PHYSLITE DAOD test file."""
    return NanoEventsFactory.from_root(
        os.path.abspath("tests/samples/DAOD_PHYSLITE_21.2.108.0.art.pool.root"),
        treepath="CollectionTree",
        schemaclass=PHYSLITESchema,
    ).events()
예제 #19
0
        with open(JSON_LOC, "r") as fo:
            file_names = json.load(fo)
        file_names = file_names[dataset]
        print('find ', len(file_names)," files")

        if options.startfile>=options.endfile and options.endfile!=-1:
            print("make sure options.startfile<options.endfile")
            exit()
        inpz=0
        eventperfile=1000
        currentfile=0
        for ifile in file_names:
            if currentfile<options.startfile:
                currentfile+=1
                continue
            events = NanoEventsFactory.from_root(ifile, schemaclass=NanoAODSchema).events()
            nevents_total = len(events)
            print(ifile, ' Number of events:', nevents_total)
            
            for i in range(int(nevents_total / eventperfile)+1):
                if i< int(nevents_total / eventperfile):
                    print('from ',i*eventperfile, ' to ', (i+1)*eventperfile)
                    events_slice = events[i*eventperfile:(i+1)*eventperfile]
                elif i == int(nevents_total / eventperfile) and i*eventperfile<=nevents_total:
                    print('from ',i*eventperfile, ' to ', nevents_total)
                    events_slice = events[i*eventperfile:nevents_total]
                else:
                    print(' weird ... ')

                nparticles_per_event = max(ak.num(events_slice.PFCands.pt, axis=1))
                print("max NPF in this range: ", nparticles_per_event)
예제 #20
0
def events():
    """Fixture: NanoEvents for the TreeMaker test sample."""
    sample = os.path.abspath("tests/samples/treemaker.root")
    made = NanoEventsFactory.from_root(sample,
                                       treepath="PreSelection",
                                       schemaclass=TreeMakerSchema)
    return made.events()
예제 #21
0
        base_form["contents"].pop("Muon_fsrPhotonIdx", None)
        base_form["contents"].pop("Electron_photonIdx", None)
        super().__init__(base_form)


import argparse
parser = argparse.ArgumentParser(
    description=
    'Prepare files from .root skims to a CSV of t event training data')
parser.add_argument('file', metavar='f', type=str)
parser.add_argument('loc', metavar='d', type=str)
args = parser.parse_args()
from pprint import pprint
##InitialDataCuts

# Read the input skim with the customized schema defined above.
events = NanoEventsFactory.from_root(args.file,
                                     schemaclass=HackSchema).events()

jets = events.Jet

# Object selections: jets with pt > 30 inside |eta| < 2.4;
# b-jets from the tight set via a DeepFlavB cut.
jetSel = (jets.pt > 30) & (abs(jets.eta) < 2.4)
tightJet = jets[jetSel]
bJet = tightJet[tightJet.btagDeepFlavB > 0.642]
muons = events.Muon
muonSel = (muons.pt > 30) & (abs(muons.eta) < 2.4)
tightMuon = muons[muonSel]
ele = events.Electron
eleSel = (ele.pt > 35) & (abs(ele.eta) < 2.4)
tightEle = ele[eleSel]
# Event selection: exactly one tight muon OR one tight electron,
# at least 3 tight jets, and at least 1 b-tagged jet.
eventSel = (((ak.num(tightMuon) == 1) | (ak.num(tightEle) == 1)) &
            (ak.num(tightJet) >= 3) & (ak.num(bJet) >= 1))
final = events[eventSel]
예제 #22
0
def test_corrected_jets_factory():
    """Exercise CorrectedJetsFactory and CorrectedMETFactory end to end.

    Loads the nano_dy test sample, assembles a JEC stack from the
    module-level ``evaluator``, builds corrected jets (timing each step),
    then propagates the corrections to MET and prints all variations.
    """
    import os
    from coffea.jetmet_tools import CorrectedJetsFactory, CorrectedMETFactory, JECStack

    events = None
    cache = {}  # NOTE(review): unused in this test
    from coffea.nanoevents import NanoEventsFactory
    factory = NanoEventsFactory.from_root(
        os.path.abspath('tests/samples/nano_dy.root'))
    events = factory.events()

    # JEC correction levels plus JER resolution/scale-factor payloads,
    # extended with every matching uncertainty-source key in the evaluator.
    jec_stack_names = [
        'Summer16_23Sep2016V3_MC_L1FastJet_AK4PFPuppi',
        'Summer16_23Sep2016V3_MC_L2Relative_AK4PFPuppi',
        'Summer16_23Sep2016V3_MC_L2L3Residual_AK4PFPuppi',
        'Summer16_23Sep2016V3_MC_L3Absolute_AK4PFPuppi',
        'Spring16_25nsV10_MC_PtResolution_AK4PFPuppi',
        'Spring16_25nsV10_MC_SF_AK4PFPuppi'
    ]
    for key in evaluator.keys():
        if 'Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi' in key:
            jec_stack_names.append(key)

    jec_inputs = {name: evaluator[name] for name in jec_stack_names}
    jec_stack = JECStack(jec_inputs)

    # Map the factory's expected variable names onto jet fields.
    name_map = jec_stack.blank_name_map
    name_map['JetPt'] = 'pt'
    name_map['JetMass'] = 'mass'
    name_map['JetEta'] = 'eta'
    name_map['JetA'] = 'area'

    jets = events.Jet

    # Derive the inputs the corrections need: raw pt/mass, matched gen pt
    # (0 where unmatched), and per-jet rho broadcast from the event.
    jets['pt_raw'] = (1 - jets['rawFactor']) * jets['pt']
    jets['mass_raw'] = (1 - jets['rawFactor']) * jets['mass']
    jets['pt_gen'] = ak.values_astype(ak.fill_none(jets.matched_gen.pt, 0),
                                      np.float32)
    jets['rho'] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll,
                                      jets.pt)[0]
    name_map['ptGenJet'] = 'pt_gen'
    name_map['ptRaw'] = 'pt_raw'
    name_map['massRaw'] = 'mass_raw'
    name_map['Rho'] = 'rho'

    # Reuse the events' own lazy cache for the corrected-jet build.
    events_cache = events.caches[0]

    print(name_map)

    tic = time.time()
    jet_factory = CorrectedJetsFactory(name_map, jec_stack)
    toc = time.time()

    print('setup corrected jets time =', toc - tic)

    tic = time.time()
    #prof = pyinstrument.Profiler()
    #prof.start()
    corrected_jets = jet_factory.build(jets, lazy_cache=events_cache)
    #prof.stop()
    toc = time.time()

    print('corrected_jets build time =', toc - tic)

    #sprint(prof.output_text(unicode=True, color=True, show_all=True))

    # Materialize every jet uncertainty variation (timed).
    tic = time.time()
    print(corrected_jets.pt_orig)
    print(corrected_jets.pt)
    for unc in jet_factory.uncertainties():
        print(unc)
        print(corrected_jets[unc].up.pt)
        print(corrected_jets[unc].down.pt)
    toc = time.time()

    print('build all jet variations =', toc - tic)

    # Extend the name map with the MET-specific fields.
    name_map['METpt'] = 'pt'
    name_map['METphi'] = 'phi'
    name_map['METx'] = 'x'
    name_map['METy'] = 'y'
    name_map['JETx'] = 'x'
    name_map['JETy'] = 'y'
    name_map['xMETRaw'] = 'x_raw'
    name_map['yMETRaw'] = 'y_raw'
    name_map['UnClusteredEnergyDeltaX'] = 'MetUnclustEnUpDeltaX'
    name_map['UnClusteredEnergyDeltaY'] = 'MetUnclustEnUpDeltaY'

    tic = time.time()
    met_factory = CorrectedMETFactory(name_map)
    toc = time.time()

    print('setup corrected MET time =', toc - tic)

    met = events.MET
    tic = time.time()
    #prof = pyinstrument.Profiler()
    #prof.start()
    corrected_met = met_factory.build(met,
                                      corrected_jets,
                                      lazy_cache=events_cache)
    #prof.stop()
    toc = time.time()

    #print(prof.output_text(unicode=True, color=True, show_all=True))

    print('corrected_met build time =', toc - tic)

    # Materialize every jet+MET uncertainty variation of the MET (timed).
    tic = time.time()
    print(corrected_met.pt_orig)
    print(corrected_met.pt)
    for unc in (jet_factory.uncertainties() + met_factory.uncertainties()):
        print(unc)
        print(corrected_met[unc].up.pt)
        print(corrected_met[unc].down.pt)
    toc = time.time()

    print('build all met variations =', toc - tic)
예제 #23
0
def events():
    """Fixture: NanoEvents for the ProtoDUNE analysis tree."""
    return NanoEventsFactory.from_root(
        os.path.abspath("tests/samples/pduneana.root"),
        treepath="pduneana/beamana",
        schemaclass=PDUNESchema,
    ).events()
예제 #24
0
            cutflow_reqs_d = {}
            for req in reqs:
                cutflow_reqs_d.update({req: True})
                cutflow.addRow( req, self.selection.require(**cutflow_reqs_d) )

        return selection


if __name__ == '__main__':

    from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
    from coffea.analysis_tools import Weights, PackedSelection
    from Tools.samples import fileset_2018

    # the below command will change to .from_root in coffea v0.7.0
    ev = NanoEventsFactory.from_root(fileset_2018['TTW'][0], schemaclass=NanoAODSchema).events()

    # NOTE(review): tight and veto slots (ele/ele_veto, mu/mu_veto) and all
    # jet_* slots are fed the same unfiltered collections here -- confirm
    # against Selection's expectations before relying on this script.
    sel = Selection(
        dataset = "TTW",
        events = ev,
        year = 2018,
        ele = ev.Electron,
        ele_veto = ev.Electron,
        mu = ev.Muon,
        mu_veto = ev.Muon,
        jet_all = ev.Jet,
        jet_central = ev.Jet,
        jet_btag = ev.Jet,
        jet_fwd = ev.Jet,
        met = ev.MET,
    )
예제 #25
0
        print("%s:" % key, sf18.evaluator[key])

    ## Load a single file here, get leptons, eval SFs just to be sure everything works
    from coffea.nanoevents import NanoEventsFactory, NanoAODSchema

    from Tools.samples import get_babies
    from Tools.objects import Collections

    import awkward as ak

    fileset_all = get_babies(
        '/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/topW_v0.3.3_dilep/',
        year='UL2018')

    # load a subset of events
    n_max = 5000
    events = NanoEventsFactory.from_root(fileset_all['TTW'][0],
                                         schemaclass=NanoAODSchema,
                                         entry_stop=n_max).events()

    el = Collections(events, 'Electron', 'tightSSTTH', verbose=1).get()
    mu = Collections(events, 'Muon', 'tightSSTTH', verbose=1).get()

    sel = ((ak.num(el) + ak.num(mu)) > 1)

    sf_central = sf18.get(el[sel], mu[sel])
    sf_up = sf18.get(el[sel], mu[sel], variation='up')
    sf_down = sf18.get(el[sel], mu[sel], variation='down')
    print("Mean value of SF (central): %.3f" % ak.mean(sf_central))
    print("Mean value of SF (up): %.3f" % ak.mean(sf_up))
    print("Mean value of SF (down): %.3f" % ak.mean(sf_down))
예제 #26
0
def test_corrected_jets_factory():
    """Validate CorrectedJetsFactory against a by-hand JEC+JER calculation.

    Builds the JEC stack from the module-level ``evaluator``, corrects the
    nano_dy jets, recomputes the correction and smearing manually with
    FactorizedJetCorrector / JetResolution / JetResolutionScaleFactor, and
    asserts agreement; finally propagates the corrections to MET.
    """
    import os
    from coffea.jetmet_tools import CorrectedJetsFactory, CorrectedMETFactory, JECStack

    events = None
    from coffea.nanoevents import NanoEventsFactory

    factory = NanoEventsFactory.from_root(
        os.path.abspath("tests/samples/nano_dy.root"))
    events = factory.events()

    # JEC correction levels plus JER resolution/scale-factor payloads,
    # extended with every matching uncertainty-source key in the evaluator.
    jec_stack_names = [
        "Summer16_23Sep2016V3_MC_L1FastJet_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L2Relative_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L2L3Residual_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L3Absolute_AK4PFPuppi",
        "Spring16_25nsV10_MC_PtResolution_AK4PFPuppi",
        "Spring16_25nsV10_MC_SF_AK4PFPuppi",
    ]
    for key in evaluator.keys():
        if "Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi" in key:
            jec_stack_names.append(key)

    jec_inputs = {name: evaluator[name] for name in jec_stack_names}
    jec_stack = JECStack(jec_inputs)

    # Map the factory's expected variable names onto jet fields.
    name_map = jec_stack.blank_name_map
    name_map["JetPt"] = "pt"
    name_map["JetMass"] = "mass"
    name_map["JetEta"] = "eta"
    name_map["JetA"] = "area"

    jets = events.Jet

    # Derive the correction inputs: raw pt/mass, matched gen pt (0 where
    # unmatched), and per-jet rho broadcast from the event.
    jets["pt_raw"] = (1 - jets["rawFactor"]) * jets["pt"]
    jets["mass_raw"] = (1 - jets["rawFactor"]) * jets["mass"]
    jets["pt_gen"] = ak.values_astype(ak.fill_none(jets.matched_gen.pt, 0),
                                      np.float32)
    jets["rho"] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll,
                                      jets.pt)[0]
    name_map["ptGenJet"] = "pt_gen"
    name_map["ptRaw"] = "pt_raw"
    name_map["massRaw"] = "mass_raw"
    name_map["Rho"] = "rho"

    # Unbounded cache for the lazily evaluated corrections.
    jec_cache = cachetools.Cache(np.inf)

    print(name_map)

    tic = time.time()
    jet_factory = CorrectedJetsFactory(name_map, jec_stack)
    toc = time.time()

    print("setup corrected jets time =", toc - tic)

    # Build the corrected jets under the profiler (timed).
    tic = time.time()
    prof = pyinstrument.Profiler()
    prof.start()
    corrected_jets = jet_factory.build(jets, lazy_cache=jec_cache)
    prof.stop()
    toc = time.time()

    print("corrected_jets build time =", toc - tic)

    print(prof.output_text(unicode=True, color=True, show_all=True))

    # Materialize every jet uncertainty variation (timed).
    tic = time.time()
    print("Generated jet pt:", corrected_jets.pt_gen)
    print("Original jet pt:", corrected_jets.pt_orig)
    print("Raw jet pt:", jets.pt_raw)
    print("Corrected jet pt:", corrected_jets.pt)
    print("Original jet mass:", corrected_jets.mass_orig)
    print("Raw jet mass:", jets["mass_raw"])
    print("Corrected jet mass:", corrected_jets.mass)
    print("jet eta:", jets.eta)
    for unc in jet_factory.uncertainties():
        print(unc)
        print(corrected_jets[unc].up.pt)
        print(corrected_jets[unc].down.pt)
    toc = time.time()

    print("build all jet variations =", toc - tic)

    # Test that the corrections were applied correctly
    from coffea.jetmet_tools import (
        FactorizedJetCorrector,
        JetResolution,
        JetResolutionScaleFactor,
    )

    scalar_form = ak.without_parameters(jets["pt_raw"]).layout.form
    # By-hand correction: the first four stack entries are the JEC levels.
    corrector = FactorizedJetCorrector(
        **{name: evaluator[name]
           for name in jec_stack_names[0:4]})
    corrs = corrector.getCorrection(JetEta=jets["eta"],
                                    Rho=jets["rho"],
                                    JetPt=jets["pt_raw"],
                                    JetA=jets["area"])
    # Entry 4 is the pt-resolution payload, entry 5 the JER scale factor.
    reso = JetResolution(
        **{name: evaluator[name]
           for name in jec_stack_names[4:5]})
    jets["jet_energy_resolution"] = reso.getResolution(
        JetEta=jets["eta"],
        Rho=jets["rho"],
        JetPt=jets["pt_raw"],
        form=scalar_form,
        lazy_cache=jec_cache,
    )
    resosf = JetResolutionScaleFactor(
        **{name: evaluator[name]
           for name in jec_stack_names[5:6]})
    jets["jet_energy_resolution_scale_factor"] = resosf.getScaleFactor(
        JetEta=jets["eta"], lazy_cache=jec_cache)

    # Filter out the non-deterministic (no gen pt) jets
    def smear_factor(jetPt, pt_gen, jersf):
        # Deterministic (gen-matched) JER smearing: 1 + (sf - 1) * (pt - pt_gen) / pt
        return (ak.full_like(jetPt, 1.0) +
                (jersf[:, 0] - ak.full_like(jetPt, 1.0)) *
                (jetPt - pt_gen) / jetPt)

    # Compare only jets with a gen match: drop the last two jets of the
    # first event and the last jet of the last event.
    test_gen_pt = ak.concatenate(
        [corrected_jets.pt_gen[0, :-2], corrected_jets.pt_gen[-1, :-1]])
    test_raw_pt = ak.concatenate([jets.pt_raw[0, :-2], jets.pt_raw[-1, :-1]])
    test_pt = ak.concatenate(
        [corrected_jets.pt[0, :-2], corrected_jets.pt[-1, :-1]])
    test_eta = ak.concatenate([jets.eta[0, :-2], jets.eta[-1, :-1]])
    test_jer = ak.concatenate([
        jets.jet_energy_resolution[0, :-2], jets.jet_energy_resolution[-1, :-1]
    ])
    test_jer_sf = ak.concatenate([
        jets.jet_energy_resolution_scale_factor[0, :-2],
        jets.jet_energy_resolution_scale_factor[-1, :-1],
    ])
    test_jec = ak.concatenate([corrs[0, :-2], corrs[-1, :-1]])
    test_corrected_pt = ak.concatenate(
        [corrected_jets.pt[0, :-2], corrected_jets.pt[-1, :-1]])
    test_corr_pt = test_raw_pt * test_jec
    test_pt_smear_corr = test_corr_pt * smear_factor(test_corr_pt, test_gen_pt,
                                                     test_jer_sf)

    # Print the results of the "by-hand" calculations and confirm that the values match the expected values
    print("\nConfirm the CorrectedJetsFactory values:")
    print("Jet pt (gen)", test_gen_pt.tolist())
    print("Jet pt (raw)", test_raw_pt.tolist())
    print("Jet pt (nano):", test_pt.tolist())
    print("Jet eta:", test_eta.tolist())
    print("Jet energy resolution:", test_jer.tolist())
    print("Jet energy resolution sf:", test_jer_sf.tolist())
    print("Jet energy correction:", test_jec.tolist())
    print("Corrected jet pt (ref)", test_corr_pt.tolist())
    print("Corrected & smeared jet pt (ref):", test_pt_smear_corr.tolist())
    print("Corrected & smeared jet pt:", test_corrected_pt.tolist(), "\n")
    assert ak.all(np.abs(test_pt_smear_corr - test_corrected_pt) < 1e-6)

    # Extend the name map with the MET-specific fields.
    name_map["METpt"] = "pt"
    name_map["METphi"] = "phi"
    name_map["JetPhi"] = "phi"
    name_map["UnClusteredEnergyDeltaX"] = "MetUnclustEnUpDeltaX"
    name_map["UnClusteredEnergyDeltaY"] = "MetUnclustEnUpDeltaY"

    tic = time.time()
    met_factory = CorrectedMETFactory(name_map)
    toc = time.time()

    print("setup corrected MET time =", toc - tic)

    met = events.MET
    tic = time.time()
    # prof = pyinstrument.Profiler()
    # prof.start()
    corrected_met = met_factory.build(met,
                                      corrected_jets,
                                      lazy_cache=jec_cache)
    # prof.stop()
    toc = time.time()

    # print(prof.output_text(unicode=True, color=True, show_all=True))

    print("corrected_met build time =", toc - tic)

    # Materialize every jet+MET uncertainty variation of the MET (timed,
    # under the profiler).
    tic = time.time()
    print(corrected_met.pt_orig)
    print(corrected_met.pt)
    prof = pyinstrument.Profiler()
    prof.start()
    for unc in jet_factory.uncertainties() + met_factory.uncertainties():
        print(unc)
        print(corrected_met[unc].up.pt)
        print(corrected_met[unc].down.pt)
    prof.stop()
    toc = time.time()

    print("build all met variations =", toc - tic)

    print(prof.output_text(unicode=True, color=True, show_all=True))
예제 #27
0
def test_rochester():
    """Validate Rochester muon-momentum corrections against precomputed references.

    Exact 1-to-1 agreement with the official Rochester tool would require
    loading its C++ implementation, so the expected scale factors and
    uncertainties were produced ahead of time by
    tests/samples/rochester/build_rochester.py and are loaded here as
    .npy reference arrays.
    """
    payload = lookup_tools.txt_converters.convert_rochester_file(
        "tests/samples/RoccoR2018.txt.gz", loaduncs=True)
    corrector = lookup_tools.rochester_lookup.rochester_lookup(payload)

    # Reference values (flat arrays, one entry per muon).
    expected_data_k = np.load("tests/samples/nano_dimuon_rochester.npy")
    expected_data_err = np.load("tests/samples/nano_dimuon_rochester_err.npy")
    expected_mc_k = np.load("tests/samples/nano_dy_rochester.npy")
    expected_mc_err = np.load("tests/samples/nano_dy_rochester_err.npy")
    mc_rand = np.load("tests/samples/nano_dy_rochester_rand.npy")

    # ---- data: scale factor and its uncertainty ----
    events = NanoEventsFactory.from_root(
        os.path.abspath("tests/samples/nano_dimuon.root")).events()
    muons = events.Muon

    k_data = np.array(ak.flatten(
        corrector.kScaleDT(muons.charge, muons.pt, muons.eta, muons.phi)))
    assert all(np.isclose(k_data, expected_data_k))

    err_data = np.array(
        ak.flatten(corrector.kScaleDTerror(muons.charge, muons.pt,
                                           muons.eta, muons.phi)),
        dtype=float,
    )
    assert all(np.isclose(err_data, expected_data_err, atol=1e-8))

    # ---- MC: spread for gen-matched muons, smear for the rest ----
    events = NanoEventsFactory.from_root(
        os.path.abspath("tests/samples/nano_dy.root")).events()
    muons = events.Muon

    # A muon with no matched gen particle yields None -> NaN here.
    hasgen = ~np.isnan(ak.fill_none(muons.matched_gen.pt, np.nan))
    mc_rand = ak.unflatten(mc_rand, ak.num(hasgen))

    kspread = corrector.kSpreadMC(
        muons.charge[hasgen],
        muons.pt[hasgen],
        muons.eta[hasgen],
        muons.phi[hasgen],
        muons.matched_gen.pt[hasgen],
    )
    ksmear = corrector.kSmearMC(
        muons.charge[~hasgen],
        muons.pt[~hasgen],
        muons.eta[~hasgen],
        muons.phi[~hasgen],
        muons.nTrackerLayers[~hasgen],
        mc_rand[~hasgen],
    )
    # Stitch the two populations back into one flat per-muon array.
    k_mc = np.array(ak.flatten(ak.ones_like(muons.pt)))
    hasgen_flat = np.array(ak.flatten(hasgen))
    k_mc[hasgen_flat] = np.array(ak.flatten(kspread))
    k_mc[~hasgen_flat] = np.array(ak.flatten(ksmear))
    assert all(np.isclose(k_mc, expected_mc_k))

    errspread = corrector.kSpreadMCerror(
        muons.charge[hasgen],
        muons.pt[hasgen],
        muons.eta[hasgen],
        muons.phi[hasgen],
        muons.matched_gen.pt[hasgen],
    )
    errsmear = corrector.kSmearMCerror(
        muons.charge[~hasgen],
        muons.pt[~hasgen],
        muons.eta[~hasgen],
        muons.phi[~hasgen],
        muons.nTrackerLayers[~hasgen],
        mc_rand[~hasgen],
    )
    # Same stitching for the uncertainties.
    err_mc = np.array(ak.flatten(ak.ones_like(muons.pt)))
    err_mc[hasgen_flat] = np.array(ak.flatten(errspread))
    err_mc[~hasgen_flat] = np.array(ak.flatten(errsmear))
    assert all(np.isclose(err_mc, expected_mc_err, atol=1e-8))
# Example #28
        dataset = events.metadata['dataset']

        nTracks = events.nTracks

        output["sumw"][dataset] += len(events)
        output["nTracks"].fill(
            dataset=dataset,
            nTracks=nTracks,
        )

        return output

    def postprocess(self, accumulator):
        """Return the accumulated output unchanged; no cross-chunk merging is needed."""
        return accumulator

# Use the multithreaded XRootD handler for every subsequent uproot.open call.
uproot.open.defaults["xrootd_handler"] = uproot.source.xrootd.MultithreadedXRootDSource

filename = "qcd_CUETP8M1.root"
file = uproot.open(filename)
# Build NanoEvents from the "tree" TTree, capped at the first 10000 entries.
# BaseSchema leaves branches uninterpreted (no NanoAOD-specific grouping);
# the dataset name is attached as metadata for the processor to read.
events = NanoEventsFactory.from_root(
    file,
    treepath='tree',
    entry_stop=10000,
    metadata={"dataset": "CUETP8M1"},
    schemaclass=BaseSchema,
).events()
p = MyProcessor()
out = p.process(events)
# Bare expression: notebook-style display of the processor output (a no-op
# when run as a plain script).
out