def test_read_nanodata():
    """Read the dimuon sample and run ``crossref`` on it.

    ``crossref`` is applied first to the full event collection and then to
    the subset selected by ``ak.num(events.Jet) > 2``.
    """
    sample_path = os.path.abspath('tests/samples/nano_dimuon.root')
    events = NanoEventsFactory.from_file(sample_path).events()

    # Full collection first, then the jet-multiplicity selection.
    crossref(events)
    many_jets = ak.num(events.Jet) > 2
    crossref(events[many_jets])
def test_read_nanomc():
    """Read the DY MC sample and exercise gen-particle cross references.

    The statement order is deliberate: ``genroundtrips`` is run on masked
    and sliced views before anything else, then the gen-matching sanity
    assertions, then ``crossref``, and finally a check of the first nine
    per-event ``Photon.isTight`` flags against fixed expected values.
    """
    events = NanoEventsFactory.from_file(
        os.path.abspath('tests/samples/nano_dy.root')
    ).events()

    # test after views first
    genroundtrips(events.GenPart.mask[events.GenPart.eta > 0])
    genroundtrips(events.mask[ak.any(events.Electron.pt > 50, axis=1)].GenPart)
    genroundtrips(events.GenPart)
    genroundtrips(events.GenPart[events.GenPart.eta > 0])
    genroundtrips(events[ak.any(events.Electron.pt > 50, axis=1)].GenPart)

    # sane gen matching (note for electrons gen match may be photon(22))
    assert ak.all(
        (abs(events.Electron.matched_gen.pdgId) == 11)
        | (events.Electron.matched_gen.pdgId == 22)
    )
    assert ak.all(abs(events.Muon.matched_gen.pdgId) == 13)

    genroundtrips(events.Electron.matched_gen)

    crossref(events[ak.num(events.Jet) > 2])
    crossref(events)

    # Expected any-tight-photon flag for each of the first nine events.
    expected_tight = [
        False, True, True, True, False, False, False, False, False,
    ]
    assert ak.any(events.Photon.isTight, axis=1).tolist()[:9] == expected_tight
def nano_evts(fname):
    """Build NanoEvents from ``fname``.

    The factory is created with ``entry_start=0``, ``entry_stop=10000`` and
    metadata whose ``"dataset"`` entry is an empty string.

    :param fname: File passed straight through to
        ``NanoEventsFactory.from_file``.
    :return: The result of the factory's ``events()`` call.
    """
    return NanoEventsFactory.from_file(
        fname,
        entry_start=0,
        entry_stop=10000,
        metadata={"dataset": ""},
    ).events()
def run_coffea_processor(
    events_url: str,
    tree_name: str,
    accumulator,
    proc,
    explicit_func_pickle=False,
):
    """
    Process a single file from a tree via a coffea processor on the remote node

    :param events_url: a URL to a ROOT file that uproot4 can open
    :param tree_name: The tree in the ROOT file to use for our data
    :param accumulator: Accumulator to store the results; its ``identity()``
        is what gets handed to the analysis function
    :param proc: Analysis function to execute. It is called as
        ``proc(output, events)``. When ``explicit_func_pickle`` is true this
        must instead be the dill-serialized form of that function.
    :param explicit_func_pickle: bool
        Do we need to use dill to explicitly pickle the process function, or
        can we rely on the remote execution framework to handle it correctly?
    :return: Whatever the analysis function returns (expected to be the
        populated accumulator)
    """
    # Since we execute remotely, explicitly include everything we need.
    import awkward1 as ak
    from coffea.nanoevents import NanoEventsFactory
    from sx_multi.schema import auto_schema

    # This import is amazingly important - the invariant mass will fail
    # silently without it. It must also be done in here, as this function is
    # shipped off to the funcx processor on a remote machine/remote python
    # environment.
    from coffea.nanoevents.methods import candidate
    ak.behavior.update(candidate.behavior)

    # Use NanoEvents to build a 4-vector
    url = str(events_url)
    file_metadata = {
        'dataset': 'mc15x',
        'filename': url
    }
    events = NanoEventsFactory.from_file(
        file=url,
        treepath=f'/{tree_name}',
        schemaclass=auto_schema,
        metadata=file_metadata,
    ).events()

    output = accumulator.identity()

    analysis = proc
    if explicit_func_pickle:
        # NOTE(review): dill.loads executes code embedded in its input, so
        # ``proc`` must only ever come from a trusted submitter.
        import dill
        analysis = dill.loads(proc)

    return analysis(output, events)