def test_rochester():
    """Validate rochester_lookup muon-momentum scale corrections against
    precomputed reference values.

    Exact 1-to-1 comparison with the official Rochester implementation would
    require loading its C++ files, so reference scale factors were produced
    offline (by tests/samples/rochester/build_rochester.py) and stored as
    .npy files in the sample directory.
    """
    # Build the lookup from the packaged Rochester correction text file,
    # including the uncertainty tables (loaduncs=True).
    rochester_data = lookup_tools.txt_converters.convert_rochester_file(
        'tests/samples/RoccoR2018.txt.gz', loaduncs=True)
    rochester = lookup_tools.rochester_lookup.rochester_lookup(rochester_data)

    # to test 1-to-1 agreement with official Rochester requires loading C++ files
    # instead, preload the correct scales in the sample directory
    # the script tests/samples/rochester/build_rochester.py produces these
    official_data_k = np.load('tests/samples/nano_dimuon_rochester.npy')
    official_data_err = np.load('tests/samples/nano_dimuon_rochester_err.npy')
    official_mc_k = np.load('tests/samples/nano_dy_rochester.npy')
    official_mc_err = np.load('tests/samples/nano_dy_rochester_err.npy')
    # Pre-generated random numbers so the smearing path is deterministic.
    mc_rand = np.load('tests/samples/nano_dy_rochester_rand.npy')

    # --- test against nanoaod (data sample) ---
    events = NanoEvents.from_file(os.path.abspath('tests/samples/nano_dimuon.root'))

    # Data scale correction per muon; flattened to compare against the
    # flat reference array.
    data_k = rochester.kScaleDT(events.Muon.charge, events.Muon.pt,
                                events.Muon.eta, events.Muon.phi)
    assert(all(np.isclose(data_k.flatten(), official_data_k)))
    # Corresponding uncertainty; cast to a plain float array before comparing.
    data_err = rochester.kScaleDTerror(events.Muon.charge, events.Muon.pt,
                                       events.Muon.eta, events.Muon.phi)
    data_err = np.array(data_err.flatten(), dtype=float)
    assert(all(np.isclose(data_err, official_data_err, atol=1e-8)))

    # --- test against mc (Drell-Yan sample) ---
    events = NanoEvents.from_file(os.path.abspath('tests/samples/nano_dy.root'))

    # Muons with a valid gen match: matched_gen.pt is non-NaN after fillna.
    hasgen = ~np.isnan(events.Muon.matched_gen.pt.fillna(np.nan))
    # Re-jag the flat random numbers to match the per-event muon structure.
    mc_rand = JaggedArray.fromoffsets(hasgen.offsets, mc_rand)
    # Gen-matched muons get the "spread" correction (uses gen pt) ...
    mc_kspread = rochester.kSpreadMC(events.Muon.charge[hasgen],
                                     events.Muon.pt[hasgen],
                                     events.Muon.eta[hasgen],
                                     events.Muon.phi[hasgen],
                                     events.Muon.matched_gen.pt[hasgen])
    # ... unmatched muons get the "smear" correction (uses nTrackerLayers
    # and the preloaded random numbers).
    mc_ksmear = rochester.kSmearMC(events.Muon.charge[~hasgen],
                                   events.Muon.pt[~hasgen],
                                   events.Muon.eta[~hasgen],
                                   events.Muon.phi[~hasgen],
                                   events.Muon.nTrackerLayers[~hasgen],
                                   mc_rand[~hasgen])
    # Stitch the two disjoint subsets back into one flat per-muon array.
    mc_k = np.ones_like(events.Muon.pt.flatten())
    mc_k[hasgen.flatten()] = mc_kspread.flatten()
    mc_k[~hasgen.flatten()] = mc_ksmear.flatten()
    assert(all(np.isclose(mc_k, official_mc_k)))

    # Same split/stitch pattern for the MC uncertainties.
    mc_errspread = rochester.kSpreadMCerror(events.Muon.charge[hasgen],
                                            events.Muon.pt[hasgen],
                                            events.Muon.eta[hasgen],
                                            events.Muon.phi[hasgen],
                                            events.Muon.matched_gen.pt[hasgen])
    mc_errsmear = rochester.kSmearMCerror(events.Muon.charge[~hasgen],
                                          events.Muon.pt[~hasgen],
                                          events.Muon.eta[~hasgen],
                                          events.Muon.phi[~hasgen],
                                          events.Muon.nTrackerLayers[~hasgen],
                                          mc_rand[~hasgen])
    mc_err = np.ones_like(events.Muon.pt.flatten())
    mc_err[hasgen.flatten()] = mc_errspread.flatten()
    mc_err[~hasgen.flatten()] = mc_errsmear.flatten()
    assert(all(np.isclose(mc_err, official_mc_err, atol=1e-8)))
def test_read_nanomc():
    """Exercise gen-particle round-trips, gen-matching sanity, and
    cross-references on the Drell-Yan MC NanoAOD sample."""
    sample_path = os.path.abspath('tests/samples/nano_dy.root')
    events = NanoEvents.from_file(sample_path)

    # Round-trip the gen collection through filtered views before the full one.
    forward_gen = events.GenPart[events.GenPart.eta > 0]
    genroundtrips(forward_gen)
    has_forward_ele = (events.Electron.eta > 0).any()
    genroundtrips(events[has_forward_ele].GenPart)
    genroundtrips(events.GenPart)

    # Gen matching sanity: an electron's gen match may also be a photon (pdgId 22).
    ele_gen_id = events.Electron.matched_gen.pdgId
    assert ((abs(ele_gen_id) == 11) | (ele_gen_id == 22)).all().all()
    mu_gen_id = events.Muon.matched_gen.pdgId
    assert (abs(mu_gen_id) == 13).all().all()
    genroundtrips(events.Electron.matched_gen)

    # Cross-reference integrity on a filtered view, then on the full events.
    crossref(events[events.Jet.counts > 2])
    crossref(events)

    expected_any_tight = [False, True, True, True, False, False, False, False, False]
    assert events.Photon.isTight.any().tolist()[:9] == expected_any_tight
return np.savez(npz_file, np.array(event_list), np.array(genmet_list)) if __name__ == '__main__': parser = OptionParser() parser.add_option('-d', '--dataset', help='dataset', dest='dataset') (options, args) = parser.parse_args() dataset = options.dataset #fname = '/cms/scratch/matteoc/CMSSW_10_2_22/src/PhysicsTools/NanoMET/test/'+options.dataset+'.root' fname = 'root://cms-xrdr.private.lo:2094//xrd/store/user/' + os.environ[ 'USER'] + '/' + dataset + '.root' print('Opening file:', fname) events = NanoEvents.from_file(fname) n_events = events.JetPFCands.pt.shape[0] print('Total events:', n_events) for i in range(n_events): future_savez(i) ''' with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor: futures = set() futures.update(executor.submit(future_savez, i) for i in range(n_events)) try: total = len(futures) processed = 0 while len(futures) > 0: finished = set(job for job in futures if job.done()) for job in finished:
def _work_function(item, processor_instance, flatten=False, savemetrics=False,
                   mmap=False, nano=False, cachestrategy=None,
                   skipbadfiles=False, retries=0, xrootdtimeout=None):
    """Open one work item's file, run the processor over its chunk, and
    return the accumulated output (optionally wrapped with read metrics).

    Retries reads up to `retries` times; with skipbadfiles=True, OSErrors
    that exhaust the retries yield an empty result instead of raising.
    """
    # 'heavy' mode: the (item, pickled-processor) pair arrives packed in `item`.
    if processor_instance == 'heavy':
        item, processor_instance = item
    # Processor may arrive lz4-compressed + cloudpickled; inflate it if so.
    if not isinstance(processor_instance, ProcessorABC):
        processor_instance = cloudpickle.loads(lz4f.decompress(processor_instance))

    if mmap:
        # Empty dict makes uproot fall back to its default (memory-mapped) source.
        localsource = {}
    else:
        # Non-mmap path: plain FileSource with parallel reads disabled.
        opts = dict(uproot.FileSource.defaults)
        opts.update({'parallel': None})

        def localsource(path):
            return uproot.FileSource(path, **opts)

    import warnings
    # Start from an identity accumulator so the skip path can still return
    # a well-formed (empty) result.
    out = processor_instance.accumulator.identity()
    retry_count = 0
    while retry_count <= retries:
        try:
            from uproot.source.xrootd import XRootDSource
            xrootdsource = XRootDSource.defaults
            # NOTE(review): mutates the shared XRootDSource.defaults dict,
            # so the timeout leaks to subsequent opens in this process.
            xrootdsource['timeout'] = xrootdtimeout
            file = uproot.open(item.filename, localsource=localsource,
                               xrootdsource=xrootdsource)
            if nano:
                cache = None
                if cachestrategy == 'dask-worker':
                    # Use the dask worker's data store as the array cache.
                    from dask.distributed import get_worker
                    cache = get_worker().data
                df = NanoEvents.from_file(
                    file=file,
                    treename=item.treename,
                    entrystart=item.index * item.chunksize,
                    entrystop=(item.index + 1) * item.chunksize,
                    metadata={'dataset': item.dataset},
                    cache=cache,
                )
            else:
                tree = file[item.treename]
                df = LazyDataFrame(tree, item.chunksize, item.index,
                                   flatten=flatten)
                df['dataset'] = item.dataset
            tic = time.time()
            out = processor_instance.process(df)
            toc = time.time()
            metrics = dict_accumulator()
            if savemetrics:
                # bytes/dataserver stats are only available for xrootd sources.
                if isinstance(file.source, uproot.source.xrootd.XRootDSource):
                    metrics['bytesread'] = value_accumulator(int, file.source.bytesread)
                    metrics['dataservers'] = set_accumulator({file.source._source.get_property('DataServer')})
                metrics['columns'] = set_accumulator(df.materialized)
                metrics['entries'] = value_accumulator(int, df.size)
                metrics['processtime'] = value_accumulator(float, toc - tic)
            wrapped_out = dict_accumulator({'out': out, 'metrics': metrics})
            file.source.close()
            break
        # catch xrootd errors and optionally skip
        # or retry to read the file
        except OSError as e:
            if not skipbadfiles:
                raise e
            else:
                w_str = 'Bad file source %s.' % item.filename
                if retries:
                    w_str += ' Attempt %d of %d.' % (retry_count + 1, retries + 1)
                    if retry_count + 1 < retries:
                        w_str += ' Will retry.'
                    else:
                        w_str += ' Skipping.'
                else:
                    w_str += ' Skipping.'
                warnings.warn(w_str)
            # Empty (zeroed) metrics so the skipped chunk still contributes
            # a well-formed wrapped_out; falls through to retry_count += 1.
            metrics = dict_accumulator()
            if savemetrics:
                metrics['bytesread'] = value_accumulator(int, 0)
                metrics['dataservers'] = set_accumulator({})
                metrics['columns'] = set_accumulator({})
                metrics['entries'] = value_accumulator(int, 0)
                metrics['processtime'] = value_accumulator(float, 0)
            wrapped_out = dict_accumulator({'out': out, 'metrics': metrics})
        except Exception as e:
            # Non-OSError failures: re-raise once retries are exhausted.
            if retries == retry_count:
                raise e
            w_str = 'Attempt %d of %d. Will retry.' % (retry_count + 1, retries + 1)
            warnings.warn(w_str)
        retry_count += 1

    return wrapped_out
def test_read_nanodata():
    """Cross-reference integrity checks on the dimuon data NanoAOD sample."""
    sample_path = os.path.abspath('tests/samples/nano_dimuon.root')
    events = NanoEvents.from_file(sample_path)
    crossref(events)
    # Repeat on a filtered view to confirm references survive an event cut.
    multijet_events = events[events.Jet.counts > 2]
    crossref(multijet_events)
# Standalone driver: load one hard-coded NanoAOD file, run a processor
# over it, and print the accumulated output.
from coffea.nanoaod import NanoEvents
from hbbprocessor import HbbProcessor
from ddt_processor import DDTProcessor
from zqq_processor import ZQQProcessor
from coffea.nanoaod.methods import Candidate

# Only the QCD_HT700to1000 sample and its matching metadata are active;
# the commented-out lines are alternate inputs kept for quick switching.
events = NanoEvents.from_file(
    #'root://cmseos.fnal.gov//store/user/jkrupa/nanopost_process/24Jul20/ZJetsToQQ_HT-800toInf_qc19_4j_TuneCP5_13TeV-madgraphMLM-pythia8/nano_mc_2017_9ZJetsToQQ_HT-800toInf_qc19_4j_TuneCP5_13TeV-madgraphMLM-pythia8.root',
    #'root://cmseos.fnal.gov//store/group/lpcbacon/jkrupa/nanopost_process/6Aug20//WJetsToQQ_HT-800toInf_qc19_3j_TuneCP5_13TeV-madgraphMLM-pythia8/nano_mc_2017_9WJetsToQQ_HT-800toInf_qc19_3j_TuneCP5_13TeV-madgraphMLM-pythia8.root',
    #'root://cmseos.fnal.gov//store/user/lpcbacon/jkrupa/nanopost_process/6Aug20/QCD_HT700to1000_TuneCP5_PSWeights_13TeV-madgraphMLM-pythia8/nano_mc_2017_9QCD_HT700to1000_TuneCP5_PSWeights_13TeV-madgraphMLM-pythia8.root',
    'root://cmseos.fnal.gov//store/user/lpcbacon/jkrupa/nanopost_process/6Aug20/QCD_HT700to1000_TuneCP5_PSWeights_13TeV-madgraphMLM-pythia8/nano_mc_2017_9QCD_HT700to1000_TuneCP5_PSWeights_13TeV-madgraphMLM-pythia8.root',
    #'root://cmseos.fnal.gov//store/user/lpcbacon/jkrupa/nanopost_process/6Aug20/ST_tW_top_5f_inclusiveDecays_TuneCP5_PSweights_13TeV-powheg-pythia8/nano_mc_2017_15ST_tW_top_5f_inclusiveDecays_TuneCP5_PSweights_13TeV-powheg-pythia8.root',
    #'root://cmseos.fnal.gov//store/user/lpcbacon/jkrupa/nanopost_process/6Aug20/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/nano_mc_2017_1119TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8.root',
    #'root://cmseos.fnal.gov//store/user/lpcbacon/jkrupa/nanopost_process/6Aug20_v2/SingleMuon_pancakes-02-withPF_Run2017D-09Aug2019_UL2017-v1/nano_data_2017_8SingleMuon_pancakes-02-withPF_Run2017D-09Aug2019_UL2017-v1.root',
    #'root://cmseos.fnal.gov//store/group/lpcbacon/jkrupa/nanopost_process/6Aug20/TTToHadronic_TuneCP5_13TeV-powheg-pythia8/nano_mc_2017_99TTToHadronic_TuneCP5_13TeV-powheg-pythia8.root',
    #'root://cmseos.fnal.gov//store/user/lpcbacon/jkrupa/nanopost_process/6Aug20/WJetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8/nano_mc_2017_1WJetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8.root',
    #'root://cmseos.fnal.gov//store/user/jkrupa/nanopost_process/27Jul20_v3/ZJetsToQQ_HT400to600_qc19_4j_TuneCP5_13TeV-madgraphMLM-pythia8/nano_mc_2017_9ZJetsToQQ_HT400to600_qc19_4j_TuneCP5_13TeV-madgraphMLM-pythia8.root',
    #'root://cmseos.fnal.gov//store/user/jkrupa/nanopost_process/27Jul20_v3/ZJetsToQQ_HT400to600_qc19_4j_TuneCP5_13TeV-madgraphMLM-pythia8/nano_mc_2017_98ZJetsToQQ_HT400to600_qc19_4j_TuneCP5_13TeV-madgraphMLM-pythia8.root',
    #'root://cmseos.fnal.gov//store/user/jkrupa/nanopost_process/22Jul20_v2/QCD_HT2000toInf_TuneCP5_PSWeights_13TeV-madgraphMLM-pythia8/nano_mc_2017_9_Skim.root',
    #entrystop=100000,
    #metadata={'dataset': 'WJetsToQQ_HT-800toInf_qc19_3j_TuneCP5_13TeV-madgraphMLM-pythia8'},#,QCD_HT2000toInf_TuneCP5_PSWeights_13TeV-madgraphMLM-pythia8-test'},
    #metadata={'dataset': 'QCD_HT2000toInf_TuneCP5_PSWeights_13TeV-madgraphMLM-pythia8-test'},
    #metadata={'dataset':'WJetsToQQ_HT-800toInf_qc19_3j_TuneCP5_13TeV-madgraphMLM-pythia8'},#TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8'},
    metadata={'dataset': 'QCD_HT700to1000_TuneCP5_PSWeights_13TeV-madgraphMLM-pythia8'},
    #metadata={'dataset': 'WJetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8'},
    #methods={"FatJetPFCands": Candidate}
)

# Pick the processor to run; only ZQQ is active.
#p = HbbProcessor(year='2017')
p = ZQQProcessor(year='2017')
#p = DDTProcessor(year='2017')
out = p.process(events)
print(out)
# import uproot batch = False plt.style.use(hep.style.ROOT) fname = "test.root" fname = sys.argv[1] if fname.startswith('/store/'): print("Attempting to read over XRootD...") fname = "root://xrootd-cms.infn.it//" + fname outdir = "pkls" os.system("mkdir -p " + outdir) events = Events.from_file(fname) alljets = events.Jet def maskDC(jets): dflt = (jets.btagDeepB < 0) | (jets.btagDeepC < 0) | ( jets.btagDeepB > 1 ) | (jets.btagDeepC > 1) | (jets.btagDeepB + jets.btagDeepC > 1) | ( 1 - jets.btagDeepB <= 0) | (jets.btagDeepC + jets.btagDeepB <= 0) | ( np.isnan(jets.btagDeepB)) | (np.isnan(jets.btagDeepC)) return dflt def maskDJ(jets): dflt = (jets.btagDeepFlavB < 0) | (jets.btagDeepFlavC < 0) | ( jets.btagDeepFlavB >= 1) | (jets.btagDeepFlavC >= 1) | (