def load_dataset(numpy_lib, num_iter=1):
    """Build, load and return the "nanoaod" test dataset.

    Calls ``download_if_not_exists`` for the test ROOT file, declares the
    branches to read for muons, electrons, jets and event-level variables,
    and constructs a ``Dataset`` over ``num_iter`` repetitions of that file
    (tree name ``"Events"``). The dataset is then loaded, merged in place,
    summarized on stdout, and moved to the device selected by ``numpy_lib``.

    Args:
        numpy_lib: Forwarded unchanged to ``Dataset.move_to_device``.
            NOTE(review): presumably a numpy-compatible array module;
            confirm against the callers.
        num_iter: How many times the same input file path is repeated in
            the file list handed to ``Dataset``. Defaults to 1.

    Returns:
        The ``Dataset`` instance after loading, merging and the device move.
    """
    print("loading dataset")
    download_if_not_exists(
        "data/nanoaod_test.root",
        "https://jpata.web.cern.ch/jpata/opendata_files/DY2JetsToLL-merged/1.root",
    )

    # (branch name, dtype) pairs, one list per object collection.
    muon_branches = [
        ("Muon_pt", "float32"),
        ("Muon_eta", "float32"),
        ("Muon_phi", "float32"),
        ("Muon_mass", "float32"),
        ("Muon_charge", "int32"),
        ("Muon_pfRelIso03_all", "float32"),
        ("Muon_tightId", "bool"),
    ]
    electron_branches = [
        ("Electron_pt", "float32"),
        ("Electron_eta", "float32"),
        ("Electron_phi", "float32"),
        ("Electron_mass", "float32"),
        ("Electron_charge", "int32"),
        ("Electron_pfRelIso03_all", "float32"),
        ("Electron_pfId", "bool"),
    ]
    jet_branches = [
        ("Jet_pt", "float32"),
        ("Jet_eta", "float32"),
        ("Jet_phi", "float32"),
        ("Jet_mass", "float32"),
        ("Jet_btag", "float32"),
        ("Jet_puId", "bool"),
    ]
    event_branches = [
        ("HLT_IsoMu24", "bool"),
        ("MET_pt", "float32"),
        ("MET_phi", "float32"),
        ("MET_sumet", "float32"),
        ("MET_significance", "float32"),
        ("MET_CovXX", "float32"),
        ("MET_CovXY", "float32"),
        ("MET_CovYY", "float32"),
    ]
    branches_by_collection = {
        "Muon": muon_branches,
        "Electron": electron_branches,
        "Jet": jet_branches,
        "EventVariables": event_branches,
    }

    # The same file is listed num_iter times so it is read repeatedly.
    input_files = num_iter * ["./data/nanoaod_test.root"]
    ds = Dataset(
        "nanoaod",
        input_files,
        branches_by_collection,
        treename="Events",
        datapath="",
    )
    ds.load_root(verbose=True)
    ds.merge_inplace(verbose=True)

    # Printed as MB using a decimal factor of 1000 * 1000.
    # NOTE(review): assumes memsize() reports bytes; confirm.
    n_events = ds.numevents()
    size_mb = ds.memsize() / 1000 / 1000
    print("dataset has {0} events, {1:.2f} MB".format(n_events, size_mb))

    ds.move_to_device(numpy_lib, verbose=True)
    return ds
"Jet": [("Jet_Px", "float32"), ("Jet_Py", "float32"), ("Jet_Pz", "float32"), ("Jet_E", "float32"), ("Jet_btag", "float32"), ("Jet_ID", "bool")], "EventVariables": [("NPrimaryVertices", "int32"), ("triggerIsoMu24", "bool"), ("EventWeight", "float32")] } #Define a dataset, given the data structure and a list of filenames dataset = Dataset("HZZ", [filename], datastructures, treename="events") #Load the ROOT files dataset.load_root(verbose=True) #merge arrays across files into one big array dataset.merge_inplace(verbose=True) #move to GPU if CUDA was specified dataset.move_to_device(nplib, verbose=True) #process data, save output as a json file results = dataset.analyze(analyze_data_function, verbose=True, parameters={"muons_ptcut": 30.0}) results.save_json("out.json") #Make a simple PDF plot as an example hist = results["hist_leading_muon_pt"] fig = plt.figure(figsize=(5, 5)) plt.errorbar(hist.edges[:-1], hist.contents, np.sqrt(hist.contents_w2)) plt.savefig("hist.png", bbox_inches="tight")