def load_dataset(numpy_lib, num_iter=1):
    print("loading dataset")
    download_if_not_exists(
        "data/nanoaod_test.root",
        "https://jpata.web.cern.ch/jpata/opendata_files/DY2JetsToLL-merged/1.root",
    )
    datastructures = {
        "Muon": [
            ("Muon_pt", "float32"),
            ("Muon_eta", "float32"),
            ("Muon_phi", "float32"),
            ("Muon_mass", "float32"),
            ("Muon_charge", "int32"),
            ("Muon_pfRelIso03_all", "float32"),
            ("Muon_tightId", "bool"),
        ],
        "Electron": [
            ("Electron_pt", "float32"),
            ("Electron_eta", "float32"),
            ("Electron_phi", "float32"),
            ("Electron_mass", "float32"),
            ("Electron_charge", "int32"),
            ("Electron_pfRelIso03_all", "float32"),
            ("Electron_pfId", "bool"),
        ],
        "Jet": [
            ("Jet_pt", "float32"),
            ("Jet_eta", "float32"),
            ("Jet_phi", "float32"),
            ("Jet_mass", "float32"),
            ("Jet_btag", "float32"),
            ("Jet_puId", "bool"),
        ],
        "EventVariables": [
            ("HLT_IsoMu24", "bool"),
            ("MET_pt", "float32"),
            ("MET_phi", "float32"),
            ("MET_sumet", "float32"),
            ("MET_significance", "float32"),
            ("MET_CovXX", "float32"),
            ("MET_CovXY", "float32"),
            ("MET_CovYY", "float32"),
        ],
    }
    dataset = Dataset(
        "nanoaod",
        num_iter * ["./data/nanoaod_test.root"],
        datastructures,
        treename="Events",
        datapath="",
    )
    dataset.load_root(verbose=True)
    dataset.merge_inplace(verbose=True)
    print("dataset has {0} events, {1:.2f} MB".format(
        dataset.numevents(), dataset.memsize() / 1000 / 1000))
    dataset.move_to_device(numpy_lib, verbose=True)
    return dataset
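# A minimal usage sketch for load_dataset above, illustrative rather than
# canonical. choose_backend is the same helper used in the test fixtures below,
# returning the numpy-like array module and the accelerated kernel backend.
nplib, ha = choose_backend(use_cuda=False)
dataset = load_dataset(nplib, num_iter=1)
print("loaded {0} events".format(dataset.numevents()))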
def setUpClass(self):
    self.NUMPY_LIB, self.ha = choose_backend(use_cuda=USE_CUPY)
    import hmumu_utils
    hmumu_utils.NUMPY_LIB = self.NUMPY_LIB
    hmumu_utils.ha = self.ha

    download_if_not_exists(
        "data/myNanoProdMc2016_NANO.root",
        "https://jpata.web.cern.ch/jpata/hmm/test_files/myNanoProdMc2016_NANO.root"
    )

    #Load a simple sync dataset
    self.datastructures = create_datastructure("vbf_sync", True, "2016", do_fsr=True)
    self.dataset = Dataset(
        "vbf_sync", ["data/myNanoProdMc2016_NANO.root"],
        self.datastructures,
        datapath="",
        treename="Events",
        is_mc=True)
    self.dataset.num_chunk = 0
    self.dataset.era = "2016"
    self.dataset.load_root()
    self.dataset.numpy_lib = self.NUMPY_LIB
    self.dataset.move_to_device(self.NUMPY_LIB)

    #Disable everything that requires ROOT, which is not easily available on Travis CI
    from pars import analysis_parameters
    self.analysis_parameters = analysis_parameters
    self.analysis_parameters["baseline"]["do_rochester_corrections"] = False
    self.analysis_parameters["baseline"]["do_lepton_sf"] = False
    self.analysis_parameters["baseline"]["save_dnn_vars"] = False
    self.analysis_parameters["baseline"]["do_bdt_ucsd"] = False
    self.analysis_parameters["baseline"]["do_bdt_pisa"] = False
    self.analysis_parameters["baseline"]["do_factorized_jec"] = False
    self.analysis_parameters["baseline"]["do_jec"] = {"2016": False}
    self.analysis_parameters["baseline"]["do_jer"] = {"2016": True}

    from argparse import Namespace
    self.cmdline_args = Namespace(
        use_cuda=USE_CUPY,
        datapath=".",
        do_fsr=False,
        nthreads=1,
        async_data=False,
        do_sync=False,
        out="test_out")

    from analysis_hmumu import AnalysisCorrections
    self.analysis_corrections = AnalysisCorrections(self.cmdline_args, True)
def load_dataset(num_iter=1):
    datastructures = {
        "Muon": [
            ("Muon_Px", "float32"),
            ("Muon_Py", "float32"),
            ("Muon_Pz", "float32"),
            ("Muon_E", "float32"),
            ("Muon_Charge", "int32"),
            ("Muon_Iso", "float32")
        ],
        "Jet": [
            ("Jet_Px", "float32"),
            ("Jet_Py", "float32"),
            ("Jet_Pz", "float32"),
            ("Jet_E", "float32"),
            ("Jet_btag", "float32"),
            ("Jet_ID", "bool")
        ],
        "EventVariables": [
            ("NPrimaryVertices", "int32"),
            ("triggerIsoMu24", "bool"),
            ("EventWeight", "float32")
        ]
    }
    dataset = Dataset(
        "HZZ",
        num_iter * ["data/HZZ.root"],
        datastructures,
        treename="events",
        datapath="")
    assert(dataset.filenames[0] == "data/HZZ.root")
    assert(len(dataset.filenames) == num_iter)
    assert(len(dataset.structs["Jet"]) == 0)
    assert(len(dataset.eventvars) == 0)
    return dataset
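# Illustrative (assumed) follow-up to load_dataset above: the assertions in the
# function hold because no data has been read yet; after load_root() the structs
# are populated, presumably with one jagged collection per input file, as the
# dataset.structs["Jet"][ifile] indexing elsewhere in this section suggests.
dataset = load_dataset(num_iter=2)
dataset.load_root(verbose=True)
assert len(dataset.structs["Jet"]) == 2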
class TestAnalysisSmall(unittest.TestCase):
    @classmethod
    def setUpClass(self):
        self.NUMPY_LIB, self.ha = choose_backend(use_cuda=USE_CUPY)
        import hmumu_utils
        hmumu_utils.NUMPY_LIB = self.NUMPY_LIB
        hmumu_utils.ha = self.ha

        download_if_not_exists(
            "data/myNanoProdMc2016_NANO.root",
            "https://jpata.web.cern.ch/jpata/hmm/test_files/myNanoProdMc2016_NANO.root"
        )

        #Load a simple sync dataset
        self.datastructures = create_datastructure("vbf_sync", True, "2016", do_fsr=True)
        self.dataset = Dataset(
            "vbf_sync", ["data/myNanoProdMc2016_NANO.root"],
            self.datastructures,
            datapath="",
            treename="Events",
            is_mc=True)
        self.dataset.num_chunk = 0
        self.dataset.era = "2016"
        self.dataset.load_root()
        self.dataset.numpy_lib = self.NUMPY_LIB
        self.dataset.move_to_device(self.NUMPY_LIB)

        #Disable everything that requires ROOT, which is not easily available on Travis CI
        from pars import analysis_parameters
        self.analysis_parameters = analysis_parameters
        self.analysis_parameters["baseline"]["do_rochester_corrections"] = False
        self.analysis_parameters["baseline"]["do_lepton_sf"] = False
        self.analysis_parameters["baseline"]["save_dnn_vars"] = False
        self.analysis_parameters["baseline"]["do_bdt_ucsd"] = False
        self.analysis_parameters["baseline"]["do_bdt_pisa"] = False
        self.analysis_parameters["baseline"]["do_factorized_jec"] = False
        self.analysis_parameters["baseline"]["do_jec"] = True
        self.analysis_parameters["baseline"]["do_jer"] = {"2016": True}

        from argparse import Namespace
        self.cmdline_args = Namespace(
            use_cuda=USE_CUPY,
            datapath=".",
            do_fsr=False,
            nthreads=1,
            async_data=False,
            do_sync=False,
            out="test_out")

        from analysis_hmumu import AnalysisCorrections
        self.analysis_corrections = AnalysisCorrections(self.cmdline_args, True)

    def setUp(self):
        pass

    def test_dnn(self):
        import keras
        dnn_model = keras.models.load_model(
            "data/DNN27vars_sig_vbf_ggh_bkg_dyvbf_dy105To160_ewk105To160_split_60_40_mod10_191008.h5")
        inp = np.zeros((1000, 26), dtype=np.float32)
        out = dnn_model.predict(inp)
        print(np.mean(out))

    def testDataset(self):
        nev = self.dataset.numevents()
        print("Loaded dataset from {0} with {1} events".format(self.dataset.filenames[0], nev))
        assert(nev > 0)

    def test_get_genpt(self):
        from hmumu_utils import get_genpt_cpu, get_genpt_cuda
        NUMPY_LIB = self.NUMPY_LIB
        muons = self.dataset.structs["Muon"][0]
        genpart = self.dataset.structs["GenPart"][0]
        muons_genpt = NUMPY_LIB.zeros(muons.numobjects(), dtype=NUMPY_LIB.float32)
        if USE_CUPY:
            get_genpt_cuda[32, 1024](muons.offsets, muons.genPartIdx, genpart.offsets, genpart.pt, muons_genpt)
            cuda.synchronize()
        else:
            get_genpt_cpu(muons.offsets, muons.genPartIdx, genpart.offsets, genpart.pt, muons_genpt)
        muons_genpt = NUMPY_LIB.asnumpy(muons_genpt)
        self.assertAlmostEqual(NUMPY_LIB.sum(muons_genpt), 250438.765625)
        self.assertListEqual(
            list(muons_genpt[:10]),
            [16.875, 53.125, 50.5, 0.0, 153.5, 32.5, 53.75, 53.125, 55.125, 22.6875])

    def test_fix_muon_fsrphoton_index(self):
        from hmumu_utils import fix_muon_fsrphoton_index
        NUMPY_LIB = self.NUMPY_LIB
        analysis_parameters = self.analysis_parameters
        muons = self.dataset.structs["Muon"][0]
        fsrphotons = self.dataset.structs["FsrPhoton"][0]
        out_muons_fsrPhotonIdx = np.zeros_like(NUMPY_LIB.asnumpy(muons.fsrPhotonIdx))
        mu_pt = NUMPY_LIB.asnumpy(muons.pt)
        mu_eta = NUMPY_LIB.asnumpy(muons.eta)
        mu_phi = NUMPY_LIB.asnumpy(muons.phi)
        mu_mass = NUMPY_LIB.asnumpy(muons.mass)
        mu_iso = NUMPY_LIB.asnumpy(muons.pfRelIso04_all)
        fix_muon_fsrphoton_index(
            mu_pt, mu_eta, mu_phi, mu_mass,
            NUMPY_LIB.asnumpy(fsrphotons.offsets),
            NUMPY_LIB.asnumpy(muons.offsets),
            NUMPY_LIB.asnumpy(fsrphotons.dROverEt2),
            NUMPY_LIB.asnumpy(fsrphotons.relIso03),
            NUMPY_LIB.asnumpy(fsrphotons.pt),
            NUMPY_LIB.asnumpy(fsrphotons.muonIdx),
            NUMPY_LIB.asnumpy(muons.fsrPhotonIdx),
            out_muons_fsrPhotonIdx,
            analysis_parameters["baseline"]["fsr_dROverEt2"],
            analysis_parameters["baseline"]["fsr_relIso03"],
            analysis_parameters["baseline"]["pt_fsr_over_mu_e"]
        )

    def test_analyze_function(self):
        import hmumu_utils
        from hmumu_utils import analyze_data, load_puhist_target
        from analysis_hmumu import JetMetCorrections, BTagWeights
        from coffea.lookup_tools import extractor

        NUMPY_LIB = self.NUMPY_LIB
        hmumu_utils.NUMPY_LIB = self.NUMPY_LIB
        hmumu_utils.ha = self.ha

        analysis_parameters = self.analysis_parameters

        puid_maps = "data/puidSF/PUIDMaps.root"
        puid_extractor = extractor()
        puid_extractor.add_weight_sets(["* * {0}".format(puid_maps)])
        puid_extractor.finalize()

        random_seed = 0

        ret = analyze_data(
            self.dataset,
            self.analysis_corrections,
            analysis_parameters["baseline"],
            "baseline",
            random_seed,
            do_fsr=True,
            use_cuda=False)

        h = ret["hist__dimuon_invmass_z_peak_cat5__M_mmjj"]
        nev_zpeak_nominal = np.sum(h["nominal"].contents)
        if not USE_CUPY:
            self.assertAlmostEqual(nev_zpeak_nominal, 0.0034586303, places=4)

        self.assertTrue("Total__up" in h.keys())
        self.assertTrue("Total__down" in h.keys())
        for jer_syst in ["jerB1", "jerB2", "jerF1", "jerF2", "jerEC1", "jerEC2"]:
            self.assertTrue(jer_syst + "__up" in h.keys())
            self.assertTrue(jer_syst + "__down" in h.keys())
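# A pure-numpy sketch of what get_genpt_cpu presumably computes, based on its
# call signature in the test above: for every muon with a valid genPartIdx, look
# up the matched GenPart pt within the same event. The genPartIdx values are
# local to the event, so they are shifted by genpart_offsets[iev]. The function
# name get_genpt_reference is hypothetical; this illustrates the kernel's
# contract, not its actual implementation.
import numpy as np

def get_genpt_reference(muon_offsets, muon_genPartIdx, genpart_offsets, genpart_pt, out_genpt):
    nev = len(muon_offsets) - 1
    for iev in range(nev):
        for imu in range(muon_offsets[iev], muon_offsets[iev + 1]):
            idx = muon_genPartIdx[imu]
            if idx >= 0:
                # translate the event-local gen index into the flat array index
                out_genpt[imu] = genpart_pt[genpart_offsets[iev] + idx]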
#Predefine which branches to read from the TTree and how they are grouped into objects
#This will be verified against the actual ROOT TTree when it is loaded
datastructures = {
    "Muon": [
        ("Muon_Px", "float32"), ("Muon_Py", "float32"),
        ("Muon_Pz", "float32"), ("Muon_E", "float32"),
        ("Muon_Charge", "int32"), ("Muon_Iso", "float32")
    ],
    "Jet": [
        ("Jet_Px", "float32"), ("Jet_Py", "float32"),
        ("Jet_Pz", "float32"), ("Jet_E", "float32"),
        ("Jet_btag", "float32"), ("Jet_ID", "bool")
    ],
    "EventVariables": [
        ("NPrimaryVertices", "int32"),
        ("triggerIsoMu24", "bool"),
        ("EventWeight", "float32")
    ]
}

#Define a dataset, given the data structure and a list of filenames
dataset = Dataset("HZZ", [filename], datastructures, treename="events")

#Load the ROOT files
dataset.load_root(verbose=True)

#Merge arrays across files into one big array
dataset.merge_inplace(verbose=True)

#Move to GPU if CUDA was specified
dataset.move_to_device(nplib, verbose=True)

#Process the data, save the output as a json file
results = dataset.analyze(
    analyze_data_function, verbose=True, parameters={"muons_ptcut": 30.0})
results.save_json("out.json")
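# The snippet above references analyze_data_function without defining it. Below
# is a minimal, hypothetical sketch of such a function, assuming it receives a
# dict of jagged collections keyed by name (flat content arrays plus a per-event
# offsets array) and the parameters dict passed to Dataset.analyze. Plain numpy
# on a CPU backend is assumed; the accelerated ha.* kernels are not used here.
import numpy as np

def analyze_data_function(data, parameters):
    muons = data["Muon"]
    # transverse momentum from the Cartesian components stored in HZZ.root
    pt = np.sqrt(muons.Px ** 2 + muons.Py ** 2)
    passing = pt > parameters["muons_ptcut"]
    # map each muon to its event index via the offsets array,
    # then count the passing muons per event
    ev = np.repeat(np.arange(len(muons.offsets) - 1), np.diff(muons.offsets))
    nmu = np.bincount(ev[passing], minlength=len(muons.offsets) - 1)
    return {"num_dimuon_events": int(np.sum(nmu == 2))}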
# Load this input file
#filename = "data/data_A.4lep.root"
if not os.path.isdir("data/atlas"):
    os.makedirs("data/atlas")

walltime_t0 = time.time()
for ds, fn_pattern, is_mc in datasets:
    filename = glob.glob(fn_pattern)
    print(filename)
    if len(filename) == 0:
        raise Exception(
            "Could not find any filenames for dataset={0}: fn_pattern={1}"
            .format(ds, fn_pattern))

    # Define a dataset, given the data structure and a list of filenames
    dataset = Dataset(ds, filename, datastructures, treename="mini")

    # Load the ROOT files
    dataset.load_root(verbose=True)

    # Merge arrays across files into one big array
    dataset.merge_inplace(verbose=True)

    # Move to GPU if CUDA was specified
    dataset.move_to_device(nplib, verbose=True)

    # Process the data, save the output as a json file
    results = dataset.analyze(
        analyze_data_function,
        verbose=True,
        parameters={
            "lep_ptcut": 10000.0,  #MeV units
("Muon_Py", "float32"), ], "Jet": [ ("Jet_E", "float32"), ("Jet_btag", "float32"), ], "EventVariables": [ ("NPrimaryVertices", "int32"), ("triggerIsoMu24", "bool"), ("EventWeight", "float32"), ], } # Define the dataset across the files dataset = Dataset("HZZ", ["data/HZZ.root"], datastructures, treename="events", datapath="") # Load the data to memory dataset.load_root() # Jets in the first file ifile = 0 jets = dataset.structs["Jet"][ifile] # common offset array for jets jets_offsets = jets.offsets print(jets_offsets) # data arrays jets_energy = jets.E
class TestAnalysisSmall(unittest.TestCase):
    @classmethod
    def setUpClass(self):
        self.NUMPY_LIB, self.ha = choose_backend(use_cuda=USE_CUPY)
        import hmumu_utils
        hmumu_utils.NUMPY_LIB = self.NUMPY_LIB
        hmumu_utils.ha = self.ha

        download_if_not_exists(
            "data/myNanoProdMc2016_NANO.root",
            "https://jpata.web.cern.ch/jpata/hmm/test_files/myNanoProdMc2016_NANO.root"
        )

        #Load a simple sync dataset
        self.datastructures = create_datastructure("vbf_sync", True, "2016", do_fsr=True)
        self.dataset = Dataset(
            "vbf_sync", ["data/myNanoProdMc2016_NANO.root"],
            self.datastructures,
            datapath="",
            treename="Events",
            is_mc=True)
        self.dataset.num_chunk = 0
        self.dataset.era = "2016"
        self.dataset.load_root()
        self.dataset.numpy_lib = self.NUMPY_LIB
        self.dataset.move_to_device(self.NUMPY_LIB)

        #Disable everything that requires ROOT, which is not easily available on Travis CI
        from pars import analysis_parameters
        self.analysis_parameters = analysis_parameters
        self.analysis_parameters["baseline"]["do_rochester_corrections"] = False
        self.analysis_parameters["baseline"]["do_lepton_sf"] = False
        self.analysis_parameters["baseline"]["save_dnn_vars"] = False
        self.analysis_parameters["baseline"]["do_bdt_ucsd"] = False
        self.analysis_parameters["baseline"]["do_bdt_pisa"] = False
        self.analysis_parameters["baseline"]["do_factorized_jec"] = False
        self.analysis_parameters["baseline"]["do_jec"] = True
        self.analysis_parameters["baseline"]["do_jer"] = {"2016": True}

        from argparse import Namespace
        self.cmdline_args = Namespace(
            use_cuda=USE_CUPY,
            datapath=".",
            do_fsr=False,
            nthreads=1,
            async_data=False,
            do_sync=False,
            out="test_out")

        if os.path.isfile("tests/hmm/libhmm.so"):
            from analysis_hmumu import AnalysisCorrections
            self.analysis_corrections = AnalysisCorrections(self.cmdline_args, True)
        else:
            print("Could not load analysis corrections with ROOT, skipping this in further tests")
            self.analysis_corrections = None

    def setUp(self):
        pass

    def testDataset(self):
        nev = self.dataset.numevents()
        print("Loaded dataset from {0} with {1} events".format(self.dataset.filenames[0], nev))
        assert(nev > 0)

    def test_get_genpt(self):
        from hmumu_utils import get_genpt_cpu, get_genpt_cuda
        NUMPY_LIB = self.NUMPY_LIB
        muons = self.dataset.structs["Muon"][0]
        genpart = self.dataset.structs["GenPart"][0]
        muons_genpt = NUMPY_LIB.zeros(muons.numobjects(), dtype=NUMPY_LIB.float32)
        if USE_CUPY:
            get_genpt_cuda[32, 1024](muons.offsets, muons.genPartIdx, genpart.offsets, genpart.pt, muons_genpt)
            cuda.synchronize()
        else:
            get_genpt_cpu(muons.offsets, muons.genPartIdx, genpart.offsets, genpart.pt, muons_genpt)
        muons_genpt = NUMPY_LIB.asnumpy(muons_genpt)
        self.assertAlmostEqual(NUMPY_LIB.sum(muons_genpt), 11943932)
        self.assertListEqual(
            list(muons_genpt[:10]),
            [105.0, 30.4375, 0.0, 0.0, 140.5, 28.625, 102.75, 41.25, 120.5, 80.5])

    def test_fix_muon_fsrphoton_index(self):
        from hmumu_utils import fix_muon_fsrphoton_index
        NUMPY_LIB = self.NUMPY_LIB
        muons = self.dataset.structs["Muon"][0]
        fsrphotons = self.dataset.structs["FsrPhoton"][0]
        out_muons_fsrPhotonIdx = np.zeros_like(NUMPY_LIB.asnumpy(muons.fsrPhotonIdx))
        fix_muon_fsrphoton_index(
            NUMPY_LIB.asnumpy(fsrphotons.offsets),
            NUMPY_LIB.asnumpy(muons.offsets),
            NUMPY_LIB.asnumpy(fsrphotons.dROverEt2),
            NUMPY_LIB.asnumpy(fsrphotons.muonIdx),
            NUMPY_LIB.asnumpy(muons.fsrPhotonIdx),
            out_muons_fsrPhotonIdx
        )

    def test_analyze_function(self):
        import hmumu_utils
        from hmumu_utils import analyze_data, load_puhist_target
        from analysis_hmumu import JetMetCorrections
        from coffea.lookup_tools import extractor

        NUMPY_LIB = self.NUMPY_LIB
        hmumu_utils.NUMPY_LIB = self.NUMPY_LIB
        hmumu_utils.ha = self.ha

        analysis_parameters = self.analysis_parameters

        puid_maps = "data/puidSF/PUIDMaps.root"
        puid_extractor = extractor()
        puid_extractor.add_weight_sets(["* * {0}".format(puid_maps)])
        puid_extractor.finalize()

        kwargs = {
            "pu_corrections": {
                "2016": load_puhist_target("data/pileup/RunII_2016_data.root")},
            "puidreweighting": puid_extractor.make_evaluator(),
            "jetmet_corrections": {
                "2016": {
                    "Summer16_07Aug2017_V11": JetMetCorrections(
                        jec_tag="Summer16_07Aug2017_V11_MC",
                        jec_tag_data={
                            "RunB": "Summer16_07Aug2017BCD_V11_DATA",
                            "RunC": "Summer16_07Aug2017BCD_V11_DATA",
                            "RunD": "Summer16_07Aug2017BCD_V11_DATA",
                            "RunE": "Summer16_07Aug2017EF_V11_DATA",
                            "RunF": "Summer16_07Aug2017EF_V11_DATA",
                            "RunG": "Summer16_07Aug2017GH_V11_DATA",
                            "RunH": "Summer16_07Aug2017GH_V11_DATA",
                        },
                        jer_tag="Summer16_25nsV1_MC",
                        jmr_vals=[1.0, 1.2, 0.8],
                        do_factorized_jec=True),
                },
            },
            "do_fsr": True
        }

        ret = self.dataset.analyze(
            analyze_data,
            use_cuda=USE_CUPY,
            parameter_set_name="baseline",
            parameters=analysis_parameters["baseline"],
            dataset_era=self.dataset.era,
            dataset_name=self.dataset.name,
            dataset_num_chunk=self.dataset.num_chunk,
            is_mc=self.dataset.is_mc,
            **kwargs
        )

        h = ret["hist__dimuon_invmass_z_peak_cat5__M_mmjj"]
        nev_zpeak_nominal = np.sum(h["nominal"].contents)
        if not USE_CUPY:
            self.assertAlmostEqual(nev_zpeak_nominal, 0.012528435, places=4)

        self.assertTrue("Total__up" in h.keys())
        self.assertTrue("Total__down" in h.keys())
        self.assertTrue("jer__up" in h.keys())
        self.assertTrue("jer__down" in h.keys())
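# Illustrative follow-up (assumed API): compare the yield of each systematic
# variation against the nominal one, using the "<systematic>__<direction>" key
# convention and the Histogram .contents attribute seen in the test above.
nominal_yield = np.sum(h["nominal"].contents)
for key in ["Total__up", "Total__down", "jer__up", "jer__down"]:
    variation_yield = np.sum(h[key].contents)
    print("{0}: {1:.6f} (nominal {2:.6f})".format(key, variation_yield, nominal_yield))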