def test_write_read(tmp_path):
    """Round-trip plain and repartitioned arrays through Parquet files.

    Four arrays are written under ``tmp_path`` (a jagged integer array, a
    flat record array, and each of them repartitioned with ``2`` as the
    second argument).  Every file is then read back, first with the default
    options and afterwards with ``lazy=True``, and compared to its source
    through ``awkward1.to_list``.
    """
    jagged = awkward1.Array([[1, 2, 3], [], [4, 5], [], [], [6, 7, 8, 9]])
    jagged_repartitioned = awkward1.repartition(jagged, 2)
    records = awkward1.Array(
        [
            {"x": 1, "y": 1.1},
            {"x": 2, "y": 2.2},
            {"x": 3, "y": 3.3},
            {"x": 4, "y": 4.4},
            {"x": 5, "y": 5.5},
            {"x": 6, "y": 6.6},
            {"x": 7, "y": 7.7},
            {"x": 8, "y": 8.8},
            {"x": 9, "y": 9.9},
        ]
    )
    records_repartitioned = awkward1.repartition(records, 2)

    # (file name, source array) pairs, kept in the order they are written.
    cases = [
        ("array1.parquet", jagged),
        ("array2.parquet", jagged_repartitioned),
        ("array3.parquet", records),
        ("array4.parquet", records_repartitioned),
    ]

    # Write everything first, then read: same sequence of I/O as before.
    for file_name, source in cases:
        awkward1.to_parquet(source, os.path.join(tmp_path, file_name))

    # First pass uses the reader's defaults, second pass requests lazy reads.
    for read_options in ({}, {"lazy": True}):
        for file_name, source in cases:
            restored = awkward1.from_parquet(
                os.path.join(tmp_path, file_name), **read_options
            )
            assert awkward1.to_list(restored) == awkward1.to_list(source)
def test_explode(tmp_path):
    """Check that ``explode_records=True`` stores lists of records as records of lists.

    A jagged array of ``{"x", "y"}`` records (and a repartitioned copy of
    it) is written with ``explode_records=True``; reading the files back is
    expected to give one record per entry whose ``x`` and ``y`` fields are
    lists.
    """
    array3 = awkward1.Array(
        [
            [{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}, {"x": 3, "y": 3.3}],
            [],
            [{"x": 4, "y": 4.4}, {"x": 5, "y": 5.5}],
            [],
            [],
            [
                {"x": 6, "y": 6.6},
                {"x": 7, "y": 7.7},
                {"x": 8, "y": 8.8},
                {"x": 9, "y": 9.9},
            ],
        ]
    )
    array4 = awkward1.repartition(array3, 2)

    awkward1.to_parquet(
        array3, os.path.join(tmp_path, "array3.parquet"), explode_records=True
    )
    awkward1.to_parquet(
        array4, os.path.join(tmp_path, "array4.parquet"), explode_records=True
    )

    expected = [
        {"x": [1, 2, 3], "y": [1.1, 2.2, 3.3]},
        {"x": [], "y": []},
        {"x": [4, 5], "y": [4.4, 5.5]},
        {"x": [], "y": []},
        {"x": [], "y": []},
        {"x": [6, 7, 8, 9], "y": [6.6, 7.7, 8.8, 9.9]},
    ]

    # Convert to plain Python objects before comparing, as test_write_read
    # does: ``Array == list`` is an element-wise array operation rather than
    # a structural equality check, so asserting on it directly does not
    # verify the file contents.
    for file_name in ("array3.parquet", "array4.parquet"):
        restored = awkward1.from_parquet(os.path.join(tmp_path, file_name))
        assert awkward1.to_list(restored) == expected
def runOneFile(filename):
    """Process one skimmed ROOT file and write its per-region histograms.

    The function reads the ``outTree`` tree and the ``h_total_mcweight``
    histogram from *filename*, derives selection, recoil, Z-candidate and
    jet columns, evaluates the region masks and scale factors, and writes
    one weighted histogram per (region, variable) pair plus a
    ``h_total_mcweight`` histogram to ``output/<basename of filename>``.

    Args:
        filename: Path of the input ROOT file.  Its last ``/``-separated
            component is reused as the output file name.

    Returns:
        None.  Results are written to the output ROOT file.
    """
    # The input used to be taken from a name ``inputfile`` that this function
    # never assigned (the assignment was commented out), so the argument was
    # ignored.  Use the argument.
    outputfile = "output/" + filename.split("/")[-1]

    mycache = uproot4.LRUArrayCache("1 MB")
    file_ = uproot4.open(filename, num_workers=10)
    nevents = ak.to_list(file_["h_total_mcweight"].values())[2]
    print("histogram opened: ", nevents)
    tree_ = file_["outTree"].arrays(array_cache=mycache)
    print("tree length", len(tree_))

    def nth(array, index):
        """Return ``getN(array, index)`` wrapped in an ``ak.Array``."""
        # presumably the index-th object of every event - confirm in getN
        return ak.Array(getN(array, index))

    cms_events = ak.zip(
        {
            "run": tree_["st_runId"],
            "lumi": tree_["st_lumiSection"],
            "event": tree_["st_eventId"],
            "jetpx": tree_["st_THINjetPx"],
            "jetpy": tree_["st_THINjetPy"],
            "jetpz": tree_["st_THINjetPz"],
            "jete": tree_["st_THINjetEnergy"],
            "jetpt": getpt(tree_["st_THINjetPx"], tree_["st_THINjetPy"]),
            "jeteta": geteta(tree_["st_THINjetPx"], tree_["st_THINjetPy"],
                             tree_["st_THINjetPz"]),
            "jetphi": getphi(tree_["st_THINjetPx"], tree_["st_THINjetPy"]),
            "jetcsv": tree_["st_THINjetDeepCSV"],
            "jetflav": tree_["st_THINjetHadronFlavor"],
            "metpt": tree_["st_pfMetCorrPt"],
            "metphi": tree_["st_pfMetCorrPhi"],
            "mettrig": tree_["st_mettrigdecision"],
            "elepx": tree_["st_elePx"],
            "elepy": tree_["st_elePy"],
            "elepz": tree_["st_elePz"],
            "elee": tree_["st_eleEnergy"],
            "eleidL": tree_["st_eleIsPassLoose"],
            "eleidT": tree_["st_eleIsPassTight"],
            "eleq": tree_["st_eleCharge"],
            "elept": getpt(tree_["st_elePx"], tree_["st_elePy"]),
            "eleeta": geteta(tree_["st_elePx"], tree_["st_elePy"],
                             tree_["st_elePz"]),
            "elephi": getphi(tree_["st_elePx"], tree_["st_elePy"]),
            "mupx": tree_["st_muPx"],
            "mupy": tree_["st_muPy"],
            "mupz": tree_["st_muPz"],
            "mue": tree_["st_muEnergy"],
            "muidT": tree_["st_isTightMuon"],
            "muq": tree_["st_muCharge"],
            "mupt": getpt(tree_["st_muPx"], tree_["st_muPy"]),
            "mueta": geteta(tree_["st_muPx"], tree_["st_muPy"],
                            tree_["st_muPz"]),
            "muphi": getphi(tree_["st_muPx"], tree_["st_muPy"]),
            "ntau": tree_["st_nTau_discBased_TightEleTightMuVeto"],
            "npho": tree_["st_nPho"],
            "phopx": tree_["st_phoPx"],
            "phopy": tree_["st_phoPy"],
            "phopz": tree_["st_phoPz"],
            "phoe": tree_["st_phoEnergy"],
            "phopt": getpt(tree_["st_phoPx"], tree_["st_phoPy"]),
            "phoeta": geteta(tree_["st_phoPx"], tree_["st_phoPy"],
                             tree_["st_phoPz"]),
            "nTrueInt": tree_["st_pu_nTrueInt"],
            "nPUVert": tree_["st_pu_nPUVert"],
            "genpt": tree_["st_genParPt"],
        },
        depth_limit=1)

    out_events = ak.zip(
        {
            "run": tree_["st_runId"],
            "lumi": tree_["st_lumiSection"],
            "event": tree_["st_eventId"],
        },
        depth_limit=1)

    print("event loading done")
    print("# of events: ", len(cms_events))

    # ------------------------------------------------------------------
    # Lepton selections.  ``== True`` is an element-wise array comparison
    # here, not a Python truth test, so it is kept as written.
    # ------------------------------------------------------------------
    cms_events["mu_sel_tight"] = (cms_events.mupt > 30) & (
        cms_events.muidT == True) & (numpy.abs(cms_events.mueta) < 2.4)
    cms_events["mu_sel_tight0"] = nth(cms_events.mu_sel_tight, 0)
    cms_events["nMuTight"] = ak.sum(cms_events.mu_sel_tight, axis=-1)
    cms_events["nMuLoose"] = ak.sum((cms_events.mupt > 10), axis=-1)
    cms_events["mu_q0"] = nth(cms_events.muq, 0)
    cms_events["mu_q1"] = nth(cms_events.muq, 1)

    cms_events["ele_sel_tight"] = (cms_events.eleidT == True) & (
        cms_events.elept > 30) & (numpy.abs(cms_events.eleeta) < 2.5)
    cms_events["ele_sel_tight0"] = nth(cms_events.ele_sel_tight, 0)
    cms_events["nEleTight"] = ak.sum(cms_events.ele_sel_tight, axis=-1)
    cms_events["nEleLoose"] = ak.sum((cms_events.elept > 10), axis=-1)
    cms_events["ele_q0"] = nth(cms_events.eleq, 0)
    cms_events["ele_q1"] = nth(cms_events.eleq, 1)

    # ------------------------------------------------------------------
    # W -> l nu recoil
    # ------------------------------------------------------------------
    cms_events["recoil_Wmunu"] = getrecoil(
        cms_events.nMuTight, cms_events.mupt, cms_events.muphi,
        cms_events.mupx, cms_events.mupy, cms_events.metpt,
        cms_events.metphi)
    cms_events["recoil_Wmunu0"] = ak.firsts(cms_events.recoil_Wmunu)
    cms_events["recoil_Wenu"] = getrecoil(
        cms_events.nEleTight, cms_events.elept, cms_events.elephi,
        cms_events.elepx, cms_events.elepy, cms_events.metpt,
        cms_events.metphi)
    cms_events["recoil_Wenu0"] = ak.firsts(cms_events.recoil_Wenu)

    # ------------------------------------------------------------------
    # Z -> l l candidates built from the objects at index 0 and 1
    # ------------------------------------------------------------------
    def dilepton(px, py, pz, energy):
        """Return (mass, pt, recoil) of the pair made of entries 0 and 1."""
        sum_px = nth(px, 0) + nth(px, 1)
        sum_py = nth(py, 0) + nth(py, 1)
        sum_pz = nth(pz, 0) + nth(pz, 1)
        sum_e = nth(energy, 0) + nth(energy, 1)
        mass = numpy.sqrt(sum_e**2 - sum_px**2 - sum_py**2 - sum_pz**2)
        pt = numpy.sqrt(sum_px**2 + sum_py**2)
        recoil = getrecoil1(sum_px, sum_py, cms_events.metpt,
                            cms_events.metphi)
        return mass, pt, recoil

    zee_mass, zee_pt, zee_recoil = dilepton(
        cms_events.elepx, cms_events.elepy, cms_events.elepz,
        cms_events.elee)
    cms_events["Zee_mass"] = zee_mass
    cms_events["Zee_pt"] = zee_pt
    cms_events["Zee_recoil"] = zee_recoil

    zmumu_mass, zmumu_pt, zmumu_recoil = dilepton(
        cms_events.mupx, cms_events.mupy, cms_events.mupz, cms_events.mue)
    cms_events["Zmumu_mass"] = zmumu_mass
    cms_events["Zmumu_pt"] = zmumu_pt
    cms_events["Zmumu_recoil"] = zmumu_recoil

    # ------------------------------------------------------------------
    # Recoil phi and transverse mass
    # ------------------------------------------------------------------
    cms_events["recoil_WmunuPhi"] = getRecoilPhi(
        cms_events.nMuTight, cms_events.mupt, cms_events.muphi,
        cms_events.mupx, cms_events.mupy, cms_events.metpt,
        cms_events.metphi)
    cms_events["recoil_WmunuPhi0"] = ak.firsts(cms_events.recoil_WmunuPhi)
    cms_events["recoil_WenuPhi"] = getRecoilPhi(
        cms_events.nEleTight, cms_events.elept, cms_events.elephi,
        cms_events.elepx, cms_events.elepy, cms_events.metpt,
        cms_events.metphi)
    cms_events["recoil_WenuPhi0"] = ak.firsts(cms_events.recoil_WenuPhi)

    cms_events["mt_Wmunu"] = getMT(
        cms_events.nMuTight, cms_events.mupt, cms_events.muphi,
        cms_events.mupx, cms_events.mupy, cms_events.metpt,
        cms_events.metphi)
    cms_events["mt_Wmunu0"] = ak.firsts(cms_events.mt_Wmunu)
    cms_events["mt_Wenu"] = getMT(
        cms_events.nEleTight, cms_events.elept, cms_events.elephi,
        cms_events.elepx, cms_events.elepy, cms_events.metpt,
        cms_events.metphi)
    cms_events["mt_Wenu0"] = ak.firsts(cms_events.mt_Wenu)

    # ------------------------------------------------------------------
    # Jets
    # ------------------------------------------------------------------
    cms_events["jet_sel_loose"] = (cms_events.jetpt > 30.0) & (
        numpy.abs(cms_events.jeteta) < 2.5)
    cms_events["jet_sel_tight"] = (cms_events.jetpt > 50.0) & (
        numpy.abs(cms_events.jeteta) < 2.5)
    loose_jets = cms_events.jet_sel_loose == True
    tight_jets = cms_events.jet_sel_tight == True

    # The b-tag flag is evaluated on the loose-selected jets only.
    cms_events["jet_sel_b"] = (cms_events.jetcsv[loose_jets] > 0.6321) & (
        numpy.abs(cms_events.jeteta[loose_jets]) < 2.4)

    cms_events["jetptTight"] = cms_events.jetpt[tight_jets]
    cms_events["jetetaTight"] = cms_events.jeteta[tight_jets]
    cms_events["jetphiTight"] = cms_events.jetphi[tight_jets]
    cms_events["jetptLoose"] = cms_events.jetpt[loose_jets]
    cms_events["jetetaLoose"] = cms_events.jeteta[loose_jets]
    cms_events["jetphiLoose"] = cms_events.jetphi[loose_jets]

    cms_events["jet_sel_tight0"] = nth(cms_events.jet_sel_tight[loose_jets], 0)
    cms_events["jet_sel_b_0"] = nth(cms_events.jet_sel_b, 0)
    cms_events["jet_sel_b_1"] = nth(cms_events.jet_sel_b, 1)

    cms_events["nJetLoose"] = ak.sum(cms_events.jet_sel_loose, axis=-1)
    cms_events["nJetTight"] = ak.sum(cms_events.jet_sel_tight, axis=-1)
    cms_events["nJetb"] = ak.sum(cms_events.jet_sel_b, axis=-1)

    cms_events["dphi_jet_met"] = DeltaPhi(cms_events.jetphi[loose_jets],
                                          cms_events.metphi)
    cms_events["min_dphi_jet_met"] = ak.min(cms_events.dphi_jet_met, axis=-1)

    # ------------------------------------------------------------------
    # Region masks
    # ------------------------------------------------------------------
    from regions import get_mask_wmunu1b, get_mask_wmunu2b, get_mask_wenu1b, get_mask_wenu2b, get_mask_topmunu1b, get_mask_topmunu2b, get_mask_topenu1b, get_mask_topenu2b, get_mask_Zmumu1b, get_mask_Zmumu2b, get_mask_Zee1b, get_mask_Zee2b, get_mask_SR1b, get_mask_SR2b

    region_masks = [
        ("mask_wmunu1b", get_mask_wmunu1b),
        ("mask_wmunu2b", get_mask_wmunu2b),
        ("mask_wenu1b", get_mask_wenu1b),
        ("mask_wenu2b", get_mask_wenu2b),
        ("mask_topmunu1b", get_mask_topmunu1b),
        ("mask_topmunu2b", get_mask_topmunu2b),
        ("mask_topenu1b", get_mask_topenu1b),
        ("mask_topenu2b", get_mask_topenu2b),
        ("mask_Zmumu1b", get_mask_Zmumu1b),
        ("mask_Zmumu2b", get_mask_Zmumu2b),
        ("mask_Zee1b", get_mask_Zee1b),
        ("mask_Zee2b", get_mask_Zee2b),
        ("mask_SR1b", get_mask_SR1b),
        ("mask_SR2b", get_mask_SR2b),
    ]
    for mask_name, get_mask in region_masks:
        cms_events[mask_name] = get_mask(cms_events)

    # ------------------------------------------------------------------
    # Output columns
    # ------------------------------------------------------------------
    for column in ("metpt", "metphi", "nTrueInt", "nJetLoose",
                   "mu_sel_tight0", "nMuTight", "nMuLoose", "mu_q0", "mu_q1"):
        out_events[column] = cms_events[column]
    for quantity in ("mupt", "mueta", "muphi"):
        for index in (0, 1):
            out_events["%s%d" % (quantity, index)] = nth(cms_events[quantity],
                                                         index)

    for column in ("ele_sel_tight0", "nEleTight", "nEleLoose", "ele_q0",
                   "ele_q1"):
        out_events[column] = cms_events[column]
    for quantity in ("elept", "eleeta", "elephi"):
        for index in (0, 1):
            out_events["%s%d" % (quantity, index)] = nth(cms_events[quantity],
                                                         index)

    # "nJetLoose" was already copied above; the repeated copy is redundant
    # but kept so the sequence of assignments is unchanged.
    for column in ("recoil_Wmunu0", "recoil_Wenu0", "recoil_WmunuPhi0",
                   "recoil_WenuPhi0", "mt_Wmunu0", "mt_Wenu0", "Zee_mass",
                   "Zee_pt", "Zee_recoil", "Zmumu_mass", "Zmumu_pt",
                   "Zmumu_recoil", "nJetLoose", "nJetTight", "nJetb",
                   "min_dphi_jet_met"):
        out_events[column] = cms_events[column]

    # NOTE(review): these three lines assign cms_events columns to
    # themselves; the target was possibly meant to be out_events - confirm
    # before changing, since that would add output columns.
    cms_events["jet_sel_tight0"] = cms_events["jet_sel_tight0"]
    cms_events["jet_sel_b_0"] = cms_events["jet_sel_b_0"]
    cms_events["jet_sel_b_1"] = cms_events["jet_sel_b_1"]

    # Index 0 is taken from the tight-selected jets, higher indices from the
    # loose-selected jets.
    out_events["jetpt0"] = nth(cms_events.jetptTight, 0)
    for index in range(1, 7):
        out_events["jetpt%d" % index] = nth(cms_events.jetptLoose, index)
    out_events["jeteta0"] = nth(cms_events.jetetaTight, 0)
    for index in range(1, 7):
        out_events["jeteta%d" % index] = nth(cms_events.jetetaLoose, index)
    out_events["jetphi0"] = nth(cms_events.jetphiTight, 0)
    for index in range(1, 3):
        out_events["jetphi%d" % index] = nth(cms_events.jetphiLoose, index)
    out_events["jetflav0"] = nth(cms_events.jetflav[tight_jets], 0)
    for index in range(1, 7):
        out_events["jetflav%d" % index] = nth(cms_events.jetflav[loose_jets],
                                              index)
    out_events["csv0"] = nth(cms_events.jetcsv[tight_jets], 0)
    for index in range(1, 4):
        out_events["csv%d" % index] = nth(cms_events.jetcsv[loose_jets],
                                          index)

    region_flags = [
        ("SR_2b", "mask_SR2b"),
        ("SR_1b", "mask_SR1b"),
        ("ZeeCR_2b", "mask_Zee2b"),
        ("ZeeCR_1b", "mask_Zee1b"),
        ("ZmumuCR_2b", "mask_Zmumu2b"),
        ("ZmumuCR_1b", "mask_Zmumu1b"),
        ("TopenuCR_2b", "mask_topenu2b"),
        ("TopenuCR_1b", "mask_topenu1b"),
        ("TopmunuCR_2b", "mask_topmunu2b"),
        ("TopmunuCR_1b", "mask_topmunu1b"),
        ("WenuCR_1b", "mask_wenu1b"),
        ("WenuCR_2b", "mask_wenu2b"),
        ("WmunuCR_1b", "mask_wmunu1b"),
        ("WmunuCR_2b", "mask_wmunu2b"),
    ]
    for flag_name, mask_name in region_flags:
        out_events[flag_name] = cms_events[mask_name]

    # ------------------------------------------------------------------
    # b-tagging scale factors and efficiencies
    # ------------------------------------------------------------------
    from read_sfs import btag_sf
    from read_sfs import evaluator

    for index in range(7):
        out_events["btagsf%d" % index] = btag_sf.eval(
            "central", out_events["jetflav%d" % index],
            abs(out_events["jeteta%d" % index]),
            out_events["jetpt%d" % index])

    for working_point in ("lwp", "mwp"):
        for flavour in ("btag", "ctag", "ltag"):
            eff_name = "%s_eff_%s" % (flavour, working_point)
            for index in (0, 1):
                out_events["%s_%d" % (eff_name, index)] = evaluator[eff_name](
                    out_events["jeteta%d" % index],
                    out_events["jetpt%d" % index])

    # ------------------------------------------------------------------
    # Electron scale factors
    # ------------------------------------------------------------------
    out_events["eleTightSF0"] = evaluator["EGamma_SF2D_T"](
        out_events.eleeta0, out_events.elept0)
    out_events["eleLooseSF1"] = evaluator["EGamma_SF2D_L"](
        out_events.eleeta1, out_events.elept1)
    out_events["eleTrigSF0"] = evaluator["EGamma_SF2D_Trig"](
        out_events.eleeta0, out_events.elept0)
    out_events["eleRecoSF0"] = evaluator["EGamma_SF2D_Reco"](
        out_events.eleeta0, out_events.elept0)

    eleRecoSF1_hi = evaluator["EGamma_SF2D_Reco"](out_events.eleeta1,
                                                  out_events.elept1)
    eleRecoSF1_lo = evaluator["EGamma_SF2D_Reco_lowpt"](out_events.eleeta1,
                                                        out_events.elept1)
    eleRecoSF1_hi_ = ak.fill_none(
        ak.mask(eleRecoSF1_hi, out_events.elept1 > 20.), 0)
    # The low-pt factor must cover the complementary pt range.  It used to
    # be masked with ``> 20.`` as well, which counted both factors above
    # 20 and left 0 below.
    eleRecoSF1_lo_ = ak.fill_none(
        ak.mask(eleRecoSF1_lo, out_events.elept1 <= 20.), 0)
    out_events["eleRecoSF1"] = eleRecoSF1_hi_ + eleRecoSF1_lo_

    # ------------------------------------------------------------------
    # Muon scale factors: BCDEF and GH factors averaged with these weights
    # ------------------------------------------------------------------
    bcdef_lumi = 19.554725529
    gh_lumi = 16.224846377
    total_lumi = bcdef_lumi + gh_lumi

    def era_average(name_pattern, *args):
        """Average the BCDEF and GH evaluators named by *name_pattern*."""
        return ((bcdef_lumi * evaluator[name_pattern % "BCDEF"](*args)) +
                (gh_lumi * evaluator[name_pattern % "GH"](*args))) / total_lumi

    # The low-pt evaluator is called with (pt, |eta|), the others with
    # (eta, pt); argument order is kept exactly as before.
    muonLooseIDSF_lowpt1 = era_average("muon_lowpt_%s_LooseID",
                                       out_events.mupt1,
                                       abs(out_events.mueta1))
    muonLooseIDSF1 = era_average("muon_highpt_%s_LooseID",
                                 out_events.mueta1, out_events.mupt1)
    muonLooseISOSF1 = era_average("muon_highpt_%s_LooseISO",
                                  out_events.mueta1, out_events.mupt1)

    # NOTE(review): a muon with pt exactly 20 matches neither mask and gets
    # an ID factor of 0 - confirm whether one bound should be inclusive.
    muon_loose_ID_low_SF_1 = ak.fill_none(
        ak.mask(muonLooseIDSF_lowpt1, out_events.mupt1 < 20.), 0)
    muon_loose_ID_high_SF_1 = ak.fill_none(
        ak.mask(muonLooseIDSF1, out_events.mupt1 > 20.), 0)
    muon_loose_ID_SF_1 = muon_loose_ID_low_SF_1 + muon_loose_ID_high_SF_1
    out_events["muLooseSF1"] = muon_loose_ID_SF_1 * muonLooseISOSF1

    muonTightIDSF0 = era_average("muon_highpt_%s_TightID",
                                 out_events.mueta0, out_events.mupt0)
    muonTightISOSF0 = era_average("muon_highpt_%s_TightISO",
                                  out_events.mueta0, out_events.mupt0)
    out_events["muTightSF0"] = muonTightIDSF0 * muonTightISOSF0

    out_events["puweight"] = evaluator["pu_weight"](cms_events.nTrueInt)

    # ------------------------------------------------------------------
    # Trigger weights
    # ------------------------------------------------------------------
    out_events["mettrigWeight"] = evaluator["met_trig"](cms_events.metpt)
    out_events["recoilWmunutrigWeight"] = evaluator["met_trig"](
        cms_events.recoil_Wmunu0)
    out_events["recoilWenutrigWeight"] = evaluator["met_trig"](
        cms_events.recoil_Wenu0)
    out_events["recoilZmumutrigWeight"] = evaluator["met_trig"](
        cms_events.Zmumu_recoil)
    out_events["recoilZeetrigWeight"] = evaluator["met_trig"](
        cms_events.Zee_recoil)

    # One total weight per region (pile-up weight times a trigger factor),
    # stored so that it does not have to be rebuilt later.
    region_trigger_weights = [
        ("SR_2b", "mettrigWeight"),
        ("SR_1b", "mettrigWeight"),
        ("ZeeCR_2b", "eleTrigSF0"),
        ("ZeeCR_1b", "eleTrigSF0"),
        ("ZmumuCR_2b", "recoilZmumutrigWeight"),
        ("ZmumuCR_1b", "recoilZmumutrigWeight"),
        ("TopenuCR_2b", "eleTrigSF0"),
        ("TopenuCR_1b", "eleTrigSF0"),
        ("TopmunuCR_2b", "recoilWmunutrigWeight"),
        ("TopmunuCR_1b", "recoilWmunutrigWeight"),
        ("WenuCR_1b", "eleTrigSF0"),
        ("WenuCR_2b", "eleTrigSF0"),
        ("WmunuCR_1b", "recoilWmunutrigWeight"),
        ("WmunuCR_2b", "recoilWmunutrigWeight"),
    ]
    for region_name, trigger_column in region_trigger_weights:
        out_events["weight_" + region_name] = (
            out_events.puweight * out_events[trigger_column])

    # ------------------------------------------------------------------
    # Fill histograms
    # ------------------------------------------------------------------
    from variables import vardict, regions, variables_common
    from binning import binning

    f = TFile(outputfile, "RECREATE")
    try:
        for ireg in regions:
            thisregion = out_events[out_events[ireg] == True]
            # Drop entries that are None after the region selection.
            thisregion_ = thisregion[~(ak.is_none(thisregion))]
            weight_ = "weight_" + ireg
            for ivar in variables_common[ireg]:
                hist_name_ = "h_reg_" + ireg + "_" + vardict[ivar]
                h = VarToHist(thisregion_[ivar], thisregion_[weight_],
                              hist_name_, binning[ireg][ivar])
                f.cd()
                h.Write()

        h_total = TH1F("h_total_mcweight", "h_total_mcweight", 2, 0, 2)
        h_total.SetBinContent(1, nevents)
        f.cd()
        h_total.Write()
    finally:
        # Close explicitly so the output is flushed even if filling fails or
        # the process exits without running ROOT's own cleanup.
        f.Close()

    write_parquet = False
    if write_parquet:
        ak.to_parquet(out_events, "analysis_wjets_allevents.parquet")
def parse_to_parquet(
    base_output_filename: Union[Path, str],
    store_only_necessary_columns: bool,
    input_filename: Union[Path, str],
    events_per_chunk: int,
    parser: str = "pandas",
    max_chunks: int = -1,
    compression: str = "zstd",
    compression_level: Optional[int] = None,
) -> None:
    """ Parse the JETSCAPE ASCII and convert it to parquet, (potentially) storing only the minimum necessary columns.

    When ``events_per_chunk`` is positive, one file per chunk is written into a
    directory named after ``base_output_filename``, each file carrying a
    ``_NN`` chunk index before its suffix. Otherwise, ``base_output_filename``
    itself is used as the output file.

    Args:
        base_output_filename: Basic output filename. Should include the entire path.
        store_only_necessary_columns: If True, store only the necessary columns, rather than
            all of them.
        input_filename: Filename of the input JETSCAPE ASCII file.
        events_per_chunk: Number of events to be read per chunk.
        parser: Name of the parser. Default: "pandas".
        max_chunks: Maximum number of chunks to read. Default: -1, which never matches a
            chunk count, so all chunks are read.
        compression: Compression algorithm for parquet. Default: "zstd". Options include:
            ["snappy", "gzip", "zstd"]. "gzip" is slightly better for storage, but slower.
            See the compression tests and parquet docs for more.
        compression_level: Compression level for parquet. Default: `None`, which lets parquet
            choose the best value.

    Returns:
        None. The parsed events are stored in parquet files.
    """
    # Validation
    base_output_filename = Path(base_output_filename)
    # Setup the base output filename: chunked output goes into a directory named
    # after the requested file.
    if events_per_chunk > 0:
        base_output_filename = base_output_filename / base_output_filename.name
    base_output_filename.parent.mkdir(parents=True, exist_ok=True)

    for i, arrays in enumerate(
        read(filename=input_filename, events_per_chunk=events_per_chunk, parser=parser)
    ):
        # Reduce to the minimum required data.
        if store_only_necessary_columns:
            arrays = full_events_to_only_necessary_columns_E_px_py_pz(arrays)

        # We limit the depth of the zip to ensure that we can write the parquet successfully.
        # (parquet can't handle lists of structs at the moment). Later, we'll recreate this
        # structure fully zipped together.
        # NOTE: The result has to be assigned back. Previously it was computed and then
        #       discarded, so the restructuring never took effect.
        arrays = ak.zip(dict(zip(ak.fields(arrays), ak.unzip(arrays))), depth_limit=1)

        # Parquet with zlib seems to do about the same as ascii tar.gz when we drop unneeded columns.
        # And it should load much faster!
        if events_per_chunk > 0:
            suffix = base_output_filename.suffix
            output_filename = (
                base_output_filename.parent / f"{base_output_filename.stem}_{i:02}"
            ).with_suffix(suffix)
        else:
            output_filename = base_output_filename

        ak.to_parquet(
            arrays,
            output_filename,
            compression=compression,
            compression_level=compression_level,
            # We run into a recursion limit or crash if there's a cut and we don't explode records. Probably a bug...
            # But it works fine if we explode records, so fine for now.
            explode_records=True,
        )

        # Break now so we don't have to read the next chunk.
        if (i + 1) == max_chunks:
            break
out_events[ "weight_wmunu1b"] = out_events.puweight * out_events.recoilWmunutrigWeight out_events[ "weight_wmunu2b"] = out_events.puweight * out_events.recoilWmunutrigWeight ## add EWK reweighting ## add QCD reweighting ## add missing weights ## add up and down weights ## get total events ## get cross-section ## ## Fill Histograms print(ak.to_list(out_events[:10])) ak.to_parquet(out_events, "analysis_wjets_allevents.parquet") end = time.clock() print("%.4gs" % (end - start)) ''' SF Links ----------------- electrons: trigger: https://github.com/ExoPie/ExoPieUtils/blob/test_systematics/scalefactortools/data_2016/electron_Trigger_eleTrig.root ele_reco: https://github.com/ExoPie/ExoPieUtils/blob/test_systematics/scalefactortools/data_2016/EGM2D_BtoH_GT20GeV_RecoSF_Legacy2016.root ele_reco_lowpt: https://github.com/ExoPie/ExoPieUtils/blob/test_systematics/scalefactortools/data_2016/EGM2D_BtoH_low_RecoSF_Legacy2016.root loose_id: https://github.com/ExoPie/ExoPieUtils/blob/test_systematics/scalefactortools/data_2016/2016LegacyReReco_ElectronLoose_Fall17V2.root tight_id: https://github.com/ExoPie/ExoPieUtils/blob/test_systematics/scalefactortools/data_2016/2016LegacyReReco_ElectronTight_Fall17V2.root Twiki: https://twiki.cern.ch/twiki/bin/view/CMS/EgammaIDRecipesRun2#Efficiencies_and_scale_factors -----------------