def process(self, df):
    """Fill muon pt and dimuon mass histograms for one chunk of events.

    Builds a muon candidate collection from either a jagged or a flat
    column layout, forms all distinct muon pairs per event, and
    accumulates per-dataset histograms and cutflow counters.
    """
    output = self.accumulator.identity()
    dataset = df['dataset']
    # Input may arrive already jagged, or flat with a separate counts column.
    if isinstance(df['Muon_pt'], akd.JaggedArray):
        muon = CandArray.candidatesfromcounts(
            counts=df['Muon_pt'].counts,
            pt=df['Muon_pt'].content,
            eta=df['Muon_eta'].content,
            phi=df['Muon_phi'].content,
            mass=df['Muon_mass'].content,
        )
    else:
        muon = CandArray.candidatesfromcounts(
            counts=df['nMuon'],
            pt=df['Muon_pt'],
            eta=df['Muon_eta'],
            phi=df['Muon_phi'],
            mass=df['Muon_mass'],
        )
    # All unordered, distinct muon pairs within each event.
    dimuon = muon.distincts()
    output['pt'].fill(dataset=dataset, pt=muon.pt.flatten())
    output['mass'].fill(dataset=dataset, mass=dimuon.mass.flatten())
    output['cutflow']['%s_pt' % dataset] += np.sum(muon.counts)
    output['cutflow']['%s_mass' % dataset] += np.sum(dimuon.counts)
    return output
def process(self, df):
    """Fill per-dataset muon pt and dimuon mass histograms.

    Accepts the event columns either as jagged arrays or as flat arrays
    accompanied by an ``nMuon`` counts column, builds muon candidates,
    and accumulates histograms plus cutflow counters into the output.
    """
    output = self.accumulator.identity()
    dataset = df["dataset"]
    muon = None
    # Columns may be jagged already, or flat with a separate counts column.
    if isinstance(df["Muon_pt"], akd.JaggedArray):
        muon = CandArray.candidatesfromcounts(
            counts=df["Muon_pt"].counts,
            pt=df["Muon_pt"].content,
            eta=df["Muon_eta"].content,
            phi=df["Muon_phi"].content,
            mass=df["Muon_mass"].content,
        )
    else:
        muon = CandArray.candidatesfromcounts(
            counts=df["nMuon"],
            pt=df["Muon_pt"],
            eta=df["Muon_eta"],
            phi=df["Muon_phi"],
            mass=df["Muon_mass"],
        )
    # All distinct muon pairs within each event.
    dimuon = muon.distincts()
    output["pt"].fill(dataset=dataset, pt=muon.pt.flatten())
    output["mass"].fill(dataset=dataset, mass=dimuon.mass.flatten())
    output["cutflow"]["%s_pt" % dataset] += np.sum(muon.counts)
    output["cutflow"]["%s_mass" % dataset] += np.sum(dimuon.counts)
    return output
def test_jet_transformer():
    """Exercise JetTransformer end-to-end on dummy jets.

    Applies a full factorized JEC stack plus JER smearing and JEC
    uncertainty sources, then checks that every expected systematic
    variation column was attached to the jet collection.
    """
    from coffea.analysis_objects import JaggedCandidateArray as CandArray
    from coffea.jetmet_tools import (FactorizedJetCorrector, JetResolution,
                                     JetResolutionScaleFactor,
                                     JetCorrectionUncertainty, JetTransformer)

    counts, test_px, test_py, test_pz, test_e = dummy_four_momenta()
    n_jets = np.sum(counts)
    test_Rho = np.full(shape=(n_jets,), fill_value=100.)
    test_A = np.full(shape=(n_jets,), fill_value=5.)

    jets = CandArray.candidatesfromcounts(counts, px=test_px, py=test_py,
                                          pz=test_pz, energy=test_e)
    # The corrector stack needs raw kinematics plus rho and jet area.
    jets.add_attributes(ptRaw=jets.pt, massRaw=jets.mass,
                        rho=test_Rho, area=test_A)

    jec_names = ['Summer16_23Sep2016V3_MC_L1FastJet_AK4PFPuppi',
                 'Summer16_23Sep2016V3_MC_L2Relative_AK4PFPuppi',
                 'Summer16_23Sep2016V3_MC_L2L3Residual_AK4PFPuppi',
                 'Summer16_23Sep2016V3_MC_L3Absolute_AK4PFPuppi']
    corrector = FactorizedJetCorrector(
        **{name: evaluator[name] for name in jec_names})

    # Pick up every uncertainty-source table shipped with the evaluator.
    junc_names = [
        name for name in dir(evaluator)
        if 'Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi' in name
    ]
    junc = JetCorrectionUncertainty(
        **{name: evaluator[name] for name in junc_names})

    jer_names = ['Spring16_25nsV10_MC_PtResolution_AK4PFPuppi']
    reso = JetResolution(**{name: evaluator[name] for name in jer_names})
    jersf_names = ['Spring16_25nsV10_MC_SF_AK4PFPuppi']
    resosf = JetResolutionScaleFactor(
        **{name: evaluator[name] for name in jersf_names})

    xform = JetTransformer(jec=corrector, junc=junc, jer=reso, jersf=resosf)
    print(xform.uncertainties)

    xform.transform(jets)
    print(jets.columns)

    # JER smearing always produces up/down variations ...
    assert 'pt_jer_up' in jets.columns
    assert 'pt_jer_down' in jets.columns
    assert 'mass_jer_up' in jets.columns
    assert 'mass_jer_down' in jets.columns
    # ... and so does every registered JEC uncertainty source.
    for unc in xform.uncertainties:
        for stem in ('pt_', 'mass_'):
            assert stem + unc + '_up' in jets.columns
            assert stem + unc + '_down' in jets.columns
def dummy_four_momenta():
    """Generate a reproducible jagged set of pion four-momenta.

    Returns ``(counts, px, py, pz, energy)``: ``counts`` holds the particle
    multiplicity of 1000 events (capped at 20 per event) and the remaining
    flat arrays hold momentum components plus the on-shell energy for the
    charged-pion mass.
    """
    np.random.seed(12345)  # fixed seed -> deterministic fixture
    n_events = 1000
    counts = np.minimum(
        np.random.exponential(0.5, size=n_events).astype(int), 20)
    n_particles = np.sum(counts)
    px = np.random.normal(loc=20.0, scale=5.0, size=n_particles)
    py = np.random.normal(loc=20.0, scale=5.0, size=n_particles)
    pz = np.random.normal(loc=0, scale=55, size=n_particles)
    # Put every particle on the charged-pion mass shell (0.135 GeV).
    m_pi = np.full_like(px, fill_value=0.135)
    energy = np.sqrt(np.square(px) + np.square(py) +
                     np.square(pz) + np.square(m_pi))
    return (counts, px, py, pz, energy)
def dummy_jagged_eta_pt():
    """Generate a reproducible jagged (eta, pt) fixture.

    Returns ``(counts, eta, pt)``: particle multiplicities for 50 events
    plus flat arrays of pseudorapidity in [-3, 3) and strictly positive
    transverse momenta.
    """
    np.random.seed(42)  # fixed seed -> deterministic fixture
    counts = np.random.exponential(2, size=50).astype(int)
    n_entries = np.sum(counts)
    eta = np.random.uniform(-3., 3., size=n_entries)
    # Sum of two exponential draws: positive, with a heavier tail.
    pt = np.random.exponential(10., size=n_entries) + np.random.exponential(
        10, size=n_entries)
    return (counts, eta, pt)
def test_jet_transformer():
    """End-to-end JetTransformer check including MET propagation.

    Builds dummy jets plus a fake MET candidate, applies the full
    JEC/JER stack with uncertainty sources, and verifies that every
    systematic variation column appears on both jets and MET.
    """
    import numpy as np
    import awkward as ak
    import math
    from coffea.analysis_objects import JaggedCandidateArray as CandArray
    from coffea.jetmet_tools import (FactorizedJetCorrector, JetResolution,
                                     JetResolutionScaleFactor,
                                     JetCorrectionUncertainty, JetTransformer)

    counts, test_px, test_py, test_pz, test_e = dummy_four_momenta()
    test_Rho = np.full(shape=(np.sum(counts), ), fill_value=100.)
    test_A = np.full(shape=(np.sum(counts), ), fill_value=5.)

    jets = CandArray.candidatesfromcounts(counts,
                                          px=test_px,
                                          py=test_py,
                                          pz=test_pz,
                                          energy=test_e)
    # Raw kinematics plus rho/area are required inputs for the correctors.
    jets.add_attributes(ptRaw=jets.pt,
                        massRaw=jets.mass,
                        rho=test_Rho,
                        area=test_A)

    # One fake MET candidate per event, carrying the unclustered-energy
    # shift attributes the transformer reads for the UnclustEn variation.
    fakemet = np.random.exponential(scale=1.0, size=counts.size)
    metphi = np.random.uniform(low=-math.pi, high=math.pi, size=counts.size)
    syst_up = 0.001 * fakemet
    syst_down = -0.001 * fakemet
    # NOTE(review): DeltaX is built from syst_up but DeltaY from syst_down
    # -- looks inconsistent for an "Up" shift; confirm this is intentional.
    met = CandArray.candidatesfromcounts(
        np.ones_like(counts),
        pt=fakemet,
        eta=np.zeros_like(counts),
        phi=metphi,
        mass=np.zeros_like(counts),
        MetUnclustEnUpDeltaX=syst_up * np.cos(metphi),
        MetUnclustEnUpDeltaY=syst_down * np.sin(metphi))

    jec_names = [
        'Summer16_23Sep2016V3_MC_L1FastJet_AK4PFPuppi',
        'Summer16_23Sep2016V3_MC_L2Relative_AK4PFPuppi',
        'Summer16_23Sep2016V3_MC_L2L3Residual_AK4PFPuppi',
        'Summer16_23Sep2016V3_MC_L3Absolute_AK4PFPuppi'
    ]
    corrector = FactorizedJetCorrector(
        **{name: evaluator[name] for name in jec_names})

    # Collect every matching uncertainty-source table from the evaluator.
    junc_names = []
    for name in dir(evaluator):
        if 'Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi' in name:
            junc_names.append(name)
    junc = JetCorrectionUncertainty(
        **{name: evaluator[name] for name in junc_names})

    jer_names = ['Spring16_25nsV10_MC_PtResolution_AK4PFPuppi']
    reso = JetResolution(**{name: evaluator[name] for name in jer_names})
    jersf_names = ['Spring16_25nsV10_MC_SF_AK4PFPuppi']
    resosf = JetResolutionScaleFactor(
        **{name: evaluator[name] for name in jersf_names})

    xform = JetTransformer(jec=corrector, junc=junc, jer=reso, jersf=resosf)
    print(xform.uncertainties)

    xform.transform(jets, met=met)

    print('jets', jets.columns)
    print('met', met.columns)

    # JER smearing variations on the jets.
    assert ('pt_jer_up' in jets.columns)
    assert ('pt_jer_down' in jets.columns)
    assert ('mass_jer_up' in jets.columns)
    assert ('mass_jer_down' in jets.columns)
    # Unclustered-energy variations propagated to the MET.
    assert ('pt_UnclustEn_up' in met.columns)
    assert ('pt_UnclustEn_down' in met.columns)
    assert ('phi_UnclustEn_up' in met.columns)
    assert ('phi_UnclustEn_down' in met.columns)
    # Every JEC uncertainty source shows up on jets and on the MET.
    for unc in xform.uncertainties:
        assert ('pt_' + unc + '_up' in jets.columns)
        assert ('pt_' + unc + '_down' in jets.columns)
        assert ('mass_' + unc + '_up' in jets.columns)
        assert ('mass_' + unc + '_down' in jets.columns)
        assert ('pt_' + unc + '_up' in met.columns)
        assert ('phi_' + unc + '_up' in met.columns)
def test_hist():
    """Exercise coffea.hist: dense/sparse axes, fill, reduce, group, scale.

    The numeric expectations below are tied to the fixed-seed fixture from
    dummy_jagged_eta_pt() and to the exact sequence of np.random draws in
    this function -- do not reorder the random calls.
    """
    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    # A histogram with no axes at all.
    h_nothing = hist.Hist("empty inside")
    assert h_nothing.sparse_dim() == h_nothing.dense_dim() == 0
    assert h_nothing.values() == {}

    # Two regular dense axes; total weight must equal the number of fills.
    h_regular_bins = hist.Hist("regular joe",
                               hist.Bin("x", "x", 20, 0, 200),
                               hist.Bin("y", "why", 20, -3, 3))
    h_regular_bins.fill(x=test_pt, y=test_eta)
    nentries = np.sum(counts)
    assert h_regular_bins.sum(
        "x", "y", overflow='all').values(sumw2=True)[()] == (nentries,
                                                             nentries)
    # bin x=2, y=10 (when overflow removed)
    count_some_bin = np.sum((test_pt >= 20.) & (test_pt < 30.)
                            & (test_eta >= 0.) & (test_eta < 0.3))
    assert h_regular_bins.integrate(
        "x", slice(20, 30)).values()[()][10] == count_some_bin
    assert h_regular_bins.integrate(
        "y", slice(0, 0.3)).values()[()][2] == count_some_bin

    # Slicing shifts the bin indices of the same contents.
    h_reduced = h_regular_bins[10:, -.6:]
    # bin x=1, y=2
    assert h_reduced.integrate("x",
                               slice(20, 30)).values()[()][2] == count_some_bin
    assert h_reduced.integrate("y",
                               slice(0, 0.3)).values()[()][1] == count_some_bin
    h_reduced.fill(x=23, y=0.1)
    assert h_reduced.integrate(
        "x", slice(20, 30)).values()[()][2] == count_some_bin + 1
    assert h_reduced.integrate(
        "y", slice(0, 0.3)).values()[()][1] == count_some_bin + 1

    # Sparse (categorical) axes with scalar, array, and default weights.
    animal = hist.Cat("animal", "type of animal")
    vocalization = hist.Cat("vocalization",
                            "onomatopoiea is that how you spell it?")
    h_cat_bins = hist.Hist("I like cats", animal, vocalization)
    h_cat_bins.fill(animal="cat", vocalization="meow", weight=2.)
    h_cat_bins.fill(animal="dog", vocalization="meow",
                    weight=np.array([-1., -1., -5.]))
    h_cat_bins.fill(animal="dog", vocalization="woof", weight=100.)
    h_cat_bins.fill(animal="dog", vocalization="ruff")
    assert h_cat_bins.values()[("cat", "meow")] == 2.
    assert h_cat_bins.values(sumw2=True)[("dog", "meow")] == (-7., 27.)
    assert h_cat_bins.integrate(
        "vocalization",
        ["woof", "ruff"]).values(sumw2=True)[("dog", )] == (101., 10001.)

    height = hist.Bin("height", "height [m]", 10, 0, 5)
    # Four equivalent ways of declaring the same axes (positional, axes=
    # tuple, axes= list with label kwarg, and both combined).
    h_mascots_1 = hist.Hist(
        "fermi mascot showdown",
        animal,
        vocalization,
        height,
        # weight is a reserved keyword
        hist.Bin("mass", "weight (g=9.81m/s**2) [kg]",
                 np.power(10., np.arange(5) - 1)),
    )
    h_mascots_2 = hist.Hist(
        "fermi mascot showdown",
        axes=(
            animal,
            vocalization,
            height,
            # weight is a reserved keyword
            hist.Bin("mass", "weight (g=9.81m/s**2) [kg]",
                     np.power(10., np.arange(5) - 1)),
        ))
    h_mascots_3 = hist.Hist(
        axes=[
            animal,
            vocalization,
            height,
            # weight is a reserved keyword
            hist.Bin("mass", "weight (g=9.81m/s**2) [kg]",
                     np.power(10., np.arange(5) - 1)),
        ],
        label="fermi mascot showdown")
    h_mascots_4 = hist.Hist(
        "fermi mascot showdown",
        animal,
        vocalization,
        height,
        # weight is a reserved keyword
        hist.Bin("mass", "weight (g=9.81m/s**2) [kg]",
                 np.power(10., np.arange(5) - 1)),
        axes=[
            animal,
            vocalization,
            height,
            # weight is a reserved keyword
            hist.Bin("mass", "weight (g=9.81m/s**2) [kg]",
                     np.power(10., np.arange(5) - 1)),
        ],
    )
    assert h_mascots_1._dense_shape == h_mascots_2._dense_shape
    assert h_mascots_2._dense_shape == h_mascots_3._dense_shape
    assert h_mascots_3._dense_shape == h_mascots_4._dense_shape
    assert h_mascots_1._axes == h_mascots_2._axes
    assert h_mascots_2._axes == h_mascots_3._axes
    assert h_mascots_3._axes == h_mascots_4._axes

    # Populate with three species (random call order matters, see above).
    adult_bison_h = np.random.normal(loc=2.5, scale=0.2, size=40)
    adult_bison_w = np.random.normal(loc=700, scale=100, size=40)
    h_mascots_1.fill(animal="bison", vocalization="huff",
                     height=adult_bison_h, mass=adult_bison_w)
    goose_h = np.random.normal(loc=0.4, scale=0.05, size=1000)
    goose_w = np.random.normal(loc=7, scale=1, size=1000)
    h_mascots_1.fill(animal="goose", vocalization="honk",
                     height=goose_h, mass=goose_w)
    crane_h = np.random.normal(loc=1, scale=0.05, size=4)
    crane_w = np.random.normal(loc=10, scale=1, size=4)
    h_mascots_1.fill(animal="crane", vocalization="none",
                     height=crane_h, mass=crane_w)

    # copy()+clear() reuses the axes with empty contents (rebinds the name
    # h_mascots_2 that previously held the axes=... construction above).
    h_mascots_2 = h_mascots_1.copy()
    h_mascots_2.clear()
    baby_bison_h = np.random.normal(loc=.5, scale=0.1, size=20)
    baby_bison_w = np.random.normal(loc=200, scale=10, size=20)
    baby_bison_cutefactor = 2.5 * np.ones_like(baby_bison_w)
    h_mascots_2.fill(animal="bison", vocalization="baa",
                     height=baby_bison_h, mass=baby_bison_w,
                     weight=baby_bison_cutefactor)
    h_mascots_2.fill(animal="fox", vocalization="none", height=1., mass=30.)

    # Addition merges the two histograms.
    h_mascots = h_mascots_1 + h_mascots_2
    assert h_mascots.integrate("vocalization", "h*").sum(
        "height", "mass", "animal").values()[()] == 1040.

    # Group animals into a coarser categorical axis.
    species_class = hist.Cat("species_class",
                             "where the subphylum is vertibrates")
    classes = {
        'birds': ['goose', 'crane'],
        'mammals': ['bison', 'fox'],
    }
    h_species = h_mascots.group("animal", species_class, classes)

    assert set(h_species.integrate("vocalization").values().keys()) == set([
        ('birds', ), ('mammals', )
    ])
    nbirds_bin = np.sum((goose_h >= 0.5) & (goose_h < 1) & (goose_w > 10)
                        & (goose_w < 100))
    nbirds_bin += np.sum((crane_h >= 0.5) & (crane_h < 1) & (crane_w > 10)
                         & (crane_w < 100))
    assert h_species.integrate("vocalization").values()[(
        'birds', )][1, 2] == nbirds_bin
    tally = h_species.sum("mass", "height", "vocalization").values()
    assert tally[('birds', )] == 1004.
    assert tally[('mammals', )] == 91.

    # Scaling: per-category factors on one axis, then a global factor.
    h_species.scale({"honk": 0.1, "huff": 0.9}, axis="vocalization")
    h_species.scale(5.)
    tally = h_species.sum("mass", height, vocalization).values(sumw2=True)
    assert tally[('birds', )] == (520., 350.)
    assert tally[('mammals', )] == (435., 25 * (40 * (0.9**2) + 20 *
                                                (2.5**2) + 1))

    # Axis identity is preserved through reductions.
    assert h_species.axis("vocalization") is vocalization
    assert h_species.axis("height") is height
    assert h_species.integrate("vocalization", "h*").axis("height") is height

    # Grouping over a (categorical, dense) axis pair simultaneously.
    tall_class = hist.Cat("tall_class", "species class (species above 1m)")
    mapping = {
        'birds': (['goose', 'crane'], slice(1., None)),
        'mammals': (['bison', 'fox'], slice(1., None)),
    }
    h_tall = h_mascots.group((animal, height), tall_class, mapping)
    tall_bird_count = np.sum(goose_h >= 1.) + np.sum(crane_h >= 1)
    assert h_tall.sum("mass",
                      "vocalization").values()[('birds', )] == tall_bird_count
    tall_mammal_count = np.sum(adult_bison_h >= 1.) + np.sum(
        baby_bison_h >= 1) + 1
    assert h_tall.sum(
        "mass", "vocalization").values()[('mammals', )] == tall_mammal_count

    # Removing categories drops their contents entirely.
    h_less = h_mascots.remove(["fox", "bison"], axis="animal")
    assert h_less.sum("vocalization", "height", "mass",
                      "animal").values()[()] == 1004.
def test_analysis_objects():
    """Exercise JaggedTLorentzVectorArray and JaggedCandidateArray.

    Covers construction from flat components and stacked four-vectors,
    jagged selections, pair/combination builders, gen-reco matching, and
    the alternative four-momentum constructor keyword sets.

    Fix: ``jtlva2_selection2`` previously re-selected from
    ``jtlva1_selection1`` (copy-paste error); it now selects from
    ``jtlva2_selection1`` as intended.
    """
    counts, px, py, pz, energy = dummy_four_momenta()
    thep4 = np.stack((px, py, pz, energy)).T

    # --- JaggedTLorentzVectorArray: two equivalent constructions ---
    tlva1 = uproot_methods.TLorentzVectorArray(px, py, pz, energy)
    tlva2 = uproot_methods.TLorentzVectorArray(thep4[:, 0], thep4[:, 1],
                                               thep4[:, 2], thep4[:, 3])
    jtlva1 = JaggedTLorentzVectorArray.fromcounts(counts, tlva1)
    jtlva2 = JaggedTLorentzVectorArray.fromcounts(counts, tlva2)

    jtlva1_selection1 = jtlva1[jtlva1.counts > 0]
    jtlva1_selection2 = jtlva1_selection1[jtlva1_selection1.pt > 5]

    jtlva2_selection1 = jtlva2[jtlva2.counts > 0]
    # Fixed: select from jtlva2_selection1 (was jtlva1_selection1).
    jtlva2_selection2 = jtlva2_selection1[jtlva2_selection1.pt > 5]

    # Both constructions must agree component-by-component.
    diffx = np.abs(jtlva1.x - jtlva2.x)
    diffy = np.abs(jtlva1.y - jtlva2.y)
    diffz = np.abs(jtlva1.z - jtlva2.z)
    difft = np.abs(jtlva1.t - jtlva2.t)
    assert (diffx < 1e-8).flatten().all()
    assert (diffy < 1e-8).flatten().all()
    assert (diffz < 1e-8).flatten().all()
    assert (difft < 1e-8).flatten().all()

    # --- JaggedCandidateArray from a stacked p4 matrix ---
    jca1 = JaggedCandidateArray.candidatesfromcounts(counts, p4=thep4)
    jca2 = JaggedCandidateArray.candidatesfromcounts(counts, p4=thep4)
    assert ((jca1.offsets == jca2.offsets).all())

    # Attach extra per-candidate columns in both supported ways.
    addon1 = jca1.zeros_like()
    addon2 = jca2.ones_like()
    jca1['addon'] = addon1
    jca2['addon'] = addon2
    jca1.add_attributes(addonFlat=addon1.flatten(), addonJagged=addon1)

    # Accessing derived kinematics must not disturb the jagged structure.
    diffm = np.abs(jca1.p4.mass - jca2.p4.mass)
    assert ((jca1.offsets == jca2.offsets).all())
    diffpt = np.abs(jca1.p4.pt - jca2.p4.pt)
    assert ((jca1.offsets == jca2.offsets).all())
    eta2 = jca2.p4.eta
    eta1 = jca1.p4.eta
    print(np.sum(eta1.counts), np.sum(eta2.counts))
    diffeta = np.abs(jca1.p4.eta - jca2.p4.eta)
    assert ((jca1.offsets == jca2.offsets).all())
    assert (diffm < 1e-8).flatten().all()
    assert (diffpt < 1e-8).flatten().all()
    assert (diffeta < 1e-8).flatten().all()

    # Fast accessors must match the values derived through .p4.
    fastfs = ['pt', 'eta', 'phi', 'mass']
    for func in fastfs:
        func1 = getattr(jca1, func)
        func2 = getattr(jca1.p4, func)
        dfunc = np.abs(func1 - func2)
        assert (dfunc < 1e-8).flatten().all()

    # --- combination builders all carry a p4 column ---
    adistinct = jca1.distincts()
    apair = jca1.pairs()
    across = jca1.cross(jca2)
    achoose2 = jca1.choose(2)
    achoose3 = jca1.choose(3)
    assert 'p4' in adistinct.columns
    assert 'p4' in apair.columns
    assert 'p4' in across.columns
    assert 'p4' in achoose2.columns
    assert 'p4' in achoose3.columns

    # Combined p4 must equal the sum of the constituents' p4s.
    admsum = (adistinct.i0.p4 + adistinct.i1.p4).mass
    apmsum = (apair.i0.p4 + apair.i1.p4).mass
    acmsum = (across.i0.p4 + across.i1.p4).mass
    ach3msum = (achoose3.i0.p4 + achoose3.i1.p4 + achoose3.i2.p4).mass
    diffadm = np.abs(adistinct.p4.mass - admsum)
    diffapm = np.abs(apair.p4.mass - apmsum)
    diffacm = np.abs(across.p4.mass - acmsum)
    diffachm = np.abs(achoose2.p4.mass - admsum)
    diffach3m = np.abs(achoose3.p4.mass - ach3msum)
    assert (diffadm < 1e-8).flatten().all()
    assert (diffapm < 1e-8).flatten().all()
    assert (diffacm < 1e-8).flatten().all()
    assert (diffachm < 1e-8).flatten().all()
    assert (diffach3m < 1e-8).flatten().all()

    # Candidate selections must agree with the TLorentzVector selections.
    selection11 = jca1[jca1.counts > 0]
    selection12 = selection11[selection11.p4.pt > 5]
    selection21 = jca2[jca2.counts > 0]
    selection22 = selection21[selection21.p4.pt > 5]
    diffcnts = selection12.counts - jtlva1_selection2.counts
    diffm = np.abs(selection12.p4.mass - jtlva1_selection2.mass)
    diffaddon = selection12.addon - selection22.addon
    assert (diffcnts == 0).flatten().all()
    assert (diffm < 1e-8).flatten().all()
    assert (diffaddon == -1).flatten().all()

    # --- gen-reco matching ---
    gen, reco = gen_reco_TLV()
    flat_gen = gen.flatten()
    gen_px, gen_py, gen_pz, gen_e = (flat_gen.x, flat_gen.y, flat_gen.z,
                                     flat_gen.t)
    flat_reco = reco.flatten()
    reco_px, reco_py, reco_pz, reco_e = (flat_reco.x, flat_reco.y,
                                         flat_reco.z, flat_reco.t)
    jca_gen = JaggedCandidateArray.candidatesfromcounts(gen.counts,
                                                        px=gen_px,
                                                        py=gen_py,
                                                        pz=gen_pz,
                                                        energy=gen_e)
    jca_reco = JaggedCandidateArray.candidatesfromcounts(reco.counts,
                                                         px=reco_px,
                                                         py=reco_py,
                                                         pz=reco_pz,
                                                         energy=reco_e)
    print('gen eta: ', jca_gen.p4.eta, '\n gen phi:', jca_gen.p4.phi)
    print('reco eta: ', jca_reco.p4.eta, '\n reco phi:', jca_reco.p4.phi)
    match_mask = jca_reco.match(jca_gen, deltaRCut=0.3)
    print('match mask: ', match_mask)
    fast_match_mask = jca_reco.fastmatch(jca_gen, deltaRCut=0.3)
    print('fastmatch mask: ', fast_match_mask)
    # match and fastmatch must agree on the same inputs.
    assert ((match_mask == fast_match_mask).all().all())
    print('arg matches: ', jca_reco.argmatch(jca_gen, deltaRCut=0.3))
    argmatch_nocut = jca_gen.argmatch(jca_reco).flatten()
    argmatch_dr03 = jca_gen.argmatch(jca_reco, deltaRCut=0.3).flatten()
    argmatch_dr03_dpt01 = jca_gen.argmatch(jca_reco,
                                           deltaRCut=0.3,
                                           deltaPtCut=0.1).flatten()
    # Expected counts for the fixed gen_reco_TLV fixture.
    assert (argmatch_nocut.size == 5)
    assert (argmatch_dr03[argmatch_dr03 != -1].size == 3)
    assert (argmatch_dr03_dpt01[argmatch_dr03_dpt01 != -1].size == 2)
    assert (jca_gen.match(jca_reco,
                          deltaRCut=0.3).flatten().flatten().sum() == 3)
    assert (jca_gen.match(jca_reco, deltaRCut=0.3,
                          deltaPtCut=0.1).flatten().flatten().sum() == 2)

    # test various four-momentum constructors
    ptetaphiE_test = JaggedCandidateArray.candidatesfromcounts(
        jca_reco.counts,
        pt=jca_reco.pt,
        eta=jca_reco.eta,
        phi=jca_reco.phi,
        energy=jca_reco.p4.energy)
    pxpypzM_test = JaggedCandidateArray.candidatesfromcounts(
        jca_reco.counts,
        px=jca_reco.p4.x,
        py=jca_reco.p4.y,
        pz=jca_reco.p4.z,
        mass=jca_reco.mass)
    ptphipzE_test = JaggedCandidateArray.candidatesfromcounts(
        jca_reco.counts,
        pt=jca_reco.pt,
        phi=jca_reco.phi,
        pz=jca_reco.p4.z,
        energy=jca_reco.p4.energy)
    pthetaphiE_test = JaggedCandidateArray.candidatesfromcounts(
        jca_reco.counts,
        p=jca_reco.p4.p,
        theta=jca_reco.p4.theta,
        phi=jca_reco.phi,
        energy=jca_reco.p4.energy)