def to_np_array(ak_array, maxN=100, pad=0):
    """Turn a jagged awkward array into a rectangular NumPy array.

    The innermost axis is padded with missing values, or truncated, to exactly
    ``maxN`` entries; the missing values are then replaced by ``pad`` before
    the conversion to NumPy.
    """
    fixed_length = ak.pad_none(ak_array, maxN, clip=True, axis=-1)
    without_missing = ak.fill_none(fixed_length, pad)
    return without_missing.to_numpy()
def pad_awkward_array(array, pad_length, pad_value):
    """Pad or truncate ``array`` to ``pad_length`` entries and fill the gaps.

    ``awkward.pad_none`` is called with ``clip=True`` on its default axis, so
    longer lists are cut and shorter ones are extended with missing values,
    which are then replaced by ``pad_value``.
    """
    fixed_length = awkward.pad_none(array, pad_length, clip=True)
    return awkward.fill_none(fixed_length, pad_value)
def trilep_baseline(self, omit=None, cutflow=None, tight=False):
    '''
    Build the trilepton baseline selection and return the combined event mask.

    omit:    names of cuts that are registered but not required in the returned mask
    cutflow: give it a cutflow object if you want it to be filled; one row is added
             per cut, holding the cumulative mask up to and including that cut
    tight:   additionally require 'N_jet>3', 'N_central>2' and 'ST>600'

    Side effects: self.selection is replaced by a fresh PackedSelection holding
    every cut defined here, and self.reqs is set to the list of required cuts
    (after removing the omitted ones).
    '''
    # a None default avoids sharing one mutable list between calls
    omit = [] if omit is None else omit

    self.selection = PackedSelection()

    def os_pairs(leptons):
        # all opposite-charge pairs within one lepton collection
        pairs = choose(leptons, 2)
        return pairs[(pairs['0'].charge*pairs['1'].charge < 0)]

    # exactly three veto leptons in the event
    lepveto = ((ak.num(self.ele_veto) + ak.num(self.mu_veto))==3)

    # every same-flavour, opposite-charge veto-lepton pair must have a mass
    # further than 10 away from 91.2
    OS_dimu_veto = os_pairs(self.mu_veto)
    OS_diele_veto = os_pairs(self.ele_veto)
    offZ_veto = (ak.all(abs(OS_dimu_veto.mass-91.2)>10, axis=1) & ak.all(abs(OS_diele_veto.mass-91.2)>10, axis=1))

    # pdgIds of the two highest-pt leptons (0 where there is no such lepton) feed the trigger lookup
    lepton = ak.concatenate([self.ele, self.mu], axis=1)
    lepton_pdgId_pt_ordered = ak.fill_none(ak.pad_none(lepton[ak.argsort(lepton.pt, ascending=False)].pdgId, 2, clip=True), 0)
    triggers = getTriggers(self.events,
        ak.flatten(lepton_pdgId_pt_ordered[:,0:1]),
        ak.flatten(lepton_pdgId_pt_ordered[:,1:2]), year=self.year, dataset=self.dataset)

    # at least one same-charge pair among the leptons
    dilep = choose(lepton, 2)
    SS_dilep = (dilep['0'].charge*dilep['1'].charge > 0)
    los_trilep_SS = (ak.any(SS_dilep, axis=1))

    ht = ak.sum(self.jet_all.pt, axis=1)
    st_veto = self.met.pt + ht + ak.sum(self.mu_veto.pt, axis=1) + ak.sum(self.ele_veto.pt, axis=1)

    lep0pt_veto = ((ak.num(self.ele_veto[(self.ele_veto.pt>25)]) + ak.num(self.mu_veto[(self.mu_veto.pt>25)]))>0)
    lep1pt_veto = ((ak.num(self.ele_veto[(self.ele_veto.pt>20)]) + ak.num(self.mu_veto[(self.mu_veto.pt>20)]))>1)

    self.selection.add('lepveto',       lepveto)
    self.selection.add('trilep',        los_trilep_SS)
    self.selection.add('filter',        self.filters)
    self.selection.add('trigger',       triggers)
    self.selection.add('p_T(lep0)>25',  lep0pt_veto)
    self.selection.add('p_T(lep1)>20',  lep1pt_veto)
    self.selection.add('N_jet>2',       (ak.num(self.jet_all)>2))
    self.selection.add('N_jet>3',       (ak.num(self.jet_all)>3))
    self.selection.add('N_central>1',   (ak.num(self.jet_central)>1))
    self.selection.add('N_central>2',   (ak.num(self.jet_central)>2))
    self.selection.add('N_btag>0',      (ak.num(self.jet_btag)>0))
    self.selection.add('N_fwd>0',       (ak.num(self.jet_fwd)>0))
    self.selection.add('MET>50',        (self.met.pt>50))
    self.selection.add('ST>600',        (st_veto>600))
    self.selection.add('offZ',          offZ_veto)

    reqs = [
        'filter',
        'lepveto',
        'trilep',
        'p_T(lep0)>25',
        'p_T(lep1)>20',
        'trigger',
        'offZ',
        'MET>50',
        'N_jet>2',
        'N_central>1',
        'N_btag>0',
        'N_fwd>0',
    ]

    if tight:
        reqs += [
            'N_jet>3',
            'N_central>2',
            'ST>600',
        ]

    self.reqs = [ sel for sel in reqs if sel not in omit ]
    reqs_d = { sel: True for sel in self.reqs }
    selection = self.selection.require(**reqs_d)

    if cutflow:
        # NOTE(review): the cutflow walks over every cut in reqs, including the
        # ones listed in omit, so its last row can be tighter than the returned
        # mask -- confirm whether that is intended.
        cutflow_reqs_d = {}
        for req in reqs:
            cutflow_reqs_d.update({req: True})
            cutflow.addRow( req, self.selection.require(**cutflow_reqs_d) )

    return selection
def dilep_baseline(self, omit=None, cutflow=None, tight=False, SS=True):
    '''
    Build the dilepton baseline selection and return the combined event mask.

    omit:    names of cuts that are registered but not required in the returned mask
    cutflow: give it a cutflow object if you want it to be filled; one row is added
             per cut, holding the cumulative mask up to and including that cut
    tight:   additionally require 'N_jet>4', 'N_central>3', 'ST>600' and 'MET>50'
    SS:      if True register and require the 'SS' cut (summed lepton charge != 0),
             otherwise the 'OS' cut (summed lepton charge == 0)

    Side effect: self.selection is replaced by a fresh PackedSelection holding
    every cut defined here.
    '''
    # a None default avoids sharing one mutable list between calls
    omit = [] if omit is None else omit

    self.selection = PackedSelection()

    lepton = ak.concatenate([self.ele, self.mu], axis=1)

    # exactly two leptons, and exactly two veto leptons
    is_dilep = ( ((ak.num(self.ele) + ak.num(self.mu))==2) & ((ak.num(self.ele_veto) + ak.num(self.mu_veto))==2) )

    lep0pt = ((ak.num(self.ele[(get_pt(self.ele)>25)]) + ak.num(self.mu[(get_pt(self.mu)>25)]))>0)
    lep1pt = ((ak.num(self.ele[(get_pt(self.ele)>20)]) + ak.num(self.mu[(get_pt(self.mu)>20)]))>1)

    # the charge requirement is expressed through the summed lepton charge
    charge_cut = 'SS' if SS else 'OS'
    if SS:
        charge_mask = ( ak.sum(lepton.charge, axis=1)!=0 )
    else:
        charge_mask = ( ak.sum(lepton.charge, axis=1)==0 )

    # pdgIds of the two highest-pt leptons (0 where there is no such lepton) feed the trigger lookup
    lepton_pdgId_pt_ordered = ak.fill_none(
        ak.pad_none(
            lepton[ak.argsort(lepton.pt, ascending=False)].pdgId, 2, clip=True),
        0)
    triggers = getTriggers(self.events,
        ak.flatten(lepton_pdgId_pt_ordered[:,0:1]),
        ak.flatten(lepton_pdgId_pt_ordered[:,1:2]), year=self.year, dataset=self.dataset)

    ht = ak.sum(self.jet_all.pt, axis=1)
    st = self.met.pt + ht + ak.sum(self.mu.pt, axis=1) + ak.sum(self.ele.pt, axis=1)

    self.selection.add('dilep',         is_dilep)
    self.selection.add('filter',        self.filters)
    self.selection.add('trigger',       triggers)
    self.selection.add('p_T(lep0)>25',  lep0pt)
    self.selection.add('p_T(lep1)>20',  lep1pt)
    self.selection.add(charge_cut,      charge_mask)
    self.selection.add('N_jet>3',       (ak.num(self.jet_all)>3))
    self.selection.add('N_jet>4',       (ak.num(self.jet_all)>4))
    self.selection.add('N_central>2',   (ak.num(self.jet_central)>2))
    self.selection.add('N_central>3',   (ak.num(self.jet_central)>3))
    self.selection.add('N_btag>0',      (ak.num(self.jet_btag)>0))
    self.selection.add('N_light>0',     (ak.num(self.jet_light)>0))
    self.selection.add('N_fwd>0',       (ak.num(self.jet_fwd)>0))
    self.selection.add('MET>30',        (self.met.pt>30))
    self.selection.add('MET>50',        (self.met.pt>50))
    self.selection.add('ST>600',        (st>600))

    ss_reqs = [
        'filter',
        'dilep',
        'p_T(lep0)>25',
        'p_T(lep1)>20',
        'trigger',
        charge_cut,
        'N_jet>3',
        'N_central>2',
        'N_btag>0',
        'N_light>0',
        'MET>30',
        'N_fwd>0',
    ]

    if tight:
        ss_reqs += [
            'N_jet>4',
            'N_central>3',
            'ST>600',
            'MET>50',
        ]

    ss_reqs_d = { sel: True for sel in ss_reqs if sel not in omit }
    ss_selection = self.selection.require(**ss_reqs_d)

    if cutflow:
        # NOTE(review): the cutflow walks over every cut in ss_reqs, including
        # the ones listed in omit, so its last row can be tighter than the
        # returned mask -- confirm whether that is intended.
        cutflow_reqs_d = {}
        for req in ss_reqs:
            cutflow_reqs_d.update({req: True})
            cutflow.addRow( req, self.selection.require(**cutflow_reqs_d) )

    return ss_selection
def jetVar_i(var, i):
    """Return entry ``i`` of every list in the jagged array ``var``.

    Lists are padded up to ``i + 1`` entries first, so events that have fewer
    than ``i + 1`` entries yield positive infinity instead of raising.
    """
    # np.inf instead of np.Inf: the capitalised alias was removed in NumPy 2.0
    paddedVar = ak.fill_none(ak.pad_none(var, i + 1), np.inf)
    return paddedVar[:, i]
def make_perm_table(bhad, blep, wja, wjb, lepton, met, nu):
    '''
    Combine the objects of a ttbar permutation into a single awkward table.

    Inputs:
        bhad, blep, wja, wjb: jagged jet collections; pt, eta, phi, mass and jetIdx are read from them
        lepton, nu: jagged lepton and neutrino collections; lepton.charge is read as well
        met: stored in the table unchanged
    Returns:
        awkward Table (ak.zip with depth_limit=1) containing
            1: the inputs (BHad, BLep, WJa, WJb, Lepton, MET, Nu)
            2: objects built from them (WLep, TLep, WHad, THad, TTbar), each an empty
               list for events in which one of its ingredients is missing
            3: n_perm_matches (summed number of entries in the four jet collections)
               and unique_matches (number of distinct jetIdx values >= 0 among them)
            4: boolean categories for events with exactly two merged jets (Merged_*)
               or exactly one lost jet (Lost_*)
    '''
        ## these attributes are based on those from URTTbar/interface/Permutation.h https://gitlab.cern.ch/jdulemba/URTTbar/-/blob/htt_analysis_2016legacydata_9410/interface/Permutation.h

    p4_components = ('pt', 'eta', 'phi', 'mass')

    def masked_sum_fields(first, second, keep):
        # components of (first + second) for events passing `keep`, [] for all other events;
        # the sum is built once and shared by all four components
        total = ak.mask(first, keep) + ak.mask(second, keep)
        return {comp: ak.fill_none(getattr(total, comp), []) for comp in p4_components}

    def first_jetIdx(jets):
        # fill empty [] events with -999 for easier comparisons
        return ak.fill_none(ak.pad_none(jets, 1).jetIdx, -999)

    def only_pair_merged(a, b, other1, other2):
        # a exists and shares its jet with b, but with neither of the other two
        return (a >= 0) & (a == b) & (a != other1) & (a != other2)

    def only_lost(lost, x, y, z):
        # `lost` has no jet, the other three exist and are resolved (pairwise different)
        return (lost < 0) & (x >= 0) & (y >= 0) & (z >= 0) & (x != y) & (x != z) & (y != z)

    isWLepComplete = (ak.num(lepton.pt) == 1) & (ak.num(nu.pt) == 1)
    isTLepComplete = isWLepComplete & (ak.num(blep.pt) == 1)

    wlep_fields = masked_sum_fields(lepton, nu, isWLepComplete)
    wlep_fields['charge'] = ak.fill_none(ak.mask(lepton, isWLepComplete).charge, [])
    WLep = ak.Array(wlep_fields, with_name="PtEtaPhiMLorentzVector")
    TLep = ak.Array(masked_sum_fields(WLep, blep, isTLepComplete), with_name="PtEtaPhiMLorentzVector")

    ## Event categories
    bhad_jetIdx = first_jetIdx(bhad)
    blep_jetIdx = first_jetIdx(blep)
    wja_jetIdx = first_jetIdx(wja)
    wjb_jetIdx = first_jetIdx(wjb)

        # merged jets event categories (exactly two objects share one jet)
    Merged_BHadBLep = only_pair_merged(bhad_jetIdx, blep_jetIdx, wja_jetIdx, wjb_jetIdx)
    Merged_BHadWJa = only_pair_merged(bhad_jetIdx, wja_jetIdx, blep_jetIdx, wjb_jetIdx)
    Merged_BHadWJb = only_pair_merged(bhad_jetIdx, wjb_jetIdx, blep_jetIdx, wja_jetIdx)
    Merged_BLepWJa = only_pair_merged(blep_jetIdx, wja_jetIdx, bhad_jetIdx, wjb_jetIdx)
    Merged_BLepWJb = only_pair_merged(blep_jetIdx, wjb_jetIdx, bhad_jetIdx, wja_jetIdx)
    Merged_WJets = only_pair_merged(wja_jetIdx, wjb_jetIdx, bhad_jetIdx, blep_jetIdx)
            # only two jets merged
    Merged_Event = (Merged_BHadBLep) | (Merged_BHadWJa) | (Merged_BHadWJb) | (Merged_BLepWJa) | (Merged_BLepWJb) | (Merged_WJets)

        # lost jet event categories
    Lost_BHad = only_lost(bhad_jetIdx, blep_jetIdx, wja_jetIdx, wjb_jetIdx)
    Lost_BLep = only_lost(blep_jetIdx, bhad_jetIdx, wja_jetIdx, wjb_jetIdx)
    Lost_WJa = only_lost(wja_jetIdx, bhad_jetIdx, blep_jetIdx, wjb_jetIdx)
    Lost_WJb = only_lost(wjb_jetIdx, bhad_jetIdx, blep_jetIdx, wja_jetIdx)
            # only one jet is lost, others exist and are resolved
    Lost_Event = (Lost_BHad) | (Lost_BLep) | (Lost_WJa) | (Lost_WJb)

    n_events = len(bhad.pt)
    one_per_event = np.ones(n_events, dtype=int)
    n_perm_matches = ak.unflatten(ak.num(bhad.pt)+ak.num(blep.pt)+ak.num(wja.pt)+ak.num(wjb.pt), one_per_event)

        # find number of unique matches
    jetIdx_stack = np.stack( (ak.to_numpy(ak.flatten(bhad_jetIdx)), ak.to_numpy(ak.flatten(blep_jetIdx)), ak.to_numpy(ak.flatten(wja_jetIdx)), ak.to_numpy(ak.flatten(wjb_jetIdx)) ), axis=1)
    unique_matches = ak.unflatten(np.array([len({ind for ind in inds if ind >= 0}) for inds in jetIdx_stack.tolist()]), one_per_event)

    # create WHad
        # inds where only WJb p4 is used as WHad
    use_wjb_inds = ak.flatten(Merged_BHadWJa | Merged_BLepWJa | Merged_WJets | Lost_WJa)
        # inds where only WJa p4 is used as WHad
    use_wja_inds = ak.flatten(Merged_BHadWJb | Merged_BLepWJb | Lost_WJb)
        # inds where combined p4 from WJa and WJb is used as WHad (all other inds)
    use_comb_inds = ~(use_wjb_inds | use_wja_inds)
    comb_p4 = ak.fill_none(ak.pad_none(wja, 1), 0) + ak.fill_none(ak.pad_none(wjb, 1), 0)

    def single_jet_values(jets, comp, inds):
        # component of the (single) jet for the selected events, nan where the jet is missing
        return ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(getattr(jets, comp), 1), np.nan)[inds]))

    whad = {}
    for comp in p4_components:
        values = np.zeros(n_events)
        values[use_wjb_inds] = single_jet_values(wjb, comp, use_wjb_inds)
        values[use_wja_inds] = single_jet_values(wja, comp, use_wja_inds)
        values[use_comb_inds] = ak.to_numpy(ak.flatten(getattr(comb_p4, comp)[use_comb_inds]))
        # NOTE(review): exactly 0 is treated as "no WHad" separately for every
        # component, so a genuine value of 0 in a single component (e.g. eta)
        # would be dropped from that component only -- confirm this cannot happen.
        values[values == 0.] = np.nan
        whad[comp] = values

    def drop_nan(values):
        # one-element list per event with a value, empty list where the value is nan
        exists = ~np.isnan(values)
        return ak.unflatten(values[exists], exists.astype(int))

    whad_fields = {comp: drop_nan(whad[comp]) for comp in p4_components}
    # opposite charge as WLep for events that exist
    whad_fields['charge'] = -1*ak.fill_none(ak.mask(WLep, ~np.isnan(whad['mass'])).charge, [])
    WHad = ak.Array(whad_fields, with_name="PtEtaPhiMLorentzVector")

    isWHadComplete = (ak.num(WHad.pt) == 1)
    isTHadComplete = (isWHadComplete) & (ak.num(bhad.pt) == 1)

    # create THad
    THad = ak.Array(masked_sum_fields(WHad, bhad, isTHadComplete), with_name="PtEtaPhiMLorentzVector")

    # create TTbar
    isComplete = isTHadComplete & isTLepComplete
    TTbar = ak.Array(masked_sum_fields(THad, TLep, isComplete), with_name="PtEtaPhiMLorentzVector")

    ## Combine everything into a single table, all objects are JaggedArrays
    permutations = ak.zip({
        "BHad" : bhad,
        "BLep" : blep,
        "WJa" : wja,
        "WJb" : wjb,
        "Lepton" : lepton,
        "MET" : met,
        "Nu" : nu,
        "WLep" : WLep,
        "TLep" : TLep,
        "WHad" : WHad,
        "THad" : THad,
        "TTbar" : TTbar,
        "n_perm_matches" : n_perm_matches,
        "unique_matches" : unique_matches,
        "Merged_BHadBLep" : Merged_BHadBLep,
        "Merged_BHadWJa" : Merged_BHadWJa,
        "Merged_BHadWJb" : Merged_BHadWJb,
        "Merged_BLepWJa" : Merged_BLepWJa,
        "Merged_BLepWJb" : Merged_BLepWJb,
        "Merged_WJets" : Merged_WJets,
        "Merged_Event" : Merged_Event,
        "Lost_BHad" : Lost_BHad,
        "Lost_BLep" : Lost_BLep,
        "Lost_WJa" : Lost_WJa,
        "Lost_WJb" : Lost_WJb,
        "Lost_Event" : Lost_Event,
    }, depth_limit=1)

    return permutations
tic = time.time()

# One row per event: x and y component of every MET flavour, followed by the LHE HT.
met_columns = []
for met_name in ('GenMET', 'MET', 'PuppiMET', 'DeepMETResponseTune', 'DeepMETResolutionTune'):
    met_obj = getattr(events_slice, met_name)
    met_columns.append(met_obj.pt * np.cos(met_obj.phi))
    met_columns.append(met_obj.pt * np.sin(met_obj.phi))
met_columns.append(events_slice.LHE.HT)
met_list = np.column_stack(met_columns)

# Every PF-candidate feature is padded/clipped to nparticles_per_event entries per
# event (missing entries become -999); the features are stacked along a new leading axis.
pf_features = (
    'pt', 'eta', 'phi', 'd0', 'dz', 'mass',
    'puppiWeight', 'pdgId', 'charge', 'fromPV', 'pvRef', 'pvAssocQuality',
)
particle_list = ak.concatenate([
    [ak.fill_none(ak.pad_none(getattr(events_slice.PFCands, feature), nparticles_per_event, clip=True), -999)]
    for feature in pf_features
])

# Output goes to $PWD/raw/, the file name encodes dataset, file, slice and event count.
out_dir = os.environ['PWD'] + '/raw/'
npz_file = out_dir + dataset + '_file' + str(currentfile) + '_slice_' + str(i) + '_nevent_' + str(len(events_slice))
np.savez(npz_file, x=particle_list, y=met_list)
toc = time.time()
def test_pad_none():
    """``ak.pad_none`` on ``empty``: only padding axis 0 up to length 1 adds a ``None``."""
    for clip in (False, True):
        for target in (0, 1):
            for axis in (0, 1, 2):
                expected = [None] if (target == 1 and axis == 0) else []
                padded = ak.pad_none(empty, target, axis=axis, clip=clip)
                assert padded.tolist() == expected
def test_ByteMaskedArray_pad_none():
    """``ak.pad_none`` on a ByteMaskedArray, for every axis (positive and negative) with and without clipping."""
    content = ak.from_iter(
        [
            [[0.0, 1.1, 2.2], [], [3.3, 4.4]],
            [],
            [[5.5]],
            [[6.6, 7.7, 8.8, 9.9]],
            [[], [10.0, 11.1, 12.2]],
        ],
        highlevel=False,
    )
    mask = ak.layout.Index8(np.array([0, 0, 1, 1, 0], dtype=np.int8))
    array = ak.Array(ak.layout.ByteMaskedArray(mask, content, valid_when=False))

    # the two non-trivial valid rows of the unpadded array
    first_row = [[0.0, 1.1, 2.2], [], [3.3, 4.4]]
    last_row = [[], [10.0, 11.1, 12.2]]

    assert ak.to_list(array) == [first_row, [], None, None, last_row]

    # (target length, equivalent axes, clip, expected list)
    cases = [
        (7, (0, -3), False, [first_row, [], None, None, last_row, None, None]),
        (
            3, (1, -2), False,
            [
                first_row,
                [None, None, None],
                None,
                None,
                [[], [10.0, 11.1, 12.2], None],
            ],
        ),
        (
            3, (2, -1), False,
            [
                [[0.0, 1.1, 2.2], [None, None, None], [3.3, 4.4, None]],
                [],
                None,
                None,
                [[None, None, None], [10.0, 11.1, 12.2]],
            ],
        ),
        (3, (0, -3), True, [first_row, [], None]),
        (
            2, (1, -2), True,
            [
                [[0.0, 1.1, 2.2], []],
                [None, None],
                None,
                None,
                last_row,
            ],
        ),
        (
            2, (2, -1), True,
            [
                [[0.0, 1.1], [None, None], [3.3, 4.4]],
                [],
                None,
                None,
                [[None, None], [10.0, 11.1]],
            ],
        ),
    ]
    for target, axes, clip, expected in cases:
        for axis in axes:
            padded = ak.pad_none(array, target, axis=axis, clip=clip)
            assert ak.to_list(padded) == expected
def process(self, events):
    """Fill the fat-jet and event histograms of the accumulator for one chunk of events.

    Steps, in order: bookkeeping of the processed events ('sumw' for simulation,
    'nbtagmu' for data), event weights, jet energy corrections for the fat jets,
    trigger requirement, muon / jet / fat-jet object selection, leading-fat-jet
    variables and cuts, and finally one histogram fill per flavour category.

    Returns the filled accumulator.
    """

    def hlt_field(trigger_name):
        # Field name inside events.HLT, i.e. the trigger name without a leading
        # "HLT_". str.strip("HLT_") is not used on purpose: it removes any of
        # the characters H, L, T, _ from BOTH ends of the name.
        prefix = "HLT_"
        if trigger_name.startswith(prefix):
            return trigger_name[len(prefix):]
        return trigger_name

    output = self.accumulator.identity()
    dataset = events.metadata['dataset']

    isRealData = 'genWeight' not in events.fields
    if not isRealData:
        output['sumw'][dataset] += sum(events.genWeight)
        JECversion = JECversions[str(self.year)]['MC']
    else:
        output['nbtagmu'][dataset] += ak.count(events.event)
        JECversion = JECversions[str(
            self.year)]['Data'][dataset.split('BTagMu')[1]]

    ############
    # Some corrections
    weights = processor.Weights(len(events))
    if not isRealData:
        weights.add('genWeight', events.genWeight)
        weights.add(
            'pileup_weight',
            self.puReweight(self.puFile, self.nTrueFile,
                            dataset)(events.Pileup.nPU))

    events.FatJet = self.applyJEC(events.FatJet,
                                  events.fixedGridRhoFastjetAll,
                                  events.caches[0], 'AK8PFPuppi', isRealData,
                                  JECversion)

    cuts = processor.PackedSelection()

    ############
    # Trigger selection
    # Trigger names are adapted on a local copy: changing self.triggers in
    # place would carry the renaming over to later chunks (and append
    # "_noalgo" again and again).
    triggers = list(self.triggers)
    if self.year == 2016:
        if 'BTagMu_AK4Jet300_Mu5' not in events.HLT.fields:
            triggers = [trigger.replace('AK4', '') for trigger in triggers]
    elif self.year == 2018:
        triggers = [
            trigger if hlt_field(trigger) in events.HLT.fields else trigger + "_noalgo"
            for trigger in triggers
        ]

    # logical OR of all trigger decisions
    req_trig = np.zeros(len(events), dtype='bool')
    for trigger in triggers:
        req_trig = req_trig | events.HLT[hlt_field(trigger)]

    cuts.add('trigger', ak.to_numpy(req_trig))

    ############
    # Basic cuts

    ## Muon cuts
    # muon twiki: https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideMuonIdRun2
    # abs() has to wrap eta itself; wrapped around the comparison it left the
    # negative-eta side without any bound
    events.Muon = events.Muon[(events.Muon.pt > 5)
                              & (abs(events.Muon.eta) < 2.4)
                              & (events.Muon.tightId != 1)
                              & (events.Muon.pfRelIso04_all > 0.15)]
    events.Muon = ak.pad_none(events.Muon, 2, axis=1)

    ## Jet cuts  (not used)
    events.Jet = events.Jet[(events.Jet.pt > 25)
                            & (abs(events.Jet.eta) <= 2.5)]

    ## FatJet cuts
    basic_cut = self._mask_fatjets['basic']
    events.FatJet = events.FatJet[
        (events.FatJet.pt > basic_cut['pt_cut'])
        & (abs(events.FatJet.eta) <= basic_cut['eta_cut'])
        & (events.FatJet.jetId > basic_cut['jetId_cut'])
        & (ak.count(events.FatJet.subjets.pt, axis=2) >= 2)]  ## subjet sel to crosscheck

    ## Event level variables
    eventVariables = {}
    eventVariables['nfatjet'] = ak.num(events.FatJet)

    ## Leading jet variables
    leadfatjet = ak.firsts(events.FatJet)
    leadfatjet['tau21'] = leadfatjet.tau2 / leadfatjet.tau1
    padded_subjets = ak.pad_none(leadfatjet.subjets, 2)
    subjet1 = padded_subjets[:, 0]
    subjet2 = padded_subjets[:, 1]
    leadfatjet['nsv1'] = get_nsv(subjet1, events.SV)
    leadfatjet['nsv2'] = get_nsv(subjet2, events.SV)
    # NOTE(review): ak.num counts the entries of the comparison result, not the
    # entries that are True; if the number of muons within 0.4 is meant, a sum
    # over the boolean values would be needed -- confirm the intent.
    leadfatjet['nmusj1'] = ak.num(subjet1.delta_r(events.Muon) < 0.4)
    leadfatjet['nmusj2'] = ak.num(subjet2.delta_r(events.Muon) < 0.4)

    fatjet_mutag = (leadfatjet.nmusj1 >= 1) & (leadfatjet.nmusj2 >= 1)
    cuts.add('fatjet_mutag', ak.to_numpy(fatjet_mutag))

    # pass / fail region for every tagger working point
    for DDX in self._mask_DDX.keys():
        for wp, cut in self._mask_DDX[DDX].items():
            DDX_pass = (leadfatjet[f'btag{DDX}vLV2'] > cut)
            DDX_fail = (leadfatjet[f'btag{DDX}vLV2'] < cut)
            cuts.add(f'{DDX}_pass{wp}wp', ak.to_numpy(DDX_pass))
            cuts.add(f'{DDX}_fail{wp}wp', ak.to_numpy(DDX_fail))

    # flavour categories of the leading fat jet; made mutually exclusive below
    flavors = {}
    if not isRealData:
        flavors['b'] = (leadfatjet.hadronFlavour == 5)
        flavors['c'] = (leadfatjet.hadronFlavour == 4)
        flavors['l'] = (leadfatjet.hadronFlavour < 4)
        flavors['bb'] = (leadfatjet.hadronFlavour == 5) & (
            leadfatjet.nBHadrons >= 2)
        flavors['cc'] = (leadfatjet.hadronFlavour == 4) & (
            leadfatjet.nBHadrons == 0) & (leadfatjet.nCHadrons >= 2)
        flavors['b'] = flavors['b'] & ~flavors['bb']
        flavors['c'] = flavors['c'] & ~flavors['cc']
        flavors['l'] = flavors['l'] & ~flavors['bb'] & ~flavors[
            'cc'] & ~flavors['b'] & ~flavors['c']
    else:
        flavors['Data'] = np.ones(len(events), dtype='bool')

    # one named cut per configured fat-jet selection
    for selname, cut in self._mask_fatjets.items():
        sel = (leadfatjet.pt > cut['pt_cut']) & \
                (leadfatjet.msoftdrop > cut['mass_cut']) & \
                (abs(leadfatjet.eta) < cut['eta_cut']) & \
                (leadfatjet.jetId >= cut['jetId_cut']) & \
                (leadfatjet.tau21 < cut['tau21_cut'])
        cuts.add(selname, ak.to_numpy(sel))

    # selection name -> set of cut names that all have to pass
    selection = {}
    selection['basic'] = {'trigger', 'basic'}
    selection['pt350msd50'] = {'trigger', 'fatjet_mutag', 'pt350msd50'}
    selection['msd100tau06'] = {'trigger', 'fatjet_mutag', 'msd100tau06'}
    selection['pt400msd100tau06'] = {
        'trigger', 'fatjet_mutag', 'pt400msd100tau06'
    }

    for mask_f in self._final_mask:
        for DDX in self._mask_DDX.keys():
            for wp in self._mask_DDX[DDX]:
                selection[f'{mask_f}{DDX}pass{wp}wp'] = selection[
                    mask_f].copy()
                selection[f'{mask_f}{DDX}pass{wp}wp'].add(
                    f'{DDX}_pass{wp}wp')
                selection[f'{mask_f}{DDX}fail{wp}wp'] = selection[
                    mask_f].copy()
                selection[f'{mask_f}{DDX}fail{wp}wp'].add(
                    f'{DDX}_fail{wp}wp')

    for histname, h in output.items():
        # the selection is encoded as one of the '_'-separated parts of the histogram name
        sel = [r for r in selection.keys() if r in histname.split('_')]
        if (histname in self.fatjet_hists) or ('hist2d_fatjet' in histname):
            for flav, mask in flavors.items():
                weight = weights.weight() * cuts.all(
                    *selection[sel[0]]) * ak.to_numpy(mask)
                fields = {
                    k: ak.fill_none(leadfatjet[k], -9999)
                    for k in h.fields if k in dir(leadfatjet)
                }
                h.fill(dataset=dataset,
                       flavor=flav,
                       **fields,
                       weight=weight)
        if histname in self.event_hists:
            for flav, mask in flavors.items():
                weight = weights.weight() * cuts.all(
                    *selection[sel[0]]) * ak.to_numpy(mask)
                fields = {
                    k: ak.fill_none(eventVariables[k], -9999)
                    for k in h.fields if k in eventVariables.keys()
                }
                h.fill(dataset=dataset,
                       flavor=flav,
                       **fields,
                       weight=weight)

    return output
def test_IndexedOptionArray_pad_none():
    """``ak.pad_none`` on an array with missing entries, for every axis (positive and negative) with and without clipping."""
    # the two non-trivial valid rows of the unpadded array
    first_row = [[0.0, 1.1, 2.2], [], [3.3, 4.4]]
    last_row = [[], [10.0, 11.1, 12.2]]

    array = ak.Array(
        [[[0.0, 1.1, 2.2], [], [3.3, 4.4]], [], None, None, [[], [10.0, 11.1, 12.2]]]
    )

    assert ak.to_list(array) == [first_row, [], None, None, last_row]

    # (target length, equivalent axes, clip, expected list)
    cases = [
        (7, (0, -3), False, [first_row, [], None, None, last_row, None, None]),
        (
            3, (1, -2), False,
            [
                first_row,
                [None, None, None],
                None,
                None,
                [[], [10.0, 11.1, 12.2], None],
            ],
        ),
        (
            3, (2, -1), False,
            [
                [[0.0, 1.1, 2.2], [None, None, None], [3.3, 4.4, None]],
                [],
                None,
                None,
                [[None, None, None], [10.0, 11.1, 12.2]],
            ],
        ),
        (3, (0, -3), True, [first_row, [], None]),
        (
            2, (1, -2), True,
            [
                [[0.0, 1.1, 2.2], []],
                [None, None],
                None,
                None,
                last_row,
            ],
        ),
        (
            2, (2, -1), True,
            [
                [[0.0, 1.1], [None, None], [3.3, 4.4]],
                [],
                None,
                None,
                [[None, None], [10.0, 11.1]],
            ],
        ),
    ]
    for target, axes, clip, expected in cases:
        for axis in axes:
            padded = ak.pad_none(array, target, axis=axis, clip=clip)
            assert ak.to_list(padded) == expected
def process(self, events):
    """Select same-sign dilepton events and store flat per-event columns.

    Events with more than two jets are kept, leptons/jets are built with the
    project helpers, a packed selection is evaluated, and one numpy column
    per observable is appended to the accumulator for events passing the
    baseline selection ``BL``.

    Parameters
    ----------
    events : event array
        Must provide ``Jet``, ``MET``, ``GenL``, ``GenPart`` and
        ``metadata['dataset']``; for every dataset except ``'MuonEG'`` also
        ``weight`` and the ``puWeight*`` branches.

    Returns
    -------
    The accumulator obtained from ``self.accumulator.identity()`` with the
    counters and columns filled.
    """
    output = self.accumulator.identity()

    output['total']['all'] += len(events)

    # use a very loose preselection to filter the events
    presel = ak.num(events.Jet) > 2
    ev = events[presel]
    dataset = ev.metadata['dataset']

    # load the config - probably not needed anymore
    cfg = loadConfig()

    ## Muons
    muon = Collections(ev, "Muon", "vetoTTH").get()
    tightmuon = Collections(ev, "Muon", "tightSSTTH").get()
    dimuon = choose(muon, 2)
    SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1)

    ## Electrons
    electron = Collections(ev, "Electron", "vetoTTH").get()
    tightelectron = Collections(ev, "Electron", "tightSSTTH").get()
    dielectron = choose(electron, 2)
    SSelectron = ak.any(
        (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)

    ## Merge electrons and muons - this should work better now in ak1
    dilepton = cross(muon, electron)
    SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0, axis=1)

    lepton = ak.concatenate([muon, electron], axis=1)
    lepton = get_four_vec(lepton)
    leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
    leading_lepton = get_four_vec(lepton[leading_lepton_idx])
    trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
    trailing_lepton = get_four_vec(lepton[trailing_lepton_idx])

    dilepton_mass = (leading_lepton + trailing_lepton).mass
    dilepton_pt = (leading_lepton + trailing_lepton).pt
    dilepton_dR = delta_r(leading_lepton, trailing_lepton)

    mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi)
    min_mt_lep_met = ak.min(mt_lep_met, axis=1)

    ## Jets
    jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
    # need to sort wrt smeared and recorrected jet pt
    jet = jet[ak.argsort(jet.pt_nom, ascending=False)]
    # remove jets that overlap with muons / electrons
    jet = jet[~match(jet, muon, deltaRCut=0.4)]
    jet = jet[~match(jet, electron, deltaRCut=0.4)]

    central = jet[(abs(jet.eta) < 2.4)]
    # should study working point for DeepJet
    btag = getBTagsDeepFlavB(jet, year=self.year)
    light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
    fwd = getFwdJet(light)
    fwd_noPU = getFwdJet(light, puId=False)
    # the leading forward jets that are not in the 5 leading jets overall
    fwd_cleaned = fwd[~match(fwd, getFwdJet(jet[:, 0:5]), deltaRCut=0.1)]

    tau = getTaus(ev)
    track = getIsoTracks(ev)

    ## forward jets
    # highest momentum spectator
    j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))]

    # Sort by b-tag score in DESCENDING order so that the first two entries
    # really are the highest-scoring central jets. ak.argsort sorts ascending
    # by default, which previously selected the two lowest-scoring jets.
    high_score_btag = central[
        ak.argsort(central.btagDeepFlavB, ascending=False)][:, :2]

    bl = cross(lepton, high_score_btag)
    bl_dR = delta_r(bl['0'], bl['1'])
    min_bl_dR = ak.min(bl_dR, axis=1)

    jf = cross(j_fwd, jet)
    mjf = (jf['0'] + jf['1']).mass
    # this is the jet that forms the largest invariant mass with j_fwd
    j_fwd2 = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1']
    delta_eta = ak.fill_none(
        ak.pad_none(abs(j_fwd2.eta - j_fwd.eta), 1, clip=True), 0)

    ## MET -> can switch to puppi MET
    met_pt = ev.MET.pt

    ## other variables
    ht = ak.sum(jet.pt, axis=1)
    st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)

    ## event selectors
    filters = getFilters(ev, year=self.year, dataset=dataset)

    dilep = ((ak.num(tightelectron) + ak.num(tightmuon)) == 2)
    lep0pt = ((ak.num(electron[(electron.pt > 25)])
               + ak.num(muon[(muon.pt > 25)])) > 0)
    lep1pt = ((ak.num(electron[(electron.pt > 20)])
               + ak.num(muon[(muon.pt > 20)])) > 1)
    lepveto = ((ak.num(electron) + ak.num(muon)) == 2)

    selection = PackedSelection()
    selection.add('lepveto', lepveto)
    selection.add('dilep', dilep)
    selection.add('filter', (filters))
    selection.add('p_T(lep0)>25', lep0pt)
    selection.add('p_T(lep1)>20', lep1pt)
    selection.add('SS', (SSlepton | SSelectron | SSmuon))
    selection.add('N_jet>3', (ak.num(jet) >= 4))
    selection.add('N_central>2', (ak.num(central) >= 3))
    selection.add('N_btag>0', (ak.num(btag) >= 1))
    selection.add('N_fwd>0', (ak.num(fwd) >= 1))

    ss_reqs = [
        'lepveto', 'dilep', 'filter', 'p_T(lep0)>25', 'p_T(lep1)>20', 'SS'
    ]
    # 'N_fwd>0' is registered above but deliberately not part of the baseline
    bl_reqs = ss_reqs + ['N_jet>3', 'N_central>2', 'N_btag>0']

    ss_selection = selection.require(**{req: True for req in ss_reqs})
    BL = selection.require(**{req: True for req in bl_reqs})

    weight = Weights(len(ev))

    if dataset != 'MuonEG':
        # lumi weight
        weight.add("weight", ev.weight)
        # PU weight - not in the babies...
        weight.add("PU",
                   ev.puWeight,
                   weightUp=ev.puWeightUp,
                   weightDown=ev.puWeightDown,
                   shift=False)
        # b-tag SFs
        weight.add("btag", self.btagSF.Method1a(btag, light))
        # lepton SFs
        weight.add("lepton", self.leptonSF.get(electron, muon))

    # per-dataset label; datasets missing from the table get 5
    labels = {
        'topW_v3': 0,
        'TTW': 1,
        'TTZ': 2,
        'TTH': 3,
        'ttbar': 4,
        'ttbar1l_MG': 4,
        'DY': 6,
        'topW_EFT_cp8': 100
    }
    label_mult = labels.get(dataset, 5)
    label = np.ones(len(ev[BL])) * label_mult

    n_nonprompt = (getNonPromptFromFlavour(tightelectron)
                   + getNonPromptFromFlavour(tightmuon))[BL]
    n_chargeflip = (getChargeFlips(tightelectron, ev.GenPart)
                    + getChargeFlips(tightmuon, ev.GenPart))[BL]
    n_genlep = ak.num(ev.GenL, axis=1)[BL]

    # >1000 for charge flip, >100 for non prompt, >10 for more than 2 gen
    # lep, 1 for prompt
    label_cat = ((n_nonprompt > 0) * 100 + (n_chargeflip > 0) * 1000
                 + (n_genlep > 2) * 10 + np.ones(len(ev[BL])))
    if dataset == 'topW_v3':
        label_cat = np.ones(len(ev[BL])) * 0
    else:
        # this makes charge flip 4, nonprompt 3...
        label_cat = (4 * (label_cat >= 1000)
                     + 3 * ((label_cat >= 100) & (label_cat < 1000))
                     + 2 * ((label_cat >= 10) & (label_cat < 100))
                     + 1 * (label_cat < 10))
    label_cat = np.array(label_cat)

    def store(key, values):
        """Append ``values`` (converted to numpy) to the column ``key``."""
        output[key] += processor.column_accumulator(ak.to_numpy(values))

    def flat(values):
        """Remove the per-event list level of ``values``."""
        return ak.flatten(values, axis=1)

    kinematics = ("pt", "eta", "phi", "energy", "px", "py", "pz")
    lepton_vars = ("pt", "eta", "phi", "charge", "energy", "px", "py", "pz")

    store("n_lep", (ak.num(electron) + ak.num(muon))[BL])
    store("n_lep_tight", (ak.num(tightelectron) + ak.num(tightmuon))[BL])

    o_leading_lepton = get_four_vec(leading_lepton[BL])
    for var in lepton_vars:
        store("lead_lep_" + var, flat(getattr(o_leading_lepton, var)))

    o_trailing_lepton = get_four_vec(trailing_lepton[BL])
    for var in lepton_vars:
        store("sublead_lep_" + var, flat(getattr(o_trailing_lepton, var)))

    for prefix, start in (("lead_jet_", 0), ("sublead_jet_", 1)):
        for var in ("pt", "eta", "phi"):
            store(prefix + var,
                  flat(getattr(jet[:, start:start + 1][BL], var)))

    # five leading jets; pad_and_flatten is the project helper used for
    # slices that may be empty
    for i in range(5):
        for var in kinematics:
            store("j%s_%s" % (i, var),
                  pad_and_flatten(getattr(jet[:, i:i + 1][BL], var)))

    # "j5" is the leading forward jet that is not among the 5 leading jets
    for var in kinematics:
        store("j5_" + var,
              pad_and_flatten(getattr(fwd_cleaned[:, 0:1][BL], var)))

    for prefix, start in (("lead_btag_", 0), ("sublead_btag_", 1)):
        for var in kinematics:
            store(prefix + var,
                  flat(getattr(high_score_btag[:, start:start + 1][BL], var)))

    # events without a forward jet are stored as 0
    for var in ("p",) + kinematics:
        store("fwd_jet_" + var,
              flat(ak.fill_none(
                  ak.pad_none(getattr(j_fwd[BL], var), 1, clip=True), 0)))

    store("mjj_max", ak.fill_none(ak.max(mjf[BL], axis=1), 0))
    store("delta_eta_jj", flat(delta_eta[BL]))
    store("met", met_pt[BL])
    store("ht", ht[BL])
    store("st", st[BL])
    store("n_jet", ak.num(jet[BL]))
    store("n_btag", ak.num(btag[BL]))
    store("n_fwd", ak.num(fwd[BL]))
    store("n_central", ak.num(central[BL]))
    store("n_tau", ak.num(tau[BL]))
    store("n_track", ak.num(track[BL]))

    store("dilepton_pt", flat(dilepton_pt[BL]))
    store("dilepton_mass", flat(dilepton_mass[BL]))
    store("min_bl_dR", min_bl_dR[BL])
    store("min_mt_lep_met", min_mt_lep_met[BL])

    # these are already numpy arrays and are stored without conversion
    output["label"] += processor.column_accumulator(label)
    output["label_cat"] += processor.column_accumulator(label_cat)
    output["weight"] += processor.column_accumulator(weight.weight()[BL])

    output["presel"]["all"] += len(ev[ss_selection])
    output["sel"]["all"] += len(ev[BL])

    return output
def future_savez(dataset, currentfile):
    """Select tight-lepton events and save PF candidates plus MET targets.

    Reads ``events_slice``, ``options`` and ``i`` from the enclosing scope.
    Tight muons/electrons are stored back into ``events_slice``, events with
    at least ``options.n_leptons`` tight leptons are kept, the px/py of the
    leading ``options.n_leptons_subtract`` leptons are added to each MET
    flavour, PF candidates matched to those leptons are removed, and the
    padded candidate features (``x``) and MET columns (``y``) are written
    with ``np.savez``.

    Parameters
    ----------
    dataset : str
        Dataset name used in the output file name.
    currentfile
        File identifier used in the output file name (passed through ``str``).
    """
    print('before selection ', len(events_slice))

    # select Muon
    tagged_muons = events_slice.Muon[:]
    muon_id = events_slice.Muon.tightId == 1
    muon_iso = events_slice.Muon.pfRelIso03_all < 0.15
    muon_pt = events_slice.Muon.pt > 20.
    tagged_muons['istight'] = (muon_id & muon_iso & muon_pt)
    events_slice['Muon'] = tagged_muons[tagged_muons.istight]

    # select electrons
    tagged_electrons = events_slice.Electron[:]
    electron_id = events_slice.Electron.mvaFall17V1Iso_WP80 == 1
    electron_pt = events_slice.Electron.pt > 20.0
    tagged_electrons['istight'] = (electron_id & electron_pt)
    events_slice['Electron'] = tagged_electrons[tagged_electrons.istight]

    # select events with n tight leptons
    n_tight_muons = ak.count(
        events_slice.Muon.pt[events_slice.Muon.istight], axis=-1)
    n_tight_electrons = ak.count(
        events_slice.Electron.pt[events_slice.Electron.istight], axis=-1)
    n_tight_leptons = n_tight_muons + n_tight_electrons

    # number of leptons can be larger than the required number
    events_selected = events_slice[n_tight_leptons >= options.n_leptons]
    print('after selection ', len(events_selected))

    muons = events_selected.Muon[events_selected.Muon.istight]
    electrons = events_selected.Electron[events_selected.Electron.istight]

    # mix leptons and sort according to pt
    leptons = ak.concatenate([muons, electrons], axis=1)
    pt_order = ak.argsort(leptons.pt, axis=1, ascending=False)
    leptons = leptons[pt_order]
    # only want the first n_leptons_subtract leptons
    leptons = leptons[:, 0:int(options.n_leptons_subtract)]

    # per-event summed transverse momentum components of the kept leptons
    leptons_px = ak.sum(leptons.pt * np.cos(leptons.phi), axis=1)
    leptons_py = ak.sum(leptons.pt * np.sin(leptons.phi), axis=1)

    # x/y component of every MET flavour with the lepton momentum added,
    # followed by the LHE HT as the last column
    met_columns = []
    for met_name in ("GenMET", "MET", "PuppiMET", "DeepMETResponseTune",
                     "DeepMETResolutionTune"):
        met_columns.append(
            getattr(events_selected, met_name).pt
            * np.cos(getattr(events_selected, met_name).phi) + leptons_px)
        met_columns.append(
            getattr(events_selected, met_name).pt
            * np.sin(getattr(events_selected, met_name).phi) + leptons_py)
    met_columns.append(events_selected.LHE.HT)
    met_list = np.column_stack(met_columns)

    # remove the closest PF particle to each selected lepton
    overlap_removal = run_deltar_matching(events_selected.PFCands,
                                          leptons,
                                          drname='deltaR',
                                          radius=0.001,
                                          unique=True,
                                          sort=False)
    mask = ak.count(overlap_removal.deltaR, axis=-1) == 0
    events_selected['PFCands'] = events_selected.PFCands[mask]

    # save the rest of the PF candidates, padded to the largest multiplicity
    nparticles_per_event = max(ak.num(events_selected.PFCands.pt, axis=1))
    print("max NPF in this range: ", nparticles_per_event)

    pf_features = ("pt", "eta", "phi", "d0", "dz", "mass", "puppiWeight",
                   "pdgId", "charge", "fromPV", "pvRef", "pvAssocQuality")
    particle_list = ak.concatenate([
        [
            ak.fill_none(
                ak.pad_none(getattr(events_selected.PFCands, feature),
                            nparticles_per_event,
                            clip=True), -999)
        ] for feature in pf_features
    ])

    npz_file = ''.join((
        os.environ['PWD'], '/raw/', dataset, '_file', str(currentfile),
        '_slice_', str(i), '_nevent_', str(len(events_selected)),
    ))
    np.savez(npz_file, x=particle_list, y=met_list)
def process(self, events):
    """Fill the trilepton baseline histograms for one chunk of events.

    Events with more than two jets are kept, leptons and jets are built with
    the project helpers, the baseline mask comes from
    ``Selection.trilep_baseline`` and the histograms stored in the
    accumulator are filled for the selected events.

    Parameters
    ----------
    events : event array
        Must provide ``Jet``, ``MET``, ``PV`` and ``metadata['dataset']``;
        for datasets not matching ``MuonEG|DoubleMuon|DoubleEG|EGamma`` also
        ``weight`` and the ``puWeight*`` branches.

    Returns
    -------
    The accumulator obtained from ``self.accumulator.identity()`` with the
    counters and histograms filled.
    """
    output = self.accumulator.identity()

    # use a very loose preselection to filter the events
    presel = ak.num(events.Jet) > 2
    ev = events[presel]
    dataset = ev.metadata['dataset']

    # load the config (provides the luminosity used for the lumi weight)
    cfg = loadConfig()

    output['totalEvents']['all'] += len(events)
    output['skimmedEvents']['all'] += len(ev)

    ## Muons
    muon = Collections(ev, "Muon", "tightTTH").get()
    vetomuon = Collections(ev, "Muon", "vetoTTH").get()

    ## Electrons
    electron = Collections(ev, "Electron", "tightTTH").get()
    vetoelectron = Collections(ev, "Electron", "vetoTTH").get()

    ## Merge electrons and muons - this should work better now in ak1
    dilepton = cross(muon, electron)

    dimuon = choose(muon, 2)
    OS_dimuon = dimuon[(dimuon['0'].charge * dimuon['1'].charge < 0)]

    dielectron = choose(electron, 2)
    OS_dielectron = dielectron[(
        dielectron['0'].charge * dielectron['1'].charge < 0)]

    # opposite-sign same-flavour pair closest to 91.2 in mass, per flavour
    OS_dimuon_bestZmumu = OS_dimuon[ak.singletons(
        ak.argmin(abs(OS_dimuon.mass - 91.2), axis=1))]
    OS_dielectron_bestZee = OS_dielectron[ak.singletons(
        ak.argmin(abs(OS_dielectron.mass - 91.2), axis=1))]
    # one mass per event; -1 when there is no opposite-sign pair
    OS_dilepton_mass = ak.fill_none(
        ak.pad_none(ak.concatenate(
            [OS_dimuon_bestZmumu.mass, OS_dielectron_bestZee.mass], axis=1),
                    1,
                    clip=True), -1)

    lepton = ak.concatenate([muon, electron], axis=1)
    leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
    leading_lepton = lepton[leading_lepton_idx]
    trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
    trailing_lepton = lepton[trailing_lepton_idx]

    ## Jets
    jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
    # need to sort wrt smeared and recorrected jet pt
    jet = jet[ak.argsort(jet.pt_nom, ascending=False)]
    # remove jets that overlap with muons / electrons
    jet = jet[~match(jet, muon, deltaRCut=0.4)]
    jet = jet[~match(jet, electron, deltaRCut=0.4)]

    central = jet[(abs(jet.eta) < 2.4)]
    # should study working point for DeepJet
    btag = getBTagsDeepFlavB(jet, year=self.year)
    light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
    fwd = getFwdJet(light)
    fwd_noPU = getFwdJet(light, puId=False)

    ## forward jets
    # highest momentum spectator
    j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))]

    jf = cross(j_fwd, jet)
    mjf = (jf['0'] + jf['1']).mass

    ## MET -> can switch to puppi MET
    met_pt = ev.MET.pt

    ## other variables
    ht = ak.sum(jet.pt, axis=1)
    st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)

    # define the weight
    weight = Weights(len(ev))

    if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
        # lumi weight
        weight.add("weight", ev.weight * cfg['lumi'][self.year])
        # PU weight - not in the babies...
        weight.add("PU",
                   ev.puWeight,
                   weightUp=ev.puWeightUp,
                   weightDown=ev.puWeightDown,
                   shift=False)
        # b-tag SFs
        weight.add("btag", self.btagSF.Method1a(btag, light))
        # lepton SFs are currently not applied

    cutflow = Cutflow(output, ev, weight=weight)

    sel = Selection(
        dataset=dataset,
        events=ev,
        year=self.year,
        ele=electron,
        ele_veto=vetoelectron,
        mu=muon,
        mu_veto=vetomuon,
        jet_all=jet,
        jet_central=central,
        jet_btag=btag,
        jet_fwd=fwd,
        met=ev.MET,
    )

    BL = sel.trilep_baseline(cutflow=cutflow)

    # event weights of the baseline events, evaluated once and reused below
    weight_BL = weight.weight()[BL]

    # first, make a few super inclusive plots
    output['ST'].fill(dataset=dataset, ht=st[BL], weight=weight_BL)

    multiplicities = (
        ('PV_npvs', ev.PV[BL].npvs),
        ('PV_npvsGood', ev.PV[BL].npvsGood),
        ('N_jet', ak.num(jet)[BL]),
        ('N_b', ak.num(btag)[BL]),
        ('N_central', ak.num(central)[BL]),
        ('N_ele', ak.num(vetoelectron)[BL]),
        ('N_mu', ak.num(vetomuon)[BL]),
        ('N_fwd', ak.num(fwd)[BL]),
    )
    for hist_name, values in multiplicities:
        output[hist_name].fill(dataset=dataset,
                               multiplicity=values,
                               weight=weight_BL)

    # make a plot of the dilepton mass, but without applying the cut on the
    # dilepton mass itself (N-1 plot); the mask is built once and shared by
    # the values and their weights
    BL_no_offZ = sel.trilep_baseline(omit=['offZ'])
    output['dilep_mass'].fill(dataset=dataset,
                              mass=ak.flatten(OS_dilepton_mass[BL_no_offZ]),
                              weight=weight.weight()[BL_no_offZ])

    output['MET'].fill(dataset=dataset,
                       pt=ev.MET[BL].pt,
                       phi=ev.MET[BL].phi,
                       weight=weight_BL)

    output['lead_lep'].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
        eta=ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
        phi=ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
        weight=weight_BL)

    output['trail_lep'].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
        eta=ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
        phi=ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
        weight=weight_BL)

    output['j1'].fill(dataset=dataset,
                      pt=ak.flatten(jet.pt_nom[:, 0:1][BL]),
                      eta=ak.flatten(jet.eta[:, 0:1][BL]),
                      phi=ak.flatten(jet.phi[:, 0:1][BL]),
                      weight=weight_BL)

    output['j2'].fill(dataset=dataset,
                      pt=ak.flatten(jet[:, 1:2][BL].pt_nom),
                      eta=ak.flatten(jet[:, 1:2][BL].eta),
                      phi=ak.flatten(jet[:, 1:2][BL].phi),
                      weight=weight_BL)

    output['fwd_jet'].fill(dataset=dataset,
                           pt=ak.flatten(j_fwd[BL].pt),
                           eta=ak.flatten(j_fwd[BL].eta),
                           phi=ak.flatten(j_fwd[BL].phi),
                           weight=weight_BL)

    output['high_p_fwd_p'].fill(dataset=dataset,
                                p=ak.flatten(j_fwd[BL].p),
                                weight=weight_BL)

    # invariant mass of the three-lepton combinations of the veto leptons
    vetolepton = ak.concatenate([vetomuon, vetoelectron], axis=1)
    trilep = choose3(vetolepton, 3)
    trilep_m = trilep.mass

    output['m3l'].fill(dataset=dataset,
                       mass=ak.flatten(trilep_m[BL]),
                       weight=weight_BL)

    return output
def _put_tracks_into_blob(self, blob, tracks, reco_identifier, n_tracks):
    """
    Store a set of reconstructed tracks in the blob as a ``kp.Table``.

    Parameters
    ----------
    blob : dict-like
        The blob that receives the ``"Reco_" + reco_identifier`` table and,
        when ``n_tracks != 1``, the ``"RecStages"`` table.
    tracks : awkward array
        The tracks object to be put in the blob. Can be only best tracks.
    reco_identifier : string
        Appended to the blob key, the h5 location and the table name.
    n_tracks : int
        The number of tracks. A value of 1 is treated as a single (best)
        track; any other value as the full list of tracks.

    """
    many_tracks = n_tracks != 1

    # table column name -> attribute of the tracks object
    column_sources = {
        "pos_x": "pos_x",
        "pos_y": "pos_y",
        "pos_z": "pos_z",
        "dir_x": "dir_x",
        "dir_y": "dir_y",
        "dir_z": "dir_z",
        "E": "E",
        "rec_type": "rec_type",
        "t": "t",
        "likelihood": "lik",
        "length": "len",  # do all recos have this?
    }
    reco_tracks = {
        column: getattr(tracks, attribute)
        for column, attribute in column_sources.items()
    }

    if many_tracks:
        reco_tracks["id"] = tracks.id
        reco_tracks["idx"] = np.arange(n_tracks)

    # pad fitinf so that every known fit parameter index exists; missing
    # entries become NaN
    fitparameters = km3io.definitions.fitparameters
    n_columns = max(fitparameters.values()) + 1
    padded_fitinf = ak.to_numpy(
        ak.pad_none(tracks.fitinf, target=n_columns, axis=-1))
    fitinf_array = np.ma.filled(padded_fitinf,
                                fill_value=np.nan).astype("float32")
    fitinf_split = np.split(fitinf_array, fitinf_array.shape[-1], axis=-1)

    for fitparam, idx in fitparameters.items():
        column = fitinf_split[idx]
        reco_tracks[fitparam] = column[:, 0] if many_tracks else column[0]

    blob["Reco_" + reco_identifier] = kp.Table(
        reco_tracks,
        h5loc="/reco/" + reco_identifier,
        name="Reco " + reco_identifier,
        split_h5=self.split,
    )

    # write out the rec stages only once with all tracks
    if many_tracks:
        _counts = ak.count(tracks.rec_stages, axis=1)
        stage_table = dict(
            rec_stage=np.array(ak.flatten(tracks.rec_stages)._layout),
            idx=np.repeat(np.arange(n_tracks), _counts),
        )
        blob["RecStages"] = kp.Table(
            stage_table,
            # Small dtypes are used to save space: rec_stage is stored as
            # int16 and idx as uint16, so both must fit into those ranges.
            dtypes=[("rec_stage", np.int16), ("idx", np.uint16)],
            h5loc="/reco/rec_stages",
            name="Reconstruction Stages",
            split_h5=self.split,
        )
def test_ListOffsetArray_rpad_and_clip():
    """Check ``ak.pad_none`` with a target length of 0, with and without clipping."""
    # with clip=True every list is cut down to the target length of 0
    clipped = ak.pad_none(ak.Array([[1, 2, 3], [], [4, 5]]), 0, clip=True)
    assert clipped.tolist() == [[], [], []]

    # without clipping nothing is removed and nothing needs to be padded
    unclipped = ak.pad_none(ak.Array([[1, 2, 3], [], [4, 5]]), 0)
    assert unclipped.tolist() == [[1, 2, 3], [], [4, 5]]
def compare_matched_best_perms(mp, bp, njets, bp_mask=None):
    '''
    Compare object assignments across two permutations.

    Inputs: matched perm (mp), best perm (bp), njets category ('3Jets' or
    '4PJets'). bp_mask is accepted but not used.

    Returns a numpy array with one category per event:
    1 == right, 2 == matchable, 3 == unmatchable.

    Raises ValueError when the permutations differ in length, when njets is
    not a supported category, or when an event ends up without a category.
    '''
    if len(mp['BLep']) != len(bp['BLep']):
        raise ValueError("Permutations must have the same size in order to be compared!")
    if njets not in ('3Jets', '4PJets'):
        raise ValueError("Only 3Jets or 4+ jets categorizations are supported")

    def _padded(values):
        # at least one entry per event, -999 where the object is missing
        return ak.fill_none(ak.pad_none(values, 1), -999)

    mp_blep_idx = _padded(mp['BLep'].jetIdx)
    mp_bhad_idx = _padded(mp['BHad'].jetIdx)
    mp_wja_idx = _padded(mp['WJa'].jetIdx)
    mp_wjb_idx = _padded(mp['WJb'].jetIdx)
    mp_lep_pt = _padded(mp['Lepton'].pt)

    bp_blep_idx = _padded(bp['BLep'].jetIdx)
    bp_bhad_idx = _padded(bp['BHad'].jetIdx)
    bp_wja_idx = _padded(bp['WJa'].jetIdx)
    bp_wjb_idx = _padded(bp['WJb'].jetIdx)
    bp_lep_pt = _padded(bp['Lepton'].pt)

    # index comparisons (a match only counts for a real, non-negative index)
    same_blep = (mp_blep_idx == bp_blep_idx) & (mp_blep_idx >= 0)
    same_bhad = (mp_bhad_idx == bp_bhad_idx) & (mp_bhad_idx >= 0)
    # the two W-jet comparisons are evaluated but not used further below
    same_wja = (mp_wja_idx == bp_wja_idx) & (mp_wja_idx >= 0)
    same_wjb = (mp_wjb_idx == bp_wjb_idx) & (mp_wjb_idx >= 0)

    same_bs = same_blep & same_bhad
    same_lepton = (bp_lep_pt == mp_lep_pt)

    # 0 == '' (no gen matching), 1 == 'right', 2 == 'matchable', 3 == 'unmatchable', 4 == 'sl_tau', 5 == 'noslep'
    cats = np.zeros(len(mp['BLep']))

    has_ttbar = ak.num(mp['TTbar'].pt) > 0

    if njets == '3Jets':
        valid_evts = has_ttbar & (ak.flatten(mp['unique_matches'] >= 3))

        # merged events
        merged_evts = valid_evts & ak.flatten(mp['Merged_Event'])
        merged_type_ok = mp['Merged_BHadWJa'] | mp['Merged_BHadWJb'] | mp['Merged_WJets']
        correct_merged = merged_evts & ak.flatten(merged_type_ok)
        wrong_merged = merged_evts & ak.flatten(~merged_type_ok)

        # lost events
        lost_evts = valid_evts & ak.flatten(mp['Lost_Event'])
        lost_type_ok = mp['Lost_WJa'] | mp['Lost_WJb']
        correct_lost = lost_evts & ak.flatten(lost_type_ok)
        wrong_lost = lost_evts & ak.flatten(~lost_type_ok)

        # bs are matched correctly, bp wjet is matched to one of the matched
        # perm wjets, leptons are correctly matched
        wjet_matched = ((bp_wja_idx == mp_wja_idx) | (bp_wja_idx == mp_wjb_idx)) & (bp_wja_idx >= 0)
        right_matching = same_bs & wjet_matched & same_lepton

        # event categorization
        # unmatchable events
        unmatchable_evts = (~valid_evts | wrong_merged | wrong_lost)
        # right events: matched perm is correct event type and right object matching
        right_perm_evts = ak.flatten(
            (correct_lost & right_matching) | (correct_merged & right_matching))
        # matchable events: matched perm is correct event type but wrong object matching
        matchable_evts = ak.flatten(
            (correct_lost & ~right_matching) | (correct_merged & ~right_matching))
    else:
        valid_evts = has_ttbar & (ak.flatten(mp['unique_matches'] == 4))

        # the two W jets may be assigned in either order
        wjets_same_order = (bp_wja_idx == mp_wja_idx) & (bp_wjb_idx == mp_wjb_idx)
        wjets_swapped = (bp_wja_idx == mp_wjb_idx) & (bp_wjb_idx == mp_wja_idx)
        isWHadCorrect = wjets_same_order | wjets_swapped
        isTHadCorrect = same_bhad & isWHadCorrect
        isTLepCorrect = same_blep & same_lepton
        isCorrect = isTLepCorrect & isTHadCorrect

        # event categorization
        # unmatchable events
        unmatchable_evts = ~valid_evts
        # right events
        right_perm_evts = ak.flatten(isCorrect & valid_evts)
        # matchable events
        matchable_evts = ak.flatten((~isCorrect & valid_evts))

    # check that every event falls into at least one category
    categorized = unmatchable_evts | right_perm_evts | matchable_evts
    if not ak.all(categorized):
        raise ValueError("Events %s have no categories!" % ak.where(~categorized))

    # set values in category array (later assignments win on overlap)
    cats[right_perm_evts] = 1  # matched perm is correct event type and right object matching
    cats[matchable_evts] = 2  # matched perm is correct event type but wrong object matching
    cats[unmatchable_evts] = 3  # matched perm isn't valid or has wrong event type

    return cats
def uproot_tree_to_numpy(fname, MeanNormTuple, inbranches_listlist, nMaxslist,
                         nevents, treename="ttree", stop=None, branches=None):
    """Read branches from a ROOT tree into one column-standardised numpy array.

    Every branch named in ``inbranches_listlist`` is read from ``treename`` in
    ``fname``, padded along its last axis to the feature length of its group
    (missing entries become ``0.``), converted to numpy and concatenated along
    the last axis. The concatenated array is then standardised column by
    column (mean subtracted, divided by the standard deviation).

    Args:
        fname: File handed to ``u3.open``.
        MeanNormTuple: One ``(mean, norm)`` pair per branch group. The values
            are only printed; the per-branch normalisation that would use them
            is disabled (the code subtracts ``0.`` and divides by ``1.``).
        inbranches_listlist: List of branch groups, each a list of branch names.
        nMaxslist: Feature length for each branch group. Groups with length
            ``> 1`` are padded *and clipped* to that length; groups with length
            ``1`` are padded without clipping and get a trailing axis of size 1.
            Branches of a group whose length is below 1 are read but are not
            added to the output.
        nevents: Not used by this implementation.
        treename: Key of the tree inside the file.
        stop: Not used by this implementation.
        branches: Not used by this implementation (overwritten internally).

    Returns:
        The concatenated, standardised numpy array. Columns whose standard
        deviation is exactly zero are only mean-centred (left at ``0``) instead
        of being turned into NaN by a division by zero.
    """
    tree = u3.open(fname)[treename]

    # Expand the per-group settings to one entry per branch so that all
    # per-branch lists below line up index by index.
    branchnames = [name for group in inbranches_listlist for name in group]
    feature_lengths = [
        length
        for group, length in zip(inbranches_listlist, nMaxslist)
        for _ in group
    ]
    means = [
        pair[0]
        for group, pair in zip(inbranches_listlist, MeanNormTuple)
        for _ in group
    ]
    norms = [
        pair[1]
        for group, pair in zip(inbranches_listlist, MeanNormTuple)
        for _ in group
    ]

    # Pad each branch to its feature length and replace the resulting None
    # entries by 0.; clipping is only applied for feature lengths above 1.
    branches = [
        ak.fill_none(
            ak.pad_none(tree[name].array(),
                        target=length,
                        axis=-1,
                        clip=length > 1),
            0.)
        for name, length in zip(branchnames, feature_lengths)
    ]

    print("Debugigng means and norms")
    print(means)
    print(norms)
    print(branchnames)

    branches_numpy = []
    # ``means``/``norms`` stay in the zip on purpose: they keep the iteration
    # length identical to the previous behaviour even though the per-branch
    # normalisation itself is currently switched off.
    for br, brname, fl, _mean, _norm in zip(branches, branchnames,
                                            feature_lengths, means, norms):
        print("DBG {}".format(brname))
        print(br)
        print("Length: {}".format(len(br)))
        if brname == "TagVarCSV_trackJetDistVal":
            print("BONUS DEBUG!")
            entries = ak.count(br, axis=-1)
            print("Min: {}, Max: {}".format(ak.min(entries), ak.max(entries)))
        # ``(x - 0.) / 1.`` is the placeholder left by the disabled per-branch
        # normalisation; it is kept because it also promotes integer branches
        # to floating point.
        if fl > 1:
            branches_numpy.append((ak.to_numpy(br) - 0.) / 1.)
        elif fl == 1:
            branches_numpy.append(
                (np.expand_dims(ak.to_numpy(br), axis=-1) - 0.) / 1.)
    print("FINISHED THIS LOOP, YOU ARE PERFECT! :) ")

    numpyarray = np.concatenate(branches_numpy, axis=-1)

    print("\n" * 5)
    print("Some metrics about this numpy array")
    column_mean = np.mean(numpyarray, axis=0)
    column_std = np.std(numpyarray, axis=0)
    print(column_mean)
    print(column_std)

    print("Normalize array")
    # A constant column has std == 0; dividing by it would fill the column
    # with NaN/inf, so such columns are divided by 1 instead.
    safe_std = np.where(column_std == 0, 1., column_std)
    numpyarray = (numpyarray - column_mean) / safe_std

    print("Some metrics about this numpy array")
    print(np.mean(numpyarray, axis=0))
    print(np.std(numpyarray, axis=0))
    return numpyarray
def _closest_reco_index(reco_vecs, gen_vecs, max_dr=0.4):
    """Return per-event indices of the reco object closest in delta_r to the gen object.

    The index of the minimum delta_r is kept only when that delta_r is below
    ``max_dr``; otherwise the event ends up without a matched index.
    """
    reco_pairs, gen_pairs = ak.unzip(ak.cartesian([reco_vecs, gen_vecs], nested=False))
    delta_rs = reco_pairs.delta_r(gen_pairs)
    closest = ak.unflatten(ak.argmin(delta_rs, axis=1), ak.num(gen_vecs))
    return closest[delta_rs[closest] < max_dr]


def _stack_four_momentum(objs):
    """Stack px, py, pz, energy of ``objs`` into a float64 numpy array, one row per event.

    Events without an entry are padded and filled with -999.
    """
    def component(name):
        padded = ak.fill_none(ak.pad_none(getattr(objs, name), 1), -999)
        return ak.to_numpy(ak.flatten(padded)).astype('float64')

    return np.stack([component(name) for name in ("px", "py", "pz", "energy")], axis=-1)


def best_match(gen_hyp=None, jets=None, leptons=None, met=None):
    """Match reco jets and leptons to gen-level objects and build the matched permutation.

    For each of the gen objects ``BHad``, ``BLep``, ``WJa`` and ``WJb`` the reco
    jet with the smallest delta_r is selected and kept if that delta_r is below
    0.4; the same is done for the gen ``Lepton`` against the reco leptons. A
    neutrino is then obtained from ``find_nu`` using the matched ``BLep``, the
    matched lepton and the MET, and everything is passed to ``make_perm_table``.

    Args:
        gen_hyp: Gen-level objects with fields ``BHad``, ``BLep``, ``WJa``,
            ``WJb`` and ``Lepton``; must contain exactly one entry per event.
        jets: Reco jets providing ``pt``, ``eta``, ``phi`` and ``mass``.
        leptons: Reco leptons providing ``pt``, ``eta``, ``phi`` and ``mass``.
        met: Reco MET providing ``px`` and ``py``.

    Returns:
        The value returned by ``make_perm_table`` for the matched objects.

    Raises:
        ValueError: If any argument is ``None`` or if not every event has
            exactly one ``gen_hyp`` entry.
    """
    if gen_hyp is None:
        raise ValueError("Gen Objects gen_hyp needed for matching")
    if jets is None:
        raise ValueError("Reco jets needed for matching")
    if leptons is None:
        raise ValueError("Reco leptons needed for matching")
    if met is None:
        raise ValueError("Reco met needed for matching")

    if not ak.all(ak.num(gen_hyp) == 1):
        raise ValueError("Not all events for matching are semileptonic")

    kinematics = ["pt", "eta", "phi", "mass"]
    jets_ak = ak.with_name(jets[kinematics], "PtEtaPhiMLorentzVector")
    leps_ak = ak.with_name(leptons[kinematics], "PtEtaPhiMLorentzVector")

    # init dict of objects
    matched_objects = {}

    # match jet closest to gen objects
    for genobj in ['BHad', 'BLep', 'WJa', 'WJb']:
        genobj_ak = ak.with_name(gen_hyp[genobj][kinematics], "PtEtaPhiMLorentzVector")
        matched_jets_inds = _closest_reco_index(jets_ak, genobj_ak)
        matched_jets = jets[matched_jets_inds]

        # add matched perm objects
        matched_objects[genobj] = ak.Array({
            'pt': matched_jets.pt,
            'eta': matched_jets.eta,
            'phi': matched_jets.phi,
            'mass': matched_jets.mass,
            'jetIdx': matched_jets_inds,  # index of jet that the gen object is matched to in the event
        }, with_name="PtEtaPhiMLorentzVector")

    # match lepton closest to gen lepton
    genlep_ak = ak.with_name(gen_hyp['Lepton'][kinematics], "PtEtaPhiMLorentzVector")
    matched_leps = leptons[_closest_reco_index(leps_ak, genlep_ak)]

    # add matched perm objects
    matched_objects['Lepton'] = ak.Array(
        {key: matched_leps[key] for key in matched_leps.fields},
        with_name="PtEtaPhiMLorentzVector")

    # solve for neutrino
    nu_array = np.zeros((len(ak.num(jets)), 4), dtype='float64')
    # convert all inputs into 2d numpy arrays of dtype=float64 (won't work if they're not float64)
    blep_inputs = _stack_four_momentum(matched_objects['BLep'])
    lep_inputs = _stack_four_momentum(matched_objects['Lepton'])
    met_inputs = np.stack(
        (ak.to_numpy(ak.fill_none(met.px, -999)).astype('float64'),
         ak.to_numpy(ak.fill_none(met.py, -999)).astype('float64')),
        axis=-1)
    nu_array = find_nu(bleps=blep_inputs, leptons=lep_inputs, met=met_inputs, nu_array=nu_array)

    # events that have a solution and matched blep
    chi2 = nu_array[:, 3]
    valid_nu = ~((chi2 > 1e20) | (chi2 == 0))

    # convert px, py, pz to pt, eta, phi
    nu_px = nu_array[:, 0][valid_nu]
    nu_py = nu_array[:, 1][valid_nu]
    nu_pz = nu_array[:, 2][valid_nu]
    nu_pt = np.sqrt(np.square(nu_px) + np.square(nu_py))
    nu_phi = np.arctan2(nu_py, nu_px)
    nu_eta = np.arcsinh(nu_pz / nu_pt)

    # one entry for events with a valid solution, zero entries otherwise
    nu_counts = valid_nu.astype(int)
    matched_objects['Nu'] = ak.Array({
        'pt': ak.unflatten(nu_pt, nu_counts),
        'eta': ak.unflatten(nu_eta, nu_counts),
        'phi': ak.unflatten(nu_phi, nu_counts),
        'mass': ak.zeros_like(ak.unflatten(nu_px, nu_counts)),
        'chi2': ak.unflatten(chi2[valid_nu], nu_counts),
    }, with_name="PtEtaPhiMLorentzVector")

    matched_perm = make_perm_table(
        bhad=matched_objects['BHad'],
        blep=matched_objects['BLep'],
        wja=matched_objects['WJa'],
        wjb=matched_objects['WJb'],
        lepton=matched_objects['Lepton'],
        met=met,
        nu=matched_objects['Nu'])

    return matched_perm