def test(): array = ak.Array( ak.layout.RegularArray( ak.layout.NumpyArray(np.r_[1, 2, 3, 4, 5, 6, 7, 8, 9]), 3)) mask = ak.Array( ak.layout.NumpyArray( np.array([[True, True, True], [True, True, False], [True, False, True]]))) assert ak.mask(array, mask).to_list() == [[1, 2, 3], [4, 5, None], [7, None, 9]]
def test(): arr1 = ak.Array({"a": [1, 2], "b": [1, None]}) arr2 = ak.mask(arr1, [True, True]) assert isinstance(arr2.layout, ak.layout.ByteMaskedArray) assert isinstance(arr2.layout.content, ak.layout.RecordArray) assert isinstance(arr2.layout.content.field("b"), ak.layout.IndexedOptionArray64) assert isinstance(arr2.b.layout, ak.layout.IndexedOptionArray64) assert isinstance(arr2.b.layout.content, ak.layout.NumpyArray) assert ak.is_none(arr2.b).tolist() == [False, True] arr3 = ak.virtual(lambda: arr2, form=arr2.layout.form, length=len(arr2)) assert ak.is_none(arr3.b).tolist() == [False, True]
def test_tomask(): array = ak.Array([[0.0, 1.1, 2.2], [], [3.3, 4.4], [5.5], [6.6, 7.7, 8.8, 9.9]]) mask1 = ak.Array([True, True, False, False, True]) assert ak.to_list(array[mask1]) == [[0.0, 1.1, 2.2], [], [6.6, 7.7, 8.8, 9.9]] assert ak.to_list(ak.mask(array, mask1)) == [ [0.0, 1.1, 2.2], [], None, None, [6.6, 7.7, 8.8, 9.9], ] mask2 = ak.Array([[False, True, False], [], [True, True], [False], [True, False, False, True]]) assert ak.to_list(array[mask2]) == [[1.1], [], [3.3, 4.4], [], [6.6, 9.9]] assert ak.to_list(ak.mask(array, mask2)) == [ [None, 1.1, None], [], [3.3, 4.4], [None], [6.6, None, None, 9.9], ]
def mask_inf(var_array, var_name=None, var_inf_counter=None): """ Mask inf values in `var_array` with None. If var_inf_counter is passed, append there inplace for a given `var_name` the fraction of its inf values. Arguments: - var_array: awkward array, values of a given feature for a given set of taus - var_name (optional, default=None): string, variable name - var_inf_counter (optional, default=None): defaultdict(list), stores fraction of inf values for variables Returns var_array witn masked infs values to None """ if np.sum(np.isinf(var_array)) > 0: is_inf_mask = np.isinf(var_array) var_array = ak.mask(var_array, is_inf_mask, valid_when=False) if var_inf_counter is not None: var_inf_counter[var_name].append( np.sum(is_inf_mask) / ak.count(var_array)) return var_array
def process_shift(self, events, shift_name): dataset = events.metadata['dataset'] isRealData = not hasattr(events, "genWeight") selection = PackedSelection() weights = Weights(len(events), storeIndividual=True) output = self.make_output() if shift_name is None and not isRealData: output['sumw'] = ak.sum(events.genWeight) if isRealData or self._newTrigger: trigger = np.zeros(len(events), dtype='bool') for t in self._triggers[self._year]: if t in events.HLT.fields: trigger = trigger | events.HLT[t] selection.add('trigger', trigger) del trigger else: selection.add('trigger', np.ones(len(events), dtype='bool')) if isRealData: selection.add( 'lumimask', lumiMasks[self._year](events.run, events.luminosityBlock)) else: selection.add('lumimask', np.ones(len(events), dtype='bool')) if isRealData and self._skipRunB and self._year == '2017': selection.add('dropB', events.run > 299329) else: selection.add('dropB', np.ones(len(events), dtype='bool')) if isRealData: trigger = np.zeros(len(events), dtype='bool') for t in self._muontriggers[self._year]: if t in events.HLT.fields: trigger |= np.array(events.HLT[t]) selection.add('muontrigger', trigger) del trigger else: selection.add('muontrigger', np.ones(len(events), dtype='bool')) metfilter = np.ones(len(events), dtype='bool') for flag in self._met_filters[ self._year]['data' if isRealData else 'mc']: metfilter &= np.array(events.Flag[flag]) selection.add('metfilter', metfilter) del metfilter fatjets = events.FatJet fatjets['msdcorr'] = corrected_msoftdrop(fatjets) fatjets['qcdrho'] = 2 * np.log(fatjets.msdcorr / fatjets.pt) fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year) fatjets['msdcorr_full'] = fatjets['msdcorr'] * self._msdSF[self._year] candidatejet = fatjets[ # https://github.com/DAZSLE/BaconAnalyzer/blob/master/Analyzer/src/VJetLoader.cc#L269 (fatjets.pt > 200) & (abs(fatjets.eta) < 2.5) & fatjets.isTight # this is loose in sampleContainer ] candidatejet = candidatejet[:, : 2] # Only consider first two to match generators if self._jet_arbitration == 'pt': candidatejet = ak.firsts(candidatejet) elif self._jet_arbitration == 'mass': candidatejet = ak.firsts(candidatejet[ak.argmax( candidatejet.msdcorr, axis=1, keepdims=True)]) elif self._jet_arbitration == 'n2': candidatejet = ak.firsts(candidatejet[ak.argmin(candidatejet.n2ddt, axis=1, keepdims=True)]) elif self._jet_arbitration == 'ddb': candidatejet = ak.firsts(candidatejet[ak.argmax( candidatejet.btagDDBvLV2, axis=1, keepdims=True)]) elif self._jet_arbitration == 'ddc': candidatejet = ak.firsts(candidatejet[ak.argmax( candidatejet.btagDDCvLV2, axis=1, keepdims=True)]) else: raise RuntimeError("Unknown candidate jet arbitration") if self._tagger == 'v1': bvl = candidatejet.btagDDBvL cvl = candidatejet.btagDDCvL cvb = candidatejet.btagDDCvB elif self._tagger == 'v2': bvl = candidatejet.btagDDBvLV2 cvl = candidatejet.btagDDCvLV2 cvb = candidatejet.btagDDCvBV2 elif self._tagger == 'v3': bvl = candidatejet.particleNetMD_Xbb cvl = candidatejet.particleNetMD_Xcc / ( 1 - candidatejet.particleNetMD_Xbb) cvb = candidatejet.particleNetMD_Xcc / ( candidatejet.particleNetMD_Xcc + candidatejet.particleNetMD_Xbb) elif self._tagger == 'v4': bvl = candidatejet.particleNetMD_Xbb cvl = candidatejet.btagDDCvLV2 cvb = candidatejet.particleNetMD_Xcc / ( candidatejet.particleNetMD_Xcc + candidatejet.particleNetMD_Xbb) else: raise ValueError("Not an option") selection.add('minjetkin', (candidatejet.pt >= 450) & (candidatejet.pt < 1200) & (candidatejet.msdcorr >= 40.) & (candidatejet.msdcorr < 201.) & (abs(candidatejet.eta) < 2.5)) selection.add('_strict_mass', (candidatejet.msdcorr > 85) & (candidatejet.msdcorr < 130)) selection.add('_high_score', cvl > 0.8) selection.add('minjetkinmu', (candidatejet.pt >= 400) & (candidatejet.pt < 1200) & (candidatejet.msdcorr >= 40.) & (candidatejet.msdcorr < 201.) & (abs(candidatejet.eta) < 2.5)) selection.add('minjetkinw', (candidatejet.pt >= 200) & (candidatejet.pt < 1200) & (candidatejet.msdcorr >= 40.) & (candidatejet.msdcorr < 201.) & (abs(candidatejet.eta) < 2.5)) selection.add('jetid', candidatejet.isTight) selection.add('n2ddt', (candidatejet.n2ddt < 0.)) if not self._tagger == 'v2': selection.add('ddbpass', (bvl >= 0.89)) selection.add('ddcpass', (cvl >= 0.83)) selection.add('ddcvbpass', (cvb >= 0.2)) else: selection.add('ddbpass', (bvl >= 0.7)) selection.add('ddcpass', (cvl >= 0.45)) selection.add('ddcvbpass', (cvb >= 0.03)) jets = events.Jet jets = jets[(jets.pt > 30.) & (abs(jets.eta) < 2.5) & jets.isTight] # only consider first 4 jets to be consistent with old framework jets = jets[:, :4] dphi = abs(jets.delta_phi(candidatejet)) selection.add( 'antiak4btagMediumOppHem', ak.max(jets[dphi > np.pi / 2][self._ak4tagBranch], axis=1, mask_identity=False) < BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium']) ak4_away = jets[dphi > 0.8] selection.add( 'ak4btagMedium08', ak.max(ak4_away[self._ak4tagBranch], axis=1, mask_identity=False) > BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium']) met = events.MET selection.add('met', met.pt < 140.) goodmuon = ((events.Muon.pt > 10) & (abs(events.Muon.eta) < 2.4) & (events.Muon.pfRelIso04_all < 0.25) & events.Muon.looseId) nmuons = ak.sum(goodmuon, axis=1) leadingmuon = ak.firsts(events.Muon[goodmuon]) if self._looseTau: goodelectron = ((events.Electron.pt > 10) & (abs(events.Electron.eta) < 2.5) & (events.Electron.cutBased >= events.Electron.VETO)) nelectrons = ak.sum(goodelectron, axis=1) ntaus = ak.sum( ((events.Tau.pt > 20) & (abs(events.Tau.eta) < 2.3) & events.Tau.idDecayMode & ((events.Tau.idMVAoldDM2017v2 & 2) != 0) & ak.all(events.Tau.metric_table(events.Muon[goodmuon]) > 0.4, axis=2) & ak.all(events.Tau.metric_table( events.Electron[goodelectron]) > 0.4, axis=2)), axis=1, ) else: goodelectron = ( (events.Electron.pt > 10) & (abs(events.Electron.eta) < 2.5) & (events.Electron.cutBased >= events.Electron.LOOSE)) nelectrons = ak.sum(goodelectron, axis=1) ntaus = ak.sum( (events.Tau.pt > 20) & events.Tau.idDecayMode # bacon iso looser than Nano selection & ak.all(events.Tau.metric_table(events.Muon[goodmuon]) > 0.4, axis=2) & ak.all(events.Tau.metric_table(events.Electron[goodelectron]) > 0.4, axis=2), axis=1, ) selection.add('noleptons', (nmuons == 0) & (nelectrons == 0) & (ntaus == 0)) selection.add('onemuon', (nmuons == 1) & (nelectrons == 0) & (ntaus == 0)) selection.add('muonkin', (leadingmuon.pt > 55.) & (abs(leadingmuon.eta) < 2.1)) selection.add('muonDphiAK8', abs(leadingmuon.delta_phi(candidatejet)) > 2 * np.pi / 3) # W-Tag (Tag and Probe) # tag side selection.add( 'ak4btagMediumOppHem', ak.max(jets[dphi > np.pi / 2][self._ak4tagBranch], axis=1, mask_identity=False) > BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium']) selection.add('met40p', met.pt > 40.) selection.add('tightMuon', (leadingmuon.tightId) & (leadingmuon.pt > 53.)) # selection.add('ptrecoW', (leadingmuon + met).pt > 250.) selection.add('ptrecoW200', (leadingmuon + met).pt > 200.) selection.add( 'ak4btagNearMu', leadingmuon.delta_r(leadingmuon.nearest(ak4_away, axis=None)) < 2.0) _bjets = jets[self._ak4tagBranch] > BTagEfficiency.btagWPs[ self._ak4tagger][self._year]['medium'] # _nearAK8 = jets.delta_r(candidatejet) < 0.8 # _nearMu = jets.delta_r(ak.firsts(events.Muon)) < 0.3 # selection.add('ak4btagOld', ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1) _nearAK8 = jets.delta_r(candidatejet) < 0.8 _nearMu = jets.delta_r(leadingmuon) < 0.3 selection.add('ak4btagOld', ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1) # _nearAK8 = jets.delta_r(candidatejet) < 0.8 # _nearMu = jets.delta_r(candidatejet.nearest(events.Muon[goodmuon], axis=None)) < 0.3 # selection.add('ak4btagNew', ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1) # probe side selection.add('minWjetpteta', (candidatejet.pt >= 200) & (abs(candidatejet.eta) < 2.4)) # selection.add('noNearMuon', candidatejet.delta_r(candidatejet.nearest(events.Muon[goodmuon], axis=None)) > 1.0) selection.add('noNearMuon', candidatejet.delta_r(leadingmuon) > 1.0) ##### if isRealData: genflavor = ak.zeros_like(candidatejet.pt) else: if 'HToCC' in dataset or 'HToBB' in dataset: if self._ewkHcorr: add_HiggsEW_kFactors(weights, events.GenPart, dataset) weights.add('genweight', events.genWeight) if "PSWeight" in events.fields: add_ps_weight(weights, events.PSWeight) else: add_ps_weight(weights, None) if "LHEPdfWeight" in events.fields: add_pdf_weight(weights, events.LHEPdfWeight) else: add_pdf_weight(weights, None) if "LHEScaleWeight" in events.fields: add_scalevar_7pt(weights, events.LHEScaleWeight) add_scalevar_3pt(weights, events.LHEScaleWeight) else: add_scalevar_7pt(weights, []) add_scalevar_3pt(weights, []) add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset) bosons = getBosons(events.GenPart) matchedBoson = candidatejet.nearest(bosons, axis=None, threshold=0.8) if self._tightMatch: match_mask = ( (candidatejet.pt - matchedBoson.pt) / matchedBoson.pt < 0.5) & ((candidatejet.msdcorr - matchedBoson.mass) / matchedBoson.mass < 0.3) selmatchedBoson = ak.mask(matchedBoson, match_mask) genflavor = bosonFlavor(selmatchedBoson) else: genflavor = bosonFlavor(matchedBoson) genBosonPt = ak.fill_none(ak.firsts(bosons.pt), 0) if self._newVjetsKfactor: add_VJets_kFactors(weights, events.GenPart, dataset) else: add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset) if shift_name is None: output['btagWeight'].fill(val=self._btagSF.addBtagWeight( weights, ak4_away, self._ak4tagBranch)) if self._nnlops_rew and dataset in [ 'GluGluHToCC_M125_13TeV_powheg_pythia8' ]: weights.add('minlo_rew', powheg_to_nnlops(ak.to_numpy(genBosonPt))) if self._newTrigger: add_jetTriggerSF( weights, ak.firsts(fatjets), self._year if not self._skipRunB else f'{self._year}CDEF', selection) else: add_jetTriggerWeight(weights, candidatejet.msdcorr, candidatejet.pt, self._year) add_mutriggerSF(weights, leadingmuon, self._year, selection) add_mucorrectionsSF(weights, leadingmuon, self._year, selection) if self._year in ("2016", "2017"): weights.add("L1Prefiring", events.L1PreFiringWeight.Nom, events.L1PreFiringWeight.Up, events.L1PreFiringWeight.Dn) logger.debug("Weight statistics: %r" % weights.weightStatistics) msd_matched = candidatejet.msdcorr * self._msdSF[self._year] * ( genflavor > 0) + candidatejet.msdcorr * (genflavor == 0) regions = { 'signal': [ 'noleptons', 'minjetkin', 'met', 'metfilter', 'jetid', 'antiak4btagMediumOppHem', 'n2ddt', 'trigger', 'lumimask' ], 'signal_noddt': [ 'noleptons', 'minjetkin', 'met', 'jetid', 'antiak4btagMediumOppHem', 'trigger', 'lumimask', 'metfilter' ], # 'muoncontrol': ['minjetkinmu', 'jetid', 'n2ddt', 'ak4btagMedium08', 'onemuon', 'muonkin', 'muonDphiAK8', 'muontrigger', 'lumimask', 'metfilter'], 'muoncontrol': [ 'onemuon', 'muonkin', 'muonDphiAK8', 'metfilter', 'minjetkinmu', 'jetid', 'ak4btagMedium08', 'n2ddt', 'muontrigger', 'lumimask' ], 'muoncontrol_noddt': [ 'onemuon', 'muonkin', 'muonDphiAK8', 'jetid', 'metfilter', 'minjetkinmu', 'jetid', 'ak4btagMedium08', 'muontrigger', 'lumimask' ], 'wtag': [ 'onemuon', 'tightMuon', 'minjetkinw', 'jetid', 'met40p', 'metfilter', 'ptrecoW200', 'ak4btagOld', 'muontrigger', 'lumimask' ], 'wtag0': [ 'onemuon', 'tightMuon', 'met40p', 'metfilter', 'ptrecoW200', 'ak4btagOld', 'muontrigger', 'lumimask' ], 'wtag2': [ 'onemuon', 'tightMuon', 'minjetkinw', 'jetid', 'ak4btagMediumOppHem', 'met40p', 'metfilter', 'ptrecoW200', 'ak4btagOld', 'muontrigger', 'lumimask' ], 'noselection': [], } def normalize(val, cut): if cut is None: ar = ak.to_numpy(ak.fill_none(val, np.nan)) return ar else: ar = ak.to_numpy(ak.fill_none(val[cut], np.nan)) return ar import time tic = time.time() if shift_name is None: for region, cuts in regions.items(): allcuts = set([]) cut = selection.all(*allcuts) output['cutflow_msd'].fill(region=region, genflavor=normalize( genflavor, None), cut=0, weight=weights.weight(), msd=normalize(msd_matched, None)) output['cutflow_eta'].fill(region=region, genflavor=normalize(genflavor, cut), cut=0, weight=weights.weight()[cut], eta=normalize( candidatejet.eta, cut)) output['cutflow_pt'].fill(region=region, genflavor=normalize(genflavor, cut), cut=0, weight=weights.weight()[cut], pt=normalize(candidatejet.pt, cut)) for i, cut in enumerate(cuts + ['ddcvbpass', 'ddcpass']): allcuts.add(cut) cut = selection.all(*allcuts) output['cutflow_msd'].fill(region=region, genflavor=normalize( genflavor, cut), cut=i + 1, weight=weights.weight()[cut], msd=normalize(msd_matched, cut)) output['cutflow_eta'].fill( region=region, genflavor=normalize(genflavor, cut), cut=i + 1, weight=weights.weight()[cut], eta=normalize(candidatejet.eta, cut)) output['cutflow_pt'].fill( region=region, genflavor=normalize(genflavor, cut), cut=i + 1, weight=weights.weight()[cut], pt=normalize(candidatejet.pt, cut)) if self._evtVizInfo and 'ddcpass' in allcuts and isRealData and region == 'signal': if 'event' not in events.fields: continue _cut = selection.all(*allcuts, '_strict_mass', '_high_score') # _cut = selection.all('_strict_mass'') output['to_check'][ 'mass'] += processor.column_accumulator( normalize(msd_matched, _cut)) nfatjet = ak.sum( ((fatjets.pt > 200) & (abs(fatjets.eta) < 2.5) & fatjets.isTight), axis=1) output['to_check'][ 'njet'] += processor.column_accumulator( normalize(nfatjet, _cut)) output['to_check'][ 'fname'] += processor.column_accumulator( np.array([events.metadata['filename']] * len(normalize(msd_matched, _cut)))) output['to_check'][ 'event'] += processor.column_accumulator( normalize(events.event, _cut)) output['to_check'][ 'luminosityBlock'] += processor.column_accumulator( normalize(events.luminosityBlock, _cut)) output['to_check'][ 'run'] += processor.column_accumulator( normalize(events.run, _cut)) if shift_name is None: systematics = [None] + list(weights.variations) else: systematics = [shift_name] def fill(region, systematic, wmod=None): selections = regions[region] cut = selection.all(*selections) sname = 'nominal' if systematic is None else systematic if wmod is None: if systematic in weights.variations: weight = weights.weight(modifier=systematic)[cut] else: weight = weights.weight()[cut] else: weight = weights.weight()[cut] * wmod[cut] output['templates'].fill( region=region, systematic=sname, runid=runmap(events.run)[cut], genflavor=normalize(genflavor, cut), pt=normalize(candidatejet.pt, cut), msd=normalize(msd_matched, cut), ddb=normalize(bvl, cut), ddc=normalize(cvl, cut), ddcvb=normalize(cvb, cut), weight=weight, ) if region in [ 'wtag', 'wtag0', 'wtag2', 'wtag3', 'wtag4', 'wtag5', 'wtag6', 'wtag7', 'noselection' ]: # and sname in ['nominal', 'pileup_weightDown', 'pileup_weightUp', 'jet_triggerDown', 'jet_triggerUp']: output['wtag'].fill( region=region, systematic=sname, genflavor=normalize(genflavor, cut), pt=normalize(candidatejet.pt, cut), msd=normalize(msd_matched, cut), n2ddt=normalize(candidatejet.n2ddt, cut), ddc=normalize(cvl, cut), ddcvb=normalize(cvb, cut), weight=weight, ) # if region in ['signal', 'noselection']: # output['etaphi'].fill( # region=region, # systematic=sname, # runid=runmap(events.run)[cut], # genflavor=normalize(genflavor, cut), # pt=normalize(candidatejet.pt, cut), # eta=normalize(candidatejet.eta, cut), # phi=normalize(candidatejet.phi, cut), # ddc=normalize(cvl, cut), # ddcvb=normalize(cvb, cut), # ), if not isRealData: if wmod is not None: _custom_weight = events.genWeight[cut] * wmod[cut] else: _custom_weight = np.ones_like(weight) output['genresponse_noweight'].fill( region=region, systematic=sname, pt=normalize(candidatejet.pt, cut), genpt=normalize(genBosonPt, cut), weight=_custom_weight, ) output['genresponse'].fill( region=region, systematic=sname, pt=normalize(candidatejet.pt, cut), genpt=normalize(genBosonPt, cut), weight=weight, ) if systematic is None: output['signal_opt'].fill( region=region, genflavor=normalize(genflavor, cut), ddc=normalize(cvl, cut), ddcvb=normalize(cvb, cut), msd=normalize(msd_matched, cut), weight=weight, ) output['signal_optb'].fill( region=region, genflavor=normalize(genflavor, cut), ddb=normalize(bvl, cut), msd=normalize(msd_matched, cut), weight=weight, ) for region in regions: cut = selection.all(*(set(regions[region]) - {'n2ddt'})) if shift_name is None: output['nminus1_n2ddt'].fill( region=region, n2ddt=normalize(candidatejet.n2ddt, cut), weight=weights.weight()[cut], ) for systematic in systematics: if isRealData and systematic is not None: continue fill(region, systematic) if shift_name is None and 'GluGluH' in dataset and 'LHEWeight' in events.fields: for i in range(9): fill(region, 'LHEScale_%d' % i, events.LHEScaleWeight[:, i]) for c in events.LHEWeight.fields[1:]: fill(region, 'LHEWeight_%s' % c, events.LHEWeight[c]) toc = time.time() output["filltime"] = toc - tic if shift_name is None: output["weightStats"] = weights.weightStatistics return {dataset: output}
""" Mask inf values in `var_array` with None. If var_inf_counter is passed, append there inplace for a given `var_name` the fraction of its inf values. Arguments: - var_array: awkward array, values of a given feature for a given set of taus - var_name (optional, default=None): string, variable name - var_inf_counter (optional, default=None): defaultdict(list), stores fraction of inf values for variables - raise_exception (optional, default=True): bool, whether to raise exception instead of masking inf values Returns var_array witn masked infs values to None """ if np.any(is_inf_mask:=np.isinf(var_array)): if raise_exception: raise ValueError(f'Inf value detected in {var_name}') var_array = ak.mask(var_array, is_inf_mask, valid_when=False) if var_inf_counter is not None: var_inf_counter[var_name].append(np.sum(is_inf_mask) / ak.count(var_array)) return var_array def mask_nan(var_array, var_name=None, var_nan_counter=None, raise_exception=True): """ Mask nan values in `var_array` with None. If var_nan_counter is passed, append there inplace for a given `var_name` the fraction of its nan values. Arguments: - var_array: awkward array, values of a given feature for a given set of taus - var_name (optional, default=None): string, variable name - var_nan_counter (optional, default=None): defaultdict(list), stores fraction of nan values for variables - raise_exception (optional, default=True): bool, whether to raise exception instead of masking NaN values Returns var_array witn masked nan values to None """
def make_perm_table(bhad, blep, wja, wjb, lepton, met, nu): ''' Inputs: Jets, leptons, neutrinos, jet assignment object ordering, array of disrminiant probabilities (Total, mass discriminant, neutrino discrminant), and associated event weights Returns: awkward Table containing 1: Jet objects (BLeps, BHads, WJas, WJbs) 2: Leptons, Neutrinos 3: Calculated probabilities (Total -> Prob, mass discriminant -> MassDiscr, neutrino discrminant -> NuDiscr) ''' ## these attributes are based on those from URTTbar/interface/Permutation.h https://gitlab.cern.ch/jdulemba/URTTbar/-/blob/htt_analysis_2016legacydata_9410/interface/Permutation.h isWLepComplete = (ak.num(lepton.pt) == 1) & (ak.num(nu.pt) == 1) isTLepComplete = isWLepComplete & (ak.num(blep.pt) == 1) WLep = ak.Array({ 'pt' : ak.fill_none( (ak.mask(lepton, isWLepComplete)+ak.mask(nu, isWLepComplete)).pt, []), 'eta' : ak.fill_none( (ak.mask(lepton, isWLepComplete)+ak.mask(nu, isWLepComplete)).eta, []), 'phi' : ak.fill_none( (ak.mask(lepton, isWLepComplete)+ak.mask(nu, isWLepComplete)).phi, []), 'mass' : ak.fill_none( (ak.mask(lepton, isWLepComplete)+ak.mask(nu, isWLepComplete)).mass, []), 'charge': ak.fill_none( (ak.mask(lepton, isWLepComplete)).charge, []), }, with_name="PtEtaPhiMLorentzVector") TLep = ak.Array({ 'pt' : ak.fill_none( (ak.mask(WLep, isTLepComplete)+ak.mask(blep, isTLepComplete)).pt, []), 'eta' : ak.fill_none( (ak.mask(WLep, isTLepComplete)+ak.mask(blep, isTLepComplete)).eta, []), 'phi' : ak.fill_none( (ak.mask(WLep, isTLepComplete)+ak.mask(blep, isTLepComplete)).phi, []), 'mass' : ak.fill_none( (ak.mask(WLep, isTLepComplete)+ak.mask(blep, isTLepComplete)).mass, []), }, with_name="PtEtaPhiMLorentzVector") ## Event categories # fill empty [] events with values for easier comparisons bhad_jetIdx = ak.fill_none(ak.pad_none(bhad, 1).jetIdx, -999) blep_jetIdx = ak.fill_none(ak.pad_none(blep, 1).jetIdx, -999) wja_jetIdx = ak.fill_none(ak.pad_none(wja, 1).jetIdx, -999) wjb_jetIdx = ak.fill_none(ak.pad_none(wjb, 1).jetIdx, -999) # merged jets event categories # only bhad and blep merged Merged_BHadBLep = (bhad_jetIdx >= 0) & (bhad_jetIdx == blep_jetIdx) & (bhad_jetIdx != wja_jetIdx) & (bhad_jetIdx != wjb_jetIdx) # only bhad and wja merged Merged_BHadWJa = (bhad_jetIdx >= 0) & (bhad_jetIdx == wja_jetIdx) & (bhad_jetIdx != blep_jetIdx) & (bhad_jetIdx != wjb_jetIdx) # only bhad and wjb merged Merged_BHadWJb = (bhad_jetIdx >= 0) & (bhad_jetIdx == wjb_jetIdx) & (bhad_jetIdx != blep_jetIdx) & (bhad_jetIdx != wja_jetIdx) # only blep and wja merged Merged_BLepWJa = (blep_jetIdx >= 0) & (blep_jetIdx == wja_jetIdx) & (blep_jetIdx != bhad_jetIdx) & (blep_jetIdx != wjb_jetIdx) # only blep and wjb merged Merged_BLepWJb = (blep_jetIdx >= 0) & (blep_jetIdx == wjb_jetIdx) & (blep_jetIdx != bhad_jetIdx) & (blep_jetIdx != wja_jetIdx) # only wja and wjb merged Merged_WJets = (wja_jetIdx >= 0) & (wja_jetIdx == wjb_jetIdx) & (wja_jetIdx != bhad_jetIdx) & (wja_jetIdx != blep_jetIdx) # only two jets merged Merged_Event = (Merged_BHadBLep) | (Merged_BHadWJa) | (Merged_BHadWJb) | (Merged_BLepWJa) | (Merged_BLepWJb) | (Merged_WJets) # only bhad, blep, wja merged Merged_BHadBLepWJa = (bhad_jetIdx >= 0) & (bhad_jetIdx == blep_jetIdx) & (bhad_jetIdx == wja_jetIdx) & (bhad_jetIdx != wjb_jetIdx) # only bhad, blep, wjb merged Merged_BHadBLepWJb = (bhad_jetIdx >= 0) & (bhad_jetIdx == blep_jetIdx) & (bhad_jetIdx == wjb_jetIdx) & (bhad_jetIdx != wja_jetIdx) # only bhad, wja, wjb merged Merged_BHadWJaWJb = (bhad_jetIdx >= 0) & (bhad_jetIdx == wja_jetIdx) & (bhad_jetIdx == wjb_jetIdx) & (bhad_jetIdx != blep_jetIdx) # only blep, wja, wjb merged Merged_BLepWJaWJb = (blep_jetIdx >= 0) & (blep_jetIdx == wja_jetIdx) & (blep_jetIdx == wjb_jetIdx) & (blep_jetIdx != bhad_jetIdx) # # lost jet event categories # only bhad is lost, other jets exist and are resolved Lost_BHad = (bhad_jetIdx < 0) & (blep_jetIdx >= 0) & (wja_jetIdx >= 0) & (wjb_jetIdx >= 0) & (blep_jetIdx != wja_jetIdx) & (blep_jetIdx != wjb_jetIdx) & (wja_jetIdx != wjb_jetIdx) # only blep is lost, other jets exist and are resolved Lost_BLep = (blep_jetIdx < 0) & (bhad_jetIdx >= 0) & (wja_jetIdx >= 0) & (wjb_jetIdx >= 0) & (bhad_jetIdx != wja_jetIdx) & (bhad_jetIdx != wjb_jetIdx) & (wja_jetIdx != wjb_jetIdx) # only wja is lost, other jets exist and are resolved Lost_WJa = (wja_jetIdx < 0) & (bhad_jetIdx >= 0) & (blep_jetIdx >= 0) & (wjb_jetIdx >= 0) & (bhad_jetIdx != blep_jetIdx) & (bhad_jetIdx != wjb_jetIdx) & (blep_jetIdx != wjb_jetIdx) # only wjb is lost, other jets exist and are resolved Lost_WJb = (wjb_jetIdx < 0) & (bhad_jetIdx >= 0) & (blep_jetIdx >= 0) & (wja_jetIdx >= 0) & (bhad_jetIdx != blep_jetIdx) & (bhad_jetIdx != wja_jetIdx) & (blep_jetIdx != wja_jetIdx) # only one jet is lost, others exist and are resolved Lost_Event = (Lost_BHad) | (Lost_BLep) | (Lost_WJa) | (Lost_WJb) ## n_perm_matches = ak.unflatten(ak.num(bhad.pt)+ak.num(blep.pt)+ak.num(wja.pt)+ak.num(wjb.pt), np.ones(len(bhad.pt), dtype=int)) #isEmpty = (bhad_jetIdx < 0) & (blep_jetIdx < 0) & (wja_jetIdx < 0) & (wjb_jetIdx < 0) # find number of unique matches jetIdx_stack = np.stack( (ak.to_numpy(ak.flatten(bhad_jetIdx)), ak.to_numpy(ak.flatten(blep_jetIdx)), ak.to_numpy(ak.flatten(wja_jetIdx)), ak.to_numpy(ak.flatten(wjb_jetIdx)) ), axis=1) unique_matches = ak.unflatten(np.array([len(list(set([ind for ind in inds if ind >= 0]))) for inds in jetIdx_stack.tolist()]), np.ones(len(bhad.pt), dtype=int)) # create WHad # init variables whad_pt = np.zeros(len(bhad.pt)) whad_eta = np.zeros(len(bhad.pt)) whad_phi = np.zeros(len(bhad.pt)) whad_mass= np.zeros(len(bhad.pt)) # inds where only WJb p4 is used as WHad use_wjb_inds = ak.flatten(Merged_BHadWJa | Merged_BLepWJa | Merged_WJets | Lost_WJa) whad_pt[use_wjb_inds] = ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(wjb.pt, 1), np.nan)[use_wjb_inds])) whad_eta[use_wjb_inds] = ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(wjb.eta, 1), np.nan)[use_wjb_inds])) whad_phi[use_wjb_inds] = ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(wjb.phi, 1), np.nan)[use_wjb_inds])) whad_mass[use_wjb_inds]= ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(wjb.mass, 1), np.nan)[use_wjb_inds])) # inds where only WJa p4 is used as WHad use_wja_inds = ak.flatten(Merged_BHadWJb | Merged_BLepWJb | Lost_WJb) whad_pt[use_wja_inds] = ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(wja.pt, 1), np.nan)[use_wja_inds])) whad_eta[use_wja_inds] = ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(wja.eta, 1), np.nan)[use_wja_inds])) whad_phi[use_wja_inds] = ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(wja.phi, 1), np.nan)[use_wja_inds])) whad_mass[use_wja_inds]= ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(wja.mass, 1), np.nan)[use_wja_inds])) # inds where combined p4 from WJa and WJb is used as WHad (all other inds) use_comb_inds = ~(use_wjb_inds | use_wja_inds) whad_pt[use_comb_inds] = ak.to_numpy(ak.flatten( (ak.fill_none(ak.pad_none( wja, 1), 0) + ak.fill_none(ak.pad_none( wjb, 1), 0)).pt[use_comb_inds] )) whad_eta[use_comb_inds] = ak.to_numpy(ak.flatten( (ak.fill_none(ak.pad_none( wja, 1), 0) + ak.fill_none(ak.pad_none( wjb, 1), 0)).eta[use_comb_inds] )) whad_phi[use_comb_inds] = ak.to_numpy(ak.flatten( (ak.fill_none(ak.pad_none( wja, 1), 0) + ak.fill_none(ak.pad_none( wjb, 1), 0)).phi[use_comb_inds] )) whad_mass[use_comb_inds]= ak.to_numpy(ak.flatten( (ak.fill_none(ak.pad_none( wja, 1), 0) + ak.fill_none(ak.pad_none( wjb, 1), 0)).mass[use_comb_inds] )) whad_pt[whad_pt == 0.] = np.nan whad_eta[whad_eta == 0.] = np.nan whad_phi[whad_phi == 0.] = np.nan whad_mass[whad_mass == 0.] = np.nan WHad = ak.Array({ 'pt' : ak.unflatten(whad_pt[~np.isnan(whad_pt)], (~np.isnan(whad_pt)).astype(int)), 'eta' : ak.unflatten(whad_eta[~np.isnan(whad_eta)], (~np.isnan(whad_eta)).astype(int)), 'phi' : ak.unflatten(whad_phi[~np.isnan(whad_phi)], (~np.isnan(whad_phi)).astype(int)), 'mass' : ak.unflatten(whad_mass[~np.isnan(whad_mass)], (~np.isnan(whad_mass)).astype(int)), 'charge': -1*ak.fill_none(ak.mask(WLep, ~np.isnan(whad_mass)).charge, []), # opposite charge as WLep for events that exist }, with_name="PtEtaPhiMLorentzVector") isWHadComplete = (ak.num(WHad.pt) == 1) isTHadComplete = (isWHadComplete) & (ak.num(bhad.pt) == 1) #set_trace() # create THad THad = ak.Array({ 'pt' : ak.fill_none( (ak.mask(WHad, isTHadComplete)+ak.mask(bhad, isTHadComplete)).pt, []), 'eta' : ak.fill_none( (ak.mask(WHad, isTHadComplete)+ak.mask(bhad, isTHadComplete)).eta, []), 'phi' : ak.fill_none( (ak.mask(WHad, isTHadComplete)+ak.mask(bhad, isTHadComplete)).phi, []), 'mass' : ak.fill_none( (ak.mask(WHad, isTHadComplete)+ak.mask(bhad, isTHadComplete)).mass, []), }, with_name="PtEtaPhiMLorentzVector") # create TTbar isComplete = isTHadComplete & isTLepComplete TTbar = ak.Array({ 'pt' : ak.fill_none( (ak.mask(THad, isComplete)+ak.mask(TLep, isComplete)).pt, []), 'eta' : ak.fill_none( (ak.mask(THad, isComplete)+ak.mask(TLep, isComplete)).eta, []), 'phi' : ak.fill_none( (ak.mask(THad, isComplete)+ak.mask(TLep, isComplete)).phi, []), 'mass' : ak.fill_none( (ak.mask(THad, isComplete)+ak.mask(TLep, isComplete)).mass, []), }, with_name="PtEtaPhiMLorentzVector") ## Combine everything into a single table, all objects are JaggedArrays permutations = ak.zip({ "BHad" : bhad, "BLep" : blep, "WJa" : wja, "WJb" : wjb, "Lepton" : lepton, "MET" : met, "Nu" : nu, "WLep" : WLep, "TLep" : TLep, "WHad" : WHad, "THad" : THad, "TTbar" : TTbar, "n_perm_matches" : n_perm_matches, #"isEmpty" : isEmpty, "unique_matches" : unique_matches, "Merged_BHadBLep" : Merged_BHadBLep, "Merged_BHadWJa" : Merged_BHadWJa, "Merged_BHadWJb" : Merged_BHadWJb, "Merged_BLepWJa" : Merged_BLepWJa, "Merged_BLepWJb" : Merged_BLepWJb, "Merged_WJets" : Merged_WJets, "Merged_Event" : Merged_Event, "Lost_BHad" : Lost_BHad, "Lost_BLep" : Lost_BLep, "Lost_WJa" : Lost_WJa, "Lost_WJb" : Lost_WJb, "Lost_Event" : Lost_Event, }, depth_limit=1) #set_trace() return permutations
def test_mask(): array = ak.Array([1, 2, 3, 4, 5]) mask = ak.Array([False, True, True, True, False]) assert ak.to_list(ak.mask(array, mask)) == [None, 2, 3, 4, None]