def test():
    """Check ``ak.min``/``ak.max`` along ``axis=1`` when ``initial=4`` is given.

    The expected values show ``initial`` taking part in each reduction (the
    lone ``5`` has minimum ``4``; ``[2, 3, 1]`` has maximum ``4``) while the
    empty sublist still reduces to ``None``.  The same pattern is checked for
    integer and for floating-point content.
    """
    int_lists = ak.Array([[1, 3, 5, 4, 2], [], [2, 3, 1], [5]])
    int_minima = ak.min(int_lists, axis=1, initial=4)
    assert int_minima.tolist() == [1, None, 1, 4]
    int_maxima = ak.max(int_lists, axis=1, initial=4)
    assert int_maxima.tolist() == [5, None, 4, 5]

    float_lists = ak.Array(
        [[1.1, 3.3, 5.5, 4.4, 2.2], [], [2.2, 3.3, 1.1], [5.5]]
    )
    float_minima = ak.min(float_lists, axis=1, initial=4)
    assert float_minima.tolist() == [1.1, None, 1.1, 4]
    float_maxima = ak.max(float_lists, axis=1, initial=4)
    assert float_maxima.tolist() == [5.5, None, 4, 5.5]
def test_date_time():
    """Round-trip ``datetime64`` values through ``ak.Array`` and reduce them.

    Covers the reported type string, ``tolist``/``ak.to_numpy`` conversion,
    a timestamp stored with microsecond unit, ``ak.from_iter`` applied to a
    datetime array, and ``ak.max``/``ak.min`` over the whole array.
    """
    timestamps = np.array(
        ["2020-07-27T10:41:11", "2019-01-01", "2020-01-01"], "datetime64[s]"
    )
    array = ak.Array(timestamps)

    assert str(array.type) == "3 * datetime64"
    expected_values = [
        np.datetime64(text)
        for text in (
            "2020-07-27T10:41:11",
            "2019-01-01T00:00:00",
            "2020-01-01T00:00:00",
        )
    ]
    assert array.tolist() == expected_values
    # Element-wise comparison of the NumPy view against the input array.
    for index, original in enumerate(timestamps):
        assert ak.to_numpy(array)[index] == original

    # The literal carries nine fractional digits while the requested unit is
    # microseconds; both sides of the comparison are built from the same text.
    fine_grained_text = "2020-07-27T10:41:11.200000011"
    date_time = np.datetime64(fine_grained_text, "us")
    array1 = ak.Array(np.array([fine_grained_text], "datetime64[us]"))
    assert np.datetime64(array1[0], "us") == date_time

    rebuilt = ak.to_list(ak.from_iter(array1))
    assert rebuilt == [np.datetime64("2020-07-27T10:41:11.200000")]

    # The first input entry is the latest timestamp, the second the earliest.
    assert ak.max(array) == timestamps[0]
    assert ak.min(array) == timestamps[1]
Ejemplo n.º 3
0
def test_ByteMaskedArray():
    """Check ``ak.max``/``ak.argmax`` on a ``ByteMaskedArray``, flat and nested.

    The two ``999`` placeholders in the content sit at the positions flagged
    ``True`` in the byte mask; with ``valid_when=False`` they read back as
    ``None`` (first ``tolist`` assertion).  The same masked content is then
    wrapped in a list-offset layout to test the ``axis=1`` reductions.
    """
    values = ak.Array([1.1, 2.2, 3.3, 999, 999, 4.4, 5.5]).layout
    flags = np.array([False, False, False, True, True, False, False])
    masked_layout = ak.layout.ByteMaskedArray(
        ak.layout.Index8(flags), values, valid_when=False
    )

    flat = ak.Array(masked_layout)
    assert flat.tolist() == [1.1, 2.2, 3.3, None, None, 4.4, 5.5]
    assert ak.max(flat, axis=0) == 5.5
    assert ak.argmax(flat, axis=0) == 6

    # Offsets [0, 2, 4, 7] split the seven entries into lists of 2, 2 and 3.
    boundaries = ak.layout.Index64(np.array([0, 2, 4, 7], dtype=np.int64))
    nested = ak.Array(ak.layout.ListOffsetArray64(boundaries, masked_layout))
    assert nested.tolist() == [[1.1, 2.2], [3.3, None], [None, 4.4, 5.5]]
    assert ak.max(nested, axis=1).tolist() == [2.2, 3.3, 5.5]
    assert ak.argmax(nested, axis=1).tolist() == [1, 0, 2]
def test_minmax():
    """Check min/max/argmin/argmax on a jagged array of complex numbers.

    The expected values pin the ordering these reducers use for complex
    entries (``1 + 5j`` sorts below ``2 + 4j``, which sorts below ``3 + 3j``)
    and that the empty sublist yields ``None`` for the ``axis=1`` reductions.
    """
    # Built once and reused; each reduction below only reads the array.
    values = ak.from_iter([[1 + 5j, 2 + 4j], [], [3 + 3j]])

    # Reductions over the whole array.
    assert ak.min(values) == 1 + 5j
    assert ak.max(values) == 3 + 3j

    # Per-sublist values.
    row_minima = ak.min(values, axis=1).tolist()
    assert row_minima == [1 + 5j, None, 3 + 3j]
    row_maxima = ak.max(values, axis=1).tolist()
    assert row_maxima == [2 + 4j, None, 3 + 3j]

    # Per-sublist positions of those values.
    row_argmin = ak.argmin(values, axis=1).tolist()
    assert row_argmin == [0, None, 0]
    row_argmax = ak.argmax(values, axis=1).tolist()
    assert row_argmax == [1, None, 0]
Ejemplo n.º 5
0
    def process_shift(self, events, shift_name):
        """Apply the event selection, build event weights and fill histograms.

        Args:
            events: Event record for one chunk.  The code reads
                ``events.metadata['dataset']`` plus the ``HLT``, ``Flag``,
                ``FatJet``, ``Jet``, ``MET``, ``Muon``, ``Electron``, ``Tau``,
                ``run`` and ``luminosityBlock`` branches, and for simulation
                also ``genWeight``, ``GenPart``, ``Pileup`` and the optional
                ``PSWeight``/``LHEPdfWeight``/``LHEScaleWeight``/``LHEWeight``
                branches.  Presumably a coffea NanoEvents array -- TODO
                confirm against the caller.
            shift_name: ``None`` for the nominal pass.  Only then are the
                cutflow, N-1, ``sumw``, ``btagWeight`` and ``weightStats``
                outputs filled and all weight variations looped over.  Any
                other value is used as the single systematic label.

        Returns:
            ``{dataset: output}`` where ``output`` comes from
            ``self.make_output()`` and has been filled in place.

        Raises:
            RuntimeError: ``self._jet_arbitration`` is not one of
                ``'pt'``, ``'mass'``, ``'n2'``, ``'ddb'``, ``'ddc'``.
            ValueError: ``self._tagger`` is not one of
                ``'v1'``, ``'v2'``, ``'v3'``, ``'v4'``.
        """
        dataset = events.metadata['dataset']
        # Events without a ``genWeight`` attribute are treated as real data.
        isRealData = not hasattr(events, "genWeight")
        selection = PackedSelection()
        weights = Weights(len(events), storeIndividual=True)
        output = self.make_output()
        # Sum of generator weights, stored for the nominal simulation pass only.
        if shift_name is None and not isRealData:
            output['sumw'] = ak.sum(events.genWeight)

        # Jet trigger: OR of the configured HLT paths for this year.  Paths
        # missing from ``events.HLT.fields`` are skipped.  Simulation without
        # ``self._newTrigger`` gets an all-pass mask instead.
        if isRealData or self._newTrigger:
            trigger = np.zeros(len(events), dtype='bool')
            for t in self._triggers[self._year]:
                if t in events.HLT.fields:
                    trigger = trigger | events.HLT[t]
            selection.add('trigger', trigger)
            del trigger
        else:
            selection.add('trigger', np.ones(len(events), dtype='bool'))

        # Luminosity mask is evaluated for data only; simulation always passes.
        if isRealData:
            selection.add(
                'lumimask', lumiMasks[self._year](events.run,
                                                  events.luminosityBlock))
        else:
            selection.add('lumimask', np.ones(len(events), dtype='bool'))

        # Optionally keep only 2017 data with run number above 299329
        # (presumably the end of run period B -- TODO confirm).
        if isRealData and self._skipRunB and self._year == '2017':
            selection.add('dropB', events.run > 299329)
        else:
            selection.add('dropB', np.ones(len(events), dtype='bool'))

        # Muon trigger: OR of the configured muon HLT paths, data only.
        if isRealData:
            trigger = np.zeros(len(events), dtype='bool')
            for t in self._muontriggers[self._year]:
                if t in events.HLT.fields:
                    trigger |= np.array(events.HLT[t])
            selection.add('muontrigger', trigger)
            del trigger
        else:
            selection.add('muontrigger', np.ones(len(events), dtype='bool'))

        # MET filters: AND of every flag configured for this year, using the
        # 'data' or 'mc' list as appropriate.
        metfilter = np.ones(len(events), dtype='bool')
        for flag in self._met_filters[
                self._year]['data' if isRealData else 'mc']:
            metfilter &= np.array(events.Flag[flag])
        selection.add('metfilter', metfilter)
        del metfilter

        # Derived fat-jet columns: corrected soft-drop mass, rho computed from
        # it, the shifted n2b1 variable, and the mass scaled by the per-year
        # scale factor.
        fatjets = events.FatJet
        fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
        fatjets['qcdrho'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
        fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)
        fatjets['msdcorr_full'] = fatjets['msdcorr'] * self._msdSF[self._year]

        candidatejet = fatjets[
            # https://github.com/DAZSLE/BaconAnalyzer/blob/master/Analyzer/src/VJetLoader.cc#L269
            (fatjets.pt > 200)
            & (abs(fatjets.eta) < 2.5)
            & fatjets.isTight  # this is loose in sampleContainer
        ]

        candidatejet = candidatejet[:, :
                                    2]  # Only consider first two to match generators
        # Reduce the (at most two) candidates to one per event according to
        # the configured arbitration variable.
        if self._jet_arbitration == 'pt':
            candidatejet = ak.firsts(candidatejet)
        elif self._jet_arbitration == 'mass':
            candidatejet = ak.firsts(candidatejet[ak.argmax(
                candidatejet.msdcorr, axis=1, keepdims=True)])
        elif self._jet_arbitration == 'n2':
            candidatejet = ak.firsts(candidatejet[ak.argmin(candidatejet.n2ddt,
                                                            axis=1,
                                                            keepdims=True)])
        elif self._jet_arbitration == 'ddb':
            candidatejet = ak.firsts(candidatejet[ak.argmax(
                candidatejet.btagDDBvLV2, axis=1, keepdims=True)])
        elif self._jet_arbitration == 'ddc':
            candidatejet = ak.firsts(candidatejet[ak.argmax(
                candidatejet.btagDDCvLV2, axis=1, keepdims=True)])
        else:
            raise RuntimeError("Unknown candidate jet arbitration")

        # Pick the three tagger scores (bvl, cvl, cvb) for the configured
        # tagger version; v3 and v4 build some of them from particleNetMD
        # outputs.
        if self._tagger == 'v1':
            bvl = candidatejet.btagDDBvL
            cvl = candidatejet.btagDDCvL
            cvb = candidatejet.btagDDCvB
        elif self._tagger == 'v2':
            bvl = candidatejet.btagDDBvLV2
            cvl = candidatejet.btagDDCvLV2
            cvb = candidatejet.btagDDCvBV2
        elif self._tagger == 'v3':
            bvl = candidatejet.particleNetMD_Xbb
            cvl = candidatejet.particleNetMD_Xcc / (
                1 - candidatejet.particleNetMD_Xbb)
            cvb = candidatejet.particleNetMD_Xcc / (
                candidatejet.particleNetMD_Xcc +
                candidatejet.particleNetMD_Xbb)

        elif self._tagger == 'v4':
            bvl = candidatejet.particleNetMD_Xbb
            cvl = candidatejet.btagDDCvLV2
            cvb = candidatejet.particleNetMD_Xcc / (
                candidatejet.particleNetMD_Xcc +
                candidatejet.particleNetMD_Xbb)
        else:
            raise ValueError("Not an option")

        # Candidate-jet kinematic windows.  The three 'minjetkin*' variants
        # differ only in the lower pt threshold (450 / 400 / 200).
        selection.add('minjetkin', (candidatejet.pt >= 450)
                      & (candidatejet.pt < 1200)
                      & (candidatejet.msdcorr >= 40.)
                      & (candidatejet.msdcorr < 201.)
                      & (abs(candidatejet.eta) < 2.5))
        # '_strict_mass' and '_high_score' are only used further down for the
        # optional event dump.
        selection.add('_strict_mass', (candidatejet.msdcorr > 85) &
                      (candidatejet.msdcorr < 130))
        selection.add('_high_score', cvl > 0.8)
        selection.add('minjetkinmu', (candidatejet.pt >= 400)
                      & (candidatejet.pt < 1200)
                      & (candidatejet.msdcorr >= 40.)
                      & (candidatejet.msdcorr < 201.)
                      & (abs(candidatejet.eta) < 2.5))
        selection.add('minjetkinw', (candidatejet.pt >= 200)
                      & (candidatejet.pt < 1200)
                      & (candidatejet.msdcorr >= 40.)
                      & (candidatejet.msdcorr < 201.)
                      & (abs(candidatejet.eta) < 2.5))
        selection.add('jetid', candidatejet.isTight)
        selection.add('n2ddt', (candidatejet.n2ddt < 0.))
        # Tagger pass thresholds: one set for tagger 'v2', another for every
        # other tagger version.
        if not self._tagger == 'v2':
            selection.add('ddbpass', (bvl >= 0.89))
            selection.add('ddcpass', (cvl >= 0.83))
            selection.add('ddcvbpass', (cvb >= 0.2))
        else:
            selection.add('ddbpass', (bvl >= 0.7))
            selection.add('ddcpass', (cvl >= 0.45))
            selection.add('ddcvbpass', (cvb >= 0.03))

        jets = events.Jet
        jets = jets[(jets.pt > 30.) & (abs(jets.eta) < 2.5) & jets.isTight]
        # only consider first 4 jets to be consistent with old framework
        jets = jets[:, :4]
        dphi = abs(jets.delta_phi(candidatejet))
        # Veto: the highest tag score among jets with dphi > pi/2 from the
        # candidate must be below the medium working point.
        # NOTE(review): mask_identity=False presumably makes events without
        # such jets return the reduction identity instead of None -- confirm.
        selection.add(
            'antiak4btagMediumOppHem',
            ak.max(jets[dphi > np.pi / 2][self._ak4tagBranch],
                   axis=1,
                   mask_identity=False) <
            BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium'])
        ak4_away = jets[dphi > 0.8]
        # Requirement: the highest tag score among jets with dphi > 0.8 must
        # exceed the medium working point.
        selection.add(
            'ak4btagMedium08',
            ak.max(ak4_away[self._ak4tagBranch], axis=1, mask_identity=False) >
            BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium'])

        met = events.MET
        selection.add('met', met.pt < 140.)

        # Lepton counting.  ``goodmuon`` is a per-muon mask, ``nmuons`` the
        # number of passing muons per event, ``leadingmuon`` the first one.
        goodmuon = ((events.Muon.pt > 10)
                    & (abs(events.Muon.eta) < 2.4)
                    & (events.Muon.pfRelIso04_all < 0.25)
                    & events.Muon.looseId)
        nmuons = ak.sum(goodmuon, axis=1)
        leadingmuon = ak.firsts(events.Muon[goodmuon])

        # Two electron/tau definitions.  With ``self._looseTau`` electrons use
        # the VETO cut-based ID and taus get extra eta and idMVAoldDM2017v2
        # requirements; otherwise electrons use the LOOSE ID.  In both cases
        # taus must have metric_table(...) > 0.4 to every good muon and
        # electron.
        if self._looseTau:
            goodelectron = ((events.Electron.pt > 10)
                            & (abs(events.Electron.eta) < 2.5)
                            &
                            (events.Electron.cutBased >= events.Electron.VETO))
            nelectrons = ak.sum(goodelectron, axis=1)

            ntaus = ak.sum(
                ((events.Tau.pt > 20)
                 & (abs(events.Tau.eta) < 2.3)
                 & events.Tau.idDecayMode
                 & ((events.Tau.idMVAoldDM2017v2 & 2) != 0)
                 & ak.all(events.Tau.metric_table(events.Muon[goodmuon]) > 0.4,
                          axis=2)
                 & ak.all(events.Tau.metric_table(
                     events.Electron[goodelectron]) > 0.4,
                          axis=2)),
                axis=1,
            )
        else:
            goodelectron = (
                (events.Electron.pt > 10)
                & (abs(events.Electron.eta) < 2.5)
                & (events.Electron.cutBased >= events.Electron.LOOSE))
            nelectrons = ak.sum(goodelectron, axis=1)

            ntaus = ak.sum(
                (events.Tau.pt > 20)
                &
                events.Tau.idDecayMode  # bacon iso looser than Nano selection
                & ak.all(events.Tau.metric_table(events.Muon[goodmuon]) > 0.4,
                         axis=2)
                & ak.all(events.Tau.metric_table(events.Electron[goodelectron])
                         > 0.4,
                         axis=2),
                axis=1,
            )

        selection.add('noleptons',
                      (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
        selection.add('onemuon',
                      (nmuons == 1) & (nelectrons == 0) & (ntaus == 0))
        selection.add('muonkin',
                      (leadingmuon.pt > 55.) & (abs(leadingmuon.eta) < 2.1))
        selection.add('muonDphiAK8',
                      abs(leadingmuon.delta_phi(candidatejet)) > 2 * np.pi / 3)

        # W-Tag (Tag and Probe)
        # tag side
        # Same quantity as 'antiak4btagMediumOppHem' above, with the
        # comparison reversed (> instead of <).
        selection.add(
            'ak4btagMediumOppHem',
            ak.max(jets[dphi > np.pi / 2][self._ak4tagBranch],
                   axis=1,
                   mask_identity=False) >
            BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium'])
        selection.add('met40p', met.pt > 40.)
        selection.add('tightMuon',
                      (leadingmuon.tightId) & (leadingmuon.pt > 53.))
        # selection.add('ptrecoW', (leadingmuon + met).pt > 250.)
        selection.add('ptrecoW200', (leadingmuon + met).pt > 200.)
        selection.add(
            'ak4btagNearMu',
            leadingmuon.delta_r(leadingmuon.nearest(ak4_away, axis=None)) <
            2.0)
        # Per-jet mask of jets above the medium working point.
        _bjets = jets[self._ak4tagBranch] > BTagEfficiency.btagWPs[
            self._ak4tagger][self._year]['medium']
        # _nearAK8 = jets.delta_r(candidatejet)  < 0.8
        # _nearMu = jets.delta_r(ak.firsts(events.Muon))  < 0.3
        # selection.add('ak4btagOld', ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1)
        _nearAK8 = jets.delta_r(candidatejet) < 0.8
        _nearMu = jets.delta_r(leadingmuon) < 0.3
        # At least one tagged jet that is neither within 0.8 of the candidate
        # jet nor within 0.3 of the leading muon.
        selection.add('ak4btagOld',
                      ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1)

        # _nearAK8 = jets.delta_r(candidatejet)  < 0.8
        # _nearMu = jets.delta_r(candidatejet.nearest(events.Muon[goodmuon], axis=None))  < 0.3
        # selection.add('ak4btagNew', ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1)

        # probe side
        selection.add('minWjetpteta',
                      (candidatejet.pt >= 200) & (abs(candidatejet.eta) < 2.4))
        # selection.add('noNearMuon', candidatejet.delta_r(candidatejet.nearest(events.Muon[goodmuon], axis=None)) > 1.0)
        selection.add('noNearMuon', candidatejet.delta_r(leadingmuon) > 1.0)
        #####

        # Generator-level flavour and event weights.  Data gets an all-zero
        # flavour and no weights; everything in the else branch is
        # simulation only.
        if isRealData:
            genflavor = ak.zeros_like(candidatejet.pt)
        else:
            if 'HToCC' in dataset or 'HToBB' in dataset:
                if self._ewkHcorr:
                    add_HiggsEW_kFactors(weights, events.GenPart, dataset)

            weights.add('genweight', events.genWeight)
            # Each helper is still called with a placeholder (None or []) when
            # its input branch is absent.
            if "PSWeight" in events.fields:
                add_ps_weight(weights, events.PSWeight)
            else:
                add_ps_weight(weights, None)
            if "LHEPdfWeight" in events.fields:
                add_pdf_weight(weights, events.LHEPdfWeight)
            else:
                add_pdf_weight(weights, None)
            if "LHEScaleWeight" in events.fields:
                add_scalevar_7pt(weights, events.LHEScaleWeight)
                add_scalevar_3pt(weights, events.LHEScaleWeight)
            else:
                add_scalevar_7pt(weights, [])
                add_scalevar_3pt(weights, [])

            add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
            # Match the candidate jet to the nearest generator boson, using a
            # threshold of 0.8.
            bosons = getBosons(events.GenPart)
            matchedBoson = candidatejet.nearest(bosons,
                                                axis=None,
                                                threshold=0.8)
            # Optional tighter match: relative pt difference below 0.5 and
            # relative mass difference below 0.3 (signed, not absolute).
            if self._tightMatch:
                match_mask = (
                    (candidatejet.pt - matchedBoson.pt) / matchedBoson.pt <
                    0.5) & ((candidatejet.msdcorr - matchedBoson.mass) /
                            matchedBoson.mass < 0.3)
                selmatchedBoson = ak.mask(matchedBoson, match_mask)
                genflavor = bosonFlavor(selmatchedBoson)
            else:
                genflavor = bosonFlavor(matchedBoson)
            # pt of the first boson per event, 0 where there is none.
            genBosonPt = ak.fill_none(ak.firsts(bosons.pt), 0)
            if self._newVjetsKfactor:
                add_VJets_kFactors(weights, events.GenPart, dataset)
            else:
                add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
            # addBtagWeight receives ``weights`` (presumably adding to it --
            # TODO confirm); its return value is what gets histogrammed.
            if shift_name is None:
                output['btagWeight'].fill(val=self._btagSF.addBtagWeight(
                    weights, ak4_away, self._ak4tagBranch))
            if self._nnlops_rew and dataset in [
                    'GluGluHToCC_M125_13TeV_powheg_pythia8'
            ]:
                weights.add('minlo_rew',
                            powheg_to_nnlops(ak.to_numpy(genBosonPt)))

            # The new trigger scale factor uses the first fat jet (not the
            # arbitrated candidate) and a '<year>CDEF' key when run B is
            # skipped.
            if self._newTrigger:
                add_jetTriggerSF(
                    weights, ak.firsts(fatjets),
                    self._year if not self._skipRunB else f'{self._year}CDEF',
                    selection)
            else:
                add_jetTriggerWeight(weights, candidatejet.msdcorr,
                                     candidatejet.pt, self._year)

            add_mutriggerSF(weights, leadingmuon, self._year, selection)
            add_mucorrectionsSF(weights, leadingmuon, self._year, selection)

            if self._year in ("2016", "2017"):
                weights.add("L1Prefiring", events.L1PreFiringWeight.Nom,
                            events.L1PreFiringWeight.Up,
                            events.L1PreFiringWeight.Dn)

            logger.debug("Weight statistics: %r" % weights.weightStatistics)

        # Soft-drop mass used for the histograms: scaled by the per-year scale
        # factor where genflavor > 0, unscaled where genflavor == 0.
        # NOTE(review): an entry with genflavor < 0 (if bosonFlavor can return
        # one) would evaluate to 0 here -- confirm this cannot happen.
        msd_matched = candidatejet.msdcorr * self._msdSF[self._year] * (
            genflavor > 0) + candidatejet.msdcorr * (genflavor == 0)

        # Region name -> ordered list of selection names.  The order sets the
        # cutflow step numbering below.
        # NOTE(review): 'jetid' is listed twice in 'muoncontrol_noddt'.
        regions = {
            'signal': [
                'noleptons', 'minjetkin', 'met', 'metfilter', 'jetid',
                'antiak4btagMediumOppHem', 'n2ddt', 'trigger', 'lumimask'
            ],
            'signal_noddt': [
                'noleptons', 'minjetkin', 'met', 'jetid',
                'antiak4btagMediumOppHem', 'trigger', 'lumimask', 'metfilter'
            ],
            # 'muoncontrol': ['minjetkinmu', 'jetid', 'n2ddt', 'ak4btagMedium08', 'onemuon', 'muonkin', 'muonDphiAK8', 'muontrigger', 'lumimask', 'metfilter'],
            'muoncontrol': [
                'onemuon', 'muonkin', 'muonDphiAK8', 'metfilter',
                'minjetkinmu', 'jetid', 'ak4btagMedium08', 'n2ddt',
                'muontrigger', 'lumimask'
            ],
            'muoncontrol_noddt': [
                'onemuon', 'muonkin', 'muonDphiAK8', 'jetid', 'metfilter',
                'minjetkinmu', 'jetid', 'ak4btagMedium08', 'muontrigger',
                'lumimask'
            ],
            'wtag': [
                'onemuon', 'tightMuon', 'minjetkinw', 'jetid', 'met40p',
                'metfilter', 'ptrecoW200', 'ak4btagOld', 'muontrigger',
                'lumimask'
            ],
            'wtag0': [
                'onemuon', 'tightMuon', 'met40p', 'metfilter', 'ptrecoW200',
                'ak4btagOld', 'muontrigger', 'lumimask'
            ],
            'wtag2': [
                'onemuon', 'tightMuon', 'minjetkinw', 'jetid',
                'ak4btagMediumOppHem', 'met40p', 'metfilter', 'ptrecoW200',
                'ak4btagOld', 'muontrigger', 'lumimask'
            ],
            'noselection': [],
        }

        def normalize(val, cut):
            """Return ``val`` as a NumPy array with missing entries set to NaN.

            When ``cut`` is not None it is first used to index ``val``.
            """
            if cut is None:
                ar = ak.to_numpy(ak.fill_none(val, np.nan))
                return ar
            else:
                ar = ak.to_numpy(ak.fill_none(val[cut], np.nan))
                return ar

        # Time the histogram filling; the elapsed wall-clock seconds end up
        # in output["filltime"].
        import time
        tic = time.time()
        # Cutflow histograms, nominal pass only.  Step 0 is filled before any
        # cut; step i + 1 after cumulatively applying the first i + 1 names
        # of the region list extended by 'ddcvbpass' and 'ddcpass'.
        if shift_name is None:
            for region, cuts in regions.items():
                allcuts = set([])
                # No names are passed here; presumably selection.all() then
                # selects every event -- TODO confirm against PackedSelection.
                cut = selection.all(*allcuts)
                output['cutflow_msd'].fill(region=region,
                                           genflavor=normalize(
                                               genflavor, None),
                                           cut=0,
                                           weight=weights.weight(),
                                           msd=normalize(msd_matched, None))
                output['cutflow_eta'].fill(region=region,
                                           genflavor=normalize(genflavor, cut),
                                           cut=0,
                                           weight=weights.weight()[cut],
                                           eta=normalize(
                                               candidatejet.eta, cut))
                output['cutflow_pt'].fill(region=region,
                                          genflavor=normalize(genflavor, cut),
                                          cut=0,
                                          weight=weights.weight()[cut],
                                          pt=normalize(candidatejet.pt, cut))
                # ``cut`` is first the selection name from the loop and is
                # then rebound to the cumulative mask.  Because ``allcuts`` is
                # a set, a repeated name adds no new requirement but still
                # advances the step index.
                for i, cut in enumerate(cuts + ['ddcvbpass', 'ddcpass']):
                    allcuts.add(cut)
                    cut = selection.all(*allcuts)
                    output['cutflow_msd'].fill(region=region,
                                               genflavor=normalize(
                                                   genflavor, cut),
                                               cut=i + 1,
                                               weight=weights.weight()[cut],
                                               msd=normalize(msd_matched, cut))
                    output['cutflow_eta'].fill(
                        region=region,
                        genflavor=normalize(genflavor, cut),
                        cut=i + 1,
                        weight=weights.weight()[cut],
                        eta=normalize(candidatejet.eta, cut))
                    output['cutflow_pt'].fill(
                        region=region,
                        genflavor=normalize(genflavor, cut),
                        cut=i + 1,
                        weight=weights.weight()[cut],
                        pt=normalize(candidatejet.pt, cut))

                    # Optional event dump for signal-region data.  'ddcpass'
                    # is the last name appended above and is not in the
                    # 'signal' list, so this runs on the final step only.
                    # It records mass, fat-jet multiplicity, file name and
                    # event identifiers for events that also pass
                    # '_strict_mass' and '_high_score'.
                    if self._evtVizInfo and 'ddcpass' in allcuts and isRealData and region == 'signal':
                        if 'event' not in events.fields:
                            continue
                        _cut = selection.all(*allcuts, '_strict_mass',
                                             '_high_score')
                        # _cut = selection.all('_strict_mass'')
                        output['to_check'][
                            'mass'] += processor.column_accumulator(
                                normalize(msd_matched, _cut))
                        nfatjet = ak.sum(
                            ((fatjets.pt > 200) &
                             (abs(fatjets.eta) < 2.5) & fatjets.isTight),
                            axis=1)
                        output['to_check'][
                            'njet'] += processor.column_accumulator(
                                normalize(nfatjet, _cut))
                        output['to_check'][
                            'fname'] += processor.column_accumulator(
                                np.array([events.metadata['filename']] *
                                         len(normalize(msd_matched, _cut))))
                        output['to_check'][
                            'event'] += processor.column_accumulator(
                                normalize(events.event, _cut))
                        output['to_check'][
                            'luminosityBlock'] += processor.column_accumulator(
                                normalize(events.luminosityBlock, _cut))
                        output['to_check'][
                            'run'] += processor.column_accumulator(
                                normalize(events.run, _cut))

        # Nominal pass: the unvaried weight (None) plus every stored weight
        # variation.  Shifted pass: only the shift itself.
        if shift_name is None:
            systematics = [None] + list(weights.variations)
        else:
            systematics = [shift_name]

        def fill(region, systematic, wmod=None):
            """Fill the template-style histograms for one region and label.

            ``systematic`` is the label (``None`` means 'nominal').  If it
            names a known weight variation and ``wmod`` is None, that
            variation is applied through the weight modifier.  ``wmod`` is an
            optional per-event array multiplied onto the nominal weight
            instead.
            """
            selections = regions[region]
            cut = selection.all(*selections)
            sname = 'nominal' if systematic is None else systematic
            if wmod is None:
                if systematic in weights.variations:
                    weight = weights.weight(modifier=systematic)[cut]
                else:
                    weight = weights.weight()[cut]
            else:
                weight = weights.weight()[cut] * wmod[cut]

            output['templates'].fill(
                region=region,
                systematic=sname,
                runid=runmap(events.run)[cut],
                genflavor=normalize(genflavor, cut),
                pt=normalize(candidatejet.pt, cut),
                msd=normalize(msd_matched, cut),
                ddb=normalize(bvl, cut),
                ddc=normalize(cvl, cut),
                ddcvb=normalize(cvb, cut),
                weight=weight,
            )
            # NOTE(review): 'wtag3'..'wtag7' are not keys of ``regions`` as
            # defined in this method, so those entries never match here.
            if region in [
                    'wtag', 'wtag0', 'wtag2', 'wtag3', 'wtag4', 'wtag5',
                    'wtag6', 'wtag7', 'noselection'
            ]:  # and sname in ['nominal', 'pileup_weightDown', 'pileup_weightUp', 'jet_triggerDown', 'jet_triggerUp']:
                output['wtag'].fill(
                    region=region,
                    systematic=sname,
                    genflavor=normalize(genflavor, cut),
                    pt=normalize(candidatejet.pt, cut),
                    msd=normalize(msd_matched, cut),
                    n2ddt=normalize(candidatejet.n2ddt, cut),
                    ddc=normalize(cvl, cut),
                    ddcvb=normalize(cvb, cut),
                    weight=weight,
                )
            # if region in ['signal', 'noselection']:
            #     output['etaphi'].fill(
            #         region=region,
            #         systematic=sname,
            #         runid=runmap(events.run)[cut],
            #         genflavor=normalize(genflavor, cut),
            #         pt=normalize(candidatejet.pt, cut),
            #         eta=normalize(candidatejet.eta, cut),
            #         phi=normalize(candidatejet.phi, cut),
            #         ddc=normalize(cvl, cut),
            #         ddcvb=normalize(cvb, cut),
            #     ),
            # Generator-response histograms, simulation only (genBosonPt is
            # defined only in the simulation branch above).  The 'noweight'
            # variant uses genWeight * wmod when wmod is given and unit
            # weights otherwise.
            if not isRealData:
                if wmod is not None:
                    _custom_weight = events.genWeight[cut] * wmod[cut]
                else:
                    _custom_weight = np.ones_like(weight)
                output['genresponse_noweight'].fill(
                    region=region,
                    systematic=sname,
                    pt=normalize(candidatejet.pt, cut),
                    genpt=normalize(genBosonPt, cut),
                    weight=_custom_weight,
                )

                output['genresponse'].fill(
                    region=region,
                    systematic=sname,
                    pt=normalize(candidatejet.pt, cut),
                    genpt=normalize(genBosonPt, cut),
                    weight=weight,
                )
            # Optimisation histograms are filled for the unvaried case only.
            if systematic is None:
                output['signal_opt'].fill(
                    region=region,
                    genflavor=normalize(genflavor, cut),
                    ddc=normalize(cvl, cut),
                    ddcvb=normalize(cvb, cut),
                    msd=normalize(msd_matched, cut),
                    weight=weight,
                )
                output['signal_optb'].fill(
                    region=region,
                    genflavor=normalize(genflavor, cut),
                    ddb=normalize(bvl, cut),
                    msd=normalize(msd_matched, cut),
                    weight=weight,
                )

        for region in regions:
            # N-1 selection: every cut of the region except 'n2ddt'.
            cut = selection.all(*(set(regions[region]) - {'n2ddt'}))
            if shift_name is None:
                output['nminus1_n2ddt'].fill(
                    region=region,
                    n2ddt=normalize(candidatejet.n2ddt, cut),
                    weight=weights.weight()[cut],
                )
            for systematic in systematics:
                # Data is filled for the unvaried case only.
                if isRealData and systematic is not None:
                    continue
                fill(region, systematic)
            # Extra per-column LHE weight fills for GluGluH datasets on the
            # nominal pass.
            # NOTE(review): the guard checks for 'LHEWeight', but the first
            # loop reads events.LHEScaleWeight -- confirm both are present.
            if shift_name is None and 'GluGluH' in dataset and 'LHEWeight' in events.fields:
                for i in range(9):
                    fill(region, 'LHEScale_%d' % i, events.LHEScaleWeight[:,
                                                                          i])
                # Every LHEWeight field except the first one.
                for c in events.LHEWeight.fields[1:]:
                    fill(region, 'LHEWeight_%s' % c, events.LHEWeight[c])

        toc = time.time()
        output["filltime"] = toc - tic
        if shift_name is None:
            output["weightStats"] = weights.weightStatistics
        return {dataset: output}
    def process(self, events):

        output = self.accumulator.identity()

        output['total']['all'] += len(events)
        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        ## Muons
        muon = Collections(ev, "Muon", "vetoTTH").get()
        tightmuon = Collections(ev, "Muon", "tightTTH").get()
        dimuon = choose(muon, 2)
        SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron = Collections(ev, "Electron", "vetoTTH").get()
        tightelectron = Collections(ev, "Electron", "tightTTH").get()
        dielectron = choose(electron, 2)
        SSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0,
                          axis=1)

        lepton = ak.concatenate([muon, electron], axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        dilepton_mass = (leading_lepton + trailing_lepton).mass
        dilepton_pt = (leading_lepton + trailing_lepton).pt
        dilepton_dR = delta_r(leading_lepton, trailing_lepton)

        mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi)
        min_mt_lep_met = ak.min(mt_lep_met, axis=1)

        ## Jets
        jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet = jet[ak.argsort(
            jet.pt_nom, ascending=False
        )]  # need to sort wrt smeared and recorrected jet pt
        jet = jet[~match(jet, muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons

        central = jet[(abs(jet.eta) < 2.4)]
        btag = getBTagsDeepFlavB(
            jet, year=self.year)  # should study working point for DeepJet
        light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd = getFwdJet(light)
        fwd_noPU = getFwdJet(light, puId=False)

        tau = getTaus(ev)
        track = getIsoTracks(ev)
        ## forward jets
        j_fwd = fwd[ak.singletons(ak.argmax(
            fwd.p, axis=1))]  # highest momentum spectator

        high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:, :2]

        bl = cross(lepton, high_score_btag)
        bl_dR = delta_r(bl['0'], bl['1'])
        min_bl_dR = ak.min(bl_dR, axis=1)

        jf = cross(j_fwd, jet)
        mjf = (jf['0'] + jf['1']).mass
        j_fwd2 = jf[ak.singletons(
            ak.argmax(mjf, axis=1)
        )]['1']  # this is the jet that forms the largest invariant mass with j_fwd
        delta_eta = ak.fill_none(
            ak.pad_none(abs(j_fwd2.eta - j_fwd.eta), 1, clip=True), 0)

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt,
                                                            axis=1)

        ## event selectors
        filters = getFilters(ev, year=self.year, dataset=dataset)

        dilep = ((ak.num(tightelectron) + ak.num(tightmuon)) == 2)
        lep0pt = ((ak.num(electron[(electron.pt > 25)]) +
                   ak.num(muon[(muon.pt > 25)])) > 0)
        lep1pt = ((ak.num(electron[(electron.pt > 20)]) +
                   ak.num(muon[(muon.pt > 20)])) > 1)
        lepveto = ((ak.num(electron) + ak.num(muon)) == 2)

        selection = PackedSelection()
        selection.add('lepveto', lepveto)
        selection.add('dilep', dilep)
        selection.add('filter', (filters))
        selection.add('p_T(lep0)>25', lep0pt)
        selection.add('p_T(lep1)>20', lep1pt)
        selection.add('SS', (SSlepton | SSelectron | SSmuon))
        selection.add('N_jet>3', (ak.num(jet) >= 4))
        selection.add('N_central>2', (ak.num(central) >= 3))
        selection.add('N_btag>0', (ak.num(btag) >= 1))
        selection.add('N_fwd>0', (ak.num(fwd) >= 1))

        #ss_reqs = ['lepveto', 'dilep', 'filter', 'p_T(lep0)>25', 'p_T(lep1)>20', 'SS']
        ss_reqs = [
            'lepveto', 'dilep', 'filter', 'p_T(lep0)>25', 'p_T(lep1)>20', 'SS'
        ]
        #bl_reqs = ss_reqs + ['N_jet>3', 'N_central>2', 'N_btag>0', 'N_fwd>0']
        bl_reqs = ss_reqs + ['N_jet>3', 'N_central>2', 'N_btag>0']

        ss_reqs_d = {sel: True for sel in ss_reqs}
        ss_selection = selection.require(**ss_reqs_d)
        bl_reqs_d = {sel: True for sel in bl_reqs}
        BL = selection.require(**bl_reqs_d)

        weight = Weights(len(ev))

        if not dataset == 'MuonEG':
            # lumi weight
            weight.add("weight", ev.weight)

            # PU weight - not in the babies...
            weight.add("PU",
                       ev.puWeight,
                       weightUp=ev.puWeightUp,
                       weightDown=ev.puWeightDown,
                       shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        #cutflow     = Cutflow(output, ev, weight=weight)
        #cutflow_reqs_d = {}
        #for req in bl_reqs:
        #    cutflow_reqs_d.update({req: True})
        #    cutflow.addRow( req, selection.require(**cutflow_reqs_d) )

        labels = {
            'topW_v3': 0,
            'TTW': 1,
            'TTZ': 2,
            'TTH': 3,
            'ttbar': 4,
            'ttbar1l_MG': 4
        }
        if dataset in labels:
            label_mult = labels[dataset]
        else:
            label_mult = 5
        label = np.ones(len(ev[BL])) * label_mult

        output["n_lep"] += processor.column_accumulator(
            ak.to_numpy((ak.num(electron) + ak.num(muon))[BL]))
        output["n_lep_tight"] += processor.column_accumulator(
            ak.to_numpy((ak.num(tightelectron) + ak.num(tightmuon))[BL]))

        output["lead_lep_pt"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(leading_lepton[BL].pt, axis=1)))
        output["lead_lep_eta"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(leading_lepton[BL].eta, axis=1)))
        output["lead_lep_phi"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(leading_lepton[BL].phi, axis=1)))
        output["lead_lep_charge"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(leading_lepton[BL].charge, axis=1)))

        output["sublead_lep_pt"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(trailing_lepton[BL].pt, axis=1)))
        output["sublead_lep_eta"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(trailing_lepton[BL].eta, axis=1)))
        output["sublead_lep_phi"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(trailing_lepton[BL].phi, axis=1)))
        output["sublead_lep_charge"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(trailing_lepton[BL].charge, axis=1)))

        output["lead_jet_pt"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(jet[:, 0:1][BL].pt, axis=1)))
        output["lead_jet_eta"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(jet[:, 0:1][BL].eta, axis=1)))
        output["lead_jet_phi"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(jet[:, 0:1][BL].phi, axis=1)))

        output["sublead_jet_pt"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(jet[:, 1:2][BL].pt, axis=1)))
        output["sublead_jet_eta"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(jet[:, 1:2][BL].eta, axis=1)))
        output["sublead_jet_phi"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(jet[:, 1:2][BL].phi, axis=1)))

        output["lead_btag_pt"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(high_score_btag[:, 0:1][BL].pt, axis=1)))
        output["lead_btag_eta"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(high_score_btag[:, 0:1][BL].eta, axis=1)))
        output["lead_btag_phi"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(high_score_btag[:, 0:1][BL].phi, axis=1)))

        output["sublead_btag_pt"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(high_score_btag[:, 1:2][BL].pt, axis=1)))
        output["sublead_btag_eta"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(high_score_btag[:, 1:2][BL].eta, axis=1)))
        output["sublead_btag_phi"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(high_score_btag[:, 1:2][BL].phi, axis=1)))

        output["fwd_jet_p"] += processor.column_accumulator(
            ak.to_numpy(
                ak.flatten(ak.fill_none(ak.pad_none(j_fwd[BL].p, 1, clip=True),
                                        0),
                           axis=1)))
        output["fwd_jet_pt"] += processor.column_accumulator(
            ak.to_numpy(
                ak.flatten(ak.fill_none(
                    ak.pad_none(j_fwd[BL].pt, 1, clip=True), 0),
                           axis=1)))
        output["fwd_jet_eta"] += processor.column_accumulator(
            ak.to_numpy(
                ak.flatten(ak.fill_none(
                    ak.pad_none(j_fwd[BL].eta, 1, clip=True), 0),
                           axis=1)))
        output["fwd_jet_phi"] += processor.column_accumulator(
            ak.to_numpy(
                ak.flatten(ak.fill_none(
                    ak.pad_none(j_fwd[BL].phi, 1, clip=True), 0),
                           axis=1)))

        output["mjj_max"] += processor.column_accumulator(
            ak.to_numpy(ak.fill_none(ak.max(mjf[BL], axis=1), 0)))
        output["delta_eta_jj"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(delta_eta[BL], axis=1)))

        output["met"] += processor.column_accumulator(ak.to_numpy(met_pt[BL]))
        output["ht"] += processor.column_accumulator(ak.to_numpy(ht[BL]))
        output["st"] += processor.column_accumulator(ak.to_numpy(st[BL]))
        output["n_jet"] += processor.column_accumulator(
            ak.to_numpy(ak.num(jet[BL])))
        output["n_btag"] += processor.column_accumulator(
            ak.to_numpy(ak.num(btag[BL])))
        output["n_fwd"] += processor.column_accumulator(
            ak.to_numpy(ak.num(fwd[BL])))
        output["n_central"] += processor.column_accumulator(
            ak.to_numpy(ak.num(central[BL])))
        output["n_tau"] += processor.column_accumulator(
            ak.to_numpy(ak.num(tau[BL])))
        output["n_track"] += processor.column_accumulator(
            ak.to_numpy(ak.num(track[BL])))

        output["dilepton_pt"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(dilepton_pt[BL], axis=1)))
        output["dilepton_mass"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(dilepton_mass[BL], axis=1)))
        output["min_bl_dR"] += processor.column_accumulator(
            ak.to_numpy(min_bl_dR[BL]))
        output["min_mt_lep_met"] += processor.column_accumulator(
            ak.to_numpy(min_mt_lep_met[BL]))

        output["label"] += processor.column_accumulator(label)
        output["weight"] += processor.column_accumulator(weight.weight()[BL])

        output["presel"]["all"] += len(ev[ss_selection])
        output["sel"]["all"] += len(ev[BL])

        return output
Ejemplo n.º 7
0
    def __init__(self, ev, obj, wp, year=2018, verbose=0):
        """Add derived lepton variables to ``ev`` and build ``self.selection``.

        For ``obj == "Muon"`` or ``obj == "Electron"`` the matching collection
        of ``ev`` is extended in place with the fields ``absMiniIso``,
        ``deepJet``, ``jetRelIsoV2``, ``conePt`` and ``id`` (plus ``ptErrRel``
        for muons and ``etaSC`` for electrons), and that collection is stored
        as ``self.cand``.  ``self.getSelection()`` is then called and the
        result in ``self.selection`` is AND-ed with working-point specific
        requirements.

        Args:
            ev: Event record providing ``Muon`` / ``Electron`` collections;
                modified in place.
            obj: Collection name; only ``"Muon"`` and ``"Electron"`` are
                handled here.
            wp: Working-point name, used as key into ``obj_def[obj]``.
                ``None`` yields an empty ``selection_dict``.
            year: Data-taking year; isolation thresholds are defined for
                2016, 2017 and 2018.
            verbose: If > 0, print which custom requirements were applied.

        Raises:
            ValueError: If ``obj`` is ``"Muon"`` or ``"Electron"`` and ``year``
                is not one of 2016, 2017, 2018 (previously this surfaced as an
                unbound-variable error on the isolation thresholds).
        """
        self.obj = obj
        self.wp = wp
        if self.wp is None:
            self.selection_dict = {}
        else:
            self.selection_dict = obj_def[self.obj][self.wp]

        self.v = verbose
        self.year = year

        # Numeric ID level written to the 'id' field of every candidate:
        # 0 = veto/loose, 1 = fake(able), 2 = tight.
        # Guard against wp=None so the None branch above is actually reachable.
        # NOTE(review): id_level stays None when no keyword matches; the
        # `ak.ones_like(conePt) * id_level` below would then fail - confirm
        # whether such working points are ever used with Muon/Electron.
        wp_name = wp.lower() if wp is not None else ''
        id_level = None
        if 'veto' in wp_name or 'loose' in wp_name:
            id_level = 0
        elif 'fake' in wp_name:
            id_level = 1
        elif 'tight' in wp_name:
            id_level = 2

        if self.obj == "Muon":
            # collections are already there, so we just need to calculate missing ones
            ev['Muon', 'absMiniIso'] = ev.Muon.miniPFRelIso_all * ev.Muon.pt
            ev['Muon', 'ptErrRel'] = ev.Muon.ptErr / ev.Muon.pt

            # this is what we are using:
            # - jetRelIsoV2: jetRelIso if the matched jet is within deltaR<0.4, pfRelIso03_all otherwise
            # - deepJet: btagDeepFlavB of the matched jet if it is within deltaR<0.4, 0 otherwise
            # Leptons without a matched jet get deltaR=99, i.e. count as "far".
            jet_is_close = ak.fill_none(
                ev.Muon.delta_r(ev.Muon.matched_jet), 99) < 0.4
            mask_close = jet_is_close * 1
            mask_far = (~jet_is_close) * 1

            deepJet = ak.fill_none(ev.Muon.matched_jet.btagDeepFlavB,
                                   0) * mask_close + 0 * mask_far
            jetRelIsoV2 = ev.Muon.jetRelIso * mask_close + ev.Muon.pfRelIso03_all * mask_far

            # Year-dependent thresholds: I_1 (miniPFRelIso_all), I_2 (jet pt
            # scale factor), I_3 (jetPtRelv2).
            if self.year == 2017 or self.year == 2018:
                I_1 = 0.11
                I_2 = 0.74
                I_3 = 6.8
            elif self.year == 2016:
                I_1 = 0.16
                I_2 = 0.76
                I_3 = 7.2
            else:
                raise ValueError(
                    "No muon isolation thresholds defined for year %r" %
                    (self.year, ))

            # Element-wise maxima are built by adding a length-1 innermost
            # axis, concatenating the two operands along it and reducing.
            PF_unflatten = ak.from_regular(
                ev.Muon.miniPFRelIso_all[:, :, np.newaxis])
            max_miniIso = ak.max(
                ak.concatenate(
                    [PF_unflatten - I_1,
                     ak.zeros_like(PF_unflatten)], axis=2),
                axis=2)  # equivalent to max(0, ev.Muon.miniPFRelIso_all - I_1)
            muon_pt_unflatten = ak.from_regular(ev.Muon.pt[:, :, np.newaxis])
            jet_pt_unflatten = ak.from_regular(
                ev.Muon.matched_jet.pt[:, :, np.newaxis])
            max_pt = ak.max(
                ak.concatenate([muon_pt_unflatten, jet_pt_unflatten * I_2],
                               axis=2),
                axis=2)  # max(ev.Muon.pt, ev.Muon.matched_jet.pt * I_2)

            # conePt = pt * (1 + max_miniIso) where jetPtRelv2 > I_3,
            #          max_pt otherwise.
            passes_ptrel = ev.Muon.jetPtRelv2 > I_3
            conePt = (ev.Muon.pt *
                      (1 + max_miniIso)) * passes_ptrel + (max_pt *
                                                           ~passes_ptrel)
            ev['Muon', 'deepJet'] = ak.copy(deepJet)
            ev['Muon', 'jetRelIsoV2'] = jetRelIsoV2
            ev['Muon', 'conePt'] = conePt
            ev['Muon', 'id'] = ak.ones_like(conePt) * id_level

            self.cand = ev.Muon

        elif self.obj == "Electron":
            # calculate new variables. assignment is awkward, but what can you do.
            ev['Electron',
               'absMiniIso'] = ev.Electron.miniPFRelIso_all * ev.Electron.pt
            ev['Electron', 'etaSC'] = ev.Electron.eta + ev.Electron.deltaEtaSC

            # Year-dependent thresholds: I_1 (miniPFRelIso_all), I_2 (jet pt
            # scale factor), I_3 (jetPtRelv2).
            if self.year == 2017 or self.year == 2018:
                I_1 = 0.07
                I_2 = 0.78
                I_3 = 8.0
            elif self.year == 2016:
                I_1 = 0.12
                I_2 = 0.80
                I_3 = 7.2
            else:
                raise ValueError(
                    "No electron isolation thresholds defined for year %r" %
                    (self.year, ))

            # We keep using the jet match already stored on the electron
            # (matched_jet) and only check the deltaR distance.
            # this is what we are using:
            # - jetRelIsoV2: jetRelIso if the matched jet is within deltaR<0.4, pfRelIso03_all otherwise
            # - deepJet: btagDeepFlavB of the matched jet if it is within deltaR<0.4, 0 otherwise
            # Leptons without a matched jet get deltaR=99, i.e. count as "far".
            jet_is_close = ak.fill_none(
                ev.Electron.delta_r(ev.Electron.matched_jet), 99) < 0.4
            mask_close = jet_is_close * 1
            mask_far = (~jet_is_close) * 1

            deepJet = ak.fill_none(ev.Electron.matched_jet.btagDeepFlavB,
                                   0) * mask_close
            jetRelIsoV2 = ev.Electron.jetRelIso * mask_close + ev.Electron.pfRelIso03_all * mask_far

            # Element-wise maxima, built the same way as for muons.
            PF_unflatten = ak.from_regular(
                ev.Electron.miniPFRelIso_all[:, :, np.newaxis])
            max_miniIso = ak.max(
                ak.concatenate(
                    [PF_unflatten - I_1,
                     ak.zeros_like(PF_unflatten)], axis=2),
                axis=2)  # equivalent to max(0, ev.Electron.miniPFRelIso_all - I_1)
            electron_pt_unflatten = ak.from_regular(ev.Electron.pt[:, :,
                                                                   np.newaxis])
            jet_pt_unflatten = ak.from_regular(
                ev.Electron.matched_jet.pt[:, :, np.newaxis])
            max_pt = ak.max(
                ak.concatenate([electron_pt_unflatten, jet_pt_unflatten * I_2],
                               axis=2),
                axis=2)  # max(ev.Electron.pt, ev.Electron.matched_jet.pt * I_2)

            # conePt = pt * (1 + max_miniIso) where jetPtRelv2 > I_3,
            #          max_pt otherwise.
            passes_ptrel = ev.Electron.jetPtRelv2 > I_3
            conePt = (ev.Electron.pt *
                      (1 + max_miniIso)) * passes_ptrel + (max_pt *
                                                           ~passes_ptrel)

            ev['Electron', 'deepJet'] = ak.copy(deepJet)
            ev['Electron', 'jetRelIsoV2'] = jetRelIsoV2
            ev['Electron', 'conePt'] = conePt
            ev['Electron', 'id'] = ak.ones_like(conePt) * id_level

            ev['Electron', 'jetRelIso'] = ev.Electron.jetRelIso
            ev['Electron', 'jetPtRelv2'] = ev.Electron.jetPtRelv2

            self.cand = ev.Electron

        # Presumably builds self.selection from self.selection_dict - the
        # method is defined elsewhere in the class.
        self.getSelection()

        # Working-point specific requirements that are AND-ed on top.
        if self.obj == "Electron" and self.wp == "tight":
            # NOTE(review): thresholds are hard-coded (they equal the 2017/18
            # electron values above) regardless of self.year - confirm.
            self.selection = self.selection & self.getElectronMVAID(
            ) & self.getIsolation(0.07, 0.78, 8.0) & self.isTriggerSafeNoIso()
            if self.v > 0:
                print(" - custom ID and multi-isolation")

        if self.obj == "Electron" and self.wp == "tightFCNC":
            self.selection = self.selection & self.getElectronMVAID(
            ) & self.getFCNCIsolation(ev.Electron.jetRelIso,
                                      ev.Electron.jetPtRelv2, I_2,
                                      I_3) & (ev.Electron.miniPFRelIso_all <
                                              I_1) & self.isTriggerSafeNoIso()
            if self.v > 0:
                print(" - custom ID and multi-isolation")

        if self.obj == "Muon" and self.wp == "tight":
            # NOTE(review): thresholds are hard-coded (they equal the 2017/18
            # muon values above) regardless of self.year - confirm.
            self.selection = self.selection & self.getIsolation(
                0.11, 0.74, 6.8)
            if self.v > 0:
                print(" - custom multi-isolation")

        if self.obj == "Muon" and self.wp == "tightFCNC":
            self.selection = self.selection & self.getFCNCIsolation(
                ev.Muon.jetRelIso, ev.Muon.jetPtRelv2, I_2,
                I_3) & (ev.Muon.miniPFRelIso_all < I_1)
            if self.v > 0:
                print(" - custom multi-isolation")

        if self.obj == "Electron" and (self.wp == "tightTTH"
                                       or self.wp == 'fakeableTTH'
                                       or self.wp == "tightSSTTH"
                                       or self.wp == 'fakeableSSTTH'):
            # NOTE(review): getSigmaIEtaIEta is used without call parentheses;
            # this is only correct if it is a property - verify against its
            # definition.
            self.selection = self.selection & self.getSigmaIEtaIEta
            if self.v > 0:
                print(" - SigmaIEtaIEta")

        if self.obj == "Electron" and self.wp == "looseFCNC":
            self.selection = self.selection & (ev.Electron.miniPFRelIso_all <
                                               0.4)

        if self.obj == 'Muon' and (self.wp == 'fakeableTTH'
                                   or self.wp == 'fakeableSSTTH'):
            # b-tag score of the matched jet (0 if there is none) must stay
            # below a conePt-dependent threshold.
            self.selection = self.selection & (ak.fill_none(
                ev.Muon.matched_jet.btagDeepFlavB, 0) < self.getThreshold(
                    self.cand.conePt, min_pt=20, max_pt=45))
            if self.v > 0:
                print(" - interpolated deepJet")

        if self.obj == "Muon" and self.wp == "looseFCNC":
            self.selection = self.selection & (ev.Muon.miniPFRelIso_all < 0.4)
def test_min_max():
    """Check ``ak.min`` / ``ak.max`` on a jagged array of ``datetime64`` values.

    The input mixes second-, day- and month-resolution timestamps.  The
    reductions are checked without an axis, along ``axis=0`` and along
    ``axis=1``.
    """
    stamp = np.datetime64

    # Second-resolution timestamps that recur throughout the test.
    jan = stamp("2020-01-27T10:41:11")
    feb = stamp("2020-02-27T10:41:11")
    mar = stamp("2020-03-27T10:41:11")
    apr = stamp("2020-04-27T10:41:11")
    jun = stamp("2020-06-27T10:41:11")

    # Coarser-resolution inputs and the values they are expected to appear as
    # in reduction results (presumably after conversion to seconds).
    may_month = stamp("2020-05")
    apr_day = stamp("2020-04-27")
    may_reduced = stamp("2020-05-01T20:56:24")
    apr_day_reduced = stamp("2020-04-27T00:00:00")

    rows = [
        [mar, jan, may_month, jan, apr],
        [apr_day, feb, jan, jun],
        [feb, mar, jan],
    ]
    array = ak.Array(rows)

    # Round trip back to Python lists reproduces the input.
    assert ak.to_list(array) == rows

    # Reduction over everything.
    assert ak.min(array) == jan
    assert ak.max(array) == jun

    # Reductions along one axis, checked in the order
    # min(axis=0), max(axis=0), min(axis=1), max(axis=1).
    expectations = (
        (ak.min, 0, [feb, jan, jan, jan, apr]),
        (ak.max, 0, [apr_day_reduced, mar, may_reduced, jun, apr]),
        (ak.min, 1, [jan, jan, jan]),
        (ak.max, 1, [may_reduced, jun, mar]),
    )
    for reducer, axis, expected in expectations:
        assert ak.to_list(reducer(array, axis=axis)) == expected
Ejemplo n.º 9
0
def uproot_tree_to_numpy(fname,
                         MeanNormTuple,
                         inbranches_listlist,
                         nMaxslist,
                         nevents,
                         treename="ttree",
                         stop=None,
                         branches=None):
    """Read branches of a ROOT tree into one standardised NumPy array.

    Every branch named in ``inbranches_listlist`` is read from
    ``fname[treename]``, padded along its last axis to the feature length of
    its group (missing entries become ``0.``), converted to NumPy and
    concatenated along the last axis.  The result is then standardised with
    its own per-column mean and standard deviation (computed over axis 0).
    Diagnostic output is printed along the way.

    Args:
        fname: Path of the file passed to ``u3.open``.
        MeanNormTuple: One ``(mean, norm)`` pair per branch group.  The values
            are only printed; they are NOT applied to the data.
        inbranches_listlist: List of branch-name lists, one per group.
        nMaxslist: Feature length per group.  Groups with length > 1 are
            padded and clipped to that length; groups with length 1 get a
            trailing axis of size 1 added.
        nevents: Unused.
        treename: Name of the tree inside the file.
        stop: Unused.
        branches: Unused.

    Returns:
        The concatenated, standardised array (presumably of shape
        ``(n_entries, n_features)`` - TODO confirm against the input trees).
    """
    tree = u3.open(fname)[treename]

    # One padded awkward array per branch, in group order.  Clipping is only
    # requested for groups with more than one feature.
    padded_branches = [
        ak.fill_none(
            ak.pad_none(tree[branch_name].array(),
                        target=feature_length,
                        axis=-1,
                        clip=feature_length > 1), 0.)
        for feature_length, branch_list in zip(nMaxslist, inbranches_listlist)
        for branch_name in branch_list
    ]

    # Flatten the per-group settings so that they line up with the branches.
    branchnames = [n for names in inbranches_listlist for n in names]
    feature_lengths = [
        f for names, f in zip(inbranches_listlist, nMaxslist) for _ in names
    ]
    means = [
        m[0] for names, m in zip(inbranches_listlist, MeanNormTuple)
        for _ in names
    ]
    norms = [
        m[1] for names, m in zip(inbranches_listlist, MeanNormTuple)
        for _ in names
    ]
    print("Debugigng means and norms")
    print(means)
    print(norms)

    print(branchnames)
    branches_numpy = []
    # means/norms take part in the zip (so a short MeanNormTuple still
    # truncates the loop) but their values are not applied.
    for br, brname, fl, _mean, _norm in zip(padded_branches, branchnames,
                                            feature_lengths, means, norms):
        print("DBG {}".format(brname))
        print(br)
        print("Length: {}".format(len(br)))
        if brname == "TagVarCSV_trackJetDistVal":
            print("BONUS DEBUG!")
            print("Min: {}, Max: {}".format(ak.min(ak.count(br, axis=-1)),
                                            ak.max(ak.count(br, axis=-1))))
        # Shift 0 / scale 1: the per-branch mean/norm are deliberately not
        # used here; the global standardisation below is applied instead.
        if fl > 1:
            branches_numpy.append((ak.to_numpy(br) - 0.) / 1.)
        elif fl == 1:
            branches_numpy.append(
                (np.expand_dims(ak.to_numpy(br), axis=-1) - 0.) / 1.)
        # NOTE(review): branches with a feature length < 1 are silently left
        # out of the output - confirm this is intended.
    print("FINISHED THIS LOOP, YOU ARE PERFECT! :) ")

    numpyarray = np.concatenate(branches_numpy, axis=-1)
    print("\n" * 5)
    print("Some metrics about this numpy array")
    column_mean = np.mean(numpyarray, axis=0)
    print(column_mean)
    column_std = np.std(numpyarray, axis=0)
    print(column_std)
    print("Normalize array")
    # Columns with zero standard deviation (e.g. constant padding) turn into
    # NaN here because of the division by zero.
    numpyarray = (numpyarray - column_mean) / column_std
    print("Some metrics about this numpy array")
    print(np.mean(numpyarray, axis=0))
    print(np.std(numpyarray, axis=0))
    return numpyarray
Ejemplo n.º 10
0
    def process(self, events):

        events = events[
            ak.num(events.Jet) >
            0]  #corrects for rare case where there isn't a single jet in event
        output = self.accumulator.identity()

        # we can use a very loose preselection to filter the events. nothing is done with this presel, though
        presel = ak.num(events.Jet) >= 0

        ev = events[presel]
        dataset = ev.metadata['dataset']

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ##Jets
        Jets = events.Jet

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        ### For FCNC, we want electron -> tightTTH
        ele_t = Collections(ev, "Electron", "tightFCNC", year=self.year).get()
        ele_t_p = ele_t[((ele_t.genPartFlav == 1) | (ele_t.genPartFlav == 15))]
        ele_t_np = ele_t[((ele_t.genPartFlav != 1) &
                          (ele_t.genPartFlav != 15))]

        ele_l = Collections(ev, "Electron", "fakeableFCNC",
                            year=self.year).get()
        ele_l_p = ele_l[((ele_l.genPartFlav == 1) | (ele_l.genPartFlav == 15))]
        ele_l_np = ele_l[((ele_l.genPartFlav != 1) &
                          (ele_l.genPartFlav != 15))]

        mu_t = Collections(ev, "Muon", "tightFCNC", year=self.year).get()
        mu_t_p = mu_t[((mu_t.genPartFlav == 1) | (mu_t.genPartFlav == 15))]
        mu_t_np = mu_t[((mu_t.genPartFlav != 1) & (mu_t.genPartFlav != 15))]

        mu_l = Collections(ev, "Muon", "fakeableFCNC", year=self.year).get()
        mu_l_p = mu_l[((mu_l.genPartFlav == 1) | (mu_l.genPartFlav == 15))]
        mu_l_np = mu_l[((mu_l.genPartFlav != 1) & (mu_l.genPartFlav != 15))]

        #clean jets :
        # we want at least two jets that are outside of the lepton jets by deltaR > 0.4
        jets = getJets(ev, maxEta=2.4, minPt=40, pt_var='pt')
        jet_sel = (ak.num(jets[~(match(jets, ele_l, deltaRCut=0.4)
                                 | match(jets, mu_l, deltaRCut=0.4))]) >= 2)
        """Now We are making the different selections for the different regions. As a reminder, our SR is one tight gen-level prompt and one tight gen-level nonprompt, and our CR is
        one tight gen-level prompt and one loose NOT tight gen-level nonprompt"""

        mumu_SR = ak.concatenate([mu_t_p, mu_t_np], axis=1)
        mumu_SR_SS = (ak.sum(mumu_SR.charge, axis=1) != 0)
        mumu_SR_sel = (ak.num(mu_t_p) == 1) & (ak.num(mu_t_np) == 1) & (
            ak.num(mu_l) == 2) & jet_sel & mumu_SR_SS & (ak.num(
                mumu_SR[mumu_SR.pt > 20]) > 1) & (ak.num(ele_l) == 0)

        mumu_CR = ak.concatenate([mu_t_p, mu_l_np], axis=1)
        mumu_CR_SS = (ak.sum(mumu_CR.charge, axis=1) != 0)
        mumu_CR_sel = (ak.num(mu_t_p) == 1) & (ak.num(mu_l_np) == 1) & (
            ak.num(mu_l) == 2) & jet_sel & mumu_CR_SS & (ak.num(
                mumu_CR[mumu_CR.pt > 20]) > 1) & (ak.num(ele_l) == 0)

        ee_SR = ak.concatenate([ele_t_p, ele_t_np], axis=1)
        ee_SR_SS = (ak.sum(ee_SR.charge, axis=1) != 0)
        ee_SR_sel = (ak.num(ele_t_p) == 1) & (ak.num(ele_t_np) == 1) & (
            ak.num(ele_l) == 2) & jet_sel & ee_SR_SS & (ak.num(
                ee_SR[ee_SR.pt > 20]) > 1) & (ak.num(mu_l) == 0)

        ee_CR = ak.concatenate([ele_t_p, ele_l_np], axis=1)
        ee_CR_SS = (ak.sum(ee_CR.charge, axis=1) != 0)
        ee_CR_sel = (ak.num(ele_t_p) == 1) & (ak.num(ele_l_np) == 1) & (
            ak.num(ele_l) == 2) & jet_sel & ee_CR_SS & (ak.num(
                ee_CR[ee_CR.pt > 20]) > 1) & (ak.num(mu_l) == 0)

        mue_SR = ak.concatenate([mu_t_p, ele_t_np], axis=1)
        mue_SR_SS = (ak.sum(mue_SR.charge, axis=1) != 0)
        mue_SR_sel = (ak.num(mu_t_p) == 1) & (ak.num(ele_t_np) == 1) & (
            ak.num(ele_l) == 1) & jet_sel & mue_SR_SS & (ak.num(
                mue_SR[mue_SR.pt > 20]) > 1) & (ak.num(mu_l) == 1)

        mue_CR = ak.concatenate([mu_t_p, ele_l_np], axis=1)
        mue_CR_SS = (ak.sum(mue_CR.charge, axis=1) != 0)
        mue_CR_sel = (ak.num(mu_t_p) == 1) & (ak.num(ele_l_np) == 1) & (
            ak.num(ele_l) == 1) & jet_sel & mue_CR_SS & (ak.num(
                mue_CR[mue_CR.pt > 20]) > 1) & (ak.num(mu_l) == 1)

        emu_SR = ak.concatenate([ele_t_p, mu_t_np], axis=1)
        emu_SR_SS = (ak.sum(emu_SR.charge, axis=1) != 0)
        emu_SR_sel = (ak.num(ele_t_p) == 1) & (ak.num(mu_t_np) == 1) & (
            ak.num(mu_l) == 1) & jet_sel & emu_SR_SS & (ak.num(
                emu_SR[emu_SR.pt > 20]) > 1) & (ak.num(ele_l) == 1)

        emu_CR = ak.concatenate([ele_t_p, mu_l_np], axis=1)
        emu_CR_SS = (ak.sum(emu_CR.charge, axis=1) != 0)
        emu_CR_sel = (ak.num(ele_t_p) == 1) & (ak.num(mu_l_np) == 1) & (
            ak.num(mu_l) == 1) & jet_sel & emu_CR_SS & (ak.num(
                emu_CR[emu_CR.pt > 20]) > 1) & (ak.num(ele_l) == 1)

        dilep_selections = {
            "mumu_SR": mumu_SR_sel,
            "mumu_CR": mumu_CR_sel,
            "ee_SR": ee_SR_sel,
            "ee_CR": ee_CR_sel,
            "mue_SR": mue_SR_sel,
            "mue_CR": mue_CR_sel,
            "emu_SR": emu_SR_sel,
            "emu_CR": emu_CR_sel
        }

        #combine all selections for generic CR and SR
        CR_sel = mumu_CR_sel | ee_CR_sel | mue_CR_sel | emu_CR_sel
        SR_sel = mumu_SR_sel | ee_SR_sel | mue_SR_sel | emu_SR_sel

        electron_2018 = fake_rate("../data/fake_rate/FR_electron_2018.p")
        electron_2017 = fake_rate("../data/fake_rate/FR_electron_2017.p")
        electron_2016 = fake_rate("../data/fake_rate/FR_electron_2016.p")
        muon_2018 = fake_rate("../data/fake_rate/FR_muon_2018.p")
        muon_2017 = fake_rate("../data/fake_rate/FR_muon_2017.p")
        muon_2016 = fake_rate("../data/fake_rate/FR_muon_2016.p")

        if self.year == 2018:
            weight_muon = muon_2018.FR_weight(mu_l_np)
            weight_electron = electron_2018.FR_weight(ele_l_np)

        elif self.year == 2017:
            weight_muon = muon_2017.FR_weight(mu_l_np)
            weight_electron = electron_2017.FR_weight(ele_l_np)

        elif self.year == 2016:
            weight_muon = muon_2016.FR_weight(mu_l_np)
            weight_electron = electron_2016.FR_weight(ele_l_np)

        #separate by different combinations of two-lepton events
        output['EE_CR'].fill(dataset=dataset,
                             weight=np.sum(ee_CR_sel[ee_CR_sel]))
        output['EE_CR_weighted'].fill(
            dataset=dataset,
            weight=np.sum(ak.to_numpy(weight_electron[ee_CR_sel])))
        output['EE_SR'].fill(dataset=dataset,
                             weight=np.sum(ee_SR_sel[ee_SR_sel]))

        output['MM_CR'].fill(dataset=dataset,
                             weight=np.sum(mumu_CR_sel[mumu_CR_sel]))
        output['MM_CR_weighted'].fill(
            dataset=dataset,
            weight=np.sum(ak.to_numpy(weight_muon[mumu_CR_sel])))
        output['MM_SR'].fill(dataset=dataset,
                             weight=np.sum(mumu_SR_sel[mumu_SR_sel]))

        output['EM_CR'].fill(dataset=dataset,
                             weight=np.sum(emu_CR_sel[emu_CR_sel]))
        output['EM_CR_weighted'].fill(
            dataset=dataset,
            weight=np.sum(ak.to_numpy(weight_muon[emu_CR_sel])))
        output['EM_SR'].fill(dataset=dataset,
                             weight=np.sum(emu_SR_sel[emu_SR_sel]))

        output['ME_CR'].fill(dataset=dataset,
                             weight=np.sum(mue_CR_sel[mue_CR_sel]))
        output['ME_CR_weighted'].fill(
            dataset=dataset,
            weight=np.sum(ak.to_numpy(weight_electron[mue_CR_sel])))
        output['ME_SR'].fill(dataset=dataset,
                             weight=np.sum(mue_SR_sel[mue_SR_sel]))

        #fill combined histograms now (basic definitions are in default_accumulators.py)
        self.SS_fill_weighted(output["MET_CR"],
                              mumu_CR_sel,
                              ee_CR_sel,
                              mue_CR_sel,
                              emu_CR_sel,
                              dataset=dataset,
                              pt=ev.MET.pt)
        self.SS_fill_weighted(output["MET_CR_weighted"],
                              mumu_CR_sel,
                              ee_CR_sel,
                              mue_CR_sel,
                              emu_CR_sel,
                              mu_weights=weight_muon,
                              e_weights=weight_electron,
                              dataset=dataset,
                              pt=ev.MET.pt)
        self.SS_fill_weighted(output["MET_SR"],
                              mumu_SR_sel,
                              ee_SR_sel,
                              mue_SR_sel,
                              emu_SR_sel,
                              dataset=dataset,
                              pt=ev.MET.pt)
        self.fill_pt_individual(output, dataset, ev.MET.pt, "MET",
                                dilep_selections, weight_muon, weight_electron)
        #leading lepton pt
        LeadLep_pt = ak.max(ak.concatenate([ev.Muon.pt, ev.Electron.pt],
                                           axis=1),
                            axis=1)
        #sum of all regions
        self.SS_fill_weighted(output["pt_LeadLep_CR"],
                              mumu_CR_sel,
                              ee_CR_sel,
                              mue_CR_sel,
                              emu_CR_sel,
                              dataset=dataset,
                              pt=LeadLep_pt)
        self.SS_fill_weighted(output["pt_LeadLep_CR_weighted"],
                              mumu_CR_sel,
                              ee_CR_sel,
                              mue_CR_sel,
                              emu_CR_sel,
                              mu_weights=weight_muon,
                              e_weights=weight_electron,
                              dataset=dataset,
                              pt=LeadLep_pt)
        self.SS_fill_weighted(output["pt_LeadLep_SR"],
                              mumu_SR_sel,
                              ee_SR_sel,
                              mue_SR_sel,
                              emu_SR_sel,
                              dataset=dataset,
                              pt=LeadLep_pt)
        self.fill_pt_individual(output, dataset, LeadLep_pt, "pt_LeadLep",
                                dilep_selections, weight_muon, weight_electron)

        #njets
        njets = ak.num(jets, axis=1)
        self.SS_fill_weighted(output["njets_CR"],
                              mumu_CR_sel,
                              ee_CR_sel,
                              mue_CR_sel,
                              emu_CR_sel,
                              dataset=dataset,
                              multiplicity=njets)
        self.SS_fill_weighted(output["njets_CR_weighted"],
                              mumu_CR_sel,
                              ee_CR_sel,
                              mue_CR_sel,
                              emu_CR_sel,
                              mu_weights=weight_muon,
                              e_weights=weight_electron,
                              dataset=dataset,
                              multiplicity=njets)
        self.SS_fill_weighted(output["njets_SR"],
                              mumu_SR_sel,
                              ee_SR_sel,
                              mue_SR_sel,
                              emu_SR_sel,
                              dataset=dataset,
                              multiplicity=njets)
        self.fill_multiplicity_individual(output, dataset, njets, "njets",
                                          dilep_selections, weight_muon,
                                          weight_electron)

        #btags
        btag = ak.num(getBTagsDeepFlavB(jets, year=self.year))
        self.SS_fill_weighted(output["N_b_CR"],
                              mumu_CR_sel,
                              ee_CR_sel,
                              mue_CR_sel,
                              emu_CR_sel,
                              dataset=dataset,
                              multiplicity=btag)
        self.SS_fill_weighted(output["N_b_CR_weighted"],
                              mumu_CR_sel,
                              ee_CR_sel,
                              mue_CR_sel,
                              emu_CR_sel,
                              mu_weights=weight_muon,
                              e_weights=weight_electron,
                              dataset=dataset,
                              multiplicity=btag)
        self.SS_fill_weighted(output["N_b_SR"],
                              mumu_SR_sel,
                              ee_SR_sel,
                              mue_SR_sel,
                              emu_SR_sel,
                              dataset=dataset,
                              multiplicity=btag)
        self.fill_multiplicity_individual(output, dataset, btag, "N_b",
                                          dilep_selections, weight_muon,
                                          weight_electron)

        #HT
        ht = ak.sum(jets.pt, axis=1)
        self.SS_fill_weighted(output["HT_CR"],
                              mumu_CR_sel,
                              ee_CR_sel,
                              mue_CR_sel,
                              emu_CR_sel,
                              dataset=dataset,
                              ht=ht)
        self.SS_fill_weighted(output["HT_CR_weighted"],
                              mumu_CR_sel,
                              ee_CR_sel,
                              mue_CR_sel,
                              emu_CR_sel,
                              mu_weights=weight_muon,
                              e_weights=weight_electron,
                              dataset=dataset,
                              ht=ht)
        self.SS_fill_weighted(output["HT_SR"],
                              mumu_SR_sel,
                              ee_SR_sel,
                              mue_SR_sel,
                              emu_SR_sel,
                              dataset=dataset,
                              ht=ht)
        self.fill_ht_individual(output, dataset, ht, "HT", dilep_selections,
                                weight_muon, weight_electron)
        return output
Ejemplo n.º 11
0
def test_scalar_amax(scalar_type_ak_array):
    """Check that ``ak.max`` with ``initial`` agrees with ``np.amax``.

    ``scalar_type_ak_array`` is supplied by a fixture defined outside this
    block; it is converted with ``ak.to_numpy`` to build the reference value.
    """
    amax = ak.max(scalar_type_ak_array, initial=-9000)
    np_array = ak.to_numpy(scalar_type_ak_array)
    # The comparison used to be a bare expression, so the test could never
    # fail; it has to be asserted to check anything.
    assert amax == np.amax(np_array, initial=-9000)
def make_radius_compatibility_distributions():
    """Print and plot interval-overlap summaries for the ``pT3*`` branches.

    Reads entries 0-5 of the module-level ``tree``, derives one layer-type
    label per flattened object via ``process_layers`` / ``process_numbers``,
    and then, for every distinct label plus the empty label ``""`` (meaning
    "no layer-type filter"), computes the overlap between the inverse pixel
    radius interval and the inverse triplet radius interval, prints counts,
    and hands the arrays to ``make_plots``.

    NOTE(review): ``pixelRadius2SMin``, ``pixelRadius2SMax``,
    ``tripletRadius2SMin`` and ``tripletRadius2SMax`` are not defined in this
    function; presumably they are module-level globals -- confirm.
    """
    global tree
    pt3 = tree.arrays(filter_name="pT3*",
                      entry_start=0,
                      entry_stop=5,
                      library="ak")
    is_matched = pt3.pT3_isFake == 0
    layers = np.array(
        [process_layers(bits) for bits in ak.flatten(pt3.pT3_layer_binary)])
    layerTypes = np.array([process_numbers(layer) for layer in layers])
    # The trailing "" entry selects the unfiltered (all layer types) pass.
    type_labels = np.append(np.unique(layerTypes, axis=0), "")
    print(type_labels)

    def flat_np(branch):
        # Flatten one level of nesting and convert to a NumPy array.
        return ak.to_numpy(ak.flatten(branch))

    for layerType in type_labels:
        print("layerType = {}".format(layerType))

        pixelRadius = flat_np(pt3.pT3_pixelRadius)
        pixel_res_lo = flat_np(pt3.pT3_pixelRadiusMin)
        pixel_res_hi = flat_np(pt3.pT3_pixelRadiusMax)

        tripletRadius = flat_np(pt3.pT3_tripletRadius)
        triplet_res_lo = flat_np(pt3.pT3_tripletRadiusMin)
        triplet_res_hi = flat_np(pt3.pT3_tripletRadiusMax)

        # Computed as in the original (two flatten passes); not used below.
        radius_scale = 2.99792458e-3 * 3.8
        simRadius = ak.flatten(ak.flatten(pt3.pT3_matched_pt / radius_scale))

        # Element-wise envelope of the two interval estimates.
        pixel_lo = ak.to_numpy(ak.min([pixel_res_lo, pixelRadius2SMin],
                                      axis=0))
        pixel_hi = ak.to_numpy(ak.max([pixel_res_hi, pixelRadius2SMax],
                                      axis=0))
        triplet_lo = ak.to_numpy(
            ak.min([triplet_res_lo, tripletRadius2SMin], axis=0))
        triplet_hi = ak.to_numpy(
            ak.max([triplet_res_hi, tripletRadius2SMax], axis=0))

        overlap_inner_outer = compute_interval_overlap(1.0 / pixel_hi,
                                                       1.0 / pixel_lo,
                                                       1.0 / triplet_hi,
                                                       1.0 / triplet_lo)

        for name, qArray in {"innerOuter": overlap_inner_outer}.items():
            print("qName = ", name)
            if layerType != "":
                of_this_type = layerTypes == layerType
                qArray = qArray[of_this_type]
                matched_q = qArray[ak.to_numpy(
                    ak.flatten(is_matched)[of_this_type])]
            else:
                matched_q = qArray[ak.to_numpy(ak.flatten(is_matched))]
            summary = "{} total integral = {}, {} integral below zero = {}, sim-matched {} total integral = {}, sim-matched {} integral above zero = {}".format(
                name, len(qArray), name, sum(qArray < 0), name,
                len(matched_q), name, sum(matched_q > 0))
            print(summary)
            plot_title = "overlap between 1/{} and 1/{}".format(
                "Inner", name[5:])
            make_plots(qArray, matched_q, plot_title, layerType)
def test_highlevel():
    """Exercise the high-level reducers on a doubly nested array.

    The fixture contains empty inner lists and an empty outer list so that
    each reducer's identity value (0 for counts/sums, 1 for products, None
    for min/max, False/True for any/all) shows up in the expectations.
    """
    primes = ak.Array(
        [[[2, 3, 5], [], [7, 11], [13]], [], [[17, 19], [23]]], check_valid=True
    )

    # Every value is non-zero, so count and count_nonzero share expectations.
    lengths_innermost = [[3, 0, 2, 1], [], [2, 1]]
    lengths_by_position = [[3, 2, 1], [], [2, 1]]

    assert ak.count(primes) == 9
    assert ak.to_list(ak.count(primes, axis=-1)) == lengths_innermost
    assert ak.to_list(ak.count(primes, axis=2)) == lengths_innermost
    kept_innermost = ak.count(primes, axis=-1, keepdims=True)
    assert ak.to_list(kept_innermost) == [[[3], [0], [2], [1]], [], [[2], [1]]]
    assert ak.to_list(ak.count(primes, axis=-2)) == lengths_by_position
    assert ak.to_list(ak.count(primes, axis=1)) == lengths_by_position
    kept_by_position = ak.count(primes, axis=-2, keepdims=True)
    assert ak.to_list(kept_by_position) == [[[3, 2, 1]], [[]], [[2, 1]]]

    assert ak.count_nonzero(primes) == 9
    assert ak.to_list(ak.count_nonzero(primes, axis=-1)) == lengths_innermost
    assert ak.to_list(ak.count_nonzero(primes, axis=-2)) == lengths_by_position

    # 2 + 3 + 5 + 7 + 11 + 13 + 17 + 19 + 23
    assert ak.sum(primes) == 100
    assert ak.to_list(ak.sum(primes, axis=-1)) == [[10, 0, 18, 13], [], [36, 23]]
    assert ak.to_list(ak.sum(primes, axis=-2)) == [[22, 14, 5], [], [40, 19]]

    # 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 * 23
    assert ak.prod(primes) == 223092870
    assert ak.to_list(ak.prod(primes, axis=-1)) == [[30, 1, 77, 13], [], [323, 23]]
    assert ak.to_list(ak.prod(primes, axis=-2)) == [[182, 33, 5], [], [391, 19]]

    assert ak.min(primes) == 2
    assert ak.to_list(ak.min(primes, axis=-1)) == [[2, None, 7, 13], [], [17, 23]]
    assert ak.to_list(ak.min(primes, axis=-2)) == [[2, 3, 5], [], [17, 19]]

    assert ak.max(primes) == 23
    assert ak.to_list(ak.max(primes, axis=-1)) == [[5, None, 11, 13], [], [19, 23]]
    assert ak.to_list(ak.max(primes, axis=-2)) == [[13, 11, 5], [], [23, 19]]

    flags = ak.Array(
        [
            [[True, False, True], [], [False, False], [True]],
            [],
            [[False, True], [True]],
        ],
        check_valid=True,
    )

    assert ak.any(flags) == True
    any_innermost = ak.to_list(ak.any(flags, axis=-1))
    assert any_innermost == [[True, False, False, True], [], [True, True]]
    any_by_position = ak.to_list(ak.any(flags, axis=-2))
    assert any_by_position == [[True, False, True], [], [True, True]]

    assert ak.all(flags) == False
    all_innermost = ak.to_list(ak.all(flags, axis=-1))
    assert all_innermost == [[False, True, False, True], [], [False, True]]
    all_by_position = ak.to_list(ak.all(flags, axis=-2))
    assert all_by_position == [[False, False, True], [], [False, True]]
Ejemplo n.º 14
0
def make_leadjet_pt_cut(jets):
    """Return a mask that is true where the largest ``jets["pt"]`` along
    axis 1 is at least ``jet_pars["lead_ptmin"]``.

    ``jet_pars`` is read from the enclosing module scope.
    """
    highest_pt = ak.max(jets["pt"], axis=1)
    return highest_pt >= jet_pars["lead_ptmin"]
Ejemplo n.º 15
0
    def process(self, events):
        """Apply the configured region selections to ``events`` and fill the
        output accumulator.

        Depending on which of ``'signal'``, ``'muonCR'`` and ``'VtaggingCR'``
        appear in ``self._region``, the corresponding block builds its cuts,
        registers them in a shared ``PackedSelection``, accumulates a
        cumulative cutflow, and fills the histograms through the nested
        ``fill`` helper.

        Parameters
        ----------
        events
            Event record array; ``events.metadata['dataset']`` names the
            dataset. Events without a ``genWeight`` attribute are treated as
            real data.

        Returns
        -------
        The accumulator obtained from ``self.accumulator.identity()``, filled
        in place. It is returned unfilled when ``events`` is empty.
        """
        def normalize(val, cut):
            """Select ``val[cut]`` and convert it to NumPy, mapping missing
            entries to NaN."""
            return ak.to_numpy(ak.fill_none(
                val[cut],
                np.nan))  #val[cut].pad(1, clip=True).fillna(0).flatten()

        def fill(region, cuts, systematic=None, wmod=None):
            """Fill the histograms for ``region`` with events passing every
            cut named in ``cuts``.

            Reads ``candidatejet`` / ``candidatemuon`` and the per-region
            weights from the enclosing scope at call time. ``systematic`` and
            ``wmod`` are accepted but not used.
            """
            print('filling %s' % region)
            selections = cuts

            cut = selection.all(*selections)
            # NOTE(review): ``weight`` stays unbound if ``region`` matches none
            # of the three names below; only those three are passed in here.
            if 'signal' in region: weight = weights_signal.weight()[cut]
            elif 'muonCR' in region: weight = weights_muonCR.weight()[cut]
            elif 'VtaggingCR' in region:
                weight = weights_VtaggingCR.weight()[cut]
            output['templates'].fill(
                dataset=dataset,
                region=region,
                pt=normalize(candidatejet.pt, cut),
                msd=normalize(candidatejet.msdcorr, cut),
                n2ddt=normalize(candidatejet.n2ddt, cut),
                #gruddt=normalize(candidatejet.gruddt, cut),
                in_v3_ddt=normalize(candidatejet.in_v3_ddt, cut),
                hadW=normalize(candidatejet.nmatcheddau, cut),
                weight=weight,
            )
            output['event'].fill(
                dataset=dataset,
                region=region,
                MET=events.MET.pt[cut],
                #nJet=fatjets.counts[cut],
                nPFConstituents=normalize(candidatejet.nPFConstituents, cut),
                weight=weight,
            )
            output['deepAK8'].fill(
                dataset=dataset,
                region=region,
                deepTagMDWqq=normalize(candidatejet.deepTagMDWqq, cut),
                deepTagMDZqq=normalize(candidatejet.deepTagMDZqq, cut),
                msd=normalize(candidatejet.msdcorr, cut),
                #genflavor=genflavor[cut],
                weight=weight,
            )
            output['in_v3'].fill(
                dataset=dataset,
                region=region,
                #genflavor=genflavor[cut],
                in_v3=normalize(candidatejet.in_v3, cut),
                n2=normalize(candidatejet.n2b1, cut),
                gru=normalize(candidatejet.gru, cut),
                weight=weight,
            )
            # NOTE(review): this tests the *dataset* name, not ``region``;
            # confirm that is intended before relying on the 'muon' histogram.
            if 'muonCR' in dataset or 'VtaggingCR' in dataset:
                output['muon'].fill(
                    dataset=dataset,
                    region=region,
                    mu_pt=normalize(candidatemuon.pt, cut),
                    mu_eta=normalize(candidatemuon.eta, cut),
                    mu_pfRelIso04_all=normalize(candidatemuon.pfRelIso04_all,
                                                cut),
                    weight=weight,
                )

        #common jet kinematics
        gru = events.GRU
        IN = events.IN
        fatjets = events.FatJet
        fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
        fatjets['qcdrho'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
        fatjets['gruddt'] = gru.v25 - shift(
            fatjets, algo='gruddt', year='2017')
        fatjets['gru'] = gru.v25
        fatjets['in_v3'] = IN.v3
        fatjets['in_v3_ddt'] = IN.v3 - shift(
            fatjets, algo='inddt', year='2017')
        fatjets['in_v3_ddt_90pctl'] = IN.v3 - shift(
            fatjets, algo='inddt90pctl', year='2017')
        fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year='2017')
        fatjets['nmatcheddau'] = TTsemileptonicmatch(events)
        dataset = events.metadata['dataset']
        print('process dataset', dataset)
        # Events carrying no generator weight are treated as real data.
        isRealData = not hasattr(events, 'genWeight')
        output = self.accumulator.identity()
        if (len(events) == 0): return output

        # One selection object is shared by all regions; cut names carry a
        # per-region prefix (S_, CR1_, CR2_) so they do not collide.
        selection = PackedSelection('uint64')

        weights_signal = Weights(len(events))
        weights_muonCR = Weights(len(events))
        weights_VtaggingCR = Weights(len(events))

        if not isRealData:
            output['sumw'][dataset] += ak.sum(events.genWeight)

        #######################
        if 'signal' in self._region:
            if isRealData:
                # OR of all configured triggers for this year.
                trigger_fatjet = np.zeros(len(events), dtype='bool')
                for t in self._triggers[self._year]:
                    try:
                        trigger_fatjet = trigger_fatjet | events.HLT[t]
                    except Exception:
                        # Best effort: a trigger that cannot be read is
                        # reported and skipped. ``Exception`` (rather than a
                        # bare ``except``) lets KeyboardInterrupt/SystemExit
                        # propagate.
                        print('trigger %s not available' % t)

            else:
                trigger_fatjet = np.ones(len(events), dtype='bool')

            fatjets["genMatchFull"] = VQQgenmatch(events)
            candidatejet = ak.firsts(fatjets)
            candidatejet["genMatchFull"] = VQQgenmatch(events)
            nelectrons = ak.sum(
                (events.Electron.pt > 10.)
                & (abs(events.Electron.eta) < 2.5)
                & (events.Electron.cutBased >= events.Electron.VETO),
                axis=1,
            )
            nmuons = ak.sum(
                (events.Muon.pt > 10)
                & (abs(events.Muon.eta) < 2.1)
                & (events.Muon.pfRelIso04_all < 0.4)
                & (events.Muon.looseId),
                axis=1,
            )
            ntaus = ak.sum(
                (events.Tau.pt > 20.)
                & (events.Tau.idDecayMode)
                & (events.Tau.rawIso < 5)
                & (abs(events.Tau.eta) < 2.3),
                axis=1,
            )

            cuts = {
                "S_fatjet_trigger":
                trigger_fatjet,
                "S_pt":
                candidatejet.pt > 525,
                "S_eta": (abs(candidatejet.eta) < 2.5),
                "S_msdcorr": (candidatejet.msdcorr > 40),
                "S_rho":
                ((candidatejet.qcdrho > -5.5) & (candidatejet.qcdrho < -2.)),
                "S_jetid": (candidatejet.isTight),
                "S_VQQgenmatch": (candidatejet.genMatchFull),
                "S_noelectron": (nelectrons == 0),
                "S_nomuon": (nmuons == 0),
                "S_notau": (ntaus == 0),
            }

            for name, cut in cuts.items():
                print(name, cut)
                selection.add(name, cut)

            if isRealData:
                genflavor = 0  #candidatejet.pt.zeros_like().pad(1, clip=True).fillna(-1).flatten()
            if not isRealData:
                weights_signal.add('genweight', events.genWeight)
                #add_pileup_weight(weights_signal, events.Pileup.nPU, self._year, dataset)
                add_jetTriggerWeight(weights_signal, candidatejet.msdcorr,
                                     candidatejet.pt, self._year)
                bosons = getBosons(events.GenPart)
                genBosonPt = ak.fill_none(ak.firsts(bosons.pt), 0)
                add_VJets_NLOkFactor(weights_signal, genBosonPt, self._year,
                                     dataset)
                #genflavor = matchedBosonFlavor(candidatejet, bosons).pad(1, clip=True).fillna(-1).flatten()

            # Cumulative cutflow: each entry is the weighted yield after this
            # cut and all cuts added before it.
            allcuts_signal = set()
            output['cutflow_signal'][dataset]['none'] += float(
                weights_signal.weight().sum())
            for cut in cuts:
                allcuts_signal.add(cut)
                output['cutflow_signal'][dataset][cut] += float(
                    weights_signal.weight()[selection.all(
                        *allcuts_signal)].sum())

            fill('signal', cuts.keys())

        #######################
        if 'muonCR' in self._region:

            if isRealData:
                trigger_muon = np.zeros(len(events), dtype='bool')
                for t in self._muontriggers[self._year]:
                    trigger_muon = trigger_muon | events.HLT[t]
            else:
                trigger_muon = np.ones(len(events), dtype='bool')

            candidatejet = ak.firsts(fatjets)
            # Up to five muons per event are kept; the muon cuts below pass if
            # any of them satisfies the requirement.
            candidatemuon = events.Muon[:, :5]

            jets = events.Jet[((events.Jet.pt > 50.)
                               & (abs(events.Jet.eta) < 2.5)
                               & (events.Jet.isTight))][:, :4]

            dphi = abs(jets.delta_phi(candidatejet))

            ak4_away = jets[(dphi > 0.8)]

            nelectrons = ak.sum(
                (events.Electron.pt > 10.)
                & (abs(events.Electron.eta) < 2.5)
                & (events.Electron.cutBased >= events.Electron.VETO),
                axis=1,
            )
            nmuons = ak.sum(
                (events.Muon.pt > 10)
                & (abs(events.Muon.eta) < 2.4)
                & (events.Muon.pfRelIso04_all < 0.25)
                & (events.Muon.looseId),
                axis=1,
            )
            ntaus = ak.sum(
                (events.Tau.pt > 20.)
                & (events.Tau.idDecayMode)
                & (events.Tau.rawIso < 5)
                & (abs(events.Tau.eta) < 2.3)
                & (events.Tau.idMVAoldDM2017v1 >= 16),
                axis=1,
            )

            cuts = {
                "CR1_muon_trigger":
                trigger_muon,
                "CR1_jet_pt": (candidatejet.pt > 525),
                "CR1_jet_eta": (abs(candidatejet.eta) < 2.5),
                "CR1_jet_msd": (candidatejet.msdcorr > 40),
                "CR1_jet_rho":
                ((candidatejet.qcdrho > -5.5) & (candidatejet.qcdrho < -2.)),
                "CR1_mu_pt":
                ak.any(candidatemuon.pt > 55, axis=1),
                "CR1_mu_eta":
                ak.any(abs(candidatemuon.eta) < 2.1, axis=1),
                "CR1_mu_IDLoose":
                ak.any(candidatemuon.looseId, axis=1),
                "CR1_mu_isolationTight":
                ak.any(candidatemuon.pfRelIso04_all < 0.15, axis=1),
                "CR1_muonDphiAK8":
                ak.any(
                    abs(candidatemuon.delta_phi(candidatejet)) > 2 * np.pi / 3,
                    axis=1),
                "CR1_ak4btagMedium08":
                (ak.max(ak4_away.btagCSVV2, axis=1, mask_identity=False) >
                 BTagEfficiency.btagWPs[self._year]['medium']
                 ),  #(ak4_away.btagCSVV2.max() > 0.8838),
                "CR1_noelectron": (nelectrons == 0),
                "CR1_onemuon": (nmuons == 1),
                "CR1_notau": (ntaus == 0),
            }
            for name, cut in cuts.items():
                selection.add(name, cut)

            if isRealData:
                genflavor = 0  #candidatejet.pt.zeros_like().pad(1, clip=True).fillna(-1).flatten()
            if not isRealData:
                weights_muonCR.add('genweight', events.genWeight)
                #add_pileup_weight(weights_muonCR, events.Pileup.nPU, self._year, dataset)
                #add_singleMuTriggerWeight(weights, candidatejet.msdcorr, candidatejet.pt, self._year)
                bosons = getBosons(events.GenPart)
                genBosonPt = ak.fill_none(ak.firsts(bosons.pt), 0)
                #add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
                #genflavor = matchedBosonFlavor(candidatejet, bosons).pad(1, clip=True).fillna(-1).flatten()

            # Cumulative cutflow, as in the signal region.
            allcuts_ttbar_muoncontrol = set()
            output['cutflow_muonCR'][dataset]['none'] += float(
                weights_muonCR.weight().sum())
            for cut in cuts:
                allcuts_ttbar_muoncontrol.add(cut)
                output['cutflow_muonCR'][dataset][cut] += float(
                    weights_muonCR.weight()[selection.all(
                        *allcuts_ttbar_muoncontrol)].sum())
            fill('muonCR', cuts.keys())

        #######################
        if 'VtaggingCR' in self._region:
            if isRealData:
                trigger_muon = np.zeros(len(events), dtype='bool')
                for t in self._muontriggers[self._year]:
                    trigger_muon = trigger_muon | events.HLT[t]
            else:
                trigger_muon = np.ones(len(events), dtype='bool')

            candidatejet = ak.firsts(fatjets)
            candidatemuon = ak.firsts(events.Muon)

            jets = events.Jet[((events.Jet.pt > 30.)
                               & (abs(events.Jet.eta) < 2.4))][:, :4]

            dr_ak4_ak8 = jets.delta_r(candidatejet)
            dr_ak4_muon = jets.delta_r(candidatemuon)

            ak4_away = jets[(dr_ak4_ak8 > 0.8)]  # & (dr_ak4_muon > 0.4)]
            # Missing muons are replaced by an all-zero four-vector so the sum
            # with MET below is defined for every event.
            mu_p4 = ak.zip(
                {
                    "pt": ak.fill_none(candidatemuon.pt, 0),
                    "eta": ak.fill_none(candidatemuon.eta, 0),
                    "phi": ak.fill_none(candidatemuon.phi, 0),
                    "mass": ak.fill_none(candidatemuon.mass, 0),
                },
                with_name="PtEtaPhiMLorentzVector")

            # MET is wrapped as one single-element list per event, with eta
            # and mass set to zero.
            met_p4 = ak.zip(
                {
                    "pt": ak.from_iter([[v] for v in events.MET.pt]),
                    "eta": ak.from_iter([[v] for v in np.zeros(len(events))]),
                    "phi": ak.from_iter([[v] for v in events.MET.phi]),
                    "mass": ak.from_iter([[v] for v in np.zeros(len(events))]),
                },
                with_name="PtEtaPhiMLorentzVector")

            Wleptoniccandidate = mu_p4 + met_p4

            nelectrons = ak.sum(
                ((events.Electron.pt > 10.)
                 & (abs(events.Electron.eta) < 2.5)
                 & (events.Electron.cutBased >= events.Electron.VETO)),
                axis=1,
            )
            n_tight_muon = ak.sum(
                ((events.Muon.pt > 53)
                 & (abs(events.Muon.eta) < 2.1)
                 & (events.Muon.tightId)),
                axis=1,
            )
            n_loose_muon = ak.sum(
                ((events.Muon.pt > 20)
                 & (events.Muon.looseId)
                 & (abs(events.Muon.eta) < 2.4)),
                axis=1,
            )
            # Not used while the "CR2_notau" cut below stays commented out.
            ntaus = ak.sum(
                ((events.Tau.pt > 20.)
                 & (events.Tau.idDecayMode)
                 & (events.Tau.rawIso < 5)
                 & (abs(events.Tau.eta) < 2.3)
                 & (events.Tau.idMVAoldDM2017v1 >= 16)),
                axis=1,
            )

            cuts = {
                "CR2_muon_trigger":
                trigger_muon,
                "CR2_jet_pt": (candidatejet.pt > 200),
                "CR2_jet_eta": (abs(candidatejet.eta) < 2.5),
                "CR2_jet_msd": (candidatejet.msdcorr > 40),
                "CR2_mu_pt":
                candidatemuon.pt > 53,
                "CR2_mu_eta": (abs(candidatemuon.eta) < 2.1),
                "CR2_mu_IDTight":
                candidatemuon.tightId,
                "CR2_mu_isolationTight": (candidatemuon.pfRelIso04_all < 0.15),
                "CR2_muonDphiAK8":
                abs(candidatemuon.delta_phi(candidatejet)) > 2 * np.pi / 3,
                "CR2_ak4btagMedium08":
                (ak.max(ak4_away.btagCSVV2, axis=1, mask_identity=False) >
                 BTagEfficiency.btagWPs[self._year]['medium']),
                "CR2_leptonicW":
                ak.flatten(Wleptoniccandidate.pt > 200),
                "CR2_MET": (events.MET.pt > 40.),
                "CR2_noelectron": (nelectrons == 0),
                "CR2_one_tightMuon": (n_tight_muon == 1),
                "CR2_one_looseMuon": (n_loose_muon == 1),
                #"CR2_notau"            : (ntaus==0),
            }

            for name, cut in cuts.items():
                print(name, cut)
                selection.add(name, cut)
            #weights.add('metfilter', events.Flag.METFilters)
            if isRealData:
                genflavor = 0  #candidatejet.pt.zeros_like().pad(1, clip=True).fillna(-1).flatten()
            if not isRealData:
                weights_VtaggingCR.add('genweight', events.genWeight)
                #add_pileup_weight(weights_VtaggingCR, events.Pileup.nPU, self._year, dataset)
                #add_singleMuTriggerWeight(weights, abs(candidatemuon.eta), candidatemuon.pt, self._year)
                bosons = getBosons(events.GenPart)
                genBosonPt = ak.fill_none(ak.firsts(bosons.pt), 0)
                #add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
                #genflavor = matchedBosonFlavor(candidatejet, bosons).pad(1, clip=True).fillna(-1).flatten()

                #b-tag weights
            # Cumulative cutflow, as in the signal region.
            allcuts_vselection = set()
            output['cutflow_VtaggingCR'][dataset]['none'] += float(
                weights_VtaggingCR.weight().sum())

            for cut in cuts:
                allcuts_vselection.add(cut)
                output['cutflow_VtaggingCR'][dataset][cut] += float(
                    weights_VtaggingCR.weight()[selection.all(
                        *allcuts_vselection)].sum())
            fill('VtaggingCR', cuts.keys())

        return output