def test_3D_jagged(wrapped_tree):
    """Check that doubly-jagged (3D) variables work in expressions.

    A fake 3D JaggedArray is registered on the tree and then read back
    through ``expressions.evaluate`` as a plain alias, doubled by
    multiplication, and doubled by self-addition.  Finally, adding two 3D
    variables with different inner structure must raise ``RuntimeError``.
    """
    fake_3d = [[np.arange(i + 1) + j for i in range(j % 3)]
               for j in range(len(wrapped_tree))]
    fake_3d = JaggedArray.fromiter(fake_3d)
    wrapped_tree.new_variable("Fake3D", fake_3d)
    assert isinstance(fake_3d.count(), JaggedArray)
    assert all((fake_3d.copy().count() == fake_3d.count()).all())

    aliased = expressions.evaluate(wrapped_tree, "Fake3D")
    assert (aliased == fake_3d).all().all().all()

    # Both spellings of "twice the input" must give the same values.
    for expression in ("Fake3D * 2", "Fake3D + Fake3D"):
        doubled = expressions.evaluate(wrapped_tree, expression)
        assert (doubled == fake_3d * 2).all().all().all()
        assert len(doubled[0, :, :]) == 0
        assert doubled[1, 0, :] == [2]
        assert doubled[2, 0, :] == [4]
        assert all(doubled[2, 1, :] == [4, 6])

    fake_3d_2 = [[np.arange(i + 3) + j for i in range(j % 2)]
                 for j in range(len(wrapped_tree))]
    fake_3d_2 = JaggedArray.fromiter(fake_3d_2)
    wrapped_tree.new_variable("SecondFake3D", fake_3d_2)

    with pytest.raises(RuntimeError) as e:
        expressions.evaluate(wrapped_tree, "SecondFake3D + Fake3D")
    # Inspect the exception itself: since pytest 5, str() of the
    # ExceptionInfo wrapper is its repr, which may truncate the message.
    assert "different jaggedness" in str(e.value)
Exemple #2
0
def NestNestObjArrayToJagged(objarr):
    """Convert an object array of nested vectors into a doubly-jagged array.

    uproot reads a ``vector<vector<number>>`` TBranch as an object array;
    this function turns it into a JaggedArray whose content is itself a
    JaggedArray.
    """
    # Outer level: a JaggedArray whose entries are still lists.
    outer = JaggedArray.fromiter(objarr)
    # Inner level: re-jag the flat content of the outer array.
    inner = JaggedArray.fromiter(outer.content)
    # Reattach the outer structure on top of the now-jagged content.
    return JaggedArray.fromoffsets(outer.offsets, inner)
 def __call__(self, *args):
     """Evaluate the lookup on flat numpy arrays or on JaggedArrays.

     JaggedArray arguments are replaced by their flat ``content`` before
     being handed to ``self._evaluate``.  If jagged inputs were given, the
     result is re-wrapped with the offsets of the last jagged argument;
     otherwise the raw result of ``self._evaluate`` is returned.

     Raises:
         Exception: if JaggedArrays and numpy arrays are mixed, or if two
             JaggedArrays have offsets that do not share the same base
             buffer.
     """
     inputs = list(args)
     # None: nothing seen yet; -1 (int): plain numpy arrays seen;
     # ndarray: offsets of the most recent JaggedArray.
     offsets = None
     # TODO: check can use offsets (this should always be true for striped)
     # Alternatively we can just use starts and stops
     for i, arg in enumerate(inputs):
         if isinstance(arg, JaggedArray):
             # Check for the numpy sentinel first: an int has no ``.base``,
             # so comparing bases first would raise AttributeError instead
             # of the intended error.
             if type(offsets) is int:
                 raise Exception(
                     'Do not mix JaggedArrays and numpy arrays when calling derived class of lookup_base'
                 )
             # NOTE(review): this compares the identity of the base buffers;
             # two offsets arrays that both own their data have base None on
             # each side and so pass this check -- confirm that is intended.
             if (type(offsets) is np.ndarray
                     and offsets.base is not arg.offsets.base):
                 raise Exception(
                     'All input jagged arrays must have a common structure (offsets)!'
                 )
             offsets = arg.offsets
             inputs[i] = arg.content
         elif isinstance(arg, np.ndarray):
             if type(offsets) is np.ndarray:
                 raise Exception(
                     'do not mix JaggedArrays and numpy arrays when calling a derived class of lookup_base'
                 )
             offsets = -1
     retval = self._evaluate(*tuple(inputs))
     if offsets is not None and type(offsets) is not int:
         retval = JaggedArray.fromoffsets(offsets, retval)
     return retval
 def _kExtra(self, kpt, eta, nl, u, s=0, m=0):
     """Compute the extra smearing factor ``1 / (1 + x)``.

     ``x`` is non-zero only where the data resolution factor exceeds the MC
     one; there it is ``sqrt(kData**2 - kMC**2) * sigma * invcdf``, with
     ``invcdf`` the double-crystal-ball inverse CDF evaluated at ``u``.
     Entries with ``x <= -1`` keep a factor of 1.

     If ``kpt`` is a JaggedArray, ``kpt``, ``eta``, ``nl`` and ``u`` are
     flattened for the calculation and the result is re-jagged with
     ``kpt``'s offsets.
     """
     # Remember the jagged structure, then work on flat arrays: the masked
     # assignments below need flat indexing.
     offsets = None
     if isinstance(kpt, JaggedArray):
         offsets = kpt.offsets
         kpt, eta, nl, u = (arr.flatten() for arr in (kpt, eta, nl, u))

     abs_eta = abs(eta)
     res_data = self._kRes[s][m][1](abs_eta)  # index 1 is data
     res_mc = self._kRes[s][m][0](abs_eta)  # index 0 is MC
     data_wider = res_data > res_mc

     smear = np.zeros_like(kpt)
     sigma = self._sigma(kpt, eta, nl, s, m)

     # Rochester -> double crystal ball parameters: cbA = beta, cbN = m,
     # cbM (always 0?) = loc and cbS = scale, transforming
     # y = (x - loc) / scale in the pdf method.
     beta = self._cbA[s][m](abs_eta, nl)
     power = self._cbN[s][m](abs_eta, nl)
     loc = np.zeros_like(u)
     scale = self._cbS[s][m](abs_eta, nl)
     inv_cdf = doublecrystalball.ppf(u, beta, beta, power, power, loc, scale)

     excess = np.sqrt(res_data[data_wider] * res_data[data_wider] -
                      res_mc[data_wider] * res_mc[data_wider])
     smear[data_wider] = excess * sigma[data_wider] * inv_cdf[data_wider]

     result = np.ones_like(kpt)
     invertible = smear > -1
     result[invertible] = 1.0 / (1.0 + smear[invertible])

     if offsets is not None:
         result = JaggedArray.fromoffsets(offsets, result)
     return result
def test_jagged_nth_3D(jagged_1):
    """JaggedNth on a doubly-jagged array picks the n-th innermost element.

    Missing elements are replaced by the default given to JaggedNth (NaN).
    """
    nested = JaggedArray.fromiter(
        [[np.arange(i + 1) + j for i in range(j % 3)] for j in range(5)])

    # Second element of every innermost list, NaN where it has fewer than 2.
    second = reductions.JaggedNth(1, np.nan)(nested)
    assert [len(second[evt]) for evt in range(5)] == [0, 1, 2, 0, 1]
    assert np.isnan(second[1])
    assert np.isnan(second[2][0])
    assert second[2][1] == 3
    assert np.isnan(second[4])

    # First element of every innermost list.
    first = reductions.JaggedNth(0, np.nan)(nested)
    assert [len(first[evt]) for evt in range(5)] == [0, 1, 2, 0, 1]
    assert first[1][0] == 1
    assert first[2][0] == 2
    assert first[2][1] == 2
    assert first[4] == 4
Exemple #6
0
def passLooseJetSel(jet):
    """Return a jagged boolean mask of the jets passing the loose selection.

    The cuts are applied on the flat ``content`` arrays and the result is
    re-jagged with ``jet.pt.offsets``.  Requirements by |eta| region:

    * |eta| <= 2.7: neuHadFrac < 0.99, neuEmFrac < 0.99, nParticles > 1
    * |eta| <= 2.4: additionally chHadFrac > 0, nCharged > 0,
      chEmFrac < 0.99
    * 2.7 < |eta| <= 3.0: neuEmFrac > 0.01, neuHadFrac < 0.98,
      nNeutrals > 2
    * |eta| > 3.0: neuEmFrac > 0.90, nNeutrals > 10
    """
    # builtin bool: the np.bool alias was removed in NumPy 1.24
    outs = np.ones_like(jet.pt.content, dtype=bool)
    absEta = np.abs(jet.eta.content)
    etaVFor = (absEta <= 3.0)
    etaFor = (absEta <= 2.7)
    etaCen = (absEta <= 2.4)

    # forward jets (this region also contains the central jets)
    outs[etaFor] &= ((jet.neuHadFrac.content[etaFor] < 0.99) &
                     (jet.neuEmFrac.content[etaFor] < 0.99) &
                     (jet.nParticles.content[etaFor] > 1))
    # central jets
    outs[etaCen] &= ((jet.chHadFrac.content[etaCen] > 0.0) &
                     (jet.nCharged.content[etaCen] > 0) &
                     (jet.chEmFrac.content[etaCen] < 0.99))
    # 2.7-3.0
    etaHE = etaVFor & ~etaFor
    outs[etaHE] &= ((jet.neuEmFrac.content[etaHE] > 0.01) &
                    (jet.neuHadFrac.content[etaHE] < 0.98) &
                    (jet.nNeutrals.content[etaHE] > 2))
    # > 3.0
    etaHF = ~etaVFor
    outs[etaHF] &= ((jet.neuEmFrac.content[etaHF] > 0.90) &
                    (jet.nNeutrals.content[etaHF] > 10))

    outs = JaggedArray.fromoffsets(jet.pt.offsets, outs)
    return outs
Exemple #7
0
def getvar(events, name, default=None, parents="run"):
    """Return ``events[name]``, or a ``default``-filled stand-in for it.

    When ``name`` is missing, the stand-in takes its structure from
    ``events[parents]``: a numpy array yields ``np.full_like`` of it, a
    JaggedArray yields a JaggedArray with the same offsets whose content is
    ``default`` repeated.  ``None`` is returned when ``parents`` is missing
    too, or when ``events[parents]`` is of neither type.
    """
    if name in events:
        return events[name]
    if parents not in events:
        return None

    template = events[parents]
    if isinstance(template, np.ndarray):
        return np.full_like(template, default)
    if isinstance(template, JaggedArray):
        filler = [default] * template.flatten().shape[0]
        return JaggedArray.fromoffsets(template.offsets, filler)
    return None
def setup_gen_candidates(df):
    """Build the generator-particle candidate array from flat columns.

    The index of each particle's first ancestor with a different PDG ID is
    computed first (via ``find_first_parent``) and stored on the candidates
    as ``parentIndex``.
    """
    counts = df['nGenPart']
    mother_idx = df['GenPart_genPartIdxMother']
    pdg_id = df['GenPart_pdgId']

    # Find first ancestor with different PDG ID
    # before defining the gen candidates
    parent_index = find_first_parent(
        JaggedArray.fromcounts(counts, mother_idx),
        JaggedArray.fromcounts(counts, pdg_id),
    )

    # NOTE(review): ``charge`` is filled with the PDG ID column, same as
    # ``pdg`` -- presumably a placeholder; confirm.
    return JaggedCandidateArray.candidatesfromcounts(
        counts,
        pt=df['GenPart_pt'],
        eta=df['GenPart_eta'],
        phi=df['GenPart_phi'],
        mass=df['GenPart_mass'],
        charge=pdg_id,
        pdg=pdg_id,
        status=df['GenPart_status'],
        flag=df['GenPart_statusFlags'],
        mother=mother_idx,
        parentIndex=parent_index.flatten())
Exemple #9
0
 def get_lepton_values(zl, key):
     """Pick, per entry of ``zl``, the value of ``key`` from lepton 0 or 1.

     ``zl`` is a jagged array of indices (0 or 1) selecting which leg of the
     Z candidate to read.  ``'pt'``, ``'eta'``, ``'phi'`` and ``'mass'`` are
     read from the leg's ``'p4'``; any other key is read directly from the
     leg.  ``z_cands`` and ``passZCand`` come from the enclosing scope.
     The result has the same offsets as ``zl``.
     """
     which_leg = zl.flatten()
     val = np.zeros_like(which_leg, dtype=float)
     if len(val) != 0:
         for i in range(2):
             mask = (i == which_leg)
             leg = z_cands[passZCand][str(i)].flatten()[mask]
             if key in ('pt', 'eta', 'phi', 'mass'):
                 # kinematic quantities live on the four-vector
                 val[mask] = getattr(leg['p4'], key)
             else:
                 val[mask] = leg[key]
     return JaggedArray.fromoffsets(zl.offsets, val)
    def event(self, chunk):
        """Evaluate every configured expression and add it to the tree.

        For each ``(output, expression, reduction, fill_missing)`` entry of
        ``self._variables`` the branches used by the expression are loaded
        into a pandas dataframe and the expression is evaluated with
        ``DataFrame.eval``.  With a reduction, the result is grouped on index
        level 0 and reduced to a flat array; otherwise it is turned into a
        JaggedArray using index level 0 as the parent index.  The array is
        then registered on ``chunk.tree`` under ``output``.

        Returns True.
        """
        # fill_missing is unpacked but not used by this method
        for output, expression, reduction, fill_missing in self._variables:
            branches = get_branches(expression, chunk.tree.allkeys())
            data = chunk.tree.pandas.df(branches)
            result = data.eval(expression)
            if reduction:
                groups = result.groupby(level=0)
                array = reduction(groups).values
            else:
                events = result.index.get_level_values(0).values
                # Shift the parent indices so that they start at zero.  Use a
                # new array rather than ``-=``: ``.values`` may expose the
                # index's own (possibly read-only) buffer.
                events = events - events[0]
                array = JaggedArray.fromparents(events, result.values)

            chunk.tree.new_variable(output, array)
        return True
Exemple #11
0
def passJetTightLepVetoSel(jet):
    """Return a jagged boolean mask of jets passing the tight lepton veto.

    The cuts are applied on the flat ``content`` arrays and the result is
    re-jagged with ``jet.pt.offsets``.  Requirements by |eta| region:

    * |eta| <= 2.7: neuHadFrac < 0.90, neuEmFrac < 0.90, nParticles > 1,
      muonFrac < 0.8
    * |eta| <= 2.4: additionally chHadFrac > 0, nCharged > 0, chEmFrac < 0.9

    Jets with |eta| > 2.7 are not cut on and therefore pass.
    """
    # builtin bool: the np.bool alias was removed in NumPy 1.24
    outs = np.ones_like(jet.pt.content, dtype=bool)
    absEta = np.abs(jet.eta.content)
    etaFor = (absEta <= 2.7)
    etaCen = (absEta <= 2.4)
    # forward jets (this region also contains the central jets)
    outs[etaFor] &= ((jet.neuHadFrac.content[etaFor] < 0.90) &
                     (jet.neuEmFrac.content[etaFor] < 0.90) &
                     (jet.nParticles.content[etaFor] > 1) &
                     (jet.muonFrac.content[etaFor] < 0.8))
    # central jets
    outs[etaCen] &= ((jet.chHadFrac.content[etaCen] > 0.0) &
                     (jet.nCharged.content[etaCen] > 0) &
                     (jet.chEmFrac.content[etaCen] < 0.9))
    outs = JaggedArray.fromoffsets(jet.pt.offsets, outs)
    return outs
def test_rochester():
    """Compare the Rochester lookup with precomputed official corrections.

    Testing 1-to-1 agreement with the official Rochester code would require
    loading C++ files; instead, the expected scales are preloaded from the
    sample directory (the script tests/samples/rochester/build_rochester.py
    produces them).
    """
    rochester_data = lookup_tools.txt_converters.convert_rochester_file(
        'tests/samples/RoccoR2018.txt.gz', loaduncs=True)
    rochester = lookup_tools.rochester_lookup.rochester_lookup(rochester_data)

    official_data_k = np.load('tests/samples/nano_dimuon_rochester.npy')
    official_data_err = np.load('tests/samples/nano_dimuon_rochester_err.npy')
    official_mc_k = np.load('tests/samples/nano_dy_rochester.npy')
    official_mc_err = np.load('tests/samples/nano_dy_rochester_err.npy')
    mc_rand = np.load('tests/samples/nano_dy_rochester_rand.npy')

    # ---- data: scale and its uncertainty ----
    events = NanoEvents.from_file(os.path.abspath('tests/samples/nano_dimuon.root'))
    muons = events.Muon

    data_k = rochester.kScaleDT(muons.charge, muons.pt, muons.eta, muons.phi)
    assert all(np.isclose(data_k.flatten(), official_data_k))
    data_err = rochester.kScaleDTerror(muons.charge, muons.pt, muons.eta, muons.phi)
    data_err = np.array(data_err.flatten(), dtype=float)
    assert all(np.isclose(data_err, official_data_err, atol=1e-8))

    # ---- MC: spread for gen-matched muons, smear for the rest ----
    events = NanoEvents.from_file(os.path.abspath('tests/samples/nano_dy.root'))
    muons = events.Muon

    hasgen = ~np.isnan(muons.matched_gen.pt.fillna(np.nan))
    nogen = ~hasgen
    mc_rand = JaggedArray.fromoffsets(hasgen.offsets, mc_rand)

    def merge(spread, smear):
        # One flat value per muon, taken from whichever method applies to it.
        merged = np.ones_like(muons.pt.flatten())
        merged[hasgen.flatten()] = spread.flatten()
        merged[nogen.flatten()] = smear.flatten()
        return merged

    mc_kspread = rochester.kSpreadMC(
        muons.charge[hasgen], muons.pt[hasgen], muons.eta[hasgen],
        muons.phi[hasgen], muons.matched_gen.pt[hasgen])
    mc_ksmear = rochester.kSmearMC(
        muons.charge[nogen], muons.pt[nogen], muons.eta[nogen],
        muons.phi[nogen], muons.nTrackerLayers[nogen], mc_rand[nogen])
    mc_k = merge(mc_kspread, mc_ksmear)
    assert all(np.isclose(mc_k, official_mc_k))

    mc_errspread = rochester.kSpreadMCerror(
        muons.charge[hasgen], muons.pt[hasgen], muons.eta[hasgen],
        muons.phi[hasgen], muons.matched_gen.pt[hasgen])
    mc_errsmear = rochester.kSmearMCerror(
        muons.charge[nogen], muons.pt[nogen], muons.eta[nogen],
        muons.phi[nogen], muons.nTrackerLayers[nogen], mc_rand[nogen])
    mc_err = merge(mc_errspread, mc_errsmear)
    assert all(np.isclose(mc_err, official_mc_err, atol=1e-8))
Exemple #13
0
 def getSubCorrections(self, **kwargs):
     """
         Returns the set of corrections for all input jets broken down by level
         use like:
         jecs = corrector.getSubCorrections(JetProperty1=jet.property1,...)
         'jecs' will be formatted like [[jec_jet1 jec_jet2 ...] ...]

         Entry i of the returned list is the cumulative correction after
         applying levels 0..i.  Each level is evaluated on JetPt/JetE
         already corrected by the previous levels.  If the first signature
         argument is a JaggedArray, all inputs are taken to be jagged and
         the corrections are returned as JaggedArrays with its offsets.
         The input arrays are not modified.

         Raises Exception if neither JetPt nor JetE is given.
     """
     localargs = kwargs
     firstarg = localargs[self._signature[0]]
     offsets = None
     if isinstance(firstarg, JaggedArray):
         offsets = firstarg.offsets
         cumulativeCorrection = firstarg.ones_like().content
         for key in localargs:
             localargs[key] = localargs[key].content
     else:
         cumulativeCorrection = np.ones_like(firstarg)
     corrVars = [var for var in ('JetPt', 'JetE') if var in localargs]
     if not corrVars:
         raise Exception(
             'No variable to correct, need JetPt or JetE in inputs!')
     corrections = []
     for func in self._funcs:
         args = [localargs[name] for name in func.signature]
         corr = func(*args)
         # Build new arrays instead of multiplying in place: ``*=`` would
         # write into the caller's input arrays, and would make every entry
         # of ``corrections`` alias one array holding only the final total.
         for var in corrVars:
             localargs[var] = localargs[var] * corr
         cumulativeCorrection = cumulativeCorrection * corr
         corrections.append(cumulativeCorrection)
     if offsets is not None:
         corrections = [
             JaggedArray.fromoffsets(offsets, level) for level in corrections
         ]
     return corrections
Exemple #14
0
def convert_effective_area_file(eaFilePath):
    """Convert an effective-area text file into dense-lookup inputs.

    The first line is a header of the form
    ``{nBinned binVar... nEval evalVar...}``; every following line holds a
    ``Min Max`` pair per binned variable followed by one value per evaluated
    variable.

    Returns a dict mapping ``(name, 'dense_lookup')`` to ``(values, bins)``,
    where ``name`` is ``<file stem>_<evalVar>`` and ``bins`` are the edges of
    the first binned variable.  Only one binning dimension is supported in
    the returned lookups.

    Raises:
        Exception: if the header does not start with a digit.
    """
    # Only the header line is needed here; genfromtxt re-reads the body.
    with open(eaFilePath, 'r') as ea_f:
        layoutstr = ea_f.readline().strip().strip('{}')

    name = eaFilePath.split('/')[-1].split('.')[0]

    layout = layoutstr.split()
    if not layout or not layout[0].isdigit():
        raise Exception(
            'First column of Effective Area File Header must be a digit!')

    # setup the file format
    nBinnedVars = int(layout[0])
    nEvalVars = int(layout[nBinnedVars + 1])

    minMax = ['Min', 'Max']
    columns = []
    dtypes = []
    offset = 1
    for i in range(nBinnedVars):
        columns.extend(['%s%s' % (layout[i + offset], mm) for mm in minMax])
        dtypes.extend(['<f8', '<f8'])
    offset += nBinnedVars + 1
    for i in range(nEvalVars):
        columns.append('%s' % (layout[i + offset]))
        dtypes.append('<f8')

    # unpack=False keeps the result a structured array indexable by column
    # name; since NumPy 1.20, unpack=True splits a structured result into a
    # plain list of per-column arrays.
    pars = np.genfromtxt(eaFilePath,
                         dtype=tuple(dtypes),
                         names=tuple(columns),
                         skip_header=1,
                         unpack=False,
                         encoding='ascii')

    bins = {}
    offset_col = 0
    offset_name = 1
    bin_order = []
    for i in range(nBinnedVars):
        if i == 0:
            binMins = np.unique(pars[columns[0]])
            binMaxs = np.unique(pars[columns[1]])
            bins[layout[i + offset_name]] = np.union1d(binMins, binMaxs)
        else:
            # builtin int: the np.int alias was removed in NumPy 1.24
            counts = np.zeros(0, dtype=int)
            allBins = np.zeros(0, dtype=np.double)
            # Edges of this variable, collected per bin of the first one.
            for binMin in bins[bin_order[0]][:-1]:
                rows = pars[np.where(pars[columns[0]] == binMin)]
                binMins = np.unique(rows[columns[i + offset_col]])
                binMaxs = np.unique(rows[columns[i + offset_col + 1]])
                theBins = np.union1d(binMins, binMaxs)
                allBins = np.append(allBins, theBins)
                counts = np.append(counts, theBins.size)
            bins[layout[i + offset_name]] = JaggedArray.fromcounts(
                counts, allBins)
        bin_order.append(layout[i + offset_name])
        # i + offset_col == 2 * i: each binned variable owns two columns
        offset_col += 1

    # again this is only for one dimension of binning, fight me
    # we can figure out a 2D EA when we get there
    offset_name += 1
    wrapped_up = {}
    lookup_type = 'dense_lookup'
    dims = bins[layout[1]]
    for i in range(nEvalVars):
        ea_name = '_'.join([name, columns[offset_name + i]])
        values = pars[columns[offset_name + i]]
        wrapped_up[(ea_name, lookup_type)] = (values, dims)

    return wrapped_up
Exemple #15
0
    def process(self, df):

        output = self.accumulator.identity()

        # ---- Define dataset ---- #
        dataset = df['dataset']  #coffea.processor.LazyDataFrame
        Dataset_info = df.available  #list of available columns in LazyDataFrame object (Similar to 'Events->Show()' command in ROOT)

        # ---- Get triggers from Dataset_info ---- #
        #triggers = [itrig for itrig in Dataset_info if 'HLT_PFHT' in itrig]
        #AK8triggers = [itrig for itrig in Dataset_info if 'HLT_AK8PFHT' in itrig]

        # ---- Find numeric values in trigger strings ---- #
        #triggers_cut1 = [sub.split('PFHT')[1] for sub in triggers] # Remove string characters from left of number
        #triggers_cut2 = [sub.split('_')[0] for sub in triggers_cut1] # Remove string characters from right of number
        #isTriggerValue = [val.isnumeric() for val in triggers_cut2] # Boolean -> if string is only a number
        #triggers_cut2 = np.where(isTriggerValue, triggers_cut2, 0) # If string is not a number, replace with 0
        #triggers_vals = [int(val) for val in triggers_cut2] # Convert string numbers to integers

        #AK8triggers_cut1 = [sub.split('HT')[1] for sub in AK8triggers]
        #AK8triggers_cut2 = [sub.split('_')[0] for sub in AK8triggers_cut1]
        #isAK8TriggerValue = [val.isnumeric() for val in AK8triggers_cut2]
        #AK8triggers_cut2 = np.where(isAK8TriggerValue, AK8triggers_cut2, 0)
        #AK8triggers_vals = [int(val) for val in AK8triggers_cut2]

        # ---- Find Largest and Second Largest Value ---- #
        #triggers_vals.sort(reverse = True)
        #AK8triggers_vals.sort(reverse = True)

        #triggers_vals1 = str(triggers_vals[0])
        #triggers_vals2 = str(triggers_vals[1])
        #AK8triggers_vals1 = str(AK8triggers_vals[0])
        #AK8triggers_vals2 = str(AK8triggers_vals[1])

        # ---- Define strings for the selected triggers ---- #
        #HLT_trig1_str = [itrig for itrig in triggers if (triggers_vals1) in itrig][0]
        #HLT_trig2_str = [itrig for itrig in triggers if (triggers_vals2) in itrig][0]
        #HLT_AK8_trig1_str = [itrig for itrig in AK8triggers if (AK8triggers_vals1) in itrig][0]
        #HLT_AK8_trig2_str = [itrig for itrig in AK8triggers if (AK8triggers_vals2) in itrig][0]

        # ---- Define HLT triggers to be used ---- #
        #HLT_trig1 = df[HLT_trig1_str]
        #HLT_trig2 = df[HLT_trig2_str]
        #HLT_AK8_trig1 = df[HLT_AK8_trig1_str]
        #HLT_AK8_trig2 = df[HLT_AK8_trig2_str]

        # ---- Define AK8 Jets as FatJets ---- #
        FatJets = JaggedCandidateArray.candidatesfromcounts(
            df['nFatJet'],
            pt=df['FatJet_pt'],
            eta=df['FatJet_eta'],
            phi=df['FatJet_phi'],
            mass=df['FatJet_mass'],
            area=df['FatJet_area'],
            msoftdrop=df['FatJet_msoftdrop'],
            jetId=df['FatJet_jetId'],
            tau1=df['FatJet_tau1'],
            tau2=df['FatJet_tau2'],
            tau3=df['FatJet_tau3'],
            tau4=df['FatJet_tau4'],
            n3b1=df['FatJet_n3b1'],
            btagDeepB=df['FatJet_btagDeepB'],
            btagCSVV2=df['FatJet_btagCSVV2'],
            deepTag_TvsQCD=df['FatJet_deepTag_TvsQCD'],
            deepTagMD_TvsQCD=df['FatJet_deepTagMD_TvsQCD'],
            subJetIdx1=df['FatJet_subJetIdx1'],
            subJetIdx2=df['FatJet_subJetIdx2'])

        # ---- Define AK4 jets as Jets ---- #
        Jets = JaggedCandidateArray.candidatesfromcounts(df['nJet'],
                                                         pt=df['Jet_pt'],
                                                         eta=df['Jet_eta'],
                                                         phi=df['Jet_phi'],
                                                         mass=df['Jet_mass'],
                                                         area=df['Jet_area'])
        # ---- Define SubJets ---- #
        SubJets = JaggedCandidateArray.candidatesfromcounts(
            df['nSubJet'],
            pt=df['SubJet_pt'],
            eta=df['SubJet_eta'],
            phi=df['SubJet_phi'],
            mass=df['SubJet_mass'],
            btagDeepB=df['SubJet_btagDeepB'],
            btagCSVV2=df['SubJet_btagCSVV2'])

        # ---- Get event weights from dataset ---- #
        if 'JetHT' in dataset:  # If data is used...
            evtweights = np.ones(FatJets.size)  # set all "data weights" to one
        else:  # if Monte Carlo dataset is used...
            evtweights = df["Generator_weight"].reshape(-1, 1).flatten()

        # ---- Show all events ---- #
        output['cutflow']['all events'] += FatJets.size

        # ---- Apply Trigger(s) ---- #
        #FatJets = FatJets[HLT_AK8_trig1]
        #evtweights = evtweights[HLT_AK8_trig1]
        #Jets = Jets[HLT_AK8_trig1]
        #SubJets = SubJets[HLT_AK8_trig1]

        # ---- Jets that satisfy Jet ID ---- #
        jet_id = (FatJets.jetId > 0)  # Loose jet ID
        FatJets = FatJets[jet_id]
        output['cutflow']['jet id'] += jet_id.any().sum()

        # ---- Apply pT Cut and Rapidity Window ---- #
        jetkincut_index = (FatJets.pt > self.ak8PtMin) & (np.abs(
            FatJets.p4.rapidity) < 2.4)
        FatJets = FatJets[jetkincut_index]
        output['cutflow']['jet kin'] += jetkincut_index.any().sum()

        # ---- Find two AK8 Jets ---- #
        twoFatJetsKin = (FatJets.counts == 2)
        FatJets = FatJets[twoFatJetsKin]
        evtweights = evtweights[twoFatJetsKin]
        Jets = Jets[twoFatJetsKin]
        SubJets = SubJets[twoFatJetsKin]
        output['cutflow']['two FatJets and jet kin'] += twoFatJetsKin.sum()

        # ---- Apply HT Cut ---- #
        hT = Jets.pt.sum()
        passhT = (hT > self.htCut)
        evtweights = evtweights[passhT]
        FatJets = FatJets[passhT]
        SubJets = SubJets[passhT]

        # ---- Randomly Assign AK8 Jets as TTbar Candidates 0 and 1 --- #
        if self.RandomDebugMode == True:  # 'Sudo' randomizer for consistent results
            highPhi = FatJets.phi[:, 0] > FatJets.phi[:, 1]
            highRandIndex = np.where(highPhi, 0, 1)
            index = JaggedArray.fromcounts(np.ones(len(FatJets), dtype='i'),
                                           highRandIndex)
        else:  # Truly randomize
            index = JaggedArray.fromcounts(
                np.ones(len(FatJets), dtype='i'),
                self.prng.randint(2, size=len(FatJets)))
        jet0 = FatJets[index]  #J0
        jet1 = FatJets[1 - index]  #J1

        ttbarcands = jet0.cross(jet1)  #FatJets[:,0:2].distincts()

        # ---- Make sure we have at least 1 TTbar candidate pair and re-broadcast releveant arrays  ---- #
        oneTTbar = (ttbarcands.counts >= 1)
        output['cutflow']['>= one oneTTbar'] += oneTTbar.sum()
        ttbarcands = ttbarcands[oneTTbar]
        evtweights = evtweights[oneTTbar]
        FatJets = FatJets[oneTTbar]
        SubJets = SubJets[oneTTbar]

        # ---- Apply Delta Phi Cut for Back to Back Topology ---- #
        dPhiCut = (ttbarcands.i0.p4.delta_phi(ttbarcands.i1.p4) >
                   2.1).flatten()
        output['cutflow']['dPhi > 2.1'] += dPhiCut.sum()
        ttbarcands = ttbarcands[dPhiCut]
        evtweights = evtweights[dPhiCut]
        FatJets = FatJets[dPhiCut]
        SubJets = SubJets[dPhiCut]

        # ---- Identify subjets according to subjet ID ---- #
        hasSubjets0 = ((ttbarcands.i0.subJetIdx1 > -1) &
                       (ttbarcands.i0.subJetIdx2 > -1))
        hasSubjets1 = ((ttbarcands.i1.subJetIdx1 > -1) &
                       (ttbarcands.i1.subJetIdx2 > -1))
        GoodSubjets = ((hasSubjets0) & (hasSubjets1)).flatten()

        ttbarcands = ttbarcands[GoodSubjets]

        SubJets = SubJets[GoodSubjets]
        evtweights = evtweights[GoodSubjets]

        SubJet01 = SubJets[ttbarcands.i0.subJetIdx1]  # FatJet i0 with subjet 1
        SubJet02 = SubJets[ttbarcands.i0.subJetIdx2]  # FatJet i0 with subjet 2
        SubJet11 = SubJets[ttbarcands.i1.subJetIdx1]  # FatJet i1 with subjet 1
        SubJet12 = SubJets[ttbarcands.i1.subJetIdx2]  # FatJet i1 with subjet 2

        # ---- Define Rapidity Regions ---- #
        cen = np.abs(ttbarcands.i0.p4.rapidity -
                     ttbarcands.i1.p4.rapidity) < 1.0
        fwd = (~cen)

        # ---- CMS Top Tagger Version 2 (SD and Tau32 Cuts) ---- #
        tau32_i0 = np.where(ttbarcands.i0.tau2 > 0,
                            ttbarcands.i0.tau3 / ttbarcands.i0.tau2, 0)
        tau32_i1 = np.where(ttbarcands.i1.tau2 > 0,
                            ttbarcands.i1.tau3 / ttbarcands.i1.tau2, 0)
        taucut_i0 = tau32_i0 < self.tau32Cut
        taucut_i1 = tau32_i1 < self.tau32Cut
        mcut_i0 = (self.minMSD < ttbarcands.i0.msoftdrop) & (
            ttbarcands.i0.msoftdrop < self.maxMSD)
        mcut_i1 = (self.minMSD < ttbarcands.i1.msoftdrop) & (
            ttbarcands.i1.msoftdrop < self.maxMSD)

        ttag_i0 = (taucut_i0) & (mcut_i0)
        ttag_i1 = (taucut_i1) & (mcut_i1)

        # ---- Define "Top Tag" Regions ---- #
        antitag = (~taucut_i0) & (mcut_i0
                                  )  #Probe will always be ttbarcands.i1 (at)
        antitag_probe = np.logical_and(
            antitag, ttag_i1
        )  #Found an antitag and ttagged probe pair for mistag rate (Pt)
        pretag = ttag_i0  # Only jet0 (pret)
        ttag0 = (~ttag_i0) & (~ttag_i1)  # No tops tagged (0t)
        ttag1 = ttag_i0 ^ ttag_i1  # Exclusively one top tagged (1t)
        ttagI = ttag_i0 | ttag_i1  # At least one top tagged ('I' for 'inclusive' tagger; >=1t; 1t+2t)
        ttag2 = ttag_i0 & ttag_i1  # Both jets top tagged (2t)
        Alltags = ttag0 | ttagI  #Either no tag or at least one tag (0t+1t+2t)

        # ---- Pick FatJet that passes btag cut based on its subjet with the highest btag value ---- #
        btag_i0 = (np.maximum(SubJet01.btagCSVV2, SubJet02.btagCSVV2) >
                   self.bdisc)
        btag_i1 = (np.maximum(SubJet11.btagCSVV2, SubJet12.btagCSVV2) >
                   self.bdisc)

        # --- Define "B Tag" Regions ---- #
        btag0 = (~btag_i0) & (~btag_i1)  #(0b)
        btag1 = btag_i0 ^ btag_i1  #(1b)
        btag2 = btag_i0 & btag_i1  #(2b)

        # ---- Get Analysis Categories ---- #
        # ---- They are (central, forward) cross (0b,1b,2b) cross (At,at,0t,1t,>=1t,2t) ---- #
        regs = [cen, fwd]
        btags = [btag0, btag1, btag2]
        ttags = [
            antitag_probe, antitag, pretag, ttag0, ttag1, ttagI, ttag2, Alltags
        ]
        cats = [(t & b & y).flatten()
                for t, b, y in itertools.product(ttags, btags, regs)]
        labels_and_categories = dict(zip(self.anacats, cats))

        # ---- Variables for Kinematic Histograms ---- #
        # ---- "i0" is the control jet, "i1" is the probe jet ---- #
        ttbarmass = ttbarcands.p4.sum().mass.flatten()
        jetpt = ttbarcands.i1.pt.flatten()
        jeteta = ttbarcands.i1.eta.flatten()
        jetphi = ttbarcands.i1.phi.flatten()
        jety = ttbarcands.i1.p4.rapidity.flatten()
        jetmass = ttbarcands.i1.p4.mass.flatten()
        SDmass = ttbarcands.i1.msoftdrop.flatten()
        jetdy = np.abs(ttbarcands.i0.p4.rapidity.flatten() -
                       ttbarcands.i1.p4.rapidity.flatten())
        Tau32 = (ttbarcands.i1.tau3 / ttbarcands.i1.tau2).flatten()
        # ---- Variables for Deep Tagger Analysis ---- #
        deepTag = ttbarcands.i1.deepTag_TvsQCD.flatten()
        deepTagMD = ttbarcands.i1.deepTagMD_TvsQCD.flatten()

        weights = evtweights.flatten()

        # ---- Define the SumW2 for MC Datasets ---- #
        output['cutflow']['sumw'] += np.sum(weights)
        output['cutflow']['sumw2'] += np.sum(weights**2)

        # ---- Define Momentum p of probe jet as the Mistag Rate variable; M(p) ---- #
        # ---- Transverse Momentum pT can also be used instead; M(pT) ---- #
        pT = ttbarcands.i1.pt.flatten()
        eta = ttbarcands.i1.eta.flatten()
        pz = np.sinh(eta) * pT
        p = np.absolute(np.sqrt(pT**2 + pz**2))

        # ---- Define the Numerator and Denominator for Mistag Rate ---- #
        numerator = np.where(
            antitag_probe, p,
            -1)  # If no antitag and tagged probe, move event to useless bin
        denominator = np.where(antitag, p,
                               -1)  # If no antitag, move event to useless bin

        df = pd.DataFrame({"momentum":
                           p})  # Used for finding values in LookUp Tables

        for ilabel, icat in labels_and_categories.items():
            ### ------------------------------------ Mistag Scaling ------------------------------------ ###
            if self.UseLookUpTables == True:
                # ---- Weight ttbar M.C. and data by mistag from data (corresponding to its year) ---- #
                if 'TTbar_' in dataset:
                    file_df = self.lu['JetHT' + dataset[-4:] + '_Data'][
                        'at' + str(
                            ilabel[-5:]
                        )]  #Pick out proper JetHT year mistag for TTbar sim.
                elif dataset == 'TTbar':
                    file_df = self.lu['JetHT']['at' + str(
                        ilabel[-5:])]  # All JetHT years mistag for TTbar sim.
                else:
                    file_df = self.lu[dataset]['at' + str(
                        ilabel[-5:])]  # get mistag (lookup) filename for 'at'

                bin_widths = file_df[
                    'p'].values  # collect bins as written in .csv file
                mtr = file_df[
                    'M(p)'].values  # collect mistag rate as function of p as written in file
                wgts = mtr  # Define weights based on mistag rates

                BinKeys = np.arange(
                    bin_widths.size
                )  # Use as label for BinNumber column in the new dataframe

                #Bins = pd.interval_range(start=0, periods=100, freq=100, closed='left') # Recreate the momentum bins from file_df as something readable for pd.cut()
                Bins = np.array(manual_bins)

                df['BinWidth'] = pd.cut(p, bins=Bins)  # new dataframe column
                df['BinNumber'] = pd.cut(p, bins=Bins, labels=BinKeys)

                BinNumber = df[
                    'BinNumber'].values  # Collect the Bin Numbers into a numpy array
                BinNumber = BinNumber.astype(
                    'int64')  # Insures the bin numbers are integers

                WeightMatching = wgts[
                    BinNumber]  # Match 'wgts' with corresponding p bin using the bin number
                Weights = weights * WeightMatching  # Include 'wgts' with the previously defined 'weights'
            else:
                Weights = weights  # No mistag rates, no change to weights
            ###---------------------------------------------------------------------------------------------###
            ### ----------------------------------- Mod-mass Procedure ------------------------------------ ###
            if self.ModMass == True:
                QCD_unweighted = util.load(
                    'TTbarResCoffea_QCD_unweighted_output.coffea')

                # ---- Extract event counts from QCD MC hist in signal region ---- #
                QCD_hist = QCD_unweighted['jetmass'].integrate(
                    'anacat',
                    '2t' + str(ilabel[-5:])).integrate('dataset', 'QCD')
                data = QCD_hist.values()  # Dictionary of values
                QCD_data = [
                    i for i in data.values()
                ][0]  # place every element of the dictionary into a numpy array

                # ---- Re-create Bins from QCD_hist as Numpy Array ---- #
                bins = np.arange(
                    510
                )  #Re-make bins from the jetmass_axis starting with the appropriate range
                QCD_bins = bins[::
                                10]  #Finish re-making bins by insuring exactly 50 bins like the jetmass_axis

                # ---- Define Mod Mass Distribution ---- #
                ModMass_hist_dist = ss.rv_histogram([QCD_data, QCD_bins])
                jet1_modp4 = copy.copy(
                    jet1.p4
                )  #J1's Lorentz four vector that can be safely modified
                jet1_modp4["fMass"] = ModMass_hist_dist.rvs(
                    size=jet1_modp4.size
                )  #Replace J1's mass with random value of mass from mm hist
                ttbarcands_modmass = jet0.p4.cross(
                    jet1_modp4)  #J0's four vector x modified J1's four vector

                # ---- Apply Necessary Selections to new modmass version ---- #
                ttbarcands_modmass = ttbarcands_modmass[oneTTbar]
                ttbarcands_modmass = ttbarcands_modmass[dPhiCut]
                ttbarcands_modmass = ttbarcands_modmass[GoodSubjets]

                # ---- Manually sum the modmass p4 candidates (Coffea technicality) ---- #
                ttbarcands_modmass_p4_sum = (ttbarcands_modmass.i0 +
                                             ttbarcands_modmass.i1)

                # ---- Re-define Mass Variables for ModMass Procedure (pt, eta, phi are redundant to change) ---- #
                ttbarmass = ttbarcands_modmass_p4_sum.flatten().mass
                jetmass = ttbarcands_modmass.i1.mass.flatten()
            ###---------------------------------------------------------------------------------------------###
            output['cutflow'][ilabel] += np.sum(icat)

            output['ttbarmass'].fill(dataset=dataset,
                                     anacat=ilabel,
                                     ttbarmass=ttbarmass[icat],
                                     weight=Weights[icat])
            output['jetpt'].fill(dataset=dataset,
                                 anacat=ilabel,
                                 jetpt=jetpt[icat],
                                 weight=Weights[icat])
            output['probept'].fill(dataset=dataset,
                                   anacat=ilabel,
                                   jetpt=pT[icat],
                                   weight=Weights[icat])
            output['probep'].fill(dataset=dataset,
                                  anacat=ilabel,
                                  jetp=p[icat],
                                  weight=Weights[icat])
            output['jeteta'].fill(dataset=dataset,
                                  anacat=ilabel,
                                  jeteta=jeteta[icat],
                                  weight=Weights[icat])
            output['jetphi'].fill(dataset=dataset,
                                  anacat=ilabel,
                                  jetphi=jetphi[icat],
                                  weight=Weights[icat])
            output['jety'].fill(dataset=dataset,
                                anacat=ilabel,
                                jety=jety[icat],
                                weight=Weights[icat])
            output['jetdy'].fill(dataset=dataset,
                                 anacat=ilabel,
                                 jetdy=jetdy[icat],
                                 weight=Weights[icat])
            output['numerator'].fill(dataset=dataset,
                                     anacat=ilabel,
                                     jetp=numerator[icat],
                                     weight=Weights[icat])
            output['denominator'].fill(dataset=dataset,
                                       anacat=ilabel,
                                       jetp=denominator[icat],
                                       weight=Weights[icat])
            output['jetmass'].fill(dataset=dataset,
                                   anacat=ilabel,
                                   jetmass=jetmass[icat],
                                   weight=Weights[icat])
            output['SDmass'].fill(dataset=dataset,
                                  anacat=ilabel,
                                  jetmass=SDmass[icat],
                                  weight=Weights[icat])
            output['tau32'].fill(dataset=dataset,
                                 anacat=ilabel,
                                 tau32=Tau32[icat],
                                 weight=Weights[icat])
            output['tau32_2D'].fill(dataset=dataset,
                                    anacat=ilabel,
                                    jetpt=pT[icat],
                                    tau32=Tau32[icat],
                                    weight=Weights[icat])
            output['deepTag_TvsQCD'].fill(dataset=dataset,
                                          anacat=ilabel,
                                          jetpt=pT[icat],
                                          tagger=deepTag[icat],
                                          weight=Weights[icat])
            output['deepTagMD_TvsQCD'].fill(dataset=dataset,
                                            anacat=ilabel,
                                            jetpt=pT[icat],
                                            tagger=deepTagMD[icat],
                                            weight=Weights[icat])

        return output
    def process(self, df):
        """Select lepton+jets(+photon) events and fill the output histograms.

        Builds muon, electron, jet, photon and (for simulation only)
        generator-particle candidate collections from the flat branches of
        ``df``, applies the object and event selections, computes a per-event
        weight for simulation (cross-section normalisation, b-tag, electron
        and muon scale factors) and fills the photon, photon-lepton mass and
        M3 histograms.

        Parameters
        ----------
        df
            Object indexed by branch name (``df['nMuon']``, ``df['Muon_pt']``,
            ...) that also provides ``df['dataset']`` and ``df['event']``.

        Returns
        -------
        The accumulator created by ``self.accumulator.identity()``, with the
        histograms filled and ``'EventCount'`` set to ``len(df['event'])``.
        """
        output = self.accumulator.identity()

        datasetFull = df['dataset']
        dataset = datasetFull.replace('_2016', '')

        isData = 'Data' in dataset

        year = 2016
        muTrigger = df['HLT_IsoMu24'] | df['HLT_IsoTkMu24']
        eleTrigger = df['HLT_Ele27_WPTight_Gsf']
        photonBitMapName = 'Photon_cutBased'

        #### These are already applied in the skim
        #         filters = (df['Flag_goodVertices'] &
        #                    df['Flag_globalSuperTightHalo2016Filter'] &
        #                    df['Flag_HBHENoiseFilter'] &
        #                    df['Flag_HBHENoiseIsoFilter'] &
        #                    df['Flag_EcalDeadCellTriggerPrimitiveFilter'] &
        #                    df['Flag_BadPFMuonFilter']
        #                   )
        #         if year > 2016:
        #             filters = (filters &
        #                        df['Flag_ecalBadCalibFilterV2']
        #                       )

        muons = JaggedCandidateArray.candidatesfromcounts(
            df['nMuon'],
            pt=df['Muon_pt'],
            eta=df['Muon_eta'],
            phi=df['Muon_phi'],
            mass=df['Muon_mass'],
            charge=df['Muon_charge'],
            relIso=df['Muon_pfRelIso04_all'],
            tightId=df['Muon_tightId'],
            isPFcand=df['Muon_isPFcand'],
            isTracker=df['Muon_isTracker'],
            isGlobal=df['Muon_isGlobal'],
        )

        electrons = JaggedCandidateArray.candidatesfromcounts(
            df['nElectron'],
            pt=df['Electron_pt'],
            eta=df['Electron_eta'],
            phi=df['Electron_phi'],
            mass=df['Electron_mass'],
            charge=df['Electron_charge'],
            cutBased=df['Electron_cutBased'],
            d0=df['Electron_dxy'],
            dz=df['Electron_dz'],
        )

        # Generator-level branches are only read for simulation; for data a
        # same-shaped array of ones is used as a placeholder.
        jets = JaggedCandidateArray.candidatesfromcounts(
            df['nJet'],
            pt=df['Jet_pt'],
            eta=df['Jet_eta'],
            phi=df['Jet_phi'],
            mass=df['Jet_mass'],
            jetId=df['Jet_jetId'],
            btag=df['Jet_btagDeepB'],
            hadFlav=df['Jet_hadronFlavour']
            if not isData else np.ones_like(df['Jet_jetId']),
            genIdx=df['Jet_genJetIdx']
            if not isData else np.ones_like(df['Jet_jetId']),
        )

        photons = JaggedCandidateArray.candidatesfromcounts(
            df['nPhoton'],
            pt=df['Photon_pt'],
            eta=df['Photon_eta'],
            phi=df['Photon_phi'],
            mass=np.zeros_like(df['Photon_pt']),
            isEE=df['Photon_isScEtaEE'],
            isEB=df['Photon_isScEtaEB'],
            photonId=df[photonBitMapName],
            passEleVeto=df['Photon_electronVeto'],
            pixelSeed=df['Photon_pixelSeed'],
            sieie=df['Photon_sieie'],
            chIso=df['Photon_pfRelIso03_chg'] * df['Photon_pt'],
            vidCuts=df['Photon_vidNestedWPBitmap'],
            genFlav=df['Photon_genPartFlav']
            if not isData else np.ones_like(df['Photon_electronVeto']),
            genIdx=df['Photon_genPartIdx']
            if not isData else np.ones_like(df['Photon_electronVeto']),
        )
        if not isData:
            genPart = JaggedCandidateArray.candidatesfromcounts(
                df['nGenPart'],
                pt=df['GenPart_pt'],
                eta=df['GenPart_eta'],
                phi=df['GenPart_phi'],
                mass=df['GenPart_mass'],
                pdgid=df['GenPart_pdgId'],
                motherIdx=df['GenPart_genPartIdxMother'],
                status=df['GenPart_status'],
                statusFlags=df['GenPart_statusFlags'],
            )

            genmotherIdx = genPart.motherIdx
            genpdgid = genPart.pdgid

        ## TTbar vs TTGamma Overlap Removal (work in progress, still buggy)
        # Thresholds depend on the sample; samples matching none of the
        # patterns below skip the overlap removal entirely.
        doOverlapRemoval = False
        if 'TTbar' in dataset:
            doOverlapRemoval = True
            overlapPt = 10.
            overlapEta = 5.
            overlapDR = 0.1
        if re.search("^W[1234]jets$", dataset):
            doOverlapRemoval = True
            overlapPt = 10.
            overlapEta = 2.5
            overlapDR = 0.05
        if 'DYjetsM' in dataset:
            doOverlapRemoval = True
            overlapPt = 15.
            overlapEta = 2.6
            overlapDR = 0.05

        if doOverlapRemoval:
            # NOTE: genPart/genpdgid/genmotherIdx only exist when isData is
            # False, so this branch relies on the patterns above never
            # matching a data sample name.
            overlapPhoSelect = ((genPart.pt >= overlapPt) &
                                (abs(genPart.eta) < overlapEta) &
                                (genPart.pdgid == 22) & (genPart.status == 1))

            OverlapPhotons = genPart[overlapPhoSelect]

            idx = OverlapPhotons.motherIdx
            maxParent = maxHistoryPDGID(idx.content, idx.starts, idx.stops,
                                        genpdgid.content, genpdgid.starts,
                                        genpdgid.stops, genmotherIdx.content,
                                        genmotherIdx.starts,
                                        genmotherIdx.stops)

            # Currently unused by the rest of this method.
            isNonPrompt = (maxParent > 37).any()

            # Final-state generator particles (status 1 or 71), excluding
            # neutrinos (|pdgid| 12, 14, 16).
            finalGen = genPart[(
                (genPart.status == 1) | (genPart.status == 71)) & ~(
                    (abs(genPart.pdgid) == 12) | (abs(genPart.pdgid) == 14) |
                    (abs(genPart.pdgid) == 16))]

            genPairs = OverlapPhotons['p4'].cross(finalGen['p4'], nested=True)
            ## remove the case where the cross product is the gen photon with itself
            genPairs = genPairs[~(genPairs.i0 == genPairs.i1)]

            dRPairs = genPairs.i0.delta_r(genPairs.i1)

            isOverlap = ((dRPairs.min() > overlapDR) & (maxParent < 37)).any()
            passOverlapRemoval = ~isOverlap
        else:
            passOverlapRemoval = np.ones_like(df['event']) == 1

        muonSelectTight = ((muons.pt > 30) & (abs(muons.eta) < 2.4) &
                           (muons.tightId) & (muons.relIso < 0.15))

        # Loose muons are defined exclusively of the tight ones.
        muonSelectLoose = ((muons.pt > 15) & (abs(muons.eta) < 2.4) &
                           ((muons.isPFcand) &
                            (muons.isTracker | muons.isGlobal)) &
                           (muons.relIso < 0.25) & np.invert(muonSelectTight))

        eleEtaGap = (abs(electrons.eta) < 1.4442) | (abs(electrons.eta) >
                                                     1.566)
        # Impact-parameter cuts use different thresholds below and above
        # |eta| = 1.479.
        elePassD0 = ((abs(electrons.eta) < 1.479) & (abs(electrons.d0) < 0.05)
                     | (abs(electrons.eta) > 1.479) &
                     (abs(electrons.d0) < 0.1))
        elePassDZ = ((abs(electrons.eta) < 1.479) & (abs(electrons.dz) < 0.1) |
                     (abs(electrons.eta) > 1.479) & (abs(electrons.dz) < 0.2))

        electronSelectTight = ((electrons.pt > 35) & (abs(electrons.eta) < 2.1)
                               & eleEtaGap & (electrons.cutBased >= 4)
                               & elePassD0 & elePassDZ)

        # Loose electrons are defined exclusively of the tight ones.
        electronSelectLoose = ((electrons.pt > 15) & (abs(electrons.eta) < 2.4)
                               & eleEtaGap & (electrons.cutBased >= 1)
                               & elePassD0 & elePassDZ
                               & np.invert(electronSelectTight))

        tightMuon = muons[muonSelectTight]
        looseMuon = muons[muonSelectLoose]

        tightElectron = electrons[electronSelectTight]
        looseElectron = electrons[electronSelectLoose]

        oneMuon = (tightMuon.counts == 1)
        muVeto = (tightMuon.counts == 0)
        oneEle = (tightElectron.counts == 1)
        eleVeto = (tightElectron.counts == 0)
        looseMuonSel = (looseMuon.counts == 0)
        looseElectronSel = (looseElectron.counts == 0)

        #### Calculate deltaR between photon and nearest muon
        ####### make combination pairs
        phoMu = photons['p4'].cross(tightMuon['p4'], nested=True)

        ####### check delta R of each combination, if all are >0.4 it is okay, or if there are no tight muons it passes
        dRphomu = (phoMu.i0.delta_r(phoMu.i1) > 0.4).all() | (tightMuon.counts
                                                              == 0)
        phoEle = photons['p4'].cross(tightElectron['p4'], nested=True)
        dRphoele = ((phoEle.i0.delta_r(phoEle.i1)).min() >
                    0.4) | (tightElectron.counts == 0)

        #photon selection (no ID requirement used here)
        photonSelect = ((photons.pt > 20) & (abs(photons.eta) < 1.4442) &
                        (photons.isEE | photons.isEB) & (photons.passEleVeto)
                        & np.invert(photons.pixelSeed) & dRphomu & dRphoele)

        #split out the ID requirement, enabling Iso and SIEIE to be inverted for control regions
        photonID = photons.photonId >= 2

        #parse VID cuts, define loose photons (not used yet)
        # Each cut occupies a 2-bit field of vidCuts; a field value >= 2 is
        # treated as passing. Parentheses spell out the existing precedence
        # (`>>` binds tighter than `&`).
        photon_MinPtCut = ((photons.vidCuts >> 0) & 3) >= 2
        photon_PhoSCEtaMultiRangeCut = ((photons.vidCuts >> 2) & 3) >= 2
        photon_PhoSingleTowerHadOverEmCut = ((photons.vidCuts >> 4) & 3) >= 2
        photon_PhoFull5x5SigmaIEtaIEtaCut = ((photons.vidCuts >> 6) & 3) >= 2
        photon_ChIsoCut = ((photons.vidCuts >> 8) & 3) >= 2
        photon_NeuIsoCut = ((photons.vidCuts >> 10) & 3) >= 2
        photon_PhoIsoCut = ((photons.vidCuts >> 12) & 3) >= 2

        # NOTE(review): despite its name this mask DOES include the
        # sigma-ieta-ieta cut (only the charged-isolation cut is left out).
        # The sideband below additionally requires sieie > 0.012, which may
        # conflict with that cut -- confirm the intended definition.
        photonID_NoChIsoSIEIE = (photon_MinPtCut & photon_PhoSCEtaMultiRangeCut
                                 & photon_PhoSingleTowerHadOverEmCut
                                 & photon_PhoFull5x5SigmaIEtaIEtaCut
                                 & photon_NeuIsoCut & photon_PhoIsoCut)

        tightPhotons = photons[photonSelect & photonID]
        loosePhotons = photons[photonSelect & photonID_NoChIsoSIEIE
                               & photon_PhoFull5x5SigmaIEtaIEtaCut]
        loosePhotonsSideband = photons[photonSelect & photonID_NoChIsoSIEIE &
                                       (photons.sieie > 0.012)]

        ##medium jet ID cut
        jetIDbit = 1
        if year > 2016:
            jetIDbit = 2

        ##check dR jet,lepton & jet,photon
        jetMu = jets['p4'].cross(tightMuon['p4'], nested=True)
        dRjetmu = (
            (jetMu.i0.delta_r(jetMu.i1)).min() > 0.4) | (tightMuon.counts == 0)

        jetEle = jets['p4'].cross(tightElectron['p4'], nested=True)
        dRjetele = ((jetEle.i0.delta_r(jetEle.i1)).min() >
                    0.4) | (tightElectron.counts == 0)

        jetPho = jets['p4'].cross(tightPhotons['p4'], nested=True)
        dRjetpho = ((jetPho.i0.delta_r(jetPho.i1)).min() >
                    0.1) | (tightPhotons.counts == 0)

        jetSelect = ((jets.pt > 30) & (abs(jets.eta) < 2.4) &
                     (((jets.jetId >> jetIDbit) & 1) == 1) & dRjetmu & dRjetele
                     & dRjetpho)

        tightJets = jets[jetSelect]

        bTagWP = 0.6321  #2016 DeepCSV working point

        btagged = tightJets.btag > bTagWP

        bJets = tightJets[btagged]

        ## Define M3, mass of 3-jet pair with highest pT
        triJet = tightJets['p4'].choose(3)

        triJetPt = (triJet.i0 + triJet.i1 + triJet.i2).pt
        triJetMass = (triJet.i0 + triJet.i1 + triJet.i2).mass
        M3 = triJetMass[triJetPt.argmax()]

        # Keep at most the first candidate per event. The original `[::1]`
        # sliced the event axis (a no-op) rather than the per-event axis; the
        # filled values are unchanged because the fill masks below already
        # require exactly one tight lepton.
        leadingMuon = tightMuon[:, :1]
        leadingElectron = tightElectron[:, :1]

        leadingPhoton = tightPhotons[:, :1]
        leadingPhotonLoose = loosePhotons[:, :1]
        leadingPhotonSideband = loosePhotonsSideband[:, :1]

        #        egammaMass = (leadingElectron['p4'] + leadingPhoton['p4']).mass
        egamma = leadingElectron['p4'].cross(leadingPhoton['p4'])
        mugamma = leadingMuon['p4'].cross(leadingPhoton['p4'])
        egammaMass = (egamma.i0 + egamma.i1).mass
        mugammaMass = (mugamma.i0 + mugamma.i1).mass

        if not isData:
            #### Photon categories, using genIdx branch
            # reco photons really generated as electrons
            idx = leadingPhoton.genIdx

            matchedPho = (genpdgid[idx] == 22).any()
            isMisIDele = (abs(genpdgid[idx]) == 11).any()

            maxParent = maxHistoryPDGID(idx.content, idx.starts, idx.stops,
                                        genpdgid.content, genpdgid.starts,
                                        genpdgid.stops, genmotherIdx.content,
                                        genmotherIdx.starts,
                                        genmotherIdx.stops)

            hadronicParent = maxParent > 25

            isGenPho = matchedPho & ~hadronicParent
            isHadPho = matchedPho & hadronicParent
            isHadFake = ~(isMisIDele | isGenPho | isHadPho) & (
                leadingPhoton.counts == 1)

            #define integer definition for the photon category axis
            phoCategory = 1 * isGenPho + 2 * isMisIDele + 3 * isHadPho + 4 * isHadFake

            # The loose and sideband categories below use the genFlav branch
            # for matching instead of looking up the pdgId through genIdx.
            isMisIDeleLoose = (leadingPhotonLoose.genFlav == 13).any()
            matchedPhoLoose = (leadingPhotonLoose.genFlav == 1).any()

            # look through parentage to find if any hadrons in genPhoton parent history
            idx = leadingPhotonLoose.genIdx

            maxParent = maxHistoryPDGID(idx.content, idx.starts, idx.stops,
                                        genpdgid.content, genpdgid.starts,
                                        genpdgid.stops, genmotherIdx.content,
                                        genmotherIdx.starts,
                                        genmotherIdx.stops)

            hadronicParent = maxParent > 25

            isGenPhoLoose = matchedPhoLoose & ~hadronicParent
            isHadPhoLoose = matchedPhoLoose & hadronicParent
            isHadFakeLoose = ~(isMisIDeleLoose | isGenPhoLoose
                               | isHadPhoLoose) & (leadingPhotonLoose.counts
                                                   == 1)

            #define integer definition for the photon category axis
            phoCategoryLoose = 1 * isGenPhoLoose + 2 * isMisIDeleLoose + 3 * isHadPhoLoose + 4 * isHadFakeLoose

            isMisIDeleSideband = (leadingPhotonSideband.genFlav == 13).any()
            matchedPhoSideband = (leadingPhotonSideband.genFlav == 1).any()

            # look through parentage to find if any hadrons in genPhoton parent history
            idx = leadingPhotonSideband.genIdx

            maxParent = maxHistoryPDGID(idx.content, idx.starts, idx.stops,
                                        genpdgid.content, genpdgid.starts,
                                        genpdgid.stops, genmotherIdx.content,
                                        genmotherIdx.starts,
                                        genmotherIdx.stops)

            hadronicParent = maxParent > 25

            isGenPhoSideband = matchedPhoSideband & ~hadronicParent
            isHadPhoSideband = matchedPhoSideband & hadronicParent
            isHadFakeSideband = ~(isMisIDeleSideband | isGenPhoSideband
                                  | isHadPhoSideband) & (
                                      leadingPhotonSideband.counts == 1)

            #define integer definition for the photon category axis
            phoCategorySideband = 1 * isGenPhoSideband + 2 * isMisIDeleSideband + 3 * isHadPhoSideband + 4 * isHadFakeSideband
        else:
            # Data: every event gets category 1.
            phoCategory = np.ones_like(df['event'])
            phoCategoryLoose = np.ones_like(df['event'])
            phoCategorySideband = np.ones_like(df['event'])

        ### remove filter selection
        ###    This is already applied in the skim, and is causing data to fail for some reason (the flag branches are duplicated in NanoAOD for data, is it causing problems???)
        #         mu_noLoose = (muTrigger & filters & passOverlapRemoval &
        #                       oneMuon & eleVeto &
        #                       looseMuonSel & looseElectronSel)
        #         ele_noLoose = (eleTrigger & filters & passOverlapRemoval &
        #                        oneEle & muVeto &
        #                        looseMuonSel & looseElectronSel)

        mu_noLoose = (muTrigger & passOverlapRemoval & oneMuon & eleVeto
                      & looseMuonSel & looseElectronSel)
        ele_noLoose = (eleTrigger & passOverlapRemoval & oneEle & muVeto
                       & looseMuonSel & looseElectronSel)

        lep_noLoose = mu_noLoose | ele_noLoose

        lep_jetSel = (lep_noLoose & (tightJets.counts >= 4) &
                      (bJets.counts >= 1))
        lep_zeropho = (lep_jetSel & (tightPhotons.counts == 0))
        lep_phosel = (lep_jetSel & (tightPhotons.counts == 1))
        lep_phoselLoose = (lep_jetSel & (loosePhotons.counts == 1))
        lep_phoselSideband = (lep_jetSel & (loosePhotonsSideband.counts == 1))

        lep_phosel_3j0t = (lep_noLoose & (tightJets.counts >= 3) &
                           (bJets.counts == 0) & (tightPhotons.counts == 1))

        # -0.5 for electron-channel events, +0.5 for muon-channel events.
        lepFlavor = -0.5 * ele_noLoose + 0.5 * mu_noLoose

        evtWeight = np.ones_like(df['event'], dtype=np.float64)
        if not isData:
            nMCevents = self.mcEventYields[datasetFull]
            xsec = crossSections[dataset]

            evtWeight *= xsec * lumis[year] / nMCevents

            #btag key name
            #name / working Point / type / systematic / jetType
            #  ... / 0-loose 1-medium 2-tight / comb,mujets,iterativefit / central,up,down / 0-b 1-c 2-udcsg

            bJetSF_b = self.evaluator['btag%iDeepCSV_1_comb_central_0' % year](
                tightJets[tightJets.hadFlav == 5].eta,
                tightJets[tightJets.hadFlav == 5].pt,
                tightJets[tightJets.hadFlav == 5].btag)
            bJetSF_c = self.evaluator['btag%iDeepCSV_1_comb_central_1' % year](
                tightJets[tightJets.hadFlav == 4].eta,
                tightJets[tightJets.hadFlav == 4].pt,
                tightJets[tightJets.hadFlav == 4].btag)
            bJetSF_udcsg = self.evaluator[
                'btag%iDeepCSV_1_incl_central_2' %
                year](tightJets[tightJets.hadFlav == 0].eta,
                      tightJets[tightJets.hadFlav == 0].pt,
                      tightJets[tightJets.hadFlav == 0].btag)

            # Start from SF = 1 for every tight jet, then overwrite the
            # entries of each flavour with the matching scale factors.
            bJetSF = JaggedArray(content=np.ones_like(tightJets.pt.content,
                                                      dtype=np.float64),
                                 starts=tightJets.starts,
                                 stops=tightJets.stops)
            bJetSF.content[(tightJets.hadFlav == 5).content] = bJetSF_b.content
            bJetSF.content[(tightJets.hadFlav == 4).content] = bJetSF_c.content
            bJetSF.content[(
                tightJets.hadFlav == 0).content] = bJetSF_udcsg.content

            ## mc efficiency lookup, data efficiency is eff* scale factor
            btagEfficiencies = taggingEffLookup(datasetFull, tightJets.hadFlav,
                                                tightJets.pt, tightJets.eta)
            btagEfficienciesData = btagEfficiencies * bJetSF

            ##probability is the product of all efficiencies of tagged jets, times product of 1-eff for all untagged jets
            ## https://twiki.cern.ch/twiki/bin/view/CMS/BTagSFMethods#1a_Event_reweighting_using_scale
            pMC = btagEfficiencies[btagged].prod() * (
                1. - btagEfficiencies[np.invert(btagged)]).prod()
            pData = btagEfficienciesData[btagged].prod() * (
                1. - btagEfficienciesData[np.invert(btagged)]).prod()
            btagWeight = pData / pMC
            # Force the weight to zero wherever the data probability is zero.
            btagWeight[pData == 0] = 0

            evtWeight *= btagWeight

            eleID = self.ele_id_sf(tightElectron.eta, tightElectron.pt)
            eleIDerr = self.ele_id_err(tightElectron.eta, tightElectron.pt)
            eleRECO = self.ele_reco_sf(tightElectron.eta, tightElectron.pt)
            eleRECOerr = self.ele_reco_err(tightElectron.eta, tightElectron.pt)

            eleSF = (eleID * eleRECO).prod()
            # The up/down variations are computed but not used further in
            # this method.
            eleSFup = ((eleID + eleIDerr) * (eleRECO + eleRECOerr)).prod()
            eleSFdo = ((eleID - eleIDerr) * (eleRECO - eleRECOerr)).prod()

            evtWeight *= eleSF

            muID = self.mu_id_sf(tightMuon.eta, tightMuon.pt)
            muIDerr = self.mu_id_err(tightMuon.eta, tightMuon.pt)
            muIso = self.mu_iso_sf(tightMuon.eta, tightMuon.pt)
            muIsoerr = self.mu_iso_err(tightMuon.eta, tightMuon.pt)
            # NOTE(review): the trigger scale factor is read from the
            # isolation lookups (mu_iso_sf / mu_iso_err) evaluated at |eta|.
            # This looks like a copy-paste slip -- confirm whether a dedicated
            # trigger lookup should be used instead.
            muTrig = self.mu_iso_sf(abs(tightMuon.eta), tightMuon.pt)
            muTrigerr = self.mu_iso_err(abs(tightMuon.eta), tightMuon.pt)

            muSF = (muID * muIso * muTrig).prod()
            # The up/down variations are computed but not used further in
            # this method.
            muSF_up = ((muID + muIDerr) * (muIso + muIsoerr) *
                       (muTrig + muTrigerr)).prod()
            muSF_down = ((muID - muIDerr) * (muIso - muIsoerr) *
                         (muTrig - muTrigerr)).prod()

            evtWeight *= muSF

        output['photon_pt'].fill(
            dataset=dataset,
            pt=tightPhotons.p4.pt[:, :1][lep_phosel].flatten(),
            category=phoCategory[lep_phosel].flatten(),
            lepFlavor=lepFlavor[lep_phosel],
            weight=evtWeight[lep_phosel].flatten())

        output['photon_eta'].fill(
            dataset=dataset,
            eta=tightPhotons.eta[:, :1][lep_phosel].flatten(),
            category=phoCategory[lep_phosel].flatten(),
            lepFlavor=lepFlavor[lep_phosel],
            weight=evtWeight[lep_phosel].flatten())

        output['photon_chIsoSideband'].fill(
            dataset=dataset,
            chIso=loosePhotonsSideband.chIso[:, :1]
            [lep_phoselSideband].flatten(),
            category=phoCategorySideband[lep_phoselSideband].flatten(),
            lepFlavor=lepFlavor[lep_phoselSideband],
            weight=evtWeight[lep_phoselSideband].flatten())

        output['photon_chIso'].fill(
            dataset=dataset,
            chIso=loosePhotons.chIso[:, :1][lep_phoselLoose].flatten(),
            category=phoCategoryLoose[lep_phoselLoose].flatten(),
            lepFlavor=lepFlavor[lep_phoselLoose],
            weight=evtWeight[lep_phoselLoose].flatten())

        # The photon-lepton mass histograms are filled twice: once for the
        # electron channel and once for the muon channel.
        output['photon_lepton_mass'].fill(
            dataset=dataset,
            mass=egammaMass[lep_phosel & ele_noLoose].flatten(),
            category=phoCategory[lep_phosel & ele_noLoose].flatten(),
            lepFlavor=lepFlavor[lep_phosel & ele_noLoose],
            weight=evtWeight[lep_phosel & ele_noLoose].flatten())
        output['photon_lepton_mass'].fill(
            dataset=dataset,
            mass=mugammaMass[lep_phosel & mu_noLoose].flatten(),
            category=phoCategory[lep_phosel & mu_noLoose].flatten(),
            lepFlavor=lepFlavor[lep_phosel & mu_noLoose],
            weight=evtWeight[lep_phosel & mu_noLoose].flatten())

        output['photon_lepton_mass_3j0t'].fill(
            dataset=dataset,
            mass=egammaMass[lep_phosel_3j0t & ele_noLoose].flatten(),
            category=phoCategory[lep_phosel_3j0t & ele_noLoose].flatten(),
            lepFlavor=lepFlavor[lep_phosel_3j0t & ele_noLoose],
            weight=evtWeight[lep_phosel_3j0t & ele_noLoose].flatten())
        output['photon_lepton_mass_3j0t'].fill(
            dataset=dataset,
            mass=mugammaMass[lep_phosel_3j0t & mu_noLoose].flatten(),
            category=phoCategory[lep_phosel_3j0t & mu_noLoose].flatten(),
            lepFlavor=lepFlavor[lep_phosel_3j0t & mu_noLoose],
            weight=evtWeight[lep_phosel_3j0t & mu_noLoose].flatten())

        # Events here are selected with the tight-photon mask, so the
        # category axis uses the tight-photon category (the original used
        # phoCategoryLoose, inconsistent with every other lep_phosel fill).
        output['M3'].fill(dataset=dataset,
                          M3=M3[lep_phosel].flatten(),
                          category=phoCategory[lep_phosel].flatten(),
                          lepFlavor=lepFlavor[lep_phosel],
                          weight=evtWeight[lep_phosel].flatten())

        output['M3Presel'].fill(dataset=dataset,
                                M3=M3[lep_zeropho].flatten(),
                                lepFlavor=lepFlavor[lep_zeropho],
                                weight=evtWeight[lep_zeropho].flatten())

        output['EventCount'] = len(df['event'])

        return output
def convert_jec_txt_file(jecFilePath):
    """Parse a JEC text file into a lookup-table description.

    The first line of the file is a layout descriptor wrapped in braces:
    the number of binned variables, their names, the number of evaluation
    variables, their names, and then the correction formula.  Every
    following line is a table row holding, in order, a min/max pair per
    binned variable, an ``NVars`` integer, a min/max pair per evaluation
    variable and the formula parameters.

    Parameters
    ----------
    jecFilePath : str
        Path of the text file.  The lookup name is the file's base name up
        to its first ``'.'``.

    Returns
    -------
    dict
        A single entry keyed by ``(name, 'jet_energy_corrector')`` whose
        value is the tuple ``(formula, (bins, bin_order),
        (clamp_mins, clamp_maxs, var_order), (parms, parm_order))``.

    Raises
    ------
    Exception
        If the layout descriptor is empty or does not start with a digit.
    """
    # Only the layout descriptor is read here; the table itself is parsed by
    # np.genfromtxt below.  The context manager closes the handle on error.
    with open(jecFilePath, 'r') as jec_f:
        layoutstr = jec_f.readline().strip().strip('{}')

    name = jecFilePath.split('/')[-1].split('.')[0]

    layout = layoutstr.split()
    if not layout or not layout[0].isdigit():
        raise Exception('First column of JEC descriptor must be a digit!')

    #setup the file format
    nBinnedVars = int(layout[0])
    nEvalVars = int(layout[nBinnedVars + 1])
    formula = layout[nBinnedVars + nEvalVars + 2]
    # rename the positional parameters [0], [1], ... to p0, p1, ...
    nParms = 0
    while '[%i]' % nParms in formula:
        formula = formula.replace('[%i]' % nParms, 'p%i' % nParms)
        nParms += 1
    #protect function names with vars in them
    # NOTE(review): only 'max' and 'exp' are protected; other function names
    # containing a template letter would be rewritten by the plain string
    # replacement below.
    funcs_to_cap = ['max', 'exp']
    for f in funcs_to_cap:
        formula = formula.replace(f, f.upper())

    templatevars = ['x', 'y', 'z', 'w', 't', 's']
    varnames = [layout[i + nBinnedVars + 2] for i in range(nEvalVars)]
    for find, replace in zip(templatevars, varnames):
        formula = formula.replace(find, replace)
    #restore max
    for f in funcs_to_cap:
        formula = formula.replace(f.upper(), f)

    #parse the columns
    minMax = ['Min', 'Max']
    columns = []
    dtypes = []
    offset = 1
    for i in range(nBinnedVars):
        columns.extend(['%s%s' % (layout[i + offset], mm) for mm in minMax])
        dtypes.extend(['<f8', '<f8'])
    columns.append('NVars')
    dtypes.append('<i8')
    offset += nBinnedVars + 1
    for i in range(nEvalVars):
        columns.extend(['%s%s' % (layout[i + offset], mm) for mm in minMax])
        dtypes.extend(['<f8', '<f8'])
    for i in range(nParms):
        columns.append('p%i' % i)
        dtypes.append('<f8')

    # unpack=True is deliberately not passed: since NumPy 1.20 it turns a
    # multi-field structured result into a plain list of arrays, which cannot
    # be indexed by column name as done below.
    pars = np.genfromtxt(jecFilePath,
                         dtype=tuple(dtypes),
                         names=tuple(columns),
                         skip_header=1,
                         encoding='ascii'
                         )
    # a file with a single table row yields a 0-d array; keep it indexable
    pars = np.atleast_1d(pars)

    #the first bin is always usual for JECs
    #the next bins may vary in number, so they're jagged arrays... yay
    bins = {}
    bin_order = []
    offset_name = 1
    for i in range(nBinnedVars):
        binName = layout[i + offset_name]
        # each binned variable owns two adjacent columns: <name>Min, <name>Max
        minCol = columns[2 * i]
        maxCol = columns[2 * i + 1]
        if i == 0:
            bins[binName] = np.union1d(np.unique(pars[minCol]),
                                       np.unique(pars[maxCol]))
        else:
            # one set of edges per bin of the first binned variable
            perBinEdges = []
            for binMin in bins[bin_order[0]][:-1]:
                rows = pars[np.where(pars[columns[0]] == binMin)]
                perBinEdges.append(np.union1d(np.unique(rows[minCol]),
                                              np.unique(rows[maxCol])))
            counts = np.array([edges.size for edges in perBinEdges], dtype=int)
            # the leading empty array keeps the result float64 and makes the
            # concatenation valid even when there are no bins at all
            allBins = np.concatenate([np.zeros(0, dtype=np.double)] + perBinEdges)
            bins[binName] = JaggedArray.fromcounts(counts, allBins)
        bin_order.append(binName)

    #skip nvars to the variable columns
    #the columns here define clamps for the variables defined in columns[]
    # ----> clamps can be different from bins
    # ----> if there is more than one binning variable this array is jagged
    # ----> just make it jagged all the time
    clamp_mins = {}
    clamp_maxs = {}
    var_order = []
    clampBase = 2 * nBinnedVars + 1
    offset_name = nBinnedVars + 2
    # plain int: the np.int alias was removed in NumPy 1.24
    jagged_counts = np.ones(bins[bin_order[0]].size - 1, dtype=int)
    if len(bin_order) > 1:
        jagged_counts = np.maximum(bins[bin_order[1]].counts - 1, 0) #need counts-1 since we only care about Nbins
    for i in range(nEvalVars):
        varName = layout[i + offset_name]
        clamp_mins[varName] = JaggedArray.fromcounts(
            jagged_counts, np.atleast_1d(pars[columns[clampBase + 2 * i]]))
        clamp_maxs[varName] = JaggedArray.fromcounts(
            jagged_counts, np.atleast_1d(pars[columns[clampBase + 2 * i + 1]]))
        var_order.append(varName)

    #now get the parameters, which we will look up with the clamps
    parms = []
    parm_order = []
    parmBase = 2 * nBinnedVars + 1 + 2 * nEvalVars
    for i in range(nParms):
        parms.append(JaggedArray.fromcounts(jagged_counts, pars[columns[parmBase + i]]))
        parm_order.append('p%i' % (i))

    wrapped_up = {}
    wrapped_up[(name, 'jet_energy_corrector')] = (formula,
                                                 (bins, bin_order),
                                                 (clamp_mins, clamp_maxs, var_order),
                                                 (parms, parm_order))
    return wrapped_up
Exemple #18
0
def jagged_1():
    """Return a six-row JaggedArray built from explicit starts and stops.

    Consecutive entries of ``edges`` delimit one row each; the last two
    entries are equal, so the final row is empty.
    """
    edges = [0, 3, 5, 6, 9, 12, 12]
    starts, stops = edges[:-1], edges[1:]
    content = [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.0, 11.0]
    return JaggedArray(starts, stops, content)
    loose_muons = muons[loose_muon_selection]
    loose_photons = photons[loose_photon_selection]
    selected_taus = taus[tau_selection]
    selected_jets = jets[jet_selection]
    # end seletion

    # clean leptons
    e_combinations = loose_electrons.p4.cross(selected_jets.p4, nested=True)
    mask = (e_combinations.i0.delta_r(e_combinations.i1) < 0.3).any()
    clean_electrons = loose_electrons[~mask]

    m_combinations = loose_muons.p4.cross(selected_jets.p4, nested=True)
    mask = (m_combinations.i0.delta_r(m_combinations.i1) < 0.3).any()
    clean_muons = loose_muons[mask]

    clean_leptons = JaggedArray.fromiter([clean_electrons, clean_muons])

    # once merge is done
    # mask = loose_electrons.p4.match(selected_jets.p4, 0.3)
    # clean electrons = loose_electrons[~mask]
    # end cleaning

    # weights evaluation
    e_counts = clean_electrons.counts
    e_sfTrigg = np.ones(clean_electrons.size)
    e_sfTrigg[e_counts > 0] = 1 - evaluator["hEffEtaPt"](
        clean_electrons.eta[e_counts > 0, 0], clean_electrons.pt[e_counts > 0,
                                                                 0])
    e_sfTrigg[e_counts > 1] = 1 - (1 - evaluator["hEffEtaPt"](
        clean_electrons.eta[e_counts > 1, 0],
        clean_electrons.pt[e_counts > 1, 0])) * (
Exemple #20
0
    def process(self, events):
        """Select same-flavour dilepton Z candidates in one chunk and fill histograms.

        The visible steps are, in order: lumi mask (data only), trigger,
        good-vertex requirement, Rochester correction of the muon pt,
        muon/electron ID, two-lepton requirement, pair building, choice of
        the first opposite-charge pair per event, mass window, lepton pt
        thresholds, MC weights and scale factors, and histogram filling.

        Parameters
        ----------
        events
            Event collection for one chunk.  This method reads
            ``events.metadata['dataset']``, ``run``, ``luminosityBlock``,
            ``PV``, ``Muon``, ``Electron``, ``MET`` and, for simulation,
            ``genWeight`` and ``Pileup``; it adds ``passLumiMask`` and
            overwrites ``events.Muon['pt']``.

        Returns
        -------
        The accumulator obtained from ``self.accumulator.identity()`` with
        cutflow counters, ``sumw`` and histograms filled.  It is returned
        early, with only the first cutflow entries set, when no event
        passes the trigger.
        """
        logging.debug('starting process')
        output = self.accumulator.identity()

        dataset = events.metadata['dataset']
        self._isData = dataset in [
            'SingleMuon', 'DoubleMuon', 'SingleElectron', 'DoubleEG', 'EGamma',
            'MuonEG'
        ]

        selection = processor.PackedSelection()

        # TODO: instead of cutflow, use processor.PackedSelection
        output['cutflow']['all events'] += events.size

        logging.debug('applying lumi mask')
        if self._isData:
            lumiMask = lumi_tools.LumiMask(self._corrections['golden'])
            events['passLumiMask'] = lumiMask(np.array(events.run),
                                              np.array(events.luminosityBlock))
        else:
            # simulation: every event passes
            events['passLumiMask'] = np.ones_like(events.run, dtype=bool)
        passLumiMask = events.passLumiMask
        selection.add('lumiMask', passLumiMask)

        logging.debug('adding trigger')
        self._add_trigger(events)

        passHLT = events.passHLT
        selection.add('trigger', passHLT)
        output['cutflow']['pass trigger'] += passHLT.sum()
        # if no trigger: fast return
        if passHLT.sum() == 0:
            return output

        # require one good vertex
        logging.debug('checking vertices')
        passGoodVertex = (events.PV.npvsGood > 0)
        output['cutflow']['good vertex'] += passGoodVertex.sum()
        selection.add('goodVertex', passGoodVertex)

        # run rochester
        rochester = self._rochester
        _muon_offsets = events.Muon.pt.offsets
        _charge = events.Muon.charge
        _pt = events.Muon.pt
        _eta = events.Muon.eta
        _phi = events.Muon.phi
        if self._isData:
            _k = rochester.kScaleDT(_charge, _pt, _eta, _phi)
            # _kErr = rochester.kScaleDTerror(_charge, _pt, _eta, _phi)
        else:
            # for default if gen present
            _gpt = events.Muon.matched_gen.pt
            # for backup w/o gen
            _nl = events.Muon.nTrackerLayers
            _u = JaggedArray.fromoffsets(_muon_offsets,
                                         np.random.rand(*_pt.flatten().shape))
            _hasgen = (_gpt.fillna(-1) > 0)
            _kspread = rochester.kSpreadMC(_charge[_hasgen], _pt[_hasgen],
                                           _eta[_hasgen], _phi[_hasgen],
                                           _gpt[_hasgen])
            _ksmear = rochester.kSmearMC(_charge[~_hasgen], _pt[~_hasgen],
                                         _eta[~_hasgen], _phi[~_hasgen],
                                         _nl[~_hasgen], _u[~_hasgen])
            # stitch the two MC corrections back into one per-muon array
            _k = np.ones_like(_pt.flatten())
            _k[_hasgen.flatten()] = _kspread.flatten()
            _k[~_hasgen.flatten()] = _ksmear.flatten()
            _k = JaggedArray.fromoffsets(_muon_offsets, _k)
            # _kErrspread = rochester.kSpreadMCerror(_charge[_hasgen], _pt[_hasgen], _eta[_hasgen], _phi[_hasgen],
            #                                        _gpt[_hasgen])
            # _kErrsmear  = rochester.kSmearMCerror(_charge[~_hasgen], _pt[~_hasgen], _eta[~_hasgen], _phi[~_hasgen],
            #                                       _nl[~_hasgen], _u[~_hasgen])
            # _kErr = np.ones_like(_pt.flatten())
            # _kErr[_hasgen.flatten()] = _kErrspread.flatten()
            # _kErr[~_hasgen.flatten()] = _kErrsmear.flatten()
            # _kErr = JaggedArray.fromoffsets(_muon_offsets, _kErr)

        # the correction is only applied to muons with pt below 200
        mask = _pt.flatten() < 200
        rochester_pt = _pt.flatten()
        rochester_pt[mask] = (_k * _pt).flatten()[mask]
        events.Muon['pt'] = JaggedArray.fromoffsets(_muon_offsets,
                                                    rochester_pt)

        logging.debug('adding muon id')
        self._add_muon_id(events.Muon)
        logging.debug('adding electron id')
        self._add_electron_id(events.Electron)

        logging.debug('selecting muons')
        muonId = (events.Muon.passId > 0)
        muons = events.Muon[muonId]

        logging.debug('selecting electrons')
        electronId = (events.Electron.passId > 0)
        electrons = events.Electron[electronId]

        passTwoLeptons = (muons.counts >= 2) | (electrons.counts >= 2)
        output['cutflow']['two leptons'] += passTwoLeptons.sum()
        selection.add('twoLeptons', passTwoLeptons)

        # build cands
        # remake z to have same columns
        # pt eta phi mass charge pdgId
        logging.debug('rebuilding leptons')

        def rebuild(leptons):
            """Rebuild a lepton collection with a common set of columns."""
            return JaggedCandidateArray.candidatesfromoffsets(
                leptons.offsets,
                pt=leptons.pt.flatten(),
                eta=leptons.eta.flatten(),
                phi=leptons.phi.flatten(),
                mass=leptons.mass.flatten(),
                charge=leptons.charge.flatten(),
                pdgId=leptons.pdgId.flatten(),
                # needed for electron SF
                etaSC=leptons.etaSC.flatten()
                if hasattr(leptons, 'etaSC') else leptons.eta.flatten(),
            )

        newMuons = rebuild(muons)
        newElectrons = rebuild(electrons)

        logging.debug('building 2 leptons')
        ee_cands = newElectrons.choose(2)
        mm_cands = newMuons.choose(2)

        # combine them
        z_cands = JaggedArray.concatenate([ee_cands, mm_cands], axis=1)

        def bestcombination(zcands):
            """Keep at most one zero-net-charge pair per event (the first one)."""
            good_charge = sum(zcands[str(i)]['charge'] for i in range(2)) == 0
            # this keeps the first z cand in each event
            # should instead sort the best first
            # TODO: select best
            zcands = zcands[good_charge][:, :1]
            return zcands

        logging.debug('selecting best combinations')
        z_cands = bestcombination(z_cands)

        # z1 / z2 hold, per candidate, the index (0 or 1) of the higher-pt and
        # lower-pt lepton of the pair respectively
        secondIsLeading = (z_cands['0']['p4'].pt.flatten() <
                           z_cands['1']['p4'].pt.flatten())
        z1 = np.zeros_like(z_cands['p4'].pt.flatten(), dtype='i')
        z2 = np.ones_like(z_cands['p4'].pt.flatten(), dtype='i')
        z1[secondIsLeading] = 1
        z2[secondIsLeading] = 0
        z1 = JaggedArray.fromoffsets(z_cands.offsets, z1)
        z2 = JaggedArray.fromoffsets(z_cands.offsets, z2)

        passZCand = (z_cands.counts > 0)
        output['cutflow']['z cand'] += passZCand.sum()
        selection.add('zCand', passZCand)

        # Explicit parentheses: '&' binds tighter than '>', so the unparenthesised
        # form only worked because each event holds at most one candidate.
        inMassWindow = (z_cands.p4.mass > 60) & (z_cands.p4.mass < 120)
        passMassWindow = passZCand & (z_cands[inMassWindow].counts > 0)
        output['cutflow']['mass window'] += passMassWindow.sum()
        selection.add('massWindow', passMassWindow)

        # im sure there is a better way, but for now just do this
        def get_lepton_values(zl, key):
            """Return ``key`` of the lepton picked by index array ``zl``, per candidate."""
            val = np.zeros_like(zl.flatten(), dtype=float)
            if len(val) == 0:
                return JaggedArray.fromoffsets(zl.offsets, val)
            for i in range(2):
                mask = (i == zl.flatten())
                leptons = z_cands[passZCand][str(i)].flatten()[mask]
                if key in ('pt', 'eta', 'phi', 'mass'):
                    # kinematic quantities are read from the four-vector
                    val[mask] = getattr(leptons['p4'], key)
                else:
                    val[mask] = leptons[key]
            return JaggedArray.fromoffsets(zl.offsets, val)

        z1pt = get_lepton_values(z1, 'pt')
        z2pt = get_lepton_values(z2, 'pt')
        # .any() rather than .counts > 0: counts only reports how many
        # candidates an event has, so the thresholds were never applied.
        passPt = ((z1pt > 30) & (z2pt > 20)).any()
        output['cutflow']['pt threshold'] += passPt.sum()
        selection.add('ptThreshold', passPt)

        z1pdg = get_lepton_values(z1, 'pdgId')
        z2pdg = get_lepton_values(z2, 'pdgId')
        chanPdgIds = {'ee': (11, 11), 'mm': (13, 13)}
        chanSels = {}
        for chan in ['ee', 'mm']:
            pdgIds = chanPdgIds[chan]
            chanSels[chan] = ((abs(z1pdg) == pdgIds[0])
                              & (abs(z2pdg) == pdgIds[1]))

        weights = processor.Weights(events.run.size)
        if self._isData:
            output['sumw'][dataset] = 0  # always set to 0 for data
        else:
            output['sumw'][dataset] += events.genWeight.sum()
            weights.add('genWeight', events.genWeight)
            weights.add(
                'pileupWeight',
                self._corrections['pileupWeight'](events.Pileup.nPU),
                self._corrections['pileupWeightUp'](events.Pileup.nPU),
                self._corrections['pileupWeightDown'](events.Pileup.nPU),
            )
            zls = [z1, z2]
            # electron sf
            for ei, zl in enumerate(zls):
                eta = get_lepton_values(zl, 'etaSC')
                pt = get_lepton_values(zl, 'pt')
                electronRecoSF = self._corrections['electron_reco'](eta, pt)
                electronIdSF = self._corrections['electron_id_MVA90'](eta, pt)
                electronSF = np.ones_like(electronRecoSF.prod())
                # turns empty arrays into 0's, nonempty int 1's
                # NOTE(review): because of ones_like() this selects every event
                # that has a candidate, not only 'ee' events - confirm intent.
                chanSel = (chanSels['ee'].ones_like().sum() > 0)
                electronSF[chanSel] *= electronRecoSF[chanSel].prod()
                electronSF[chanSel] *= electronIdSF[chanSel].prod()
                weights.add('electronSF' + str(ei), electronSF)

            # muon SF
            for mi, zl in enumerate(zls):
                eta = get_lepton_values(zl, 'eta')
                pt = get_lepton_values(zl, 'pt')
                if self._year == '2016':
                    idSF = self._corrections['muon_id_MediumID'](eta, pt)
                    isoSF = self._corrections['muon_iso_TightRelIso_MediumID'](
                        eta, pt)
                else:
                    # these lookups are called with (pt, |eta|) instead
                    idSF = self._corrections['muon_id_MediumPromptID'](
                        pt, abs(eta))
                    isoSF = self._corrections['muon_iso_TightRelIso_MediumID'](
                        pt, abs(eta))

                muonSF = np.ones_like(idSF.prod())
                # turns empty arrays into 0's, nonempty int 1's
                # NOTE(review): same remark as for the electron SF above.
                chanSel = (chanSels['mm'].ones_like().sum() > 0)
                muonSF[chanSel] *= idSF[chanSel].prod()
                muonSF[chanSel] *= isoSF[chanSel].prod()
                weights.add('muonSF' + str(mi), muonSF)

        logging.debug('filling')
        for sel in self._selections:
            # NOTE(review): 'cut' is only defined for 'massWindow'; any other
            # selection name reuses the previous cut or fails if it comes first.
            if sel == 'massWindow':
                cut = selection.all('lumiMask', 'trigger', 'goodVertex',
                                    'twoLeptons', 'zCand', 'massWindow',
                                    'ptThreshold')
            for chan in ['ee', 'mm']:
                chanSel = chanSels[chan]
                # candidates of the other channel get weight zero
                weight = chanSel.astype(float) * weights.weight()

                output[sel + '_zmass'].fill(
                    dataset=dataset,
                    channel=chan,
                    mass=z_cands[cut].p4.mass.flatten(),
                    weight=weight[cut].flatten(),
                )
                output[sel + '_met'].fill(
                    dataset=dataset,
                    channel=chan,
                    met=events.MET.pt[cut],
                    weight=weight[cut].flatten(),
                )
                output[sel + '_pileup'].fill(
                    dataset=dataset,
                    channel=chan,
                    npvs=events.PV.npvs[cut],
                    weight=weight[cut].flatten(),
                )

        return output
Exemple #21
0
                    treename,
                    branches=branches,
                    namedecode='utf-8',
                    entrysteps=200000)):
 charge = arrays['Muon_charge']
 pt = arrays['Muon_pt']
 eta = arrays['Muon_eta']
 phi = arrays['Muon_phi']
 if not isData:
     # for default if gen present
     gid = arrays['Muon_genPartIdx']
     gpt = arrays['GenPart_pt']
     # for backup w/o gen
     nl = arrays['Muon_nTrackerLayers']
     u = np.random.rand(*pt.flatten().shape)
     u = JaggedArray.fromoffsets(pt.offsets, u)
     fullu += [u]
 for ie in range(len(pt)):
     subres = []
     suberr = []
     for im in range(len(pt[ie])):
         if isData:
             subres += [
                 roccor.kScaleDT(int(charge[ie][im]), float(pt[ie][im]),
                                 float(eta[ie][im]), float(phi[ie][im]))
             ]
             suberr += [
                 roccor.kScaleDTerror(int(charge[ie][im]),
                                      float(pt[ie][im]),
                                      float(eta[ie][im]),
                                      float(phi[ie][im]))
Exemple #22
0
def jagged_1():
    """Return a six-row JaggedArray built from nested lists; the last row is empty."""
    rows = [
        [0.0, 1.1, 2.2],
        [3.3, 4.4],
        [5.5],
        [6.6, 7.7, 8.8],
        [9.9, 10.0, 11.0],
        [],
    ]
    return JaggedArray.fromiter(rows)
Exemple #23
0
def _build_standard_jme_lookup(name,
                               layout,
                               pars,
                               nBinnedVars,
                               nBinColumns,
                               nEvalVars,
                               formula,
                               nParms,
                               columns,
                               dtypes,
                               interpolatedFunc=False):
    #the first bin is always usual for JECs
    #the next bins may vary in number, so they're jagged arrays... yay
    bins = {}
    offset_col = 0
    offset_name = 1
    bin_order = []
    for i in range(nBinnedVars):
        binMins = None
        binMaxs = None
        if i == 0:
            binMins = np.unique(pars[columns[0]])
            binMaxs = np.unique(pars[columns[1]])
            bins[layout[i + offset_name]] = np.union1d(binMins, binMaxs)
        else:
            counts = np.zeros(0, dtype=np.int)
            allBins = np.zeros(0, dtype=np.double)
            for binMin in bins[bin_order[0]][:-1]:
                binMins = np.unique(pars[np.where(
                    pars[columns[0]] == binMin)][columns[i + offset_col]])
                binMaxs = np.unique(pars[np.where(
                    pars[columns[0]] == binMin)][columns[i + offset_col + 1]])
                theBins = np.union1d(binMins, binMaxs)
                allBins = np.append(allBins, theBins)
                counts = np.append(counts, theBins.size)
            bins[layout[i + offset_name]] = JaggedArray.fromcounts(
                counts, allBins)
        bin_order.append(layout[i + offset_name])
        offset_col += 1

    #skip nvars to the variable columns
    #the columns here define clamps for the variables defined in columns[]
    # ----> clamps can be different from bins
    # ----> if there is more than one binning variable this array is jagged
    # ----> just make it jagged all the time
    binshapes = tuple([bins[thebin].size - 1 for thebin in bin_order])
    clamp_mins = {}
    clamp_maxs = {}
    var_order = []
    offset_col = 2 * nBinnedVars + 1
    offset_name = nBinnedVars + 2
    jagged_counts = np.ones(bins[bin_order[0]].size - 1, dtype=np.int)
    if len(bin_order) > 1:
        jagged_counts = np.maximum(
            bins[bin_order[1]].counts - 1,
            0)  #need counts-1 since we only care about Nbins
    for i in range(nEvalVars):
        var_order.append(layout[i + offset_name])
        if not interpolatedFunc:
            clamp_mins[layout[i + offset_name]] = JaggedArray.fromcounts(
                jagged_counts, np.atleast_1d(pars[columns[i + offset_col]]))
            clamp_maxs[layout[i + offset_name]] = JaggedArray.fromcounts(
                jagged_counts,
                np.atleast_1d(pars[columns[i + offset_col + 1]]))
            offset_col += 1

    #now get the parameters, which we will look up with the clamped values
    parms = []
    parm_order = []
    offset_col = 2 * nBinnedVars + 1 + (interpolatedFunc
                                        == False) * 2 * nEvalVars
    for i in range(nParms):
        parms.append(
            JaggedArray.fromcounts(jagged_counts,
                                   pars[columns[i + offset_col]]))
        parm_order.append('p%i' % (i))

    wrapped_up = {}
    wrapped_up[(name, 'jme_standard_function')] = (formula, (bins, bin_order),
                                                   (clamp_mins, clamp_maxs,
                                                    var_order), (parms,
                                                                 parm_order))
    return wrapped_up