def setup_gen_candidates(df):
    """Assemble the generator-level candidate collection from ``GenPart_*`` columns.

    Before the candidates are built, ``find_first_parent`` is used to look up,
    for every generator particle, the first ancestor carrying a different
    PDG ID; the flattened result is attached as ``parentIndex``.

    Parameters
    ----------
    df : mapping
        Column store providing ``nGenPart`` and the ``GenPart_*`` branches
        read below.

    Returns
    -------
    JaggedCandidateArray
        One candidate per generator particle, grouped per event according
        to ``nGenPart``.
    """
    n_gen = df['nGenPart']
    mother_idx = df['GenPart_genPartIdxMother']
    pdg_id = df['GenPart_pdgId']

    # Jagged (per-event) views of mother index and PDG ID feed the ancestor search
    first_parent = find_first_parent(
        JaggedArray.fromcounts(n_gen, mother_idx),
        JaggedArray.fromcounts(n_gen, pdg_id),
    )

    # NOTE: 'charge' is filled from the PDG ID column, same as 'pdg'
    return JaggedCandidateArray.candidatesfromcounts(
        n_gen,
        pt=df['GenPart_pt'],
        eta=df['GenPart_eta'],
        phi=df['GenPart_phi'],
        mass=df['GenPart_mass'],
        charge=pdg_id,
        pdg=pdg_id,
        status=df['GenPart_status'],
        flag=df['GenPart_statusFlags'],
        mother=mother_idx,
        parentIndex=first_parent.flatten(),
    )
def convert_jec_txt_file(jecFilePath):
    """Convert a JEC text file into a ``jet_energy_corrector`` lookup entry.

    The first line of the file is the layout descriptor (optionally wrapped
    in braces).  As read here, its whitespace-separated tokens are: the number
    of binned variables, their names, the number of evaluation variables,
    their names, and the correction formula.  Each following line is one row
    of numbers: a (min, max) pair per binned variable, an integer ``NVars``
    column, a (min, max) clamp pair per evaluation variable and finally the
    formula parameters.

    Parameters
    ----------
    jecFilePath : str
        Path of the text file.  The lookup name is the last ``/``-separated
        path component up to its first ``.``.

    Returns
    -------
    dict
        ``{(name, 'jet_energy_corrector'): (formula, (bins, bin_order),
        (clamp_mins, clamp_maxs, var_order), (parms, parm_order))}``.

    Raises
    ------
    Exception
        If the descriptor line is empty or its first token is not a digit.
    """
    # `with` closes the handle even if reading the header raises
    with open(jecFilePath, 'r') as jec_f:
        layoutstr = jec_f.readline().strip().strip('{}')

    name = jecFilePath.split('/')[-1].split('.')[0]

    layout = layoutstr.split()
    # an empty descriptor used to surface as an IndexError; report it the same
    # way as any other malformed header
    if not layout or not layout[0].isdigit():
        raise Exception('First column of JEC descriptor must be a digit!')

    #setup the file format
    nBinnedVars = int(layout[0])
    nEvalVars = int(layout[nBinnedVars + 1])
    formula = layout[nBinnedVars + nEvalVars + 2]
    nParms = 0
    # rewrite the positional parameters [0], [1], ... as p0, p1, ...
    while formula.count('[%i]' % nParms):
        formula = formula.replace('[%i]' % nParms, 'p%i' % nParms)
        nParms += 1
    #protect function names with vars in them
    funcs_to_cap = ['max', 'exp']
    for f in funcs_to_cap:
        formula = formula.replace(f, f.upper())

    # substitute the template variable letters by the real variable names
    templatevars = ['x', 'y', 'z', 'w', 't', 's']
    varnames = [layout[i + nBinnedVars + 2] for i in range(nEvalVars)]
    for find, replace in zip(templatevars, varnames):
        formula = formula.replace(find, replace)
    #restore max
    for f in funcs_to_cap:
        formula = formula.replace(f.upper(), f)

    #parse the columns
    minMax = ['Min', 'Max']
    columns = []
    dtypes = []
    for i in range(nBinnedVars):
        columns.extend(['%s%s' % (layout[i + 1], mm) for mm in minMax])
        dtypes.extend(['<f8', '<f8'])
    columns.append('NVars')
    dtypes.append('<i8')
    for i in range(nEvalVars):
        columns.extend(['%s%s' % (layout[i + nBinnedVars + 2], mm)
                        for mm in minMax])
        dtypes.extend(['<f8', '<f8'])
    for i in range(nParms):
        columns.append('p%i' % i)
        dtypes.append('<f8')

    # unpack stays False: since NumPy 1.20 unpack=True turns a structured
    # result into a plain list of columns, which cannot be indexed by name.
    # For older NumPy the flag was a no-op on structured arrays.
    pars = np.genfromtxt(jecFilePath,
                         dtype=tuple(dtypes),
                         names=tuple(columns),
                         skip_header=1,
                         unpack=False,
                         encoding='ascii')
    # a file with a single data row comes back 0-d; make it row-indexable
    pars = np.atleast_1d(pars)

    #the first bin is always usual for JECs
    #the next bins may vary in number, so they're jagged arrays... yay
    bins = {}
    bin_order = []
    for i in range(nBinnedVars):
        bin_name = layout[i + 1]
        if i == 0:
            binMins = np.unique(pars[columns[0]])
            binMaxs = np.unique(pars[columns[1]])
            bins[bin_name] = np.union1d(binMins, binMaxs)
        else:
            # edges of variable i, collected separately for every bin of the
            # first variable (rows are selected on the first variable's Min)
            lo_col = columns[2 * i]
            hi_col = columns[2 * i + 1]
            edges_per_bin = []
            for binMin in bins[bin_order[0]][:-1]:
                rows = pars[np.where(pars[columns[0]] == binMin)]
                edges_per_bin.append(
                    np.union1d(np.unique(rows[lo_col]),
                               np.unique(rows[hi_col])))
            # builtin int replaces the np.int alias removed in NumPy 1.24
            counts = np.array([e.size for e in edges_per_bin], dtype=int)
            if edges_per_bin:
                allBins = np.concatenate(edges_per_bin).astype(np.double)
            else:
                allBins = np.zeros(0, dtype=np.double)
            bins[bin_name] = JaggedArray.fromcounts(counts, allBins)
        bin_order.append(bin_name)

    #skip nvars to the variable columns
    #the columns here define clamps for the variables defined in columns[]
    # ----> clamps can be different from bins
    # ----> if there is more than one binning variable this array is jagged
    # ----> just make it jagged all the time
    clamp_mins = {}
    clamp_maxs = {}
    var_order = []
    first_clamp_col = 2 * nBinnedVars + 1
    jagged_counts = np.ones(bins[bin_order[0]].size - 1, dtype=int)
    if len(bin_order) > 1:
        #need counts-1 since we only care about Nbins
        jagged_counts = np.maximum(bins[bin_order[1]].counts - 1, 0)
    for i in range(nEvalVars):
        var_name = layout[i + nBinnedVars + 2]
        lo_col = columns[first_clamp_col + 2 * i]
        hi_col = columns[first_clamp_col + 2 * i + 1]
        clamp_mins[var_name] = JaggedArray.fromcounts(
            jagged_counts, np.atleast_1d(pars[lo_col]))
        clamp_maxs[var_name] = JaggedArray.fromcounts(
            jagged_counts, np.atleast_1d(pars[hi_col]))
        var_order.append(var_name)

    #now get the parameters, which we will look up with the clamps
    parms = []
    parm_order = []
    first_parm_col = 2 * nBinnedVars + 1 + 2 * nEvalVars
    for i in range(nParms):
        parms.append(
            JaggedArray.fromcounts(jagged_counts,
                                   pars[columns[first_parm_col + i]]))
        parm_order.append('p%i' % i)

    return {
        (name, 'jet_energy_corrector'): (formula,
                                         (bins, bin_order),
                                         (clamp_mins, clamp_maxs, var_order),
                                         (parms, parm_order)),
    }
# Example #3
# 0
def _build_standard_jme_lookup(name,
                               layout,
                               pars,
                               nBinnedVars,
                               nBinColumns,
                               nEvalVars,
                               formula,
                               nParms,
                               columns,
                               dtypes,
                               interpolatedFunc=False):
    """Pack parsed JME table columns into a ``jme_standard_function`` entry.

    Parameters
    ----------
    name : str
        Lookup name used in the returned key.
    layout : sequence of str
        Header tokens; binned-variable names start at index 1 and
        evaluation-variable names at index ``nBinnedVars + 2``.
    pars : mapping / structured array
        Table rows, indexable by the column names in ``columns``.
    nBinnedVars, nEvalVars, nParms : int
        Number of binned variables, evaluation variables and parameters.
    nBinColumns, dtypes
        Accepted for interface compatibility; not used here.
    formula : str
        Correction formula, passed through unchanged.
    columns : sequence of str
        Column names in file order.
    interpolatedFunc : bool, optional
        When true no clamp columns are read and the parameters are taken
        directly after the ``NVars`` column.

    Returns
    -------
    dict
        ``{(name, 'jme_standard_function'): (formula, (bins, bin_order),
        (clamp_mins, clamp_maxs, var_order), (parms, parm_order))}``.
    """
    #the first bin is always usual for JECs
    #the next bins may vary in number, so they're jagged arrays... yay
    bins = {}
    bin_order = []
    for i in range(nBinnedVars):
        bin_name = layout[i + 1]
        if i == 0:
            binMins = np.unique(pars[columns[0]])
            binMaxs = np.unique(pars[columns[1]])
            bins[bin_name] = np.union1d(binMins, binMaxs)
        else:
            # edges of variable i, collected separately for every bin of the
            # first variable (rows are selected on the first variable's Min)
            lo_col = columns[2 * i]
            hi_col = columns[2 * i + 1]
            edges_per_bin = []
            for binMin in bins[bin_order[0]][:-1]:
                rows = pars[np.where(pars[columns[0]] == binMin)]
                edges_per_bin.append(
                    np.union1d(np.unique(rows[lo_col]),
                               np.unique(rows[hi_col])))
            # builtin int replaces the np.int alias removed in NumPy 1.24
            counts = np.array([e.size for e in edges_per_bin], dtype=int)
            if edges_per_bin:
                allBins = np.concatenate(edges_per_bin).astype(np.double)
            else:
                allBins = np.zeros(0, dtype=np.double)
            bins[bin_name] = JaggedArray.fromcounts(counts, allBins)
        bin_order.append(bin_name)

    #skip nvars to the variable columns
    #the columns here define clamps for the variables defined in columns[]
    # ----> clamps can be different from bins
    # ----> if there is more than one binning variable this array is jagged
    # ----> just make it jagged all the time
    clamp_mins = {}
    clamp_maxs = {}
    var_order = []
    first_clamp_col = 2 * nBinnedVars + 1
    jagged_counts = np.ones(bins[bin_order[0]].size - 1, dtype=int)
    if len(bin_order) > 1:
        #need counts-1 since we only care about Nbins
        jagged_counts = np.maximum(bins[bin_order[1]].counts - 1, 0)
    for i in range(nEvalVars):
        var_name = layout[i + nBinnedVars + 2]
        var_order.append(var_name)
        if not interpolatedFunc:
            lo_col = columns[first_clamp_col + 2 * i]
            hi_col = columns[first_clamp_col + 2 * i + 1]
            clamp_mins[var_name] = JaggedArray.fromcounts(
                jagged_counts, np.atleast_1d(pars[lo_col]))
            clamp_maxs[var_name] = JaggedArray.fromcounts(
                jagged_counts, np.atleast_1d(pars[hi_col]))

    #now get the parameters, which we will look up with the clamped values
    parms = []
    parm_order = []
    # same truthiness test as the clamp loop above, so both agree on whether
    # the clamp columns are present
    n_clamp_cols = 0 if interpolatedFunc else 2 * nEvalVars
    first_parm_col = 2 * nBinnedVars + 1 + n_clamp_cols
    for i in range(nParms):
        parms.append(
            JaggedArray.fromcounts(jagged_counts,
                                   pars[columns[first_parm_col + i]]))
        parm_order.append('p%i' % i)

    return {
        (name, 'jme_standard_function'): (formula,
                                          (bins, bin_order),
                                          (clamp_mins, clamp_maxs, var_order),
                                          (parms, parm_order)),
    }
# Example #4
# 0
def convert_effective_area_file(eaFilePath):
    """Convert an effective-area text file into ``dense_lookup`` entries.

    The first line is the layout descriptor (optionally wrapped in braces):
    number of binned variables, their names, number of value columns and
    their names.  Each following line holds a (min, max) pair per binned
    variable followed by one number per value column.

    Only the first binned variable is used as the lookup axis.

    Parameters
    ----------
    eaFilePath : str
        Path of the text file.  The lookup name prefix is the last
        ``/``-separated path component up to its first ``.``.

    Returns
    -------
    dict
        ``{(name + '_' + value_column, 'dense_lookup'): (values, edges)}``
        with one entry per value column.

    Raises
    ------
    Exception
        If the header line is empty or its first token is not a digit.
    """
    # `with` closes the handle even if reading the header raises
    with open(eaFilePath, 'r') as ea_f:
        layoutstr = ea_f.readline().strip().strip('{}')

    name = eaFilePath.split('/')[-1].split('.')[0]

    layout = layoutstr.split()
    # an empty header used to surface as an IndexError; report it the same
    # way as any other malformed header
    if not layout or not layout[0].isdigit():
        raise Exception(
            'First column of Effective Area File Header must be a digit!')

    #setup the file format
    nBinnedVars = int(layout[0])
    nEvalVars = int(layout[nBinnedVars + 1])

    minMax = ['Min', 'Max']
    columns = []
    dtypes = []
    for i in range(nBinnedVars):
        columns.extend(['%s%s' % (layout[i + 1], mm) for mm in minMax])
        dtypes.extend(['<f8', '<f8'])
    for i in range(nEvalVars):
        columns.append('%s' % (layout[i + nBinnedVars + 2]))
        dtypes.append('<f8')

    # unpack stays False: since NumPy 1.20 unpack=True turns a structured
    # result into a plain list of columns, which cannot be indexed by name.
    # For older NumPy the flag was a no-op on structured arrays.
    pars = np.genfromtxt(eaFilePath,
                         dtype=tuple(dtypes),
                         names=tuple(columns),
                         skip_header=1,
                         unpack=False,
                         encoding='ascii')
    # a file with a single data row comes back 0-d; make it row-indexable
    pars = np.atleast_1d(pars)

    bins = {}
    bin_order = []
    for i in range(nBinnedVars):
        bin_name = layout[i + 1]
        if i == 0:
            binMins = np.unique(pars[columns[0]])
            binMaxs = np.unique(pars[columns[1]])
            bins[bin_name] = np.union1d(binMins, binMaxs)
        else:
            # edges of variable i, collected separately for every bin of the
            # first variable (rows are selected on the first variable's Min)
            lo_col = columns[2 * i]
            hi_col = columns[2 * i + 1]
            edges_per_bin = []
            for binMin in bins[bin_order[0]][:-1]:
                rows = pars[np.where(pars[columns[0]] == binMin)]
                edges_per_bin.append(
                    np.union1d(np.unique(rows[lo_col]),
                               np.unique(rows[hi_col])))
            # builtin int replaces the np.int alias removed in NumPy 1.24
            counts = np.array([e.size for e in edges_per_bin], dtype=int)
            if edges_per_bin:
                allBins = np.concatenate(edges_per_bin).astype(np.double)
            else:
                allBins = np.zeros(0, dtype=np.double)
            bins[bin_name] = JaggedArray.fromcounts(counts, allBins)
        bin_order.append(bin_name)

    # again this is only for one dimension of binning
    # we can figure out a 2D EA when we get there
    # The value columns follow the Min/Max pairs of all binned variables;
    # for the supported one-dimensional case this is column index 2.
    first_value_col = 2 * nBinnedVars
    wrapped_up = {}
    lookup_type = 'dense_lookup'
    dims = bins[layout[1]]
    for i in range(nEvalVars):
        value_col = columns[first_value_col + i]
        ea_name = '_'.join([name, value_col])
        wrapped_up[(ea_name, lookup_type)] = (pars[value_col], dims)

    return wrapped_up
# Example #5
# 0
    def process(self, df):
        """Select back-to-back AK8 dijet events and fill the analysis histograms.

        The method builds FatJet / Jet / SubJet candidate arrays from the
        flat columns of ``df``, applies jet-ID, kinematic, exactly-two-jet,
        HT, delta-phi and subjet-index selections, assigns the two AK8 jets
        to candidate slots 0 and 1, derives top-tag / b-tag / rapidity
        category masks and then fills every histogram in the accumulator
        once per category.  Optionally the event weights are scaled by a
        mistag rate read from lookup tables (``self.UseLookUpTables``) and
        the probe-jet mass is replaced by a value drawn from a QCD mass
        histogram (``self.ModMass``).

        Parameters
        ----------
        df : dataframe-like
            Must provide ``df['dataset']``, ``df.available`` and the
            ``FatJet_*``, ``Jet_*``, ``SubJet_*`` columns read below (plus
            ``Generator_weight`` for non-``JetHT`` datasets).

        Returns
        -------
        The accumulator obtained from ``self.accumulator.identity()`` after
        the cutflow counters and histograms have been filled.
        """

        output = self.accumulator.identity()

        # ---- Define dataset ---- #
        dataset = df['dataset']  #coffea.processor.LazyDataFrame
        Dataset_info = df.available  #list of available columns in LazyDataFrame object (Similar to 'Events->Show()' command in ROOT)

        # ---- Get triggers from Dataset_info ---- #
        #triggers = [itrig for itrig in Dataset_info if 'HLT_PFHT' in itrig]
        #AK8triggers = [itrig for itrig in Dataset_info if 'HLT_AK8PFHT' in itrig]

        # ---- Find numeric values in trigger strings ---- #
        #triggers_cut1 = [sub.split('PFHT')[1] for sub in triggers] # Remove string characters from left of number
        #triggers_cut2 = [sub.split('_')[0] for sub in triggers_cut1] # Remove string characters from right of number
        #isTriggerValue = [val.isnumeric() for val in triggers_cut2] # Boolean -> if string is only a number
        #triggers_cut2 = np.where(isTriggerValue, triggers_cut2, 0) # If string is not a number, replace with 0
        #triggers_vals = [int(val) for val in triggers_cut2] # Convert string numbers to integers

        #AK8triggers_cut1 = [sub.split('HT')[1] for sub in AK8triggers]
        #AK8triggers_cut2 = [sub.split('_')[0] for sub in AK8triggers_cut1]
        #isAK8TriggerValue = [val.isnumeric() for val in AK8triggers_cut2]
        #AK8triggers_cut2 = np.where(isAK8TriggerValue, AK8triggers_cut2, 0)
        #AK8triggers_vals = [int(val) for val in AK8triggers_cut2]

        # ---- Find Largest and Second Largest Value ---- #
        #triggers_vals.sort(reverse = True)
        #AK8triggers_vals.sort(reverse = True)

        #triggers_vals1 = str(triggers_vals[0])
        #triggers_vals2 = str(triggers_vals[1])
        #AK8triggers_vals1 = str(AK8triggers_vals[0])
        #AK8triggers_vals2 = str(AK8triggers_vals[1])

        # ---- Define strings for the selected triggers ---- #
        #HLT_trig1_str = [itrig for itrig in triggers if (triggers_vals1) in itrig][0]
        #HLT_trig2_str = [itrig for itrig in triggers if (triggers_vals2) in itrig][0]
        #HLT_AK8_trig1_str = [itrig for itrig in AK8triggers if (AK8triggers_vals1) in itrig][0]
        #HLT_AK8_trig2_str = [itrig for itrig in AK8triggers if (AK8triggers_vals2) in itrig][0]

        # ---- Define HLT triggers to be used ---- #
        #HLT_trig1 = df[HLT_trig1_str]
        #HLT_trig2 = df[HLT_trig2_str]
        #HLT_AK8_trig1 = df[HLT_AK8_trig1_str]
        #HLT_AK8_trig2 = df[HLT_AK8_trig2_str]

        # ---- Define AK8 Jets as FatJets ---- #
        FatJets = JaggedCandidateArray.candidatesfromcounts(
            df['nFatJet'],
            pt=df['FatJet_pt'],
            eta=df['FatJet_eta'],
            phi=df['FatJet_phi'],
            mass=df['FatJet_mass'],
            area=df['FatJet_area'],
            msoftdrop=df['FatJet_msoftdrop'],
            jetId=df['FatJet_jetId'],
            tau1=df['FatJet_tau1'],
            tau2=df['FatJet_tau2'],
            tau3=df['FatJet_tau3'],
            tau4=df['FatJet_tau4'],
            n3b1=df['FatJet_n3b1'],
            btagDeepB=df['FatJet_btagDeepB'],
            btagCSVV2=df['FatJet_btagCSVV2'],
            deepTag_TvsQCD=df['FatJet_deepTag_TvsQCD'],
            deepTagMD_TvsQCD=df['FatJet_deepTagMD_TvsQCD'],
            subJetIdx1=df['FatJet_subJetIdx1'],
            subJetIdx2=df['FatJet_subJetIdx2'])

        # ---- Define AK4 jets as Jets ---- #
        Jets = JaggedCandidateArray.candidatesfromcounts(df['nJet'],
                                                         pt=df['Jet_pt'],
                                                         eta=df['Jet_eta'],
                                                         phi=df['Jet_phi'],
                                                         mass=df['Jet_mass'],
                                                         area=df['Jet_area'])
        # ---- Define SubJets ---- #
        SubJets = JaggedCandidateArray.candidatesfromcounts(
            df['nSubJet'],
            pt=df['SubJet_pt'],
            eta=df['SubJet_eta'],
            phi=df['SubJet_phi'],
            mass=df['SubJet_mass'],
            btagDeepB=df['SubJet_btagDeepB'],
            btagCSVV2=df['SubJet_btagCSVV2'])

        # ---- Get event weights from dataset ---- #
        # Datasets whose name contains 'JetHT' are treated as data (unit weights);
        # everything else takes its weight from the Generator_weight column.
        if 'JetHT' in dataset:  # If data is used...
            evtweights = np.ones(FatJets.size)  # set all "data weights" to one
        else:  # if Monte Carlo dataset is used...
            evtweights = df["Generator_weight"].reshape(-1, 1).flatten()

        # ---- Show all events ---- #
        output['cutflow']['all events'] += FatJets.size

        # ---- Apply Trigger(s) ---- #
        #FatJets = FatJets[HLT_AK8_trig1]
        #evtweights = evtweights[HLT_AK8_trig1]
        #Jets = Jets[HLT_AK8_trig1]
        #SubJets = SubJets[HLT_AK8_trig1]

        # ---- Jets that satisfy Jet ID ---- #
        # Per-jet mask: removes jets inside events, the number of events is unchanged
        jet_id = (FatJets.jetId > 0)  # Loose jet ID
        FatJets = FatJets[jet_id]
        output['cutflow']['jet id'] += jet_id.any().sum()

        # ---- Apply pT Cut and Rapidity Window ---- #
        # Also a per-jet mask (pt above self.ak8PtMin and |rapidity| < 2.4)
        jetkincut_index = (FatJets.pt > self.ak8PtMin) & (np.abs(
            FatJets.p4.rapidity) < 2.4)
        FatJets = FatJets[jetkincut_index]
        output['cutflow']['jet kin'] += jetkincut_index.any().sum()

        # ---- Find two AK8 Jets ---- #
        # First per-event mask: from here on every collection and the weights
        # must be filtered together to stay aligned.
        twoFatJetsKin = (FatJets.counts == 2)
        FatJets = FatJets[twoFatJetsKin]
        evtweights = evtweights[twoFatJetsKin]
        Jets = Jets[twoFatJetsKin]
        SubJets = SubJets[twoFatJetsKin]
        output['cutflow']['two FatJets and jet kin'] += twoFatJetsKin.sum()

        # ---- Apply HT Cut ---- #
        # HT is the per-event sum of AK4 jet pt; Jets itself is not filtered
        # here and is not read again below.
        hT = Jets.pt.sum()
        passhT = (hT > self.htCut)
        evtweights = evtweights[passhT]
        FatJets = FatJets[passhT]
        SubJets = SubJets[passhT]

        # ---- Randomly Assign AK8 Jets as TTbar Candidates 0 and 1 --- #
        if self.RandomDebugMode == True:  # Deterministic stand-in for the randomizer (chosen by comparing phi) for consistent results
            highPhi = FatJets.phi[:, 0] > FatJets.phi[:, 1]
            highRandIndex = np.where(highPhi, 0, 1)
            index = JaggedArray.fromcounts(np.ones(len(FatJets), dtype='i'),
                                           highRandIndex)
        else:  # Truly randomize
            index = JaggedArray.fromcounts(
                np.ones(len(FatJets), dtype='i'),
                self.prng.randint(2, size=len(FatJets)))
        jet0 = FatJets[index]  #J0
        jet1 = FatJets[1 - index]  #J1

        ttbarcands = jet0.cross(jet1)  #FatJets[:,0:2].distincts()

        # ---- Make sure we have at least 1 TTbar candidate pair and re-broadcast relevant arrays  ---- #
        oneTTbar = (ttbarcands.counts >= 1)
        output['cutflow']['>= one oneTTbar'] += oneTTbar.sum()
        ttbarcands = ttbarcands[oneTTbar]
        evtweights = evtweights[oneTTbar]
        FatJets = FatJets[oneTTbar]
        SubJets = SubJets[oneTTbar]

        # ---- Apply Delta Phi Cut for Back to Back Topology ---- #
        dPhiCut = (ttbarcands.i0.p4.delta_phi(ttbarcands.i1.p4) >
                   2.1).flatten()
        output['cutflow']['dPhi > 2.1'] += dPhiCut.sum()
        ttbarcands = ttbarcands[dPhiCut]
        evtweights = evtweights[dPhiCut]
        FatJets = FatJets[dPhiCut]
        SubJets = SubJets[dPhiCut]

        # ---- Identify subjets according to subjet ID ---- #
        # Both jets of the pair must have two valid (> -1) subjet indices
        hasSubjets0 = ((ttbarcands.i0.subJetIdx1 > -1) &
                       (ttbarcands.i0.subJetIdx2 > -1))
        hasSubjets1 = ((ttbarcands.i1.subJetIdx1 > -1) &
                       (ttbarcands.i1.subJetIdx2 > -1))
        GoodSubjets = ((hasSubjets0) & (hasSubjets1)).flatten()

        ttbarcands = ttbarcands[GoodSubjets]

        # FatJets is not filtered by GoodSubjets; it is not read again below
        SubJets = SubJets[GoodSubjets]
        evtweights = evtweights[GoodSubjets]

        SubJet01 = SubJets[ttbarcands.i0.subJetIdx1]  # FatJet i0 with subjet 1
        SubJet02 = SubJets[ttbarcands.i0.subJetIdx2]  # FatJet i0 with subjet 2
        SubJet11 = SubJets[ttbarcands.i1.subJetIdx1]  # FatJet i1 with subjet 1
        SubJet12 = SubJets[ttbarcands.i1.subJetIdx2]  # FatJet i1 with subjet 2

        # ---- Define Rapidity Regions ---- #
        # Central: |y0 - y1| < 1.0; forward: everything else
        cen = np.abs(ttbarcands.i0.p4.rapidity -
                     ttbarcands.i1.p4.rapidity) < 1.0
        fwd = (~cen)

        # ---- CMS Top Tagger Version 2 (SD and Tau32 Cuts) ---- #
        # tau3/tau2 is set to 0 wherever tau2 is not positive
        tau32_i0 = np.where(ttbarcands.i0.tau2 > 0,
                            ttbarcands.i0.tau3 / ttbarcands.i0.tau2, 0)
        tau32_i1 = np.where(ttbarcands.i1.tau2 > 0,
                            ttbarcands.i1.tau3 / ttbarcands.i1.tau2, 0)
        taucut_i0 = tau32_i0 < self.tau32Cut
        taucut_i1 = tau32_i1 < self.tau32Cut
        mcut_i0 = (self.minMSD < ttbarcands.i0.msoftdrop) & (
            ttbarcands.i0.msoftdrop < self.maxMSD)
        mcut_i1 = (self.minMSD < ttbarcands.i1.msoftdrop) & (
            ttbarcands.i1.msoftdrop < self.maxMSD)

        ttag_i0 = (taucut_i0) & (mcut_i0)
        ttag_i1 = (taucut_i1) & (mcut_i1)

        # ---- Define "Top Tag" Regions ---- #
        antitag = (~taucut_i0) & (mcut_i0
                                  )  #Probe will always be ttbarcands.i1 (at)
        antitag_probe = np.logical_and(
            antitag, ttag_i1
        )  #Found an antitag and ttagged probe pair for mistag rate (Pt)
        pretag = ttag_i0  # Only jet0 (pret)
        ttag0 = (~ttag_i0) & (~ttag_i1)  # No tops tagged (0t)
        ttag1 = ttag_i0 ^ ttag_i1  # Exclusively one top tagged (1t)
        ttagI = ttag_i0 | ttag_i1  # At least one top tagged ('I' for 'inclusive' tagger; >=1t; 1t+2t)
        ttag2 = ttag_i0 & ttag_i1  # Both jets top tagged (2t)
        Alltags = ttag0 | ttagI  #Either no tag or at least one tag (0t+1t+2t)

        # ---- Pick FatJet that passes btag cut based on its subjet with the highest btag value ---- #
        btag_i0 = (np.maximum(SubJet01.btagCSVV2, SubJet02.btagCSVV2) >
                   self.bdisc)
        btag_i1 = (np.maximum(SubJet11.btagCSVV2, SubJet12.btagCSVV2) >
                   self.bdisc)

        # --- Define "B Tag" Regions ---- #
        btag0 = (~btag_i0) & (~btag_i1)  #(0b)
        btag1 = btag_i0 ^ btag_i1  #(1b)
        btag2 = btag_i0 & btag_i1  #(2b)

        # ---- Get Analysis Categories ---- #
        # ---- They are (Pt,at,pret,0t,1t,>=1t,2t,0t+1t+2t) cross (0b,1b,2b) cross (central, forward) ---- #
        # NOTE(review): the masks are paired with self.anacats purely by position,
        # so self.anacats must list the labels in this same product order — confirm.
        regs = [cen, fwd]
        btags = [btag0, btag1, btag2]
        ttags = [
            antitag_probe, antitag, pretag, ttag0, ttag1, ttagI, ttag2, Alltags
        ]
        cats = [(t & b & y).flatten()
                for t, b, y in itertools.product(ttags, btags, regs)]
        labels_and_categories = dict(zip(self.anacats, cats))

        # ---- Variables for Kinematic Histograms ---- #
        # ---- "i0" is the control jet, "i1" is the probe jet ---- #
        ttbarmass = ttbarcands.p4.sum().mass.flatten()
        jetpt = ttbarcands.i1.pt.flatten()
        jeteta = ttbarcands.i1.eta.flatten()
        jetphi = ttbarcands.i1.phi.flatten()
        jety = ttbarcands.i1.p4.rapidity.flatten()
        jetmass = ttbarcands.i1.p4.mass.flatten()
        SDmass = ttbarcands.i1.msoftdrop.flatten()
        jetdy = np.abs(ttbarcands.i0.p4.rapidity.flatten() -
                       ttbarcands.i1.p4.rapidity.flatten())
        # Unlike tau32_i1 above, this ratio is not guarded against tau2 <= 0
        Tau32 = (ttbarcands.i1.tau3 / ttbarcands.i1.tau2).flatten()
        # ---- Variables for Deep Tagger Analysis ---- #
        deepTag = ttbarcands.i1.deepTag_TvsQCD.flatten()
        deepTagMD = ttbarcands.i1.deepTagMD_TvsQCD.flatten()

        weights = evtweights.flatten()

        # ---- Define the SumW2 for MC Datasets ---- #
        output['cutflow']['sumw'] += np.sum(weights)
        output['cutflow']['sumw2'] += np.sum(weights**2)

        # ---- Define Momentum p of probe jet as the Mistag Rate variable; M(p) ---- #
        # ---- Transverse Momentum pT can also be used instead; M(pT) ---- #
        pT = ttbarcands.i1.pt.flatten()
        eta = ttbarcands.i1.eta.flatten()
        pz = np.sinh(eta) * pT
        p = np.absolute(np.sqrt(pT**2 + pz**2))

        # ---- Define the Numerator and Denominator for Mistag Rate ---- #
        numerator = np.where(
            antitag_probe, p,
            -1)  # If no antitag and tagged probe, move event to useless bin
        denominator = np.where(antitag, p,
                               -1)  # If no antitag, move event to useless bin

        # NOTE: this rebinds `df`; the input dataframe is not accessible below this line
        df = pd.DataFrame({"momentum":
                           p})  # Used for finding values in LookUp Tables

        for ilabel, icat in labels_and_categories.items():
            ### ------------------------------------ Mistag Scaling ------------------------------------ ###
            if self.UseLookUpTables == True:
                # ---- Weight ttbar M.C. and data by mistag from data (corresponding to its year) ---- #
                # The table key is 'at' plus the last five characters of the category label
                if 'TTbar_' in dataset:
                    file_df = self.lu['JetHT' + dataset[-4:] + '_Data'][
                        'at' + str(
                            ilabel[-5:]
                        )]  #Pick out proper JetHT year mistag for TTbar sim.
                elif dataset == 'TTbar':
                    file_df = self.lu['JetHT']['at' + str(
                        ilabel[-5:])]  # All JetHT years mistag for TTbar sim.
                else:
                    file_df = self.lu[dataset]['at' + str(
                        ilabel[-5:])]  # get mistag (lookup) filename for 'at'

                bin_widths = file_df[
                    'p'].values  # collect bins as written in .csv file
                mtr = file_df[
                    'M(p)'].values  # collect mistag rate as function of p as written in file
                wgts = mtr  # Define weights based on mistag rates

                BinKeys = np.arange(
                    bin_widths.size
                )  # Use as label for BinNumber column in the new dataframe

                #Bins = pd.interval_range(start=0, periods=100, freq=100, closed='left') # Recreate the momentum bins from file_df as something readable for pd.cut()
                # NOTE(review): manual_bins is not defined in this method; presumably a
                # module-level sequence of momentum bin edges — confirm.
                Bins = np.array(manual_bins)

                df['BinWidth'] = pd.cut(p, bins=Bins)  # new dataframe column
                df['BinNumber'] = pd.cut(p, bins=Bins, labels=BinKeys)

                BinNumber = df[
                    'BinNumber'].values  # Collect the Bin Numbers into a numpy array
                BinNumber = BinNumber.astype(
                    'int64')  # Ensures the bin numbers are integers

                WeightMatching = wgts[
                    BinNumber]  # Match 'wgts' with corresponding p bin using the bin number
                Weights = weights * WeightMatching  # Include 'wgts' with the previously defined 'weights'
            else:
                Weights = weights  # No mistag rates, no change to weights
            ###---------------------------------------------------------------------------------------------###
            ### ----------------------------------- Mod-mass Procedure ------------------------------------ ###
            if self.ModMass == True:
                # NOTE(review): the file is re-loaded on every category iteration
                QCD_unweighted = util.load(
                    'TTbarResCoffea_QCD_unweighted_output.coffea')

                # ---- Extract event counts from QCD MC hist in signal region ---- #
                QCD_hist = QCD_unweighted['jetmass'].integrate(
                    'anacat',
                    '2t' + str(ilabel[-5:])).integrate('dataset', 'QCD')
                data = QCD_hist.values()  # Dictionary of values
                QCD_data = [
                    i for i in data.values()
                ][0]  # take the first entry of the dictionary's values

                # ---- Re-create Bins from QCD_hist as Numpy Array ---- #
                bins = np.arange(
                    510
                )  #Re-make bins from the jetmass_axis starting with the appropriate range
                QCD_bins = bins[::
                                10]  #Every 10th value: 51 edges, i.e. 50 bins, intended to match the jetmass_axis

                # ---- Define Mod Mass Distribution ---- #
                ModMass_hist_dist = ss.rv_histogram([QCD_data, QCD_bins])
                jet1_modp4 = copy.copy(
                    jet1.p4
                )  #Shallow copy of J1's Lorentz four vector, used for the mass replacement
                jet1_modp4["fMass"] = ModMass_hist_dist.rvs(
                    size=jet1_modp4.size
                )  #Replace J1's mass with random value of mass from mm hist
                ttbarcands_modmass = jet0.p4.cross(
                    jet1_modp4)  #J0's four vector x modified J1's four vector

                # ---- Apply Necessary Selections to new modmass version ---- #
                # Same three event masks, in the same order, as were applied to ttbarcands
                ttbarcands_modmass = ttbarcands_modmass[oneTTbar]
                ttbarcands_modmass = ttbarcands_modmass[dPhiCut]
                ttbarcands_modmass = ttbarcands_modmass[GoodSubjets]

                # ---- Manually sum the modmass p4 candidates (Coffea technicality) ---- #
                ttbarcands_modmass_p4_sum = (ttbarcands_modmass.i0 +
                                             ttbarcands_modmass.i1)

                # ---- Re-define Mass Variables for ModMass Procedure (pt, eta, phi are redundant to change) ---- #
                # These overwrite the variables defined before the loop, so the
                # replacement also persists into later iterations.
                ttbarmass = ttbarcands_modmass_p4_sum.flatten().mass
                jetmass = ttbarcands_modmass.i1.mass.flatten()
            ###---------------------------------------------------------------------------------------------###
            output['cutflow'][ilabel] += np.sum(icat)

            # Fill every histogram with the events of this category only
            output['ttbarmass'].fill(dataset=dataset,
                                     anacat=ilabel,
                                     ttbarmass=ttbarmass[icat],
                                     weight=Weights[icat])
            output['jetpt'].fill(dataset=dataset,
                                 anacat=ilabel,
                                 jetpt=jetpt[icat],
                                 weight=Weights[icat])
            output['probept'].fill(dataset=dataset,
                                   anacat=ilabel,
                                   jetpt=pT[icat],
                                   weight=Weights[icat])
            output['probep'].fill(dataset=dataset,
                                  anacat=ilabel,
                                  jetp=p[icat],
                                  weight=Weights[icat])
            output['jeteta'].fill(dataset=dataset,
                                  anacat=ilabel,
                                  jeteta=jeteta[icat],
                                  weight=Weights[icat])
            output['jetphi'].fill(dataset=dataset,
                                  anacat=ilabel,
                                  jetphi=jetphi[icat],
                                  weight=Weights[icat])
            output['jety'].fill(dataset=dataset,
                                anacat=ilabel,
                                jety=jety[icat],
                                weight=Weights[icat])
            output['jetdy'].fill(dataset=dataset,
                                 anacat=ilabel,
                                 jetdy=jetdy[icat],
                                 weight=Weights[icat])
            output['numerator'].fill(dataset=dataset,
                                     anacat=ilabel,
                                     jetp=numerator[icat],
                                     weight=Weights[icat])
            output['denominator'].fill(dataset=dataset,
                                       anacat=ilabel,
                                       jetp=denominator[icat],
                                       weight=Weights[icat])
            output['jetmass'].fill(dataset=dataset,
                                   anacat=ilabel,
                                   jetmass=jetmass[icat],
                                   weight=Weights[icat])
            output['SDmass'].fill(dataset=dataset,
                                  anacat=ilabel,
                                  jetmass=SDmass[icat],
                                  weight=Weights[icat])
            output['tau32'].fill(dataset=dataset,
                                 anacat=ilabel,
                                 tau32=Tau32[icat],
                                 weight=Weights[icat])
            output['tau32_2D'].fill(dataset=dataset,
                                    anacat=ilabel,
                                    jetpt=pT[icat],
                                    tau32=Tau32[icat],
                                    weight=Weights[icat])
            output['deepTag_TvsQCD'].fill(dataset=dataset,
                                          anacat=ilabel,
                                          jetpt=pT[icat],
                                          tagger=deepTag[icat],
                                          weight=Weights[icat])
            output['deepTagMD_TvsQCD'].fill(dataset=dataset,
                                            anacat=ilabel,
                                            jetpt=pT[icat],
                                            tagger=deepTagMD[icat],
                                            weight=Weights[icat])

        return output