Example #1
0
 def GetNminus1Group(self, tagger):
     """Assemble the group of tagging cuts used for an N-1 study.

     Per the original note, use this after ApplyTopPickViaMatch.

     Args:
         tagger: Tagger name. It is embedded in the cut names, in the
             score column names, and in the ``self.cuts`` keys
             ``tagger + 'MD_HbbvsQCD'`` and ``tagger + '_TvsQCD'``.

     Returns:
         A ``CutGroup`` named ``'taggingVars'`` holding, in order, the
         Higgs mass window, top mass window, Higgs tagger score and top
         tagger score cuts.
     """
     group = CutGroup('taggingVars')

     # Mass windows: bounds are read from self.cuts['mh'] / self.cuts['mt'].
     higgs_mass_name = 'mH_%s_cut' % tagger
     higgs_mass_expr = (
         'SubleadHiggs_msoftdrop_corrH > {0} && SubleadHiggs_msoftdrop_corrH < {1}'
     ).format(*self.cuts['mh'])
     group.Add(higgs_mass_name, higgs_mass_expr)

     top_mass_name = 'mt_%s_cut' % tagger
     top_mass_expr = (
         'LeadTop_msoftdrop_corrT > {0} && LeadTop_msoftdrop_corrT < {1}'
     ).format(*self.cuts['mt'])
     group.Add(top_mass_name, top_mass_expr)

     # Tagger score thresholds: one lower bound per tagged object.
     higgs_score_name = '%s_H_cut' % tagger
     higgs_score_expr = 'SubleadHiggs_{0}MD_HbbvsQCD > {1}'.format(
         tagger, self.cuts[tagger + 'MD_HbbvsQCD'])
     group.Add(higgs_score_name, higgs_score_expr)

     top_score_name = '%s_top_cut' % tagger
     top_score_expr = 'LeadTop_{0}_TvsQCD > {1}'.format(
         tagger, self.cuts[tagger + '_TvsQCD'])
     group.Add(top_score_name, top_score_expr)

     return group
def select(setname, year):
    """Apply the preselection to one dataset and return its N-1 histograms.

    Opens ``<rootfile_path>/<setname>_bstar<year>.root`` with a TIMBER
    ``analyzer``, books the filter/trigger/kinematic cuts, defines the jet
    and plotting variables, and then histograms every variable that has a
    cut in ``N_cuts`` with all *other* cuts of that group applied.

    Args:
        setname: Dataset name; used to build the input file name and
            passed to ``helpers.getNormFactor``.
        year: Data-taking year; used the same way as ``setname``.

    Returns:
        The ``HistGroup`` named ``'nminus1Hists'`` containing one filled
        ``Histo1D`` result per N-1 node (the ``'full'`` node is skipped),
        keyed by variable name.

    Side effects:
        Enables ROOT implicit multi-threading with two threads and writes
        the analyzer node tree to ``exercises/nminus1_tree.dot``.
    """
    # Just use two threads - no need to kill the interactive nodes
    ROOT.ROOT.EnableImplicitMT(2)

    # Initialize TIMBER analyzer
    file_path = '%s/%s_bstar%s.root' % (rootfile_path, setname, year)
    a = analyzer(file_path)

    # Determine normalization weight (data is never reweighted)
    norm = 1. if a.isData else helpers.getNormFactor(setname, year, config)

    # Book actions on the RDataFrame
    a.Cut('filters', a.GetFlagString(flags))
    a.Cut('trigger', a.GetTriggerString(triggers))
    # need to calculate if we have two jets (with Id) that are back-to-back
    a.Define('jetIdx', 'hemispherize(FatJet_phi, FatJet_jetId)')
    # If we don't do this, we may try to access variables of jets that
    # don't exist! (leads to seg fault)
    a.Cut('nFatJets_cut', 'nFatJet > max(jetIdx[0],jetIdx[1])')
    # cut on that calculation
    a.Cut("hemis", "(jetIdx[0] != -1)&&(jetIdx[1] != -1)")
    a.Cut('pt_cut', 'FatJet_pt[jetIdx[0]] > 400 && FatJet_pt[jetIdx[1]] > 400')
    a.Cut(
        'eta_cut',
        'abs(FatJet_eta[jetIdx[0]]) < 2.4 && abs(FatJet_eta[jetIdx[1]]) < 2.4')
    a.Define('norm', str(norm))

    #################################
    # Build some variables for jets #
    #################################
    # Wtagging decision logic
    # Returns 0 for no tag, 1 for lead tag, 2 for sublead tag, and 3 for both
    # tag (which is physics-wise equivalent to 2)
    wtag_str = (
        "1*Wtag(FatJet_tau2[jetIdx[0]]/FatJet_tau1[jetIdx[0]],0,{0}, FatJet_msoftdrop[jetIdx[0]],65,105)"
        " + 2*Wtag(FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]],0,{0}, FatJet_msoftdrop[jetIdx[1]],65,105)"
    ).format(cuts['tau21'])

    jets = VarGroup('jets')
    jets.Add('wtag_bit', wtag_str)
    # (if wtag==3 or 2 (subleading w), top_index=0) else
    # (if wtag==1, top_index=1) else (-1)
    jets.Add('top_bit', '(wtag_bit & 2)? 0: (wtag_bit & 1)? 1: -1')
    jets.Add('top_index', 'top_bit >= 0 ? jetIdx[top_bit] : -1')
    jets.Add('w_index',
             'top_index == 0 ? jetIdx[1] : top_index == 1 ? jetIdx[0] : -1')
    # Calculate some new columns that we'd like to cut on (that were costly
    # to do before the other filtering)
    jets.Add(
        "lead_vect",
        "hardware::TLvector(FatJet_pt[jetIdx[0]],FatJet_eta[jetIdx[0]],FatJet_phi[jetIdx[0]],FatJet_msoftdrop[jetIdx[0]])"
    )
    jets.Add(
        "sublead_vect",
        "hardware::TLvector(FatJet_pt[jetIdx[1]],FatJet_eta[jetIdx[1]],FatJet_phi[jetIdx[1]],FatJet_msoftdrop[jetIdx[1]])"
    )
    jets.Add("deltaY", "abs(lead_vect.Rapidity()-sublead_vect.Rapidity())")
    jets.Add("mtw", "hardware::invariantMass({lead_vect,sublead_vect})")

    #########
    # N - 1 #
    #########
    # assume leading is top and subleading is W
    plotting_vars = VarGroup('plotting_vars')
    plotting_vars.Add("mtop", "FatJet_msoftdrop[jetIdx[0]]")
    plotting_vars.Add("mW", "FatJet_msoftdrop[jetIdx[1]]")
    plotting_vars.Add("tau32", "FatJet_tau3[jetIdx[0]]/FatJet_tau2[jetIdx[0]]")
    plotting_vars.Add(
        "subjet_btag",
        "max(SubJet_btagDeepB[FatJet_subJetIdx1[jetIdx[0]]],SubJet_btagDeepB[FatJet_subJetIdx2[jetIdx[0]]])"
    )
    plotting_vars.Add("tau21", "FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]]")
    plotting_vars.Add("lead_jet_deepAK8_MD_WvsQCD",
                      "FatJet_deepTagMD_WvsQCD[jetIdx[0]]")
    plotting_vars.Add("sublead_jet_deepAK8_MD_WvsQCD",
                      "FatJet_deepTagMD_WvsQCD[jetIdx[1]]")
    plotting_vars.Add("lead_jet_deepAK8_MD_TvsQCD",
                      "FatJet_deepTagMD_TvsQCD[jetIdx[0]]")
    plotting_vars.Add("sublead_jet_deepAK8_MD_TvsQCD",
                      "FatJet_deepTagMD_TvsQCD[jetIdx[1]]")

    N_cuts = CutGroup('Ncuts')  # cuts
    N_cuts.Add("deltaY_cut", "deltaY<1.6")
    N_cuts.Add("mtop_cut", "(mtop > 105.)&&(mtop < 220.)")
    N_cuts.Add("mW_cut", "(mW > 65.)&&(mW < 105.)")
    # Currently disabled cuts (their variables are still defined and binned):
    #N_cuts.Add("tau32_cut",       "(tau32 > 0.0)&&(tau32 < %s)"%(cuts['tau32']))
    #N_cuts.Add("subjet_btag_cut", "(subjet_btag > %s)&&(subjet_btag < 1.)"%(cuts['sjbtag']))
    #N_cuts.Add("tau21_cut",       "(tau21 > 0.0)&&(tau21 < %s)"%(cuts['tau21']))
    N_cuts.Add("lead_jet_deepAK8_MD_WvsQCD_cut",
               "lead_jet_deepAK8_MD_WvsQCD > 0.9")
    N_cuts.Add("sublead_jet_deepAK8_MD_WvsQCD_cut",
               "sublead_jet_deepAK8_MD_WvsQCD > 0.9")
    N_cuts.Add("lead_jet_deepAK8_MD_TvsQCD_cut",
               "lead_jet_deepAK8_MD_TvsQCD > 0.9")
    N_cuts.Add("sublead_jet_deepAK8_MD_TvsQCD_cut",
               "sublead_jet_deepAK8_MD_TvsQCD > 0.9")

    # Organize N-1 of tagging variables when assuming top is always leading
    nodeToPlot = a.Apply([jets, plotting_vars])
    # constructs N nodes with a different N-1 selection for each
    nminus1Nodes = a.Nminus1(N_cuts, nodeToPlot)
    nminus1Hists = HistGroup('nminus1Hists')
    # variable name -> [number of bins, lower edge, upper edge]
    binning = {
        'mtop': [25, 50, 300],
        'mW': [25, 30, 270],
        'tau32': [20, 0, 1],
        'tau21': [20, 0, 1],
        'subjet_btag': [20, 0, 1],
        'deltaY': [20, 0, 2.0],
        'lead_jet_deepAK8_MD_WvsQCD': [20, 0, 1],
        'sublead_jet_deepAK8_MD_WvsQCD': [20, 0, 1],
        'lead_jet_deepAK8_MD_TvsQCD': [20, 0, 1],
        'sublead_jet_deepAK8_MD_TvsQCD': [20, 0, 1],
    }

    # Book every histogram before asking for any result. RDataFrame actions
    # are lazy, so the first GetValue() below fills all booked histograms in
    # a single event loop instead of running one loop per histogram.
    booked = []
    for nkey, node in nminus1Nodes.items():
        if nkey == 'full':
            continue
        # Node keys look like 'minus_<var>_cut'; recover the variable name.
        var = nkey.replace('_cut', '').replace('minus_', '')
        nbins, xlow, xhigh = binning[var]
        hist_tuple = (var, var, nbins, xlow, xhigh)
        booked.append((var, node.DataFrame.Histo1D(hist_tuple, var, 'norm')))

    # Trigger the computation and add the filled hists to the group
    for var, hist in booked:
        hist.GetValue()
        nminus1Hists.Add(var, hist)

    # The histograms are booked on the raw DataFrames, so the analyzer's
    # node tree does not change inside the loop: write it out once.
    a.PrintNodeTree('exercises/nminus1_tree.dot')

    # Return the group
    return nminus1Hists