def select(setname,year):
    """Book the dijet preselection for one dataset and histogram its variables.

    Opens ``<rootfile_path>/<setname>_bstar<year>.root`` with a TIMBER
    analyzer, applies the flag and trigger filters followed by the dijet
    kinematic selection, defines the tau32/tau21 ratios of the two selected
    jets and fills one weighted histogram per key of the module-level
    ``varnames``.

    Args:
        setname: Dataset name; used in the input file name, the histogram
            names and the normalization lookup.
        year: Year tag; used in the same places as ``setname``.

    Returns:
        A ``HistGroup`` named ``<setname>_<year>`` holding, for every key of
        ``varnames``, a histogram with 20 bins from 0 to 1 weighted by the
        ``norm`` column.
    """
    # Two threads are enough - no need to kill the interactive nodes
    ROOT.ROOT.EnableImplicitMT(2)

    # Open the input file with a TIMBER analyzer
    a = analyzer('{0}/{1}_bstar{2}.root'.format(rootfile_path, setname, year))

    # Data stays unweighted; everything else takes its weight from the helper
    norm = 1. if a.isData else helpers.getNormFactor(setname,year,config,a.genEventCount)

    # Event-level filters and trigger requirement come first
    a.Cut('filters',a.GetFlagString(flags))
    a.Cut('trigger',a.GetTriggerString(triggers))

    # N-subjettiness ratio of one selected jet; evaluates to -1 when the
    # denominator is not positive so that we never divide by zero
    ratio = '{den}[jetIdx[{jet}]] > 0 ? {num}[jetIdx[{jet}]]/{den}[jetIdx[{jet}]] : -1'

    # Everything else is booked strictly in this order:
    # (booking method, cut/column name, C++ expression)
    booking = [
        # jetIdx holds the two jets (with Id) that are back-to-back
        (a.Define, 'jetIdx', 'hemispherize(FatJet_phi, FatJet_jetId)'),
        # Without this we may access jets that don't exist (leads to seg fault)
        (a.Cut, 'nFatJets_cut', 'nFatJet > max(jetIdx[0],jetIdx[1])'),
        # Both indices must have been found
        (a.Cut, 'hemis', '(jetIdx[0] != -1)&&(jetIdx[1] != -1)'),
        (a.Cut, 'pt_cut', 'FatJet_pt[jetIdx[0]] > 400 && FatJet_pt[jetIdx[1]] > 400'),
        (a.Cut, 'eta_cut', 'abs(FatJet_eta[jetIdx[0]]) < 2.4 && abs(FatJet_eta[jetIdx[1]]) < 2.4'),
        (a.Cut, 'mjet_cut', 'FatJet_msoftdrop[jetIdx[0]] > 50 && FatJet_msoftdrop[jetIdx[1]] > 50'),
        (a.Cut, 'mtw_cut', 'analyzer::invariantMass(jetIdx[0],jetIdx[1],FatJet_pt,FatJet_eta,FatJet_phi,FatJet_msoftdrop) > 1200'),
        (a.Define, 'lead_tau32', ratio.format(num='FatJet_tau3', den='FatJet_tau2', jet=0)),
        (a.Define, 'sublead_tau32', ratio.format(num='FatJet_tau3', den='FatJet_tau2', jet=1)),
        (a.Define, 'lead_tau21', ratio.format(num='FatJet_tau2', den='FatJet_tau1', jet=0)),
        (a.Define, 'sublead_tau21', ratio.format(num='FatJet_tau2', den='FatJet_tau1', jet=1)),
        # The weight becomes a column so it can be passed to Histo1D
        (a.Define, 'norm', str(norm)),
    ]
    for book, label, expression in booking:
        book(label, expression)

    # Collect one histogram per requested variable
    out = HistGroup('{0}_{1}'.format(setname, year))
    for varname in varnames.keys():
        histname = '{0}_{1}_{2}'.format(setname, year, varname)
        # (name, title, nbins, xmin, xmax) - what you would normally pass to a TH1
        model = (histname,histname,20,0,1)
        # Project the dataframe into a histogram of `varname` weighted by 'norm'
        hist = a.GetActiveNode().DataFrame.Histo1D(model,varname,'norm')
        hist.GetValue() # This gets the actual TH1 instead of a pointer to the TH1
        out.Add(varname,hist)

    return out
def select(setname, year):
    """Produce N-1 histograms of the tagging variables for one dataset.

    Opens ``<rootfile_path>/<setname>_bstar<year>.root`` with a TIMBER
    analyzer, applies the flag/trigger filters and the pt/eta preselection,
    defines the jet and plotting variables, and then uses ``Nminus1`` to
    histogram each cut variable with all of the *other* cuts applied.

    Args:
        setname: Dataset name; used in the input file name and the
            normalization lookup.
        year: Year tag; used in the input file name and the normalization
            lookup.

    Returns:
        A ``HistGroup`` named ``nminus1Hists`` with one histogram per cut in
        the N-1 group, keyed and named by the variable the cut acts on and
        weighted by the ``norm`` column.
    """
    ROOT.ROOT.EnableImplicitMT(
        2)  # Just use two threads - no need to kill the interactive nodes

    # Initialize TIMBER analyzer
    file_path = '%s/%s_bstar%s.root' % (rootfile_path, setname, year)
    a = analyzer(file_path)

    # Determine normalization weight (data is left unweighted)
    if not a.isData:
        # NOTE(review): no generated-event count is passed here - confirm that
        # this matches the signature of helpers.getNormFactor in use.
        norm = helpers.getNormFactor(setname, year, config)
    else:
        norm = 1.

    # Book actions on the RDataFrame
    a.Cut('filters', a.GetFlagString(flags))
    a.Cut('trigger', a.GetTriggerString(triggers))
    a.Define(
        'jetIdx', 'hemispherize(FatJet_phi, FatJet_jetId)'
    )  # need to calculate if we have two jets (with Id) that are back-to-back
    a.Cut(
        'nFatJets_cut', 'nFatJet > max(jetIdx[0],jetIdx[1])'
    )  # If we don't do this, we may try to access variables of jets that don't exist! (leads to seg fault)
    a.Cut("hemis",
          "(jetIdx[0] != -1)&&(jetIdx[1] != -1)")  # cut on that calculation
    a.Cut('pt_cut', 'FatJet_pt[jetIdx[0]] > 400 && FatJet_pt[jetIdx[1]] > 400')
    a.Cut(
        'eta_cut',
        'abs(FatJet_eta[jetIdx[0]]) < 2.4 && abs(FatJet_eta[jetIdx[1]]) < 2.4')
    a.Define('norm', str(norm))

    #################################
    # Build some variables for jets #
    #################################
    # Wtagging decision logic
    # Returns 0 for no tag, 1 for lead tag, 2 for sublead tag, and 3 for both tag (which is physics-wise equivalent to 2)
    wtag_str = "1*Wtag(FatJet_tau2[jetIdx[0]]/FatJet_tau1[jetIdx[0]],0,{0}, FatJet_msoftdrop[jetIdx[0]],65,105) + 2*Wtag(FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]],0,{0}, FatJet_msoftdrop[jetIdx[1]],65,105)".format(
        cuts['tau21'])

    jets = VarGroup('jets')
    jets.Add('wtag_bit', wtag_str)
    # top_bit is the *position* (0 = leading, 1 = subleading) of the top
    # candidate inside jetIdx: sublead W (wtag 2 or 3) -> 0, lead W (wtag 1)
    # -> 1, no W tag -> -1
    jets.Add('top_bit', '(wtag_bit & 2)? 0: (wtag_bit & 1)? 1: -1')
    # top_index / w_index are indices into the FatJet collection
    jets.Add('top_index', 'top_bit >= 0 ? jetIdx[top_bit] : -1')
    # The W candidate is the other selected jet. This must be decided from the
    # position (top_bit), not from top_index: top_index is a collection index
    # and only coincides with the position when jetIdx is exactly [0, 1].
    jets.Add('w_index',
             'top_bit == 0 ? jetIdx[1] : top_bit == 1 ? jetIdx[0] : -1')
    # Calculate some new columns that we'd like to cut on (that were costly to do before the other filtering)
    jets.Add(
        "lead_vect",
        "hardware::TLvector(FatJet_pt[jetIdx[0]],FatJet_eta[jetIdx[0]],FatJet_phi[jetIdx[0]],FatJet_msoftdrop[jetIdx[0]])"
    )
    jets.Add(
        "sublead_vect",
        "hardware::TLvector(FatJet_pt[jetIdx[1]],FatJet_eta[jetIdx[1]],FatJet_phi[jetIdx[1]],FatJet_msoftdrop[jetIdx[1]])"
    )
    jets.Add("deltaY", "abs(lead_vect.Rapidity()-sublead_vect.Rapidity())")
    jets.Add("mtw", "hardware::invariantMass({lead_vect,sublead_vect})")

    #########
    # N - 1 #
    #########
    plotting_vars = VarGroup(
        'plotting_vars')  # assume leading is top and subleading is W
    plotting_vars.Add("mtop", "FatJet_msoftdrop[jetIdx[0]]")
    plotting_vars.Add("mW", "FatJet_msoftdrop[jetIdx[1]]")
    plotting_vars.Add("tau32", "FatJet_tau3[jetIdx[0]]/FatJet_tau2[jetIdx[0]]")
    plotting_vars.Add(
        "subjet_btag",
        "max(SubJet_btagDeepB[FatJet_subJetIdx1[jetIdx[0]]],SubJet_btagDeepB[FatJet_subJetIdx2[jetIdx[0]]])"
    )
    plotting_vars.Add("tau21", "FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]]")
    plotting_vars.Add("lead_jet_deepAK8_MD_WvsQCD",
                      "FatJet_deepTagMD_WvsQCD[jetIdx[0]]")
    plotting_vars.Add("sublead_jet_deepAK8_MD_WvsQCD",
                      "FatJet_deepTagMD_WvsQCD[jetIdx[1]]")
    plotting_vars.Add("lead_jet_deepAK8_MD_TvsQCD",
                      "FatJet_deepTagMD_TvsQCD[jetIdx[0]]")
    plotting_vars.Add("sublead_jet_deepAK8_MD_TvsQCD",
                      "FatJet_deepTagMD_TvsQCD[jetIdx[1]]")

    # Each cut is named "<variable>_cut"; the histogram loop below relies on
    # that convention to recover the variable name.
    N_cuts = CutGroup('Ncuts')  # cuts
    N_cuts.Add("deltaY_cut", "deltaY<1.6")
    N_cuts.Add("mtop_cut", "(mtop > 105.)&&(mtop < 220.)")
    N_cuts.Add("mW_cut", "(mW > 65.)&&(mW < 105.)")
    # The tau32 / subjet_btag / tau21 cuts are currently disabled:
    #N_cuts.Add("tau32_cut",       "(tau32 > 0.0)&&(tau32 < %s)"%(cuts['tau32']))
    #N_cuts.Add("subjet_btag_cut", "(subjet_btag > %s)&&(subjet_btag < 1.)"%(cuts['sjbtag']))
    #N_cuts.Add("tau21_cut",       "(tau21 > 0.0)&&(tau21 < %s)"%(cuts['tau21']))
    N_cuts.Add("lead_jet_deepAK8_MD_WvsQCD_cut",
               "lead_jet_deepAK8_MD_WvsQCD > 0.9")
    N_cuts.Add("sublead_jet_deepAK8_MD_WvsQCD_cut",
               "sublead_jet_deepAK8_MD_WvsQCD > 0.9")
    N_cuts.Add("lead_jet_deepAK8_MD_TvsQCD_cut",
               "lead_jet_deepAK8_MD_TvsQCD > 0.9")
    N_cuts.Add("sublead_jet_deepAK8_MD_TvsQCD_cut",
               "sublead_jet_deepAK8_MD_TvsQCD > 0.9")

    # Organize N-1 of tagging variables when assuming top is always leading
    nodeToPlot = a.Apply([jets, plotting_vars])
    nminus1Nodes = a.Nminus1(
        N_cuts, nodeToPlot
    )  # constructs N nodes with a different N-1 selection for each
    nminus1Hists = HistGroup('nminus1Hists')
    # [number of bins, lower edge, upper edge] for every plottable variable
    binning = {
        'mtop': [25, 50, 300],
        'mW': [25, 30, 270],
        'tau32': [20, 0, 1],
        'tau21': [20, 0, 1],
        'subjet_btag': [20, 0, 1],
        'deltaY': [20, 0, 2.0],
        'lead_jet_deepAK8_MD_WvsQCD': [20, 0, 1],
        'sublead_jet_deepAK8_MD_WvsQCD': [20, 0, 1],
        'lead_jet_deepAK8_MD_TvsQCD': [20, 0, 1],
        'sublead_jet_deepAK8_MD_TvsQCD': [20, 0, 1]
    }
    # Add hists to group
    for nkey in nminus1Nodes.keys():
        # 'full' is the node with every cut applied - not an N-1 selection
        if nkey == 'full':
            continue
        # Recover the variable name from the node key
        var = nkey.replace('_cut', '').replace('minus_', '')
        nbins, xlow, xhigh = binning[var]
        hist_tuple = (var, var, nbins, xlow, xhigh)
        hist = nminus1Nodes[nkey].DataFrame.Histo1D(hist_tuple, var, 'norm')
        hist.GetValue()  # This gets the actual TH1 instead of a pointer to the TH1
        nminus1Hists.Add(var, hist)

    # Write the node tree once; it does not change while the histograms are
    # being filled, so there is no point in rewriting the file per histogram.
    a.PrintNodeTree('exercises/nminus1_tree.dot')

    # Return the group
    return nminus1Hists
def select(setname, year):
    """Book the dijet selection for one dataset and histogram its variables.

    Opens ``<rootfile_path>/<setname>_bstar<year>.root`` with a TIMBER
    analyzer, applies the flag/trigger filters and the back-to-back dijet
    selection, defines a set of per-jet plotting variables plus the number of
    loose b-tagged AK4 jets, writes the node tree to
    ``<plotdir>/signal_tree.dot`` and fills one weighted histogram per key of
    the module-level ``varnames``.

    Args:
        setname: Dataset name; used in the input file name, the histogram
            names and the normalization lookup.
        year: Year tag, one of ``'16'``, ``'17'`` or ``'18'``; also selects
            the DeepCSV working points.

    Returns:
        A ``HistGroup`` named ``<setname>_<year>`` with one histogram per key
        of ``varnames``, weighted by the ``norm`` column.

    Raises:
        ValueError: If no DeepCSV working points are defined for ``year``.
    """
    ROOT.ROOT.EnableImplicitMT(
        2)  # Just use two threads - no need to kill the interactive nodes

    # Initialize TIMBER analyzer
    file_path = '%s/%s_bstar%s.root' % (rootfile_path, setname, year)
    a = analyzer(file_path)

    # Determine normalization weight
    if not a.isData:
        # For MC we need to apply the xsec * lumi / NumberOfGeneratedEvents weight
        # This function is a helper defined here: https://github.com/cmantill/BstarToTW_CMSDAS2021/blob/master/helpers.py#L5-L18
        norm = helpers.getNormFactor(setname, year, config, a.genEventCount)
    else:
        norm = 1.

    # Book actions on the RDataFrame

    # First - we will cut on the filters we specified above
    a.Cut('filters', a.GetFlagString(flags))
    a.Cut('trigger', a.GetTriggerString(triggers))

    # Second - we need to calculate if we have two jets (with Id) that are back-to-back
    # The following function will check for jets in opposite hemispheres (of phi) that also pass a jetId
    # it is defined here: https://github.com/cmantill/BstarToTW_CMSDAS2021/blob/master/bstar.cc#L17-L66
    # so first we *define* jetIdx as the index of these two jets back-to-back - ordered by pT
    a.Define('jetIdx', 'hemispherize(FatJet_phi, FatJet_jetId)')

    # Third - we will perform a selection:
    # by requiring at least two fat-jets (step 1) that are back to back (step 2) and that have a minimum pT of 400 (step 3)
    # some of these functions used below such as max() and Sum() are defined in RDataFrame - see the cheatsheet: https://root.cern/doc/master/classROOT_1_1RDataFrame.html#cheatsheet
    a.Cut(
        'nFatJets_cut', 'nFatJet > max(jetIdx[0],jetIdx[1])'
    )  # (step 1) If we don't do this, we may try to access variables of jets that don't exist! (leads to seg fault)
    a.Cut(
        "hemis", "(jetIdx[0] != -1)&&(jetIdx[1] != -1)"
    )  # (step 2) we cut on the variable we just defined - so that both jet indices exist and are different than the default value -1
    a.Cut(
        'pt_cut',
        'FatJet_pt[jetIdx[0]] > 400 && FatJet_pt[jetIdx[1]] > 400')  # (step 3)

    # Now we are ready to define our first variable to plot: lead_jetPt
    a.Define('lead_jetPt', 'FatJet_pt[jetIdx[0]]')

    # Leading-jet variables
    a.Define('lead_softdrop_mass', 'FatJet_msoftdrop[jetIdx[0]]')
    a.Define('lead_jet_pt_nom', 'FatJet_pt_nom[jetIdx[0]]')
    # Take the leading jet's value only: without the index this column would
    # be the tau2 of *every* fat jet in the event, not just the leading one.
    a.Define('lead_tau2', 'FatJet_tau2[jetIdx[0]]')
    a.Define(
        'lead_tau21',
        'FatJet_tau1[jetIdx[0]] > 0 ? FatJet_tau2[jetIdx[0]]/FatJet_tau1[jetIdx[0]] : -1'
    )  # Don't divide by zero
    a.Define(
        'lead_tau32',
        'FatJet_tau2[jetIdx[0]] > 0 ? FatJet_tau3[jetIdx[0]]/FatJet_tau2[jetIdx[0]] : -1'
    )  # Don't divide by zero
    a.Define('lead_deepAK8_Wscore', 'FatJet_deepTagMD_WvsQCD[jetIdx[0]]')
    a.Define('lead_deepAK8_topscore', 'FatJet_deepTagMD_TvsQCD[jetIdx[0]]')

    # Subleading-jet variables
    a.Define('sublead_softdrop_mass', 'FatJet_msoftdrop[jetIdx[1]]')
    a.Define('sublead_jet_pt', 'FatJet_pt[jetIdx[1]]')
    a.Define('sublead_jet_pt_nom', 'FatJet_pt_nom[jetIdx[1]]')
    a.Define(
        'sublead_tau21',
        'FatJet_tau1[jetIdx[1]] > 0 ? FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]] : -1'
    )
    a.Define(
        'sublead_tau32',
        'FatJet_tau2[jetIdx[1]] > 0 ? FatJet_tau3[jetIdx[1]]/FatJet_tau2[jetIdx[1]] : -1'
    )
    a.Define('sublead_deepAK8_Wscore', 'FatJet_deepTagMD_WvsQCD[jetIdx[1]]')
    a.Define('sublead_deepAK8_topscore', 'FatJet_deepTagMD_TvsQCD[jetIdx[1]]')

    # Invariant mass of the two selected jets
    a.Define(
        'lead_vector',
        'hardware::TLvector(FatJet_pt[jetIdx[0]],FatJet_eta[jetIdx[0]],FatJet_phi[jetIdx[0]],FatJet_mass[jetIdx[0]])'
    )
    a.Define(
        'sublead_vector',
        'hardware::TLvector(FatJet_pt[jetIdx[1]],FatJet_eta[jetIdx[1]],FatJet_phi[jetIdx[1]],FatJet_mass[jetIdx[1]])'
    )
    a.Define('invariantMass',
             'hardware::invariantMass({lead_vector,sublead_vector})')

    # To define the number of loose b-jets, let's define the b-tagging working points
    # These [loose, medium, tight] working points are for the DeepCSV variable (ranging between 0 and 1) - saved in NanoAOD as Jet_btagDeepB:
    deepcsv_wps = {
        '16': [0.2217, 0.6321, 0.8953],
        '17': [0.1522, 0.4941, 0.8001],
        '18': [0.1241, 0.4184, 0.7571],
    }
    if year not in deepcsv_wps:
        # Fail with a readable message instead of an IndexError further down
        raise ValueError(
            'No DeepCSV working points defined for year "%s"' % year)
    bcut = deepcsv_wps[year]
    # Then, we use the Sum function of RDataFrame to count the number of AK4Jets with DeepCSV score larger than the loose WP
    a.Define('nbjet_loose',
             'Sum(Jet_btagDeepB > ' + str(bcut[0]) + ')')  # DeepCSV loose WP

    # Finally let's define the normalization weight of the sample as one variable as well
    a.Define('norm', str(norm))

    # A nice functionality of TIMBER is to print all the selections that we have done:
    a.PrintNodeTree(plotdir + '/signal_tree.dot', verbose=True)

    # Binning rules as (substring of the variable name, (nbins, xmin, xmax)).
    # The FIRST matching rule wins, so the order matters: e.g. "lead_tau21"
    # has to be tried before the shorter "lead_tau2".
    binning_rules = [
        ("nbjet", (10, 0, 10)),
        ("lead_jet", (30, 0, 3000)),
        ("lead_softdrop", (30, 0, 300)),
        ("lead_tau21", (30, 0, 1)),
        ("lead_tau2", (30, 0, 1)),
        ("lead_tau32", (30, 0, 1)),
        ("lead_deepAK8_Wscore", (30, 0, 1)),
        ("lead_deepAK8_topscore", (30, 0, 1)),
        ("Mass", (50, 0, 5000)),
    ]
    # Used when no rule matches, so that every variable gets a histogram
    # carrying its OWN name rather than whatever the previous iteration left
    # behind (or an UnboundLocalError on the first iteration).
    default_binning = (30, 0, 1)

    # Now we are ready to save histograms (in a HistGroup)
    out = HistGroup("%s_%s" % (setname, year))
    for varname in varnames.keys():
        histname = '%s_%s_%s' % (setname, year, varname)
        nbins, xmin, xmax = next(
            (bins for key, bins in binning_rules if key in varname),
            default_binning)
        # Arguments that you would normally pass to a TH1 (histname, histname, number of bins, min bin, max bin)
        hist_tuple = (histname, histname, nbins, xmin, xmax)
        hist = a.GetActiveNode().DataFrame.Histo1D(
            hist_tuple, varname, 'norm'
        )  # Project dataframe into a histogram (hist name/binning tuple, variable to plot from dataframe, weight)
        hist.GetValue(
        )  # This gets the actual TH1 instead of a pointer to the TH1
        out.Add(varname, hist)  # Add it to our group

    # Return the group
    return out
# Example #4
# 0
def run(args):
    """Run the preselection and write pass/fail 2D templates to a ROOT file.

    Applies the flag/trigger filters and the dijet kinematic selection to
    ``args.input``, decides which of the two selected jets is the W and which
    is the top candidate, applies the W-tag, mtw and deltaY cuts and finally
    splits the events on the top tag. The (mtop, mtw) distributions of the
    passing and failing events are written as ``MtwvMtPass`` and
    ``MtwvMtFail`` to ``Presel_<outputname>.root``.

    Args:
        args: Options object. The attributes read here are ``input`` (file
            handed to the analyzer), ``config`` (JSON configuration holding
            the ``CUTS`` section), ``year`` (key into ``CUTS``) and ``deep``
            (use the DeepAK8 taggers instead of the tau21/tau32-based ones).
    """
    ROOT.ROOT.EnableImplicitMT(4)
    a = analyzer(args.input)

    # Config loading - will have cuts, xsec, and lumi
    config = OpenJSON(args.config)
    cuts = config['CUTS'][args.year]

    # Determine normalization weight
    if not a.isData:
        # NOTE(review): `setname` is not defined inside this function - it
        # must exist at module level; confirm against the rest of the file.
        norm = helpers.getNormFactor(setname, args.year, args.config)
    else:
        norm = 1.

    # Initial cuts
    a.Cut('filters', a.GetFlagString(flags))
    a.Cut('trigger', a.GetTriggerString(triggers))
    a.Define(
        'jetIdx', 'hemispherize(FatJet_phi, FatJet_jetId)'
    )  # need to calculate if we have two jets (with Id) that are back-to-back
    a.Cut(
        'nFatJets_cut', 'nFatJet > max(jetIdx[0], jetIdx[1])'
    )  # If we don't do this, we may try to access variables of jets that don't exist! (leads to seg fault)
    a.Cut("hemis",
          "(jetIdx[0] != -1)&&(jetIdx[1] != -1)")  # cut on that calculation

    # Kinematics
    a.Cut("pt_cut", "FatJet_pt[jetIdx[0]] > 400 && FatJet_pt[jetIdx[1]] > 400")
    a.Cut(
        "eta_cut",
        "abs(FatJet_eta[jetIdx[0]]) < 2.4 && abs(FatJet_eta[jetIdx[1]]) < 2.4")

    #################################
    # Build some variables for jets #
    #################################
    # Wtagging decision logic
    # This statement returns 0 for no tag, 1 for lead tag, 2 for sublead tag, and 3 for both tag (which is equivalent to 2 for the sake of deciding what is the W)
    wtag_str = "1*Wtag(FatJet_tau2[jetIdx[0]]/FatJet_tau1[jetIdx[0]],0,{0}, FatJet_msoftdrop[jetIdx[0]],65,105) + 2*Wtag(FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]],0,{0}, FatJet_msoftdrop[jetIdx[1]],65,105)".format(
        cuts['tau21'])
    if args.deep:
        wtag_str = "1*WtagDeepAK8(FatJet_deepTagMD_WvsQCD[jetIdx[0]],{0},1, FatJet_msoftdrop[jetIdx[0]],65,105) + 2*WtagDeepAK8(FatJet_deepTagMD_WvsQCD[jetIdx[1]],{0},1, FatJet_msoftdrop[jetIdx[1]],65,105)".format(
            cuts['deepAK8w'])

    jets = VarGroup('jets')
    jets.Add('wtag_bit', wtag_str)
    # top_bit is the *position* (0 = leading, 1 = subleading) of the top
    # candidate inside jetIdx: sublead W (wtag 2 or 3) -> 0, lead W (wtag 1)
    # -> 1, no W tag -> -1
    jets.Add('top_bit', '(wtag_bit & 2)? 0: (wtag_bit & 1)? 1: -1')
    # top_index / w_index are indices into the FatJet collection
    jets.Add('top_index', 'top_bit >= 0 ? jetIdx[top_bit] : -1')
    # The W candidate is the other selected jet. This must be decided from the
    # position (top_bit), not from top_index: top_index is a collection index
    # and only coincides with the position when jetIdx is exactly [0, 1].
    # Otherwise the W would be lost (-1) or end up being the same jet as the top.
    jets.Add('w_index',
             'top_bit == 0 ? jetIdx[1] : top_bit == 1 ? jetIdx[0] : -1')
    # Calculate some new columns that we'd like to cut on (that were costly to do before the other filtering)
    jets.Add(
        "lead_vect",
        "hardware::TLvector(FatJet_pt[jetIdx[0]],FatJet_eta[jetIdx[0]],FatJet_phi[jetIdx[0]],FatJet_msoftdrop[jetIdx[0]])"
    )
    jets.Add(
        "sublead_vect",
        "hardware::TLvector(FatJet_pt[jetIdx[1]],FatJet_eta[jetIdx[1]],FatJet_phi[jetIdx[1]],FatJet_msoftdrop[jetIdx[1]])"
    )
    # Signed difference; the absolute value is taken in deltaY_cut below
    jets.Add("deltaY", "lead_vect.Rapidity()-sublead_vect.Rapidity()")
    jets.Add("mtw", "hardware::invariantMass({lead_vect,sublead_vect})")

    # W and top: every variable falls back to a negative dummy value when the
    # corresponding candidate was not found (index == -1)
    tagging_vars = VarGroup('tagging_vars')
    tagging_vars.Add("mtop",
                     "top_index > -1 ? FatJet_msoftdrop[top_index] : -10")
    tagging_vars.Add("mW", "w_index   > -1 ? FatJet_msoftdrop[w_index]: -10")
    tagging_vars.Add(
        "tau32",
        "top_index > -1 ? FatJet_tau3[top_index]/FatJet_tau2[top_index]: -1")
    tagging_vars.Add(
        "subjet_btag",
        "top_index > -1 ? max(SubJet_btagDeepB[FatJet_subJetIdx1[top_index]],SubJet_btagDeepB[FatJet_subJetIdx2[top_index]]) : -1"
    )
    tagging_vars.Add(
        "tau21",
        "w_index   > -1 ? FatJet_tau2[w_index]/FatJet_tau1[w_index]: -1")
    tagging_vars.Add(
        "deepAK8_MD_TvsQCD",
        "top_index > -1 ? FatJet_deepTagMD_TvsQCD[top_index] : -1")
    tagging_vars.Add("deepAK8_MD_WvsQCD",
                     "w_index > -1 ? FatJet_deepTagMD_WvsQCD[w_index] : -1")

    toptag_str = "TopTag(tau32,0,{0}, subjet_btag,{1},1, mtop,50,1000)==1".format(
        cuts['tau32'], cuts['sjbtag'])
    if args.deep:
        toptag_str = "TopTagDeepAK8(deepAK8_MD_TvsQCD,{0},1, mtop,50,1000)==1".format(
            cuts['deepAK8top'])
    tagging_vars.Add("wtag", 'wtag_bit>0')
    tagging_vars.Add("top_tag", toptag_str)

    # Write cut on new column
    jet_sel = CutGroup('jet_sel')
    jet_sel.Add('wtag_cut', 'wtag')
    jet_sel.Add("mtw_cut", "mtw>1000.")
    jet_sel.Add('deltaY_cut', 'abs(deltaY)<1.6')

    #########
    # Apply #
    #########
    a.Apply([jets, tagging_vars, jet_sel])
    a.Define('norm', str(norm))

    # Finally discriminate on top tag: gives one node for the events that
    # pass the cut and one for those that fail it
    final = a.Discriminate("top_tag_cut", "top_tag==1")

    # NOTE(review): `outputname` is not defined inside this function - it
    # must exist at module level; confirm against the rest of the file.
    outfile = ROOT.TFile.Open('Presel_%s.root' % (outputname), 'RECREATE')
    # (name, title, x bins, x min, x max, y bins, y min, y max): mtop on x, mtw on y
    hpass = final["pass"].DataFrame.Histo2D(
        ('MtwvMtPass', 'MtwvMtPass', 60, 50, 350, 70, 500, 4000), 'mtop',
        'mtw', 'norm')
    hfail = final["fail"].DataFrame.Histo2D(
        ('MtwvMtFail', 'MtwvMtFail', 60, 50, 350, 70, 500, 4000), 'mtop',
        'mtw', 'norm')
    outfile.cd()
    hpass.Write()
    hfail.Write()
    outfile.Close()
# Example #5
# 0
def select(setname, year):
    """Book the dijet preselection for one dataset and histogram its variables.

    Opens ``<rootfile_path>/<setname>_bstar<year>.root`` with a TIMBER
    analyzer, applies the flag/trigger filters, the back-to-back dijet
    selection and a cut on the dijet invariant mass, defines the per-jet
    plotting variables and the b-tagged AK4 jet multiplicities, and fills one
    weighted histogram per key of the module-level ``varnames``.

    Args:
        setname: Dataset name; used in the input file name, the histogram
            names and the normalization lookup.
        year: Year tag, one of ``'16'``, ``'17'`` or ``'18'``; also selects
            the DeepCSV working points.

    Returns:
        A ``HistGroup`` named ``<setname>_<year>`` with one histogram per key
        of ``varnames``, weighted by the ``norm`` column.

    Raises:
        ValueError: If no DeepCSV working points are defined for ``year``.
    """
    ROOT.ROOT.EnableImplicitMT(
        2)  # Just use two threads - no need to kill the interactive nodes

    # Initialize TIMBER analyzer
    file_path = '%s/%s_bstar%s.root' % (rootfile_path, setname, year)
    a = analyzer(file_path)

    # Determine normalization weight (data is left unweighted)
    if not a.isData:
        norm = helpers.getNormFactor(setname, year, config, a.genEventCount)
    else:
        norm = 1.

    # Book actions on the RDataFrame
    a.Cut('filters', a.GetFlagString(flags))
    a.Cut('trigger', a.GetTriggerString(triggers))
    a.Define(
        'jetIdx', 'hemispherize(FatJet_phi, FatJet_jetId)'
    )  # need to calculate if we have two jets (with Id) that are back-to-back
    a.Cut(
        'nFatJets_cut', 'nFatJet > max(jetIdx[0],jetIdx[1])'
    )  # If we don't do this, we may try to access variables of jets that don't exist! (leads to seg fault)
    a.Cut("hemis",
          "(jetIdx[0] != -1)&&(jetIdx[1] != -1)")  # cut on that calculation
    a.Cut('pt_cut', 'FatJet_pt[jetIdx[0]] > 400 && FatJet_pt[jetIdx[1]] > 400')
    a.Cut(
        'eta_cut',
        'abs(FatJet_eta[jetIdx[0]]) < 2.4 && abs(FatJet_eta[jetIdx[1]]) < 2.4')
    a.Cut(
        'mjet_cut',
        'FatJet_msoftdrop[jetIdx[0]] > 50 && FatJet_msoftdrop[jetIdx[1]] > 50')
    # jetIdx holds indices into the FatJet collection, so the four-vectors
    # must be built from FatJet_* branches. Indexing the Jet_* collection
    # with them would pick unrelated jets and is not protected by
    # nFatJets_cut against reading past the end of the collection.
    a.Define(
        'lead_vector',
        'hardware::TLvector(FatJet_pt[jetIdx[0]],FatJet_eta[jetIdx[0]],FatJet_phi[jetIdx[0]],FatJet_mass[jetIdx[0]])'
    )
    a.Define(
        'sublead_vector',
        'hardware::TLvector(FatJet_pt[jetIdx[1]],FatJet_eta[jetIdx[1]],FatJet_phi[jetIdx[1]],FatJet_mass[jetIdx[1]])'
    )
    a.Define('invariantMass',
             'hardware::invariantMass({lead_vector,sublead_vector})')
    a.Cut('mtw_cut', 'invariantMass > 1200')
    a.Define(
        'deltaphi',
        'hardware::DeltaPhi(FatJet_phi[jetIdx[0]],FatJet_phi[jetIdx[1]])')
    # N-subjettiness ratios, written as: condition ? <do if true> : <do if false>
    a.Define(
        'lead_tau32',
        'FatJet_tau2[jetIdx[0]] > 0 ? FatJet_tau3[jetIdx[0]]/FatJet_tau2[jetIdx[0]] : -1'
    )  # Conditional to make sure tau2 != 0 for division
    a.Define(
        'sublead_tau32',
        'FatJet_tau2[jetIdx[1]] > 0 ? FatJet_tau3[jetIdx[1]]/FatJet_tau2[jetIdx[1]] : -1'
    )  # Conditional to make sure tau2 != 0 for division
    a.Define(
        'lead_tau21',
        'FatJet_tau1[jetIdx[0]] > 0 ? FatJet_tau2[jetIdx[0]]/FatJet_tau1[jetIdx[0]] : -1'
    )  # Conditional to make sure tau1 != 0 for division
    a.Define(
        'sublead_tau21',
        'FatJet_tau1[jetIdx[1]] > 0 ? FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]] : -1'
    )  # Conditional to make sure tau1 != 0 for division
    a.Define('lead_deepAK8_TvsQCD', 'FatJet_deepTag_TvsQCD[jetIdx[0]]')
    a.Define('sublead_deepAK8_TvsQCD', 'FatJet_deepTag_TvsQCD[jetIdx[1]]')
    a.Define('lead_deepAK8_WvsQCD', 'FatJet_deepTag_WvsQCD[jetIdx[0]]')
    a.Define('sublead_deepAK8_WvsQCD', 'FatJet_deepTag_WvsQCD[jetIdx[1]]')

    # [loose, medium, tight] DeepCSV working points per year
    deepcsv_wps = {
        '16': [0.2217, 0.6321, 0.8953],
        '17': [0.1522, 0.4941, 0.8001],
        '18': [0.1241, 0.4184, 0.7571],
    }
    if year not in deepcsv_wps:
        # Fail with a readable message instead of an IndexError further down
        raise ValueError(
            'No DeepCSV working points defined for year "%s"' % year)
    bcut = deepcsv_wps[year]
    a.Define('nbjet_loose',
             'Sum(Jet_btagDeepB > ' + str(bcut[0]) + ')')  # DeepCSV loose WP
    a.Define('nbjet_medium',
             'Sum(Jet_btagDeepB > ' + str(bcut[1]) + ')')  # DeepCSV medium WP
    a.Define('nbjet_tight',
             'Sum(Jet_btagDeepB > ' + str(bcut[2]) + ')')  # DeepCSV tight WP
    a.Define('lead_jetPt', 'FatJet_pt[jetIdx[0]]')
    a.Define('sublead_jetPt', 'FatJet_pt[jetIdx[1]]')
    a.Define('lead_softdrop_mass', 'FatJet_msoftdrop[jetIdx[0]]')
    a.Define('sublead_softdrop_mass', 'FatJet_msoftdrop[jetIdx[1]]')
    a.Define('norm', str(norm))

    # Book a group to save the histograms
    out = HistGroup("%s_%s" % (setname, year))
    for varname in varnames.keys():
        histname = '%s_%s_%s' % (setname, year, varname)
        # Arguments for binning that you would normally pass to a TH1.
        # The binning is picked from the variable name; the first match wins
        # and anything unmatched gets 20 bins between 0 and 1.
        if "nbjet" in varname:
            hist_tuple = (histname, histname, 10, 0, 10)
        elif "tau" in varname:
            hist_tuple = (histname, histname, 20, 0, 1)
        elif "Pt" in varname:
            hist_tuple = (histname, histname, 30, 400, 1000)
        elif "phi" in varname:
            hist_tuple = (histname, histname, 30, -3.2, 3.2)
        elif "softdrop_mass" in varname:
            hist_tuple = (histname, histname, 30, 0, 300)
        else:
            hist_tuple = (histname, histname, 20, 0, 1)
        hist = a.GetActiveNode().DataFrame.Histo1D(
            hist_tuple, varname, 'norm'
        )  # Project dataframe into a histogram (hist name/binning tuple, variable to plot from dataframe, weight)
        hist.GetValue(
        )  # This gets the actual TH1 instead of a pointer to the TH1
        out.Add(varname, hist)  # Add it to our group

    # Return the group
    return out