def select(setname, year):
    """Apply the two-jet preselection to one dataset and histogram ``varnames``.

    Opens ``<rootfile_path>/<setname>_bstar<year>.root`` with a TIMBER
    ``analyzer``, applies the flag and trigger filters followed by the
    two-jet kinematic cuts, defines the N-subjettiness ratios of both
    selected jets plus a constant ``norm`` weight column, and fills one
    20-bin histogram over [0, 1] for every key of ``varnames``.

    Relies on names defined outside this block: ``rootfile_path``, ``config``,
    ``flags``, ``triggers``, ``varnames``, ``helpers`` and the C++ function
    ``hemispherize``.

    Args:
        setname: Dataset name; used in the input file name and in the
            histogram names.
        year: Year tag; used in the input file name and in the histogram
            names.

    Returns:
        A ``HistGroup`` named ``"<setname>_<year>"`` holding, for each key of
        ``varnames``, the (already filled) ``Histo1D`` result.
    """
    ROOT.ROOT.EnableImplicitMT(2)  # Just use two threads - no need to kill the interactive nodes

    # Initialize TIMBER analyzer
    file_path = '%s/%s_bstar%s.root' % (rootfile_path, setname, year)
    a = analyzer(file_path)

    # Determine normalization weight (data is left unweighted)
    if not a.isData:
        norm = helpers.getNormFactor(setname, year, config, a.genEventCount)
    else:
        norm = 1.

    # Book actions on the RDataFrame
    a.Cut('filters', a.GetFlagString(flags))
    a.Cut('trigger', a.GetTriggerString(triggers))
    # Need to calculate if we have two jets (with Id) that are back-to-back
    a.Define('jetIdx', 'hemispherize(FatJet_phi, FatJet_jetId)')
    # If we don't do this, we may try to access variables of jets that don't
    # exist! (leads to seg fault)
    a.Cut('nFatJets_cut', 'nFatJet > max(jetIdx[0],jetIdx[1])')
    a.Cut("hemis", "(jetIdx[0] != -1)&&(jetIdx[1] != -1)")  # cut on that calculation
    a.Cut('pt_cut', 'FatJet_pt[jetIdx[0]] > 400 && FatJet_pt[jetIdx[1]] > 400')
    a.Cut('eta_cut', 'abs(FatJet_eta[jetIdx[0]]) < 2.4 && abs(FatJet_eta[jetIdx[1]]) < 2.4')
    a.Cut('mjet_cut', 'FatJet_msoftdrop[jetIdx[0]] > 50 && FatJet_msoftdrop[jetIdx[1]] > 50')
    a.Cut('mtw_cut', 'analyzer::invariantMass(jetIdx[0],jetIdx[1],FatJet_pt,FatJet_eta,FatJet_phi,FatJet_msoftdrop) > 1200')

    # N-subjettiness ratios. The C++ ternary (condition ? <if true> : <if false>)
    # returns -1 instead of dividing when the denominator is not positive.
    a.Define('lead_tau32', 'FatJet_tau2[jetIdx[0]] > 0 ? FatJet_tau3[jetIdx[0]]/FatJet_tau2[jetIdx[0]] : -1')
    a.Define('sublead_tau32', 'FatJet_tau2[jetIdx[1]] > 0 ? FatJet_tau3[jetIdx[1]]/FatJet_tau2[jetIdx[1]] : -1')
    a.Define('lead_tau21', 'FatJet_tau1[jetIdx[0]] > 0 ? FatJet_tau2[jetIdx[0]]/FatJet_tau1[jetIdx[0]] : -1')
    a.Define('sublead_tau21', 'FatJet_tau1[jetIdx[1]] > 0 ? FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]] : -1')
    a.Define('norm', str(norm))

    # Book a group to save the histograms
    out = HistGroup("%s_%s" % (setname, year))
    node = a.GetActiveNode()
    booked = []
    for varname in varnames:
        histname = '%s_%s_%s' % (setname, year, varname)
        # Arguments for binning that you would normally pass to a TH1
        hist_tuple = (histname, histname, 20, 0, 1)
        # Project dataframe into a histogram
        # (hist name/binning tuple, variable to plot from dataframe, weight)
        hist = node.DataFrame.Histo1D(hist_tuple, varname, 'norm')
        out.Add(varname, hist)  # Add it to our group
        booked.append(hist)

    # Trigger the event loop only after everything is booked: calling
    # GetValue() while still booking would re-run the loop per histogram.
    for hist in booked:
        hist.GetValue()

    # Return the group
    return out
def select(setname, year):
    """Run the N-1 study of the tagging cuts for one dataset.

    Opens ``<rootfile_path>/<setname>_bstar<year>.root`` with a TIMBER
    ``analyzer``, applies the flag/trigger filters and the two-jet kinematic
    preselection, builds the jet and plotting variables, and uses
    ``analyzer.Nminus1`` to obtain one node per cut in which every *other*
    cut of the group is applied. The variable belonging to each cut is then
    histogrammed on its N-1 node, weighted by the ``norm`` column.

    The leading jet is assumed to be the top candidate and the subleading
    jet the W candidate for all plotting variables.

    Relies on names defined outside this block: ``rootfile_path``, ``config``,
    ``flags``, ``triggers``, ``cuts``, ``helpers`` and the C++ functions
    ``hemispherize`` and ``Wtag``.

    Args:
        setname: Dataset name; used in the input file name.
        year: Year tag; used in the input file name.

    Returns:
        A ``HistGroup`` named ``'nminus1Hists'`` keyed by variable name.

    Side effects:
        Writes the node tree to ``exercises/nminus1_tree.dot``.
    """
    ROOT.ROOT.EnableImplicitMT(2)  # Just use two threads - no need to kill the interactive nodes

    # Initialize TIMBER analyzer
    file_path = '%s/%s_bstar%s.root' % (rootfile_path, setname, year)
    a = analyzer(file_path)

    # Determine normalization weight (data is left unweighted)
    # NOTE(review): the other selections in this file also pass
    # a.genEventCount to getNormFactor - confirm which signature is in use.
    if not a.isData:
        norm = helpers.getNormFactor(setname, year, config)
    else:
        norm = 1.

    # Book actions on the RDataFrame
    a.Cut('filters', a.GetFlagString(flags))
    a.Cut('trigger', a.GetTriggerString(triggers))
    # Need to calculate if we have two jets (with Id) that are back-to-back
    a.Define('jetIdx', 'hemispherize(FatJet_phi, FatJet_jetId)')
    # If we don't do this, we may try to access variables of jets that don't
    # exist! (leads to seg fault)
    a.Cut('nFatJets_cut', 'nFatJet > max(jetIdx[0],jetIdx[1])')
    a.Cut("hemis", "(jetIdx[0] != -1)&&(jetIdx[1] != -1)")  # cut on that calculation
    a.Cut('pt_cut', 'FatJet_pt[jetIdx[0]] > 400 && FatJet_pt[jetIdx[1]] > 400')
    a.Cut('eta_cut', 'abs(FatJet_eta[jetIdx[0]]) < 2.4 && abs(FatJet_eta[jetIdx[1]]) < 2.4')
    a.Define('norm', str(norm))

    #################################
    # Build some variables for jets #
    #################################
    # Wtagging decision logic
    # Returns 0 for no tag, 1 for lead tag, 2 for sublead tag, and 3 for both
    # tag (which is physics-wise equivalent to 2)
    wtag_str = "1*Wtag(FatJet_tau2[jetIdx[0]]/FatJet_tau1[jetIdx[0]],0,{0}, FatJet_msoftdrop[jetIdx[0]],65,105) + 2*Wtag(FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]],0,{0}, FatJet_msoftdrop[jetIdx[1]],65,105)".format(cuts['tau21'])

    jets = VarGroup('jets')
    jets.Add('wtag_bit', wtag_str)
    # top_bit is the *slot* in jetIdx of the top candidate:
    # wtag 2 or 3 (subleading W) -> 0, wtag 1 (leading W) -> 1, else -1
    jets.Add('top_bit', '(wtag_bit & 2)? 0: (wtag_bit & 1)? 1: -1')
    jets.Add('top_index', 'top_bit >= 0 ? jetIdx[top_bit] : -1')
    # The W candidate is the other slot. This must be decided from top_bit
    # (slot 0/1), not from top_index, which is a FatJet collection index and
    # only coincides with the slot when jetIdx happens to be {0, 1}.
    jets.Add('w_index', 'top_bit == 0 ? jetIdx[1] : top_bit == 1 ? jetIdx[0] : -1')
    # Calculate some new columns that we'd like to cut on (that were costly
    # to do before the other filtering)
    jets.Add("lead_vect", "hardware::TLvector(FatJet_pt[jetIdx[0]],FatJet_eta[jetIdx[0]],FatJet_phi[jetIdx[0]],FatJet_msoftdrop[jetIdx[0]])")
    jets.Add("sublead_vect", "hardware::TLvector(FatJet_pt[jetIdx[1]],FatJet_eta[jetIdx[1]],FatJet_phi[jetIdx[1]],FatJet_msoftdrop[jetIdx[1]])")
    jets.Add("deltaY", "abs(lead_vect.Rapidity()-sublead_vect.Rapidity())")
    jets.Add("mtw", "hardware::invariantMass({lead_vect,sublead_vect})")

    #########
    # N - 1 #
    #########
    plotting_vars = VarGroup('plotting_vars')  # assume leading is top and subleading is W
    plotting_vars.Add("mtop", "FatJet_msoftdrop[jetIdx[0]]")
    plotting_vars.Add("mW", "FatJet_msoftdrop[jetIdx[1]]")
    # Guard the ratios against a zero denominator (-1 lands in the underflow)
    plotting_vars.Add("tau32", "FatJet_tau2[jetIdx[0]] > 0 ? FatJet_tau3[jetIdx[0]]/FatJet_tau2[jetIdx[0]] : -1")
    plotting_vars.Add("subjet_btag", "max(SubJet_btagDeepB[FatJet_subJetIdx1[jetIdx[0]]],SubJet_btagDeepB[FatJet_subJetIdx2[jetIdx[0]]])")
    plotting_vars.Add("tau21", "FatJet_tau1[jetIdx[1]] > 0 ? FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]] : -1")
    plotting_vars.Add("lead_jet_deepAK8_MD_WvsQCD", "FatJet_deepTagMD_WvsQCD[jetIdx[0]]")
    plotting_vars.Add("sublead_jet_deepAK8_MD_WvsQCD", "FatJet_deepTagMD_WvsQCD[jetIdx[1]]")
    plotting_vars.Add("lead_jet_deepAK8_MD_TvsQCD", "FatJet_deepTagMD_TvsQCD[jetIdx[0]]")
    plotting_vars.Add("sublead_jet_deepAK8_MD_TvsQCD", "FatJet_deepTagMD_TvsQCD[jetIdx[1]]")

    N_cuts = CutGroup('Ncuts')  # cuts
    N_cuts.Add("deltaY_cut", "deltaY<1.6")
    N_cuts.Add("mtop_cut", "(mtop > 105.)&&(mtop < 220.)")
    N_cuts.Add("mW_cut", "(mW > 65.)&&(mW < 105.)")
    # The tau32 / subjet_btag / tau21 cuts are currently disabled:
    #N_cuts.Add("tau32_cut", "(tau32 > 0.0)&&(tau32 < %s)"%(cuts['tau32']))
    #N_cuts.Add("subjet_btag_cut", "(subjet_btag > %s)&&(subjet_btag < 1.)"%(cuts['sjbtag']))
    #N_cuts.Add("tau21_cut", "(tau21 > 0.0)&&(tau21 < %s)"%(cuts['tau21']))
    N_cuts.Add("lead_jet_deepAK8_MD_WvsQCD_cut", "lead_jet_deepAK8_MD_WvsQCD > 0.9")
    N_cuts.Add("sublead_jet_deepAK8_MD_WvsQCD_cut", "sublead_jet_deepAK8_MD_WvsQCD > 0.9")
    N_cuts.Add("lead_jet_deepAK8_MD_TvsQCD_cut", "lead_jet_deepAK8_MD_TvsQCD > 0.9")
    N_cuts.Add("sublead_jet_deepAK8_MD_TvsQCD_cut", "sublead_jet_deepAK8_MD_TvsQCD > 0.9")

    # Organize N-1 of tagging variables when assuming top is always leading
    nodeToPlot = a.Apply([jets, plotting_vars])
    # Constructs N nodes with a different N-1 selection for each
    nminus1Nodes = a.Nminus1(N_cuts, nodeToPlot)
    nminus1Hists = HistGroup('nminus1Hists')
    # [number of bins, lower edge, upper edge] per plotted variable
    binning = {
        'mtop': [25, 50, 300],
        'mW': [25, 30, 270],
        'tau32': [20, 0, 1],
        'tau21': [20, 0, 1],
        'subjet_btag': [20, 0, 1],
        'deltaY': [20, 0, 2.0],
        'lead_jet_deepAK8_MD_WvsQCD': [20, 0, 1],
        'sublead_jet_deepAK8_MD_WvsQCD': [20, 0, 1],
        'lead_jet_deepAK8_MD_TvsQCD': [20, 0, 1],
        'sublead_jet_deepAK8_MD_TvsQCD': [20, 0, 1],
    }

    # Add hists to group
    booked = []
    for nkey in nminus1Nodes.keys():
        if nkey == 'full':
            continue
        # Node key -> name of the variable whose cut was left out
        var = nkey.replace('_cut', '').replace('minus_', '')
        nbins, low, high = binning[var]
        hist = nminus1Nodes[nkey].DataFrame.Histo1D((var, var, nbins, low, high), var, 'norm')
        nminus1Hists.Add(var, hist)
        booked.append(hist)

    # Trigger the event loop only after everything is booked: calling
    # GetValue() while still booking would re-run the loop per histogram.
    for hist in booked:
        hist.GetValue()

    a.PrintNodeTree('exercises/nminus1_tree.dot')

    # Return the group
    return nminus1Hists
def MakeEfficiency(year):
    """Build 2D trigger-efficiency maps and write them to a ROOT file.

    Reads ``../dijet_nano_files/THsnapshot_Data_<year>.root`` through
    ``THClass``. The baseline is the ``HLT_PFJet320==1`` cut. For the
    untagged selection and for each tagger/region combination a denominator
    histogram is booked before ``selection.ApplyTrigs()`` and a numerator
    histogram after it, all in (``m_javg``, ``mth_trig``). Each pair becomes a
    ``ROOT.TEfficiency``.

    Both columns are expected to exist already on the input; they are not
    defined in this function.

    Args:
        year: Year tag; used in the input and output file names and
            forwarded to ``THClass``.

    Side effects:
        Recreates ``THtrigger2D_<year>.root`` and writes, per efficiency, the
        fitted efficiency histogram (``<name>_hist``) and the ``TEfficiency``
        object (``<name>``).
    """
    selection = THClass('../dijet_nano_files/THsnapshot_Data_%s.root' % (year), year, 1, 1)
    selection.OpenForSelection('None')
    hists = HistGroup('out')

    def book(hist_name):
        # Book (m_javg, mth_trig) on whatever node is currently active.
        model = (hist_name, '', 20, 60, 260, 22, 800, 3000)
        hists.Add(hist_name,
                  selection.a.DataFrame.Histo2D(model, 'm_javg', 'mth_trig'))

    # Baseline - no tagging
    noTag = selection.a.Cut('pretrig', 'HLT_PFJet320==1')
    book('preTagDenominator')
    selection.ApplyTrigs()
    book('preTagNumerator')

    # (label, tagger, extra keyword arguments for ApplyTopPick).
    # The signal regions rely on ApplyTopPick's default for `invert`.
    regions = (
        ('DAK8_SR', 'deepTag_TvsQCD', {}),                   # DeepAK8 SR
        ('DAK8_CR', 'deepTag_TvsQCD', {'invert': True}),     # DeepAK8 CR
        ('PN_SR', 'particleNet_TvsQCD', {}),                 # ParticleNet SR
        ('PN_CR', 'particleNet_TvsQCD', {'invert': True}),   # ParticleNet CR
    )
    for label, tagger, pick_kwargs in regions:
        # Every region branches off the same baseline node
        selection.a.SetActiveNode(noTag)
        selection.ApplyTopPick(tagger, **pick_kwargs)
        book('postTagDenominator_' + label)
        selection.ApplyTrigs()
        # Historical key: the post-tag numerators are stored as 'preTagNumerator_*'
        book('preTagNumerator_' + label)

    # Make efficiencies
    effs = {
        "Pretag": ROOT.TEfficiency(hists['preTagNumerator'],
                                   hists['preTagDenominator']),
    }
    for label, _tagger, _pick_kwargs in regions:
        effs[label] = ROOT.TEfficiency(hists['preTagNumerator_' + label],
                                       hists['postTagDenominator_' + label])

    out = ROOT.TFile.Open('THtrigger2D_%s.root' % year, 'RECREATE')
    out.cd()
    for name, eff in effs.items():
        eff_hist = eff.CreateHistogram()
        eff_hist.SetName(name + '_hist')
        eff_hist.SetTitle(name)
        eff_hist.GetXaxis().SetTitle('m_{j}^{avg} (GeV)')
        eff_hist.GetYaxis().SetTitle('m_{jj} (GeV)')
        eff_hist.GetZaxis().SetTitle('Efficiency')
        eff_hist.SetMinimum(0.6)
        eff_hist.SetMaximum(1.0)

        # Fit function with three parameters, (re)created for every
        # efficiency so that each fit starts from the same initial values
        fit_func = ROOT.TF2("eff_func",
                            "1-[0]/10*exp([1]*y/1000)*exp([2]*x/200)",
                            60, 260, 800, 2600)
        for par_index, par_start in enumerate((1, -2, -2)):
            fit_func.SetParameter(par_index, par_start)
        eff_hist.Fit(fit_func)
        eff_hist.Write()

        eff.SetName(name)
        eff.Write()
    out.Close()
def select(setname, year):
    """Apply the two-jet selection to one dataset and histogram ``varnames``.

    Opens ``<rootfile_path>/<setname>_bstar<year>.root`` with a TIMBER
    ``analyzer``, applies the flag/trigger filters and the back-to-back
    two-jet selection, defines a set of leading/subleading jet variables, the
    dijet invariant mass, the loose b-jet multiplicity and a constant ``norm``
    weight column, and fills one histogram per key of ``varnames``.

    Relies on names defined outside this block: ``rootfile_path``, ``config``,
    ``flags``, ``triggers``, ``varnames``, ``plotdir``, ``helpers`` and the
    C++ function ``hemispherize``.

    Args:
        setname: Dataset name; used in the input file name and in the
            histogram names.
        year: Year tag, one of ``'16'``, ``'17'`` or ``'18'`` (an int with
            the same value is accepted for the b-tagging lookup).

    Returns:
        A ``HistGroup`` named ``"<setname>_<year>"`` holding, for each key of
        ``varnames``, the (already filled) ``Histo1D`` result.

    Raises:
        ValueError: If no b-tagging working points are known for ``year``.

    Side effects:
        Writes the node tree to ``<plotdir>/signal_tree.dot``.
    """
    ROOT.ROOT.EnableImplicitMT(2)  # Just use two threads - no need to kill the interactive nodes

    # [loose, medium, tight] working points for the DeepCSV variable (ranging
    # between 0 and 1) - saved in NanoAOD as Jet_btagDeepB.
    # Looked up first so that an unsupported year fails early and clearly
    # instead of with an IndexError on an empty list later on.
    deepcsv_wps = {
        '16': [0.2217, 0.6321, 0.8953],
        '17': [0.1522, 0.4941, 0.8001],
        '18': [0.1241, 0.4184, 0.7571],
    }
    bcut = deepcsv_wps.get(str(year))
    if bcut is None:
        raise ValueError(
            "No DeepCSV working points for year %r (expected one of %s)"
            % (year, ', '.join(sorted(deepcsv_wps))))

    # Initialize TIMBER analyzer
    file_path = '%s/%s_bstar%s.root' % (rootfile_path, setname, year)
    a = analyzer(file_path)

    # Determine normalization weight
    if not a.isData:
        # For MC we need to apply the xsec * lumi / NumberOfGeneratedEvents weight
        # This function is a helper defined here:
        # https://github.com/cmantill/BstarToTW_CMSDAS2021/blob/master/helpers.py#L5-L18
        norm = helpers.getNormFactor(setname, year, config, a.genEventCount)
    else:
        norm = 1.

    # Book actions on the RDataFrame
    # First - we will cut on the filters we specified above
    a.Cut('filters', a.GetFlagString(flags))
    a.Cut('trigger', a.GetTriggerString(triggers))

    # Second - we need to calculate if we have two jets (with Id) that are back-to-back
    # The following function will check for jets in opposite hemispheres (of phi)
    # that also pass a jetId. It is defined here:
    # https://github.com/cmantill/BstarToTW_CMSDAS2021/blob/master/bstar.cc#L17-L66
    # so first we *define* jetIdx as the index of these two jets back-to-back - ordered by pT
    a.Define('jetIdx', 'hemispherize(FatJet_phi, FatJet_jetId)')

    # Third - we will perform a selection:
    # by requiring at least two fat-jets (step 1) that are back to back (step 2)
    # and that have a minimum pT of 400 (step 3).
    # Some of the functions used below such as max() and Sum() are defined in
    # RDataFrame - see the cheatsheet:
    # https://root.cern/doc/master/classROOT_1_1RDataFrame.html#cheatsheet
    # (step 1) If we don't do this, we may try to access variables of jets
    # that don't exist! (leads to seg fault)
    a.Cut('nFatJets_cut', 'nFatJet > max(jetIdx[0],jetIdx[1])')
    # (step 2) we cut on the variable we just defined - so that both jet
    # indices exist and are different from the default value -1
    a.Cut("hemis", "(jetIdx[0] != -1)&&(jetIdx[1] != -1)")
    # (step 3)
    a.Cut('pt_cut', 'FatJet_pt[jetIdx[0]] > 400 && FatJet_pt[jetIdx[1]] > 400')

    # Now we are ready to define our first variable to plot: lead_jetPt
    a.Define('lead_jetPt', 'FatJet_pt[jetIdx[0]]')

    # Soft drop mass
    a.Define('lead_softdrop_mass', 'FatJet_msoftdrop[jetIdx[0]]')

    # More leading-jet variables
    a.Define('lead_jet_pt_nom', 'FatJet_pt_nom[jetIdx[0]]')
    # tau2 of the leading jet only: without the index this column would be
    # the whole FatJet_tau2 vector and the histogram would be filled once per
    # fat jet instead of once per event.
    a.Define('lead_tau2', 'FatJet_tau2[jetIdx[0]]')
    # Don't divide by zero: return -1 when the denominator is not positive
    a.Define('lead_tau21', 'FatJet_tau1[jetIdx[0]] > 0 ? FatJet_tau2[jetIdx[0]]/FatJet_tau1[jetIdx[0]] : -1')
    a.Define('lead_tau32', 'FatJet_tau2[jetIdx[0]] > 0 ? FatJet_tau3[jetIdx[0]]/FatJet_tau2[jetIdx[0]] : -1')
    a.Define('lead_deepAK8_Wscore', 'FatJet_deepTagMD_WvsQCD[jetIdx[0]]')
    a.Define('lead_deepAK8_topscore', 'FatJet_deepTagMD_TvsQCD[jetIdx[0]]')

    # Subleading-jet variables
    a.Define('sublead_softdrop_mass', 'FatJet_msoftdrop[jetIdx[1]]')
    a.Define('sublead_jet_pt', 'FatJet_pt[jetIdx[1]]')
    a.Define('sublead_jet_pt_nom', 'FatJet_pt_nom[jetIdx[1]]')
    a.Define('sublead_tau21', 'FatJet_tau1[jetIdx[1]] > 0 ? FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]] : -1')
    a.Define('sublead_tau32', 'FatJet_tau2[jetIdx[1]] > 0 ? FatJet_tau3[jetIdx[1]]/FatJet_tau2[jetIdx[1]] : -1')
    a.Define('sublead_deepAK8_Wscore', 'FatJet_deepTagMD_WvsQCD[jetIdx[1]]')
    a.Define('sublead_deepAK8_topscore', 'FatJet_deepTagMD_TvsQCD[jetIdx[1]]')

    # Invariant mass of the two selected jets
    a.Define('lead_vector', 'hardware::TLvector(FatJet_pt[jetIdx[0]],FatJet_eta[jetIdx[0]],FatJet_phi[jetIdx[0]],FatJet_mass[jetIdx[0]])')
    a.Define('sublead_vector', 'hardware::TLvector(FatJet_pt[jetIdx[1]],FatJet_eta[jetIdx[1]],FatJet_phi[jetIdx[1]],FatJet_mass[jetIdx[1]])')
    a.Define('invariantMass', 'hardware::invariantMass({lead_vector,sublead_vector})')

    # Our second variable is the number of loose b-jets: we use the Sum
    # function of RDataFrame to count the number of AK4 jets with DeepCSV
    # score larger than the loose WP
    a.Define('nbjet_loose', 'Sum(Jet_btagDeepB > ' + str(bcut[0]) + ')')  # DeepCSV loose WP

    # Finally let's define the normalization weight of the sample as one variable as well
    a.Define('norm', str(norm))

    # A nice functionality of TIMBER is to print all the selections that we have done:
    a.PrintNodeTree(plotdir + '/signal_tree.dot', verbose=True)

    # (substring of the variable name, (number of bins, min bin, max bin)).
    # The first matching rule wins, so the order matters.
    binning_rules = (
        ("nbjet", (10, 0, 10)),
        ("lead_jet", (30, 0, 3000)),
        ("lead_softdrop", (30, 0, 300)),
        ("lead_tau21", (30, 0, 1)),
        ("lead_tau2", (30, 0, 1)),
        ("lead_tau32", (30, 0, 1)),
        ("lead_deepAK8_Wscore", (30, 0, 1)),
        ("lead_deepAK8_topscore", (30, 0, 1)),
        ("Mass", (50, 0, 5000)),
    )
    # Used when no rule matches; previously such a variable raised a
    # NameError or silently reused the binning of the previous variable.
    default_binning = (20, 0, 1)

    # Now we are ready to save histograms (in a HistGroup)
    out = HistGroup("%s_%s" % (setname, year))
    node = a.GetActiveNode()
    booked = []
    for varname in varnames:
        histname = '%s_%s_%s' % (setname, year, varname)
        nbins, low, high = default_binning
        for pattern, bins in binning_rules:
            if pattern in varname:
                nbins, low, high = bins
                break
        # Arguments for binning that you would normally pass to a TH1
        # (histname, histname, number of bins, min bin, max bin)
        hist_tuple = (histname, histname, nbins, low, high)
        # Project dataframe into a histogram
        # (hist name/binning tuple, variable to plot from dataframe, weight)
        hist = node.DataFrame.Histo1D(hist_tuple, varname, 'norm')
        out.Add(varname, hist)  # Add it to our group
        booked.append(hist)

    # Trigger the event loop only after everything is booked: calling
    # GetValue() while still booking would re-run the loop per histogram.
    for hist in booked:
        hist.GetValue()

    # Return the group
    return out
def main(args):
    """Produce the jet tagger/mass study histograms for one dataset.

    Opens ``dijet_nano/<setname>_<era>_snapshot.txt`` through ``THClass`` and
    books:

    * tagger scores of the jets inside the top / Higgs mass windows,
    * jet masses of the jets passing a tagger score > 0.9,
    * for each top tagger, the 2D distribution of the top-Higgs mass
      difference versus the top-Higgs tagger score difference, for the
      tagger-based assignment (all / not gen-matched / gen-matched) and for
      the truth-based assignment from ``ApplyTopPickViaMatch``.

    Args:
        args: Parsed options; ``threads``, ``setname`` and ``era`` are read.

    Side effects:
        Recreates ``rootfiles/THjetstudy_<setname>_<era>.root`` and writes all
        histograms to it, writes the node tree to ``NodeTree.pdf`` and prints
        the elapsed time.
    """
    ROOT.ROOT.EnableImplicitMT(args.threads)
    start = time.time()

    selection = THClass(
        'dijet_nano/%s_%s_snapshot.txt' % (args.setname, args.era),
        int(args.era), 1, 1)
    selection.OpenForSelection('None')

    jetPlots = HistGroup('jetPlots')

    def book_1d(name, title, nbins, low, high):
        # Book a 1D histogram of column `name` on the currently active node.
        jetPlots.Add(
            name,
            selection.a.DataFrame.Histo1D((name, title, nbins, low, high), name))

    # Taggers after mass selection
    selection.a.Define('TopMassBools', 'Dijet_msoftdrop_corrT > 105 && Dijet_msoftdrop_corrT < 210')
    selection.a.Define('DAK8TopScoresInMassWindow', 'Dijet_deepTag_TvsQCD[TopMassBools]')
    selection.a.Define('PNTopScoresInMassWindow', 'Dijet_particleNet_TvsQCD[TopMassBools]')
    book_1d('DAK8TopScoresInMassWindow', 'DeepAK8 top score for jets in top mass window', 50, 0, 1)
    book_1d('PNTopScoresInMassWindow', 'ParticleNet top score for jets in top mass window', 50, 0, 1)

    selection.a.Define('HiggsMassBools', 'Dijet_msoftdrop_corrH > 100 && Dijet_msoftdrop_corrH < 140')
    selection.a.Define('DAK8HiggsScoresInMassWindow', 'Dijet_deepTagMD_HbbvsQCD[HiggsMassBools]')
    selection.a.Define('PNHiggsScoresInMassWindow', 'Dijet_particleNet_HbbvsQCD[HiggsMassBools]')
    book_1d('DAK8HiggsScoresInMassWindow', 'DeepAK8 Higgs score for jets in Higgs mass window', 50, 0, 1)
    book_1d('PNHiggsScoresInMassWindow', 'ParticleNet Higgs score for jets in Higgs mass window', 50, 0, 1)

    # Mass after tagger selection
    selection.a.Define('TopDAK8Bools', 'Dijet_deepTag_TvsQCD > 0.9')
    selection.a.Define('TopPNBools', 'Dijet_particleNet_TvsQCD > 0.9')
    selection.a.Define('TopMassAfterDAK8Tag', 'Dijet_msoftdrop_corrT[TopDAK8Bools]')
    selection.a.Define('TopMassAfterPNTag', 'Dijet_msoftdrop_corrT[TopPNBools]')
    book_1d('TopMassAfterDAK8Tag', 'Jet mass after DAK8 top score > 0.9', 25, 50, 300)
    book_1d('TopMassAfterPNTag', 'Jet mass after PN top score > 0.9', 25, 50, 300)

    selection.a.Define('HiggsDAK8Bools', 'Dijet_deepTagMD_HbbvsQCD > 0.9')
    selection.a.Define('HiggsPNBools', 'Dijet_particleNet_HbbvsQCD > 0.9')
    selection.a.Define('HiggsMassAfterDAK8Tag', 'Dijet_msoftdrop_corrH[HiggsDAK8Bools]')
    selection.a.Define('HiggsMassAfterPNTag', 'Dijet_msoftdrop_corrH[HiggsPNBools]')
    book_1d('HiggsMassAfterDAK8Tag', 'Jet mass after DAK8 Higgs score > 0.9', 25, 50, 300)
    book_1d('HiggsMassAfterPNTag', 'Jet mass after PN Higgs score > 0.9', 25, 50, 300)

    selection.a.Define(
        'GenPart_vect',
        'hardware::TLvector(GenPart_pt, GenPart_eta, GenPart_phi, GenPart_mass)')

    out = ROOT.TFile.Open(
        'rootfiles/THjetstudy_%s_%s.root' % (args.setname, args.era),
        'RECREATE')
    out.cd()

    # Node that both the tagger-based and the truth-based branches start from
    presel = selection.a.GetActiveNode()

    # Assign jets on truth in parallel
    selection.a.SetActiveNode(presel)
    selection.ApplyTopPickViaMatch()
    truthtag = selection.a.Define(
        'MassDiff', 'Top_msoftdrop_corrT - Higgs_msoftdrop_corrH')

    nicenames = {"deepTag": "DAK8^{top}", "particleNet": "PN^{top}"}
    for t in ['deepTag', 'particleNet']:
        selection.a.SetActiveNode(presel)
        top_tagger = '%s_TvsQCD' % t
        base_title = '(m_{{t}} - m_{{H}}) vs ({0}_{{t}} - {0}_{{H}})'.format(nicenames[t])

        def book_diff(name_suffix, title_suffix, nn_column):
            # Book MassDiff vs. the given score-difference column on the
            # currently active node (called immediately, within this iteration).
            name = 'MassDiffvsNNDiff_%s%s' % (t, name_suffix)
            jetPlots.Add(
                name,
                selection.a.DataFrame.Histo2D(
                    (name, base_title + title_suffix, 25, -100, 150, 40, -1, 1),
                    'MassDiff', nn_column))

        # Signal region
        selection.ApplyTopPick(tagger=top_tagger, invert=False)
        selection.a.Define('MassDiff', 'Top_msoftdrop_corrT - Higgs_msoftdrop_corrH')
        selection.a.Define('NNDiff', 'Top_{0} - Higgs_{0}'.format(top_tagger))
        book_diff('', '', 'NNDiff')

        # Look at unmatched pieces
        checkpoint = selection.a.GetActiveNode()
        selection.a.Cut('NotGenMatchTop', '!MatchToGen(6, Top_vect, GenPart_vect, GenPart_pdgId)')
        selection.a.Cut('NotGenMatchH', '!MatchToGen(25, Higgs_vect, GenPart_vect, GenPart_pdgId)')
        book_diff('_BadMatch', ' - Bad matches', 'NNDiff')

        # Look at matched pieces
        selection.a.SetActiveNode(checkpoint)
        selection.a.Cut('GenMatchTop', 'MatchToGen(6, Top_vect, GenPart_vect, GenPart_pdgId)')
        selection.a.Cut('GenMatchH', 'MatchToGen(25, Higgs_vect, GenPart_vect, GenPart_pdgId)')
        book_diff('_GoodMatch', ' - Good matches', 'NNDiff')

        # Assign jets on truth. The score difference gets a per-tagger column
        # name because both iterations branch off the same `truthtag` node.
        selection.a.SetActiveNode(truthtag)
        selection.a.Define('NNDiff_%s' % t, 'Top_{0} - Higgs_{0}'.format(top_tagger))
        book_diff('_TruthMatch', ' - Truth matches', 'NNDiff_%s' % t)

    # Writing triggers the event loop and stores everything in `out`
    jetPlots.Do('Write')
    # Close the output file explicitly instead of relying on interpreter exit
    out.Close()

    selection.a.PrintNodeTree('NodeTree.pdf')
    print('%s sec' % (time.time() - start))
def THstudies(args):
    """Produce kinematic and N-1 histograms for one dataset.

    Opens ``dijet_nano/<setname>_<era>_snapshot.txt`` through ``THClass``,
    applies the triggers, builds the weight columns and books:

    * weighted kinematic distributions of the two jets,
    * the truth-based top / Higgs jet index from ``ApplyTopPickViaMatch``,
    * for each tagger, the N-1 distributions of the mass and tagger-score
      cuts, assuming the leading jet is the top and the subleading jet the
      Higgs candidate.

    Args:
        args: Parsed options; ``setname``, ``era``, ``threads``, ``trigEff``
            and ``variation`` are read.

    Side effects:
        Recreates ``rootfiles/THstudies_<setname>_<era>[_<variation>].root``
        and writes all histograms to it, writes the node tree to
        ``NodeTree.pdf`` and prints progress and the elapsed time.
    """
    print('PROCESSING: %s %s' % (args.setname, args.era))
    ROOT.ROOT.EnableImplicitMT(args.threads)
    start = time.time()

    # Base setup
    selection = THClass(
        'dijet_nano/%s_%s_snapshot.txt' % (args.setname, args.era),
        int(args.era), 1, 1)
    selection.OpenForSelection('None')
    selection.a.Define(
        'Dijet_vect',
        'hardware::TLvector(Dijet_pt_corr, Dijet_eta, Dijet_phi, Dijet_msoftdrop_corrT)')
    selection.a.Define('mth', 'hardware::InvariantMass(Dijet_vect)')
    # Use the top version of the corrected mass
    # since it still has JES/JER which both would get anyway
    selection.a.Define('m_avg', '(Dijet_msoftdrop_corrT[0]+Dijet_msoftdrop_corrT[1])/2')
    selection.ApplyTrigs(args.trigEff)
    # Data gets no extra nominal weight
    selection.a.MakeWeightCols(
        extraNominal='' if selection.a.isData
        else 'genWeight*%s' % selection.GetXsecScale())

    # Kinematic definitions
    selection.a.Define('pt0', 'Dijet_pt_corr[0]')
    selection.a.Define('pt1', 'Dijet_pt_corr[1]')
    selection.a.Define('HT', 'pt0+pt1')
    selection.a.Define('deltaEta', 'abs(Dijet_eta[0] - Dijet_eta[1])')
    selection.a.Define('deltaPhi', 'hardware::DeltaPhi(Dijet_phi[0],Dijet_phi[1])')
    kinOnly = selection.a.Define(
        'deltaY', 'abs(Dijet_vect[0].Rapidity() - Dijet_vect[1].Rapidity())')

    # Kinematic plots: (column, title, number of bins, lower edge, upper edge)
    kinPlots = HistGroup('kinPlots')
    kinematic_hists = (
        ('pt0', 'Lead jet pt', 100, 350, 2350),
        ('pt1', 'Sublead jet pt', 100, 350, 2350),
        ('HT', 'Sum of pt of two leading jets', 150, 700, 3700),
        ('deltaEta', '| #Delta #eta |', 48, 0, 4.8),
        ('deltaPhi', '| #Delta #phi |', 32, 1, 3.14),
        ('deltaY', '| #Delta y |', 60, 0, 3),
    )
    for column, title, nbins, low, high in kinematic_hists:
        kinPlots.Add(
            column,
            selection.a.DataFrame.Histo1D(
                (column, title, nbins, low, high), column, 'weight__nominal'))

    # Check MC truth to get jet idx assignment (these two are unweighted)
    selection.ApplyTopPickViaMatch()
    kinPlots.Add(
        'tIdx_true',
        selection.a.DataFrame.Histo1D(
            ('tIdx_true', 'Top jet idx based on MC truth', 2, 0, 2), 'tIdx'))
    kinPlots.Add(
        'hIdx_true',
        selection.a.DataFrame.Histo1D(
            ('hIdx_true', 'Higgs jet idx based on MC truth', 2, 0, 2), 'hIdx'))

    # Do N-1 setup before splitting into DAK8 and PN - assume leading top
    # This is a 50/50 assumption that kills the stats by 50% but
    # it allows us to make the plots with real world possibility that
    # there's Higgs and top cross contamination. Also helps to do this without
    # too much hassle.
    selection.a.SetActiveNode(kinOnly)
    selection.a.ObjectFromCollection('LeadTop', 'Dijet', 0)
    nminus1Node = selection.a.ObjectFromCollection('SubleadHiggs', 'Dijet', 1)

    variation_tag = '_' + args.variation if args.variation != 'None' else ''
    out = ROOT.TFile.Open(
        'rootfiles/THstudies_%s_%s%s.root' % (args.setname, args.era, variation_tag),
        'RECREATE')
    out.cd()

    for t in ['deepTag', 'particleNet']:
        top_tagger = '%s_TvsQCD' % t
        higgs_tagger = '%sMD_HbbvsQCD' % t

        # N-1: every tagger branches off the same node
        selection.a.SetActiveNode(nminus1Node)
        nminusGroup = selection.GetNminus1Group(t)
        nminusNodes = selection.a.Nminus1(nminusGroup)
        for n in nminusNodes.keys():
            if n == 'full':
                # Node with all cuts applied - nothing to plot for it
                continue
            if n.startswith('m'):
                # Mass cuts: the 'mH...' keys belong to the Higgs candidate
                bins = [25, 50, 300]
                if n.startswith('mH'):
                    var = 'SubleadHiggs_msoftdrop_corrH'
                else:
                    var = 'LeadTop_msoftdrop_corrT'
            else:
                # Tagger score cuts: keys ending in 'H_cut' belong to the
                # Higgs candidate
                bins = [50, 0, 1]
                if n.endswith('H_cut'):
                    var = 'SubleadHiggs_%s' % higgs_tagger
                else:
                    var = 'LeadTop_%s' % top_tagger
            print('N-1: Plotting %s for node %s' % (var, n))
            kinPlots.Add(
                n + '_nminus1',
                nminusNodes[n].DataFrame.Histo1D(
                    (n + '_nminus1', n + '_nminus1', bins[0], bins[1], bins[2]),
                    var, 'weight__nominal'))

    # Writing triggers the event loop and stores everything in `out`
    kinPlots.Do('Write')
    # Close the output file explicitly instead of relying on interpreter exit
    out.Close()

    selection.a.PrintNodeTree('NodeTree.pdf', verbose=True)
    print('%s sec' % (time.time() - start))
def select(setname, year):
    """Apply the full two-jet selection to one dataset and histogram ``varnames``.

    Opens ``<rootfile_path>/<setname>_bstar<year>.root`` with a TIMBER
    ``analyzer``, applies the flag/trigger filters, the back-to-back two-jet
    kinematic cuts and the dijet invariant-mass cut, defines the jet
    substructure, tagger-score and b-jet multiplicity variables plus a
    constant ``norm`` weight column, and fills one histogram per key of
    ``varnames``.

    Relies on names defined outside this block: ``rootfile_path``, ``config``,
    ``flags``, ``triggers``, ``varnames``, ``helpers`` and the C++ function
    ``hemispherize``.

    Args:
        setname: Dataset name; used in the input file name and in the
            histogram names.
        year: Year tag, one of ``'16'``, ``'17'`` or ``'18'`` (an int with
            the same value is accepted for the b-tagging lookup).

    Returns:
        A ``HistGroup`` named ``"<setname>_<year>"`` holding, for each key of
        ``varnames``, the (already filled) ``Histo1D`` result.

    Raises:
        ValueError: If no b-tagging working points are known for ``year``.
    """
    ROOT.ROOT.EnableImplicitMT(2)  # Just use two threads - no need to kill the interactive nodes

    # DeepCSV [loose, medium, tight] working points per year.
    # Looked up first so that an unsupported year fails early and clearly
    # instead of with an IndexError on an empty list later on.
    deepcsv_wps = {
        '16': [0.2217, 0.6321, 0.8953],
        '17': [0.1522, 0.4941, 0.8001],
        '18': [0.1241, 0.4184, 0.7571],
    }
    bcut = deepcsv_wps.get(str(year))
    if bcut is None:
        raise ValueError(
            "No DeepCSV working points for year %r (expected one of %s)"
            % (year, ', '.join(sorted(deepcsv_wps))))

    # Initialize TIMBER analyzer
    file_path = '%s/%s_bstar%s.root' % (rootfile_path, setname, year)
    a = analyzer(file_path)

    # Determine normalization weight (data is left unweighted)
    if not a.isData:
        norm = helpers.getNormFactor(setname, year, config, a.genEventCount)
    else:
        norm = 1.

    # Book actions on the RDataFrame
    a.Cut('filters', a.GetFlagString(flags))
    a.Cut('trigger', a.GetTriggerString(triggers))
    # Need to calculate if we have two jets (with Id) that are back-to-back
    a.Define('jetIdx', 'hemispherize(FatJet_phi, FatJet_jetId)')
    # If we don't do this, we may try to access variables of jets that don't
    # exist! (leads to seg fault)
    a.Cut('nFatJets_cut', 'nFatJet > max(jetIdx[0],jetIdx[1])')
    a.Cut("hemis", "(jetIdx[0] != -1)&&(jetIdx[1] != -1)")  # cut on that calculation
    a.Cut('pt_cut', 'FatJet_pt[jetIdx[0]] > 400 && FatJet_pt[jetIdx[1]] > 400')
    a.Cut('eta_cut', 'abs(FatJet_eta[jetIdx[0]]) < 2.4 && abs(FatJet_eta[jetIdx[1]]) < 2.4')
    a.Cut('mjet_cut', 'FatJet_msoftdrop[jetIdx[0]] > 50 && FatJet_msoftdrop[jetIdx[1]] > 50')

    # Four-vectors of the two selected jets. jetIdx holds indices into the
    # FatJet collection, so the FatJet_* branches must be used here: indexing
    # the AK4 Jet_* branches with it picks unrelated jets and can read past
    # the end of the Jet collection. The soft-drop mass is used, matching the
    # mass definition of the mjet_cut above.
    a.Define('lead_vector', 'hardware::TLvector(FatJet_pt[jetIdx[0]],FatJet_eta[jetIdx[0]],FatJet_phi[jetIdx[0]],FatJet_msoftdrop[jetIdx[0]])')
    a.Define('sublead_vector', 'hardware::TLvector(FatJet_pt[jetIdx[1]],FatJet_eta[jetIdx[1]],FatJet_phi[jetIdx[1]],FatJet_msoftdrop[jetIdx[1]])')
    a.Define('invariantMass', 'hardware::invariantMass({lead_vector,sublead_vector})')
    a.Cut('mtw_cut', 'invariantMass > 1200')

    a.Define('deltaphi', 'hardware::DeltaPhi(FatJet_phi[jetIdx[0]],FatJet_phi[jetIdx[1]])')

    # N-subjettiness ratios. The C++ ternary (condition ? <if true> : <if false>)
    # returns -1 instead of dividing when the denominator is not positive.
    a.Define('lead_tau32', 'FatJet_tau2[jetIdx[0]] > 0 ? FatJet_tau3[jetIdx[0]]/FatJet_tau2[jetIdx[0]] : -1')
    a.Define('sublead_tau32', 'FatJet_tau2[jetIdx[1]] > 0 ? FatJet_tau3[jetIdx[1]]/FatJet_tau2[jetIdx[1]] : -1')
    a.Define('lead_tau21', 'FatJet_tau1[jetIdx[0]] > 0 ? FatJet_tau2[jetIdx[0]]/FatJet_tau1[jetIdx[0]] : -1')
    a.Define('sublead_tau21', 'FatJet_tau1[jetIdx[1]] > 0 ? FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]] : -1')

    # Tagger scores
    a.Define('lead_deepAK8_TvsQCD', 'FatJet_deepTag_TvsQCD[jetIdx[0]]')
    a.Define('sublead_deepAK8_TvsQCD', 'FatJet_deepTag_TvsQCD[jetIdx[1]]')
    a.Define('lead_deepAK8_WvsQCD', 'FatJet_deepTag_WvsQCD[jetIdx[0]]')
    a.Define('sublead_deepAK8_WvsQCD', 'FatJet_deepTag_WvsQCD[jetIdx[1]]')

    # Number of AK4 jets above each DeepCSV working point
    for wp_name, wp_value in zip(('loose', 'medium', 'tight'), bcut):
        a.Define('nbjet_' + wp_name, 'Sum(Jet_btagDeepB > ' + str(wp_value) + ')')

    a.Define('lead_jetPt', 'FatJet_pt[jetIdx[0]]')
    a.Define('sublead_jetPt', 'FatJet_pt[jetIdx[1]]')
    a.Define('lead_softdrop_mass', 'FatJet_msoftdrop[jetIdx[0]]')
    a.Define('sublead_softdrop_mass', 'FatJet_msoftdrop[jetIdx[1]]')
    a.Define('norm', str(norm))

    # (substring of the variable name, (number of bins, min bin, max bin)).
    # The first matching rule wins, so the order matters.
    binning_rules = (
        ("nbjet", (10, 0, 10)),
        ("tau", (20, 0, 1)),
        ("Pt", (30, 400, 1000)),
        ("phi", (30, -3.2, 3.2)),
        ("softdrop_mass", (30, 0, 300)),
        # Required to exceed 1200 by mtw_cut, so the default [0, 1] range
        # would leave this histogram empty
        ("invariantMass", (50, 0, 5000)),
    )
    default_binning = (20, 0, 1)

    # Book a group to save the histograms
    out = HistGroup("%s_%s" % (setname, year))
    node = a.GetActiveNode()
    booked = []
    for varname in varnames:
        histname = '%s_%s_%s' % (setname, year, varname)
        nbins, low, high = default_binning
        for pattern, bins in binning_rules:
            if pattern in varname:
                nbins, low, high = bins
                break
        # Arguments for binning that you would normally pass to a TH1
        hist_tuple = (histname, histname, nbins, low, high)
        # Project dataframe into a histogram
        # (hist name/binning tuple, variable to plot from dataframe, weight)
        hist = node.DataFrame.Histo1D(hist_tuple, varname, 'norm')
        out.Add(varname, hist)  # Add it to our group
        booked.append(hist)

    # Trigger the event loop only after everything is booked: calling
    # GetValue() while still booking would re-run the loop per histogram.
    for hist in booked:
        hist.GetValue()

    # Return the group
    return out