def run(self):
    """Run the post-processing job over all configured input files.

    For each input file: optionally prefetch it, pre-skim with the JSON /
    cut expression, attach friend trees, run the configured modules via
    eventLoop (or fast-clone when there are no modules), and write the
    skimmed/friend output tree. Optionally hadd-merges all outputs at the end.
    """
    # Suffix appended to each output file name (e.g. file_Skim.root).
    outpostfix = self.postfix if self.postfix != None else (
        "_Friend" if self.friend else "_Skim")
    if not self.noOut:
        if self.compression != "none":
            # Expected format is "<ALGO>:<level>", e.g. "LZMA:9".
            ROOT.gInterpreter.ProcessLine("#include <Compression.h>")
            (algo, level) = self.compression.split(":")
            compressionLevel = int(level)
            if algo == "LZMA":
                compressionAlgo = ROOT.ROOT.kLZMA
            elif algo == "ZLIB":
                compressionAlgo = ROOT.ROOT.kZLIB
            elif algo == "LZ4":
                compressionAlgo = ROOT.ROOT.kLZ4
            else:
                raise RuntimeError("Unsupported compression %s" % algo)
        else:
            compressionLevel = 0
        print("Will write selected trees to " + self.outputDir)
        if not self.justcount:
            if not os.path.exists(self.outputDir):
                os.system("mkdir -p " + self.outputDir)
    else:
        compressionLevel = 0

    if self.noOut:
        if len(self.modules) == 0:
            raise RuntimeError(
                "Running with --noout and no modules does nothing!")

    # Open histogram file, if desired: either both file and directory are
    # given (open for writing) or neither is (no histogram output).
    if (self.histFileName != None and self.histDirName == None) or (
            self.histFileName == None and self.histDirName != None):
        raise RuntimeError(
            "Must specify both histogram file and histogram directory!")
    elif self.histFileName != None and self.histDirName != None:
        self.histFile = ROOT.TFile.Open(self.histFileName, "RECREATE")
    else:
        self.histFile = None

    for m in self.modules:
        # Modules that declare writeHistFile get the shared histogram file.
        if hasattr(m, 'writeHistFile') and m.writeHistFile:
            m.beginJob(histFile=self.histFile,
                       histDirName=self.histDirName)
        else:
            m.beginJob()

    # With no modules the output can be produced by a fast tree clone,
    # skipping the per-event loop entirely.
    fullClone = (len(self.modules) == 0)
    outFileNames = []
    t0 = time.time()
    totEntriesRead = 0
    for fname in self.inputFiles:
        # A comma-separated entry means "main file,friend1,friend2,...".
        ffnames = []
        if "," in fname:
            fnames = fname.split(',')
            fname, ffnames = fnames[0], fnames[1:]

        # open input file
        if self.prefetch:
            # Copy the (possibly remote) file locally first; toBeDeleted
            # says whether the local copy must be removed afterwards.
            ftoread, toBeDeleted = self.prefetchFile(fname)
            inFile = ROOT.TFile.Open(ftoread)
        else:
            inFile = ROOT.TFile.Open(fname)

        # get input tree ("Friends" is the fallback for friend-only files)
        inTree = inFile.Get("Events")
        if inTree == None:
            inTree = inFile.Get("Friends")
        nEntries = min(inTree.GetEntries() - self.firstEntry,
                       self.maxEntries)
        totEntriesRead += nEntries
        # pre-skimming: build an entry list from JSON + cut string
        elist, jsonFilter = preSkim(
            inTree, self.json, self.cut, maxEntries=self.maxEntries,
            firstEntry=self.firstEntry)
        if self.justcount:
            print('Would select %d / %d entries from %s (%.2f%%)' %
                  (elist.GetN() if elist else nEntries, nEntries, fname,
                   (elist.GetN() if elist else nEntries) /
                   (0.01 * nEntries) if nEntries else 0))
            if self.prefetch:
                if toBeDeleted:
                    os.unlink(ftoread)
            continue
        else:
            print('Pre-select %d entries out of %s (%.2f%%)' %
                  (elist.GetN() if elist else nEntries, nEntries,
                   (elist.GetN() if elist else nEntries) /
                   (0.01 * nEntries) if nEntries else 0))

        # Attach any extra files as friend trees of the main tree.
        inAddFiles = []
        inAddTrees = []
        for ffname in ffnames:
            inAddFiles.append(ROOT.TFile.Open(ffname))
            inAddTree = inAddFiles[-1].Get("Events")
            if inAddTree == None:
                inAddTree = inAddFiles[-1].Get("Friends")
            inAddTrees.append(inAddTree)
            inTree.AddFriend(inAddTree)

        if fullClone:
            # no need of a reader (no event loop), but set up the elist if available
            if elist:
                inTree.SetEntryList(elist)
        else:
            # initialize reader
            inTree = InputTree(inTree, elist)

        # prepare output file
        if not self.noOut:
            outFileName = os.path.join(
                self.outputDir,
                os.path.basename(fname).replace(".root",
                                                outpostfix + ".root"))
            outFile = ROOT.TFile.Open(outFileName, "RECREATE", "",
                                      compressionLevel)
            outFileNames.append(outFileName)
            if compressionLevel:
                outFile.SetCompressionAlgorithm(compressionAlgo)
            # prepare output tree (friend tree vs full skim)
            if self.friend:
                outTree = FriendOutput(inFile, inTree, outFile)
            else:
                outTree = FullOutput(
                    inFile,
                    inTree,
                    outFile,
                    branchSelection=self.branchsel,
                    outputbranchSelection=self.outputbranchsel,
                    fullClone=fullClone,
                    maxEntries=self.maxEntries,
                    firstEntry=self.firstEntry,
                    jsonFilter=jsonFilter,
                    provenance=self.provenance)
        else:
            outFile = None
            outTree = None
            if self.branchsel:
                self.branchsel.selectBranches(inTree)

        # process events, if needed
        if not fullClone:
            # When an entry list exists it already encodes the selection,
            # so no explicit range is passed.
            eventRange = range(
                self.firstEntry, self.firstEntry +
                nEntries) if nEntries > 0 and not elist else None
            (nall, npass, timeLoop) = eventLoop(
                self.modules, inFile, outFile, inTree, outTree,
                eventRange=eventRange, maxEvents=self.maxEntries)
            print(
                'Processed %d preselected entries from %s (%s entries). Finally selected %d entries'
                % (nall, fname, nEntries, npass))
        else:
            nall = nEntries
            print('Selected %d / %d entries from %s (%.2f%%)' %
                  (outTree.tree().GetEntries(), nall, fname,
                   outTree.tree().GetEntries() /
                   (0.01 * nall) if nall else 0))

        # now write the output
        if not self.noOut:
            outTree.write()
            outFile.Close()
            print("Done %s" % outFileName)
        if self.jobReport:
            self.jobReport.addInputFile(fname, nall)
        if self.prefetch:
            if toBeDeleted:
                os.unlink(ftoread)

    for m in self.modules:
        m.endJob()

    print("Total time %.1f sec. to process %i events. Rate = %.1f Hz." %
          ((time.time() - t0), totEntriesRead,
           totEntriesRead / (time.time() - t0)))

    if self.haddFileName:
        # Prefer a local haddnano.py if present, else rely on PATH.
        haddnano = "./haddnano.py" if os.path.isfile(
            "./haddnano.py") else "haddnano.py"
        os.system("%s %s %s" %
                  (haddnano, self.haddFileName, " ".join(outFileNames)))
        if self.jobReport:
            self.jobReport.addOutputFile(self.haddFileName)
            self.jobReport.save()
def NanoReader(process_flag, inputFileNames=["in.root"],
               outputFileName="out.root", json='', year=2016,
               nEventsMax=-1):
    """Skim nanoAOD files, keeping dijet events with two high-pt AK8 jets.

    Applies MET-noise filters and jet/HT triggers, selects the two (three)
    leading tight-ID AK8 jets with pt > 200 and |eta| < 2.5, requires
    mjj > 1200 GeV (softdrop masses), and writes the selected events with
    truth label ``process_flag`` through ``Outputer``.  Returns the number
    of saved events, or 1 if an input file could not be opened.

    NOTE(review): uses py2 ``xrange``; the default mutable list argument
    ``inputFileNames`` is shared across calls — avoid mutating it.
    """
    #Just tried to copy common filters, feel free to add any i am missing
    filters = ["Flag_goodVertices", "Flag_globalTightHalo2016Filter",
               "Flag_eeBadScFilter", "Flag_HBHENoiseFilter",
               "Flag_HBHENoiseIsoFilter", "Flag_ecalBadCalibFilter",
               "Flag_EcalDeadCellTriggerPrimitiveFilter",
               "Flag_BadChargedCandidateFilter",
               ]
    if(year == 2016):
        filters.append("Flag_CSCTightHaloFilter")
    if(year == 2016):
        triggers = ["HLT_PFHT800", "HLT_PFHT900", "HLT_PFJet450",
                    "HLT_PFJet500", "HLT_AK8PFJet450", "HLT_AK8PFJet500"]
    elif(year == 2017):
        triggers = ["HLT_PFHT1050", "HLT_PFJet500",
                    "HLT_AK8PFJet380_TrimMass30",
                    'HLT_AK8PFJet400_TrimMass30']
    elif(year == 2018):
        triggers = ["HLT_PFHT1050", "HLT_PFJet500",
                    "HLT_AK8PFJet380_TrimMass30",
                    'HLT_AK8PFJet400_TrimMass30']
    else:
        print("Invalid year option of %i. Year must be 2016, 2017, or 2018! \n" % year)
        exit(1)
    # NOTE(review): this assignment overrides the year-specific trigger
    # lists chosen above for every year — the original indentation was
    # lost, so confirm whether this override is intentional.
    triggers = [
        'HLT_PFHT780',
        'HLT_PFHT890',
        'HLT_PFHT1050',
        'HLT_PFJet500',
        'HLT_AK8PFJet500',
        'HLT_AK8PFHT700_TrimMass50',
        'HLT_AK8PFHT800_TrimMass50',
        'HLT_AK8PFHT900_TrimMass50',
        'HLT_AK8PFJet360_TrimMass30',
        'HLT_AK8PFJet380_TrimMass30',
        'HLT_AK8PFJet400_TrimMass30',
        'HLT_AK8PFJet420_TrimMass30',
    ]
    # Minimum softdrop dijet invariant mass (GeV) for an event to be kept.
    mjj_cut = 1200.
    nFiles = len(inputFileNames)
    print("Will run over %i files and output to %s with truth label %i" %
          (nFiles, outputFileName, process_flag))
    count = 0   # events processed
    saved = 0   # events passing the full selection
    #----------------- Begin loop over files ---------------------------------
    for fileName in inputFileNames:
        print("Opening file %s" % fileName)
        inputFile = TFile.Open(fileName)
        if(not inputFile):  #check for null pointer
            print("Unable to open file %s, exting \n" % fileName)
            return 1
        #get input tree
        inTree = inputFile.Get("Events")
        # pre-skimming with the good-lumi JSON, if one was given
        if(json != ''):
            elist, jsonFilter = preSkim(inTree, json)
            #number of events to be processed
            nTotal = elist.GetN() if elist else inTree.GetEntries()
            print('Pre-select %d entries out of %s ' %
                  (nTotal, inTree.GetEntries()))
            inTree = InputTree(inTree, elist)
        else:
            nTotal = inTree.GetEntries()
            inTree = InputTree(inTree)
        print('Running over %i entries \n' % nTotal)
        out = Outputer(outputFileName, truth_label=process_flag)

        # Grab event tree from nanoAOD
        eventBranch = inTree.GetBranch('event')
        treeEntries = eventBranch.GetEntries()

        # -------- Begin Loop over tree-------------------------------------
        entries = inTree.entries
        for entry in xrange(entries):
            # NOTE(review): py2 integer division here — the reported
            # percentage is truncated; confirm intended behavior.
            if count % 10000 == 0:
                print('--------- Processing Event ' + str(count) +
                      ' -- percent complete ' +
                      str(100 * count / nTotal / nFiles) + '% -- ')
            count += 1

            # Grab the event
            event = Event(inTree, entry)

            # Event must pass ALL filters and AT LEAST ONE trigger.
            passTrigger = False
            passFilter = True
            for fil in filters:
                passFilter = passFilter and inTree.readBranch(fil)
            for trig in triggers:
                passTrigger = passTrigger or inTree.readBranch(trig)
            if(not passFilter):
                continue
            if(not passTrigger):
                continue

            PFCands = Collection(event, "FatJetPFCands")
            AK8Jets = Collection(event, "FatJet")
            #MuonsCol = Collection(event, "Muon")
            #ElectronsCol = Collection(event, "Electron")
            #PhotonsCol = Collection(event, "Photon")
            subjets = Collection(event, "SubJet")

            min_pt = 200
            #keep 2 jets with pt > 200, tight id
            jet1 = jet2 = jet3 = None

            pf_conts_start = 0  #keep track of indices for PF candidates
            for jet in AK8Jets:
                #jetId : bit1 = loose, bit2 = tight, bit3 = tightLepVeto
                #want tight id
                if((jet.jetId & 2 == 2) and jet.pt > min_pt and
                        abs(jet.eta) < 2.5):
                    # Remember where this jet's PF candidates begin.
                    jet.PFConstituents_Start = pf_conts_start
                    # Keep the three leading (by pt) selected jets.
                    if(jet1 == None or jet.pt > jet1.pt):
                        jet3 = jet2
                        jet2 = jet1
                        jet1 = jet
                    elif(jet2 == None or jet.pt > jet2.pt):
                        jet3 = jet2
                        jet2 = jet
                    elif(jet3 == None or jet.pt > jet3.pt):
                        jet3 = jet
                # Advance the PF-candidate index for every jet, selected
                # or not, since candidates are stored for all fat jets.
                pf_conts_start += jet.nPFConstituents

            if(jet1 == None or jet2 == None):
                continue

            #Order jets so jet1 is always the higher mass one
            if(jet1.msoftdrop < jet2.msoftdrop):
                temp = jet1
                jet1 = jet2
                jet2 = temp

            # Dijet invariant mass from softdrop-mass four-vectors.
            j1_4vec = ROOT.Math.PtEtaPhiMVector(jet1.pt, jet1.eta,
                                                jet1.phi, jet1.msoftdrop)
            j2_4vec = ROOT.Math.PtEtaPhiMVector(jet2.pt, jet2.eta,
                                                jet2.phi, jet2.msoftdrop)
            dijet = j1_4vec + j2_4vec
            mjj = dijet.M()
            if(mjj < mjj_cut):
                continue

            saved += 1
            out.fill_event(inTree, jet1, jet2, jet3, PFCands, subjets, mjj)
            # NOTE(review): this break only exits the per-file entry loop;
            # the outer file loop continues — confirm intended.
            if(nEventsMax > 0 and saved >= nEventsMax):
                break
        # -------- End Loop over tree-------------------------------------
    # -------- End Loop over files-------------------------------------

    # NOTE(review): raises ZeroDivisionError if no entries were processed.
    efficiency = float(saved) / count
    out.final_write_out(efficiency)
    print("Done. Selected %i events. Selection efficiency is %.3f \n" %
          (saved, efficiency))
    print("Outputed to %s" % outputFileName)
    return saved
def run(self):
    """Run the per-jet post-processing job over all configured input files.

    A simplified variant of the standard post-processor loop: always uses a
    reader (never fast-clones), fixes compression to LZ4:4, and supports a
    per-jet mode in which each module both analyzes and fills (up to two
    entries per event, hence the doubled maxEntries budget).
    """
    outpostfix = self.postfix if self.postfix != None else (
        "_Friend" if self.friend else "_Skim")
    # Never fast-clone: modules always run through the explicit loop below.
    fullClone = False
    outFileNames = []
    totEntriesRead = 0
    t0 = time.time()
    for fileName in self.inputFiles:
        # open file
        print("Opening file %s" % fileName)
        inFile = ROOT.TFile.Open(fileName)
        if (not inFile):  #check for null pointer
            print("Unable to open file %s, exting \n" % fileName)
            return 1

        # get input tree
        inTree = inFile.Get("Events")
        nEntries = min(inTree.GetEntries() - self.firstEntry,
                       self.maxEntries)
        totEntriesRead += nEntries
        # pre-skimming
        elist, jsonFilter = preSkim(
            inTree, self.json, self.cut, maxEntries=self.maxEntries,
            firstEntry=self.firstEntry)

        # number of events to be processed
        nTotal = elist.GetN() if elist else nEntries
        print('Pre-select %d entries out of %s ' % (nTotal, nEntries))
        inTree = InputTree(inTree, elist)

        # output
        outFileName = os.path.join(
            self.outputDir,
            os.path.basename(fileName).replace(".root",
                                               outpostfix + ".root"))
        #compressionAlgo = ROOT.ROOT.kLZMA
        #compressionLevel = int(9)
        # Compression is hard-wired to LZ4 level 4.
        compressionAlgo = ROOT.ROOT.kLZ4
        compressionLevel = int(4)
        outFile = ROOT.TFile.Open(outFileName, "RECREATE", "",
                                  compressionLevel)
        outFileNames.append(outFileName)
        outFile.SetCompressionAlgorithm(compressionAlgo)
        maxEntries = self.maxEntries
        if self.perJet:
            #save two first jets
            maxEntries = self.maxEntries * 2
        outTree = FullOutput(inFile,
                             inTree,
                             outFile,
                             branchSelection=self.branchsel,
                             outputbranchSelection=self.outputbranchsel,
                             fullClone=fullClone,
                             maxEntries=maxEntries,
                             firstEntry=self.firstEntry,
                             jsonFilter=jsonFilter,
                             provenance=self.provenance)

        # NOTE(review): t0 is reset for every file, so the "Total time"
        # printout below only covers the last file — confirm intended.
        t0 = time.time()
        tlast = t0
        doneEvents = 0
        acceptedEvents = 0
        # Build the explicit list of entry numbers to visit: either the
        # entries of the pre-skim entry list, or a contiguous range.
        if elist:
            eventRange = [(elist.GetEntry(0) if i == 0 else elist.Next())
                          for i in range(elist.GetN())]
        else:
            eventRange = range(
                self.firstEntry,
                self.firstEntry + nEntries) if nEntries > 0 else None
        entries = inTree.entries
        if eventRange:
            entries = len(eventRange)
        maxEvents = self.maxEntries
        if maxEvents > 0:
            entries = min(entries, self.maxEntries)
        entriesRange = range(entries) if eventRange == None else eventRange
        for m in self.modules:
            m.beginFile(inFile, outFile, inTree, outTree, entriesRange)
        for ie, i in enumerate(entriesRange):
            if maxEvents > 0 and ie >= maxEvents:
                break
            # NOTE(review): the event is built from the loop position `ie`,
            # not the entry number `i` from entriesRange — when elist holds
            # non-consecutive entries these differ; confirm intended.
            e = Event(inTree, ie)
            ret = True
            if self.perJet:
                # Per-jet mode: each module analyzes and, if accepted,
                # fills the output itself (possibly once per jet).
                #print('ie ',ie)
                for m in self.modules:
                    ret = m.analyze(e, ie)
                    if not ret:
                        break
                    else:
                        clearExtraBranches(inTree)
                        m.fill(e, ie)
            else:
                # Standard mode: all modules must accept, then the output
                # tree is filled once for the event.
                clearExtraBranches(inTree)
                for m in self.modules:
                    ret = m.analyze(e, ie)
                    if not ret:
                        break
                if ret and outTree is not None:
                    outTree.fill()
            if ret:
                acceptedEvents += 1
        for m in self.modules:
            m.endFile(inFile, outFile, inTree, outTree)
        outTree.write()
        outFile.Close()
        print("Done %s" % outFileName)
    for m in self.modules:
        m.endJob()
    print("Total time %.1f sec. to process %i events. Rate = %.1f Hz." %
          ((time.time() - t0), totEntriesRead,
           totEntriesRead / (time.time() - t0)))
def run(self): if not self.noOut: outpostfix = self.postfix if self.postfix != None else ( "_Friend" if self.friend else "_Skim") if self.compression != "none": ROOT.gInterpreter.ProcessLine("#include <Compression.h>") (algo, level) = self.compression.split(":") compressionLevel = int(level) if algo == "LZMA": compressionAlgo = ROOT.ROOT.kLZMA elif algo == "ZLIB": compressionAlgo = ROOT.ROOT.kZLIB else: raise RuntimeError("Unsupported compression %s" % algo) else: compressionLevel = 0 print "Will write selected trees to " + self.outputDir if not self.justcount: if not os.path.exists(self.outputDir): os.system("mkdir -p " + self.outputDir) if self.noOut: if len(self.modules) == 0: raise RuntimeError( "Running with --noout and no modules does nothing!") for m in self.modules: m.beginJob() fullClone = (len(self.modules) == 0) outFileNames = [] t0 = time.clock() totEntriesRead = 0 for fname in self.inputFiles: # open input file inFile = ROOT.TFile.Open(fname) #get input tree inTree = inFile.Get("Events") totEntriesRead += inTree.GetEntries() # pre-skimming elist, jsonFilter = preSkim(inTree, self.json, self.cut) if self.justcount: print 'Would select %d entries from %s' % ( elist.GetN() if elist else inTree.GetEntries(), fname) continue else: print 'Pre-select %d entries out of %s ' % ( elist.GetN() if elist else inTree.GetEntries(), inTree.GetEntries()) if fullClone: # no need of a reader (no event loop), but set up the elist if available if elist: inTree.SetEntryList(elist) else: # initialize reader inTree = InputTree(inTree, elist) # prepare output file outFileName = os.path.join( self.outputDir, os.path.basename(fname).replace(".root", outpostfix + ".root")) outFile = ROOT.TFile.Open(outFileName, "RECREATE", "", compressionLevel) outFileNames.append(outFileName) if compressionLevel: outFile.SetCompressionAlgorithm(compressionAlgo) # prepare output tree if self.friend: outTree = FriendOutput(inFile, inTree, outFile) else: outTree = FullOutput(inFile, inTree, 
outFile, branchSelection=self.branchsel, fullClone=fullClone, jsonFilter=jsonFilter, provenance=self.provenance) # process events, if needed if not fullClone: (nall, npass, timeLoop) = eventLoop(self.modules, inFile, outFile, inTree, outTree) print 'Processed %d preselected entries from %s (%s entries). Finally selected %d entries' % ( nall, fname, inTree.GetEntries(), npass) else: print 'Selected %d entries from %s' % ( outTree.tree().GetEntries(), fname) # now write the output outTree.write() outFile.Close() print "Done %s" % outFileName if self.jobReport: self.jobReport.addInputFile(fname, nall) for m in self.modules: m.endJob() print totEntriesRead / (time.clock() - t0), "Hz" if self.haddFileName: os.system( "./haddnano.py %s %s" % (self.haddFileName, " ".join(outFileNames)) ) #FIXME: remove "./" once haddnano.py is distributed with cms releases if self.jobReport: self.jobReport.addOutputFile(self.haddFileName) self.jobReport.save()
def run(self): outpostfix = self.postfix if self.postfix != None else ( "_Friend" if self.friend else "_Skim") if not self.noOut: if self.compression != "none": ROOT.gInterpreter.ProcessLine("#include <Compression.h>") (algo, level) = self.compression.split(":") compressionLevel = int(level) if algo == "LZMA": compressionAlgo = ROOT.ROOT.kLZMA elif algo == "ZLIB": compressionAlgo = ROOT.ROOT.kZLIB else: raise RuntimeError("Unsupported compression %s" % algo) else: compressionLevel = 0 print "Will write selected trees to " + self.outputDir if not self.justcount: if not os.path.exists(self.outputDir): os.system("mkdir -p " + self.outputDir) else: compressionLevel = 0 if self.noOut: if len(self.modules) == 0: raise RuntimeError( "Running with --noout and no modules does nothing!") # Open histogram file, if desired if (self.histFileName != None and self.histDirName == None) or ( self.histFileName == None and self.histDirName != None): raise RuntimeError( "Must specify both histogram file and histogram directory!") elif self.histFileName != None and self.histDirName != None: self.histFile = ROOT.TFile.Open(self.histFileName, "RECREATE") else: self.histFile = None for m in self.modules: if hasattr(m, 'writeHistFile') and m.writeHistFile: m.beginJob(histFile=self.histFile, histDirName=self.histDirName) else: m.beginJob() fullClone = (len(self.modules) == 0) outFileNames = [] t0 = time.clock() totEntriesRead = 0 for fname in self.inputFiles: # open input file inFile = ROOT.TFile.Open(fname) #get input tree inTree = inFile.Get("Events") totEntriesRead += inTree.GetEntries() self.hcount.SetBinContent(1, inTree.GetEntries()) ROOT.gROOT.SetBatch(True) if self.SMSMasses != None: inTree.Draw( "MaxIf$(GenPart_mass, abs(GenPart_pdgId) == %i):MaxIf$(GenPart_mass, abs(GenPart_pdgId) == %i) >> hSMS(2000, -0.5, 1999.5, 2000, -0.5, 1999.5)" % (self.SMSMasses[0], self.SMSMasses[1])) self.hsmscount = ROOT.gDirectory.Get('hSMS') if self.doISR != None: #Dirty ISR recipe for EWKinos #Need 
to correct for each mass point #Can't correct per sample (wrong normalization), need to save whole unskimmed histogram per point an then postprocess pt1 = "MaxIf$(GenPart_pt, abs(GenPart_pdgId) == %i && GenPart_status == 22)" % self.doISR[ 0] pt2 = "MaxIf$(GenPart_pt, abs(GenPart_pdgId) == %i && GenPart_status == 22)" % self.doISR[ 1] phi1 = "MaxIf$(GenPart_pt, abs(GenPart_pdgId) == %i && GenPart_status == 22)" % self.doISR[ 0] phi2 = "MaxIf$(GenPart_pt, abs(GenPart_pdgId) == %i && GenPart_status == 22)" % self.doISR[ 1] pt_ISR = "hypot(%s + %s * cos(%s-%s), %s*sin(%s - %s))" % ( pt1, pt2, phi2, phi1, pt2, phi2, phi1) inTree.Draw( " %s : MaxIf$(GenPart_mass, abs(GenPart_pdgId) == %i) : MaxIf$(GenPart_mass, abs(GenPart_pdgId) == %i) >> hISR(1000, -0.5, 1999.5, 1000, -0.5, 1999.5, 20, 0, 1000)" % (pt_ISR, self.SMSMasses[0], self.SMSMasses[1])) self.hISR = ROOT.gDirectory.Get("hISR") if inTree.GetBranchStatus("genWeight"): inTree.Project("SumWeightsTemp", "1.0", "genWeight") sow = ROOT.gROOT.FindObject("SumWeightsTemp").Integral() self.hsumofweights.SetBinContent(1, sow) # pre-skimming elist, jsonFilter = preSkim(inTree, self.json, self.cut) if self.justcount: print 'Would select %d entries from %s' % ( elist.GetN() if elist else inTree.GetEntries(), fname) continue else: print 'Pre-select %d entries out of %s ' % ( elist.GetN() if elist else inTree.GetEntries(), inTree.GetEntries()) if fullClone: # no need of a reader (no event loop), but set up the elist if available if elist: inTree.SetEntryList(elist) else: # initialize reader inTree = InputTree(inTree, elist) # prepare output file if not self.noOut: outFileName = os.path.join( self.outputDir, os.path.basename(fname).replace(".root", outpostfix + ".root")) outFile = ROOT.TFile.Open(outFileName, "RECREATE", "", compressionLevel) outFileNames.append(outFileName) if compressionLevel: outFile.SetCompressionAlgorithm(compressionAlgo) # prepare output tree if self.friend: outTree = FriendOutput(inFile, inTree, outFile) 
else: outTree = FullOutput( inFile, inTree, outFile, branchSelection=self.branchsel, outputbranchSelection=self.outputbranchsel, fullClone=fullClone, jsonFilter=jsonFilter, provenance=self.provenance) else: outFile = None outTree = None # process events, if needed if not fullClone: (nall, npass, timeLoop) = eventLoop(self.modules, inFile, outFile, inTree, outTree) print 'Processed %d preselected entries from %s (%s entries). Finally selected %d entries' % ( nall, fname, inTree.GetEntries(), npass) else: nall = inTree.GetEntries() print 'Selected %d entries from %s' % ( outTree.tree().GetEntries(), fname) # now write the output if not self.noOut: print "Start writing" self.hcount.Write() print "Start writing" if self.SMSMasses != None: self.hsmscount.Write() print "Start writing" if self.doISR != None: self.hISR.Write() print "Start writing" self.hsumofweights.Write() outTree.write() outFile.Close() print "Done %s" % outFileName if self.jobReport: self.jobReport.addInputFile(fname, nall) for m in self.modules: m.endJob() print totEntriesRead / (time.clock() - t0), "Hz" if self.haddFileName: os.system( "./haddnano.py %s %s" % (self.haddFileName, " ".join(outFileNames)) ) #FIXME: remove "./" once haddnano.py is distributed with cms releases if self.jobReport: self.jobReport.addOutputFile(self.haddFileName) self.jobReport.save()
'16': TFile.Open('data16C_sample.root'), '17': TFile.Open('data17C_sample.root') } ratios = {} if openfile == '': out = TFile('YearComparisonOut.root', 'RECREATE') for string_f in files.keys(): f = files[string_f] print 'Working on ' + string_f inTree = f.Get("Events") elist, jsonFiter = preSkim(inTree, None, '') inTree = InputTree(inTree, elist) treeEntries = inTree.entries Jet1ptpass = TH1F('Jet1ptpass' + string_f, 'Jet1ptpass' + string_f, 160, 400, 2000) Jet2ptpass = TH1F('Jet2ptpass' + string_f, 'Jet2ptpass' + string_f, 160, 400, 2000) Jet1etapass = TH1F('Jet1etapass' + string_f, 'Jet1etapass' + string_f, 40, -4.0, 4.0) Jet2etapass = TH1F('Jet2etapass' + string_f, 'Jet2etapass' + string_f, 40, -4.0, 4.0) eptpass = TH1F('eptpass' + string_f, 'eptpass' + string_f, 100, 0, 200) muptpass = TH1F('muptpass' + string_f, 'muptpass' + string_f, 100, 0, 200)
def run(self): outpostfix = self.postfix if self.postfix is not None else ( "_Friend" if self.friend else "_Skim") if self.allowNoPostfix and self.postfix is None: outpostfix = "" if not self.noOut: if self.compression != "none": ROOT.gInterpreter.ProcessLine("#include <Compression.h>") (algo, level) = self.compression.split(":") compressionLevel = int(level) if algo == "LZMA": compressionAlgo = ROOT.ROOT.kLZMA elif algo == "ZLIB": compressionAlgo = ROOT.ROOT.kZLIB elif algo == "LZ4": compressionAlgo = ROOT.ROOT.kLZ4 else: raise RuntimeError("Unsupported compression %s" % algo) else: compressionLevel = 0 print("Will write selected trees to " + self.outputDir) if not self.justcount: if not os.path.exists(self.outputDir): os.system("mkdir -p " + self.outputDir) else: compressionLevel = 0 if self.noOut: if len(self.modules) == 0: raise RuntimeError( "Running with --noout and no modules does nothing!") # Open histogram file, if desired if (self.histFileName is not None and self.histDirName is None) or ( self.histFileName is None and self.histDirName is not None): raise RuntimeError( "Must specify both histogram file and histogram directory!") elif self.histFileName is not None and self.histDirName is None: self.histFile = ROOT.TFile.Open(self.histFileName, "RECREATE") else: self.histFile = None for m in self.modules: if hasattr(m, 'writeHistFile') and m.writeHistFile: m.beginJob(histFile=self.histFile, histDirName=self.histDirName) else: m.beginJob() fullClone = (len(self.modules) == 0) outFileNames = [] t0 = time.time() totEntriesRead = 0 for fname in self.inputFiles: ffnames = [] if "," in fname: fnames = fname.split(',') fname, ffnames = fnames[0], fnames[1:] # open input file if self.prefetch: ftoread, toBeDeleted = self.prefetchFile(fname) inFile = ROOT.TFile.Open(ftoread) else: inFile = ROOT.TFile.Open(fname) if not inFile: print 'ERROR: file does not exist, check!' 
print ' filename:', fname exit(0) # get input tree inTree = inFile.Get("Events") if inTree is None: inTree = inFile.Get("Friends") nEntries = min(inTree.GetEntries() - self.firstEntry, self.maxEntries) # first check that the histogram with weights is not already in the file hasWeightHistograms = False if inFile.GetListOfKeys().Contains( "hGenWeights") and inFile.GetListOfKeys().Contains( "hNumWeights"): hasWeightHistograms = True print "Histogram hGenWeights already exists, I will just copy it without recreating it" if self.saveHistoGenWeights and inTree.GetName( ) == "Events" and not hasWeightHistograms: print "Histogram hGenWeights does not exist yet, I will create it" # check that the tree contains all the original events, otherwise the sum of gen weights will miss some tmpTreeRuns = inFile.Get("Runs") for ievt, event in enumerate(tmpTreeRuns): if ievt: break # only need first event (but there should be only 1 here) nGenEvents = event.genEventCount if nGenEvents != inTree.GetEntries(): raise RuntimeError( "I am creating the histogram with genWeight, but tree Events has less entries than genEventCount in tree Runs (%s instead of %s). The sum of weights will thus be wrong, please check" % (str(inTree.GetEntries()), str(nGenEvents))) # saving distribution of genWeight for offline usage # idea is to fill the distribution of Log10(genWeight) with the sign, so to have a histogram from about -10 to 10 # with about 10k bins (genWeights can take valus spanning several orders of magnitude, especially for fancy weights) # then one can compute the sum of genWeight in a given range using its integral (using Log10(threshold) ). # This somehow relies on having always |genWeight|>1, should it be < 1 the Log would change the sign. 
# So for the purpose of choosing the bin to be filled, we use |value| or 1.001, whatever is larger (this will not affect the integral) # then, need a second histogram to keep the integer number of events in each bin, so to allow for clipping of large weights hGenWeights = ROOT.TH1D("hGenWeights", "distribution of Log10(genWeight)", 4800, -12.0, 12.0) hNumWeights = ROOT.TH1D( "hNumWeights", "distribution of Log10(genWeight) (unweighted)", 4800, -12.0, 12.0) drawResult = inTree.Draw( "TMath::Sign(1.0,genWeight)*TMath::Log10(max(1.001,abs(genWeight)))>>hGenWeights", "genWeight", "goff", nEntries, self.firstEntry) drawResult = inTree.Draw( "TMath::Sign(1.0,genWeight)*TMath::Log10(max(1.001,abs(genWeight)))>>hNumWeights", "1", "goff", nEntries, self.firstEntry) totEntriesRead += nEntries # pre-skimming elist, jsonFilter = preSkim(inTree, self.json, self.cut, maxEntries=self.maxEntries, firstEntry=self.firstEntry) if self.justcount: print('Would select %d / %d entries from %s (%.2f%%)' % (elist.GetN() if elist else nEntries, nEntries, fname, (elist.GetN() if elist else nEntries) / (0.01 * nEntries) if nEntries else 0)) if self.prefetch: if toBeDeleted: os.unlink(ftoread) continue else: print('Pre-select %d entries out of %s (%.2f%%)' % (elist.GetN() if elist else nEntries, nEntries, (elist.GetN() if elist else nEntries) / (0.01 * nEntries) if nEntries else 0)) inAddFiles = [] inAddTrees = [] for ffname in ffnames: inAddFiles.append(ROOT.TFile.Open(ffname)) inAddTree = inAddFiles[-1].Get("Events") if inAddTree is None: inAddTree = inAddFiles[-1].Get("Friends") inAddTrees.append(inAddTree) inTree.AddFriend(inAddTree) if fullClone: # no need of a reader (no event loop), but set up the elist if available if elist: inTree.SetEntryList(elist) else: # initialize reader inTree = InputTree(inTree, elist) # prepare output file if not self.noOut: outFileName = os.path.join( self.outputDir, os.path.basename(fname).replace(".root", outpostfix + ".root")) outFile = 
ROOT.TFile.Open(outFileName, "RECREATE", "", compressionLevel) outFileNames.append(outFileName) if compressionLevel: outFile.SetCompressionAlgorithm(compressionAlgo) # prepare output tree if self.friend: outTree = FriendOutput(inFile, inTree, outFile) else: outTree = FullOutput( inFile, inTree, outFile, branchSelection=self.branchsel, outputbranchSelection=self.outputbranchsel, fullClone=fullClone, maxEntries=self.maxEntries, firstEntry=self.firstEntry, jsonFilter=jsonFilter, provenance=self.provenance) else: outFile = None outTree = None if self.branchsel: self.branchsel.selectBranches(inTree) # process events, if needed if not fullClone: eventRange = range( self.firstEntry, self.firstEntry + nEntries) if nEntries > 0 and not elist else None (nall, npass, timeLoop) = eventLoop(self.modules, inFile, outFile, inTree, outTree, eventRange=eventRange, maxEvents=self.maxEntries) print( 'Processed %d preselected entries from %s (%s entries). Finally selected %d entries' % (nall, fname, nEntries, npass)) else: nall = nEntries print('Selected %d / %d entries from %s (%.2f%%)' % (outTree.tree().GetEntries(), nall, fname, outTree.tree().GetEntries() / (0.01 * nall) if nall else 0)) # now write the output if not self.noOut: outTree.write() if not hasWeightHistograms: if self.saveHistoGenWeights: hGenWeights.Write(hGenWeights.GetName()) hNumWeights.Write(hNumWeights.GetName()) outFile.Close() print("Done %s" % outFileName) if self.jobReport: self.jobReport.addInputFile(fname, nall) if self.prefetch: if toBeDeleted: os.unlink(ftoread) for m in self.modules: m.endJob() print("Total time %.1f sec. to process %i events. Rate = %.1f Hz." % ((time.time() - t0), totEntriesRead, totEntriesRead / (time.time() - t0))) if self.haddFileName: haddnano = "./haddnano.py" if os.path.isfile( "./haddnano.py") else "haddnano.py" os.system("%s %s %s" % (haddnano, self.haddFileName, " ".join(outFileNames))) if self.jobReport: self.jobReport.addOutputFile(self.haddFileName) self.jobReport.save()
"Running with --noout and no modules does nothing!") for m in modules: m.beginJob() fullClone = (len(modules) == 0) for fname in args: # open input file inFile = ROOT.TFile.Open(fname) #get input tree inTree = inFile.Get("Events") # pre-skimming elist = preSkim(inTree, options.json, options.cut) if options.justcount: print 'Would select %d entries from %s' % ( elist.GetN() if elist else inTree.GetEntries(), fname) continue if fullClone: # no need of a reader (no event loop), but set up the elist if available if elist: inTree.SetEntryList(elist) else: # initialize reader inTree = InputTree(inTree, elist) # prepare output file outFileName = os.path.join( outdir,
def NanoReader(inputFileName="in.root", outputFileName="out.root",
               cut=None, nJobs=1, jobNum=1, json=None):
    """Skim a nanoAOD file into a flat ST/HT tuple.

    Selects loose-ID jets, tight muons, medium electrons and medium
    photons above pt thresholds, removes overlapping objects within
    dR < 0.3, and writes per-event scalar sums (ST, HT, MET, lepton pts,
    system mass, counts, trigger bit, genWeight) into an output "Events"
    TTree via CrabOutput.  Returns the number of processed entries, or 1
    if the input file could not be opened.

    NOTE(review): uses py2 ``xrange``/print statements; ``nJobs``/``jobNum``
    are currently unused (the splitting code below is commented out).
    """
    inputFile = TFile.Open(inputFileName)
    if (not inputFile):  #check for null pointer
        print("Unable to open file %s, exting \n" % inputFileName)
        return 1

    #get input tree
    inTree = inputFile.Get("Events")
    # pre-skimming with the good-lumi JSON and the cut expression
    elist, jsonFilter = preSkim(inTree, json, cut)

    #number of events to be processed
    nTotal = elist.GetN() if elist else inTree.GetEntries()
    print 'Pre-select %d entries out of %s ' % (nTotal,
                                                inTree.GetEntries())
    inTree = InputTree(inTree, elist)

    # Grab event tree from nanoAOD
    eventBranch = inTree.GetBranch('event')
    treeEntries = eventBranch.GetEntries()

    # Design the splitting if necessary
    #if nJobs != 1:
    #    evInJob = int(treeEntries/nJobs)
    #    lowBinEdge = evInJob*(jobNum-1)
    #    highBinEdge = evInJob*jobNum
    #    if jobNum == nJobs:
    #        highBinEdge = treeEntries
    #else:
    #    lowBinEdge = 0
    #    highBinEdge = treeEntries
    #print "Range of events: (" + str(lowBinEdge) + ", " + str(highBinEdge) + ")"

    # One-element arrays serve as writable buffers for the TTree branches.
    tout_floats = {
        'ST': array('f', [0.]),
        'HT': array('f', [0.]),
        'MET': array('f', [0.]),
        'Mu_Pt': array('f', [0.]),
        'El_Pt': array('f', [0.]),
        'Mass': array('f', [0.]),
        'Weight': array('f', [0.])
    }
    tout_ints = {
        'trigger': array('i', [0]),
        'NJets': array('i', [0]),
        'NMus': array('i', [0]),
        'NEls': array('i', [0])
    }
    outputFile = TFile(outputFileName, "recreate")
    outTree = TTree("Events", "Events")
    outTree = add_dict_to_tree(outTree, tout_floats, "/F")
    outTree = add_dict_to_tree(outTree, tout_ints, "/I")
    crabOutput = CrabOutput(inputFile,
                            inTree,
                            outputFile,
                            outTree,
                            provenance=True,
                            jsonFilter=jsonFilter)
    # Minimum pt (GeV) for all objects entering the sums below.
    min_pt = 70.
    count = 0
    # -------- Begin Loop-------------------------------------
    entries = inTree.entries
    for entry in xrange(entries):
        count = count + 1
        # NOTE(review): py2 integer division — the printed percentage is
        # truncated; confirm intended behavior.
        if count % 10000 == 0:
            print '--------- Processing Event ' + str(
                count) + ' -- percent complete ' + str(
                    100 * count / nTotal) + '% -- '

        # Grab the event
        event = Event(inTree, entry)

        # Per-event accumulators.
        ST = 0.
        MET = 0.
        HT = 0.
        Mu_Pt = 0.
        El_Pt = 0.
        Mass = 0.
        NJets = 0
        NMus = 0
        NEls = 0
        Event_vector = ROOT.TLorentzVector()
        trigger = (int)(inTree.readBranch('HLT_PFHT900')
                        or inTree.readBranch('HLT_PFHT800'))
        # NOTE(review): genWeight is read unconditionally — this branch
        # does not exist in data; confirm this runs on MC only.
        Weight = inTree.readBranch('genWeight')
        AK4JetsCol = Collection(event, "Jet")
        MuonsCol = Collection(event, "Muon")
        ElectronsCol = Collection(event, "Electron")
        PhotonsCol = Collection(event, "Photon")
        MET = inTree.readBranch('MET_pt')

        jets = set()
        mus = set()
        els = set()
        phots = set()
        # Overlap-removal cone size.
        R_min = 0.3
        for jet in AK4JetsCol:
            #jetId : bit1 = loose, bit2 = tight, bit3 = tightLepVeto
            #want loose id
            if ((jet.jetId % 2 == 1) and jet.pt > min_pt):
                jets.add(jet.p4())
        for mu in MuonsCol:
            if (mu.tightId and abs(mu.eta) < 2.4 and mu.pt > min_pt):
                mus.add(mu.p4())
        for el in ElectronsCol:
            #cut based id: 0 = fail, 1 = veto, 2 = loose, 3 = medium , 4 = tight
            #want medium id
            if (el.cutBased >= 3 and abs(el.eta) < 2.5
                    and el.pt > min_pt):
                els.add(el.p4())
        for phot in PhotonsCol:
            #cut based id: 0 = fail, 1 = veto, 2 = loose, 3 = medium , 4 = tight
            #want medium id
            if (phot.cutBased >= 3 and abs(phot.eta) < 2.5
                    and phot.pt > min_pt):
                phots.add(phot.p4())

        jets_to_remove = set()
        els_to_remove = set()
        phots_to_remove = set()
        # Cleanup overlapping jets: for each overlap, drop whichever
        # object fails the Et-fraction criterion.
        # NOTE(review): there is no mus_to_remove — a muon overlapping a
        # jet is never removed (only the jet may be); confirm intended.
        for jet in jets:
            for el in els:
                if (jet.DeltaR(el) < R_min):
                    if ((el.Et() / jet.Et()) > 0.7):
                        jets_to_remove.add(jet)
                    else:
                        els_to_remove.add(el)
            for mu in mus:
                if (jet.DeltaR(mu) < R_min):
                    if ((mu.Et() / jet.Et()) > 0.8):
                        jets_to_remove.add(jet)
            for phot in phots:
                if (jet.DeltaR(phot) < R_min):
                    if ((phot.Et() / jet.Et()) > 0.5):
                        jets_to_remove.add(jet)
                    else:
                        phots_to_remove.add(phot)
        #cleanup overlapping photons and leptons
        for phot in phots:
            for el in els:
                if (phot.DeltaR(el) < R_min):
                    phots_to_remove.add(phot)
            for mu in mus:
                if (phot.DeltaR(mu) < R_min):
                    phots_to_remove.add(phot)
        for el in els:
            for mu in mus:
                if (el.DeltaR(mu) < R_min):
                    els_to_remove.add(el)
        #do the removal (deferred so sets aren't mutated while iterating)
        for jet in jets_to_remove:
            jets.remove(jet)
        for el in els_to_remove:
            els.remove(el)
        for phot in phots_to_remove:
            phots.remove(phot)

        # Accumulate the event-level sums from the cleaned objects.
        for jet in jets:
            ST += jet.Et()
            HT += jet.Et()
            NJets += 1
            Event_vector += jet
        for mu in mus:
            ST += mu.Et()
            Mu_Pt += mu.Et()
            NMus += 1
            Event_vector += mu
        for el in els:
            ST += el.Et()
            El_Pt += el.Et()
            NEls += 1
            Event_vector += el
        for phot in phots:
            ST += phot.Et()
            Event_vector += phot
        Mass = Event_vector.M()

        # Copy the computed values into the branch buffers and fill.
        Float_dict = {
            "ST": ST,
            "MET": MET,
            "HT": HT,
            "Mu_Pt": Mu_Pt,
            "El_Pt": El_Pt,
            "Mass": Mass,
            "Weight": Weight
        }
        Int_dict = {
            "trigger": trigger,
            "NJets": NJets,
            "NMus": NMus,
            "NEls": NEls,
        }
        for key in Int_dict.keys():
            tout_ints[key][0] = Int_dict[key]
        for key in Float_dict.keys():
            tout_floats[key][0] = Float_dict[key]
        outTree.Fill()
    crabOutput.Write()
    return count
def run(self):
    """Run the post-processing job.

    For every input file: optionally pre-skim with the cut/JSON selection,
    either fast-clone the tree (no modules) or run the configured modules in
    an event loop, and write the selected tree to the output directory.
    Reads its configuration from attributes set elsewhere on the object
    (inputFiles, outputDir, modules, cut, json, compression, ...).
    """
    # Output-name suffix: explicit postfix wins, otherwise derived from mode.
    # Defined unconditionally so the per-file loop never sees it unbound
    # (previously only assigned when noOut was False -> NameError with --noout).
    outpostfix = self.postfix if self.postfix is not None else (
        "_Friend" if self.friend else "_Skim")
    # Default so TFile.Open below is always given a defined level.
    compressionLevel = 0
    if not self.noOut:
        if self.compression != "none":
            # compression spec is "ALGO:LEVEL", e.g. "LZMA:9"
            ROOT.gInterpreter.ProcessLine("#include <Compression.h>")
            (algo, level) = self.compression.split(":")
            compressionLevel = int(level)
            if algo == "LZMA":
                compressionAlgo = ROOT.ROOT.kLZMA
            elif algo == "ZLIB":
                compressionAlgo = ROOT.ROOT.kZLIB
            else:
                raise RuntimeError("Unsupported compression %s" % algo)
        print("Will write selected trees to " + self.outputDir)
        if not self.justcount:
            if not os.path.exists(self.outputDir):
                os.system("mkdir -p " + self.outputDir)
    if self.noOut:
        if len(self.modules) == 0:
            raise RuntimeError("Running with --noout and no modules does nothing!")

    for m in self.modules:
        m.beginJob()

    # With no modules we can fast-clone the tree instead of looping on events.
    fullClone = (len(self.modules) == 0)
    outFileNames = []
    for fname in self.inputFiles:
        # open input file and get the input tree
        inFile = ROOT.TFile.Open(fname)
        inTree = inFile.Get("Events")

        # pre-skimming: build an entry list from the cut / JSON, if any
        elist, jsonFilter = preSkim(inTree, self.json, self.cut)
        if self.justcount:
            print('Would select %d entries from %s' % (
                elist.GetN() if elist else inTree.GetEntries(), fname))
            continue
        if fullClone:
            # no need of a reader (no event loop), but set up the elist if available
            if elist:
                inTree.SetEntryList(elist)
        else:
            # initialize reader
            inTree = InputTree(inTree, elist)

        # prepare output file
        outFileName = os.path.join(
            self.outputDir,
            os.path.basename(fname).replace(".root", outpostfix + ".root"))
        outFile = ROOT.TFile.Open(outFileName, "RECREATE", "", compressionLevel)
        outFileNames.append(outFileName)
        if compressionLevel:
            outFile.SetCompressionAlgorithm(compressionAlgo)

        # prepare output tree
        if self.friend:
            outTree = FriendOutput(inFile, inTree, outFile)
        else:
            outTree = FullOutput(inFile, inTree, outFile,
                                 branchSelection=self.branchsel,
                                 fullClone=fullClone,
                                 jsonFilter=jsonFilter,
                                 provenance=self.provenance)

        # process events, if needed
        if not fullClone:
            # 'timeLoop' (not 'time') so the time module is not shadowed
            (nall, npass, timeLoop) = eventLoop(
                self.modules, inFile, outFile, inTree, outTree)
            print('Processed %d entries from %s, selected %d entries' %
                  (nall, fname, npass))
        else:
            # also defines nall for the job report below
            # (previously unset on this branch -> NameError with a jobReport)
            nall = outTree.tree().GetEntries()
            print('Selected %d entries from %s' % (nall, fname))

        # now write the output
        outTree.write()
        outFile.Close()
        print("Done %s" % outFileName)
        if self.jobReport:
            self.jobReport.addInputFile(fname, nall)

    for m in self.modules:
        m.endJob()

    if self.haddFileName:
        # FIXME: remove "./" once haddnano.py is distributed with cms releases
        os.system("./haddnano.py %s %s" %
                  (self.haddFileName, " ".join(outFileNames)))
    if self.jobReport:
        self.jobReport.addOutputFile(self.haddFileName)
        self.jobReport.save()
##################### if os.environ.get('CMSSW_BASE') == None: file = TFile.Open('~/CMS/temp/ttbar_bstar18.root') else: file = TFile.Open( 'root://cmseos.fnal.gov//store/user/lcorcodi/bstar_nano/rootfiles/ttbar_bstar18.root' ) Cuts = LoadCuts('ttbar', '18') tau32cut = 'tau32medium' ################################ # Grab event tree from nanoAOD # ################################ inTree = file.Get("Events") elist, jsonFiter = preSkim(inTree, None, '') # Needs to be done like this because inTree = InputTree(inTree, elist) # TTree does not have entries attribute otherwise treeEntries = inTree.entries # Now inTree is a NanoAOD ttree count = 0 ############## # Begin Loop # ############## nevents = treeEntries for entry in range(0, nevents): count = count + 1 sys.stdout.write("%i / %i ... \r" % (count, nevents)) sys.stdout.flush()