Exemple #1
0
    def run(self):
        """Process every input file: pre-skim, run the modules, write outputs.

        Steps, in order:

        * resolve the output postfix and compression settings, and create
          ``self.outputDir`` (skipped with ``noOut`` or ``justcount``);
        * open the optional histogram file and call ``beginJob`` on each
          module;
        * for each entry of ``self.inputFiles`` (an entry containing commas is
          read as "main file, friend file, ..."): open it, apply ``preSkim``,
          then either clone the tree directly (when there are no modules) or
          run ``eventLoop``, and write the output tree;
        * call ``endJob`` on each module, print the overall rate, optionally
          merge the outputs with ``haddnano.py`` and save the job report.

        Raises:
            RuntimeError: unsupported compression algorithm, ``noOut`` with no
                modules, only one of histogram file / directory given, or an
                input file that cannot be opened.
        """
        if self.postfix is not None:
            outpostfix = self.postfix
        else:
            outpostfix = "_Friend" if self.friend else "_Skim"

        # compressionLevel == 0 means "no compression algorithm to set".
        compressionLevel = 0
        if not self.noOut:
            if self.compression != "none":
                ROOT.gInterpreter.ProcessLine("#include <Compression.h>")
                (algo, level) = self.compression.split(":")
                compressionLevel = int(level)
                if algo == "LZMA":
                    compressionAlgo = ROOT.ROOT.kLZMA
                elif algo == "ZLIB":
                    compressionAlgo = ROOT.ROOT.kZLIB
                elif algo == "LZ4":
                    compressionAlgo = ROOT.ROOT.kLZ4
                else:
                    raise RuntimeError("Unsupported compression %s" % algo)
            print("Will write selected trees to " + self.outputDir)
            if not self.justcount and not os.path.exists(self.outputDir):
                # Create the directory without going through a shell, so that
                # paths containing spaces or shell metacharacters work.
                try:
                    os.makedirs(self.outputDir)
                except OSError:
                    # Tolerate a concurrent creation, like "mkdir -p" would.
                    if not os.path.isdir(self.outputDir):
                        raise
        elif len(self.modules) == 0:
            raise RuntimeError(
                "Running with --noout and no modules does nothing!")

        # Open histogram file, if desired
        haveHistFile = self.histFileName is not None
        haveHistDir = self.histDirName is not None
        if haveHistFile != haveHistDir:
            raise RuntimeError(
                "Must specify both histogram file and histogram directory!")
        if haveHistFile:
            self.histFile = ROOT.TFile.Open(self.histFileName, "RECREATE")
        else:
            self.histFile = None

        for m in self.modules:
            if hasattr(m, 'writeHistFile') and m.writeHistFile:
                m.beginJob(histFile=self.histFile,
                           histDirName=self.histDirName)
            else:
                m.beginJob()

        # Without modules there is no event loop: the tree is cloned directly.
        fullClone = (len(self.modules) == 0)
        outFileNames = []
        t0 = time.time()
        totEntriesRead = 0
        for fname in self.inputFiles:
            # "main.root,friend1.root,..." -> main file plus friend files
            ffnames = []
            if "," in fname:
                fnames = fname.split(',')
                fname, ffnames = fnames[0], fnames[1:]

            # open input file
            if self.prefetch:
                ftoread, toBeDeleted = self.prefetchFile(fname)
                inFile = ROOT.TFile.Open(ftoread)
            else:
                inFile = ROOT.TFile.Open(fname)
            if not inFile:  # check for null pointer
                raise RuntimeError("Unable to open input file %s" % fname)

            # get input tree
            # NOTE(review): "== None" is kept on purpose for ROOT objects; a
            # null PyROOT proxy is presumably not the None singleton, so an
            # identity test could miss it -- confirm before switching to "is".
            inTree = inFile.Get("Events")
            if inTree == None:
                inTree = inFile.Get("Friends")
            nEntries = min(inTree.GetEntries() - self.firstEntry,
                           self.maxEntries)
            totEntriesRead += nEntries
            # pre-skimming
            elist, jsonFilter = preSkim(inTree,
                                        self.json,
                                        self.cut,
                                        maxEntries=self.maxEntries,
                                        firstEntry=self.firstEntry)
            nPreselected = elist.GetN() if elist else nEntries
            percent = nPreselected / (0.01 * nEntries) if nEntries else 0
            if self.justcount:
                print('Would select %d / %d entries from %s (%.2f%%)' %
                      (nPreselected, nEntries, fname, percent))
                if self.prefetch and toBeDeleted:
                    os.unlink(ftoread)
                continue
            print('Pre-select %d entries out of %s (%.2f%%)' %
                  (nPreselected, nEntries, percent))

            # Attach the friend trees. The file and tree lists keep Python
            # references to the opened objects for the rest of the iteration.
            inAddFiles = []
            inAddTrees = []
            for ffname in ffnames:
                inAddFiles.append(ROOT.TFile.Open(ffname))
                inAddTree = inAddFiles[-1].Get("Events")
                if inAddTree == None:
                    inAddTree = inAddFiles[-1].Get("Friends")
                inAddTrees.append(inAddTree)
                inTree.AddFriend(inAddTree)

            if fullClone:
                # no need of a reader (no event loop), but set up the elist if available
                if elist:
                    inTree.SetEntryList(elist)
            else:
                # initialize reader
                inTree = InputTree(inTree, elist)

            # prepare output file
            if not self.noOut:
                outFileName = os.path.join(
                    self.outputDir,
                    os.path.basename(fname).replace(".root",
                                                    outpostfix + ".root"))
                outFile = ROOT.TFile.Open(outFileName, "RECREATE", "",
                                          compressionLevel)
                outFileNames.append(outFileName)
                if compressionLevel:
                    outFile.SetCompressionAlgorithm(compressionAlgo)
                # prepare output tree
                if self.friend:
                    outTree = FriendOutput(inFile, inTree, outFile)
                else:
                    outTree = FullOutput(
                        inFile,
                        inTree,
                        outFile,
                        branchSelection=self.branchsel,
                        outputbranchSelection=self.outputbranchsel,
                        fullClone=fullClone,
                        maxEntries=self.maxEntries,
                        firstEntry=self.firstEntry,
                        jsonFilter=jsonFilter,
                        provenance=self.provenance)
            else:
                outFile = None
                outTree = None
                if self.branchsel:
                    self.branchsel.selectBranches(inTree)

            # process events, if needed
            if not fullClone:
                # An explicit entry range is only passed when no entry list
                # drives the reader.
                if nEntries > 0 and not elist:
                    eventRange = range(self.firstEntry,
                                       self.firstEntry + nEntries)
                else:
                    eventRange = None
                (nall, npass, timeLoop) = eventLoop(self.modules,
                                                    inFile,
                                                    outFile,
                                                    inTree,
                                                    outTree,
                                                    eventRange=eventRange,
                                                    maxEvents=self.maxEntries)
                print(
                    'Processed %d preselected entries from %s (%s entries). Finally selected %d entries'
                    % (nall, fname, nEntries, npass))
            else:
                # fullClone implies output is enabled (noOut without modules
                # raised above), so outTree is a real output tree here.
                nall = nEntries
                nCloned = outTree.tree().GetEntries()
                print('Selected %d / %d entries from %s (%.2f%%)' %
                      (nCloned, nall, fname,
                       nCloned / (0.01 * nall) if nall else 0))

            # now write the output
            if not self.noOut:
                outTree.write()
                outFile.Close()
                print("Done %s" % outFileName)
            if self.jobReport:
                self.jobReport.addInputFile(fname, nall)
            if self.prefetch and toBeDeleted:
                os.unlink(ftoread)

        for m in self.modules:
            m.endJob()

        # Measure the elapsed time once and guard against a zero interval.
        elapsed = time.time() - t0
        rate = totEntriesRead / elapsed if elapsed > 0 else 0.0
        print("Total time %.1f sec. to process %i events. Rate = %.1f Hz." %
              (elapsed, totEntriesRead, rate))

        if self.haddFileName:
            # Prefer a haddnano.py in the working directory, else rely on PATH.
            haddnano = "./haddnano.py" if os.path.isfile(
                "./haddnano.py") else "haddnano.py"
            os.system("%s %s %s" %
                      (haddnano, self.haddFileName, " ".join(outFileNames)))
        if self.jobReport:
            self.jobReport.addOutputFile(self.haddFileName)
            self.jobReport.save()
Exemple #2
0
def NanoReader(process_flag, inputFileNames=["in.root"], outputFileName="out.root", json = '', year = 2016, nEventsMax = -1):
    """Select dijet events from NanoAOD files and pass them to an ``Outputer``.

    For every input file the "Events" tree is read (optionally pre-skimmed
    with ``json``), events must pass all MET filters and at least one trigger
    of the given ``year``, and need two AK8 jets with tight id, pt > 200 and
    |eta| < 2.5 whose invariant mass (built with soft-drop masses) is at least
    1200. Passing events are sent to ``Outputer.fill_event``.

    Args:
        process_flag: truth label forwarded to ``Outputer``.
        inputFileNames: files to read. The default list is never mutated.
        outputFileName: forwarded to ``Outputer``.
        json: forwarded to ``preSkim``; an empty string disables pre-skimming.
        year: 2016, 2017 or 2018; selects the trigger list.
        nEventsMax: stop after this many saved events when positive.

    Returns:
        The number of saved events, or 1 if an input file cannot be opened
        (callers cannot tell that apart from a single saved event).

    Raises:
        SystemExit: with code 1 when ``year`` is not supported.
    """
    import sys  # local import: the file-level import block is not visible here

    #Just tried to copy common filters, feel free to add any i am missing
    filters = ["Flag_goodVertices",
    "Flag_globalTightHalo2016Filter",
    "Flag_eeBadScFilter", 
    "Flag_HBHENoiseFilter",
    "Flag_HBHENoiseIsoFilter",
    "Flag_ecalBadCalibFilter",
    "Flag_EcalDeadCellTriggerPrimitiveFilter",
    "Flag_BadChargedCandidateFilter",
    ]
    if(year == 2016): filters.append("Flag_CSCTightHaloFilter")

    if(year == 2016):
        triggers = ["HLT_PFHT800", "HLT_PFHT900", "HLT_PFJet450", "HLT_PFJet500", "HLT_AK8PFJet450", "HLT_AK8PFJet500"]
    elif(year == 2017 or year == 2018):
        # 2017 and 2018 use the same trigger list
        triggers = ["HLT_PFHT1050", "HLT_PFJet500", "HLT_AK8PFJet380_TrimMass30", 'HLT_AK8PFJet400_TrimMass30']
    else:
        print("Invalid year option of %i. Year must be 2016, 2017, or 2018! \n" % year)
        sys.exit(1)

    mjj_cut = 1200.
    min_pt = 200

    nFiles = len(inputFileNames)
    print("Will run over %i files and output to %s with truth label %i" % (nFiles, outputFileName, process_flag))
    count = 0
    saved = 0

    # One Outputer is shared by all input files, so events selected from
    # earlier files are not dropped. It is created lazily, after the first
    # file has been opened and wrapped.
    out = None

#----------------- Begin loop over files ---------------------------------

    for fileName in inputFileNames:

        print("Opening file %s" % fileName)

        inputFile = TFile.Open(fileName)
        if(not inputFile): #check for null pointer
            print("Unable to open file %s, exting \n" % fileName)
            return 1

        #get input tree
        inTree = inputFile.Get("Events")

        # pre-skimming
        if(json != ''):
            elist,jsonFilter = preSkim(inTree, json)

            #number of events to be processed 
            nTotal = elist.GetN() if elist else inTree.GetEntries()

            print('Pre-select %d entries out of %s '%(nTotal,inTree.GetEntries()))

            inTree= InputTree(inTree, elist) 
        else:
            nTotal = inTree.GetEntries()
            inTree= InputTree(inTree) 
            print('Running over %i entries \n' % nTotal)

        if out is None:
            out = Outputer(outputFileName, truth_label =  process_flag)

# -------- Begin Loop over tree-------------------------------------

        entries = inTree.entries
        # range() works on both Python 2 and 3 (xrange is Python 2 only)
        for entry in range(entries):

            if count % 10000 == 0 :
                print('--------- Processing Event ' + str(count) +'   -- percent complete ' + str(100*count/nTotal/nFiles) + '% -- ')

            count +=1
            # Grab the event
            event = Event(inTree, entry)

            # every filter must pass, at least one trigger must fire
            passTrigger = False
            passFilter = True
            for fil in filters:
                passFilter = passFilter and inTree.readBranch(fil)
            for trig in triggers:
                passTrigger = passTrigger or inTree.readBranch(trig)
            if(not passFilter): continue
            if(not passTrigger): continue

            PFCands = Collection(event, "FatJetPFCands")
            AK8Jets = Collection(event, "FatJet")
            subjets = Collection(event, "SubJet")

            #keep the (up to) 3 highest-pt jets with pt > 200, tight id
            jet1 = jet2 = jet3 =  None

            pf_conts_start = 0 #keep track of indices for PF candidates
            for jet in AK8Jets:
                #jetId : bit1 = loose, bit2 = tight, bit3 = tightLepVeto
                #want tight id
                if((jet.jetId & 2 == 2) and jet.pt > min_pt and abs(jet.eta) < 2.5):
                    jet.PFConstituents_Start = pf_conts_start
                    if(jet1 is None or jet.pt > jet1.pt):
                        jet3 = jet2
                        jet2 = jet1
                        jet1 = jet
                    elif(jet2 is None or jet.pt > jet2.pt):
                        jet3 = jet2
                        jet2 = jet
                    elif(jet3 is None or jet.pt > jet3.pt):
                        jet3 = jet
                # advance for every jet, selected or not
                pf_conts_start += jet.nPFConstituents

            if(jet1 is None or jet2 is None): continue

            #Order jets so jet1 is always the higher mass one
            if(jet1.msoftdrop < jet2.msoftdrop):
                jet1, jet2 = jet2, jet1

            j1_4vec = ROOT.Math.PtEtaPhiMVector(jet1.pt, jet1.eta, jet1.phi, jet1.msoftdrop)
            j2_4vec = ROOT.Math.PtEtaPhiMVector(jet2.pt, jet2.eta, jet2.phi, jet2.msoftdrop)

            dijet = j1_4vec + j2_4vec
            mjj = dijet.M()

            if(mjj< mjj_cut): continue

            saved+=1
            out.fill_event(inTree, jet1, jet2, jet3, PFCands, subjets, mjj)
            if(nEventsMax > 0 and saved >= nEventsMax): break
# -------- End Loop over tree-------------------------------------

        # The break above only leaves the event loop; stop reading further
        # files too once the requested number of events has been saved.
        if(nEventsMax > 0 and saved >= nEventsMax): break
# -------- End Loop over files-------------------------------------

    # No event read (e.g. empty file list): report a zero efficiency instead
    # of dividing by zero.
    efficiency = float(saved)/count if count else 0.0
    if out is not None:
        out.final_write_out(efficiency)
    print("Done. Selected %i events. Selection efficiency is %.3f \n" % (saved, efficiency))
    print("Outputed to %s" % outputFileName)
    return saved
    def run(self):
        """Run the modules over every input file and write one output per file.

        For each file in ``self.inputFiles`` the "Events" tree is pre-skimmed
        with ``preSkim``, wrapped in an ``InputTree`` and processed by an
        inline event loop. With ``self.perJet`` each module's ``fill`` is
        called right after its own ``analyze``; otherwise the output tree is
        filled once per event accepted by all modules. Output files are
        written to ``self.outputDir`` with LZ4 compression at level 4.

        Returns:
            1 if an input file cannot be opened, otherwise ``None``.
        """
        if self.postfix is not None:
            outpostfix = self.postfix
        else:
            outpostfix = "_Friend" if self.friend else "_Skim"
        fullClone = False
        totEntriesRead = 0
        # Job start time. It must not be reset per file, otherwise the final
        # rate would divide the entries of ALL files by the time of the last.
        t0 = time.time()

        for fileName in self.inputFiles:
            # open file
            print("Opening file %s" % fileName)
            inFile = ROOT.TFile.Open(fileName)
            if (not inFile):  #check for null pointer
                print("Unable to open file %s, exting \n" % fileName)
                return 1

            # get input tree
            inTree = inFile.Get("Events")
            nEntries = min(inTree.GetEntries() - self.firstEntry,
                           self.maxEntries)
            totEntriesRead += nEntries

            # pre-skimming
            elist, jsonFilter = preSkim(inTree,
                                        self.json,
                                        self.cut,
                                        maxEntries=self.maxEntries,
                                        firstEntry=self.firstEntry)

            # number of events to be processed
            nTotal = elist.GetN() if elist else nEntries
            print('Pre-select %d entries out of %s ' % (nTotal, nEntries))

            inTree = InputTree(inTree, elist)

            # output (LZ4, level 4; LZMA level 9 would be the alternative)
            outFileName = os.path.join(
                self.outputDir,
                os.path.basename(fileName).replace(".root",
                                                   outpostfix + ".root"))
            compressionAlgo = ROOT.ROOT.kLZ4
            compressionLevel = int(4)
            outFile = ROOT.TFile.Open(outFileName, "RECREATE", "",
                                      compressionLevel)
            outFile.SetCompressionAlgorithm(compressionAlgo)
            maxEntries = self.maxEntries
            if self.perJet:  #save two first jets
                maxEntries = self.maxEntries * 2
            outTree = FullOutput(inFile,
                                 inTree,
                                 outFile,
                                 branchSelection=self.branchsel,
                                 outputbranchSelection=self.outputbranchsel,
                                 fullClone=fullClone,
                                 maxEntries=maxEntries,
                                 firstEntry=self.firstEntry,
                                 jsonFilter=jsonFilter,
                                 provenance=self.provenance)

            # Entries to visit: the entry list content when there is one,
            # else the [firstEntry, firstEntry + nEntries) window.
            if elist:
                eventRange = [(elist.GetEntry(0) if i == 0 else elist.Next())
                              for i in range(elist.GetN())]
            elif nEntries > 0:
                eventRange = range(self.firstEntry,
                                   self.firstEntry + nEntries)
            else:
                eventRange = None

            entries = inTree.entries
            if eventRange:
                entries = len(eventRange)
            maxEvents = self.maxEntries
            if maxEvents > 0:
                entries = min(entries, self.maxEntries)
            entriesRange = range(entries) if eventRange is None else eventRange

            for m in self.modules:
                m.beginFile(inFile, outFile, inTree, outTree, entriesRange)

            for ie, i in enumerate(entriesRange):
                if maxEvents > 0 and ie >= maxEvents:
                    break
                # NOTE(review): the event is addressed by the loop position
                # ``ie``, not by the entry number ``i``; whether that is right
                # depends on how InputTree indexes entries -- confirm.
                e = Event(inTree, ie)

                if self.perJet:
                    # each module fills right after its own analyze succeeds
                    for m in self.modules:
                        if not m.analyze(e, ie):
                            break
                        clearExtraBranches(inTree)
                        m.fill(e, ie)
                else:
                    clearExtraBranches(inTree)
                    ret = True
                    for m in self.modules:
                        ret = m.analyze(e, ie)
                        if not ret:
                            break
                    if ret and outTree is not None:
                        outTree.fill()
            for m in self.modules:
                m.endFile(inFile, outFile, inTree, outTree)

            outTree.write()
            outFile.Close()
            print("Done %s" % outFileName)

        for m in self.modules:
            m.endJob()

        # Measure the elapsed time once and guard against a zero interval.
        elapsed = time.time() - t0
        rate = totEntriesRead / elapsed if elapsed > 0 else 0.0
        print("Total time %.1f sec. to process %i events. Rate = %.1f Hz." %
              (elapsed, totEntriesRead, rate))
Exemple #4
0
    def run(self):
        """Pre-skim each input file, run the modules and write the outputs.

        For each file in ``self.inputFiles`` the "Events" tree is pre-skimmed
        with ``preSkim``; with no modules the tree is cloned directly,
        otherwise ``eventLoop`` runs the modules. Unless ``self.noOut`` is
        set, one output file per input is written to ``self.outputDir``.
        Finally the read rate is printed, the outputs are optionally merged
        with ``./haddnano.py`` and the job report is saved.

        The print calls take a single pre-formatted string so they behave the
        same under Python 2 and Python 3.

        Raises:
            RuntimeError: unsupported compression algorithm, or ``noOut``
                requested without any module.
        """
        # Defined unconditionally: both are needed whenever output is written.
        if self.postfix is not None:
            outpostfix = self.postfix
        else:
            outpostfix = "_Friend" if self.friend else "_Skim"
        compressionLevel = 0

        if not self.noOut:
            if self.compression != "none":
                ROOT.gInterpreter.ProcessLine("#include <Compression.h>")
                (algo, level) = self.compression.split(":")
                compressionLevel = int(level)
                if algo == "LZMA":
                    compressionAlgo = ROOT.ROOT.kLZMA
                elif algo == "ZLIB":
                    compressionAlgo = ROOT.ROOT.kZLIB
                else:
                    raise RuntimeError("Unsupported compression %s" % algo)
            print("Will write selected trees to " + self.outputDir)
            if not self.justcount and not os.path.exists(self.outputDir):
                # Create the directory without going through a shell, so that
                # paths containing spaces or shell metacharacters work.
                try:
                    os.makedirs(self.outputDir)
                except OSError:
                    # Tolerate a concurrent creation, like "mkdir -p" would.
                    if not os.path.isdir(self.outputDir):
                        raise
        elif len(self.modules) == 0:
            raise RuntimeError(
                "Running with --noout and no modules does nothing!")

        for m in self.modules:
            m.beginJob()

        # Without modules there is no event loop: the tree is cloned directly.
        fullClone = (len(self.modules) == 0)
        outFileNames = []
        # Wall-clock time; time.clock() no longer exists on Python >= 3.8.
        t0 = time.time()
        totEntriesRead = 0
        for fname in self.inputFiles:

            # open input file
            inFile = ROOT.TFile.Open(fname)

            #get input tree
            inTree = inFile.Get("Events")
            totEntriesRead += inTree.GetEntries()
            # pre-skimming
            elist, jsonFilter = preSkim(inTree, self.json, self.cut)
            if self.justcount:
                print('Would select %d entries from %s' % (
                    elist.GetN() if elist else inTree.GetEntries(), fname))
                continue
            print('Pre-select %d entries out of %s ' % (
                elist.GetN() if elist else inTree.GetEntries(),
                inTree.GetEntries()))

            if fullClone:
                # no need of a reader (no event loop), but set up the elist if available
                if elist:
                    inTree.SetEntryList(elist)
            else:
                # initialize reader
                inTree = InputTree(inTree, elist)

            if not self.noOut:
                # prepare output file
                outFileName = os.path.join(
                    self.outputDir,
                    os.path.basename(fname).replace(".root",
                                                    outpostfix + ".root"))
                outFile = ROOT.TFile.Open(outFileName, "RECREATE", "",
                                          compressionLevel)
                outFileNames.append(outFileName)
                if compressionLevel:
                    outFile.SetCompressionAlgorithm(compressionAlgo)

                # prepare output tree
                if self.friend:
                    outTree = FriendOutput(inFile, inTree, outFile)
                else:
                    outTree = FullOutput(inFile,
                                         inTree,
                                         outFile,
                                         branchSelection=self.branchsel,
                                         fullClone=fullClone,
                                         jsonFilter=jsonFilter,
                                         provenance=self.provenance)
            else:
                # NOTE(review): assumes eventLoop accepts None for the output
                # file and tree, as the noOut variants of this method in the
                # same code base do -- confirm for this version.
                outFile = None
                outTree = None

            # process events, if needed
            if not fullClone:
                (nall, npass, timeLoop) = eventLoop(self.modules, inFile,
                                                    outFile, inTree, outTree)
                print('Processed %d preselected entries from %s (%s entries). Finally selected %d entries' % (
                    nall, fname, inTree.GetEntries(), npass))
            else:
                # nall must exist for the job report below even when no event
                # loop ran. fullClone implies output is enabled, so outTree
                # is a real output tree here.
                nall = inTree.GetEntries()
                print('Selected %d entries from %s' % (
                    outTree.tree().GetEntries(), fname))

            # now write the output
            if not self.noOut:
                outTree.write()
                outFile.Close()
                print("Done %s" % outFileName)
            if self.jobReport:
                self.jobReport.addInputFile(fname, nall)

        for m in self.modules:
            m.endJob()

        # Guard against a zero interval when nothing was processed.
        elapsed = time.time() - t0
        rate = totEntriesRead / elapsed if elapsed > 0 else 0.0
        print("%s Hz" % rate)

        if self.haddFileName:
            os.system(
                "./haddnano.py %s %s" %
                (self.haddFileName, " ".join(outFileNames))
            )  #FIXME: remove "./" once haddnano.py is distributed with cms releases
        if self.jobReport:
            self.jobReport.addOutputFile(self.haddFileName)
            self.jobReport.save()
    def run(self):
        """Pre-skim each input file, run the modules and write the outputs.

        On top of the skimming itself, each input tree is used to fill
        bookkeeping histograms before any selection is applied:
        ``self.hcount`` (number of entries), optionally an SMS mass histogram
        (``self.SMSMasses``) and an ISR-pt histogram (``self.doISR``), and
        ``self.hsumofweights`` when the "genWeight" branch is enabled. These
        histograms are written next to the output tree.

        The print calls take a single pre-formatted string so they behave the
        same under Python 2 and Python 3.

        Raises:
            RuntimeError: unsupported compression algorithm, ``noOut``
                requested without any module, or only one of histogram file /
                histogram directory given.
        """
        if self.postfix is not None:
            outpostfix = self.postfix
        else:
            outpostfix = "_Friend" if self.friend else "_Skim"

        # compressionLevel == 0 means "no compression algorithm to set".
        compressionLevel = 0
        if not self.noOut:
            if self.compression != "none":
                ROOT.gInterpreter.ProcessLine("#include <Compression.h>")
                (algo, level) = self.compression.split(":")
                compressionLevel = int(level)
                if algo == "LZMA":
                    compressionAlgo = ROOT.ROOT.kLZMA
                elif algo == "ZLIB":
                    compressionAlgo = ROOT.ROOT.kZLIB
                else:
                    raise RuntimeError("Unsupported compression %s" % algo)
            print("Will write selected trees to " + self.outputDir)
            if not self.justcount and not os.path.exists(self.outputDir):
                # Create the directory without going through a shell, so that
                # paths containing spaces or shell metacharacters work.
                try:
                    os.makedirs(self.outputDir)
                except OSError:
                    # Tolerate a concurrent creation, like "mkdir -p" would.
                    if not os.path.isdir(self.outputDir):
                        raise
        elif len(self.modules) == 0:
            raise RuntimeError(
                "Running with --noout and no modules does nothing!")

        # Open histogram file, if desired
        haveHistFile = self.histFileName is not None
        haveHistDir = self.histDirName is not None
        if haveHistFile != haveHistDir:
            raise RuntimeError(
                "Must specify both histogram file and histogram directory!")
        if haveHistFile:
            self.histFile = ROOT.TFile.Open(self.histFileName, "RECREATE")
        else:
            self.histFile = None

        for m in self.modules:
            if hasattr(m, 'writeHistFile') and m.writeHistFile:
                m.beginJob(histFile=self.histFile,
                           histDirName=self.histDirName)
            else:
                m.beginJob()

        # Without modules there is no event loop: the tree is cloned directly.
        fullClone = (len(self.modules) == 0)
        outFileNames = []
        # Wall-clock time; time.clock() no longer exists on Python >= 3.8.
        t0 = time.time()
        totEntriesRead = 0
        for fname in self.inputFiles:

            # open input file
            inFile = ROOT.TFile.Open(fname)

            #get input tree
            inTree = inFile.Get("Events")
            totEntriesRead += inTree.GetEntries()
            self.hcount.SetBinContent(1, inTree.GetEntries())
            ROOT.gROOT.SetBatch(True)
            if self.SMSMasses is not None:
                inTree.Draw(
                    "MaxIf$(GenPart_mass, abs(GenPart_pdgId) == %i):MaxIf$(GenPart_mass, abs(GenPart_pdgId) == %i) >> hSMS(2000, -0.5, 1999.5, 2000, -0.5, 1999.5)"
                    % (self.SMSMasses[0], self.SMSMasses[1]))
                self.hsmscount = ROOT.gDirectory.Get('hSMS')
            if self.doISR is not None:
                #Dirty ISR recipe for EWKinos
                #Need to correct for each mass point
                #Can't correct per sample (wrong normalization), need to save whole unskimmed histogram per point an then postprocess
                # NOTE: this branch also reads self.SMSMasses, so doISR
                # requires SMSMasses to be set as well.
                pt1 = "MaxIf$(GenPart_pt, abs(GenPart_pdgId) == %i && GenPart_status == 22)" % self.doISR[
                    0]
                pt2 = "MaxIf$(GenPart_pt, abs(GenPart_pdgId) == %i && GenPart_status == 22)" % self.doISR[
                    1]
                # The azimuthal angles must come from GenPart_phi: pt_ISR
                # below takes cos/sin of (phi2 - phi1).
                phi1 = "MaxIf$(GenPart_phi, abs(GenPart_pdgId) == %i && GenPart_status == 22)" % self.doISR[
                    0]
                phi2 = "MaxIf$(GenPart_phi, abs(GenPart_pdgId) == %i && GenPart_status == 22)" % self.doISR[
                    1]
                # magnitude of the vector sum of the two transverse momenta
                pt_ISR = "hypot(%s + %s * cos(%s-%s), %s*sin(%s - %s))" % (
                    pt1, pt2, phi2, phi1, pt2, phi2, phi1)
                inTree.Draw(
                    " %s : MaxIf$(GenPart_mass, abs(GenPart_pdgId) == %i) : MaxIf$(GenPart_mass, abs(GenPart_pdgId) == %i)  >> hISR(1000, -0.5, 1999.5, 1000, -0.5, 1999.5, 20, 0, 1000)"
                    % (pt_ISR, self.SMSMasses[0], self.SMSMasses[1]))
                self.hISR = ROOT.gDirectory.Get("hISR")
            if inTree.GetBranchStatus("genWeight"):
                # sum of generator weights over the whole, unskimmed tree
                inTree.Project("SumWeightsTemp", "1.0", "genWeight")
                sow = ROOT.gROOT.FindObject("SumWeightsTemp").Integral()
                self.hsumofweights.SetBinContent(1, sow)
            # pre-skimming
            elist, jsonFilter = preSkim(inTree, self.json, self.cut)
            if self.justcount:
                print('Would select %d entries from %s' % (
                    elist.GetN() if elist else inTree.GetEntries(), fname))
                continue
            print('Pre-select %d entries out of %s ' % (
                elist.GetN() if elist else inTree.GetEntries(),
                inTree.GetEntries()))

            if fullClone:
                # no need of a reader (no event loop), but set up the elist if available
                if elist:
                    inTree.SetEntryList(elist)
            else:
                # initialize reader
                inTree = InputTree(inTree, elist)

            # prepare output file
            if not self.noOut:
                outFileName = os.path.join(
                    self.outputDir,
                    os.path.basename(fname).replace(".root",
                                                    outpostfix + ".root"))
                outFile = ROOT.TFile.Open(outFileName, "RECREATE", "",
                                          compressionLevel)
                outFileNames.append(outFileName)
                if compressionLevel:
                    outFile.SetCompressionAlgorithm(compressionAlgo)
                # prepare output tree
                if self.friend:
                    outTree = FriendOutput(inFile, inTree, outFile)
                else:
                    outTree = FullOutput(
                        inFile,
                        inTree,
                        outFile,
                        branchSelection=self.branchsel,
                        outputbranchSelection=self.outputbranchsel,
                        fullClone=fullClone,
                        jsonFilter=jsonFilter,
                        provenance=self.provenance)
            else:
                outFile = None
                outTree = None

            # process events, if needed
            if not fullClone:
                (nall, npass, timeLoop) = eventLoop(self.modules, inFile,
                                                    outFile, inTree, outTree)
                print('Processed %d preselected entries from %s (%s entries). Finally selected %d entries' % (
                    nall, fname, inTree.GetEntries(), npass))
            else:
                # fullClone implies output is enabled (noOut without modules
                # raised above), so outTree is a real output tree here.
                nall = inTree.GetEntries()
                print('Selected %d entries from %s' % (
                    outTree.tree().GetEntries(), fname))

            # now write the output
            if not self.noOut:
                print("Start writing")
                self.hcount.Write()
                print("Start writing")
                if self.SMSMasses is not None:
                    self.hsmscount.Write()
                print("Start writing")
                if self.doISR is not None:
                    self.hISR.Write()
                print("Start writing")
                self.hsumofweights.Write()
                outTree.write()
                outFile.Close()
                print("Done %s" % outFileName)
            if self.jobReport:
                self.jobReport.addInputFile(fname, nall)

        for m in self.modules:
            m.endJob()

        # Guard against a zero interval when nothing was processed.
        elapsed = time.time() - t0
        rate = totEntriesRead / elapsed if elapsed > 0 else 0.0
        print("%s Hz" % rate)

        if self.haddFileName:
            os.system(
                "./haddnano.py %s %s" %
                (self.haddFileName, " ".join(outFileNames))
            )  #FIXME: remove "./" once haddnano.py is distributed with cms releases
        if self.jobReport:
            self.jobReport.addOutputFile(self.haddFileName)
            self.jobReport.save()
    '16': TFile.Open('data16C_sample.root'),
    '17': TFile.Open('data17C_sample.root')
}

# Filled later in the script (not visible in this excerpt).
ratios = {}

# An empty `openfile` means the histograms are produced from scratch and
# stored in a freshly (re)created output file.
if openfile == '':
    out = TFile('YearComparisonOut.root', 'RECREATE')

    # One pass per entry of `files`; the key (e.g. '16', '17') is used as a
    # suffix so histogram names stay unique across entries.
    for string_f in files.keys():
        f = files[string_f]

        print 'Working on ' + string_f

        # Wrap the "Events" tree in the framework reader. preSkim is called
        # with no JSON and an empty cut; its second return value is unused.
        inTree = f.Get("Events")
        elist, jsonFiter = preSkim(inTree, None, '')
        inTree = InputTree(inTree, elist)
        treeEntries = inTree.entries

        # Histograms for this entry: two jet-pt spectra (160 bins, 400-2000),
        # two jet-eta spectra (40 bins, -4 to 4) and electron / muon pt
        # spectra (100 bins, 0-200).
        # NOTE(review): the "pass" suffix presumably marks objects passing a
        # selection applied further down -- confirm against the full script.
        Jet1ptpass = TH1F('Jet1ptpass' + string_f, 'Jet1ptpass' + string_f,
                          160, 400, 2000)
        Jet2ptpass = TH1F('Jet2ptpass' + string_f, 'Jet2ptpass' + string_f,
                          160, 400, 2000)
        Jet1etapass = TH1F('Jet1etapass' + string_f, 'Jet1etapass' + string_f,
                           40, -4.0, 4.0)
        Jet2etapass = TH1F('Jet2etapass' + string_f, 'Jet2etapass' + string_f,
                           40, -4.0, 4.0)
        eptpass = TH1F('eptpass' + string_f, 'eptpass' + string_f, 100, 0, 200)
        muptpass = TH1F('muptpass' + string_f, 'muptpass' + string_f, 100, 0,
                        200)
Exemple #7
0
    def run(self):
        """Process every input file: pre-skim, run the modules, write output.

        Steps, in order:

        1. Resolve the output-name postfix and, unless ``self.noOut`` is
           set, parse ``self.compression`` ("ALGO:level" or "none") and
           create ``self.outputDir`` when it does not exist.
        2. Open the histogram file when both ``self.histFileName`` and
           ``self.histDirName`` are given, then call ``beginJob`` on each
           module.
        3. For each entry of ``self.inputFiles`` (either "main.root" or
           "main.root,friend1.root,..."): open it, optionally book the
           genWeight histograms, pre-skim with ``preSkim``, attach the
           friend trees, build the output tree, run ``eventLoop`` when
           there are modules, and write the result.
        4. Call ``endJob`` on each module, print the processing rate, merge
           the outputs with haddnano.py when ``self.haddFileName`` is set
           and update ``self.jobReport``.

        Raises:
            RuntimeError: on an unsupported compression algorithm, when
                ``noOut`` is combined with an empty module list, when only
                one of histFileName/histDirName is set, or when the input
                tree and the "Runs" tree disagree on the number of events
                while booking the genWeight histograms.
        """
        outpostfix = self.postfix if self.postfix is not None else (
            "_Friend" if self.friend else "_Skim")
        if self.allowNoPostfix and self.postfix is None:
            outpostfix = ""

        # Stays 0 (no explicit algorithm) unless an "ALGO:level" spec is given.
        compressionLevel = 0
        if not self.noOut:
            if self.compression != "none":
                ROOT.gInterpreter.ProcessLine("#include <Compression.h>")
                (algo, level) = self.compression.split(":")
                compressionLevel = int(level)
                if algo == "LZMA":
                    compressionAlgo = ROOT.ROOT.kLZMA
                elif algo == "ZLIB":
                    compressionAlgo = ROOT.ROOT.kZLIB
                elif algo == "LZ4":
                    compressionAlgo = ROOT.ROOT.kLZ4
                else:
                    raise RuntimeError("Unsupported compression %s" % algo)
            print("Will write selected trees to " + self.outputDir)
            if not self.justcount:
                if not os.path.exists(self.outputDir):
                    os.system("mkdir -p " + self.outputDir)

        if self.noOut and len(self.modules) == 0:
            raise RuntimeError(
                "Running with --noout and no modules does nothing!")

        # Open histogram file, if desired
        if (self.histFileName is not None and self.histDirName is None) or (
                self.histFileName is None and self.histDirName is not None):
            raise RuntimeError(
                "Must specify both histogram file and histogram directory!")
        elif self.histFileName is not None and self.histDirName is not None:
            # Both are set: this is the only case where a file is opened.
            self.histFile = ROOT.TFile.Open(self.histFileName, "RECREATE")
        else:
            self.histFile = None

        for m in self.modules:
            if hasattr(m, 'writeHistFile') and m.writeHistFile:
                m.beginJob(histFile=self.histFile,
                           histDirName=self.histDirName)
            else:
                m.beginJob()

        # Without modules there is no event loop: the tree is cloned as is.
        fullClone = (len(self.modules) == 0)
        outFileNames = []
        t0 = time.time()
        totEntriesRead = 0
        for fname in self.inputFiles:
            # "main.root,friend1.root,..." -> main file plus friend files
            ffnames = []
            if "," in fname:
                fnames = fname.split(',')
                fname, ffnames = fnames[0], fnames[1:]

            # open input file
            if self.prefetch:
                ftoread, toBeDeleted = self.prefetchFile(fname)
                inFile = ROOT.TFile.Open(ftoread)
            else:
                inFile = ROOT.TFile.Open(fname)

            if not inFile:
                print('ERROR: file does not exist, check!')
                print('    filename: %s' % fname)
                # NOTE(review): exits with status 0 although this is an
                # error path; kept as is because callers may rely on it.
                exit(0)

            # get input tree
            inTree = inFile.Get("Events")
            # A missing key comes back from PyROOT as a null-pointer proxy,
            # which is falsy but is not the None singleton, so test truth.
            if not inTree:
                inTree = inFile.Get("Friends")
            nEntries = min(inTree.GetEntries() - self.firstEntry,
                           self.maxEntries)
            # first check that the histogram with weights is not already in the file
            hasWeightHistograms = False
            # Reset per file so a histogram booked for a previous input is
            # never written into this file's output.
            hGenWeights = None
            hNumWeights = None
            if inFile.GetListOfKeys().Contains(
                    "hGenWeights") and inFile.GetListOfKeys().Contains(
                        "hNumWeights"):
                hasWeightHistograms = True
                print("Histogram hGenWeights already exists, I will just copy it without recreating it")
            if self.saveHistoGenWeights and inTree.GetName(
            ) == "Events" and not hasWeightHistograms:
                print("Histogram hGenWeights does not exist yet, I will create it")
                # check that the tree contains all the original events, otherwise the sum of gen weights will miss some
                tmpTreeRuns = inFile.Get("Runs")
                for ievt, event in enumerate(tmpTreeRuns):
                    if ievt:
                        break  # only need first event (but there should be only 1 here)
                    nGenEvents = event.genEventCount
                if nGenEvents != inTree.GetEntries():
                    raise RuntimeError(
                        "I am creating the histogram with genWeight, but tree Events has less entries than genEventCount in tree Runs (%s instead of %s). The sum of weights will thus be wrong, please check"
                        % (str(inTree.GetEntries()), str(nGenEvents)))

                # saving distribution of genWeight for offline usage
                # idea is to fill the distribution of Log10(genWeight) with the sign, so to have a histogram from about -10 to 10
                # with about 10k bins (genWeights can take values spanning several orders of magnitude, especially for fancy weights)
                # then one can compute the sum of genWeight in a given range using its integral (using Log10(threshold) ).
                # This somehow relies on having always |genWeight|>1, should it be < 1 the Log would change the sign.
                # So for the purpose of choosing the bin to be filled, we use |value| or 1.001, whatever is larger (this will not affect the integral)
                # then, need a second histogram to keep the integer number of events in each bin, so to allow for clipping of large weights
                hGenWeights = ROOT.TH1D("hGenWeights",
                                        "distribution of Log10(genWeight)",
                                        4800, -12.0, 12.0)
                hNumWeights = ROOT.TH1D(
                    "hNumWeights",
                    "distribution of Log10(genWeight) (unweighted)", 4800,
                    -12.0, 12.0)
                inTree.Draw(
                    "TMath::Sign(1.0,genWeight)*TMath::Log10(max(1.001,abs(genWeight)))>>hGenWeights",
                    "genWeight", "goff", nEntries, self.firstEntry)
                inTree.Draw(
                    "TMath::Sign(1.0,genWeight)*TMath::Log10(max(1.001,abs(genWeight)))>>hNumWeights",
                    "1", "goff", nEntries, self.firstEntry)
            totEntriesRead += nEntries
            # pre-skimming
            elist, jsonFilter = preSkim(inTree,
                                        self.json,
                                        self.cut,
                                        maxEntries=self.maxEntries,
                                        firstEntry=self.firstEntry)
            if self.justcount:
                print('Would select %d / %d entries from %s (%.2f%%)' %
                      (elist.GetN() if elist else nEntries, nEntries, fname,
                       (elist.GetN() if elist else nEntries) /
                       (0.01 * nEntries) if nEntries else 0))
                if self.prefetch:
                    if toBeDeleted:
                        os.unlink(ftoread)
                continue

            print('Pre-select %d entries out of %s (%.2f%%)' %
                  (elist.GetN() if elist else nEntries, nEntries,
                   (elist.GetN() if elist else nEntries) /
                   (0.01 * nEntries) if nEntries else 0))

            # Friend files: keep both the files and the trees referenced so
            # they stay alive while the main tree uses them.
            inAddFiles = []
            inAddTrees = []
            for ffname in ffnames:
                inAddFiles.append(ROOT.TFile.Open(ffname))
                inAddTree = inAddFiles[-1].Get("Events")
                if not inAddTree:  # null-pointer proxy, see above
                    inAddTree = inAddFiles[-1].Get("Friends")
                inAddTrees.append(inAddTree)
                inTree.AddFriend(inAddTree)

            if fullClone:
                # no need of a reader (no event loop), but set up the elist if available
                if elist:
                    inTree.SetEntryList(elist)
            else:
                # initialize reader
                inTree = InputTree(inTree, elist)

            # prepare output file
            if not self.noOut:
                outFileName = os.path.join(
                    self.outputDir,
                    os.path.basename(fname).replace(".root",
                                                    outpostfix + ".root"))
                outFile = ROOT.TFile.Open(outFileName, "RECREATE", "",
                                          compressionLevel)
                outFileNames.append(outFileName)
                if compressionLevel:
                    outFile.SetCompressionAlgorithm(compressionAlgo)
                # prepare output tree
                if self.friend:
                    outTree = FriendOutput(inFile, inTree, outFile)
                else:
                    outTree = FullOutput(
                        inFile,
                        inTree,
                        outFile,
                        branchSelection=self.branchsel,
                        outputbranchSelection=self.outputbranchsel,
                        fullClone=fullClone,
                        maxEntries=self.maxEntries,
                        firstEntry=self.firstEntry,
                        jsonFilter=jsonFilter,
                        provenance=self.provenance)
            else:
                outFile = None
                outTree = None
                if self.branchsel:
                    self.branchsel.selectBranches(inTree)

            # process events, if needed
            if not fullClone:
                eventRange = range(
                    self.firstEntry, self.firstEntry +
                    nEntries) if nEntries > 0 and not elist else None
                (nall, npass, timeLoop) = eventLoop(self.modules,
                                                    inFile,
                                                    outFile,
                                                    inTree,
                                                    outTree,
                                                    eventRange=eventRange,
                                                    maxEvents=self.maxEntries)
                print(
                    'Processed %d preselected entries from %s (%s entries). Finally selected %d entries'
                    % (nall, fname, nEntries, npass))
            else:
                # fullClone implies output is enabled (checked above), so
                # outTree is always set here.
                nall = nEntries
                print('Selected %d / %d entries from %s (%.2f%%)' %
                      (outTree.tree().GetEntries(), nall, fname,
                       outTree.tree().GetEntries() /
                       (0.01 * nall) if nall else 0))

            # now write the output
            if not self.noOut:
                outTree.write()
                # Only set when the histograms were booked for this file.
                if hGenWeights is not None:
                    hGenWeights.Write(hGenWeights.GetName())
                    hNumWeights.Write(hNumWeights.GetName())
                outFile.Close()
                print("Done %s" % outFileName)
            if self.jobReport:
                self.jobReport.addInputFile(fname, nall)
            if self.prefetch:
                if toBeDeleted:
                    os.unlink(ftoread)

        for m in self.modules:
            m.endJob()

        # Measure once and guard the rate against a zero elapsed time.
        elapsed = time.time() - t0
        rate = totEntriesRead / elapsed if elapsed > 0 else 0.0
        print("Total time %.1f sec. to process %i events. Rate = %.1f Hz." %
              (elapsed, totEntriesRead, rate))

        if self.haddFileName:
            # Prefer a local copy of haddnano.py, else rely on $PATH.
            haddnano = "./haddnano.py" if os.path.isfile(
                "./haddnano.py") else "haddnano.py"
            os.system("%s %s %s" %
                      (haddnano, self.haddFileName, " ".join(outFileNames)))
        if self.jobReport:
            self.jobReport.addOutputFile(self.haddFileName)
            self.jobReport.save()
Exemple #8
0
                "Running with --noout and no modules does nothing!")

    for m in modules:
        m.beginJob()

    fullClone = (len(modules) == 0)

    for fname in args:
        # open input file
        inFile = ROOT.TFile.Open(fname)

        #get input tree
        inTree = inFile.Get("Events")

        # pre-skimming
        elist = preSkim(inTree, options.json, options.cut)
        if options.justcount:
            print 'Would select %d entries from %s' % (
                elist.GetN() if elist else inTree.GetEntries(), fname)
            continue

        if fullClone:
            # no need of a reader (no event loop), but set up the elist if available
            if elist: inTree.SetEntryList(elist)
        else:
            # initialize reader
            inTree = InputTree(inTree, elist)

        # prepare output file
        outFileName = os.path.join(
            outdir,
Exemple #9
0
def NanoReader(inputFileName="in.root",
               outputFileName="out.root",
               cut=None,
               nJobs=1,
               jobNum=1,
               json=None):
    """Reduce the "Events" tree of a NanoAOD file to a few summary branches.

    The input tree is pre-skimmed with ``preSkim(tree, json, cut)``, then
    every entry is read and jets, muons, electrons and photons above
    ``min_pt`` are selected, cleaned of overlaps within ``R_min`` and summed
    into the output branches ST, HT, MET, Mu_Pt, El_Pt, Mass, Weight,
    trigger, NJets, NMus and NEls. The result is written through
    ``CrabOutput``.

    ``nJobs`` and ``jobNum`` are accepted but not used: job splitting is
    not implemented in this function.

    Returns:
        1 when the input file cannot be opened, otherwise the number of
        entries that were looped over.
    """
    inputFile = TFile.Open(inputFileName)
    if not inputFile:  # null pointer: the file could not be opened
        print("Unable to open file %s, exting \n" % inputFileName)
        return 1

    # Input tree and pre-skim
    rawTree = inputFile.Get("Events")
    elist, jsonFilter = preSkim(rawTree, json, cut)

    # Number of entries that will be processed
    if elist:
        nTotal = elist.GetN()
    else:
        nTotal = rawTree.GetEntries()

    print('Pre-select %d entries out of %s ' % (nTotal, rawTree.GetEntries()))

    inTree = InputTree(rawTree, elist)

    # The branch entry count is looked up but its value is not used below;
    # the calls are kept so the sequence of tree accesses is unchanged.
    inTree.GetBranch('event').GetEntries()

    # One-element buffers bound to the output branches. Kept as literals so
    # the key order handed to add_dict_to_tree is unchanged.
    tout_floats = {
        'ST': array('f', [0.]),
        'HT': array('f', [0.]),
        'MET': array('f', [0.]),
        'Mu_Pt': array('f', [0.]),
        'El_Pt': array('f', [0.]),
        'Mass': array('f', [0.]),
        'Weight': array('f', [0.])
    }

    tout_ints = {
        'trigger': array('i', [0]),
        'NJets': array('i', [0]),
        'NMus': array('i', [0]),
        'NEls': array('i', [0])
    }

    outputFile = TFile(outputFileName, "recreate")

    outTree = TTree("Events", "Events")
    outTree = add_dict_to_tree(outTree, tout_floats, "/F")
    outTree = add_dict_to_tree(outTree, tout_ints, "/I")

    crabOutput = CrabOutput(inputFile,
                            inTree,
                            outputFile,
                            outTree,
                            provenance=True,
                            jsonFilter=jsonFilter)

    min_pt = 70.   # pt threshold applied to every object type
    R_min = 0.3    # DeltaR below which two objects count as overlapping
    count = 0

    # -------- Begin Loop-------------------------------------
    entries = inTree.entries
    for entry in xrange(entries):

        count += 1
        if count % 10000 == 0:
            progress = ('--------- Processing Event ' + str(count) +
                        '   -- percent complete ' +
                        str(100 * count / nTotal) + '% -- ')
            print(progress)

        # Grab the event
        event = Event(inTree, entry)

        Event_vector = ROOT.TLorentzVector()

        trigger = int(
            inTree.readBranch('HLT_PFHT900')
            or inTree.readBranch('HLT_PFHT800'))
        Weight = inTree.readBranch('genWeight')

        AK4JetsCol = Collection(event, "Jet")
        MuonsCol = Collection(event, "Muon")
        ElectronsCol = Collection(event, "Electron")
        PhotonsCol = Collection(event, "Photon")
        MET = inTree.readBranch('MET_pt')

        # jetId : bit1 = loose, bit2 = tight, bit3 = tightLepVeto
        # -> odd jetId means the loose bit is set
        jets = set(jet.p4() for jet in AK4JetsCol
                   if jet.jetId % 2 == 1 and jet.pt > min_pt)
        mus = set(mu.p4() for mu in MuonsCol
                  if mu.tightId and abs(mu.eta) < 2.4 and mu.pt > min_pt)
        # cut based id: 0 = fail, 1 = veto, 2 = loose, 3 = medium, 4 = tight
        # -> medium or better for electrons and photons
        els = set(el.p4() for el in ElectronsCol
                  if el.cutBased >= 3 and abs(el.eta) < 2.5
                  and el.pt > min_pt)
        phots = set(phot.p4() for phot in PhotonsCol
                    if phot.cutBased >= 3 and abs(phot.eta) < 2.5
                    and phot.pt > min_pt)

        jets_to_remove = set()
        els_to_remove = set()
        phots_to_remove = set()

        # Jet overlaps: the Et ratio decides whether the jet or the other
        # object is dropped (an overlapping muon never gets dropped here).
        for jet in jets:
            for el in els:
                if jet.DeltaR(el) < R_min:
                    if el.Et() / jet.Et() > 0.7:
                        jets_to_remove.add(jet)
                    else:
                        els_to_remove.add(el)
            for mu in mus:
                if jet.DeltaR(mu) < R_min and mu.Et() / jet.Et() > 0.8:
                    jets_to_remove.add(jet)
            for phot in phots:
                if jet.DeltaR(phot) < R_min:
                    if phot.Et() / jet.Et() > 0.5:
                        jets_to_remove.add(jet)
                    else:
                        phots_to_remove.add(phot)

        # Photons overlapping any lepton are dropped
        for phot in phots:
            if (any(phot.DeltaR(el) < R_min for el in els)
                    or any(phot.DeltaR(mu) < R_min for mu in mus)):
                phots_to_remove.add(phot)
        # Electrons overlapping a muon are dropped
        for el in els:
            if any(el.DeltaR(mu) < R_min for mu in mus):
                els_to_remove.add(el)

        # Apply the removals
        for kept, rejected in ((jets, jets_to_remove),
                               (els, els_to_remove),
                               (phots, phots_to_remove)):
            for obj in rejected:
                kept.remove(obj)

        # Accumulate the event-level sums, in the order jets, muons,
        # electrons, photons.
        ST = 0.
        HT = 0.
        Mu_Pt = 0.
        El_Pt = 0.
        NJets = 0
        NMus = 0
        NEls = 0
        for jet in jets:
            ST += jet.Et()
            HT += jet.Et()
            NJets += 1
            Event_vector += jet
        for mu in mus:
            ST += mu.Et()
            Mu_Pt += mu.Et()
            NMus += 1
            Event_vector += mu
        for el in els:
            ST += el.Et()
            El_Pt += el.Et()
            NEls += 1
            Event_vector += el
        for phot in phots:
            ST += phot.Et()
            Event_vector += phot

        Mass = Event_vector.M()

        Int_dict = {
            "trigger": trigger,
            "NJets": NJets,
            "NMus": NMus,
            "NEls": NEls,
        }
        Float_dict = {
            "ST": ST,
            "MET": MET,
            "HT": HT,
            "Mu_Pt": Mu_Pt,
            "El_Pt": El_Pt,
            "Mass": Mass,
            "Weight": Weight
        }

        # Copy the values into the branch buffers, then store the entry
        for key, value in Int_dict.items():
            tout_ints[key][0] = value
        for key, value in Float_dict.items():
            tout_floats[key][0] = value

        outTree.Fill()

    crabOutput.Write()
    return count
Exemple #10
0
    def run(self):
        """Process every file in ``self.inputFiles`` and write the skims.

        Unless ``self.noOut`` is set, the compression spec
        (``self.compression``, "ALGO:level" or "none") is parsed and
        ``self.outputDir`` is created when needed. ``beginJob`` is called on
        each module, then each input file is opened, pre-skimmed with
        ``preSkim`` and either cloned (no modules) or passed through
        ``eventLoop``. Output files and trees are only created when
        ``self.noOut`` is not set. Finally ``endJob`` is called on each
        module, the outputs are merged with haddnano.py when
        ``self.haddFileName`` is set, and ``self.jobReport`` is updated.

        Raises:
            RuntimeError: on an unsupported compression algorithm, or when
                ``noOut`` is combined with an empty module list.
        """
        # Bound on every path so the file loop never sees an undefined name.
        compressionLevel = 0
        if not self.noOut:
            outpostfix = self.postfix if self.postfix is not None else (
                "_Friend" if self.friend else "_Skim")
            if self.compression != "none":
                ROOT.gInterpreter.ProcessLine("#include <Compression.h>")
                (algo, level) = self.compression.split(":")
                compressionLevel = int(level)
                if algo == "LZMA":
                    compressionAlgo = ROOT.ROOT.kLZMA
                elif algo == "ZLIB":
                    compressionAlgo = ROOT.ROOT.kZLIB
                else:
                    raise RuntimeError("Unsupported compression %s" % algo)
            print("Will write selected trees to " + self.outputDir)
            if not self.justcount:
                if not os.path.exists(self.outputDir):
                    os.system("mkdir -p " + self.outputDir)

        if self.noOut and len(self.modules) == 0:
            raise RuntimeError(
                "Running with --noout and no modules does nothing!")

        for m in self.modules:
            m.beginJob()

        # Without modules there is no event loop: the tree is cloned as is.
        fullClone = (len(self.modules) == 0)
        outFileNames = []
        for fname in self.inputFiles:
            # open input file
            inFile = ROOT.TFile.Open(fname)

            # get input tree
            inTree = inFile.Get("Events")

            # pre-skimming
            elist, jsonFilter = preSkim(inTree, self.json, self.cut)
            if self.justcount:
                print('Would select %d entries from %s' % (
                    elist.GetN() if elist else inTree.GetEntries(), fname))
                continue

            if fullClone:
                # no need of a reader (no event loop), but set up the elist if available
                if elist:
                    inTree.SetEntryList(elist)
            else:
                # initialize reader
                inTree = InputTree(inTree, elist)

            # prepare output file and tree, only when output is requested
            if not self.noOut:
                outFileName = os.path.join(
                    self.outputDir,
                    os.path.basename(fname).replace(".root",
                                                    outpostfix + ".root"))
                outFile = ROOT.TFile.Open(outFileName, "RECREATE", "",
                                          compressionLevel)
                outFileNames.append(outFileName)
                if compressionLevel:
                    outFile.SetCompressionAlgorithm(compressionAlgo)

                if self.friend:
                    outTree = FriendOutput(inFile, inTree, outFile)
                else:
                    outTree = FullOutput(inFile,
                                         inTree,
                                         outFile,
                                         branchSelection=self.branchsel,
                                         fullClone=fullClone,
                                         jsonFilter=jsonFilter,
                                         provenance=self.provenance)
            else:
                # NOTE(review): assumes eventLoop and the modules accept
                # None for the output file/tree - confirm against eventLoop.
                outFile = None
                outTree = None

            # process events, if needed
            if not fullClone:
                # third item is the loop timing; named so it does not
                # shadow the `time` module
                (nall, npass, timeLoop) = eventLoop(self.modules, inFile,
                                                    outFile, inTree, outTree)
                print('Processed %d entries from %s, selected %d entries' %
                      (nall, fname, npass))
            else:
                # fullClone implies output is enabled (checked above), so
                # outTree is set; inTree is still the plain input tree here.
                nall = inTree.GetEntries()
                print('Selected %d entries from %s' %
                      (outTree.tree().GetEntries(), fname))

            # now write the output
            if not self.noOut:
                outTree.write()
                outFile.Close()
                print("Done %s" % outFileName)
            if self.jobReport:
                self.jobReport.addInputFile(fname, nall)

        for m in self.modules:
            m.endJob()

        if self.haddFileName:
            os.system(
                "./haddnano.py %s %s" %
                (self.haddFileName, " ".join(outFileNames))
            )  #FIXME: remove "./" once haddnano.py is distributed with cms releases
        if self.jobReport:
            self.jobReport.addOutputFile(self.haddFileName)
            self.jobReport.save()
Exemple #11
0
#####################
if os.environ.get('CMSSW_BASE') == None:
    file = TFile.Open('~/CMS/temp/ttbar_bstar18.root')
else:
    file = TFile.Open(
        'root://cmseos.fnal.gov//store/user/lcorcodi/bstar_nano/rootfiles/ttbar_bstar18.root'
    )

Cuts = LoadCuts('ttbar', '18')
tau32cut = 'tau32medium'

################################
# Grab event tree from nanoAOD #
################################
inTree = file.Get("Events")
elist, jsonFiter = preSkim(inTree, None,
                           '')  # Needs to be done like this because
inTree = InputTree(inTree,
                   elist)  # TTree does not have entries attribute otherwise
treeEntries = inTree.entries  # Now inTree is a NanoAOD ttree

count = 0

##############
# Begin Loop #
##############
nevents = treeEntries
for entry in range(0, nevents):
    count = count + 1
    sys.stdout.write("%i / %i ... \r" % (count, nevents))
    sys.stdout.flush()