Exemple #1
0
class PostProcessor:
    """Drive the skimming / friend-tree production of a list of ROOT files.

    For every input file the "Events" tree is pre-filtered with ``preSkim``
    (JSON selection and cut string), optionally passed through the configured
    modules via ``eventLoop``, and written to a new file in ``outputDir``.
    The per-file outputs can finally be merged with ``haddnano.py``.
    """

    def __init__(self,
                 outputDir,
                 inputFiles,
                 cut=None,
                 branchsel=None,
                 modules=None,
                 compression="LZMA:9",
                 friend=False,
                 postfix=None,
                 jsonInput=None,
                 noOut=False,
                 justcount=False,
                 provenance=False,
                 haddFileName=None,
                 fwkJobReport=False):
        """Store the job configuration.

        Args:
            outputDir: directory in which the output files are created.
            inputFiles: iterable of input file names (opened with
                ``ROOT.TFile.Open``).
            cut: selection forwarded to ``preSkim``.
            branchsel: if truthy, wrapped in ``BranchSelection`` and handed
                to ``FullOutput``.
            modules: list of module objects exposing ``beginJob()`` and
                ``endJob()``; an empty list means "clone the tree without an
                event loop". Defaults to a fresh empty list.
            compression: ``"none"`` or ``"<ALGO>:<level>"`` where ALGO is
                ``LZMA`` or ``ZLIB``.
            friend: write a friend tree (``FriendOutput``) instead of a full
                copy (``FullOutput``).
            postfix: text inserted before ``.root`` in output names; when
                ``None``, ``"_Friend"`` or ``"_Skim"`` is used.
            jsonInput: JSON selection forwarded to ``preSkim``.
            noOut: run the modules without writing any output file.
            justcount: only print how many entries would be selected.
            provenance: forwarded to ``FullOutput``.
            haddFileName: name of the merged file produced at the end.
            fwkJobReport: create a ``JobReport`` and fill it during ``run``.
        """
        self.outputDir = outputDir
        self.inputFiles = inputFiles
        self.cut = cut
        # A literal [] default would be shared by every instance.
        self.modules = modules if modules is not None else []
        self.compression = compression
        self.postfix = postfix
        self.json = jsonInput
        self.noOut = noOut
        self.friend = friend
        self.justcount = justcount
        self.provenance = provenance
        self.jobReport = JobReport() if fwkJobReport else None
        self.haddFileName = haddFileName
        if self.jobReport and not self.haddFileName:
            print("Because you requested a FJR we assume you want the final hadd. No name specified for the output file, will use tree.root")
            self.haddFileName = "tree.root"
        self.branchsel = BranchSelection(branchsel) if branchsel else None

    def run(self):
        """Process all input files according to the stored configuration.

        Raises:
            RuntimeError: for an unsupported compression algorithm, or when
                ``noOut`` is requested without any module.
        """
        outpostfix = self.postfix if self.postfix is not None else (
            "_Friend" if self.friend else "_Skim")
        # Defined up front so that the noOut path never hits an unbound name.
        compressionLevel = 0
        compressionAlgo = None
        if not self.noOut:
            if self.compression != "none":
                ROOT.gInterpreter.ProcessLine("#include <Compression.h>")
                (algo, level) = self.compression.split(":")
                compressionLevel = int(level)
                if algo == "LZMA":
                    compressionAlgo = ROOT.ROOT.kLZMA
                elif algo == "ZLIB":
                    compressionAlgo = ROOT.ROOT.kZLIB
                else:
                    raise RuntimeError("Unsupported compression %s" % algo)
            print("Will write selected trees to " + self.outputDir)
            if not self.justcount:
                if not os.path.exists(self.outputDir):
                    os.system("mkdir -p " + self.outputDir)
        elif len(self.modules) == 0:
            raise RuntimeError(
                "Running with --noout and no modules does nothing!")

        for m in self.modules:
            m.beginJob()

        # Without modules there is nothing to compute per event, so the tree
        # is copied as a whole instead of being looped over.
        fullClone = (len(self.modules) == 0)
        outFileNames = []
        # Wall-clock time; time.clock() no longer exists on Python >= 3.8.
        t0 = time.time()
        totEntriesRead = 0
        for fname in self.inputFiles:

            # open input file
            inFile = ROOT.TFile.Open(fname)

            # get input tree
            inTree = inFile.Get("Events")
            totEntriesRead += inTree.GetEntries()
            # pre-skimming
            elist, jsonFilter = preSkim(inTree, self.json, self.cut)
            if self.justcount:
                print('Would select %d entries from %s' % (
                    elist.GetN() if elist else inTree.GetEntries(), fname))
                continue
            else:
                print('Pre-select %d entries out of %s ' % (
                    elist.GetN() if elist else inTree.GetEntries(),
                    inTree.GetEntries()))

            if fullClone:
                # no need of a reader (no event loop), but set up the elist if available
                if elist:
                    inTree.SetEntryList(elist)
            else:
                # initialize reader
                inTree = InputTree(inTree, elist)

            if not self.noOut:
                # prepare output file
                outFileName = os.path.join(
                    self.outputDir,
                    os.path.basename(fname).replace(".root",
                                                    outpostfix + ".root"))
                outFile = ROOT.TFile.Open(outFileName, "RECREATE", "",
                                          compressionLevel)
                outFileNames.append(outFileName)
                if compressionLevel:
                    outFile.SetCompressionAlgorithm(compressionAlgo)

                # prepare output tree
                if self.friend:
                    outTree = FriendOutput(inFile, inTree, outFile)
                else:
                    outTree = FullOutput(inFile,
                                         inTree,
                                         outFile,
                                         branchSelection=self.branchsel,
                                         fullClone=fullClone,
                                         jsonFilter=jsonFilter,
                                         provenance=self.provenance)
            else:
                # assumes eventLoop accepts outFile=None / outTree=None
                # -- TODO confirm against its implementation
                outFile = None
                outTree = None

            # process events, if needed
            if not fullClone:
                (nall, npass, timeLoop) = eventLoop(self.modules, inFile,
                                                    outFile, inTree, outTree)
                print('Processed %d preselected entries from %s (%s entries). Finally selected %d entries' % (
                    nall, fname, inTree.GetEntries(), npass))
            else:
                # Needed by the job report below; there is no event loop to
                # provide the count on this path.
                nall = inTree.GetEntries()
                print('Selected %d entries from %s' % (
                    outTree.tree().GetEntries(), fname))

            # now write the output
            if not self.noOut:
                outTree.write()
                outFile.Close()
                print("Done %s" % outFileName)
            if self.jobReport:
                self.jobReport.addInputFile(fname, nall)

        for m in self.modules:
            m.endJob()

        elapsed = time.time() - t0
        rate = totEntriesRead / elapsed if elapsed > 0 else 0.0
        print("%s Hz" % rate)

        if self.haddFileName:
            os.system(
                "./haddnano.py %s %s" %
                (self.haddFileName, " ".join(outFileNames))
            )  #FIXME: remove "./" once haddnano.py is distributed with cms releases
        if self.jobReport:
            self.jobReport.addOutputFile(self.haddFileName)
            self.jobReport.save()
Exemple #2
0
class PostProcessor:
    """Drive the skimming / friend-tree production of a list of ROOT files.

    For every input file the "Events" (or, failing that, "Friends") tree is
    pre-filtered with ``preSkim``, optionally passed through the configured
    modules via ``eventLoop``, and written to a new file in ``outputDir``.
    Remote files can be copied locally first, and the per-file outputs can
    finally be merged with ``haddnano.py``.
    """

    def __init__(self,
                 outputDir,
                 inputFiles,
                 cut=None,
                 branchsel=None,
                 modules=None,
                 compression="LZMA:9",
                 friend=False,
                 postfix=None,
                 jsonInput=None,
                 noOut=False,
                 justcount=False,
                 provenance=False,
                 haddFileName=None,
                 fwkJobReport=False,
                 histFileName=None,
                 histDirName=None,
                 outputbranchsel=None,
                 maxEntries=None,
                 firstEntry=0,
                 prefetch=False,
                 longTermCache=False):
        """Store the job configuration.

        Args:
            outputDir: directory in which the output files are created.
            inputFiles: iterable of input file names; an entry may be a
                comma-separated list whose extra files are attached as
                friends of the first one.
            cut: selection forwarded to ``preSkim``.
            branchsel: input branch selection (wrapped in
                ``BranchSelection`` when truthy).
            modules: list of module objects exposing ``beginJob()`` and
                ``endJob()``; empty means "clone without an event loop".
                Defaults to a fresh empty list.
            compression: ``"none"`` or ``"<ALGO>:<level>"`` where ALGO is
                ``LZMA``, ``ZLIB`` or ``LZ4``.
            friend: write a friend tree instead of a full copy.
            postfix: text inserted before ``.root`` in output names; when
                ``None``, ``"_Friend"`` or ``"_Skim"`` is used.
            jsonInput: JSON selection forwarded to ``preSkim``.
            noOut: run the modules without writing any output file.
            justcount: only print how many entries would be selected.
            provenance: forwarded to ``FullOutput``.
            haddFileName: name of the merged file produced at the end.
            fwkJobReport: create a ``JobReport`` and fill it during ``run``.
            histFileName: histogram file opened in ``run``; must be given
                together with ``histDirName``.
            histDirName: directory name handed to modules that set
                ``writeHistFile``.
            outputbranchsel: output branch selection; falls back to
                ``branchsel`` when not given.
            maxEntries: maximum number of entries per file; a falsy value
                means "no limit" (largest int64).
            firstEntry: index of the first entry to consider.
            prefetch: copy remote files to TMPDIR with xrdcp before reading.
            longTermCache: keep prefetched files across runs.
        """
        self.outputDir = outputDir
        self.inputFiles = inputFiles
        self.cut = cut
        # A literal [] default would be shared by every instance.
        self.modules = modules if modules is not None else []
        self.compression = compression
        self.postfix = postfix
        self.json = jsonInput
        self.noOut = noOut
        self.friend = friend
        self.justcount = justcount
        self.provenance = provenance
        self.jobReport = JobReport() if fwkJobReport else None
        self.haddFileName = haddFileName
        self.histFile = None
        if self.jobReport and not self.haddFileName:
            print("Because you requested a FJR we assume you want the final " \
                "hadd. No name specified for the output file, will use tree.root")
            self.haddFileName = "tree.root"
        self.branchsel = BranchSelection(branchsel) if branchsel else None
        if outputbranchsel is not None:
            self.outputbranchsel = BranchSelection(outputbranchsel)
        elif branchsel is not None:
            # Use the same branches in the output as in input
            self.outputbranchsel = BranchSelection(branchsel)
        else:
            self.outputbranchsel = None

        self.histFileName = histFileName
        self.histDirName = histDirName
        # 2^63 - 1, largest int64
        self.maxEntries = maxEntries if maxEntries else 9223372036854775807
        self.firstEntry = firstEntry
        self.prefetch = prefetch  # prefetch files to TMPDIR using xrdcp
        # keep cached files across runs (it's then up to you to clean up the temp)
        self.longTermCache = longTermCache

    def prefetchFile(self, fname, verbose=True):
        """Copy a remote (``root://``) file to the local temporary directory.

        Args:
            fname: file name or URL.
            verbose: print progress messages.

        Returns:
            ``(path, toBeDeleted)``: the path to read from (the local copy,
            or ``fname`` itself for local files and failed transfers) and
            whether the caller should delete that path afterwards.
        """
        tmpdir = os.environ['TMPDIR'] if 'TMPDIR' in os.environ else "/tmp"
        if not fname.startswith("root://"):
            return fname, False
        if self.longTermCache:
            # sha1 needs bytes; a Python 3 str has to be encoded first.
            key = fname if isinstance(fname, bytes) else fname.encode("utf-8")
            rndchars = "long_cache-id%d-%s" % (os.getuid(),
                                               hashlib.sha1(key).hexdigest())
        else:
            # bytearray yields ints on both Python 2 and 3.
            rndchars = "".join(["%x" % b for b in bytearray(os.urandom(8))])
        localfile = "%s/%s-%s.root" \
            % (tmpdir, os.path.basename(fname).replace(".root", ""), rndchars)
        if self.longTermCache and os.path.exists(localfile):
            if verbose:
                print("Filename %s is already available in local path %s " \
                    % (fname, localfile))
            return localfile, False
        try:
            if verbose:
                print("Filename %s is remote, will do a copy to local path %s"\
                    % (fname, localfile))
            start = time.time()
            subprocess.check_output(["xrdcp", "-f", "-N", fname, localfile])
            if verbose:
                print("Time used for transferring the file locally: %.2f s"\
                    % (time.time() - start))
            return localfile, (not self.longTermCache)
        except Exception:
            # Best effort: any transfer failure falls back to remote reading,
            # but KeyboardInterrupt / SystemExit are allowed to propagate.
            if verbose:
                print(
                    "Error: could not save file locally, will run from remote")
            if os.path.exists(localfile):
                if verbose:
                    print("Deleting partially transferred file %s" % localfile)
                try:
                    os.unlink(localfile)
                except OSError:
                    pass
            return fname, False

    def run(self):
        """Process all input files according to the stored configuration.

        Raises:
            RuntimeError: for an unsupported compression algorithm, when
                ``noOut`` is requested without any module, or when only one
                of ``histFileName`` / ``histDirName`` is set.
        """
        outpostfix = self.postfix if self.postfix is not None else (
            "_Friend" if self.friend else "_Skim")
        compressionLevel = 0
        compressionAlgo = None
        if not self.noOut:
            if self.compression != "none":
                ROOT.gInterpreter.ProcessLine("#include <Compression.h>")
                (algo, level) = self.compression.split(":")
                compressionLevel = int(level)
                if algo == "LZMA":
                    compressionAlgo = ROOT.ROOT.kLZMA
                elif algo == "ZLIB":
                    compressionAlgo = ROOT.ROOT.kZLIB
                elif algo == "LZ4":
                    compressionAlgo = ROOT.ROOT.kLZ4
                else:
                    raise RuntimeError("Unsupported compression %s" % algo)
            print("Will write selected trees to " + self.outputDir)
            if not self.justcount:
                if not os.path.exists(self.outputDir):
                    os.system("mkdir -p " + self.outputDir)
        elif len(self.modules) == 0:
            raise RuntimeError(
                "Running with --noout and no modules does nothing!")

        # Open histogram file, if desired
        if (self.histFileName is None) != (self.histDirName is None):
            raise RuntimeError(
                "Must specify both histogram file and histogram directory!")
        elif self.histFileName is not None:
            self.histFile = ROOT.TFile.Open(self.histFileName, "RECREATE")
        else:
            self.histFile = None

        for m in self.modules:
            if hasattr(m, 'writeHistFile') and m.writeHistFile:
                m.beginJob(histFile=self.histFile,
                           histDirName=self.histDirName)
            else:
                m.beginJob()

        # Without modules there is nothing to compute per event, so the tree
        # is copied as a whole instead of being looped over.
        fullClone = (len(self.modules) == 0)
        outFileNames = []
        t0 = time.time()
        totEntriesRead = 0
        for fname in self.inputFiles:
            # "main.root,friend1.root,..." attaches the extra files as friends
            ffnames = []
            if "," in fname:
                fnames = fname.split(',')
                fname, ffnames = fnames[0], fnames[1:]

            # open input file
            if self.prefetch:
                ftoread, toBeDeleted = self.prefetchFile(fname)
                inFile = ROOT.TFile.Open(ftoread)
            else:
                inFile = ROOT.TFile.Open(fname)

            # get input tree
            inTree = inFile.Get("Events")
            # NOTE(review): `== None` (not `is None`) is kept on purpose;
            # presumably Get() returns a null object proxy rather than the
            # Python None singleton -- confirm before changing.
            if inTree == None:
                inTree = inFile.Get("Friends")
            # Clamp at zero so a firstEntry beyond the end of the tree does
            # not produce negative counts in the totals and the job report.
            nEntries = max(
                0,
                min(inTree.GetEntries() - self.firstEntry, self.maxEntries))
            totEntriesRead += nEntries
            # pre-skimming
            elist, jsonFilter = preSkim(inTree,
                                        self.json,
                                        self.cut,
                                        maxEntries=self.maxEntries,
                                        firstEntry=self.firstEntry)
            nPreselected = elist.GetN() if elist else nEntries
            percent = nPreselected / (0.01 * nEntries) if nEntries else 0
            if self.justcount:
                print('Would select %d / %d entries from %s (%.2f%%)' %
                      (nPreselected, nEntries, fname, percent))
                if self.prefetch:
                    if toBeDeleted:
                        os.unlink(ftoread)
                continue
            print('Pre-select %d entries out of %s (%.2f%%)' %
                  (nPreselected, nEntries, percent))

            # Keep references to the friend files and trees for as long as
            # this input file is being processed.
            inAddFiles = []
            inAddTrees = []
            for ffname in ffnames:
                inAddFiles.append(ROOT.TFile.Open(ffname))
                inAddTree = inAddFiles[-1].Get("Events")
                if inAddTree == None:
                    inAddTree = inAddFiles[-1].Get("Friends")
                inAddTrees.append(inAddTree)
                inTree.AddFriend(inAddTree)

            if fullClone:
                # no need of a reader (no event loop), but set up the elist if available
                if elist:
                    inTree.SetEntryList(elist)
            else:
                # initialize reader
                inTree = InputTree(inTree, elist)

            # prepare output file
            if not self.noOut:
                outFileName = os.path.join(
                    self.outputDir,
                    os.path.basename(fname).replace(".root",
                                                    outpostfix + ".root"))
                outFile = ROOT.TFile.Open(outFileName, "RECREATE", "",
                                          compressionLevel)
                outFileNames.append(outFileName)
                if compressionLevel:
                    outFile.SetCompressionAlgorithm(compressionAlgo)
                # prepare output tree
                if self.friend:
                    outTree = FriendOutput(inFile, inTree, outFile)
                else:
                    outTree = FullOutput(
                        inFile,
                        inTree,
                        outFile,
                        branchSelection=self.branchsel,
                        outputbranchSelection=self.outputbranchsel,
                        fullClone=fullClone,
                        maxEntries=self.maxEntries,
                        firstEntry=self.firstEntry,
                        jsonFilter=jsonFilter,
                        provenance=self.provenance)
            else:
                outFile = None
                outTree = None
                if self.branchsel:
                    self.branchsel.selectBranches(inTree)

            # process events, if needed
            if not fullClone:
                eventRange = range(
                    self.firstEntry, self.firstEntry +
                    nEntries) if nEntries > 0 and not elist else None
                (nall, npass, timeLoop) = eventLoop(self.modules,
                                                    inFile,
                                                    outFile,
                                                    inTree,
                                                    outTree,
                                                    eventRange=eventRange,
                                                    maxEvents=self.maxEntries)
                print(
                    'Processed %d preselected entries from %s (%s entries). Finally selected %d entries'
                    % (nall, fname, nEntries, npass))
            else:
                nall = nEntries
                print('Selected %d / %d entries from %s (%.2f%%)' %
                      (outTree.tree().GetEntries(), nall, fname,
                       outTree.tree().GetEntries() /
                       (0.01 * nall) if nall else 0))

            # now write the output
            if not self.noOut:
                outTree.write()
                outFile.Close()
                print("Done %s" % outFileName)
            if self.jobReport:
                self.jobReport.addInputFile(fname, nall)
            if self.prefetch:
                if toBeDeleted:
                    os.unlink(ftoread)

        for m in self.modules:
            m.endJob()

        elapsed = time.time() - t0
        rate = totEntriesRead / elapsed if elapsed > 0 else 0.0
        print("Total time %.1f sec. to process %i events. Rate = %.1f Hz." %
              (elapsed, totEntriesRead, rate))

        if self.haddFileName:
            # Prefer a local copy of the merge script when one is present.
            haddnano = "./haddnano.py" if os.path.isfile(
                "./haddnano.py") else "haddnano.py"
            os.system("%s %s %s" %
                      (haddnano, self.haddFileName, " ".join(outFileNames)))
        if self.jobReport:
            self.jobReport.addOutputFile(self.haddFileName)
            self.jobReport.save()
class PostProcessor:
    """Drive the skimming / friend-tree production of a list of ROOT files.

    In addition to the skimming itself (``preSkim`` + optional ``eventLoop``
    over the configured modules), bookkeeping histograms are filled from the
    unskimmed input tree and written to each output file: the entry count,
    the sum of ``genWeight`` and, on request, per-mass-point counts
    (``SMSMasses``) and an ISR-pT histogram (``doISR``).
    """

    def __init__(self,
                 outputDir,
                 inputFiles,
                 cut=None,
                 branchsel=None,
                 modules=None,
                 compression="LZMA:9",
                 friend=False,
                 postfix=None,
                 jsonInput=None,
                 noOut=False,
                 justcount=False,
                 provenance=False,
                 haddFileName=None,
                 fwkJobReport=False,
                 histFileName=None,
                 histDirName=None,
                 outputbranchsel=None,
                 SMSMasses=None,
                 doISR=None):
        """Store the job configuration and book the bookkeeping histograms.

        Args:
            outputDir: directory in which the output files are created.
            inputFiles: iterable of input file names.
            cut: selection forwarded to ``preSkim``.
            branchsel: input branch selection (wrapped in
                ``BranchSelection`` when truthy).
            modules: list of module objects exposing ``beginJob()`` and
                ``endJob()``; empty means "clone without an event loop".
                Defaults to a fresh empty list.
            compression: ``"none"`` or ``"<ALGO>:<level>"`` where ALGO is
                ``LZMA`` or ``ZLIB``.
            friend: write a friend tree instead of a full copy.
            postfix: text inserted before ``.root`` in output names; when
                ``None``, ``"_Friend"`` or ``"_Skim"`` is used.
            jsonInput: JSON selection forwarded to ``preSkim``.
            noOut: run the modules without writing any output file.
            justcount: only print how many entries would be selected.
            provenance: forwarded to ``FullOutput``.
            haddFileName: name of the merged file produced at the end.
            fwkJobReport: create a ``JobReport`` and fill it during ``run``.
            histFileName: histogram file opened in ``run``; must be given
                together with ``histDirName``.
            histDirName: directory name handed to modules that set
                ``writeHistFile``.
            outputbranchsel: output branch selection (wrapped in
                ``BranchSelection`` when truthy).
            SMSMasses: pair of PDG ids whose generator masses define the
                axes of the mass-point histograms.
            doISR: pair of PDG ids of the particles (status 22) used to
                build the ISR pT; requires ``SMSMasses``.
        """
        self.outputDir = outputDir
        self.inputFiles = inputFiles
        self.cut = cut
        # A literal [] default would be shared by every instance.
        self.modules = modules if modules is not None else []
        self.compression = compression
        self.postfix = postfix
        self.json = jsonInput
        self.noOut = noOut
        self.friend = friend
        self.justcount = justcount
        self.provenance = provenance
        self.jobReport = JobReport() if fwkJobReport else None
        self.haddFileName = haddFileName
        self.histFile = None
        self.hcount = ROOT.TH1F("Count", "Count", 1, 0, 1)
        self.hsumofweights = ROOT.TH1F("SumWeights", "SumWeights", 1, 0, 1)
        self.SMSMasses = SMSMasses
        self.doISR = doISR
        if SMSMasses is not None:
            self.hsmscount = ROOT.TH2F("SMSCount", "SMSCount", 2000, -0.5,
                                       1999.5, 2000, -0.5, 1999.5)
        if doISR is not None:
            self.isrconstant = 1.
        if self.jobReport and not self.haddFileName:
            print("Because you requested a FJR we assume you want the final hadd. No name specified for the output file, will use tree.root")
            self.haddFileName = "tree.root"
        self.branchsel = BranchSelection(branchsel) if branchsel else None
        self.outputbranchsel = BranchSelection(
            outputbranchsel) if outputbranchsel else None
        self.histFileName = histFileName
        self.histDirName = histDirName

    def run(self):
        """Process all input files according to the stored configuration.

        Raises:
            RuntimeError: when ``doISR`` is set without ``SMSMasses``, for an
                unsupported compression algorithm, when ``noOut`` is
                requested without any module, or when only one of
                ``histFileName`` / ``histDirName`` is set.
        """
        # The ISR histogram is binned in the SMS masses, so fail early with a
        # clear message instead of a TypeError in the middle of the file loop.
        if self.doISR is not None and self.SMSMasses is None:
            raise RuntimeError(
                "doISR requires SMSMasses to be specified as well!")

        outpostfix = self.postfix if self.postfix is not None else (
            "_Friend" if self.friend else "_Skim")
        compressionLevel = 0
        compressionAlgo = None
        if not self.noOut:
            if self.compression != "none":
                ROOT.gInterpreter.ProcessLine("#include <Compression.h>")
                (algo, level) = self.compression.split(":")
                compressionLevel = int(level)
                if algo == "LZMA":
                    compressionAlgo = ROOT.ROOT.kLZMA
                elif algo == "ZLIB":
                    compressionAlgo = ROOT.ROOT.kZLIB
                else:
                    raise RuntimeError("Unsupported compression %s" % algo)
            print("Will write selected trees to " + self.outputDir)
            if not self.justcount:
                if not os.path.exists(self.outputDir):
                    os.system("mkdir -p " + self.outputDir)
        elif len(self.modules) == 0:
            raise RuntimeError(
                "Running with --noout and no modules does nothing!")

        # Open histogram file, if desired
        if (self.histFileName is None) != (self.histDirName is None):
            raise RuntimeError(
                "Must specify both histogram file and histogram directory!")
        elif self.histFileName is not None:
            self.histFile = ROOT.TFile.Open(self.histFileName, "RECREATE")
        else:
            self.histFile = None

        for m in self.modules:
            if hasattr(m, 'writeHistFile') and m.writeHistFile:
                m.beginJob(histFile=self.histFile,
                           histDirName=self.histDirName)
            else:
                m.beginJob()

        # Without modules there is nothing to compute per event, so the tree
        # is copied as a whole instead of being looped over.
        fullClone = (len(self.modules) == 0)
        outFileNames = []
        # Wall-clock time; time.clock() no longer exists on Python >= 3.8.
        t0 = time.time()
        totEntriesRead = 0
        for fname in self.inputFiles:

            # open input file
            inFile = ROOT.TFile.Open(fname)

            # get input tree
            inTree = inFile.Get("Events")
            totEntriesRead += inTree.GetEntries()
            self.hcount.SetBinContent(1, inTree.GetEntries())
            ROOT.gROOT.SetBatch(True)
            if self.SMSMasses is not None:
                inTree.Draw(
                    "MaxIf$(GenPart_mass, abs(GenPart_pdgId) == %i):MaxIf$(GenPart_mass, abs(GenPart_pdgId) == %i) >> hSMS(2000, -0.5, 1999.5, 2000, -0.5, 1999.5)"
                    % (self.SMSMasses[0], self.SMSMasses[1]))
                self.hsmscount = ROOT.gDirectory.Get('hSMS')
            if self.doISR is not None:
                #Dirty ISR recipe for EWKinos
                #Need to correct for each mass point
                #Can't correct per sample (wrong normalization), need to save whole unskimmed histogram per point an then postprocess
                isrSel = "abs(GenPart_pdgId) == %i && GenPart_status == 22"
                pt1 = "MaxIf$(GenPart_pt, %s)" % (isrSel % self.doISR[0])
                pt2 = "MaxIf$(GenPart_pt, %s)" % (isrSel % self.doISR[1])
                # The azimuthal angles must come from GenPart_phi; using
                # GenPart_pt here made the vector sum below meaningless.
                phi1 = "MaxIf$(GenPart_phi, %s)" % (isrSel % self.doISR[0])
                phi2 = "MaxIf$(GenPart_phi, %s)" % (isrSel % self.doISR[1])
                # Magnitude of the vector sum of the two transverse momenta.
                pt_ISR = "hypot(%s + %s * cos(%s-%s), %s*sin(%s - %s))" % (
                    pt1, pt2, phi2, phi1, pt2, phi2, phi1)
                inTree.Draw(
                    " %s : MaxIf$(GenPart_mass, abs(GenPart_pdgId) == %i) : MaxIf$(GenPart_mass, abs(GenPart_pdgId) == %i)  >> hISR(1000, -0.5, 1999.5, 1000, -0.5, 1999.5, 20, 0, 1000)"
                    % (pt_ISR, self.SMSMasses[0], self.SMSMasses[1]))
                self.hISR = ROOT.gDirectory.Get("hISR")
            if inTree.GetBranchStatus("genWeight"):
                inTree.Project("SumWeightsTemp", "1.0", "genWeight")
                sow = ROOT.gROOT.FindObject("SumWeightsTemp").Integral()
                self.hsumofweights.SetBinContent(1, sow)
            # pre-skimming
            elist, jsonFilter = preSkim(inTree, self.json, self.cut)
            if self.justcount:
                print('Would select %d entries from %s' % (
                    elist.GetN() if elist else inTree.GetEntries(), fname))
                continue
            else:
                print('Pre-select %d entries out of %s ' % (
                    elist.GetN() if elist else inTree.GetEntries(),
                    inTree.GetEntries()))

            if fullClone:
                # no need of a reader (no event loop), but set up the elist if available
                if elist:
                    inTree.SetEntryList(elist)
            else:
                # initialize reader
                inTree = InputTree(inTree, elist)

            # prepare output file
            if not self.noOut:
                outFileName = os.path.join(
                    self.outputDir,
                    os.path.basename(fname).replace(".root",
                                                    outpostfix + ".root"))
                outFile = ROOT.TFile.Open(outFileName, "RECREATE", "",
                                          compressionLevel)
                outFileNames.append(outFileName)
                if compressionLevel:
                    outFile.SetCompressionAlgorithm(compressionAlgo)
                # prepare output tree
                if self.friend:
                    outTree = FriendOutput(inFile, inTree, outFile)
                else:
                    outTree = FullOutput(
                        inFile,
                        inTree,
                        outFile,
                        branchSelection=self.branchsel,
                        outputbranchSelection=self.outputbranchsel,
                        fullClone=fullClone,
                        jsonFilter=jsonFilter,
                        provenance=self.provenance)
            else:
                outFile = None
                outTree = None

            # process events, if needed
            if not fullClone:
                (nall, npass, timeLoop) = eventLoop(self.modules, inFile,
                                                    outFile, inTree, outTree)
                print('Processed %d preselected entries from %s (%s entries). Finally selected %d entries' % (
                    nall, fname, inTree.GetEntries(), npass))
            else:
                nall = inTree.GetEntries()
                print('Selected %d entries from %s' % (
                    outTree.tree().GetEntries(), fname))

            # now write the output
            if not self.noOut:
                print("Start writing")
                self.hcount.Write()
                print("Start writing")
                if self.SMSMasses is not None:
                    self.hsmscount.Write()
                print("Start writing")
                if self.doISR is not None:
                    self.hISR.Write()
                print("Start writing")
                self.hsumofweights.Write()
                outTree.write()
                outFile.Close()
                print("Done %s" % outFileName)
            if self.jobReport:
                self.jobReport.addInputFile(fname, nall)

        for m in self.modules:
            m.endJob()

        elapsed = time.time() - t0
        rate = totEntriesRead / elapsed if elapsed > 0 else 0.0
        print("%s Hz" % rate)

        if self.haddFileName:
            os.system(
                "./haddnano.py %s %s" %
                (self.haddFileName, " ".join(outFileNames))
            )  #FIXME: remove "./" once haddnano.py is distributed with cms releases
        if self.jobReport:
            self.jobReport.addOutputFile(self.haddFileName)
            self.jobReport.save()
Exemple #4
0
class PostProcessor:
    """Skim and post-process ROOT trees, writing one output file per input.

    For each entry of ``inputFiles`` the ``run`` method opens the file, reads
    its "Events" (or, failing that, "Friends") tree, pre-selects entries with
    ``preSkim``, optionally runs the configured modules through ``eventLoop``
    and writes the result below ``outputDir``.

    Parameters:
        outputDir: directory receiving the output files (created on demand).
        inputFiles: iterable of input file names. An entry may be a
            comma-separated list; the first name is the main file and the
            remaining ones are attached to it as friend trees.
        cut: selection forwarded to ``preSkim``.
        branchsel: argument for ``BranchSelection``; also used for the output
            branches when ``outputbranchsel`` is not given.
        modules: list of module objects providing ``beginJob``/``endJob``.
            With no modules the input tree is cloned without an event loop.
        compression: ``"ALGO:LEVEL"`` with ALGO in LZMA, ZLIB, LZ4, or the
            string ``"none"``.
        friend: write a ``FriendOutput`` instead of a ``FullOutput``.
        postfix: text inserted before ``.root`` in output file names;
            defaults to ``_Friend`` or ``_Skim``.
        jsonInput: JSON selection forwarded to ``preSkim``.
        noOut: do not write any output file (requires at least one module).
        justcount: only print how many entries would be selected.
        provenance: forwarded to ``FullOutput``.
        haddFileName: if set, all outputs are merged into this file with
            ``haddnano.py`` at the end of the job.
        fwkJobReport: create and save a ``JobReport``; implies a final hadd
            into ``tree.root`` when ``haddFileName`` is not given.
        histFileName, histDirName: histogram file and directory handed to
            modules that set ``writeHistFile``; give both or neither.
        outputbranchsel: argument for the output ``BranchSelection``.
        maxEntries: maximum number of entries per file; a falsy value
            (``None`` or ``0``) means no limit.
        firstEntry: index of the first entry to process in each file.
        prefetch: copy ``root://`` inputs locally with ``xrdcp`` first.
        longTermCache: keep prefetched copies across runs (the caller is
            then responsible for cleaning the temporary directory).
        saveHistoGenWeights: store ``hGenWeights``/``hNumWeights``
            histograms in the output when the input does not have them.
        allowNoPostfix: with ``postfix=None`` use an empty postfix instead
            of the ``_Friend``/``_Skim`` default.
    """

    def __init__(self,
                 outputDir,
                 inputFiles,
                 cut=None,
                 branchsel=None,
                 modules=None,
                 compression="LZMA:9",
                 friend=False,
                 postfix=None,
                 jsonInput=None,
                 noOut=False,
                 justcount=False,
                 provenance=False,
                 haddFileName=None,
                 fwkJobReport=False,
                 histFileName=None,
                 histDirName=None,
                 outputbranchsel=None,
                 maxEntries=None,
                 firstEntry=0,
                 prefetch=False,
                 longTermCache=False,
                 saveHistoGenWeights=False,
                 allowNoPostfix=False):
        self.outputDir = outputDir
        self.inputFiles = inputFiles
        self.cut = cut
        # A fresh list per instance: a shared mutable default would leak
        # modules from one PostProcessor into the next.
        self.modules = [] if modules is None else modules
        self.compression = compression
        self.postfix = postfix
        self.allowNoPostfix = allowNoPostfix
        self.json = jsonInput
        self.noOut = noOut
        self.friend = friend
        self.justcount = justcount
        self.provenance = provenance
        self.jobReport = JobReport() if fwkJobReport else None
        self.haddFileName = haddFileName
        self.saveHistoGenWeights = saveHistoGenWeights
        self.histFile = None  # opened in run() when a histogram file is requested
        if self.jobReport and not self.haddFileName:
            print("Because you requested a FJR we assume you want the final " \
                "hadd. No name specified for the output file, will use tree.root")
            self.haddFileName = "tree.root"
        self.branchsel = BranchSelection(branchsel) if branchsel else None
        if outputbranchsel is not None:
            self.outputbranchsel = BranchSelection(outputbranchsel)
        elif branchsel is not None:
            # Use the same branches in the output as in input
            self.outputbranchsel = BranchSelection(branchsel)
        else:
            self.outputbranchsel = None

        self.histFileName = histFileName
        self.histDirName = histDirName
        # 2^63 - 1, largest int64
        self.maxEntries = maxEntries if maxEntries else 9223372036854775807
        self.firstEntry = firstEntry
        self.prefetch = prefetch  # prefetch files to TMPDIR using xrdcp
        # keep cached files across runs (it's then up to you to clean up the temp)
        self.longTermCache = longTermCache

    def prefetchFile(self, fname, verbose=True):
        """Copy a remote ``root://`` file into ``$TMPDIR`` (or ``/tmp``).

        Returns a tuple ``(path, deleteAfterUse)``: ``path`` is the file to
        open and ``deleteAfterUse`` tells the caller whether it must remove
        that file once done. Non-remote names are returned unchanged. If the
        copy fails, the partial file is removed and the remote name is
        returned so that processing can continue from remote.
        """
        tmpdir = os.environ.get('TMPDIR', "/tmp")
        if not fname.startswith("root://"):
            return fname, False
        if self.longTermCache:
            # Stable name derived from the user id and the remote path, so the
            # copy can be found again in a later run. hashlib needs bytes.
            rawname = fname if isinstance(fname, bytes) else fname.encode("utf-8")
            rndchars = "long_cache-id%d-%s" \
                % (os.getuid(), hashlib.sha1(rawname).hexdigest())
        else:
            # bytearray yields ints on both Python 2 and Python 3
            rndchars = "".join("%02x" % b for b in bytearray(os.urandom(8)))
        localfile = "%s/%s-%s.root" \
            % (tmpdir, os.path.basename(fname).replace(".root", ""), rndchars)
        if self.longTermCache and os.path.exists(localfile):
            if verbose:
                print("Filename %s is already available in local path %s " \
                    % (fname, localfile))
            return localfile, False
        try:
            if verbose:
                print("Filename %s is remote, will do a copy to local path %s"\
                    % (fname, localfile))
            start = time.time()
            subprocess.check_output(["xrdcp", "-f", "-N", fname, localfile])
            if verbose:
                print("Time used for transferring the file locally: %.2f s"\
                    % (time.time() - start))
            return localfile, (not self.longTermCache)
        except Exception:
            # Best effort: any copy failure falls back to remote reading, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            if verbose:
                print(
                    "Error: could not save file locally, will run from remote")
            if os.path.exists(localfile):
                if verbose:
                    print("Deleting partially transferred file %s" % localfile)
                try:
                    os.unlink(localfile)
                except OSError:
                    pass
            return fname, False

    def run(self):
        """Process every input file and write the requested outputs.

        Raises:
            RuntimeError: for an unsupported compression algorithm, for
                ``noOut`` without modules, when only one of ``histFileName``
                and ``histDirName`` is given, or when the gen-weight
                histograms are requested but the "Events" tree does not hold
                all generated events.
            SystemExit: with status 1 when an input file cannot be opened.
        """
        outpostfix = self.postfix if self.postfix is not None else (
            "_Friend" if self.friend else "_Skim")
        if self.allowNoPostfix and self.postfix is None:
            outpostfix = ""

        compressionLevel = 0
        compressionAlgo = None  # only needed when compressionLevel is non-zero
        if not self.noOut:
            if self.compression != "none":
                ROOT.gInterpreter.ProcessLine("#include <Compression.h>")
                (algo, level) = self.compression.split(":")
                compressionLevel = int(level)
                if algo == "LZMA":
                    compressionAlgo = ROOT.ROOT.kLZMA
                elif algo == "ZLIB":
                    compressionAlgo = ROOT.ROOT.kZLIB
                elif algo == "LZ4":
                    compressionAlgo = ROOT.ROOT.kLZ4
                else:
                    raise RuntimeError("Unsupported compression %s" % algo)
            print("Will write selected trees to " + self.outputDir)
            if not self.justcount:
                if not os.path.exists(self.outputDir):
                    os.system("mkdir -p " + self.outputDir)

        if self.noOut:
            if len(self.modules) == 0:
                raise RuntimeError(
                    "Running with --noout and no modules does nothing!")

        # Open histogram file, if desired: file name and directory name must
        # be given together.
        if (self.histFileName is None) != (self.histDirName is None):
            raise RuntimeError(
                "Must specify both histogram file and histogram directory!")
        if self.histFileName is not None:
            self.histFile = ROOT.TFile.Open(self.histFileName, "RECREATE")
        else:
            self.histFile = None

        for m in self.modules:
            if hasattr(m, 'writeHistFile') and m.writeHistFile:
                m.beginJob(histFile=self.histFile,
                           histDirName=self.histDirName)
            else:
                m.beginJob()

        # Without modules there is nothing to run per event: clone the tree.
        fullClone = (len(self.modules) == 0)
        outFileNames = []
        t0 = time.time()
        totEntriesRead = 0
        for fname in self.inputFiles:
            # "main.root,friend1.root,..." -> main file plus friend files
            ffnames = []
            if "," in fname:
                fnames = fname.split(',')
                fname, ffnames = fnames[0], fnames[1:]

            # open input file
            ftoread, toBeDeleted = fname, False
            if self.prefetch:
                ftoread, toBeDeleted = self.prefetchFile(fname)
            inFile = ROOT.TFile.Open(ftoread)

            if not inFile:
                print('ERROR: file does not exist, check!')
                print('    filename: %s' % fname)
                # Non-zero status so that the failure is visible to the caller
                raise SystemExit(1)

            # get input tree
            # NOTE(review): a missing key is tested by truthiness because
            # PyROOT may hand back a null object rather than None - confirm
            # against the ROOT version in use.
            inTree = inFile.Get("Events")
            if not inTree:
                inTree = inFile.Get("Friends")
            nEntries = min(inTree.GetEntries() - self.firstEntry,
                           self.maxEntries)

            # Reset per file so that histograms of a previous input are never
            # written into this file's output.
            hGenWeights = None
            hNumWeights = None
            # first check that the histogram with weights is not already in the file
            hasWeightHistograms = False
            if inFile.GetListOfKeys().Contains(
                    "hGenWeights") and inFile.GetListOfKeys().Contains(
                        "hNumWeights"):
                hasWeightHistograms = True
                print("Histogram hGenWeights already exists, I will just copy it without recreating it")
            if self.saveHistoGenWeights and inTree.GetName(
            ) == "Events" and not hasWeightHistograms:
                print("Histogram hGenWeights does not exist yet, I will create it")
                # check that the tree contains all the original events, otherwise the sum of gen weights will miss some
                tmpTreeRuns = inFile.Get("Runs")
                for ievt, event in enumerate(tmpTreeRuns):
                    if ievt:
                        break  # only need first event (but there should be only 1 here)
                    nGenEvents = event.genEventCount
                if nGenEvents != inTree.GetEntries():
                    raise RuntimeError(
                        "I am creating the histogram with genWeight, but tree Events has less entries than genEventCount in tree Runs (%s instead of %s). The sum of weights will thus be wrong, please check"
                        % (str(inTree.GetEntries()), str(nGenEvents)))

                # saving distribution of genWeight for offline usage
                # idea is to fill the distribution of Log10(genWeight) with the sign, so to have a histogram from about -10 to 10
                # with about 10k bins (genWeights can take values spanning several orders of magnitude, especially for fancy weights)
                # then one can compute the sum of genWeight in a given range using its integral (using Log10(threshold) ).
                # This somehow relies on having always |genWeight|>1, should it be < 1 the Log would change the sign.
                # So for the purpose of choosing the bin to be filled, we use |value| or 1.001, whatever is larger (this will not affect the integral)
                # then, need a second histogram to keep the integer number of events in each bin, so to allow for clipping of large weights
                hGenWeights = ROOT.TH1D("hGenWeights",
                                        "distribution of Log10(genWeight)",
                                        4800, -12.0, 12.0)
                hNumWeights = ROOT.TH1D(
                    "hNumWeights",
                    "distribution of Log10(genWeight) (unweighted)", 4800,
                    -12.0, 12.0)
                inTree.Draw(
                    "TMath::Sign(1.0,genWeight)*TMath::Log10(max(1.001,abs(genWeight)))>>hGenWeights",
                    "genWeight", "goff", nEntries, self.firstEntry)
                inTree.Draw(
                    "TMath::Sign(1.0,genWeight)*TMath::Log10(max(1.001,abs(genWeight)))>>hNumWeights",
                    "1", "goff", nEntries, self.firstEntry)
            totEntriesRead += nEntries

            # pre-skimming
            elist, jsonFilter = preSkim(inTree,
                                        self.json,
                                        self.cut,
                                        maxEntries=self.maxEntries,
                                        firstEntry=self.firstEntry)
            nPreselected = elist.GetN() if elist else nEntries
            pctPreselected = nPreselected / (0.01 * nEntries) if nEntries else 0
            if self.justcount:
                print('Would select %d / %d entries from %s (%.2f%%)' %
                      (nPreselected, nEntries, fname, pctPreselected))
                if toBeDeleted:
                    os.unlink(ftoread)
                continue
            print('Pre-select %d entries out of %s (%.2f%%)' %
                  (nPreselected, nEntries, pctPreselected))

            # attach the additional files as friends of the main tree
            # (files and trees are collected in lists, presumably to keep
            # them referenced while the main tree is in use)
            inAddFiles = []
            inAddTrees = []
            for ffname in ffnames:
                inAddFiles.append(ROOT.TFile.Open(ffname))
                inAddTree = inAddFiles[-1].Get("Events")
                if not inAddTree:
                    inAddTree = inAddFiles[-1].Get("Friends")
                inAddTrees.append(inAddTree)
                inTree.AddFriend(inAddTree)

            if fullClone:
                # no need of a reader (no event loop), but set up the elist if available
                if elist:
                    inTree.SetEntryList(elist)
            else:
                # initialize reader
                inTree = InputTree(inTree, elist)

            # prepare output file
            if not self.noOut:
                outFileName = os.path.join(
                    self.outputDir,
                    os.path.basename(fname).replace(".root",
                                                    outpostfix + ".root"))
                outFile = ROOT.TFile.Open(outFileName, "RECREATE", "",
                                          compressionLevel)
                outFileNames.append(outFileName)
                if compressionLevel:
                    outFile.SetCompressionAlgorithm(compressionAlgo)
                # prepare output tree
                if self.friend:
                    outTree = FriendOutput(inFile, inTree, outFile)
                else:
                    outTree = FullOutput(
                        inFile,
                        inTree,
                        outFile,
                        branchSelection=self.branchsel,
                        outputbranchSelection=self.outputbranchsel,
                        fullClone=fullClone,
                        maxEntries=self.maxEntries,
                        firstEntry=self.firstEntry,
                        jsonFilter=jsonFilter,
                        provenance=self.provenance)
            else:
                outFile = None
                outTree = None
                if self.branchsel:
                    self.branchsel.selectBranches(inTree)

            # process events, if needed
            if not fullClone:
                # an explicit range is only passed when there is no entry list
                eventRange = range(
                    self.firstEntry, self.firstEntry +
                    nEntries) if nEntries > 0 and not elist else None
                (nall, npass, timeLoop) = eventLoop(self.modules,
                                                    inFile,
                                                    outFile,
                                                    inTree,
                                                    outTree,
                                                    eventRange=eventRange,
                                                    maxEvents=self.maxEntries)
                print(
                    'Processed %d preselected entries from %s (%s entries). Finally selected %d entries'
                    % (nall, fname, nEntries, npass))
            else:
                nall = nEntries
                print('Selected %d / %d entries from %s (%.2f%%)' %
                      (outTree.tree().GetEntries(), nall, fname,
                       outTree.tree().GetEntries() /
                       (0.01 * nall) if nall else 0))

            # now write the output
            if not self.noOut:
                outTree.write()
                # only set when the histograms were created for this file
                if hGenWeights is not None:
                    hGenWeights.Write(hGenWeights.GetName())
                    hNumWeights.Write(hNumWeights.GetName())
                outFile.Close()
                print("Done %s" % outFileName)
            if self.jobReport:
                self.jobReport.addInputFile(fname, nall)
            if toBeDeleted:
                os.unlink(ftoread)

        for m in self.modules:
            m.endJob()

        elapsed = time.time() - t0
        print("Total time %.1f sec. to process %i events. Rate = %.1f Hz." %
              (elapsed, totEntriesRead,
               totEntriesRead / elapsed if elapsed > 0 else 0.0))

        if self.haddFileName:
            # prefer a local copy of haddnano.py, else rely on PATH
            haddnano = "./haddnano.py" if os.path.isfile(
                "./haddnano.py") else "haddnano.py"
            os.system("%s %s %s" %
                      (haddnano, self.haddFileName, " ".join(outFileNames)))
        if self.jobReport:
            self.jobReport.addOutputFile(self.haddFileName)
            self.jobReport.save()
Exemple #5
0
        shutil.move(output_file,cwd)
    else:
        print "no output root file"
    logfile.close()
    
except Exception as e:
    print "problem at runtime ", e
# print out
# Read the log once: the same text is echoed to stdout and then scanned for
# the event count. (Iterating the file object after read() yields nothing,
# which left input_events stuck at its default.)
with open(output_log, "r") as logfile:
    log_text = logfile.read()
print(log_text)
os.chdir(cwd)

# The count reported on the last "TOTAL EVENTS:" line wins; default is 1.
input_events = 1
for line in log_text.splitlines():
    if "TOTAL EVENTS:" in line:
        input_events = int(line.split(":")[-1].strip())

# Every input file is reported with the same total event count.
for infile in inputFiles():
    job_report.addInputFile(infile, input_events)

job_report.addOutputFile(output_file, input_events)

job_report.save()
try:
    if os.path.exists(output_log):
        shutil.copy(output_log, "cmsRun-stderr.log")
except Exception as e:
    # best effort: a failed copy must not fail the job
    print("could not copy logfile  %s" % e)
print("DONE")
os.system("ls -lR")

Exemple #6
0
# Run NanoReader over every input file, writing outputs named out1, out2, ...
# and recording the per-file event count in the framework job report.
nFiles = 0
outFilesList = []
jobReport = JobReport()
for nFiles, f in enumerate(inputFiles(), 1):
    outStr = "out%d" % nFiles
    outFilesList.append(outStr)
    nEvents = NanoReader(inputFileName=f,
                         outputFileName=outStr,
                         nJobs=1,
                         jobNum=1,
                         json=runsAndLumis())
    print("events are %i \n" % nEvents)
    jobReport.addInputFile(f, nEvents)

#hadd outputs
# (trailing space kept so the echoed command line is unchanged)
hadd_cmnd = " ".join(["./haddnano.py", "tree.root"] + outFilesList) + " "

print("executing : %s" % hadd_cmnd)
os.system(hadd_cmnd)
print("ls")
os.system("ls")

jobReport.addOutputFile("tree.root")
jobReport.save()

print("DONE")
os.system("ls -lR")
Exemple #7
0
class PostProcessor:
    """Skim ROOT "Events" trees, writing one output file per input file.

    Parameters:
        outputDir: directory receiving the output files (created on demand).
        inputFiles: iterable of input file names.
        cut: selection forwarded to ``preSkim``.
        branchsel: argument for ``BranchSelection`` (passed to ``FullOutput``).
        modules: list of module objects providing ``beginJob``/``endJob``.
            With no modules the input tree is cloned without an event loop.
        compression: ``"ALGO:LEVEL"`` with ALGO in LZMA, ZLIB, or ``"none"``.
        friend: write a ``FriendOutput`` instead of a ``FullOutput``.
        postfix: text inserted before ``.root`` in output file names;
            defaults to ``_Friend`` or ``_Skim``.
        jsonInput: JSON selection forwarded to ``preSkim``.
        noOut: do not write any output file (requires at least one module).
        justcount: only print how many entries would be selected.
        provenance: forwarded to ``FullOutput``.
        haddFileName: if set, all outputs are merged into this file with
            ``./haddnano.py`` at the end of the job.
        fwkJobReport: create and save a ``JobReport``; implies a final hadd
            into ``tree.root`` when ``haddFileName`` is not given.
    """

    def __init__(self, outputDir, inputFiles, cut=None, branchsel=None,
                 modules=None, compression="LZMA:9", friend=False,
                 postfix=None, jsonInput=None, noOut=False, justcount=False,
                 provenance=False, haddFileName=None, fwkJobReport=False):
        self.outputDir = outputDir
        self.inputFiles = inputFiles
        self.cut = cut
        # A fresh list per instance instead of a shared mutable default.
        self.modules = [] if modules is None else modules
        self.compression = compression
        self.postfix = postfix
        self.json = jsonInput
        self.noOut = noOut
        self.friend = friend
        self.justcount = justcount
        self.provenance = provenance
        self.jobReport = JobReport() if fwkJobReport else None
        self.haddFileName = haddFileName
        if self.jobReport and not self.haddFileName:
            print("Because you requested a FJR we assume you want the final hadd. No name specified for the output file, will use tree.root")
            self.haddFileName = "tree.root"
        self.branchsel = BranchSelection(branchsel) if branchsel else None

    def run(self):
        """Process every input file and write the requested outputs.

        Raises:
            RuntimeError: for an unsupported compression algorithm, or when
                ``noOut`` is set and there are no modules to run.
        """
        # Defined up front so that the noOut path never meets unset names.
        outpostfix = self.postfix if self.postfix is not None else (
            "_Friend" if self.friend else "_Skim")
        compressionLevel = 0
        compressionAlgo = None  # only needed when compressionLevel is non-zero
        if not self.noOut:
            if self.compression != "none":
                ROOT.gInterpreter.ProcessLine("#include <Compression.h>")
                (algo, level) = self.compression.split(":")
                compressionLevel = int(level)
                if algo == "LZMA":
                    compressionAlgo = ROOT.ROOT.kLZMA
                elif algo == "ZLIB":
                    compressionAlgo = ROOT.ROOT.kZLIB
                else:
                    raise RuntimeError("Unsupported compression %s" % algo)
            print("Will write selected trees to " + self.outputDir)
            if not self.justcount:
                if not os.path.exists(self.outputDir):
                    os.system("mkdir -p " + self.outputDir)

        if self.noOut:
            if len(self.modules) == 0:
                raise RuntimeError(
                    "Running with --noout and no modules does nothing!")

        for m in self.modules:
            m.beginJob()

        # Without modules there is nothing to run per event: clone the tree.
        fullClone = (len(self.modules) == 0)
        outFileNames = []
        for fname in self.inputFiles:
            # open input file
            inFile = ROOT.TFile.Open(fname)

            # get input tree
            inTree = inFile.Get("Events")

            # pre-skimming
            elist, jsonFilter = preSkim(inTree, self.json, self.cut)
            if self.justcount:
                print('Would select %d entries from %s' %
                      (elist.GetN() if elist else inTree.GetEntries(), fname))
                continue

            if fullClone:
                # no need of a reader (no event loop), but set up the elist if available
                if elist:
                    inTree.SetEntryList(elist)
            else:
                # initialize reader
                inTree = InputTree(inTree, elist)

            if not self.noOut:
                # prepare output file
                outFileName = os.path.join(
                    self.outputDir,
                    os.path.basename(fname).replace(".root",
                                                    outpostfix + ".root"))
                outFile = ROOT.TFile.Open(outFileName, "RECREATE", "",
                                          compressionLevel)
                outFileNames.append(outFileName)
                if compressionLevel:
                    outFile.SetCompressionAlgorithm(compressionAlgo)

                # prepare output tree
                if self.friend:
                    outTree = FriendOutput(inFile, inTree, outFile)
                else:
                    outTree = FullOutput(inFile,
                                         inTree,
                                         outFile,
                                         branchSelection=self.branchsel,
                                         fullClone=fullClone,
                                         jsonFilter=jsonFilter,
                                         provenance=self.provenance)
            else:
                # noOut: modules still run, but nothing is written
                outFile = None
                outTree = None

            # process events, if needed
            if not fullClone:
                (nall, npass, timeLoop) = eventLoop(self.modules, inFile,
                                                    outFile, inTree, outTree)
                print('Processed %d entries from %s, selected %d entries' %
                      (nall, fname, npass))
            else:
                # needed below for the job report
                nall = inTree.GetEntries()
                print('Selected %d entries from %s' %
                      (outTree.tree().GetEntries(), fname))

            # now write the output
            if not self.noOut:
                outTree.write()
                outFile.Close()
                print("Done %s" % outFileName)
            if self.jobReport:
                self.jobReport.addInputFile(fname, nall)

        for m in self.modules:
            m.endJob()

        if self.haddFileName:
            os.system(
                "./haddnano.py %s %s" %
                (self.haddFileName, " ".join(outFileNames))
            )  #FIXME: remove "./" once haddnano.py is distributed with cms releases
        if self.jobReport:
            self.jobReport.addOutputFile(self.haddFileName)
            self.jobReport.save()