Ejemplo n.º 1
0
class PostProcessor:
    """Drive the skimming / friend-tree production of ROOT input files.

    For every input file the processor opens the ``Events`` (or ``Friends``)
    tree, applies the optional pre-selection (``preSkim``), optionally runs
    the configured modules over the events (``eventLoop``) and writes one
    output file per input file.  Optionally the outputs are merged with
    ``haddnano.py`` and a framework job report is saved.
    """

    def __init__(self,
                 outputDir,
                 inputFiles,
                 cut=None,
                 branchsel=None,
                 modules=None,
                 compression="LZMA:9",
                 friend=False,
                 postfix=None,
                 jsonInput=None,
                 noOut=False,
                 justcount=False,
                 provenance=False,
                 haddFileName=None,
                 fwkJobReport=False,
                 histFileName=None,
                 histDirName=None,
                 outputbranchsel=None,
                 maxEntries=None,
                 firstEntry=0,
                 prefetch=False,
                 longTermCache=False):
        """Store the job configuration.

        Args:
            outputDir: directory the per-input output files are written to.
            inputFiles: iterable of input file names.  An entry may hold
                several comma-separated names; the first is the main file,
                the others are attached to it as friend trees.
            cut: selection forwarded to ``preSkim``.
            branchsel: input branch selection, wrapped in ``BranchSelection``
                when truthy.
            modules: list of module objects (``beginJob``/``endJob`` are
                called on each).  ``None`` means "no modules"; a fresh list
                is created per instance.
            compression: ``"ALGO:level"`` with ALGO in LZMA, ZLIB, LZ4, or
                the literal ``"none"``.
            friend: write a ``FriendOutput`` instead of a ``FullOutput``.
            postfix: suffix inserted before ``.root`` in output names;
                defaults to ``_Friend`` or ``_Skim``.
            jsonInput: JSON selection forwarded to ``preSkim``.
            noOut: do not write any output file.
            justcount: only print how many entries would be selected.
            provenance: forwarded to ``FullOutput``.
            haddFileName: if set, outputs are merged into this file.
            fwkJobReport: create and save a ``JobReport``; implies a hadd
                step (output name defaults to ``tree.root``).
            histFileName: histogram output file; must be given together
                with ``histDirName``.
            histDirName: histogram directory name; must be given together
                with ``histFileName``.
            outputbranchsel: output branch selection; falls back to
                ``branchsel`` when not given.
            maxEntries: maximum number of entries per file; a falsy value
                means no limit.
            firstEntry: index of the first entry to process.
            prefetch: copy remote (``root://``) files locally with xrdcp
                before reading them.
            longTermCache: keep prefetched files across runs (cleaning the
                temporary directory is then up to the user).
        """
        self.outputDir = outputDir
        self.inputFiles = inputFiles
        self.cut = cut
        # A literal [] default would be shared by every instance.
        self.modules = modules if modules is not None else []
        self.compression = compression
        self.postfix = postfix
        self.json = jsonInput
        self.noOut = noOut
        self.friend = friend
        self.justcount = justcount
        self.provenance = provenance
        self.jobReport = JobReport() if fwkJobReport else None
        self.haddFileName = haddFileName
        self.histFile = None
        if self.jobReport and not self.haddFileName:
            print("Because you requested a FJR we assume you want the final " \
                "hadd. No name specified for the output file, will use tree.root")
            self.haddFileName = "tree.root"
        self.branchsel = BranchSelection(branchsel) if branchsel else None
        if outputbranchsel is not None:
            self.outputbranchsel = BranchSelection(outputbranchsel)
        elif branchsel is not None:
            # Use the same branches in the output as in input
            self.outputbranchsel = BranchSelection(branchsel)
        else:
            self.outputbranchsel = None

        self.histFileName = histFileName
        self.histDirName = histDirName
        # 2^63 - 1, largest int64
        self.maxEntries = maxEntries if maxEntries else 9223372036854775807
        self.firstEntry = firstEntry
        self.prefetch = prefetch  # prefetch files to TMPDIR using xrdcp
        # keep cached files across runs (it's then up to you to clean up the temp)
        self.longTermCache = longTermCache

    def prefetchFile(self, fname, verbose=True):
        """Copy a remote file to the local temporary directory with xrdcp.

        Args:
            fname: input file name; only names starting with ``root://``
                are copied.
            verbose: print progress and error messages.

        Returns:
            A ``(path, toBeDeleted)`` tuple: the path that should be opened
            and whether the caller must remove it after use.  When the file
            is not remote or the copy fails, ``(fname, False)`` is returned.
        """
        tmpdir = os.environ.get('TMPDIR', "/tmp")
        if not fname.startswith("root://"):
            return fname, False
        if self.longTermCache:
            # Stable name so that later runs find the cached copy.
            # sha1() needs bytes on Python 3.
            rawname = fname if isinstance(fname, bytes) \
                else fname.encode("utf-8")
            rndchars = "long_cache-id%d-%s" \
                % (os.getuid(), hashlib.sha1(rawname).hexdigest())
        else:
            # bytearray() yields integers on both Python 2 and Python 3.
            rndchars = "".join(
                [hex(i)[2:] for i in bytearray(os.urandom(8))])
        localfile = "%s/%s-%s.root" \
            % (tmpdir, os.path.basename(fname).replace(".root", ""), rndchars)
        if self.longTermCache and os.path.exists(localfile):
            if verbose:
                print("Filename %s is already available in local path %s " \
                    % (fname, localfile))
            return localfile, False
        try:
            if verbose:
                print("Filename %s is remote, will do a copy to local path %s"\
                    % (fname, localfile))
            start = time.time()
            subprocess.check_output(["xrdcp", "-f", "-N", fname, localfile])
            if verbose:
                print("Time used for transferring the file locally: %.2f s"\
                    % (time.time() - start))
            return localfile, (not self.longTermCache)
        except Exception:
            # Best effort: any copy failure falls back to remote reading,
            # but KeyboardInterrupt / SystemExit are no longer swallowed.
            if verbose:
                print(
                    "Error: could not save file locally, will run from remote")
            if os.path.exists(localfile):
                if verbose:
                    print("Deleting partially transferred file %s" % localfile)
                try:
                    os.unlink(localfile)
                except OSError:
                    pass
            return fname, False

    def run(self):
        """Process all input files according to the stored configuration.

        Raises:
            RuntimeError: on an unsupported compression algorithm, when
                ``noOut`` is requested without any module, or when only one
                of ``histFileName`` / ``histDirName`` is set.
        """
        outpostfix = self.postfix if self.postfix is not None else (
            "_Friend" if self.friend else "_Skim")
        if not self.noOut:

            if self.compression != "none":
                ROOT.gInterpreter.ProcessLine("#include <Compression.h>")
                (algo, level) = self.compression.split(":")
                compressionLevel = int(level)
                if algo == "LZMA":
                    compressionAlgo = ROOT.ROOT.kLZMA
                elif algo == "ZLIB":
                    compressionAlgo = ROOT.ROOT.kZLIB
                elif algo == "LZ4":
                    compressionAlgo = ROOT.ROOT.kLZ4
                else:
                    raise RuntimeError("Unsupported compression %s" % algo)
            else:
                compressionLevel = 0
            print("Will write selected trees to " + self.outputDir)
            if (not self.justcount and self.outputDir
                    and not os.path.exists(self.outputDir)):
                # No shell involved, so paths with spaces or shell
                # metacharacters are handled correctly.
                try:
                    os.makedirs(self.outputDir)
                except OSError:
                    # Tolerate a concurrent creation of the directory.
                    if not os.path.isdir(self.outputDir):
                        raise
        else:
            compressionLevel = 0

        if self.noOut:
            if len(self.modules) == 0:
                raise RuntimeError(
                    "Running with --noout and no modules does nothing!")

        # Open histogram file, if desired
        haveHistFile = self.histFileName is not None
        haveHistDir = self.histDirName is not None
        if haveHistFile != haveHistDir:
            raise RuntimeError(
                "Must specify both histogram file and histogram directory!")
        elif haveHistFile:
            self.histFile = ROOT.TFile.Open(self.histFileName, "RECREATE")
        else:
            self.histFile = None

        for m in self.modules:
            if hasattr(m, 'writeHistFile') and m.writeHistFile:
                m.beginJob(histFile=self.histFile,
                           histDirName=self.histDirName)
            else:
                m.beginJob()

        # Without modules there is no event loop: the tree is cloned as is.
        fullClone = (len(self.modules) == 0)
        outFileNames = []
        t0 = time.time()
        totEntriesRead = 0
        for fname in self.inputFiles:
            ffnames = []
            if "," in fname:
                fnames = fname.split(',')
                fname, ffnames = fnames[0], fnames[1:]

            # open input file
            if self.prefetch:
                ftoread, toBeDeleted = self.prefetchFile(fname)
                inFile = ROOT.TFile.Open(ftoread)
            else:
                inFile = ROOT.TFile.Open(fname)

            # get input tree
            inTree = inFile.Get("Events")
            # NOTE: keep '== None' here; the result is a PyROOT object and
            # a null ROOT pointer is matched by equality, not by identity.
            if inTree == None:
                inTree = inFile.Get("Friends")
            nEntries = min(inTree.GetEntries() - self.firstEntry,
                           self.maxEntries)
            totEntriesRead += nEntries
            # pre-skimming
            elist, jsonFilter = preSkim(inTree,
                                        self.json,
                                        self.cut,
                                        maxEntries=self.maxEntries,
                                        firstEntry=self.firstEntry)
            if self.justcount:
                print('Would select %d / %d entries from %s (%.2f%%)' %
                      (elist.GetN() if elist else nEntries, nEntries, fname,
                       (elist.GetN() if elist else nEntries) /
                       (0.01 * nEntries) if nEntries else 0))
                if self.prefetch:
                    if toBeDeleted:
                        os.unlink(ftoread)
                continue

            print('Pre-select %d entries out of %s (%.2f%%)' %
                  (elist.GetN() if elist else nEntries, nEntries,
                   (elist.GetN() if elist else nEntries) /
                   (0.01 * nEntries) if nEntries else 0))
            # Keep references to the friend files and trees while the
            # main tree is in use.
            inAddFiles = []
            inAddTrees = []
            for ffname in ffnames:
                inAddFiles.append(ROOT.TFile.Open(ffname))
                inAddTree = inAddFiles[-1].Get("Events")
                if inAddTree == None:  # PyROOT null check, see above
                    inAddTree = inAddFiles[-1].Get("Friends")
                inAddTrees.append(inAddTree)
                inTree.AddFriend(inAddTree)

            if fullClone:
                # no need of a reader (no event loop), but set up the elist if available
                if elist:
                    inTree.SetEntryList(elist)
            else:
                # initialize reader
                inTree = InputTree(inTree, elist)

            # prepare output file
            if not self.noOut:
                outFileName = os.path.join(
                    self.outputDir,
                    os.path.basename(fname).replace(".root",
                                                    outpostfix + ".root"))
                outFile = ROOT.TFile.Open(outFileName, "RECREATE", "",
                                          compressionLevel)
                outFileNames.append(outFileName)
                if compressionLevel:
                    outFile.SetCompressionAlgorithm(compressionAlgo)
                # prepare output tree
                if self.friend:
                    outTree = FriendOutput(inFile, inTree, outFile)
                else:
                    outTree = FullOutput(
                        inFile,
                        inTree,
                        outFile,
                        branchSelection=self.branchsel,
                        outputbranchSelection=self.outputbranchsel,
                        fullClone=fullClone,
                        maxEntries=self.maxEntries,
                        firstEntry=self.firstEntry,
                        jsonFilter=jsonFilter,
                        provenance=self.provenance)
            else:
                outFile = None
                outTree = None
                if self.branchsel:
                    self.branchsel.selectBranches(inTree)

            # process events, if needed
            if not fullClone:
                eventRange = range(
                    self.firstEntry, self.firstEntry +
                    nEntries) if nEntries > 0 and not elist else None
                (nall, npass, timeLoop) = eventLoop(self.modules,
                                                    inFile,
                                                    outFile,
                                                    inTree,
                                                    outTree,
                                                    eventRange=eventRange,
                                                    maxEvents=self.maxEntries)
                print(
                    'Processed %d preselected entries from %s (%s entries). Finally selected %d entries'
                    % (nall, fname, nEntries, npass))
            else:
                nall = nEntries
                print('Selected %d / %d entries from %s (%.2f%%)' %
                      (outTree.tree().GetEntries(), nall, fname,
                       outTree.tree().GetEntries() /
                       (0.01 * nall) if nall else 0))

            # now write the output
            if not self.noOut:
                outTree.write()
                outFile.Close()
                print("Done %s" % outFileName)
            if self.jobReport:
                self.jobReport.addInputFile(fname, nall)
            if self.prefetch:
                if toBeDeleted:
                    os.unlink(ftoread)

        for m in self.modules:
            m.endJob()

        # Measure once so the printed time and rate are consistent, and
        # never divide by a zero interval.
        elapsed = time.time() - t0
        print("Total time %.1f sec. to process %i events. Rate = %.1f Hz." %
              (elapsed, totEntriesRead,
               totEntriesRead / elapsed if elapsed > 0 else 0.0))

        if self.haddFileName:
            haddnano = "./haddnano.py" if os.path.isfile(
                "./haddnano.py") else "haddnano.py"
            os.system("%s %s %s" %
                      (haddnano, self.haddFileName, " ".join(outFileNames)))
        if self.jobReport:
            self.jobReport.addOutputFile(self.haddFileName)
            self.jobReport.save()
def HarvestNanoAOD(inFileList, outFilePath, sample, maxevents=-1):
    """Copy selected NanoAOD quantities into a flat ``TreeFatJet`` tree.

    The input files are chained as an ``Events`` TChain, read event by
    event, and the FatJet, GenPart (non-QCD samples only), GenJetAK8,
    SubJet, SubGenJetAK8 and pile-up/vertex quantities are written to a
    new tree stored in ``outFilePath``.

    Args:
        inFileList: iterable of input file paths added to the TChain.
        outFilePath: path of the ROOT file to (re)create.
        sample: sample name; when it contains ``"QCD"`` the GenPart
            branches are skipped and ``branchSel_QCD.txt`` is used instead
            of ``branchSel.txt``.
        maxevents: maximum number of events to process; a value <= 0
            (the default) processes all events.
    """
    #
    # Create the output file
    #
    print("Create Output File: %s" % (outFilePath))
    f = ROOT.TFile(outFilePath, "RECREATE")
    f.cd()
    #
    # Initialize the tree jet
    #
    treeName = "TreeFatJet"

    isMC_QCD = "QCD" in sample

    print("Create Output Tree: %s" % (treeName))
    TreeFatJet = ROOT.TTree(treeName, treeName)

    #
    # FatJet branch
    #
    nFatJetSizeMax = 25
    nFatJetString = 'nFatJet'
    nFatJet = bookIntBranch(TreeFatJet, nFatJetString)
    FatJetPt = bookFloatArrayBranch(TreeFatJet, 'FatJet_pt', nFatJetString,
                                    nFatJetSizeMax)
    FatJetEta = bookFloatArrayBranch(TreeFatJet, 'FatJet_eta', nFatJetString,
                                     nFatJetSizeMax)
    FatJetPhi = bookFloatArrayBranch(TreeFatJet, 'FatJet_phi', nFatJetString,
                                     nFatJetSizeMax)
    FatJetM = bookFloatArrayBranch(TreeFatJet, 'FatJet_mass', nFatJetString,
                                   nFatJetSizeMax)
    FatJetTau21 = bookFloatArrayBranch(TreeFatJet, 'FatJet_tau21',
                                       nFatJetString, nFatJetSizeMax)
    FatJetTau31 = bookFloatArrayBranch(TreeFatJet, 'FatJet_tau31',
                                       nFatJetString, nFatJetSizeMax)
    FatJetTau32 = bookFloatArrayBranch(TreeFatJet, 'FatJet_tau32',
                                       nFatJetString, nFatJetSizeMax)
    FatJetDeepTagTvsQCD = bookFloatArrayBranch(TreeFatJet,
                                               'FatJet_deepTag_TvsQCD',
                                               nFatJetString, nFatJetSizeMax)
    FatJetDeepTagWvsQCD = bookFloatArrayBranch(TreeFatJet,
                                               'FatJet_deepTag_WvsQCD',
                                               nFatJetString, nFatJetSizeMax)
    FatJetDeepTagZvsQCD = bookFloatArrayBranch(TreeFatJet,
                                               'FatJet_deepTag_ZvsQCD',
                                               nFatJetString, nFatJetSizeMax)
    FatJetDeepTagQCD = bookFloatArrayBranch(TreeFatJet, 'FatJet_deepTag_QCD',
                                            nFatJetString, nFatJetSizeMax)
    FatJetDeepTagQCDOthers = bookFloatArrayBranch(TreeFatJet,
                                                  'FatJet_deepTag_QCDothers',
                                                  nFatJetString,
                                                  nFatJetSizeMax)
    FatJetDeepTagMDTvsQCD = bookFloatArrayBranch(TreeFatJet,
                                                 'FatJet_deepTagMD_TvsQCD',
                                                 nFatJetString, nFatJetSizeMax)
    FatJetDeepTagMDWvsQCD = bookFloatArrayBranch(TreeFatJet,
                                                 'FatJet_deepTagMD_WvsQCD',
                                                 nFatJetString, nFatJetSizeMax)
    FatJetDeepTagMDZvsQCD = bookFloatArrayBranch(TreeFatJet,
                                                 'FatJet_deepTagMD_ZvsQCD',
                                                 nFatJetString, nFatJetSizeMax)
    FatJetMSoftDrop = bookFloatArrayBranch(TreeFatJet, 'FatJet_msoftdrop',
                                           nFatJetString, nFatJetSizeMax)
    FatJetRawFactor = bookFloatArrayBranch(TreeFatJet, 'FatJet_rawFactor',
                                           nFatJetString, nFatJetSizeMax)
    FatJetJetId = bookIntArrayBranch(TreeFatJet, 'FatJet_jetId', nFatJetString,
                                     nFatJetSizeMax)
    FatJetSubJetIdx1 = bookIntArrayBranch(TreeFatJet, 'FatJet_subJetIdx1',
                                          nFatJetString, nFatJetSizeMax)
    FatJetSubJetIdx2 = bookIntArrayBranch(TreeFatJet, 'FatJet_subJetIdx2',
                                          nFatJetString, nFatJetSizeMax)
    FatJetGenJetAK8Idx = bookIntArrayBranch(TreeFatJet, 'FatJet_genJetAK8Idx',
                                            nFatJetString, nFatJetSizeMax)

    #
    # GenPart branch
    #
    if not isMC_QCD:
        nGenPartSizeMax = 1000
        nGenPartString = 'nGenPart'
        nGenPart = bookIntBranch(TreeFatJet, nGenPartString)
        GenPartPt = bookFloatArrayBranch(TreeFatJet, 'GenPart_pt',
                                         nGenPartString, nGenPartSizeMax)
        GenPartEta = bookFloatArrayBranch(TreeFatJet, 'GenPart_eta',
                                          nGenPartString, nGenPartSizeMax)
        GenPartPhi = bookFloatArrayBranch(TreeFatJet, 'GenPart_phi',
                                          nGenPartString, nGenPartSizeMax)
        GenPartM = bookFloatArrayBranch(TreeFatJet, 'GenPart_mass',
                                        nGenPartString, nGenPartSizeMax)
        GenPartPdgId = bookIntArrayBranch(TreeFatJet, 'GenPart_pdgId',
                                          nGenPartString, nGenPartSizeMax)
        GenPartStatus = bookIntArrayBranch(TreeFatJet, 'GenPart_status',
                                           nGenPartString, nGenPartSizeMax)
        GenPartStatusFlags = bookIntArrayBranch(TreeFatJet,
                                                'GenPart_statusFlags',
                                                nGenPartString,
                                                nGenPartSizeMax)
        GenPartGenPartIdxMother = bookIntArrayBranch(
            TreeFatJet, 'GenPart_genPartIdxMother', nGenPartString,
            nGenPartSizeMax)
    #
    # GenJetAK8 branch
    #
    nGenJetAK8SizeMax = 25
    nGenJetAK8String = 'nGenJetAK8'
    nGenJetAK8 = bookIntBranch(TreeFatJet, nGenJetAK8String)
    GenJetAK8Pt = bookFloatArrayBranch(TreeFatJet, 'GenJetAK8_pt',
                                       nGenJetAK8String, nGenJetAK8SizeMax)
    GenJetAK8Eta = bookFloatArrayBranch(TreeFatJet, 'GenJetAK8_eta',
                                        nGenJetAK8String, nGenJetAK8SizeMax)
    GenJetAK8Phi = bookFloatArrayBranch(TreeFatJet, 'GenJetAK8_phi',
                                        nGenJetAK8String, nGenJetAK8SizeMax)
    GenJetAK8M = bookFloatArrayBranch(TreeFatJet, 'GenJetAK8_mass',
                                      nGenJetAK8String, nGenJetAK8SizeMax)
    GenJetAK8HadronFlavour = bookIntArrayBranch(TreeFatJet,
                                                'GenJetAK8_hadronFlavour',
                                                nGenJetAK8String,
                                                nGenJetAK8SizeMax)
    GenJetAK8PartonFlavour = bookIntArrayBranch(TreeFatJet,
                                                'GenJetAK8_partonFlavour',
                                                nGenJetAK8String,
                                                nGenJetAK8SizeMax)
    #
    # Subjet branch
    #
    nSubJetSizeMax = 50
    nSubJetString = 'nSubJet'
    nSubJet = bookIntBranch(TreeFatJet, nSubJetString)
    SubJetPt = bookFloatArrayBranch(TreeFatJet, 'SubJet_pt', nSubJetString,
                                    nSubJetSizeMax)
    SubJetEta = bookFloatArrayBranch(TreeFatJet, 'SubJet_eta', nSubJetString,
                                     nSubJetSizeMax)
    SubJetPhi = bookFloatArrayBranch(TreeFatJet, 'SubJet_phi', nSubJetString,
                                     nSubJetSizeMax)
    SubJetM = bookFloatArrayBranch(TreeFatJet, 'SubJet_mass', nSubJetString,
                                   nSubJetSizeMax)
    SubJetRawFactor = bookFloatArrayBranch(TreeFatJet, 'SubJet_rawFactor',
                                           nSubJetString, nSubJetSizeMax)
    SubJetNBHadrons = bookIntArrayBranch(TreeFatJet, 'SubJet_nBHadrons',
                                         nSubJetString, nSubJetSizeMax)
    SubJetNCHadrons = bookIntArrayBranch(TreeFatJet, 'SubJet_nCHadrons',
                                         nSubJetString, nSubJetSizeMax)
    #
    # SubGenJetAK8 branch
    #
    nSubGenJetAK8SizeMax = 50
    nSubGenJetAK8String = 'nSubGenJetAK8'
    nSubGenJetAK8 = bookIntBranch(TreeFatJet, nSubGenJetAK8String)
    SubGenJetAK8Pt = bookFloatArrayBranch(TreeFatJet, 'SubGenJetAK8_pt',
                                          nSubGenJetAK8String,
                                          nSubGenJetAK8SizeMax)
    SubGenJetAK8Eta = bookFloatArrayBranch(TreeFatJet, 'SubGenJetAK8_eta',
                                           nSubGenJetAK8String,
                                           nSubGenJetAK8SizeMax)
    SubGenJetAK8Phi = bookFloatArrayBranch(TreeFatJet, 'SubGenJetAK8_phi',
                                           nSubGenJetAK8String,
                                           nSubGenJetAK8SizeMax)
    SubGenJetAK8M = bookFloatArrayBranch(TreeFatJet, 'SubGenJetAK8_mass',
                                         nSubGenJetAK8String,
                                         nSubGenJetAK8SizeMax)

    #
    # PV branch
    #
    PVnpvs = bookIntBranch(TreeFatJet, 'nPVnpvs')
    PVnpvsGood = bookIntBranch(TreeFatJet, 'nPVnpvsGood')
    PileUpNTrueInt = bookFloatBranch(TreeFatJet, 'nPileUpNTrueInt')
    PileUpNPU = bookIntBranch(TreeFatJet, 'nPileUpNPU')
    #
    # SetupTChain
    #
    tree = ROOT.TChain("Events")
    for inFilePath in inFileList:
        print('Adding files: %s' % (inFilePath))
        tree.Add(inFilePath)

    tree.ls()
    #
    # Use TChain and Setup TTreeReader.
    #
    inTree = InputTree(tree)

    #
    # Restrict the input to the branches listed in the selection file
    #
    if isMC_QCD:
        branchSel = BranchSelection("branchSel_QCD.txt")
    else:
        branchSel = BranchSelection("branchSel.txt")
    branchSel.selectBranches(inTree)

    numEvents = inTree.GetEntries()
    #
    # Number of events to process: exactly maxevents when it is positive,
    # all events otherwise.
    #
    if maxevents > 0:
        nToProcess = min(numEvents, maxevents)
    else:
        nToProcess = numEvents
    # Use a lazy range on Python 2 as well as on Python 3.
    try:
        eventIndices = xrange(nToProcess)
    except NameError:
        eventIndices = range(nToProcess)

    #
    # Loop over events
    #
    print(numEvents)
    for iev in eventIndices:
        if (iev) % 1000 == 0:
            print("Processing event %d out of %d" % (iev, numEvents))
        #
        # Load Event
        #
        evt = Event(inTree, iev)
        #
        # Loop over fatjets
        #
        fatjets = Collection(evt, "FatJet")
        nFatJet[0] = 0
        for i, fj in enumerate(fatjets):
            # The arrays were booked with nFatJetSizeMax as their size;
            # never index past it.
            if i >= nFatJetSizeMax:
                break
            fj_p4 = fj.p4()
            FatJetPt[i] = fj_p4.Pt()
            FatJetEta[i] = fj_p4.Eta()
            FatJetPhi[i] = fj_p4.Phi()
            FatJetM[i] = fj_p4.M()
            # N-subjettiness ratios; -1 flags an undefined ratio.
            if fj.tau1 > 0:
                FatJetTau21[i] = fj.tau2 / fj.tau1
                FatJetTau31[i] = fj.tau3 / fj.tau1
            else:
                FatJetTau21[i] = -1
                FatJetTau31[i] = -1
            if fj.tau2 > 0:
                FatJetTau32[i] = fj.tau3 / fj.tau2
            else:
                FatJetTau32[i] = -1

            FatJetDeepTagMDTvsQCD[i] = fj.deepTagMD_TvsQCD
            FatJetDeepTagMDWvsQCD[i] = fj.deepTagMD_WvsQCD
            FatJetDeepTagMDZvsQCD[i] = fj.deepTagMD_ZvsQCD
            FatJetDeepTagTvsQCD[i] = fj.deepTag_TvsQCD
            FatJetDeepTagWvsQCD[i] = fj.deepTag_WvsQCD
            FatJetDeepTagZvsQCD[i] = fj.deepTag_ZvsQCD
            FatJetDeepTagQCD[i] = fj.deepTag_QCD
            FatJetDeepTagQCDOthers[i] = fj.deepTag_QCDothers
            FatJetMSoftDrop[i] = fj.msoftdrop
            FatJetRawFactor[i] = fj.rawFactor
            FatJetJetId[i] = fj.jetId
            FatJetSubJetIdx1[i] = fj.subJetIdx1
            FatJetSubJetIdx2[i] = fj.subJetIdx2
            FatJetGenJetAK8Idx[i] = fj.genJetAK8Idx
            nFatJet[0] += 1
        #
        # Loop over genparts
        #
        if not isMC_QCD:
            particles = Collection(evt, "GenPart")
            nGenPart[0] = 0
            for i, gp in enumerate(particles):
                if i >= nGenPartSizeMax:
                    break
                GenPartPt[i] = gp.pt
                GenPartEta[i] = gp.eta
                GenPartPhi[i] = gp.phi
                GenPartM[i] = gp.mass
                GenPartPdgId[i] = gp.pdgId
                GenPartStatus[i] = gp.status
                GenPartStatusFlags[i] = gp.statusFlags
                GenPartGenPartIdxMother[i] = gp.genPartIdxMother
                nGenPart[0] += 1
        #
        # Loop over GenJetAK8
        #
        jets = Collection(evt, "GenJetAK8")
        nGenJetAK8[0] = 0
        for i, gj in enumerate(jets):
            if i >= nGenJetAK8SizeMax:
                break
            GenJetAK8Pt[i] = gj.pt
            GenJetAK8Eta[i] = gj.eta
            GenJetAK8Phi[i] = gj.phi
            GenJetAK8M[i] = gj.mass
            GenJetAK8HadronFlavour[i] = gj.hadronFlavour
            GenJetAK8PartonFlavour[i] = gj.partonFlavour
            nGenJetAK8[0] += 1
        #
        # Loop over SubJet
        #
        subjets = Collection(evt, "SubJet")
        nSubJet[0] = 0
        for i, sj in enumerate(subjets):
            if i >= nSubJetSizeMax:
                break
            SubJetPt[i] = sj.pt
            SubJetEta[i] = sj.eta
            SubJetPhi[i] = sj.phi
            SubJetM[i] = sj.mass
            SubJetRawFactor[i] = sj.rawFactor
            SubJetNBHadrons[i] = sj.nBHadrons
            SubJetNCHadrons[i] = sj.nCHadrons
            nSubJet[0] += 1

        #
        # Loop over SubGenJetAK8
        #
        subjets = Collection(evt, "SubGenJetAK8")
        nSubGenJetAK8[0] = 0
        for i, sj in enumerate(subjets):
            if i >= nSubGenJetAK8SizeMax:
                break
            SubGenJetAK8Pt[i] = sj.pt
            SubGenJetAK8Eta[i] = sj.eta
            SubGenJetAK8Phi[i] = sj.phi
            SubGenJetAK8M[i] = sj.mass
            nSubGenJetAK8[0] += 1
        #
        # Primary-vertex and pile-up quantities
        #
        PVnpvs[0] = evt.PV_npvs
        PVnpvsGood[0] = evt.PV_npvsGood
        PileUpNTrueInt[0] = evt.Pileup_nTrueInt
        PileUpNPU[0] = evt.Pileup_nPU

        #
        # Fill the tree for this event
        #
        TreeFatJet.Fill()

    #
    # Save the output ttree in the output file
    #
    print("Write tree to file")
    f.Write()

    #
    # Gracefully close the output file
    #
    print("Closing output")
    f.Close()
Ejemplo n.º 3
0
    # Now we will demonstrate running outside the Draw command, which is more flexible.
    # However, this is slow, so we do things to speed it up, like
    # pre-skimming things, as it is slower with an entry list,
    # and dropping branches we don't need so we don't waste time reading them.
    # Note: I highly suggest you look into RDataFrames, as that should be much faster.
    Events.Draw(
        ">>elist",
        "Sum$(Electron_pt>25 && abs(Electron_eta+Electron_deltaEtaSC)<1.4442)>=1",
        "entrylist goff", max_events)
    elist = ROOT.gDirectory.Get('elist')
    elist.SetDirectory(0)  #removing it from the file not to write it out
    Events.SetEntryList(elist)

    branchsel = BranchSelection(
        "EgammaUser/EgammaDAS2020/data/nano_electron_branches.txt")
    branchsel.selectBranches(Events)

    nr_events = elist.GetN()
    #2nd way, bare event loop
    for event_nr in range(0, nr_events):
        entry_nr = Events.GetEntryNumber(event_nr)
        Events.GetEntry(entry_nr)
        event = Events
        if event_nr % args.report == 0:
            print("processing event {} / {} {}".format(event_nr, nr_events,
                                                       time.ctime()))
        for ele_nr in range(0, event.nElectron):
            if event.Electron_pt[ele_nr] > 25 and abs(
                    event.Electron_eta[ele_nr] +
                    event.Electron_deltaEtaSC[ele_nr]) < 1.4442:
                if event.Electron_genPartIdx[ele_nr] >= 0:
Ejemplo n.º 4
0
class PostProcessor:
    """Skim input ROOT files and/or run analysis modules over their events.

    For every input file the "Events" tree (or "Friends" as a fallback) is
    pre-skimmed with ``preSkim``, optionally processed by the configured
    modules through ``eventLoop``, and written to a new file in
    ``outputDir``. Optionally the per-file outputs are merged with
    ``haddnano.py`` and a framework job report is saved.
    """

    def __init__(self,
                 outputDir,
                 inputFiles,
                 cut=None,
                 branchsel=None,
                 modules=None,
                 compression="LZMA:9",
                 friend=False,
                 postfix=None,
                 jsonInput=None,
                 noOut=False,
                 justcount=False,
                 provenance=False,
                 haddFileName=None,
                 fwkJobReport=False,
                 histFileName=None,
                 histDirName=None,
                 outputbranchsel=None,
                 maxEntries=None,
                 firstEntry=0,
                 prefetch=False,
                 longTermCache=False,
                 saveHistoGenWeights=False,
                 allowNoPostfix=False):
        """Store the job configuration.

        Args:
            outputDir: directory the output files are written into.
            inputFiles: iterable of input file names. An entry may be a
                comma-separated list; the first name is the main file and
                the remaining ones are attached to it as friend trees.
            cut: selection forwarded to ``preSkim``.
            branchsel: argument for the input ``BranchSelection``; it is
                also used for the output when ``outputbranchsel`` is None.
            modules: list of module objects (``beginJob``/``endJob`` are
                called on them). With no modules the tree is fully cloned
                without an event loop. Defaults to an empty list.
            compression: ``"ALGO:LEVEL"`` with ALGO one of LZMA, ZLIB or
                LZ4, or the string ``"none"``.
            friend: write a ``FriendOutput`` instead of a ``FullOutput``.
            postfix: text inserted before ".root" in the output file name;
                defaults to "_Friend" or "_Skim".
            jsonInput: JSON selection forwarded to ``preSkim``.
            noOut: do not create output files.
            justcount: only report how many entries would be selected.
            provenance: forwarded to ``FullOutput``.
            haddFileName: if set, name of the merged file produced by
                ``haddnano.py``. Defaults to "tree.root" when a job report
                is requested.
            fwkJobReport: create and save a ``JobReport``.
            histFileName: histogram file opened in RECREATE mode; must be
                given together with ``histDirName``.
            histDirName: histogram directory name handed to the modules.
            outputbranchsel: argument for the output ``BranchSelection``.
            maxEntries: maximum number of entries per file; a falsy value
                means no limit (largest int64).
            firstEntry: index of the first entry to process.
            prefetch: copy remote ("root://") files locally with xrdcp
                before opening them.
            longTermCache: keep prefetched files across runs under a
                reproducible name; cleaning them up is left to the user.
            saveHistoGenWeights: store the hGenWeights/hNumWeights
                histograms in the output when the input lacks them.
            allowNoPostfix: use an empty postfix when ``postfix`` is None.
        """
        self.outputDir = outputDir
        self.inputFiles = inputFiles
        self.cut = cut
        # A fresh list per instance avoids sharing one default list object.
        self.modules = modules if modules is not None else []
        self.compression = compression
        self.postfix = postfix
        self.allowNoPostfix = allowNoPostfix
        self.json = jsonInput
        self.noOut = noOut
        self.friend = friend
        self.justcount = justcount
        self.provenance = provenance
        self.jobReport = JobReport() if fwkJobReport else None
        self.haddFileName = haddFileName
        self.saveHistoGenWeights = saveHistoGenWeights
        self.histFile = None
        if self.jobReport and not self.haddFileName:
            print("Because you requested a FJR we assume you want the final " \
                "hadd. No name specified for the output file, will use tree.root")
            self.haddFileName = "tree.root"
        self.branchsel = BranchSelection(branchsel) if branchsel else None
        if outputbranchsel is not None:
            self.outputbranchsel = BranchSelection(outputbranchsel)
        elif branchsel is not None:
            # Use the same branches in the output as in input
            self.outputbranchsel = BranchSelection(branchsel)
        else:
            self.outputbranchsel = None

        self.histFileName = histFileName
        self.histDirName = histDirName
        # 2^63 - 1, largest int64
        self.maxEntries = maxEntries if maxEntries else 9223372036854775807
        self.firstEntry = firstEntry
        self.prefetch = prefetch  # prefetch files to TMPDIR using xrdcp
        # keep cached files across runs (it's then up to you to clean up the temp)
        self.longTermCache = longTermCache

    def prefetchFile(self, fname, verbose=True):
        """Copy a remote file to the local temporary directory with xrdcp.

        Args:
            fname: file name; only names starting with "root://" are copied.
            verbose: print progress messages.

        Returns:
            A tuple ``(path, toBeDeleted)``. ``path`` is the local copy, or
            ``fname`` itself when the file is not remote or the copy failed.
            ``toBeDeleted`` is True only for a fresh copy made without
            ``longTermCache``, i.e. one the caller should remove after use.
        """
        tmpdir = os.environ.get('TMPDIR', "/tmp")
        if not fname.startswith("root://"):
            return fname, False
        if self.longTermCache:
            # hashlib needs bytes: Python 2 str already is, Python 3 str is not.
            rawName = fname if isinstance(fname, bytes) \
                else fname.encode("utf-8")
            rndchars = "long_cache-id%d-%s" \
                % (os.getuid(), hashlib.sha1(rawName).hexdigest())
        else:
            # bytearray yields integers on both Python 2 and Python 3.
            rndchars = "".join(
                ["%x" % byte for byte in bytearray(os.urandom(8))])
        localfile = "%s/%s-%s.root" \
            % (tmpdir, os.path.basename(fname).replace(".root", ""), rndchars)
        if self.longTermCache and os.path.exists(localfile):
            if verbose:
                print("Filename %s is already available in local path %s " \
                    % (fname, localfile))
            return localfile, False
        transferred = False
        try:
            if verbose:
                print("Filename %s is remote, will do a copy to local path %s"\
                    % (fname, localfile))
            start = time.time()
            subprocess.check_output(["xrdcp", "-f", "-N", fname, localfile])
            transferred = True
            if verbose:
                print("Time used for transferring the file locally: %.2f s"\
                    % (time.time() - start))
            return localfile, (not self.longTermCache)
        except Exception:
            if verbose:
                print(
                    "Error: could not save file locally, will run from remote")
            return fname, False
        finally:
            # Also runs when the copy is interrupted (e.g. Ctrl-C), so that a
            # truncated file is never picked up later as a valid cached copy.
            if not transferred and os.path.exists(localfile):
                if verbose:
                    print("Deleting partially transferred file %s" % localfile)
                try:
                    os.unlink(localfile)
                except OSError:
                    pass

    def run(self):
        """Process all input files according to the stored configuration.

        Raises:
            RuntimeError: for an unsupported compression algorithm, when
                ``noOut`` is set without modules, when only one of
                ``histFileName``/``histDirName`` is given, or when the
                genWeight histograms are requested but the Events tree does
                not hold ``genEventCount`` entries.
            SystemExit: when an input file cannot be opened.
        """
        outpostfix = self.postfix if self.postfix is not None else (
            "_Friend" if self.friend else "_Skim")
        if self.allowNoPostfix and self.postfix is None:
            outpostfix = ""

        compressionLevel = 0
        if not self.noOut:
            if self.compression != "none":
                ROOT.gInterpreter.ProcessLine("#include <Compression.h>")
                (algo, level) = self.compression.split(":")
                compressionLevel = int(level)
                if algo == "LZMA":
                    compressionAlgo = ROOT.ROOT.kLZMA
                elif algo == "ZLIB":
                    compressionAlgo = ROOT.ROOT.kZLIB
                elif algo == "LZ4":
                    compressionAlgo = ROOT.ROOT.kLZ4
                else:
                    raise RuntimeError("Unsupported compression %s" % algo)
            print("Will write selected trees to " + self.outputDir)
            if not self.justcount:
                if not os.path.exists(self.outputDir):
                    os.system("mkdir -p " + self.outputDir)
        elif len(self.modules) == 0:
            raise RuntimeError(
                "Running with --noout and no modules does nothing!")

        # Open histogram file, if desired
        if (self.histFileName is not None and self.histDirName is None) or (
                self.histFileName is None and self.histDirName is not None):
            raise RuntimeError(
                "Must specify both histogram file and histogram directory!")
        elif self.histFileName is not None and self.histDirName is not None:
            self.histFile = ROOT.TFile.Open(self.histFileName, "RECREATE")
        else:
            self.histFile = None

        for m in self.modules:
            if hasattr(m, 'writeHistFile') and m.writeHistFile:
                m.beginJob(histFile=self.histFile,
                           histDirName=self.histDirName)
            else:
                m.beginJob()

        # Without modules there is no event loop: the tree is cloned as is.
        fullClone = (len(self.modules) == 0)
        outFileNames = []
        t0 = time.time()
        totEntriesRead = 0
        for fname in self.inputFiles:
            # "main.root,friend1.root,..." -> main file plus friend files
            ffnames = []
            if "," in fname:
                fnames = fname.split(',')
                fname, ffnames = fnames[0], fnames[1:]

            # open input file
            if self.prefetch:
                ftoread, toBeDeleted = self.prefetchFile(fname)
                inFile = ROOT.TFile.Open(ftoread)
            else:
                inFile = ROOT.TFile.Open(fname)

            if not inFile:
                print('ERROR: file does not exist, check!')
                print('    filename: %s' % fname)
                # Non-zero status so that the failure is visible to the caller.
                raise SystemExit(1)

            # get input tree
            inTree = inFile.Get("Events")
            # A missing key comes back as a null object, which is falsy but
            # is not the Python None singleton.
            if not inTree:
                inTree = inFile.Get("Friends")
            nEntries = min(inTree.GetEntries() - self.firstEntry,
                           self.maxEntries)
            # first check that the histogram with weights is not already in the file
            hasWeightHistograms = False
            # Only set when the histograms are created for this very file.
            hGenWeights = None
            hNumWeights = None
            inKeys = inFile.GetListOfKeys()
            if inKeys.Contains("hGenWeights") and inKeys.Contains(
                    "hNumWeights"):
                hasWeightHistograms = True
                print("Histogram hGenWeights already exists, I will just copy it without recreating it")
            if self.saveHistoGenWeights and inTree.GetName(
            ) == "Events" and not hasWeightHistograms:
                print("Histogram hGenWeights does not exist yet, I will create it")
                # check that the tree contains all the original events, otherwise the sum of gen weights will miss some
                tmpTreeRuns = inFile.Get("Runs")
                nGenEvents = None
                for event in tmpTreeRuns:
                    # only need first event (but there should be only 1 here)
                    nGenEvents = event.genEventCount
                    break
                if nGenEvents != inTree.GetEntries():
                    raise RuntimeError(
                        "I am creating the histogram with genWeight, but tree Events has less entries than genEventCount in tree Runs (%s instead of %s). The sum of weights will thus be wrong, please check"
                        % (str(inTree.GetEntries()), str(nGenEvents)))

                # saving distribution of genWeight for offline usage
                # idea is to fill the distribution of Log10(genWeight) with the sign, so to have a histogram from about -10 to 10
                # with about 10k bins (genWeights can take values spanning several orders of magnitude, especially for fancy weights)
                # then one can compute the sum of genWeight in a given range using its integral (using Log10(threshold) ).
                # This somehow relies on having always |genWeight|>1, should it be < 1 the Log would change the sign.
                # So for the purpose of choosing the bin to be filled, we use |value| or 1.001, whatever is larger (this will not affect the integral)
                # then, need a second histogram to keep the integer number of events in each bin, so to allow for clipping of large weights
                hGenWeights = ROOT.TH1D("hGenWeights",
                                        "distribution of Log10(genWeight)",
                                        4800, -12.0, 12.0)
                hNumWeights = ROOT.TH1D(
                    "hNumWeights",
                    "distribution of Log10(genWeight) (unweighted)", 4800,
                    -12.0, 12.0)
                inTree.Draw(
                    "TMath::Sign(1.0,genWeight)*TMath::Log10(max(1.001,abs(genWeight)))>>hGenWeights",
                    "genWeight", "goff", nEntries, self.firstEntry)
                inTree.Draw(
                    "TMath::Sign(1.0,genWeight)*TMath::Log10(max(1.001,abs(genWeight)))>>hNumWeights",
                    "1", "goff", nEntries, self.firstEntry)
            totEntriesRead += nEntries
            # pre-skimming
            elist, jsonFilter = preSkim(inTree,
                                        self.json,
                                        self.cut,
                                        maxEntries=self.maxEntries,
                                        firstEntry=self.firstEntry)
            if self.justcount:
                print('Would select %d / %d entries from %s (%.2f%%)' %
                      (elist.GetN() if elist else nEntries, nEntries, fname,
                       (elist.GetN() if elist else nEntries) /
                       (0.01 * nEntries) if nEntries else 0))
                if self.prefetch:
                    if toBeDeleted:
                        os.unlink(ftoread)
                continue

            print('Pre-select %d entries out of %s (%.2f%%)' %
                  (elist.GetN() if elist else nEntries, nEntries,
                   (elist.GetN() if elist else nEntries) /
                   (0.01 * nEntries) if nEntries else 0))
            # References to the friend files and trees are kept in these
            # lists for the rest of this iteration.
            inAddFiles = []
            inAddTrees = []
            for ffname in ffnames:
                inAddFiles.append(ROOT.TFile.Open(ffname))
                inAddTree = inAddFiles[-1].Get("Events")
                if not inAddTree:
                    inAddTree = inAddFiles[-1].Get("Friends")
                inAddTrees.append(inAddTree)
                inTree.AddFriend(inAddTree)

            if fullClone:
                # no need of a reader (no event loop), but set up the elist if available
                if elist:
                    inTree.SetEntryList(elist)
            else:
                # initialize reader
                inTree = InputTree(inTree, elist)

            # prepare output file
            if not self.noOut:
                outFileName = os.path.join(
                    self.outputDir,
                    os.path.basename(fname).replace(".root",
                                                    outpostfix + ".root"))
                outFile = ROOT.TFile.Open(outFileName, "RECREATE", "",
                                          compressionLevel)
                outFileNames.append(outFileName)
                if compressionLevel:
                    outFile.SetCompressionAlgorithm(compressionAlgo)
                # prepare output tree
                if self.friend:
                    outTree = FriendOutput(inFile, inTree, outFile)
                else:
                    outTree = FullOutput(
                        inFile,
                        inTree,
                        outFile,
                        branchSelection=self.branchsel,
                        outputbranchSelection=self.outputbranchsel,
                        fullClone=fullClone,
                        maxEntries=self.maxEntries,
                        firstEntry=self.firstEntry,
                        jsonFilter=jsonFilter,
                        provenance=self.provenance)
            else:
                outFile = None
                outTree = None
                if self.branchsel:
                    self.branchsel.selectBranches(inTree)

            # process events, if needed
            if not fullClone:
                eventRange = range(
                    self.firstEntry, self.firstEntry +
                    nEntries) if nEntries > 0 and not elist else None
                (nall, npass, timeLoop) = eventLoop(self.modules,
                                                    inFile,
                                                    outFile,
                                                    inTree,
                                                    outTree,
                                                    eventRange=eventRange,
                                                    maxEvents=self.maxEntries)
                print(
                    'Processed %d preselected entries from %s (%s entries). Finally selected %d entries'
                    % (nall, fname, nEntries, npass))
            else:
                nall = nEntries
                print('Selected %d / %d entries from %s (%.2f%%)' %
                      (outTree.tree().GetEntries(), nall, fname,
                       outTree.tree().GetEntries() /
                       (0.01 * nall) if nall else 0))

            # now write the output
            if not self.noOut:
                outTree.write()
                # Write the weight histograms only if they were created for
                # this input file.
                if hGenWeights is not None:
                    hGenWeights.Write(hGenWeights.GetName())
                    hNumWeights.Write(hNumWeights.GetName())
                outFile.Close()
                print("Done %s" % outFileName)
            if self.jobReport:
                self.jobReport.addInputFile(fname, nall)
            if self.prefetch:
                if toBeDeleted:
                    os.unlink(ftoread)

        for m in self.modules:
            m.endJob()

        elapsed = time.time() - t0
        print("Total time %.1f sec. to process %i events. Rate = %.1f Hz." %
              (elapsed, totEntriesRead,
               totEntriesRead / elapsed if elapsed else 0))

        if self.haddFileName:
            haddnano = "./haddnano.py" if os.path.isfile(
                "./haddnano.py") else "haddnano.py"
            os.system("%s %s %s" %
                      (haddnano, self.haddFileName, " ".join(outFileNames)))
        if self.jobReport:
            self.jobReport.addOutputFile(self.haddFileName)
            self.jobReport.save()
Ejemplo n.º 5
0
class PostProcessor:
    """Skim input ROOT files and/or run analysis modules over their events.

    For every input file the "Events" tree is pre-skimmed with ``preSkim``,
    optionally processed by the configured modules through ``eventLoop``,
    and written to a new file in ``outputDir``. Optionally the per-file
    outputs are merged with ``./haddnano.py`` and a job report is saved.
    """

    def __init__(self,
                 outputDir,
                 inputFiles,
                 cut=None,
                 branchsel=None,
                 modules=None,
                 compression="LZMA:9",
                 friend=False,
                 postfix=None,
                 jsonInput=None,
                 noOut=False,
                 justcount=False,
                 provenance=False,
                 haddFileName=None,
                 fwkJobReport=False,
                 histFileName=None,
                 histDirName=None,
                 outputbranchsel=None):
        """Store the job configuration.

        Args:
            outputDir: directory the output files are written into.
            inputFiles: iterable of input file names.
            cut: selection forwarded to ``preSkim``.
            branchsel: argument for the input ``BranchSelection``.
            modules: list of module objects (``beginJob``/``endJob`` are
                called on them). With no modules the tree is fully cloned
                without an event loop. Defaults to an empty list.
            compression: ``"ALGO:LEVEL"`` with ALGO either LZMA or ZLIB, or
                the string ``"none"``.
            friend: write a ``FriendOutput`` instead of a ``FullOutput``.
            postfix: text inserted before ".root" in the output file name;
                defaults to "_Friend" or "_Skim".
            jsonInput: JSON selection forwarded to ``preSkim``.
            noOut: do not create output files.
            justcount: only report how many entries would be selected.
            provenance: forwarded to ``FullOutput``.
            haddFileName: if set, name of the merged file produced by
                ``./haddnano.py``. Defaults to "tree.root" when a job report
                is requested.
            fwkJobReport: create and save a ``JobReport``.
            histFileName: histogram file opened in RECREATE mode; must be
                given together with ``histDirName``.
            histDirName: histogram directory name handed to the modules.
            outputbranchsel: argument for the output ``BranchSelection``.
        """
        self.outputDir = outputDir
        self.inputFiles = inputFiles
        self.cut = cut
        # A fresh list per instance avoids sharing one default list object.
        self.modules = modules if modules is not None else []
        self.compression = compression
        self.postfix = postfix
        self.json = jsonInput
        self.noOut = noOut
        self.friend = friend
        self.justcount = justcount
        self.provenance = provenance
        self.jobReport = JobReport() if fwkJobReport else None
        self.haddFileName = haddFileName
        self.histFile = None
        if self.jobReport and not self.haddFileName:
            print("Because you requested a FJR we assume you want the final hadd. No name specified for the output file, will use tree.root")
            self.haddFileName = "tree.root"
        self.branchsel = BranchSelection(branchsel) if branchsel else None
        self.outputbranchsel = BranchSelection(
            outputbranchsel) if outputbranchsel else None
        self.histFileName = histFileName
        self.histDirName = histDirName

    def run(self):
        """Process all input files according to the stored configuration.

        Raises:
            RuntimeError: for an unsupported compression algorithm, when
                ``noOut`` is set without modules, or when only one of
                ``histFileName``/``histDirName`` is given.
        """
        outpostfix = self.postfix if self.postfix is not None else (
            "_Friend" if self.friend else "_Skim")

        compressionLevel = 0
        if not self.noOut:
            if self.compression != "none":
                ROOT.gInterpreter.ProcessLine("#include <Compression.h>")
                (algo, level) = self.compression.split(":")
                compressionLevel = int(level)
                if algo == "LZMA":
                    compressionAlgo = ROOT.ROOT.kLZMA
                elif algo == "ZLIB":
                    compressionAlgo = ROOT.ROOT.kZLIB
                else:
                    raise RuntimeError("Unsupported compression %s" % algo)
            print("Will write selected trees to " + self.outputDir)
            if not self.justcount:
                if not os.path.exists(self.outputDir):
                    os.system("mkdir -p " + self.outputDir)
        elif len(self.modules) == 0:
            raise RuntimeError(
                "Running with --noout and no modules does nothing!")

        # Open histogram file, if desired
        if (self.histFileName is not None and self.histDirName is None) or (
                self.histFileName is None and self.histDirName is not None):
            raise RuntimeError(
                "Must specify both histogram file and histogram directory!")
        elif self.histFileName is not None and self.histDirName is not None:
            self.histFile = ROOT.TFile.Open(self.histFileName, "RECREATE")
        else:
            self.histFile = None

        for m in self.modules:
            if hasattr(m, 'writeHistFile') and m.writeHistFile:
                m.beginJob(histFile=self.histFile,
                           histDirName=self.histDirName)
            else:
                m.beginJob()

        # Without modules there is no event loop: the tree is cloned as is.
        fullClone = (len(self.modules) == 0)
        outFileNames = []
        # Wall-clock timer; time.clock() no longer exists in Python >= 3.8.
        t0 = time.time()
        totEntriesRead = 0
        for fname in self.inputFiles:

            # open input file
            inFile = ROOT.TFile.Open(fname)

            # get input tree
            inTree = inFile.Get("Events")
            totEntriesRead += inTree.GetEntries()
            # pre-skimming
            elist, jsonFilter = preSkim(inTree, self.json, self.cut)
            if self.justcount:
                print('Would select %d entries from %s' %
                      (elist.GetN() if elist else inTree.GetEntries(), fname))
                continue

            print('Pre-select %d entries out of %s ' %
                  (elist.GetN() if elist else inTree.GetEntries(),
                   inTree.GetEntries()))

            if fullClone:
                # no need of a reader (no event loop), but set up the elist if available
                if elist:
                    inTree.SetEntryList(elist)
                # Entry count reported to the job report when no event loop
                # runs; previously this name was left undefined on this path.
                nall = inTree.GetEntries()
            else:
                # initialize reader
                inTree = InputTree(inTree, elist)

            # prepare output file
            if not self.noOut:
                outFileName = os.path.join(
                    self.outputDir,
                    os.path.basename(fname).replace(".root",
                                                    outpostfix + ".root"))
                outFile = ROOT.TFile.Open(outFileName, "RECREATE", "",
                                          compressionLevel)
                outFileNames.append(outFileName)
                if compressionLevel:
                    outFile.SetCompressionAlgorithm(compressionAlgo)
                # prepare output tree
                if self.friend:
                    outTree = FriendOutput(inFile, inTree, outFile)
                else:
                    outTree = FullOutput(inFile,
                                         inTree,
                                         outFile,
                                         branchSelection=self.branchsel,
                                         fullClone=fullClone,
                                         jsonFilter=jsonFilter,
                                         provenance=self.provenance)
            else:
                outFile = None
                outTree = None

            # process events, if needed
            if not fullClone:
                (nall, npass, timeLoop) = eventLoop(self.modules, inFile,
                                                    outFile, inTree, outTree)
                print(
                    'Processed %d preselected entries from %s (%s entries). Finally selected %d entries'
                    % (nall, fname, inTree.GetEntries(), npass))
            else:
                print('Selected %d entries from %s' %
                      (outTree.tree().GetEntries(), fname))

            # now write the output
            if not self.noOut:
                # The output branch selection needs an output tree, so it is
                # applied only when one has actually been created.
                if self.outputbranchsel:
                    self.outputbranchsel.selectBranches(outTree._tree)
                outTree.write()
                outFile.Close()
                print("Done %s" % outFileName)
            if self.jobReport:
                self.jobReport.addInputFile(fname, nall)

        for m in self.modules:
            m.endJob()

        elapsed = time.time() - t0
        print("%s Hz" % (totEntriesRead / elapsed if elapsed else 0))

        if self.haddFileName:
            # FIXME: remove "./" once haddnano.py is distributed with cms releases
            os.system("./haddnano.py %s %s" %
                      (self.haddFileName, " ".join(outFileNames)))
        if self.jobReport:
            self.jobReport.addOutputFile(self.haddFileName)
            self.jobReport.save()