Beispiel #1
0
    skimCond += "&&lheHTIncoming<" + options.lheHTCut

# Abort when the sample name contains "Run2015D" but the sample object has no
# 'vetoList' attribute.
if "Run2015D" in sample.name and not hasattr(sample, "vetoList"):
    sys.exit(
        "ERROR. Sample %s seems to be data but no vetoList was provided!!" %
        sample.name)

# Build the veto-list object only for samples that define one; None otherwise.
vetoList_ = vetoList(sample.vetoList) if hasattr(sample, "vetoList") else None

# Output location: <targetDir>/<skim>/<sample name>.
outDir = os.path.join(options.targetDir, options.skim, sample.name)
if os.path.exists(outDir):
    # Collect the .root files already present in outDir ...
    existingFiles = [
        outDir + '/' + f for f in os.listdir(outDir) if f.endswith('.root')
    ]
    # ... and flag the directory if checkRootFile rejects any of them
    # (each file is checked for an object called "Events").
    hasBadFile = any([
        not checkRootFile(f, checkForObjects=["Events"]) for f in existingFiles
    ])
else:
    existingFiles = []
    hasBadFile = False

#print "Found bad file? %r"%hasBadFile
# Nothing to do (exit status 0) when the directory already holds .root files,
# none of them was flagged as bad, and overwriting was not requested.
if os.path.exists(outDir) and len(existingFiles) > 0 and (
        not hasBadFile) and not options.overwrite:
    print "Found non-empty directory: %s -> skipping! (found a bad file? %r.)" % (
        outDir, hasBadFile)
    sys.exit(0)
else:
    # Otherwise the sample is (re)made; 'tmp' is a sub-directory of outDir.
    tmpDir = os.path.join(outDir, 'tmp')
    if hasBadFile:
        print "Found a corrupted file. Remake sample. Delete %s" % outDir
def loopAndFill(stacks, mode="loop"):
    allSamples=[]
    allSampleNames=[]
    allPlots = []
    usedBranches = []
    for s in stacks:
        usedBranches = list(set(usedBranches+s.usedBranches))
        for l in s.plotLists:
            for p in l:
                allPlots.append(p)
                if p.leaf:
                    if not p.leaf in usedBranches:
                        usedBranches.append(p.leaf)
                    if p.weightString  and not p.weightString in usedBranches:
                        usedBranches.append(p.weightString)
                else:
                    usedBranches = list(set(usedBranches+p.usedBranches))
                if not p.sample in allSamples:
                    assert p.sample.has_key('dir') or p.sample.has_key('dirname'), "Missing key dir or dirname in sample %s"%repr(p.sample)
                    allSamples.append(p.sample)
    if mode=='loop':
        assert not any ([p.string for p in allPlots]), "Loop mode is %s but specified 'string' for: %s"%(mode, ", ".join([p.name for p in allPlots if p.string]))
    if mode=='draw':
        assert all([p.string for p in allPlots]), "Loop mode is %s but specified no 'string' for: %s"%(mode, ", ".join([p.name for p in allPlots if not p.string]))
    print "Found",len(allSamples),'different samples:',", ".join(s['name'] for s in allSamples)
    for s in allSamples:
        cutStringForSample=[]
        plotsPerCutForSample={}
#    print s['name'], s.has_key('isData'), s.has_key('isData') and s['isData'], s.has_key('isData') and s['isData'] and s.has_key('dataCut')
        for p in allPlots:
            if p.sample==s:
                cut = p.cut['string'] if not (s.has_key('isData') and s['isData'] and p.cut.has_key('dataCut')) else "("+p.cut['string']+")&&("+p.cut['dataCut']+")"
                if not cut in cutStringForSample:
                    cutStringForSample.append(cut)
                    plotsPerCutForSample[cut]=[]
                if not p in plotsPerCutForSample[cut]:
                    plotsPerCutForSample[cut].append(p)
        s['plotsPerCutForSample'] = plotsPerCutForSample
    for s in allSamples:
        sampleScaleFac = 1 if not s.has_key('scale') else s['scale']
        if sampleScaleFac!=1:
            print "Using sampleScaleFac", sampleScaleFac ,"for sample",s["name"]

        bins = s['bins'] if s.has_key('bins') else ['default']
        for b in bins:
            treeName = 'Events' if not s.has_key('treeName') else s['treeName']
            maxN = -1 if not (s.has_key('small') and s['small']) else 1
            c = ROOT.TChain(treeName)
            counter=0
            dir = s['dirname'] if s.has_key('dirname') else s['dir']
            fileList = getFileList(dir+'/'+b, maxN=maxN, histname="") if s.has_key('bins') else [s['file']]
            for f in fileList:
                if not f[-5:]=='.root':continue
#        counter+=1
#        c.Add(f)
                if checkRootFile(f, checkForObjects=[treeName]):
                    counter+=1
                    c.Add(f)
                else:
                    print "File %s looks broken."%f
            ntot = c.GetEntries()
            print "Added ",counter,'files from sample',s['name'],'dir',dir,'bin',b,'ntot',ntot

            switchOnBranches(c, usedBranches)

            if ntot==0:
                print "Warning! Found zero events in",s['name'],'bin',b," -> do nothing"
                continue
            if mode.lower()=='loop':
                for ics, cutString in enumerate(s['plotsPerCutForSample'].keys()):
                    plotsToFill = s['plotsPerCutForSample'][cutString]
                    elistName = "eList_"+s['name']+'_'+b+'_'+str(ics)
                    elist = ROOT.TEventList(elistName)
                    c.Draw(">>"+elistName,cutString)
    #        print "elist",elist,elist.GetN(),cutString,'plots',plotsToFill
                    number_events = elist.GetN()# if not (s.has_key('small')  and s['small']) else min(elist.GetN(), 100)
                    print "Reading: ", s["name"], b, "with",number_events,"events passing cutString", cutString, 'and will fill', len([p.name for p in plotsToFill]),'vars.'
                    for p in plotsToFill:
                        if not (p.cut.has_key('func') and p.cut['func']):
                            p.cut['func']=None
                        if p.TTreeFormula:
                            assert p.TTreeFormula and not (p.TTreeFormula==""), "Problem in TTreeFormula %s" % p.TTreeFormula
                            fString='ROOT.TTreeFormula("'+p.name+'","'+p.TTreeFormula+'",c)'
                            exec('p.ttreeFormula='+fString)
                            print "Created TTreeFormula:",fString
                    for i in range(0,number_events):
                        if (i%10000 == 0) and i>0 :
                            print i
                        c.GetEntry(elist.GetEntry(i))
                        for p in plotsToFill:
    #            print p.cut['func'],  p.cut['func'](c)
                            if (not p.cut['func']) or p.cut['func'](c):
                                weight = c.GetLeaf(p.weightString).GetValue() if p.weightString else 1.
                                reWeight = p.weightFunc(c) if p.weightFunc else 1.
    #              print c, p.weightFunc, p.weightFunc(c), getVarValue(c, "nVert"), c.GetLeaf("nVert").GetValue(), c.nVert
                                if p.leaf:
                                    val =  getVarValue(c, p.leaf, p.ind)
    #                print "Fill leaf",p.leaf, p.ind, val, weight,sampleScaleFac
                                if p.TTreeFormula:
                                    p.ttreeFormula.UpdateFormulaLeaves()
                                    val = p.ttreeFormula.EvalInstance()
                                if p.func:
                                    val = p.func(c)
    #              if val>170:print val, reWeight, weight, sampleScaleFac, p.leaf, p.ind, i, c.GetEntries(), elist.GetEntry(i),"x",c.GetLeaf('Jet_pt').GetValue(4), c.GetLeaf('met_pt').GetValue(), c.GetLeaf('lumi').GetValue(), c.GetLeaf('evt').GetValue()
                                if val<float('inf'):
                                    p.histo.Fill(val, reWeight*weight*sampleScaleFac)
    #              print p.histo.GetName(), b, val, weight*sampleScaleFac, reWeight*weight*sampleScaleFac
            elif mode.lower()=='draw':
                for ics, cutString in enumerate(s['plotsPerCutForSample'].keys()):
                    plotsToFill = s['plotsPerCutForSample'][cutString]
                    print "Reading: ", s["name"], b, "with cutString", cutString, 'and will fill', len([p.name for p in plotsToFill]),'vars.'
                    for p in plotsToFill:
                        print c, "String", p.string, p.binning, "Cut", cutString, p.weightString, p.binningIsExplicit, sampleScaleFac
                        tmp = getPlotFromChain(c, p.string, p.binning, cutString, p.weightString, binningIsExplicit=p.binningIsExplicit)
                        tmp.Scale(sampleScaleFac)
                        p.histo.Add(tmp)
#      for ics, cutString in enumerate(s['plotsPerCutForSample'].keys()):
#        plotsToFill = s['plotsPerCutForSample'][cutString]
#        for p in plotsToFill:
#          print c.GetEntries(), p.name,p.histo.Integral()
#      c.GetListOfFiles().ls()
            c.Reset()
            del c

#do over-flow bins
    for p in allPlots:
        if p.overFlow and p.overFlow in [ "upper", "both"]:
            nbins = p.histo.GetNbinsX()
            p.histo.SetBinContent(nbins , p.histo.GetBinContent(nbins) + p.histo.GetBinContent(nbins + 1))
            p.histo.SetBinError(nbins , sqrt(p.histo.GetBinError(nbins)**2 + p.histo.GetBinError(nbins + 1)**2))
        if p.overFlow and p.overFlow in [ "lower", "both"]:
            p.histo.SetBinContent(1 , p.histo.GetBinContent(0) + p.histo.GetBinContent(1))
            p.histo.SetBinError(1 , sqrt(p.histo.GetBinError(0)**2 + p.histo.GetBinError(1)**2))
#sum stacks
    for s in stacks:
        sumStackHistos(s)
#normalize
    for p in allPlots:
        if p.normalizeTo:
            t = p.normalizeTo.histo.Integral()
            y = p.histo.Integral()
            r = p.normalizeRef.histo.Integral() if p.normalizeRef else y
            if r>0:
                p.histo.Scale(t/r)
Beispiel #3
0
def loopAndFill(stacks, mode="loop"):
    allSamples = []
    allSampleNames = []
    allPlots = []
    usedBranches = []
    for s in stacks:
        usedBranches = list(set(usedBranches + s.usedBranches))
        for l in s.plotLists:
            for p in l:
                allPlots.append(p)
                if p.leaf:
                    if not p.leaf in usedBranches:
                        usedBranches.append(p.leaf)
                    if p.weightString and not p.weightString in usedBranches:
                        usedBranches.append(p.weightString)
                else:
                    usedBranches = list(set(usedBranches + p.usedBranches))
                if not p.sample in allSamples:
                    assert p.sample.has_key('dir') or p.sample.has_key(
                        'dirname'
                    ), "Missing key dir or dirname in sample %s" % repr(
                        p.sample)
                    allSamples.append(p.sample)
    if mode == 'loop':
        assert not any([
            p.string for p in allPlots
        ]), "Loop mode is %s but specified 'string' for: %s" % (
            mode, ", ".join([p.name for p in allPlots if p.string]))
    if mode == 'draw':
        assert all([
            p.string for p in allPlots
        ]), "Loop mode is %s but specified no 'string' for: %s" % (
            mode, ", ".join([p.name for p in allPlots if not p.string]))
    print "Found", len(allSamples), 'different samples:', ", ".join(
        s['name'] for s in allSamples)
    for s in allSamples:
        cutStringForSample = []
        plotsPerCutForSample = {}
        #    print s['name'], s.has_key('isData'), s.has_key('isData') and s['isData'], s.has_key('isData') and s['isData'] and s.has_key('dataCut')
        for p in allPlots:
            if p.sample == s:
                cut = p.cut['string'] if not (
                    s.has_key('isData') and s['isData']
                    and p.cut.has_key('dataCut')
                ) else "(" + p.cut['string'] + ")&&(" + p.cut['dataCut'] + ")"
                if not cut in cutStringForSample:
                    cutStringForSample.append(cut)
                    plotsPerCutForSample[cut] = []
                if not p in plotsPerCutForSample[cut]:
                    plotsPerCutForSample[cut].append(p)
        s['plotsPerCutForSample'] = plotsPerCutForSample
    for s in allSamples:
        sampleScaleFac = 1 if not s.has_key('scale') else s['scale']
        if sampleScaleFac != 1:
            print "Using sampleScaleFac", sampleScaleFac, "for sample", s[
                "name"]

        bins = s['bins'] if s.has_key('bins') else ['default']
        for b in bins:
            treeName = 'Events' if not s.has_key('treeName') else s['treeName']
            maxN = -1 if not (s.has_key('small') and s['small']) else 1
            c = ROOT.TChain(treeName)
            counter = 0
            dir = s['dirname'] if s.has_key('dirname') else s['dir']
            fileList = getFileList(
                dir + '/' + b, maxN=maxN,
                histname="") if s.has_key('bins') else [s['file']]
            for f in fileList:
                if not f[-5:] == '.root': continue
                #        counter+=1
                #        c.Add(f)
                if checkRootFile(f, checkForObjects=[treeName]):
                    counter += 1
                    c.Add(f)
                else:
                    print "File %s looks broken." % f
            ntot = c.GetEntries()
            print "Added ", counter, 'files from sample', s[
                'name'], 'dir', dir, 'bin', b, 'ntot', ntot

            switchOnBranches(c, usedBranches)

            if ntot == 0:
                print "Warning! Found zero events in", s[
                    'name'], 'bin', b, " -> do nothing"
                continue
            if mode.lower() == 'loop':
                for ics, cutString in enumerate(
                        s['plotsPerCutForSample'].keys()):
                    plotsToFill = s['plotsPerCutForSample'][cutString]
                    elistName = "eList_" + s['name'] + '_' + b + '_' + str(ics)
                    elist = ROOT.TEventList(elistName)
                    c.Draw(">>" + elistName, cutString)
                    #        print "elist",elist,elist.GetN(),cutString,'plots',plotsToFill
                    number_events = elist.GetN(
                    )  # if not (s.has_key('small')  and s['small']) else min(elist.GetN(), 100)
                    print "Reading: ", s[
                        "name"], b, "with", number_events, "events passing cutString", cutString, 'and will fill', len(
                            [p.name for p in plotsToFill]), 'vars.'
                    for p in plotsToFill:
                        if not (p.cut.has_key('func') and p.cut['func']):
                            p.cut['func'] = None
                        if p.TTreeFormula:
                            assert p.TTreeFormula and not (
                                p.TTreeFormula == ""
                            ), "Problem in TTreeFormula %s" % p.TTreeFormula
                            fString = 'ROOT.TTreeFormula("' + p.name + '","' + p.TTreeFormula + '",c)'
                            exec('p.ttreeFormula=' + fString)
                            print "Created TTreeFormula:", fString
                    for i in range(0, number_events):
                        if (i % 10000 == 0) and i > 0:
                            print i
                        c.GetEntry(elist.GetEntry(i))
                        for p in plotsToFill:
                            #            print p.cut['func'],  p.cut['func'](c)
                            if (not p.cut['func']) or p.cut['func'](c):
                                weight = c.GetLeaf(p.weightString).GetValue(
                                ) if p.weightString else 1.
                                reWeight = p.weightFunc(
                                    c) if p.weightFunc else 1.
                                #              print c, p.weightFunc, p.weightFunc(c), getVarValue(c, "nVert"), c.GetLeaf("nVert").GetValue(), c.nVert
                                if p.leaf:
                                    val = getVarValue(c, p.leaf, p.ind)
    #                print "Fill leaf",p.leaf, p.ind, val, weight,sampleScaleFac
                                if p.TTreeFormula:
                                    p.ttreeFormula.UpdateFormulaLeaves()
                                    val = p.ttreeFormula.EvalInstance()
                                if p.func:
                                    val = p.func(c)
    #              if val>170:print val, reWeight, weight, sampleScaleFac, p.leaf, p.ind, i, c.GetEntries(), elist.GetEntry(i),"x",c.GetLeaf('Jet_pt').GetValue(4), c.GetLeaf('met_pt').GetValue(), c.GetLeaf('lumi').GetValue(), c.GetLeaf('evt').GetValue()
                                if val < float('inf'):
                                    p.histo.Fill(
                                        val,
                                        reWeight * weight * sampleScaleFac)
    #              print p.histo.GetName(), b, val, weight*sampleScaleFac, reWeight*weight*sampleScaleFac
            elif mode.lower() == 'draw':
                for ics, cutString in enumerate(
                        s['plotsPerCutForSample'].keys()):
                    plotsToFill = s['plotsPerCutForSample'][cutString]
                    print "Reading: ", s[
                        "name"], b, "with cutString", cutString, 'and will fill', len(
                            [p.name for p in plotsToFill]), 'vars.'
                    for p in plotsToFill:
                        print c, "String", p.string, p.binning, "Cut", cutString, p.weightString, p.binningIsExplicit, sampleScaleFac
                        tmp = getPlotFromChain(
                            c,
                            p.string,
                            p.binning,
                            cutString,
                            p.weightString,
                            binningIsExplicit=p.binningIsExplicit)
                        tmp.Scale(sampleScaleFac)
                        p.histo.Add(tmp)
#      for ics, cutString in enumerate(s['plotsPerCutForSample'].keys()):
#        plotsToFill = s['plotsPerCutForSample'][cutString]
#        for p in plotsToFill:
#          print c.GetEntries(), p.name,p.histo.Integral()
#      c.GetListOfFiles().ls()
            c.Reset()
            del c

#do over-flow bins
    for p in allPlots:
        if p.overFlow and p.overFlow in ["upper", "both"]:
            nbins = p.histo.GetNbinsX()
            p.histo.SetBinContent(
                nbins,
                p.histo.GetBinContent(nbins) +
                p.histo.GetBinContent(nbins + 1))
            p.histo.SetBinError(
                nbins,
                sqrt(
                    p.histo.GetBinError(nbins)**2 +
                    p.histo.GetBinError(nbins + 1)**2))
        if p.overFlow and p.overFlow in ["lower", "both"]:
            p.histo.SetBinContent(
                1,
                p.histo.GetBinContent(0) + p.histo.GetBinContent(1))
            p.histo.SetBinError(
                1, sqrt(p.histo.GetBinError(0)**2 + p.histo.GetBinError(1)**2))
#sum stacks
    for s in stacks:
        sumStackHistos(s)
#normalize
    for p in allPlots:
        if p.normalizeTo:
            t = p.normalizeTo.histo.Integral()
            y = p.histo.Integral()
            r = p.normalizeRef.histo.Integral() if p.normalizeRef else y
            if r > 0:
                p.histo.Scale(t / r)
# Walk options.dir and, per directory, collect the .root files that are
# eligible for grouping into jobs.
jobs = []
for dirName, subdirList, fileList in os.walk(options.dir):
    rootFiles = []
    for f in fileList:
        if f.endswith('.root'):
            full_filename = os.path.join(dirName, f)
            # Files carrying '_reHadd_' in their name are never picked up.
            if not '_reHadd_' in f:
                # Skip files whose name contains any of the user-supplied
                # skip strings.
                to_skip = False
                for skip in options.skip:
                    if skip in f:
                        logger.info("Found skip string %s in %s. Skipping.",
                                    skip, f)
                        to_skip = True
                        break
                if to_skip: continue
                # Validate the file; when a tree name was given it is passed
                # to checkRootFile as the object to check for.
                isOK =  checkRootFile( full_filename, checkForObjects = [options.treeName]) \
                        if options.treeName is not None else checkRootFile( full_filename )
                if isOK:
                    # Note: the bare file name is stored, not the full path.
                    rootFiles.append(f)
                else:
                    logger.warning(
                        "File %s does not look OK. Checked for tree: %r",
                        full_filename, options.treeName)
            else:
                logger.info("Found '_reHadd_' in file %s in %s. Skipping.",
                            full_filename, dirName)
    # Start a fresh job (and size counter) for this directory.
    job = []
    jobsize = 0
    for fname in rootFiles:
        filename, file_extension = os.path.splitext(fname)
        # Text after the last '_' of the base name.
        n_str = filename.split('_')[-1]
# Split <outDir>/<sample name>.root into base name and extension; the index of
# the event range is inserted between the two further below.
filename, ext = os.path.splitext( os.path.join(outDir, sample.name + '.root') )

# Running totals and the lumi list of the output.
clonedEvents = 0
convertedEvents = 0
outputLumiList = {}
for ievtRange, eventRange in enumerate( eventRanges ):

    # When specific jobs were requested, process only those range indices.
    if len(options.job)>0 and not ievtRange in options.job: continue

    logger.info( "Processing range %i/%i from %i to %i which are %i events.",  ievtRange, len(eventRanges), eventRange[0], eventRange[1], eventRange[1]-eventRange[0] )

    # Check whether file exists
    outfilename = filename+'_'+str(ievtRange)+ext
    if os.path.isfile(outfilename):
        logger.info( "Output file %s found.", outfilename)
        # A file rejected by checkRootFile is always redone; an accepted one
        # is redone only when options.overwrite is set.
        if not checkRootFile(outfilename, checkForObjects=["Events"]):
            logger.info( "File %s is broken. Overwriting.", outfilename)
        elif not options.overwrite:
            logger.info( "Skipping.")
            continue
        else:
            logger.info( "Overwriting.")

    # Remember the current ROOT directory, open the output file with
    # 'recreate', then cd back to the remembered directory.
    tmp_directory = ROOT.gDirectory
    outputfile = ROOT.TFile.Open(outfilename, 'recreate')
    tmp_directory.cd()

    # Set the reader to the event range
    reader.setEventRange( eventRange )
    # Clone the kept branches into a tree called "Events" in the output file
    # and add its entry count to the running total.
    clonedTree = reader.cloneTree( branchKeepStrings, newTreename = "Events", rootfile = outputfile )
    clonedEvents += clonedTree.GetEntries()
Beispiel #6
0
# Walk the directory structure and group files in 'jobs' of [f1_0.root, f1_1.root, ...] totalling approx. sizeGB
jobs = []
for dirName, subdirList, fileList in os.walk(options.dir):
    rootFiles = []
    for f in fileList:
        if f.endswith('.root'):
            full_filename = os.path.join(dirName, f)
            # Files carrying '_reHadd_' in their name are never picked up.
            if not '_reHadd_' in f:
                # Skip files whose name contains any of the user-supplied
                # skip strings.
                to_skip = False
                for skip in options.skip:
                    if skip in f:
                        logger.info( "Found skip string %s in %s. Skipping.", skip, f )
                        to_skip = True
                        break
                if to_skip: continue
                # Validate the file; when a tree name was given it is passed
                # to checkRootFile as the object to check for.
                isOK =  checkRootFile( full_filename, checkForObjects = [options.treeName]) \
                        if options.treeName is not None else checkRootFile( full_filename )
                if isOK:
                    # Note: the bare file name is stored, not the full path.
                    rootFiles.append( f )
                else:
                    logger.warning( "File %s does not look OK. Checked for tree: %r", full_filename, options.treeName )
            else:
                logger.info( "Found '_reHadd_' in file %s in %s. Skipping.", full_filename, dirName )
    # Start a fresh job (and size counter) for this directory.
    job = []
    jobsize = 0
    for fname in rootFiles:
        filename, file_extension = os.path.splitext(fname)
        # Text after the last '_' of the base name; only files where it is
        # purely numeric contribute to the job size.
        n_str = filename.split('_')[-1]
        if n_str.isdigit():
            full_filename = os.path.join(dirName, fname)
            jobsize += os.path.getsize( full_filename  )
Beispiel #7
0
  try:
    float(options.lheHTCut)
  except:
    sys.exit("Float conversion of option lheHTCut failed. Got this: %s"%options.lheHTCut)
  sample.name+="_lheHT"+options.lheHTCut
  skimCond+="&&lheHTIncoming<"+options.lheHTCut

# Abort when the sample name contains "Run2015D" but the sample object has no
# 'vetoList' attribute.
if "Run2015D" in sample.name and not hasattr(sample, "vetoList"):
  sys.exit("ERROR. Sample %s seems to be data but no vetoList was provided!!" %sample.name)

# Build the veto-list object only for samples that define one; None otherwise.
vetoList_ = vetoList(sample.vetoList) if hasattr(sample, "vetoList") else None

# Output location: <targetDir>/<skim>/<sample name>.
outDir = os.path.join(options.targetDir, options.skim, sample.name)
if os.path.exists(outDir):
  # Collect the .root files already present and flag the directory if
  # checkRootFile rejects any of them (checked for an object called "Events").
  existingFiles = [outDir+'/'+f for f in os.listdir(outDir) if f.endswith('.root')]
  hasBadFile = any([not checkRootFile(f, checkForObjects=["Events"]) for f in existingFiles])
else:
  existingFiles = []
  hasBadFile = False
  
#print "Found bad file? %r"%hasBadFile
# Nothing to do (exit status 0) when the directory already holds .root files,
# none of them was flagged as bad, and overwriting was not requested.
if os.path.exists(outDir) and len(existingFiles)>0 and (not hasBadFile) and not options.overwrite:
  print "Found non-empty directory: %s -> skipping! (found a bad file? %r.)"%(outDir, hasBadFile)
  sys.exit(0)
else:
  # Otherwise the sample is (re)made; 'tmp' is a sub-directory of outDir.
  tmpDir = os.path.join(outDir,'tmp')
  if hasBadFile:
    # A bad file invalidates the whole output directory: remove it entirely.
    print "Found a corrupted file. Remake sample. Delete %s"%outDir
    shutil.rmtree(outDir)
  # Still present here means it was not removed above; announce the deletion
  # requested via options.overwrite.
  if os.path.exists(outDir) and options.overwrite: #not options.update: 
    print "Directory %s exists. Delete it."%outDir
clonedEvents = 0
convertedEvents = 0
outputLumiList = {}
for ievtRange, eventRange in enumerate(eventRanges):

    if len(options.job) > 0 and not ievtRange in options.job: continue

    logger.info("Processing range %i/%i from %i to %i which are %i events.",
                ievtRange, len(eventRanges), eventRange[0], eventRange[1],
                eventRange[1] - eventRange[0])

    # Check whether file exists
    outfilename = filename + '_' + str(ievtRange) + ext
    if os.path.isfile(outfilename):
        logger.info("Output file %s found.", outfilename)
        if not checkRootFile(outfilename, checkForObjects=["Events"]):
            logger.info("File %s is broken. Overwriting.", outfilename)
        elif not options.overwrite:
            logger.info("Skipping.")
            continue
        else:
            logger.info("Overwriting.")

    tmp_directory = ROOT.gDirectory
    outputfile = ROOT.TFile.Open(outfilename, 'recreate')
    tmp_directory.cd()

    # Set the reader to the event range
    reader.setEventRange(eventRange)
    clonedTree = reader.cloneTree(branchKeepStrings,
                                  newTreename="Events",