skimCond += "&&lheHTIncoming<" + options.lheHTCut if "Run2015D" in sample.name and not hasattr(sample, "vetoList"): sys.exit( "ERROR. Sample %s seems to be data but no vetoList was provided!!" % sample.name) vetoList_ = vetoList(sample.vetoList) if hasattr(sample, "vetoList") else None outDir = os.path.join(options.targetDir, options.skim, sample.name) if os.path.exists(outDir): existingFiles = [ outDir + '/' + f for f in os.listdir(outDir) if f.endswith('.root') ] hasBadFile = any([ not checkRootFile(f, checkForObjects=["Events"]) for f in existingFiles ]) else: existingFiles = [] hasBadFile = False #print "Found bad file? %r"%hasBadFile if os.path.exists(outDir) and len(existingFiles) > 0 and ( not hasBadFile) and not options.overwrite: print "Found non-empty directory: %s -> skipping! (found a bad file? %r.)" % ( outDir, hasBadFile) sys.exit(0) else: tmpDir = os.path.join(outDir, 'tmp') if hasBadFile: print "Found a corrupted file. Remake sample. Delete %s" % outDir
def _fillPlotsByEventLoop(c, s, b, sampleScaleFac):
    """Fill the plots of sample ``s`` from chain ``c`` by reading entries one by one.

    For each cut string in ``s['plotsPerCutForSample']`` a ROOT.TEventList is
    filled via ``c.Draw(">>name", cut)``; every selected entry is then loaded
    and filled into all plots registered for that cut.

    Args:
        c: ROOT.TChain holding the files of the current sample bin.
        s: sample dictionary (needs 'name' and 'plotsPerCutForSample').
        b: name of the current bin; used in the event-list name and log output.
        sampleScaleFac: extra factor multiplied into every fill weight.
    """
    infinity = float('inf')
    plotsPerCut = s['plotsPerCutForSample']
    for ics, cutString in enumerate(plotsPerCut.keys()):
        plotsToFill = plotsPerCut[cutString]
        elistName = "eList_" + s['name'] + '_' + b + '_' + str(ics)
        elist = ROOT.TEventList(elistName)
        c.Draw(">>" + elistName, cutString)
        number_events = elist.GetN()
        print("Reading:  %s %s with %s events passing cutString %s and will fill %s vars."
              % (s["name"], b, number_events, cutString, len(plotsToFill)))

        for p in plotsToFill:
            # Normalise a missing/false per-event cut function to None.
            if not ('func' in p.cut and p.cut['func']):
                p.cut['func'] = None
            if p.TTreeFormula:
                # Construct the formula directly rather than exec'ing a
                # concatenated source string; fString is only kept for the log.
                fString = 'ROOT.TTreeFormula("' + p.name + '","' + p.TTreeFormula + '",c)'
                p.ttreeFormula = ROOT.TTreeFormula(p.name, p.TTreeFormula, c)
                print("Created TTreeFormula: %s" % fString)

        for i in range(number_events):
            if i % 10000 == 0 and i > 0:
                print(i)
            c.GetEntry(elist.GetEntry(i))
            for p in plotsToFill:
                if p.cut['func'] and not p.cut['func'](c):
                    continue
                weight = c.GetLeaf(p.weightString).GetValue() if p.weightString else 1.
                reWeight = p.weightFunc(c) if p.weightFunc else 1.

                # Reset per plot so that a plot without any value source can
                # never be filled with the value computed for another plot.
                val = None
                # Later sources override earlier ones: leaf < TTreeFormula < func.
                if p.leaf:
                    val = getVarValue(c, p.leaf, p.ind)
                if p.TTreeFormula:
                    p.ttreeFormula.UpdateFormulaLeaves()
                    val = p.ttreeFormula.EvalInstance()
                if p.func:
                    val = p.func(c)

                # The comparison also rejects NaN and +inf.
                if val is not None and val < infinity:
                    p.histo.Fill(val, reWeight * weight * sampleScaleFac)


def _fillPlotsByDraw(c, s, b, sampleScaleFac):
    """Fill the plots of sample ``s`` from chain ``c`` with one draw per plot.

    Each plot's ``string`` is passed to ``getPlotFromChain`` together with the
    cut string it is registered under; the result is scaled by
    ``sampleScaleFac`` and added to ``p.histo``.
    """
    for cutString, plotsToFill in s['plotsPerCutForSample'].items():
        print("Reading:  %s %s with cutString %s and will fill %s vars."
              % (s["name"], b, cutString, len(plotsToFill)))
        for p in plotsToFill:
            print("%s String %s %s Cut %s %s %s %s"
                  % (c, p.string, p.binning, cutString, p.weightString,
                     p.binningIsExplicit, sampleScaleFac))
            tmp = getPlotFromChain(c, p.string, p.binning, cutString, p.weightString,
                                   binningIsExplicit=p.binningIsExplicit)
            tmp.Scale(sampleScaleFac)
            p.histo.Add(tmp)


def _foldOverflowBins(p):
    """Add the under-/overflow of ``p.histo`` to its first/last regular bin.

    ``p.overFlow`` selects the side: "upper", "lower" or "both". Errors are
    combined in quadrature. The under-/overflow bins themselves are left as is.
    """
    histo = p.histo
    if p.overFlow in ("upper", "both"):
        nbins = histo.GetNbinsX()
        histo.SetBinContent(nbins, histo.GetBinContent(nbins) + histo.GetBinContent(nbins + 1))
        histo.SetBinError(nbins, sqrt(histo.GetBinError(nbins)**2 + histo.GetBinError(nbins + 1)**2))
    if p.overFlow in ("lower", "both"):
        histo.SetBinContent(1, histo.GetBinContent(0) + histo.GetBinContent(1))
        histo.SetBinError(1, sqrt(histo.GetBinError(0)**2 + histo.GetBinError(1)**2))


def loopAndFill(stacks, mode="loop"):
    """Fill the histograms of all plots contained in ``stacks``.

    Args:
        stacks: iterable of stack objects providing ``usedBranches`` and
            ``plotLists`` (lists of lists of plots). Every plot carries a
            ``sample`` dictionary with 'name' and 'dir' or 'dirname', and
            optionally 'bins', 'file', 'treeName', 'small', 'scale', 'isData'.
        mode: "loop" reads the selected entries one by one and fills from
            ``leaf`` / ``TTreeFormula`` / ``func``; "draw" fills from each
            plot's ``string`` via ``getPlotFromChain``. Case-insensitive.

    Returns:
        None. ``p.histo`` of every plot is filled in place; afterwards
        overflow bins are folded, stacks are summed with ``sumStackHistos``
        and plots with ``normalizeTo`` are rescaled.

    Raises:
        AssertionError: if a sample has neither 'dir' nor 'dirname', if a plot
            defines ``string`` in loop mode, or lacks it in draw mode.
    """
    # Normalise once so that validation and dispatch agree on e.g. "Loop".
    mode = mode.lower()

    allSamples = []
    allPlots = []
    usedBranches = set()
    for stack in stacks:
        usedBranches.update(stack.usedBranches)
        for plotList in stack.plotLists:
            for p in plotList:
                allPlots.append(p)
                if p.leaf:
                    usedBranches.add(p.leaf)
                else:
                    usedBranches.update(p.usedBranches)
                # The weight leaf is read for every plot that names one, so
                # its branch has to be active regardless of the value source.
                if p.weightString:
                    usedBranches.add(p.weightString)
                if p.sample not in allSamples:
                    assert 'dir' in p.sample or 'dirname' in p.sample, \
                        "Missing key dir or dirname in sample %s" % repr(p.sample)
                    allSamples.append(p.sample)
    branchList = list(usedBranches)

    if mode == 'loop':
        assert not any(p.string for p in allPlots), \
            "Loop mode is %s but specified 'string' for: %s" % (
                mode, ", ".join([p.name for p in allPlots if p.string]))
    if mode == 'draw':
        assert all(p.string for p in allPlots), \
            "Loop mode is %s but specified no 'string' for: %s" % (
                mode, ", ".join([p.name for p in allPlots if not p.string]))

    print("Found %s different samples: %s"
          % (len(allSamples), ", ".join(s['name'] for s in allSamples)))

    # Group the plots of each sample by their effective cut string so that
    # every distinct selection is evaluated only once per chain.
    for s in allSamples:
        isData = 'isData' in s and s['isData']
        plotsPerCutForSample = {}
        for p in allPlots:
            if p.sample == s:
                if isData and 'dataCut' in p.cut:
                    cut = "(" + p.cut['string'] + ")&&(" + p.cut['dataCut'] + ")"
                else:
                    cut = p.cut['string']
                plotsForCut = plotsPerCutForSample.setdefault(cut, [])
                if p not in plotsForCut:
                    plotsForCut.append(p)
        s['plotsPerCutForSample'] = plotsPerCutForSample

    for s in allSamples:
        sampleScaleFac = s['scale'] if 'scale' in s else 1
        if sampleScaleFac != 1:
            print("Using sampleScaleFac %s for sample %s" % (sampleScaleFac, s["name"]))
        hasBins = 'bins' in s
        bins = s['bins'] if hasBins else ['default']
        treeName = s['treeName'] if 'treeName' in s else 'Events'
        # 'small' samples are restricted to a single input file.
        maxN = 1 if ('small' in s and s['small']) else -1
        sampleDir = s['dirname'] if 'dirname' in s else s['dir']

        for b in bins:
            c = ROOT.TChain(treeName)
            counter = 0
            if hasBins:
                fileList = getFileList(sampleDir + '/' + b, maxN=maxN, histname="")
            else:
                fileList = [s['file']]
            for f in fileList:
                if not f.endswith('.root'):
                    continue
                if checkRootFile(f, checkForObjects=[treeName]):
                    counter += 1
                    c.Add(f)
                else:
                    print("File %s looks broken." % f)
            ntot = c.GetEntries()
            print("Added  %s files from sample %s dir %s bin %s ntot %s"
                  % (counter, s['name'], sampleDir, b, ntot))
            switchOnBranches(c, branchList)

            if ntot == 0:
                print("Warning! Found zero events in %s bin %s  -> do nothing" % (s['name'], b))
            elif mode == 'loop':
                _fillPlotsByEventLoop(c, s, b, sampleScaleFac)
            elif mode == 'draw':
                _fillPlotsByDraw(c, s, b, sampleScaleFac)

            # Release the chain in every case, including the empty one.
            c.Reset()
            del c

    # do over-flow bins
    for p in allPlots:
        _foldOverflowBins(p)

    # sum stacks
    for stack in stacks:
        sumStackHistos(stack)

    # normalize
    for p in allPlots:
        if p.normalizeTo:
            t = p.normalizeTo.histo.Integral()
            y = p.histo.Integral()
            r = p.normalizeRef.histo.Integral() if p.normalizeRef else y
            if r > 0:
                p.histo.Scale(t / r)
def _fillPlotsByEventLoop(c, s, b, sampleScaleFac):
    """Fill the plots of sample ``s`` from chain ``c`` by reading entries one by one.

    For each cut string in ``s['plotsPerCutForSample']`` a ROOT.TEventList is
    filled via ``c.Draw(">>name", cut)``; every selected entry is then loaded
    and filled into all plots registered for that cut.

    Args:
        c: ROOT.TChain holding the files of the current sample bin.
        s: sample dictionary (needs 'name' and 'plotsPerCutForSample').
        b: name of the current bin; used in the event-list name and log output.
        sampleScaleFac: extra factor multiplied into every fill weight.
    """
    infinity = float('inf')
    plotsPerCut = s['plotsPerCutForSample']
    for ics, cutString in enumerate(plotsPerCut.keys()):
        plotsToFill = plotsPerCut[cutString]
        elistName = "eList_" + s['name'] + '_' + b + '_' + str(ics)
        elist = ROOT.TEventList(elistName)
        c.Draw(">>" + elistName, cutString)
        number_events = elist.GetN()
        print("Reading:  %s %s with %s events passing cutString %s and will fill %s vars."
              % (s["name"], b, number_events, cutString, len(plotsToFill)))

        for p in plotsToFill:
            # Normalise a missing/false per-event cut function to None.
            if not ('func' in p.cut and p.cut['func']):
                p.cut['func'] = None
            if p.TTreeFormula:
                # Construct the formula directly rather than exec'ing a
                # concatenated source string; fString is only kept for the log.
                fString = 'ROOT.TTreeFormula("' + p.name + '","' + p.TTreeFormula + '",c)'
                p.ttreeFormula = ROOT.TTreeFormula(p.name, p.TTreeFormula, c)
                print("Created TTreeFormula: %s" % fString)

        for i in range(number_events):
            if i % 10000 == 0 and i > 0:
                print(i)
            c.GetEntry(elist.GetEntry(i))
            for p in plotsToFill:
                if p.cut['func'] and not p.cut['func'](c):
                    continue
                weight = c.GetLeaf(p.weightString).GetValue() if p.weightString else 1.
                reWeight = p.weightFunc(c) if p.weightFunc else 1.

                # Reset per plot so that a plot without any value source can
                # never be filled with the value computed for another plot.
                val = None
                # Later sources override earlier ones: leaf < TTreeFormula < func.
                if p.leaf:
                    val = getVarValue(c, p.leaf, p.ind)
                if p.TTreeFormula:
                    p.ttreeFormula.UpdateFormulaLeaves()
                    val = p.ttreeFormula.EvalInstance()
                if p.func:
                    val = p.func(c)

                # The comparison also rejects NaN and +inf.
                if val is not None and val < infinity:
                    p.histo.Fill(val, reWeight * weight * sampleScaleFac)


def _fillPlotsByDraw(c, s, b, sampleScaleFac):
    """Fill the plots of sample ``s`` from chain ``c`` with one draw per plot.

    Each plot's ``string`` is passed to ``getPlotFromChain`` together with the
    cut string it is registered under; the result is scaled by
    ``sampleScaleFac`` and added to ``p.histo``.
    """
    for cutString, plotsToFill in s['plotsPerCutForSample'].items():
        print("Reading:  %s %s with cutString %s and will fill %s vars."
              % (s["name"], b, cutString, len(plotsToFill)))
        for p in plotsToFill:
            print("%s String %s %s Cut %s %s %s %s"
                  % (c, p.string, p.binning, cutString, p.weightString,
                     p.binningIsExplicit, sampleScaleFac))
            tmp = getPlotFromChain(c, p.string, p.binning, cutString, p.weightString,
                                   binningIsExplicit=p.binningIsExplicit)
            tmp.Scale(sampleScaleFac)
            p.histo.Add(tmp)


def _foldOverflowBins(p):
    """Add the under-/overflow of ``p.histo`` to its first/last regular bin.

    ``p.overFlow`` selects the side: "upper", "lower" or "both". Errors are
    combined in quadrature. The under-/overflow bins themselves are left as is.
    """
    histo = p.histo
    if p.overFlow in ("upper", "both"):
        nbins = histo.GetNbinsX()
        histo.SetBinContent(nbins, histo.GetBinContent(nbins) + histo.GetBinContent(nbins + 1))
        histo.SetBinError(nbins, sqrt(histo.GetBinError(nbins)**2 + histo.GetBinError(nbins + 1)**2))
    if p.overFlow in ("lower", "both"):
        histo.SetBinContent(1, histo.GetBinContent(0) + histo.GetBinContent(1))
        histo.SetBinError(1, sqrt(histo.GetBinError(0)**2 + histo.GetBinError(1)**2))


def loopAndFill(stacks, mode="loop"):
    """Fill the histograms of all plots contained in ``stacks``.

    Args:
        stacks: iterable of stack objects providing ``usedBranches`` and
            ``plotLists`` (lists of lists of plots). Every plot carries a
            ``sample`` dictionary with 'name' and 'dir' or 'dirname', and
            optionally 'bins', 'file', 'treeName', 'small', 'scale', 'isData'.
        mode: "loop" reads the selected entries one by one and fills from
            ``leaf`` / ``TTreeFormula`` / ``func``; "draw" fills from each
            plot's ``string`` via ``getPlotFromChain``. Case-insensitive.

    Returns:
        None. ``p.histo`` of every plot is filled in place; afterwards
        overflow bins are folded, stacks are summed with ``sumStackHistos``
        and plots with ``normalizeTo`` are rescaled.

    Raises:
        AssertionError: if a sample has neither 'dir' nor 'dirname', if a plot
            defines ``string`` in loop mode, or lacks it in draw mode.
    """
    # Normalise once so that validation and dispatch agree on e.g. "Loop".
    mode = mode.lower()

    allSamples = []
    allPlots = []
    usedBranches = set()
    for stack in stacks:
        usedBranches.update(stack.usedBranches)
        for plotList in stack.plotLists:
            for p in plotList:
                allPlots.append(p)
                if p.leaf:
                    usedBranches.add(p.leaf)
                else:
                    usedBranches.update(p.usedBranches)
                # The weight leaf is read for every plot that names one, so
                # its branch has to be active regardless of the value source.
                if p.weightString:
                    usedBranches.add(p.weightString)
                if p.sample not in allSamples:
                    assert 'dir' in p.sample or 'dirname' in p.sample, \
                        "Missing key dir or dirname in sample %s" % repr(p.sample)
                    allSamples.append(p.sample)
    branchList = list(usedBranches)

    if mode == 'loop':
        assert not any(p.string for p in allPlots), \
            "Loop mode is %s but specified 'string' for: %s" % (
                mode, ", ".join([p.name for p in allPlots if p.string]))
    if mode == 'draw':
        assert all(p.string for p in allPlots), \
            "Loop mode is %s but specified no 'string' for: %s" % (
                mode, ", ".join([p.name for p in allPlots if not p.string]))

    print("Found %s different samples: %s"
          % (len(allSamples), ", ".join(s['name'] for s in allSamples)))

    # Group the plots of each sample by their effective cut string so that
    # every distinct selection is evaluated only once per chain.
    for s in allSamples:
        isData = 'isData' in s and s['isData']
        plotsPerCutForSample = {}
        for p in allPlots:
            if p.sample == s:
                if isData and 'dataCut' in p.cut:
                    cut = "(" + p.cut['string'] + ")&&(" + p.cut['dataCut'] + ")"
                else:
                    cut = p.cut['string']
                plotsForCut = plotsPerCutForSample.setdefault(cut, [])
                if p not in plotsForCut:
                    plotsForCut.append(p)
        s['plotsPerCutForSample'] = plotsPerCutForSample

    for s in allSamples:
        sampleScaleFac = s['scale'] if 'scale' in s else 1
        if sampleScaleFac != 1:
            print("Using sampleScaleFac %s for sample %s" % (sampleScaleFac, s["name"]))
        hasBins = 'bins' in s
        bins = s['bins'] if hasBins else ['default']
        treeName = s['treeName'] if 'treeName' in s else 'Events'
        # 'small' samples are restricted to a single input file.
        maxN = 1 if ('small' in s and s['small']) else -1
        sampleDir = s['dirname'] if 'dirname' in s else s['dir']

        for b in bins:
            c = ROOT.TChain(treeName)
            counter = 0
            if hasBins:
                fileList = getFileList(sampleDir + '/' + b, maxN=maxN, histname="")
            else:
                fileList = [s['file']]
            for f in fileList:
                if not f.endswith('.root'):
                    continue
                if checkRootFile(f, checkForObjects=[treeName]):
                    counter += 1
                    c.Add(f)
                else:
                    print("File %s looks broken." % f)
            ntot = c.GetEntries()
            print("Added  %s files from sample %s dir %s bin %s ntot %s"
                  % (counter, s['name'], sampleDir, b, ntot))
            switchOnBranches(c, branchList)

            if ntot == 0:
                print("Warning! Found zero events in %s bin %s  -> do nothing" % (s['name'], b))
            elif mode == 'loop':
                _fillPlotsByEventLoop(c, s, b, sampleScaleFac)
            elif mode == 'draw':
                _fillPlotsByDraw(c, s, b, sampleScaleFac)

            # Release the chain in every case, including the empty one.
            c.Reset()
            del c

    # do over-flow bins
    for p in allPlots:
        _foldOverflowBins(p)

    # sum stacks
    for stack in stacks:
        sumStackHistos(stack)

    # normalize
    for p in allPlots:
        if p.normalizeTo:
            t = p.normalizeTo.histo.Integral()
            y = p.histo.Integral()
            r = p.normalizeRef.histo.Integral() if p.normalizeRef else y
            if r > 0:
                p.histo.Scale(t / r)
# Walk options.dir and collect, per directory, the usable ROOT files.
jobs = []
for dirName, subdirList, fileList in os.walk(options.dir):
    rootFiles = []
    for f in fileList:
        if not f.endswith('.root'):
            continue
        full_filename = os.path.join(dirName, f)

        # Files tagged '_reHadd_' are not considered as inputs.
        if '_reHadd_' in f:
            logger.info("Found '_reHadd_' in file %s in %s. Skipping.", full_filename, dirName)
            continue

        # Only the first matching skip string is reported.
        matched_skip = next((skip for skip in options.skip if skip in f), None)
        if matched_skip is not None:
            logger.info("Found skip string %s in %s. Skipping.", matched_skip, f)
            continue

        # Without a tree name only the file itself is checked.
        if options.treeName is None:
            isOK = checkRootFile(full_filename)
        else:
            isOK = checkRootFile(full_filename, checkForObjects=[options.treeName])

        if not isOK:
            logger.warning("File %s does not look OK. Checked for tree: %r", full_filename, options.treeName)
            continue
        rootFiles.append(f)

    job = []
    jobsize = 0
    for fname in rootFiles:
        filename, file_extension = os.path.splitext(fname)
        # Text after the last underscore of the base name.
        n_str = filename.split('_')[-1]
# Output files are named <outDir>/<sample.name>_<range index>.root
filename, ext = os.path.splitext(os.path.join(outDir, sample.name + '.root'))

clonedEvents = 0
convertedEvents = 0
outputLumiList = {}
for ievtRange, eventRange in enumerate(eventRanges):
    # When specific jobs were requested, process only those ranges.
    if len(options.job) > 0 and ievtRange not in options.job:
        continue

    firstEvent, lastEvent = eventRange[0], eventRange[1]
    logger.info("Processing range %i/%i from %i to %i which are %i events.",
                ievtRange, len(eventRanges), firstEvent, lastEvent, lastEvent - firstEvent)

    # Check whether file exists
    outfilename = ''.join((filename, '_', str(ievtRange), ext))
    if os.path.isfile(outfilename):
        logger.info("Output file %s found.", outfilename)
        if not checkRootFile(outfilename, checkForObjects=["Events"]):
            # A broken file is always redone, independent of options.overwrite.
            logger.info("File %s is broken. Overwriting.", outfilename)
        elif options.overwrite:
            logger.info("Overwriting.")
        else:
            logger.info("Skipping.")
            continue

    # Opening the output file changes ROOT's current directory; restore it.
    tmp_directory = ROOT.gDirectory
    outputfile = ROOT.TFile.Open(outfilename, 'recreate')
    tmp_directory.cd()

    # Set the reader to the event range
    reader.setEventRange(eventRange)

    clonedTree = reader.cloneTree(branchKeepStrings, newTreename="Events", rootfile=outputfile)
    clonedEvents += clonedTree.GetEntries()
# Walk the directory structure and group files in 'jobs' of
# [f1_0.root, f1_1.root, ...] totalling to approx. sizeGB
jobs = []
for dirName, subdirList, fileList in os.walk(options.dir):
    rootFiles = []

    # Keyword arguments for checkRootFile: look for the tree only if one was given.
    if options.treeName is not None:
        checkKwargs = {'checkForObjects': [options.treeName]}
    else:
        checkKwargs = {}

    for f in fileList:
        if not f.endswith('.root'):
            continue
        full_filename = os.path.join(dirName, f)

        if '_reHadd_' in f:
            logger.info("Found '_reHadd_' in file %s in %s. Skipping.", full_filename, dirName)
            continue

        # Report the first skip string contained in the file name, if any.
        matched_skip = next((skip for skip in options.skip if skip in f), None)
        if matched_skip is not None:
            logger.info("Found skip string %s in %s. Skipping.", matched_skip, f)
            continue

        isOK = checkRootFile(full_filename, **checkKwargs)
        if isOK:
            rootFiles.append(f)
        else:
            logger.warning("File %s does not look OK. Checked for tree: %r", full_filename, options.treeName)

    job = []
    jobsize = 0
    for fname in rootFiles:
        filename, file_extension = os.path.splitext(fname)
        n_str = filename.split('_')[-1]
        # Only files whose base name ends in _<digits> are counted.
        if n_str.isdigit():
            full_filename = os.path.join(dirName, fname)
            jobsize += os.path.getsize(full_filename)
# The LHE HT cut is spliced into the skim condition as text, so make sure it
# parses as a number first.
# NOTE(review): presumably guarded by a check that options.lheHTCut was given;
# the enclosing context is not visible in this excerpt -- confirm.
try:
    float(options.lheHTCut)
except (TypeError, ValueError):
    # Catch conversion failures only; a bare except would also intercept
    # KeyboardInterrupt and SystemExit.
    sys.exit("Float conversion of option lheHTCut failed. Got this: %s" % options.lheHTCut)
sample.name += "_lheHT" + options.lheHTCut
skimCond += "&&lheHTIncoming<" + options.lheHTCut

# Samples with "Run2015D" in their name must come with a veto list.
if "Run2015D" in sample.name and not hasattr(sample, "vetoList"):
    sys.exit("ERROR. Sample %s seems to be data but no vetoList was provided!!" % sample.name)
vetoList_ = vetoList(sample.vetoList) if hasattr(sample, "vetoList") else None

outDir = os.path.join(options.targetDir, options.skim, sample.name)

# Inspect what a previous run left behind in the output directory.
if os.path.exists(outDir):
    existingFiles = [os.path.join(outDir, f) for f in os.listdir(outDir) if f.endswith('.root')]
    # Generator: stop checking at the first broken file, one is enough.
    hasBadFile = any(not checkRootFile(f, checkForObjects=["Events"]) for f in existingFiles)
else:
    existingFiles = []
    hasBadFile = False

if os.path.exists(outDir) and len(existingFiles) > 0 and (not hasBadFile) and not options.overwrite:
    # Complete, healthy output already exists and overwriting was not requested.
    print("Found non-empty directory: %s -> skipping! (found a bad file? %r.)" % (outDir, hasBadFile))
    sys.exit(0)
else:
    tmpDir = os.path.join(outDir, 'tmp')
    if hasBadFile:
        print("Found a corrupted file. Remake sample. Delete %s" % outDir)
        shutil.rmtree(outDir)
    if os.path.exists(outDir) and options.overwrite:  # not options.update:
        print("Directory %s exists. Delete it." % outDir)
clonedEvents = 0 convertedEvents = 0 outputLumiList = {} for ievtRange, eventRange in enumerate(eventRanges): if len(options.job) > 0 and not ievtRange in options.job: continue logger.info("Processing range %i/%i from %i to %i which are %i events.", ievtRange, len(eventRanges), eventRange[0], eventRange[1], eventRange[1] - eventRange[0]) # Check whether file exists outfilename = filename + '_' + str(ievtRange) + ext if os.path.isfile(outfilename): logger.info("Output file %s found.", outfilename) if not checkRootFile(outfilename, checkForObjects=["Events"]): logger.info("File %s is broken. Overwriting.", outfilename) elif not options.overwrite: logger.info("Skipping.") continue else: logger.info("Overwriting.") tmp_directory = ROOT.gDirectory outputfile = ROOT.TFile.Open(outfilename, 'recreate') tmp_directory.cd() # Set the reader to the event range reader.setEventRange(eventRange) clonedTree = reader.cloneTree(branchKeepStrings, newTreename="Events",