def main():
    """Entry point: run checkDuplicates over the selected input ntuples.

    Input is either a run directory (--inRunDir, scanned as */*.root*) or
    an explicit file list (--inFiles); defaults to output/ntuple*.root.
    """
    # Defaults
    inputpath = 'output/ntuple*.root'

    # Make sure the framework libraries are compiled before they are used.
    from Core import compileMinimal
    compileMinimal()

    options = parseInputArgs()
    samples = datasets.getListSamples(options.samplecsv)
    if options.inRunDir:
        if not os.path.isdir(options.inRunDir):
            print "ERROR: input directory does not exist or is not a directory"
            return
        else:
            inputdir = options.inRunDir
        # One subdirectory per job inside the run directory.
        inputpath = inputdir + '/*/*.root*'
        checkDuplicates(inputpath, samples)
    else:
        # Fall back to the explicit file list (or the default glob).
        if options.inFiles:
            inputpath = options.inFiles
        checkDuplicates(inputpath, samples)
def makeCutFlowHistogram(cutflow):
    """Pack a cutflow summary into a single CutFlowHist histogram.

    Each (flowname, numbers) entry in *cutflow* maps streamlet names to
    (raw, weighted) count pairs.  Every streamlet occupies two consecutive
    bins: the raw count labelled ``<flownum>/R/<flowname>/<streamlet>`` and
    the weighted count labelled ``<flownum>/W/<flowname>/<streamlet>``.

    Returns the filled CutFlowHist object.
    """
    # The AnalysisFramework ROOT dictionary may not be built yet; build it
    # on demand and retry the import.  Catch only ImportError so unrelated
    # failures are not silently masked.
    try:
        from ROOT import AnalysisFramework
    except ImportError:
        compileMinimal()
        from ROOT import AnalysisFramework
    CutFlowHist = AnalysisFramework.CutFlows.CutFlowHist
    # Attach the histogram to the open output file (module-level global) so
    # it is written out together with the merged output.
    if outputFile:
        outputFile.cd()
    # Fixed-size histogram: 400000 bins is assumed large enough for every
    # flow/streamlet combination -- TODO confirm against biggest cutflow.
    cutFlowHist = CutFlowHist("CutFlow", "CutFlow output of AnalysisFramework", 400000, 0, 1)
    flownum = 0
    index = 0
    for flowname, numbers in cutflow:
        # Sort streamlets for a deterministic bin order.
        for streamlet in sorted(numbers.keys()):
            raw, weighted = numbers[streamlet]
            cutFlowHist.SetBinContent(index + 1, raw)
            cutFlowHist.SetBinContent(index + 2, weighted)
            cutFlowHist.SetBinLabel(index + 1, str(flownum) + '/R/' + flowname + '/' + streamlet)
            cutFlowHist.SetBinLabel(index + 2, str(flownum) + '/W/' + flowname + '/' + streamlet)
            index += 2
        flownum += 1
    # %-formatting emits the same text as the old py2 print statement while
    # staying valid under both Python 2 and 3.
    print('Created CutFlow histogram with %s entries.' % index)
    return cutFlowHist
def main():
    """Entry point: run checkDuplicates over the selected input ntuples.

    Input is either a run directory (--inRunDir, scanned as */*.root*) or
    an explicit file list (--inFiles); defaults to output/ntuple*.root.
    """
    # Defaults
    inputpath = "output/ntuple*.root"

    # Make sure the framework libraries are compiled before they are used.
    from Core import compileMinimal
    compileMinimal()

    options = parseInputArgs()
    samples = datasets.getListSamples(options.samplecsv)
    if options.inRunDir:
        if not os.path.isdir(options.inRunDir):
            print "ERROR: input directory does not exist or is not a directory"
            return
        else:
            inputdir = options.inRunDir
        # One subdirectory per job inside the run directory.
        inputpath = inputdir + "/*/*.root*"
        checkDuplicates(inputpath, samples)
    else:
        # Fall back to the explicit file list (or the default glob).
        if options.inFiles:
            inputpath = options.inFiles
        checkDuplicates(inputpath, samples)
def makeCutFlowHistogram(cutflow):
    """Pack a cutflow summary into a single CutFlowHist histogram.

    Each (flowname, numbers) entry in *cutflow* maps streamlet names to
    (raw, weighted) count pairs.  Every streamlet occupies two consecutive
    bins: the raw count labelled ``<flownum>/R/<flowname>/<streamlet>`` and
    the weighted count labelled ``<flownum>/W/<flowname>/<streamlet>``.

    Returns the filled CutFlowHist object.
    """
    # The AnalysisFramework ROOT dictionary may not be built yet; build it
    # on demand and retry the import.  Catch only ImportError so unrelated
    # failures are not silently masked.
    try:
        from ROOT import AnalysisFramework
    except ImportError:
        compileMinimal()
        from ROOT import AnalysisFramework
    CutFlowHist = AnalysisFramework.CutFlows.CutFlowHist
    # Attach the histogram to the open output file (module-level global) so
    # it is written out together with the merged output.
    if outputFile:
        outputFile.cd()
    # Fixed-size histogram: 400000 bins is assumed large enough for every
    # flow/streamlet combination -- TODO confirm against biggest cutflow.
    cutFlowHist = CutFlowHist("CutFlow", "CutFlow output of AnalysisFramework", 400000, 0, 1)
    flownum = 0
    index = 0
    for flowname, numbers in cutflow:
        # Sort streamlets for a deterministic bin order.
        for streamlet in sorted(numbers.keys()):
            raw, weighted = numbers[streamlet]
            cutFlowHist.SetBinContent(index + 1, raw)
            cutFlowHist.SetBinContent(index + 2, weighted)
            cutFlowHist.SetBinLabel(index + 1, str(flownum) + '/R/' + flowname + '/' + streamlet)
            cutFlowHist.SetBinLabel(index + 2, str(flownum) + '/W/' + flowname + '/' + streamlet)
            index += 2
        flownum += 1
    # %-formatting emits the same text as the old py2 print statement while
    # staying valid under both Python 2 and 3.
    print('Created CutFlow histogram with %s entries.' % index)
    return cutFlowHist
def main():
    """Merge per-job ntuple files into one combined ROOT file per sample.

    In directory mode (--inRunDir/--outRunDir) one merged file is produced
    per sample listed in the CSV, optionally weighted by
    xsection * efficiency * kfactor * 1e3 (fb, since the cross-section is
    given in pb).  Otherwise a single merge of --inFiles into --outFile is
    performed.
    """
    global samples
    # Defaults used when no explicit input/output is given.
    outputpath = 'output/combined.root'
    inputpath = 'output/ntuple*.root'

    # Make sure the framework libraries are compiled before they are used.
    from Core import compileMinimal
    compileMinimal()

    options = parseInputArgs()
    samples = datasets.getListSamples(options.samplecsv)
    if options.inRunDir:
        if not os.path.isdir(options.inRunDir):
            print("ERROR: input directory does not exist or is not a directory")
            return
        else:
            inputdir = options.inRunDir
        # Refuse to reuse an existing output directory to avoid mixing runs.
        if not options.outRunDir:
            print("ERROR: invalid output directory (set with --outRunDir)")
            return
        elif os.path.isdir(options.outRunDir):
            print("ERROR:\noutput directory: \n{0} \nalready exists...to avoid inconsistencies, please remove it first".format(options.outRunDir))
            return
        else:
            outputdir = options.outRunDir
            os.makedirs(outputdir)
        logfile = open(outputdir + '/merge.log', 'w')
        for s in samples:
            # One subdirectory per sample group.
            sampledir = outputdir + '/' + s['group']
            if not os.path.isdir(sampledir):
                os.makedirs(sampledir)
            # MC samples are matched by dataset ID; data/embedding by name.
            if not s['category'] == 'Data' and not s['group'] == 'Embedding':
                inputpath = inputdir + '/*' + s['ID'] + '*/*.root*'
            else:
                inputpath = inputdir + '/*' + s['name'] + '*/*.root*'
            separator = '.'
            if not s['ID']:
                separator = ''
            outputpath = sampledir + '/' + s['ID'] + separator + s['name'] + '.root'
            weight = None
            # Best effort: samples with missing or non-numeric fields are
            # merged unweighted.  Catch only lookup/conversion errors so
            # genuine bugs are not silently swallowed.
            try:
                if options.weight:
                    weight = float(s['xsection']) * float(s['efficiency']) * float(s['kfactor']) * 1e3  # to get the weight in fb (the Xsec is in pb)
            except (KeyError, TypeError, ValueError):
                pass
            mergeOne(inputpath, outputpath, logfile, weight, options.cutflow)
    else:
        if options.inFiles:
            inputpath = options.inFiles
        if options.outFile:
            outputpath = options.outFile
        mergeOne(inputpath, outputpath, cutflow=options.cutflow)
def getCutFlowFromHistogram(inputdir):
    """Rebuild the cutflow bookkeeping from merged "CutFlow" histograms.

    Opens every ROOT file matched by *inputdir*, merges their "CutFlow"
    histograms, and decodes the bin labels
    (``<flownum>/<R|W>/<flowname>/<streamlet>``) back into a list of
    (flowname, {streamlet: (raw, weighted)}) tuples ordered by flow number.
    If a total-events histogram is available, an 'OriginalTotalEvents'
    entry is prepended.
    """
    # The AnalysisFramework ROOT dictionary may not be built yet; build it
    # on demand and retry the import.  Catch only ImportError so unrelated
    # failures are not silently masked.
    try:
        from ROOT import AnalysisFramework
    except ImportError:
        compileMinimal()
        from ROOT import AnalysisFramework
    CutFlowHist = AnalysisFramework.CutFlows.CutFlowHist
    inputpath = listifyInputFiles(inputdir)
    htemp = CutFlowHist("CutFlow", "CutFlow output of AnalysisFramework", 400000, 0, 1)
    # Accumulate the CutFlow histogram of every input file into htemp.
    for d in inputpath:
        f = TFile.Open(d)
        heach = f.Get("CutFlow")
        col = TObjArray()
        col.Add(heach)
        htemp.Merge(col)
        f.Close()
    # Decode bins back into {flownum: (flowname, {streamlet: (raw, weighted)})}.
    temp = {}
    for i in range(htemp.GetNbinsX()):
        label = htemp.GetBinLabel(i + 1)
        if not label:
            continue
        flownum = int(label.split('/')[0])
        isweighted = label.split('/')[1] == 'W'
        flowname = label.split('/')[2]
        streamlet = label.split('/')[3]
        # Each bin carries either the raw or the weighted count; the missing
        # half is zero here and the pair is summed per streamlet below.
        if isweighted:
            raw = 0.
            weighted = htemp.GetBinContent(i + 1)
        else:
            raw = htemp.GetBinContent(i + 1)
            weighted = 0.
        if flownum not in temp:
            temp[flownum] = (flowname, {})
        flownametemp, numberstemp = temp[flownum]
        if streamlet not in numberstemp:
            numberstemp[streamlet] = (raw, weighted)
        else:
            rawtemp, weightedtemp = numberstemp[streamlet]
            numberstemp[streamlet] = (raw + rawtemp, weighted + weightedtemp)
    cutflow = []
    totalEvents = getTotalEventsHistogram(inputdir)
    if totalEvents:
        cutflow.append(('OriginalTotalEvents', {'All': (totalEvents.GetBinContent(1), totalEvents.GetBinContent(2))}))
    for i in sorted(temp.keys()):
        cutflow.append(temp[i])
    return cutflow
def getCutFlowFromHistogram(inputdir):
    """Rebuild the cutflow bookkeeping from merged "CutFlow" histograms.

    Opens every ROOT file matched by *inputdir*, merges their "CutFlow"
    histograms, and decodes the bin labels
    (``<flownum>/<R|W>/<flowname>/<streamlet>``) back into a list of
    (flowname, {streamlet: (raw, weighted)}) tuples ordered by flow number.
    If a total-events histogram is available, an 'OriginalTotalEvents'
    entry is prepended.
    """
    # The AnalysisFramework ROOT dictionary may not be built yet; build it
    # on demand and retry the import.  Catch only ImportError so unrelated
    # failures are not silently masked.
    try:
        from ROOT import AnalysisFramework
    except ImportError:
        compileMinimal()
        from ROOT import AnalysisFramework
    CutFlowHist = AnalysisFramework.CutFlows.CutFlowHist
    inputpath = listifyInputFiles(inputdir)
    htemp = CutFlowHist("CutFlow", "CutFlow output of AnalysisFramework", 400000, 0, 1)
    # Accumulate the CutFlow histogram of every input file into htemp.
    for d in inputpath:
        f = TFile.Open(d)
        heach = f.Get("CutFlow")
        col = TObjArray()
        col.Add(heach)
        htemp.Merge(col)
        f.Close()
    # Decode bins back into {flownum: (flowname, {streamlet: (raw, weighted)})}.
    temp = {}
    for i in range(htemp.GetNbinsX()):
        label = htemp.GetBinLabel(i + 1)
        if not label:
            continue
        flownum = int(label.split('/')[0])
        isweighted = label.split('/')[1] == 'W'
        flowname = label.split('/')[2]
        streamlet = label.split('/')[3]
        # Each bin carries either the raw or the weighted count; the missing
        # half is zero here and the pair is summed per streamlet below.
        if isweighted:
            raw = 0.
            weighted = htemp.GetBinContent(i + 1)
        else:
            raw = htemp.GetBinContent(i + 1)
            weighted = 0.
        if flownum not in temp:
            temp[flownum] = (flowname, {})
        flownametemp, numberstemp = temp[flownum]
        if streamlet not in numberstemp:
            numberstemp[streamlet] = (raw, weighted)
        else:
            rawtemp, weightedtemp = numberstemp[streamlet]
            numberstemp[streamlet] = (raw + rawtemp, weighted + weightedtemp)
    cutflow = []
    totalEvents = getTotalEventsHistogram(inputdir)
    if totalEvents:
        cutflow.append(('OriginalTotalEvents', {'All': (totalEvents.GetBinContent(1), totalEvents.GetBinContent(2))}))
    for i in sorted(temp.keys()):
        cutflow.append(temp[i])
    return cutflow
def main(): global samples # Defaults outputpath = 'output/combined.root' inputpath = 'output/ntuple*.root' #joboptions = options.joboptions samplename = "data12_8TeV.periodA.physics_Muons.PhysCont.NTUP_EMBLHIM.grp14_v02_r4697_p1462" from Core import compileMinimal compileMinimal() options = parseInputArgs() samples = datasets.getListSamples(options.samplecsv) joboptions = options.joboptions if options.inRunDir: if not os.path.isdir(options.inRunDir): print "ERROR: input directory does not exist or is not a directory" return else: inputdir = options.inRunDir if not options.outRunDir: print "ERROR: invalid output directory (set with --outRunDir)" return elif os.path.isdir(options.outRunDir): print "ERROR: output directory already exists...to avoid inconsistencies, please remove it first" return else: outputdir = options.outRunDir os.makedirs(outputdir) for s in samples: sampledir = outputdir + '/' + s['group'] if not os.path.isdir(sampledir): os.makedirs(sampledir) inputpath = inputdir + '/' + s['group'] + '/' + s['name'] + '.root' outputpath = sampledir + '/' + s['name'] + '.root' rerunOne(inputpath, outputpath, samplename, joboptions) else: if options.inFiles: inputpath = options.inFiles if options.outFile: outputpath = options.outFile rerunOne(inputpath, outputpath, samplename, joboptions)
def main():
    """Entry point: per-sample merge that only processes the single output
    selected via --singleOutput (variant of the standard merge intended for
    batch/pbs running, per the comments below)."""
    global samples
    # Defaults
    outputpath = 'output/combined.root'
    inputpath = 'output/ntuple*.root'

    # Make sure the framework libraries are compiled before they are used.
    from Core import compileMinimal
    compileMinimal()

    options = parseInputArgs()
    samples = datasets.getListSamples(options.samplecsv)
    print "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
    print "Running over singleOutput: %s" % (options.singleOutput)
    print "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
    # Here checks if input and output folders are valid
    if options.inRunDir:
        if not os.path.isdir(options.inRunDir):
            print "ERROR: input directory does not exist or is not a directory"
            return
        else:
            inputdir = options.inRunDir
        if not options.outRunDir:
            print "ERROR: invalid output directory (set with --outRunDir)"
            return
        elif os.path.isdir(options.outRunDir):
            print "ERROR: output directory already exists...to avoid inconsistencies, please remove it first"
            return
        else:
            outputdir = options.outRunDir
            os.makedirs(outputdir)
        logfile = open(outputdir + '/merge.log', 'w')
        # Here creates the structure in subdirectories with the name of the group
        for s in samples:
            sampledir = outputdir + '/' + s['group']
            if not os.path.isdir(sampledir):
                os.makedirs(sampledir)
            if not s['category'] == 'Data' and not s['group'] == 'Embedding':
                # For MC samples the number and the name are used
                inputpath = inputdir + '/*' + s['ID'] + '*' + s['name'] + '*/*.root*'
            else:
                # For data and embedding only the name
                inputpath = inputdir + '/*' + s['name'] + '*/*.root*'
            outputpath = sampledir + '/' + s['name'] + '.root'
            weight = None
            try:
                if options.weight:
                    weight = float(s['xsection']) * float(s['efficiency']) * float(s['kfactor']) * 1.0e6
            except:
                pass
            weight = None
            # NOTE(review): the assignment above unconditionally resets weight,
            # so the xsection*efficiency*kfactor computation in the try block
            # is dead code -- confirm merging unweighted is intended.
            # Here starts the difference with the standard tool: the file is
            # merged only if options.singleOutput is specified and coincides
            # with the output ntuple name; otherwise nothing is done.
            if options.singleOutput in outputpath.split('/')[-1]:
                print "MERGING: %s" % (outputpath.split('/')[-1])
                # print "MERGING: %s" % (outputpath)
                mergeOne(inputpath, outputpath, logfile, weight, options.cutflow)
            else:
                print "SKIPPING: %s" % (outputpath.split('/')[-1])
                # print "SKIPPING: %s" % (outputpath)
    else:
        # This part of code should not be used when merging in pbs
        if options.inFiles:
            inputpath = options.inFiles
        if options.outFile:
            outputpath = options.outFile
        if options.singleOutput in outputpath.split('/')[-1]:
            print "MERGING: %s" % (outputpath.split('/')[-1])
            # print "MERGING: %s" % (outputpath)
            # Why the weight is not specified in this case?
            mergeOne(inputpath, outputpath, cutflow=options.cutflow)
        else:
            print "SKIPPING: %s" % (outputpath.split('/')[-1])
def main():
    """Merge per-job ntuple files into one combined ROOT file per sample.

    In directory mode (--inRunDir/--outRunDir) one merged file is produced
    per sample listed in the CSV, optionally weighted by
    xsection * efficiency * kfactor * 1e3 (fb, since the cross-section is
    given in pb).  Otherwise a single merge of --inFiles into --outFile is
    performed.
    """
    global samples
    # Defaults used when no explicit input/output is given.
    outputpath = 'output/combined.root'
    inputpath = 'output/ntuple*.root'

    # Make sure the framework libraries are compiled before they are used.
    from Core import compileMinimal
    compileMinimal()

    options = parseInputArgs()
    samples = datasets.getListSamples(options.samplecsv)
    if options.inRunDir:
        if not os.path.isdir(options.inRunDir):
            print("ERROR: input directory does not exist or is not a directory")
            return
        else:
            inputdir = options.inRunDir
        # Refuse to reuse an existing output directory to avoid mixing runs.
        if not options.outRunDir:
            print("ERROR: invalid output directory (set with --outRunDir)")
            return
        elif os.path.isdir(options.outRunDir):
            print("ERROR:\noutput directory: \n{0} \nalready exists...to avoid inconsistencies, please remove it first".format(options.outRunDir))
            return
        else:
            outputdir = options.outRunDir
            os.makedirs(outputdir)
        logfile = open(outputdir + '/merge.log', 'w')
        for s in samples:
            # One subdirectory per sample group.
            sampledir = outputdir + '/' + s['group']
            if not os.path.isdir(sampledir):
                os.makedirs(sampledir)
            # MC samples are matched by dataset ID; data/embedding by name.
            if not s['category'] == 'Data' and not s['group'] == 'Embedding':
                inputpath = inputdir + '/*' + s['ID'] + '*/*.root*'
            else:
                inputpath = inputdir + '/*' + s['name'] + '*/*.root*'
            separator = '.'
            if not s['ID']:
                separator = ''
            outputpath = sampledir + '/' + s['ID'] + separator + s['name'] + '.root'
            weight = None
            # Best effort: samples with missing or non-numeric fields are
            # merged unweighted.  Catch only lookup/conversion errors so
            # genuine bugs are not silently swallowed.
            try:
                if options.weight:
                    weight = float(s['xsection']) * float(s['efficiency']) * float(s['kfactor']) * 1e3  # to get the weight in fb (the Xsec is in pb)
            except (KeyError, TypeError, ValueError):
                pass
            mergeOne(inputpath, outputpath, logfile, weight, options.cutflow)
    else:
        if options.inFiles:
            inputpath = options.inFiles
        if options.outFile:
            outputpath = options.outFile
        mergeOne(inputpath, outputpath, cutflow=options.cutflow)
def main():
    """Entry point: per-sample merge that only processes the single output
    selected via --singleOutput (variant of the standard merge intended for
    batch/pbs running, per the comments below)."""
    global samples
    # Defaults
    outputpath = 'output/combined.root'
    inputpath = 'output/ntuple*.root'

    # Make sure the framework libraries are compiled before they are used.
    from Core import compileMinimal
    compileMinimal()

    options = parseInputArgs()
    samples = datasets.getListSamples(options.samplecsv)
    print "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
    print "Running over singleOutput: %s" % (options.singleOutput)
    print "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
    # Here checks if input and output folders are valid
    if options.inRunDir:
        if not os.path.isdir(options.inRunDir):
            print "ERROR: input directory does not exist or is not a directory"
            return
        else:
            inputdir = options.inRunDir
        if not options.outRunDir:
            print "ERROR: invalid output directory (set with --outRunDir)"
            return
        elif os.path.isdir(options.outRunDir):
            print "ERROR: output directory already exists...to avoid inconsistencies, please remove it first"
            return
        else:
            outputdir = options.outRunDir
            os.makedirs(outputdir)
        logfile = open(outputdir + '/merge.log', 'w')
        # Here creates the structure in subdirectories with the name of the group
        for s in samples:
            sampledir = outputdir + '/' + s['group']
            if not os.path.isdir(sampledir):
                os.makedirs(sampledir)
            if not s['category'] == 'Data' and not s['group'] == 'Embedding':
                # For MC samples the number and the name are used
                inputpath = inputdir + '/*' + s['ID'] + '*' + s['name'] + '*/*.root*'
            else:
                # For data and embedding only the name
                inputpath = inputdir + '/*' + s['name'] + '*/*.root*'
            outputpath = sampledir + '/' + s['name'] + '.root'
            weight = None
            try:
                if options.weight:
                    weight = float(s['xsection']) * float(s['efficiency']) * float(s['kfactor']) * 1.0e6
            except:
                pass
            weight = None
            # NOTE(review): the assignment above unconditionally resets weight,
            # so the xsection*efficiency*kfactor computation in the try block
            # is dead code -- confirm merging unweighted is intended.
            # Here starts the difference with the standard tool: the file is
            # merged only if options.singleOutput is specified and coincides
            # with the output ntuple name; otherwise nothing is done.
            if options.singleOutput in outputpath.split('/')[-1]:
                print "MERGING: %s" % (outputpath.split('/')[-1])
                # print "MERGING: %s" % (outputpath)
                mergeOne(inputpath, outputpath, logfile, weight, options.cutflow)
            else:
                print "SKIPPING: %s" % (outputpath.split('/')[-1])
                # print "SKIPPING: %s" % (outputpath)
    else:
        # This part of code should not be used when merging in pbs
        if options.inFiles:
            inputpath = options.inFiles
        if options.outFile:
            outputpath = options.outFile
        if options.singleOutput in outputpath.split('/')[-1]:
            print "MERGING: %s" % (outputpath.split('/')[-1])
            # print "MERGING: %s" % (outputpath)
            # Why the weight is not specified in this case?
            mergeOne(inputpath, outputpath, cutflow=options.cutflow)
        else:
            print "SKIPPING: %s" % (outputpath.split('/')[-1])