def __init__(self, channels, baseCutSet, inFile, outfile='./results/output.root',
             maxEvents=float("inf"), intLumi=10000, rowCleaner='', cutModifiers=None,
             ntupleDir='ntuple'):
    '''
    Open the input file, build the cut object and collect one ntuple per
    channel.

    channels: list of strings or single string in the format (e.g.) eemm for
        a 2e2mu final state. '4l', 'zz' and 'ZZ' turn into
        ['eeee' 'eemm' 'mmmm']
    baseCutSet: string with the name of the cut template to use
    inFile: string of an input file name, with path
    outfile: string of an output file name, with path
    maxEvents: stop after this many events processed
    intLumi: integrated luminosity, stored on the instance as self.intLumi
    rowCleaner: name of a module to clean out redundant rows. If an empty
        string (or other False boolean), no cleaning is performed.
    cutModifiers: optional list of extra names passed on to getCutClass
        after baseCutSet (None or empty means no modifiers)
    ntupleDir: name of the ntuple inside each channel directory; the object
        is fetched from the file as '<channel>/<ntupleDir>'

    Raises IOError if the opened file object evaluates as False.
    '''
    # Copy so that neither a shared default nor the caller's list is aliased
    cutModifiers = list(cutModifiers) if cutModifiers else []

    self.cutSet = [baseCutSet] + cutModifiers
    CutClass = getCutClass(baseCutSet, *cutModifiers)
    self.cuts = CutClass()

    self.outFile = outfile
    self.cutOrder = self.cuts.getCutList()

    # Sample name is the input file name without directory or '.root'
    self.sample = inFile.split('/')[-1].replace('.root', '')
    self.inFile = root_open(inFile)
    # Check the opened handle (not the path string, which is always truthy)
    if not self.inFile:
        raise IOError('No file %s' % inFile)

    self.maxEvents = maxEvents
    # if we don't use all the events, we need to know how many we would have
    # done in the whole thing
    if self.maxEvents < float('inf'):
        self.ntupleSize = {}

    self.channels = parseChannels(channels)

    self.ntuples = {}
    # Iterate over a snapshot because channels without an ntuple are removed
    # from self.channels inside the loop
    for channel in list(self.channels):
        try:
            nt = self.inFile.Get('/'.join([channel, ntupleDir]))
            # NOTE: only a missing ntuple is detected here; an ntuple that
            # exists but has no entries is kept.
            self.ntuples[channel] = nt
            nt.create_buffer()
        except DoesNotExist:
            # Parenthesised single-argument print: same output under the
            # Python 2 print statement and the Python 3 print function
            print("Ntuple for channel %s is empty or not found! Skipping." % channel)
            self.channels.remove(channel)
            continue

        if self.maxEvents < float('inf'):
            self.ntupleSize[channel] = self.ntuples[channel].GetEntries()

    self.results = NtupleCopier(self.outFile, **self.ntuples)

    self.prepareCutSummary()

    self.intLumi = intLumi

    self.cleanRows = bool(rowCleaner)
    if self.cleanRows:
        self.CleanerClass = getCleanerClass(rowCleaner)
class Analyzer(object):
    '''
    Runs an ordered list of cuts over the rows of one ntuple per channel in
    a single input file, copies the surviving rows to an output file and
    writes a text summary of how many rows passed each cut.

    passCut and preCut are hooks that derived classes may override.
    '''

    def __init__(self, channels, baseCutSet, inFile, outfile='./results/output.root',
                 maxEvents=float("inf"), intLumi=10000, rowCleaner='', cutModifiers=None,
                 ntupleDir='ntuple'):
        '''
        Open the input file, build the cut object and collect one ntuple per
        channel.

        channels: list of strings or single string in the format (e.g.) eemm
            for a 2e2mu final state. '4l', 'zz' and 'ZZ' turn into
            ['eeee' 'eemm' 'mmmm']
        baseCutSet: string with the name of the cut template to use
        inFile: string of an input file name, with path
        outfile: string of an output file name, with path
        maxEvents: stop after this many events processed
        intLumi: integrated luminosity, stored as self.intLumi (cutReport in
            this class writes raw counts only and does not use it)
        rowCleaner: name of a module to clean out redundant rows. If an empty
            string (or other False boolean), no cleaning is performed.
        cutModifiers: optional list of extra names passed on to getCutClass
            after baseCutSet (None or empty means no modifiers)
        ntupleDir: name of the ntuple inside each channel directory; the
            object is fetched from the file as '<channel>/<ntupleDir>'

        Raises IOError if the opened file object evaluates as False.
        '''
        # Copy so that neither a shared default nor the caller's list is aliased
        cutModifiers = list(cutModifiers) if cutModifiers else []

        self.cutSet = [baseCutSet] + cutModifiers
        CutClass = getCutClass(baseCutSet, *cutModifiers)
        self.cuts = CutClass()

        self.outFile = outfile
        self.cutOrder = self.cuts.getCutList()

        # Sample name is the input file name without directory or '.root'
        self.sample = inFile.split('/')[-1].replace('.root', '')
        self.inFile = root_open(inFile)
        # Check the opened handle (not the path string, which is always truthy)
        if not self.inFile:
            raise IOError('No file %s' % inFile)

        self.maxEvents = maxEvents
        # if we don't use all the events, we need to know how many we would
        # have done in the whole thing
        if self.maxEvents < float('inf'):
            self.ntupleSize = {}

        self.channels = parseChannels(channels)

        self.ntuples = {}
        # Iterate over a snapshot because channels without an ntuple are
        # removed from self.channels inside the loop
        for channel in list(self.channels):
            try:
                nt = self.inFile.Get('/'.join([channel, ntupleDir]))
                # NOTE: only a missing ntuple is detected here; an ntuple that
                # exists but has no entries is kept.
                self.ntuples[channel] = nt
                nt.create_buffer()
            except DoesNotExist:
                # Parenthesised single-argument print: same output under the
                # Python 2 print statement and the Python 3 print function
                print("Ntuple for channel %s is empty or not found! Skipping." % channel)
                self.channels.remove(channel)
                continue

            if self.maxEvents < float('inf'):
                self.ntupleSize[channel] = self.ntuples[channel].GetEntries()

        self.results = NtupleCopier(self.outFile, **self.ntuples)

        self.prepareCutSummary()

        self.intLumi = intLumi

        self.cleanRows = bool(rowCleaner)
        if self.cleanRows:
            self.CleanerClass = getCleanerClass(rowCleaner)

    def prepareCutSummary(self):
        '''
        Prepare dictionary with number of events passing each cut for each
        channel. If redundant row cleaning is done, an extra item will be
        added later (not here).
        '''
        self.cutsPassed = {}
        for channel in self.channels:
            self.cutsPassed[channel] = {}
            for cut in self.cutOrder:
                self.cutsPassed[channel][cut] = 0

    def analyze(self):
        '''
        For a given file, do the whole analysis and output the results to
        self.outFile. Afterwards the results are saved, the input file is
        closed and the cut report is written.
        '''
        if self.cleanRows:
            # For events with more than 4 leptons, FSA Ntuples just have one
            # row for each possible combination of objects. We have to know
            # which one is the right one. Can do this before or after other
            # cuts
            rowCleaner = self.CleanerClass(self.cuts)
            cleanAfter = rowCleaner.cleanAfter()
            if not cleanAfter:
                # Cleaning first: book every row of every channel up front
                for channel in self.channels:
                    # hold number of rows pre-cleaning
                    self.cutsPassed[channel]["TotalRows"] = 0
                    rowCleaner.setChannel(channel)
                    for iRow, row in enumerate(self.ntuples[channel]):
                        if iRow == self.maxEvents:
                            break
                        if (iRow % 5000) == 0:
                            print("%s: Finding redundant rows for %s row %d" % (self.sample, channel, iRow))
                        rowCleaner.bookRow(row, iRow)
                rowCleaner.finalize()
        else:
            cleanAfter = False

        for channel in self.channels:
            objectTemplate = mapObjects(channel)
            objects = objectTemplate
            needReorder = self.cuts.needReorder(channel)

            if cleanAfter:
                rowCleaner.setChannel(channel)
                self.cutsPassed[channel]["SelectBest"] = 0
                # Rows are not saved during this pass, so switch every branch
                # off and re-enable only those matching a pattern the cuts
                # ask for
                self.ntuples[channel].SetBranchStatus('*', 0)
                for branch in self.ntuples[channel].iterbranchnames():
                    for pattern in self.cuts.branchesNeeded:
                        if pattern.match(branch):
                            self.ntuples[channel].SetBranchStatus(branch, 1)
                            break

            iRow = -1  # in case of empty ntuple
            # Loop through and do the cuts
            for iRow, row in enumerate(self.ntuples[channel]):
                # If we've hit maxEvents, we're done
                if iRow == self.maxEvents:
                    print("%s: Reached %d %s rows, ending" % (self.sample, self.maxEvents, channel))
                    break

                if self.cleanRows:
                    if not cleanAfter:
                        # Always pass "TotalRows" because it's always a new row
                        self.passCut(row, channel, "TotalRows")

                        # Ignore wrong version of event (if we're cleaning now)
                        if rowCleaner.isRedundant(row, channel, iRow):
                            continue

                # Report progress every 5000 rows
                if iRow % 5000 == 0:
                    print("%s: Processing %s row %d" % (self.sample, channel, iRow))

                if needReorder:
                    objects = self.cuts.orderLeptons(row, channel, objectTemplate)

                # Apply the cuts in order, stopping at the first failure
                evPass = True
                for cut in self.cutOrder:
                    self.preCut(row, channel, cut)
                    if self.cuts.analysisCut(row, cut, *objects):
                        self.passCut(row, channel, cut)
                    else:
                        evPass = False
                        break

                if evPass:
                    if cleanAfter:
                        # Don't save yet, still might get cleaned
                        rowCleaner.bookRow(row, iRow)
                    else:
                        self.results.saveRow(row, channel)
            else:
                # Only reached when the row loop was not ended by maxEvents
                print("%s: Done with %s (%d rows)" % (self.sample, channel, iRow + 1))

            if cleanAfter:
                # Switch every branch back on before the second pass below
                self.ntuples[channel].SetBranchStatus('*', 1)

        if cleanAfter:
            # Second pass: keep only the rows the cleaner does not flag
            rowCleaner.finalize()
            for channel in self.channels:
                for iRow, row in enumerate(self.ntuples[channel]):
                    if iRow == self.maxEvents:
                        break
                    if rowCleaner.isRedundant(row, channel, iRow):
                        continue
                    else:
                        self.passCut(row, channel, "SelectBest")
                        self.results.saveRow(row, channel)

        print("%s: Done with all channels, saving results as %s" % (self.sample, self.outFile))

        self.results.save()
        self.inFile.close()

        self.cutReport()

    def passCut(self, row, channel, cut):
        '''
        Function to run after cut is passed. Here, just updates the cut
        summary. In derived classes, may be overwritten to do other things
        like fill cut flow histograms.
        '''
        self.cutsPassed[channel][cut] += 1

    def preCut(self, row, channel, cut):
        '''
        Here, does nothing. In derived classes, may be used to do things
        like make control plots.
        '''
        pass

    def getOSSF(self, row, channel, objects=None):
        '''
        Will return a list of same flavor, opposite sign leptons ordered by
        closeness to nominal Z mass. Will only return as many pairs as are in
        the row, so length<4 means there are not two Z candidates in the
        event. Assumes 4 objects which are all leptons; any other number of
        objects gives an empty list.

        If objects list is given, uses that. Otherwise figures it out from
        channel.

        Takes advantage of the fact that FSA ntuples place best Z candidate
        first in eeee and mmmm cases.
        '''
        if not objects:
            objects = mapObjects(channel)

        if len(objects) != 4:
            return []

        ossfs = []
        # only include pairs that are OSSF (same first character of the
        # object name, and the 'SS' variable of the pair is false)
        if objects[0][0] == objects[1][0] and not nObjVar(row, 'SS', objects[0], objects[1]):
            ossfs.extend(objects[:2])
        if objects[2][0] == objects[3][0] and not nObjVar(row, 'SS', objects[2], objects[3]):
            ossfs.extend(objects[2:])

        # If there's 0 or 1 Z candidate, we don't need to worry about order
        if len(ossfs) < 4:
            return ossfs

        # if this is 2e2mu, we might need to flip if the 2mu Z was better
        if channel == 'eemm':
            # NOTE(review): self.zMassVar is not assigned anywhere in this
            # class; presumably a derived class provides it -- confirm.
            mass1 = nObjVar(row, self.zMassVar, ossfs[0], ossfs[1])
            mass2 = nObjVar(row, self.zMassVar, ossfs[2], ossfs[3])
            if abs(mass2 - Z_MASS) < abs(mass1 - Z_MASS):
                return ossfs[2:] + ossfs[:2]
        return ossfs

    def _reportCutList(self, counts):
        '''
        Ordered cut names to print for a dictionary of counts: "TotalRows"
        goes before the regular cuts, "SelectBest" after them, when present.
        '''
        if "TotalRows" in counts:
            return ['TotalRows'] + self.cutOrder
        if "SelectBest" in counts:
            return self.cutOrder + ["SelectBest"]
        return self.cutOrder

    def cutReport(self):
        '''
        Save a text file with cut information. Same name as outfile but .txt
        instead of .root.

        For each channel, and then summed over all channels under "Total",
        one line per cut gives the number of rows that passed it. Only raw
        counts are written; nothing is scaled by self.intLumi.
        '''
        totals = {}
        for cut in self.cutOrder:
            totals[cut] = 0

        with open(self.outFile.replace('.root', '.txt'), 'w') as f:
            for channel in self.channels:
                # Start a running total for the row-cleaning entry the first
                # time a channel carries one
                for cleanerCut in ["TotalRows", "SelectBest"]:
                    if cleanerCut in self.cutsPassed[channel] and cleanerCut not in totals:
                        totals[cleanerCut] = 0
                        break

                f.write("\n%-32s\n" % channel)

                for cut in self._reportCutList(self.cutsPassed[channel]):
                    f.write("%16s : %-9d\n" % (cut, self.cutsPassed[channel][cut]))
                    totals[cut] += self.cutsPassed[channel][cut]

            # Choose the cuts for the summary from the totals themselves
            # rather than from the last channel visited, so that this also
            # works when there are no channels at all
            f.write("Total\n")
            for cut in self._reportCutList(totals):
                f.write("%16s : %-9d\n" % (cut, totals[cut]))