def __flatten2D(self, selection, **kwargs):
    '''Produce flat 2D histograms for a given selection.

    selection   -- cut string applied to every Draw call
    scalefactor -- weight expression; defaults to '1' for data, 'genWeight' for MC
    postfix     -- optional histogram-name suffix (accepted for interface parity)
    '''
    if not hasProgress:
        logging.info('Flattening {0}'.format(self.sample))
    scalefactor = kwargs.pop('scalefactor', '1' if isData(self.sample) else 'genWeight')
    postfix = kwargs.pop('postfix', '')
    # Draw directly from the full tree; the selection is applied in the Draw
    # expression instead of materializing a skimmed copy.
    #tree = self.sampleTree.CopyTree(selection)
    tree = self.sampleTree
    if not tree: return
    # setup outputs (os.makedirs instead of shelling out to `mkdir -p`)
    outdir = os.path.dirname(self.outputFileName)
    if outdir and not os.path.isdir(outdir):
        os.makedirs(outdir)
    self.__open(self.outputFileName)
    # scale MC to the target integrated luminosity
    if not isData(self.sample):
        scalefactor = '{0}*{1}'.format(scalefactor, float(self.intLumi) / self.sampleLumi)
    # make each histogram
    for histName, params in self.histParameters2D.iteritems():
        drawString = '{0}:{1}>>{2}({3})'.format(
            params['yVariable'], params['xVariable'], histName,
            ', '.join([str(x) for x in params['xBinning'] + params['yBinning']]))
        # BUG FIX: the selection argument was previously ignored ('1' was drawn
        # unconditionally); now that CopyTree(selection) is disabled the cut
        # must be applied here.
        selectionString = '{0}*({1})'.format(scalefactor, selection if selection else '1')
        tree.Draw(drawString, selectionString, 'goff')
        self.currVal += 1
        if hasProgress: self.pbar.update(self.currVal)
    self.__write()
def __initializeNtuple(self):
    '''Assemble the sample's TChain and derive its effective luminosity.'''
    tchain = ROOT.TChain(self.treeName)
    # Inputs come either from an explicit list file or from a directory glob.
    if self.inputFileList:
        # reading from a passed list of inputfiles
        with open(self.inputFileList, 'r') as listfile:
            allFiles = [line.strip() for line in listfile]
    else:
        # reading from an input directory (all files in directory will be processed)
        allFiles = glob.glob('{0}/*.root'.format(self.ntupleDirectory))
    if not allFiles:
        logging.error('No files found for sample {0}'.format(self.sample))
    # Accumulate the stored generator-weight sums while chaining the trees.
    summedWeights = 0.
    for fname in allFiles:
        tfile = ROOT.TFile.Open(fname)
        summedWeights += tfile.Get("summedWeights").GetBinContent(1)
        tfile.Close()
        tchain.Add(fname)
    if not summedWeights and not isData(self.sample):
        logging.warning('No events for sample {0}'.format(self.sample))
    self.intLumi = float(getLumi())
    self.xsec = getXsec(self.sample)
    # Effective sample luminosity = sum of weights / cross section (0 if xsec unknown).
    self.sampleLumi = float(summedWeights) / self.xsec if self.xsec else 0.
    self.sampleTree = tchain
    self.files = allFiles
    self.initialized = True
    logging.debug(
        'Initialized {0}: summedWeights = {1}; xsec = {2}; sampleLumi = {3}; intLumi = {4}'
        .format(self.sample, summedWeights, self.xsec, self.sampleLumi, self.intLumi))
def __initializeNtuple(self):
    '''Assemble the sample's TChain, walking the ntuple directory recursively.'''
    tchain = ROOT.TChain(self.treeName)
    if self.inputFileList:
        # reading from a passed list of inputfiles
        with open(self.inputFileList, 'r') as listfile:
            allFiles = [line.strip() for line in listfile]
    else:
        # Recursively collect *.root files, skipping any path containing
        # 'failed' (crab/condor failed-job directories).
        #allFiles = glob.glob('{0}/*.root'.format(self.ntupleDirectory))
        allFiles = []
        for dirpath, dirnames, filenames in os.walk(self.ntupleDirectory):
            if 'failed' in dirpath:
                continue
            for fn in fnmatch.filter(filenames, '*.root'):
                allFiles.append(os.path.join(dirpath, fn))
    if not allFiles:
        logging.error('No files found for sample {0}'.format(self.sample))
    # Sum the stored generator weights while adding each file to the chain.
    summedWeights = 0.
    for fname in allFiles:
        tfile = ROOT.TFile.Open(fname)
        summedWeights += tfile.Get("summedWeights").GetBinContent(1)
        tfile.Close()
        tchain.Add(fname)
    if not summedWeights and not isData(self.sample):
        logging.warning('No events for sample {0}'.format(self.sample))
    self.xsec = getXsec(self.sample)
    # Effective sample luminosity = sum of weights / cross section (0 if xsec unknown).
    self.sampleLumi = float(summedWeights) / self.xsec if self.xsec else 0.
    self.sampleTree = tchain
    self.files = allFiles
    self.initialized = True
    # NOTE(review): self.intLumi is read here but not set in this method —
    # presumably assigned in __init__; confirm before relying on it.
    logging.debug(
        'Initialized {0}: summedWeights = {1}; xsec = {2}; sampleLumi = {3}; intLumi = {4}'
        .format(self.sample, summedWeights, self.xsec, self.sampleLumi, self.intLumi))
def __init__(self, analysis, sample, **kwargs):
    '''Set up a flattener for one (analysis, sample) pair.

    Keyword arguments (all optional):
      shift, skipHists, intLumi, ntupleDirectory, inputFileList,
      outputFile, treeName, progressbar
    '''
    # default to access via sample/analysis
    self.analysis = analysis
    self.sample = sample
    self.shift = kwargs.pop('shift', '')
    self.skipHists = kwargs.pop('skipHists', False)
    self.isData = isData(self.sample)
    # Lazy defaults below: kwargs.pop(key, expensive_call()) always evaluates
    # the call even when the caller supplied an override, so each default
    # helper lookup is only performed when actually needed.
    self.intLumi = kwargs['intLumi'] if 'intLumi' in kwargs else float(getLumi())
    logging.debug('Initializing {0} {1} {2}'.format(self.analysis, self.sample, self.shift))
    # backup passing custom parameters
    if 'ntupleDirectory' in kwargs:
        self.ntupleDirectory = kwargs.pop('ntupleDirectory')
    else:
        self.ntupleDirectory = '{0}/{1}'.format(getNtupleDirectory(self.analysis, shift=self.shift), self.sample)
    self.inputFileList = kwargs.pop('inputFileList', '')
    if 'outputFile' in kwargs:
        self.outputFile = kwargs.pop('outputFile')
    else:
        self.outputFile = getNewFlatHistograms(self.analysis, self.sample, shift=self.shift)
    if os.path.dirname(self.outputFile):
        python_mkdir(os.path.dirname(self.outputFile))
    if 'treeName' in kwargs:
        self.treeName = kwargs.pop('treeName')
    else:
        self.treeName = getTreeName(self.analysis)
    if hasProgress:
        self.pbar = kwargs.pop('progressbar', ProgressBar(widgets=['{0}: '.format(sample), ' ', SimpleProgress(), ' ', Percentage(), ' ', Bar(), ' ', ETA()]))
    else:
        self.pbar = None
    # get stuff needed to flatten
    self.infile = 0
    self.tchain = 0
    self.initialized = False
    self.hists = {}
    self.datasets = {}
def flatten(directory):
    '''Flatten WZ histograms for the sample in *directory*, once per pass/fail region.'''
    wzFlatten = FlattenTree(
        ntupleDirectory=sourceDirectory,
        treeName='WZTree',
    )
    for histName, params in histParameters.iteritems():
        wzFlatten.addHistogram(histName, **params)
    sample = directory.split('/')[-1]
    nl = 3  # number of leptons; previously declared but unused (3 was hard-coded)
    # Each region label has one P(ass)/F(ail) character per lepton.
    for region in ['PPP', 'PPF', 'PFP', 'FPP', 'PFF', 'FPF', 'FFP', 'FFF']:
        # Per-lepton scale factors, times the generator weight for MC.
        scalefactor = '*'.join([scaleMap[region[x]][x] for x in range(nl)] + ['genWeight'])
        if isData(sample):
            scalefactor = '1'
        # Require each lepton tight (P) or not-tight (F) per the region label.
        cut = ' && '.join([
            '{0}=={1}'.format(tightVar[x], 1 if region[x] == 'P' else 0)
            for x in range(nl)
        ] + [baseCut])
        postfix = '' if region == 'PPP' else region
        wzFlatten.flatten(sample, 'flat/WZ/{0}.root'.format(sample), cut, scalefactor=scalefactor, postfix=postfix)
def main(argv=None):
    '''Print a cross-section summary table for every sample in the ntuple directory.

    With --verbose and an analysis selected, additionally chains the ntuples to
    report entry counts, negative-weight fraction, sample luminosity, and
    effective (lumi-scaled) entries for each sample.
    '''
    if argv is None:
        argv = sys.argv[1:]
    args = parse_command_line(argv)
    # Verbose mode adds per-sample statistics columns to the table.
    if args.verbose and args.analysis:
        table = PrettyTable(['Sample','xsec [pb]','entries','ratio neg.','lumi. [/pb]','eff. entries'])
    else:
        table = PrettyTable(['Sample','xsec [pb]'])
    table.align = 'r'
    table.align['Sample'] = 'l'
    ntupleDir = getAnalysisNtupleDirectory(args.analysis,True) if args.verbose and args.analysis else getNtupleDirectory(version=args.version)
    #Odd mix of local pathnames and xrootd access
    # for sample in sorted(hdfs_ls_directory(ntupleDir)):
    for sample in (glob.glob('/'.join([ntupleDir,'*']))):
        name = os.path.basename(sample)
        logging.info('Processing {0}'.format(name))
        data = isData(name)
        xsec = getXsec(name)
        if args.verbose and args.analysis:
            print sample
            fnames = get_hdfs_root_files(sample)
            # get total events, total weights
            tree = ROOT.TChain(getTreeName(args.analysis))
            summedWeights = 0.
            for f in fnames:
                # Files are accessed via the local /hdfs mount.
                tfile = ROOT.TFile.Open('/hdfs'+f)
                summedWeights += tfile.Get("summedWeights").GetBinContent(1)
                tfile.Close()
                tree.Add('/hdfs'+f)
            numEntries = tree.GetEntries(args.selection)
            weightedEntries = 0.
            negevents = 0.
            # Iterate only the selected events to sum the generator weights.
            seltree = tree.CopyTree(args.selection)
            for row in seltree:
                if data:
                    weightedEntries += 1.
                else:
                    weightedEntries += row.genWeight
                    if row.genWeight<0.: negevents += 1
            if data:
                sampleLumi = getLumi()
            else:
                # Effective sample luminosity = sum of weights / cross section.
                sampleLumi = float(summedWeights)/xsec if xsec else 0.
            negratio = float(negevents)/numEntries if numEntries else 0.
            effevents = weightedEntries*getLumi()/sampleLumi if sampleLumi else 0.
            table.add_row([name,'{0:.6f}'.format(float(xsec)),numEntries,'{0:.3}'.format(float(negratio)),'{0:.6f}'.format(float(sampleLumi)),'{0:.3f}'.format(float(effevents))])
        else:
            table.add_row([name,xsec])
    print table.get_string()
def __flatten2D(self, selection, **kwargs):
    '''Produce flat 2D histograms for a given selection.

    selection   -- cut string applied to every Draw call
    scalefactor -- weight expression; defaults to '1' for data, 'genWeight' for MC
    postfix     -- optional histogram-name suffix (accepted for interface parity)
    '''
    if not hasProgress:
        logging.info('Flattening {0}'.format(self.sample))
    scalefactor = kwargs.pop('scalefactor', '1' if isData(self.sample) else 'genWeight')
    postfix = kwargs.pop('postfix', '')
    # Draw directly from the full tree; the selection is applied in the Draw
    # expression instead of materializing a skimmed copy.
    #tree = self.sampleTree.CopyTree(selection)
    tree = self.sampleTree
    if not tree: return
    # setup outputs (os.makedirs instead of shelling out to `mkdir -p`)
    outdir = os.path.dirname(self.outputFileName)
    if outdir and not os.path.isdir(outdir):
        os.makedirs(outdir)
    self.__open(self.outputFileName)
    # scale MC to the target integrated luminosity
    if not isData(self.sample):
        scalefactor = '{0}*{1}'.format(scalefactor, float(self.intLumi) / self.sampleLumi)
    # make each histogram
    for histName, params in self.histParameters2D.iteritems():
        drawString = '{0}:{1}>>{2}({3})'.format(
            params['yVariable'], params['xVariable'], histName,
            ', '.join([str(x) for x in params['xBinning'] + params['yBinning']]))
        # BUG FIX: the selection argument was previously ignored ('1' was drawn
        # unconditionally); now that CopyTree(selection) is disabled the cut
        # must be applied here.
        selectionString = '{0}*({1})'.format(scalefactor, selection if selection else '1')
        tree.Draw(drawString, selectionString, 'goff')
        self.currVal += 1
        if hasProgress: self.pbar.update(self.currVal)
    self.__write()
def main(argv=None):
    '''Print a cross-section summary table for every sample in the ntuple directory.

    With --verbose and an analysis selected, additionally chains the ntuples to
    report entry counts, negative-weight fraction, sample luminosity, and
    effective (lumi-scaled) entries for each sample.
    '''
    if argv is None:
        argv = sys.argv[1:]
    args = parse_command_line(argv)
    # Verbose mode adds per-sample statistics columns to the table.
    if args.verbose and args.analysis:
        table = PrettyTable(['Sample','xsec [pb]','entries','ratio neg.','lumi. [/pb]','eff. entries'])
    else:
        table = PrettyTable(['Sample','xsec [pb]'])
    table.align = 'r'
    table.align['Sample'] = 'l'
    ntupleDir = getAnalysisNtupleDirectory(args.analysis) if args.verbose and args.analysis else getNtupleDirectory(version=args.version)
    for sample in sorted(glob.glob(os.path.join(ntupleDir,'*'))):
        name = os.path.basename(sample)
        logging.info('Processing {0}'.format(name))
        data = isData(name)
        xsec = getXsec(name)
        if args.verbose and args.analysis:
            fnames = get_hdfs_root_files(sample)
            # get total events, total weights
            tree = ROOT.TChain(getTreeName(args.analysis))
            summedWeights = 0.
            for f in fnames:
                # Files are accessed via the local /hdfs mount.
                tfile = ROOT.TFile.Open('/hdfs'+f)
                summedWeights += tfile.Get("summedWeights").GetBinContent(1)
                tfile.Close()
                tree.Add('/hdfs'+f)
            numEntries = tree.GetEntries(args.selection)
            weightedEntries = 0.
            negevents = 0.
            # Iterate only the selected events to sum the generator weights.
            seltree = tree.CopyTree(args.selection)
            for row in seltree:
                if data:
                    weightedEntries += 1.
                else:
                    weightedEntries += row.genWeight
                    if row.genWeight<0.: negevents += 1
            if data:
                sampleLumi = getLumi()
            else:
                # Effective sample luminosity = sum of weights / cross section.
                sampleLumi = float(summedWeights)/xsec if xsec else 0.
            negratio = float(negevents)/numEntries if numEntries else 0.
            effevents = weightedEntries*getLumi()/sampleLumi if sampleLumi else 0.
            table.add_row([name,'{0:.6f}'.format(float(xsec)),numEntries,'{0:.3}'.format(float(negratio)),'{0:.6f}'.format(float(sampleLumi)),'{0:.3f}'.format(float(effevents))])
        else:
            table.add_row([name,xsec])
    print table.get_string()
def flatten(directory):
    '''Flatten WZ histograms for the sample in *directory*, once per pass/fail region.'''
    wzFlatten = FlattenTree(
        ntupleDirectory=sourceDirectory,
        treeName='WZTree',
    )
    for histName, params in histParameters.iteritems():
        wzFlatten.addHistogram(histName, **params)
    sample = directory.split('/')[-1]
    nl = 3  # number of leptons; previously declared but unused (3 was hard-coded)
    # Each region label has one P(ass)/F(ail) character per lepton.
    for region in ['PPP', 'PPF', 'PFP', 'FPP', 'PFF', 'FPF', 'FFP', 'FFF']:
        # Per-lepton scale factors, times the generator weight for MC.
        scalefactor = '*'.join([scaleMap[region[x]][x] for x in range(nl)] + ['genWeight'])
        if isData(sample):
            scalefactor = '1'
        # Require each lepton tight (P) or not-tight (F) per the region label.
        cut = ' && '.join([
            '{0}=={1}'.format(tightVar[x], 1 if region[x] == 'P' else 0)
            for x in range(nl)
        ] + [baseCut])
        postfix = '' if region == 'PPP' else region
        wzFlatten.flatten(sample, 'flat/WZ/{0}.root'.format(sample), cut, scalefactor=scalefactor, postfix=postfix)
def _getCount(self,processName,directory,**kwargs):
    '''Get count for process

    Returns a (value, error) pair summed over all samples of the process.
    With a custom selection the counts are recomputed from the trees;
    otherwise they are read back from stored flat histograms.
    '''
    analysis = self.analysisDict[processName]
    scalefactor = kwargs.pop('scalefactor','1')          # weight applied to all samples
    mcscalefactor = kwargs.pop('mcscalefactor','1')      # extra weight for MC only
    datascalefactor = kwargs.pop('datascalefactor','1')  # extra weight for data only
    selection = kwargs.pop('selection','')               # optional custom cut string
    mccut = kwargs.pop('mccut','')                       # extra cut applied to MC only
    poisson = kwargs.pop('poisson',self.poisson)         # use Poisson errors?
    # check if it is a map, list, or directory
    if isinstance(directory,dict): # its a map
        directory = directory[processName]
    if isinstance(directory,basestring): # its a single directory
        directory = [directory]
    if processName in self.processDict:
        logging.debug('Process: {0}'.format(processName))
        counts = []
        for dirName in directory:
            logging.debug('Directory: {0}'.format(dirName))
            for sampleName in self.processDict[processName]:
                logging.debug('Sample: {0}'.format(sampleName))
                if selection:
                    # Recompute the count with the custom selection and weights.
                    logging.debug('Custom selection')
                    sf = '*'.join([scalefactor,datascalefactor if isData(sampleName) else mcscalefactor])
                    fullcut = ' && '.join([selection,mccut]) if mccut and not isData(sampleName) else selection
                    # Append any per-sample cut configured for this process.
                    if processName in self.processSampleCuts:
                        if sampleName in self.processSampleCuts[processName]:
                            fullcut += ' && {0}'.format(self.processSampleCuts[processName][sampleName])
                    # scale a sample via a cut
                    if processName in self.scales and sampleName in self.scales[processName]:
                        for cut in self.scales[processName][sampleName]:
                            thissf = '{0}*{1}'.format(sf,self.scales[processName][sampleName][cut])
                            thisFullCut = '{0} && {1}'.format(fullcut, cut)
                            count = self._getTempCount(sampleName,thisFullCut,thissf,analysis=analysis,poisson=poisson)
                            logging.debug('Count: {0} +/- {1}'.format(*count))
                            if count: counts += [count]
                    else:
                        count = self._getTempCount(sampleName,fullcut,sf,analysis=analysis,poisson=poisson)
                        logging.debug('Count: {0} +/- {1}'.format(*count))
                        if count: counts += [count]
                else:
                    # No custom selection: read the previously stored count.
                    count = self._readSampleCount(sampleName,dirName,analysis=analysis,poisson=poisson)
                    logging.debug('Count: {0} +/- {1}'.format(*count))
                    if count: counts += [count]
        if not counts:
            logging.debug('No entries for {0}'.format(processName))
            return (0., self._getPoisson(0.)) if poisson else (0.,0.)
        if len(counts)==1:
            if poisson:
                # Assumes a poisson count carries a raw entry count at index 2
                # (TODO confirm against _getTempCount/_readSampleCount); the
                # Poisson error on the raw count is rescaled by value/raw.
                perr = self._getPoisson(counts[0][2])
                w = float(counts[0][0])/counts[0][2]
                val = counts[0][0]
                err = perr * w
            else:
                if len(counts[0])!=2: print counts  # debugging aid for malformed counts
                val, err = counts[0]
            logging.debug('Total: {0} +/- {1}'.format(val,err))
            return val,err
        else:
            if poisson:
                # Convert each (value, err, raw) entry to a rescaled (value, err) pair.
                newcounts = []
                for count in counts:
                    perr = self._getPoisson(count[2])
                    w = float(count[0])/count[2]
                    val = count[0]
                    err = perr * w
                    newcounts += [[val,err]]
                counts = newcounts
            total = sumWithError(*counts)
            logging.debug('Total: {0} +/- {1}'.format(*total))
            return total
    else:
        return (0.,0.)
def _getCount(self,processName,directory,**kwargs):
    '''Get count for process

    Returns a (value, error) pair summed over all samples of the process.
    With a custom selection the counts are recomputed from the trees;
    otherwise they are read back from stored flat histograms.
    '''
    analysis = self.analysisDict[processName]
    scalefactor = kwargs.pop('scalefactor','1')          # weight applied to all samples
    mcscalefactor = kwargs.pop('mcscalefactor','1')      # extra weight for MC only
    datascalefactor = kwargs.pop('datascalefactor','1')  # extra weight for data only
    selection = kwargs.pop('selection','')               # optional custom cut string
    mccut = kwargs.pop('mccut','')                       # extra cut applied to MC only
    poisson = kwargs.pop('poisson',self.poisson)         # use Poisson errors?
    # check if it is a map, list, or directory
    if isinstance(directory,dict): # its a map
        directory = directory[processName]
    if isinstance(directory,basestring): # its a single directory
        directory = [directory]
    if processName in self.processDict:
        logging.debug('Process: {0}'.format(processName))
        counts = []
        for dirName in directory:
            logging.debug('Directory: {0}'.format(dirName))
            for sampleName in self.processDict[processName]:
                logging.debug('Sample: {0}'.format(sampleName))
                if selection:
                    # Recompute the count with the custom selection and weights.
                    logging.debug('Custom selection')
                    sf = '*'.join([scalefactor,datascalefactor if isData(sampleName) else mcscalefactor])
                    fullcut = ' && '.join([selection,mccut]) if mccut and not isData(sampleName) else selection
                    # Append any per-sample cut configured for this process.
                    if processName in self.processSampleCuts:
                        if sampleName in self.processSampleCuts[processName]:
                            fullcut += ' && {0}'.format(self.processSampleCuts[processName][sampleName])
                    # scale a sample via a cut
                    if processName in self.scales and sampleName in self.scales[processName]:
                        for cut in self.scales[processName][sampleName]:
                            thissf = '{0}*{1}'.format(sf,self.scales[processName][sampleName][cut])
                            thisFullCut = '{0} && {1}'.format(fullcut, cut)
                            count = self._getTempCount(sampleName,thisFullCut,thissf,analysis=analysis,poisson=poisson)
                            logging.debug('Count: {0} +/- {1}'.format(*count))
                            if count: counts += [count]
                    else:
                        count = self._getTempCount(sampleName,fullcut,sf,analysis=analysis,poisson=poisson)
                        logging.debug('Count: {0} +/- {1}'.format(*count))
                        if count: counts += [count]
                else:
                    # No custom selection: read the previously stored count.
                    count = self._readSampleCount(sampleName,dirName,analysis=analysis,poisson=poisson)
                    logging.debug('Count: {0} +/- {1}'.format(*count))
                    if count: counts += [count]
        if not counts:
            logging.debug('No entries for {0}'.format(processName))
            return (0., self._getPoisson(0.)) if poisson else (0.,0.)
        if len(counts)==1:
            if poisson:
                # Assumes a poisson count carries a raw entry count at index 2
                # (TODO confirm against _getTempCount/_readSampleCount); the
                # Poisson error on the raw count is rescaled by value/raw.
                perr = self._getPoisson(counts[0][2])
                w = float(counts[0][0])/counts[0][2]
                val = counts[0][0]
                err = perr * w
            else:
                if len(counts[0])!=2: print counts  # debugging aid for malformed counts
                val, err = counts[0]
            logging.debug('Total: {0} +/- {1}'.format(val,err))
            return val,err
        else:
            if poisson:
                # Convert each (value, err, raw) entry to a rescaled (value, err) pair.
                newcounts = []
                for count in counts:
                    perr = self._getPoisson(count[2])
                    w = float(count[0])/count[2]
                    val = count[0]
                    err = perr * w
                    newcounts += [[val,err]]
                counts = newcounts
            total = sumWithError(*counts)
            logging.debug('Total: {0} +/- {1}'.format(*total))
            return total
    else:
        return (0.,0.)