Beispiel #1
0
 def __flatten2D(self, selection, **kwargs):
     '''Produce flat 2D histograms for the sample tree.

     Draws one 2D histogram per entry of ``self.histParameters2D`` from
     ``self.sampleTree`` and writes the result to ``self.outputFileName``.

     Arguments:
         selection: selection string.  NOTE(review): it is not applied in
             this method -- every histogram is drawn with the trivial cut
             '1' (the CopyTree call that used it is disabled); confirm
             that this is intended.
         scalefactor (keyword): weight expression, by default '1' for data
             and 'genWeight' otherwise.  For non-data samples it is
             multiplied by intLumi / sampleLumi.
         postfix (keyword): accepted but not used here.

     Returns None; nothing is done when there is no sample tree.
     '''
     if not hasProgress: logging.info('Flattening {0}'.format(self.sample))
     sampleIsData = isData(self.sample)
     scalefactor = kwargs.pop('scalefactor',
                              '1' if sampleIsData else 'genWeight')
     kwargs.pop('postfix', '')  # accepted for compatibility, unused
     tree = self.sampleTree
     if not tree: return
     # setup outputs: create the directory without going through a shell,
     # expanding '~' and environment variables like the shell used to
     outputDir = os.path.expandvars(
         os.path.expanduser(os.path.dirname(self.outputFileName)))
     if outputDir:
         try:
             os.makedirs(outputDir)
         except OSError:
             # already existing (possibly created concurrently) is fine
             if not os.path.isdir(outputDir): raise
     self.__open(self.outputFileName)
     if not sampleIsData:
         if self.sampleLumi:
             lumiScale = float(self.intLumi) / self.sampleLumi
         else:
             # sampleLumi is 0 when the sample has no cross section; scale
             # to zero instead of dividing by zero
             logging.warning(
                 'No sample luminosity for {0}; histograms will be scaled to zero'
                 .format(self.sample))
             lumiScale = 0.
         scalefactor = '{0}*{1}'.format(scalefactor, lumiScale)
     # make each histogram
     selectionString = '{0}*({1})'.format(scalefactor, '1')
     for histName, params in self.histParameters2D.iteritems():
         binning = ', '.join(
             str(x) for x in params['xBinning'] + params['yBinning'])
         drawString = '{0}:{1}>>{2}({3})'.format(
             params['yVariable'], params['xVariable'], histName, binning)
         tree.Draw(drawString, selectionString, 'goff')
         self.currVal += 1
         if hasProgress: self.pbar.update(self.currVal)
     self.__write()
 def __initializeNtuple(self):
     '''Chain the input ntuples and compute the sample normalisation.

     The input files are read from ``self.inputFileList`` (one name per
     line, blank lines ignored) when it is set; otherwise every ``*.root``
     file directly inside ``self.ntupleDirectory`` is used.  Bin 1 of the
     "summedWeights" object of each file is accumulated and the file is
     added to a TChain named ``self.treeName``.

     Sets ``self.intLumi``, ``self.xsec``, ``self.sampleLumi`` (summed
     weights divided by the cross section, or 0 without a cross section),
     ``self.sampleTree``, ``self.files`` and ``self.initialized``.
     '''
     tchain = ROOT.TChain(self.treeName)
     if self.inputFileList:  # reading from a passed list of inputfiles
         with open(self.inputFileList, 'r') as listFile:
             stripped = [line.strip() for line in listFile]
         # a blank line is not a file name and cannot be opened
         allFiles = [name for name in stripped if name]
     else:  # reading from an input directory (all files in directory will be processed)
         allFiles = glob.glob('{0}/*.root'.format(self.ntupleDirectory))
     if not allFiles:
         logging.error('No files found for sample {0}'.format(self.sample))
     summedWeights = 0.
     for fileName in allFiles:
         tfile = ROOT.TFile.Open(fileName)
         summedWeights += tfile.Get("summedWeights").GetBinContent(1)
         tfile.Close()
         tchain.Add(fileName)
     if not summedWeights and not isData(self.sample):
         logging.warning('No events for sample {0}'.format(self.sample))
     self.intLumi = float(getLumi())
     self.xsec = getXsec(self.sample)
     self.sampleLumi = float(summedWeights) / self.xsec if self.xsec else 0.
     self.sampleTree = tchain
     self.files = allFiles
     self.initialized = True
     logging.debug(
         'Initialized {0}: summedWeights = {1}; xsec = {2}; sampleLumi = {3}; intLumi = {4}'
         .format(self.sample, summedWeights, self.xsec, self.sampleLumi,
                 self.intLumi))
 def __initializeNtuple(self):
     '''Chain the input ntuples and compute the sample luminosity.

     Input files come from ``self.inputFileList`` (one name per line, blank
     lines ignored) when it is set.  Otherwise ``self.ntupleDirectory`` is
     walked recursively for ``*.root`` files, skipping every directory
     whose path contains 'failed'.  For each file, bin 1 of its
     "summedWeights" object is accumulated and the file is added to a
     TChain named ``self.treeName``.

     Sets ``self.xsec``, ``self.sampleLumi`` (summed weights divided by the
     cross section, or 0 without a cross section), ``self.sampleTree``,
     ``self.files`` and ``self.initialized``.  ``self.intLumi`` is only
     read here, for the debug message.
     '''
     tchain = ROOT.TChain(self.treeName)
     allFiles = []
     if self.inputFileList: # reading from a passed list of inputfiles
         with open(self.inputFileList,'r') as listFile:
             for line in listFile:
                 name = line.strip()
                 # a blank line is not a file name and cannot be opened
                 if name: allFiles.append(name)
     else: # reading from an input directory (all files below it will be processed)
         for root, dirnames, fnames in os.walk(self.ntupleDirectory):
             if 'failed' in root: continue
             allFiles.extend(os.path.join(root,fname) for fname in fnmatch.filter(fnames, '*.root'))
     if not allFiles: logging.error('No files found for sample {0}'.format(self.sample))
     summedWeights = 0.
     for fileName in allFiles:
         tfile = ROOT.TFile.Open(fileName)
         summedWeights += tfile.Get("summedWeights").GetBinContent(1)
         tfile.Close()
         tchain.Add(fileName)
     if not summedWeights and not isData(self.sample): logging.warning('No events for sample {0}'.format(self.sample))
     self.xsec = getXsec(self.sample)
     self.sampleLumi = float(summedWeights)/self.xsec if self.xsec else 0.
     self.sampleTree = tchain
     self.files = allFiles
     self.initialized = True
     logging.debug('Initialized {0}: summedWeights = {1}; xsec = {2}; sampleLumi = {3}; intLumi = {4}'.format(self.sample,summedWeights,self.xsec,self.sampleLumi,self.intLumi))
Beispiel #4
0
 def __init__(self,analysis,sample,**kwargs):
     '''Set up the flattening state for one sample of an analysis.

     Arguments:
         analysis: analysis name, used to look up default locations.
         sample: sample name.
         shift (keyword): shift label, '' by default.
         skipHists (keyword): stored as ``self.skipHists``, False by default.
         intLumi (keyword): integrated luminosity; defaults to getLumi().
         ntupleDirectory, inputFileList, outputFile, treeName (keywords):
             overrides for the values derived from analysis and sample.
         progressbar (keyword): progress bar instance, only looked at when
             progress bars are available.
     '''
     # default to access via sample/analysis
     self.analysis = analysis
     self.sample = sample
     self.shift = kwargs.pop('shift','')
     self.skipHists = kwargs.pop('skipHists',False)
     self.isData = isData(self.sample)
     lumiDefault = float(getLumi())
     self.intLumi = kwargs.get('intLumi',lumiDefault)
     initMessage = 'Initializing {0} {1} {2}'.format(self.analysis,self.sample,self.shift)
     logging.debug(initMessage)

     # backup passing custom parameters
     baseNtupleDir = getNtupleDirectory(self.analysis,shift=self.shift)
     ntupleDirDefault = '{0}/{1}'.format(baseNtupleDir,self.sample)
     self.ntupleDirectory = kwargs.pop('ntupleDirectory',ntupleDirDefault)
     self.inputFileList = kwargs.pop('inputFileList','')
     outputDefault = getNewFlatHistograms(self.analysis,self.sample,shift=self.shift)
     self.outputFile = kwargs.pop('outputFile',outputDefault)
     outputDir = os.path.dirname(self.outputFile)
     if outputDir:
         python_mkdir(outputDir)
     treeNameDefault = getTreeName(self.analysis)
     self.treeName = kwargs.pop('treeName',treeNameDefault)

     # a default progress bar is only built when the package is available
     if not hasProgress:
         self.pbar = None
     else:
         barWidgets = ['{0}: '.format(sample),' ',SimpleProgress(),' ',Percentage(),' ',Bar(),' ',ETA()]
         self.pbar = kwargs.pop('progressbar',ProgressBar(widgets=barWidgets))

     # get stuff needed to flatten
     self.infile = 0
     self.tchain = 0
     self.initialized = False
     self.hists = {}
     self.datasets = {}
Beispiel #5
0
def flatten(directory):
    '''Flatten one WZ sample once for every pass/fail region.

    The sample name is the last path component of ``directory``.  For each
    three-letter region code ('P' or 'F' per position) the tree is
    flattened into 'flat/WZ/<sample>.root' with a cut requiring the
    matching ``tightVar`` values on top of ``baseCut``.  Data are weighted
    by '1'; other samples by the per-position ``scaleMap`` factors times
    'genWeight'.

    Arguments:
        directory: path of the sample directory; a trailing '/' is allowed.
            NOTE(review): the ntuples themselves are read from the
            module-level ``sourceDirectory``, not from this argument --
            confirm that this is intended.
    '''
    wzFlatten = FlattenTree(
        ntupleDirectory=sourceDirectory,
        treeName='WZTree',
    )
    for histName, params in histParameters.iteritems():
        wzFlatten.addHistogram(histName, **params)

    # strip trailing slashes first: 'a/b/'.split('/')[-1] would be ''
    sample = directory.rstrip('/').split('/')[-1]
    sampleIsData = isData(sample)
    outputFile = 'flat/WZ/{0}.root'.format(sample)

    nl = 3  # number of positions in a region code
    for region in ['PPP', 'PPF', 'PFP', 'FPP', 'PFF', 'FPF', 'FFP', 'FFF']:
        if sampleIsData:
            scalefactor = '1'
        else:
            scalefactor = '*'.join([scaleMap[region[x]][x]
                                    for x in range(nl)] + ['genWeight'])

        cut = ' && '.join([
            '{0}=={1}'.format(tightVar[x], 1 if region[x] == 'P' else 0)
            for x in range(nl)
        ] + [baseCut])

        # the fully passing region carries no postfix
        postfix = '' if region == 'PPP' else region
        wzFlatten.flatten(sample,
                          outputFile,
                          cut,
                          scalefactor=scalefactor,
                          postfix=postfix)
Beispiel #6
0
def main(argv=None):
    '''Print a table of the available samples and their cross sections.

    When both ``args.verbose`` and ``args.analysis`` are set, the ntuples
    of each sample are also opened to report the number of selected
    entries, the fraction of negative-weight events, the sample luminosity
    and the effective number of entries (weighted entries scaled by
    getLumi() / sample luminosity).

    Arguments:
        argv: command line arguments; defaults to ``sys.argv[1:]``.
    '''
    if argv is None:
        argv = sys.argv[1:]

    args = parse_command_line(argv)
    detailed = args.verbose and args.analysis

    if detailed:
        table = PrettyTable(['Sample','xsec [pb]','entries','ratio neg.','lumi. [/pb]','eff. entries'])
    else:
        table = PrettyTable(['Sample','xsec [pb]'])
    table.align = 'r'
    table.align['Sample'] = 'l'

    ntupleDir = getAnalysisNtupleDirectory(args.analysis,True) if detailed else getNtupleDirectory(version=args.version)

    # NOTE: odd mix of local pathnames (glob) and '/hdfs'-prefixed access below
    for sample in glob.glob('/'.join([ntupleDir,'*'])):
        name = os.path.basename(sample)
        logging.info('Processing {0}'.format(name))
        data = isData(name)
        xsec = getXsec(name)
        if not detailed:
            table.add_row([name,xsec])
            continue

        print(sample)
        fnames = get_hdfs_root_files(sample)
        # get total events, total weights
        tree = ROOT.TChain(getTreeName(args.analysis))
        summedWeights = 0.
        for f in fnames:
            tfile = ROOT.TFile.Open('/hdfs'+f)
            summedWeights += tfile.Get("summedWeights").GetBinContent(1)
            tfile.Close()
            tree.Add('/hdfs'+f)
        numEntries = tree.GetEntries(args.selection)
        weightedEntries = 0.
        negevents = 0.
        seltree = tree.CopyTree(args.selection)
        # CopyTree gives back a null pointer when there is nothing to copy
        # from (e.g. a sample without files); a null tree cannot be iterated
        if seltree:
            for row in seltree:
                if data:
                    weightedEntries += 1.
                else:
                    weightedEntries += row.genWeight
                    if row.genWeight<0.: negevents += 1
        if data:
            sampleLumi = getLumi()
        else:
            sampleLumi = float(summedWeights)/xsec if xsec else 0.
        negratio = float(negevents)/numEntries if numEntries else 0.
        effevents = weightedEntries*getLumi()/sampleLumi if sampleLumi else 0.
        table.add_row([name,'{0:.6f}'.format(float(xsec)),numEntries,'{0:.3}'.format(float(negratio)),'{0:.6f}'.format(float(sampleLumi)),'{0:.3f}'.format(float(effevents))])

    print(table.get_string())
Beispiel #7
0
 def __flatten2D(self,selection,**kwargs):
     '''Produce flat 2D histograms for the sample tree.

     One 2D histogram is drawn from ``self.sampleTree`` for each entry of
     ``self.histParameters2D``; the output goes to ``self.outputFileName``.

     Arguments:
         selection: selection string.  NOTE(review): not applied here --
             the histograms are drawn with the trivial cut '1' since the
             CopyTree call is disabled; confirm that this is intended.
         scalefactor (keyword): weight expression; '1' for data and
             'genWeight' otherwise when not given.  Non-data samples are
             additionally scaled by intLumi / sampleLumi.
         postfix (keyword): accepted but not used here.

     Returns None; returns early when there is no sample tree.
     '''
     if not hasProgress: logging.info('Flattening {0}'.format(self.sample))
     dataSample = isData(self.sample)
     scalefactor = kwargs.pop('scalefactor','1' if dataSample else 'genWeight')
     kwargs.pop('postfix','') # accepted for compatibility, unused
     tree = self.sampleTree
     if not tree: return
     # setup outputs (no shell involved; '~' and $VARS are still expanded)
     outDir = os.path.dirname(self.outputFileName)
     outDir = os.path.expandvars(os.path.expanduser(outDir))
     if outDir and not os.path.isdir(outDir):
         try:
             os.makedirs(outDir)
         except OSError:
             # another job may have created it in the meantime
             if not os.path.isdir(outDir): raise
     self.__open(self.outputFileName)
     if not dataSample:
         # sampleLumi is 0 for samples without a cross section: scale to
         # zero rather than raising ZeroDivisionError
         if not self.sampleLumi:
             logging.warning('No sample luminosity for {0}; histograms will be scaled to zero'.format(self.sample))
         lumiRatio = float(self.intLumi)/self.sampleLumi if self.sampleLumi else 0.
         scalefactor = '{0}*{1}'.format(scalefactor,lumiRatio)
     # make each histogram
     selectionString = '{0}*({1})'.format(scalefactor,'1')
     for histName, params in self.histParameters2D.iteritems():
         bins = [str(x) for x in params['xBinning']+params['yBinning']]
         drawString = '{0}:{1}>>{2}({3})'.format(params['yVariable'],params['xVariable'],histName,', '.join(bins))
         tree.Draw(drawString,selectionString,'goff')
         self.currVal += 1
         if hasProgress: self.pbar.update(self.currVal)
     self.__write()
def main(argv=None):
    '''Print a table of samples, sorted by path, with their cross sections.

    With ``args.verbose`` and ``args.analysis`` both set, every sample's
    ntuples are opened as well and the table gains the selected entries,
    the ratio of negative-weight events, the sample luminosity and the
    effective entries (weighted entries times getLumi() / sample
    luminosity).

    Arguments:
        argv: command line arguments; ``sys.argv[1:]`` when None.
    '''
    if argv is None:
        argv = sys.argv[1:]

    args = parse_command_line(argv)
    verboseAnalysis = args.verbose and args.analysis

    if verboseAnalysis:
        table = PrettyTable(['Sample','xsec [pb]','entries','ratio neg.','lumi. [/pb]','eff. entries'])
    else:
        table = PrettyTable(['Sample','xsec [pb]'])
    table.align = 'r'
    table.align['Sample'] = 'l'

    ntupleDir = getAnalysisNtupleDirectory(args.analysis) if verboseAnalysis else getNtupleDirectory(version=args.version)

    for sample in sorted(glob.glob(os.path.join(ntupleDir,'*'))):
        name = os.path.basename(sample)
        logging.info('Processing {0}'.format(name))
        data = isData(name)
        xsec = getXsec(name)
        if verboseAnalysis:
            fnames = get_hdfs_root_files(sample)
            # get total events, total weights
            tree = ROOT.TChain(getTreeName(args.analysis))
            summedWeights = 0.
            for f in fnames:
                hdfsName = '/hdfs'+f
                tfile = ROOT.TFile.Open(hdfsName)
                summedWeights += tfile.Get("summedWeights").GetBinContent(1)
                tfile.Close()
                tree.Add(hdfsName)
            numEntries = tree.GetEntries(args.selection)
            weightedEntries = 0.
            negevents = 0.
            seltree = tree.CopyTree(args.selection)
            # an empty chain yields a null pointer from CopyTree, which
            # cannot be iterated; treat it as zero selected rows
            rows = seltree if seltree else []
            for row in rows:
                if data:
                    weightedEntries += 1.
                else:
                    weightedEntries += row.genWeight
                    if row.genWeight<0.: negevents += 1
            if data:
                sampleLumi = getLumi()
            else:
                sampleLumi = float(summedWeights)/xsec if xsec else 0.
            negratio = float(negevents)/numEntries if numEntries else 0.
            effevents = weightedEntries*getLumi()/sampleLumi if sampleLumi else 0.
            table.add_row([name,'{0:.6f}'.format(float(xsec)),numEntries,'{0:.3}'.format(float(negratio)),'{0:.6f}'.format(float(sampleLumi)),'{0:.3f}'.format(float(effevents))])
        else:
            table.add_row([name,xsec])

    print(table.get_string())
Beispiel #9
0
def flatten(directory):
    '''Flatten a WZ sample for each of the eight pass/fail regions.

    The last path component of ``directory`` is taken as the sample name.
    Every region code has one 'P' or 'F' per position; the cut requires
    the corresponding ``tightVar`` to equal 1 or 0 and is and-ed with
    ``baseCut``.  Output goes to 'flat/WZ/<sample>.root'.  The weight is
    '1' for data, otherwise the ``scaleMap`` factors times 'genWeight'.

    Arguments:
        directory: sample directory path, with or without a trailing '/'.
            NOTE(review): ntuples are read from the module-level
            ``sourceDirectory`` rather than from this argument -- confirm.
    '''
    wzFlatten = FlattenTree(
        ntupleDirectory = sourceDirectory,
        treeName = 'WZTree',
    )
    for histName, params in histParameters.iteritems():
        wzFlatten.addHistogram(histName,**params)

    # without the rstrip a trailing '/' would leave an empty sample name
    sample = directory.rstrip('/').split('/')[-1]
    dataSample = isData(sample)

    nl = 3 # number of positions in a region code
    positions = range(nl)
    for region in ['PPP','PPF','PFP','FPP','PFF','FPF','FFP','FFF']:
        scalefactor = '1' if dataSample else '*'.join([scaleMap[region[x]][x] for x in positions]+['genWeight'])

        requirements = ['{0}=={1}'.format(tightVar[x],1 if region[x]=='P' else 0) for x in positions]
        cut = ' && '.join(requirements+[baseCut])

        # the all-pass region is written without a postfix
        postfix = '' if region=='PPP' else region
        wzFlatten.flatten(sample,'flat/WZ/{0}.root'.format(sample),cut,scalefactor=scalefactor,postfix=postfix)
Beispiel #10
0
 def _getCount(self,processName,directory,**kwargs):
     '''Get count for process

     Collects one count per sample of ``processName`` (and per directory)
     and combines them into a single value and error.

     Arguments:
         processName: key into ``self.analysisDict`` and ``self.processDict``.
         directory: a directory name, a list of directory names, or a dict
             mapping process names to either of those.
         scalefactor (keyword): weight expression applied to every sample.
         mcscalefactor, datascalefactor (keywords): extra weight expression
             for non-data and data samples respectively.
         selection (keyword): custom cut.  When given, counts come from
             ``_getTempCount``; otherwise from ``_readSampleCount``.
         mccut (keyword): cut and-ed to the selection for non-data samples.
         poisson (keyword): use Poisson errors; defaults to ``self.poisson``.

     Returns:
         ``(0., 0.)`` for a process missing from ``self.processDict``;
         ``(0., error)`` when no count was found, with the error being
         ``self._getPoisson(0.)`` in Poisson mode and 0 otherwise;
         ``(value, error)`` for a single count; and the result of
         ``sumWithError`` for several counts.
     '''
     analysis = self.analysisDict[processName]
     scalefactor = kwargs.pop('scalefactor','1')
     mcscalefactor = kwargs.pop('mcscalefactor','1')
     datascalefactor = kwargs.pop('datascalefactor','1')
     selection = kwargs.pop('selection','')
     mccut = kwargs.pop('mccut','')
     poisson = kwargs.pop('poisson',self.poisson)
     # check if it is a map, list, or directory
     if isinstance(directory,dict):       # its a map
         directory = directory[processName]
     if isinstance(directory,basestring): # its a single directory
         directory = [directory]
     if processName not in self.processDict:
         return (0.,0.)

     logging.debug('Process: {0}'.format(processName))
     counts = []
     for dirName in directory:
         logging.debug('Directory: {0}'.format(dirName))
         for sampleName in self.processDict[processName]:
             logging.debug('Sample: {0}'.format(sampleName))
             if not selection:
                 count = self._readSampleCount(sampleName,dirName,analysis=analysis,poisson=poisson)
                 # log only inside the guard: formatting a missing count
                 # would raise before the guard could skip it
                 if count:
                     logging.debug('Count: {0} +/- {1}'.format(*count))
                     counts += [count]
                 continue
             logging.debug('Custom selection')
             sampleIsData = isData(sampleName)
             sf = '*'.join([scalefactor,datascalefactor if sampleIsData else mcscalefactor])
             fullcut = ' && '.join([selection,mccut]) if mccut and not sampleIsData else selection
             if processName in self.processSampleCuts:
                 if sampleName in self.processSampleCuts[processName]:
                     fullcut += ' && {0}'.format(self.processSampleCuts[processName][sampleName])
             # scale a sample via a cut
             if processName in self.scales and sampleName in self.scales[processName]:
                 sampleScales = self.scales[processName][sampleName]
                 for cut in sampleScales:
                     thissf = '{0}*{1}'.format(sf,sampleScales[cut])
                     thisFullCut = '{0} && {1}'.format(fullcut, cut)
                     count = self._getTempCount(sampleName,thisFullCut,thissf,analysis=analysis,poisson=poisson)
                     if count:
                         logging.debug('Count: {0} +/- {1}'.format(*count))
                         counts += [count]
             else:
                 count = self._getTempCount(sampleName,fullcut,sf,analysis=analysis,poisson=poisson)
                 if count:
                     logging.debug('Count: {0} +/- {1}'.format(*count))
                     counts += [count]

     if not counts:
         logging.debug('No entries for {0}'.format(processName))
         return (0., self._getPoisson(0.)) if poisson else (0.,0.)
     if poisson:
         # error = Poisson error on count[2] times the mean weight
         # count[0]/count[2] (count[2] is presumably the unweighted number
         # of entries -- TODO confirm).
         # NOTE(review): a count with count[2] == 0 raises
         # ZeroDivisionError here; the right fallback needs a decision.
         counts = [[c[0], self._getPoisson(c[2]) * (float(c[0])/c[2])] for c in counts]
     if len(counts)==1:
         if len(counts[0])!=2: print(counts)
         val, err = counts[0]
         logging.debug('Total: {0} +/- {1}'.format(val,err))
         return val,err
     total = sumWithError(*counts)
     logging.debug('Total: {0} +/- {1}'.format(*total))
     return total
Beispiel #11
0
 def _getCount(self,processName,directory,**kwargs):
     '''Get count for process

     Gathers a count for every sample of ``processName`` in every given
     directory and reduces them to one value with its error.

     Arguments:
         processName: key into ``self.analysisDict`` and ``self.processDict``.
         directory: one directory name, a list of them, or a dict from
             process name to either form.
         scalefactor (keyword): weight expression used for all samples.
         mcscalefactor, datascalefactor (keywords): additional weight
             expression for non-data / data samples.
         selection (keyword): custom cut; switches the source of the counts
             from ``_readSampleCount`` to ``_getTempCount``.
         mccut (keyword): cut and-ed to the selection for non-data samples.
         poisson (keyword): use Poisson errors; ``self.poisson`` by default.

     Returns:
         ``(0., 0.)`` if the process is not in ``self.processDict``;
         ``(0., self._getPoisson(0.))`` or ``(0., 0.)`` (Poisson mode or
         not) if no count was found; ``(value, error)`` for one count; the
         result of ``sumWithError`` for more than one.
     '''
     analysis = self.analysisDict[processName]
     scalefactor = kwargs.pop('scalefactor','1')
     mcscalefactor = kwargs.pop('mcscalefactor','1')
     datascalefactor = kwargs.pop('datascalefactor','1')
     selection = kwargs.pop('selection','')
     mccut = kwargs.pop('mccut','')
     poisson = kwargs.pop('poisson',self.poisson)
     # check if it is a map, list, or directory
     if isinstance(directory,dict):       # its a map
         directory = directory[processName]
     if isinstance(directory,basestring): # its a single directory
         directory = [directory]
     if processName in self.processDict:
         logging.debug('Process: {0}'.format(processName))
         counts = []
         for dirName in directory:
             logging.debug('Directory: {0}'.format(dirName))
             for sampleName in self.processDict[processName]:
                 logging.debug('Sample: {0}'.format(sampleName))
                 if selection:
                     logging.debug('Custom selection')
                     dataSample = isData(sampleName)
                     sf = '*'.join([scalefactor,datascalefactor if dataSample else mcscalefactor])
                     fullcut = ' && '.join([selection,mccut]) if mccut and not dataSample else selection
                     if processName in self.processSampleCuts:
                         if sampleName in self.processSampleCuts[processName]:
                             fullcut += ' && {0}'.format(self.processSampleCuts[processName][sampleName])
                     # scale a sample via a cut
                     if processName in self.scales and sampleName in self.scales[processName]:
                         for cut in self.scales[processName][sampleName]:
                             thissf = '{0}*{1}'.format(sf,self.scales[processName][sampleName][cut])
                             thisFullCut = '{0} && {1}'.format(fullcut, cut)
                             count = self._getTempCount(sampleName,thisFullCut,thissf,analysis=analysis,poisson=poisson)
                             # the debug line sits inside the guard: unpacking
                             # a missing count would raise before the check
                             if count:
                                 logging.debug('Count: {0} +/- {1}'.format(*count))
                                 counts += [count]
                     else:
                         count = self._getTempCount(sampleName,fullcut,sf,analysis=analysis,poisson=poisson)
                         if count:
                             logging.debug('Count: {0} +/- {1}'.format(*count))
                             counts += [count]
                 else:
                     count = self._readSampleCount(sampleName,dirName,analysis=analysis,poisson=poisson)
                     if count:
                         logging.debug('Count: {0} +/- {1}'.format(*count))
                         counts += [count]
         if not counts:
             logging.debug('No entries for {0}'.format(processName))
             return (0., self._getPoisson(0.)) if poisson else (0.,0.)
         if poisson:
             # Replace each count by [value, Poisson error * mean weight],
             # the mean weight being count[0]/count[2] (count[2] presumably
             # holds the unweighted number of entries -- TODO confirm).
             # NOTE(review): count[2] == 0 raises ZeroDivisionError; the
             # appropriate fallback still has to be decided.
             newcounts = []
             for count in counts:
                 perr = self._getPoisson(count[2])
                 w = float(count[0])/count[2]
                 newcounts += [[count[0],perr * w]]
             counts = newcounts
         if len(counts)>1:
             total = sumWithError(*counts)
             logging.debug('Total: {0} +/- {1}'.format(*total))
             return total
         # exactly one count left
         if len(counts[0])!=2: print(counts)
         val, err = counts[0]
         logging.debug('Total: {0} +/- {1}'.format(val,err))
         return val,err
     else:
         return (0.,0.)