예제 #1
0
def runPlot(opts):
    lepton    = opts.lepton
    batchMode = opts.batch
    inputDir  = opts.input_dir
    outputDir = opts.output_dir
    verbose   = opts.verbose
    debug     = opts.debug
    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir)
    regions = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions)

    inputDir  = outputDir+'/'+lepton+'/histos'
    outputDir = outputDir+'/'+lepton+'/plots'
    mkdirIfNeeded(outputDir)
    histonames = dict((g.name, histonamesOneSample(g.name, variables_to_plot(), regions, leptonSources))
                      for g in groups)
    groups_to_stack = [g.name for g in groups if not g.is_data]
    if verbose:
        print 'groups being included in the compositions: ',groups_to_stack
    for region in regions:
        all_histos = dict([(g.name,
                            rootUtils.fetchObjectsFromFile(os.path.join(inputDir, g.name+'_'+region+'.root'),
                                                           histonames[g.name][region],
                                                           verbose))
                           for g in groups])
        for v in variables_to_plot():
            histos = dict()
            for s in leptonSources:
                histos[s] = summedHisto(histos=[all_histos[g][v][s] for g in groups_to_stack],
                                        label='')
            histos['data'] = all_histos['data'][v]['Unknown']
            plotStackedHistos(histos=histos, datakey='data', stackkeys=leptonSources,
                              outputDir=outputDir+'/'+region, region=region,
                              colors=fakeu.colorsFillSources(), verbose=verbose)
    return
예제 #2
0
def runPlot(opts):
    lepton = opts.lepton
    batchMode = opts.batch
    inputDir = opts.input_dir
    outputDir = opts.output_dir
    verbose = opts.verbose
    debug = opts.debug
    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(
        opts.samples_dir)
    regions = regions_to_plot(opts.include_regions, opts.exclude_regions,
                              opts.regions)

    inputDir = outputDir + '/' + lepton + '/histos'
    outputDir = outputDir + '/' + lepton + '/plots'
    mkdirIfNeeded(outputDir)
    histonames = dict((g.name,
                       histonamesOneSample(g.name, variables_to_plot(),
                                           regions, leptonSources))
                      for g in groups)
    groups_to_stack = [g.name for g in groups if not g.is_data]
    if verbose:
        print 'groups being included in the compositions: ', groups_to_stack
    for region in regions:
        all_histos = dict([(g.name,
                            rootUtils.fetchObjectsFromFile(
                                os.path.join(inputDir,
                                             g.name + '_' + region + '.root'),
                                histonames[g.name][region], verbose))
                           for g in groups])
        for v in variables_to_plot():
            histos = dict()
            for s in leptonSources:
                histos[s] = summedHisto(
                    histos=[all_histos[g][v][s] for g in groups_to_stack],
                    label='')
            histos['data'] = all_histos['data'][v]['Unknown']
            plotStackedHistos(histos=histos,
                              datakey='data',
                              stackkeys=leptonSources,
                              outputDir=outputDir + '/' + region,
                              region=region,
                              colors=fakeu.colorsFillSources(),
                              verbose=verbose)
    return
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fakerate/efficiencies')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-m', '--mode', help='emu')
    parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    mode      = options.mode
    tag       = options.tag
    verbose   = options.verbose
    if not tag : parser.error('tag is a required option')
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    validModes = ['emu']
    if mode not in validModes : parser.error("invalid mode %s"%mode)
    tupleStem, treeName = filter(lambda _: _[0]==mode, fakeu.tupleStemsAndNames)[0]

    templateInputFilename = "*_%(stem)s_tuple_%(tag)s.root" % {'tag':tag, 'stem':tupleStem}
    templateOutputFilename =  "%(stem)s_%(l)s_eff.root" % {'stem':tupleStem.replace('tuple','histos'), 'l':lepton}
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_'+mode+'_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos
    optionsToPrint = ['inputDir', 'outputDir', 'mode', 'tag', 'doFillHistograms', 'cacheFileName', 'onthefly_tight_def']
    if verbose :
        print "working from %s"%os.getcwd()
        print "being called as : %s"%' '.join(os.sys.argv)
        print "options parsed:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint])
        print 'input filenames: ',os.path.join(inputDir, templateInputFilename)
    # collect inputs
    tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename))
    samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose))
    samplesPerGroup = collections.defaultdict(list)
    filenamesPerGroup = collections.defaultdict(list)
    mkdirIfNeeded(outputDir)
    for s, f in zip(samples, tupleFilenames) :
        samplesPerGroup[s.group].append(s)
        filenamesPerGroup[s.group].append(f)
    vars = ['pt', 'pt_eta']
    groups = [g for g in samplesPerGroup.keys() if g is not 'higgs']
    if lepton=='el' : groups = [g for g in groups if g is not 'heavyflavor']
    sourcesThisMode = ['real', 'conv', 'heavy', 'light', 'unknown'] if lepton=='el' else ['real', 'heavy', 'light', 'unknown']
    #fill histos
    if doFillHistograms :
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, groups, sourcesThisMode, mode=mode)
        for group in groups:
            filenames = filenamesPerGroup[group]
            sources = histosPerGroupPerSource.keys()
            histosThisGroupPerSource = dict((s, histosPerGroupPerSource[s][group]) for s in sources)
            histosAnyGroupPerSource  = dict((s, histosPerGroupPerSource[s]['anygroup']) for s in sources) if group!='data' else {}

            chain = r.TChain(treeName)
            [chain.Add(fn) for fn in filenames]
            if verbose: print "%s : %d entries"%(group, chain.GetEntries())
            is_data = group in ['data']
            print 'is_data ',is_data
            num_processed_entries += fillHistos(chain=chain,
                                                histosPerSource=histosThisGroupPerSource,
                                                histosPerSourceAnygroup=histosAnyGroupPerSource,
                                                lepton=lepton,
                                                onthefly_tight_def=onthefly_tight_def,
                                                verbose=verbose)
        writeHistos(cacheFileName, histosPerGroupPerSource, verbose)
        end_time = time.clock()
        delta_time = end_time - start_time
        one_minute = 60
        if verbose:
            print ("processed {0:d} entries ".format(num_processed_entries)
                   +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>one_minute else
                           "{0:.1f} s ".format(delta_time))
                   +"({0:.1f} kHz)".format(num_processed_entries/delta_time))
    # plot histos
    histosPerGroupPerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, groups, sourcesThisMode, mode), verbose)

    # effs = computeEfficiencies(histosPerGroupPerSource) # still [var][gr][source][l/t]
    for v in vars:
        varIs1D, varIs2D = v=='pt', v=='pt_eta'
        densThisSourceThisVar = dictSum(dict((s, histosPerGroupPerSource[v]['anygroup'][s]['loose']) for s in sourcesThisMode),
                                        {'data' : histosPerGroupPerSource[v]['data']['unknown']['loose']})
        numsThisSourceThisVar = dictSum(dict((s, histosPerGroupPerSource[v]['anygroup'][s]['tight']) for s in sourcesThisMode),
                                        {'data' : histosPerGroupPerSource[v]['data']['unknown']['tight']})
        if varIs1D:
            lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#varepsilon(T|L)'
            cname = 'stack_loose_'+lepton
            lT, lY = 'loose '+lepton+', denominator to #varepsilon(T|L)', '#varepsilon(T|L)'
            title = lT+' '+'anysource'+' '+lepton+';'+lX+';'+lY
            plotStackedHistosWithData(densThisSourceThisVar,
                                      outputDir, cname, title,
                                      colors=fakeu.colorsFillSources(),
                                      verbose=verbose)
            cname = 'stack_tight_'+lepton
            lT, lY = 'tight '+lepton+', numerator to #varepsilon(T|L)', '#varepsilon(T|L)'
            title = lT+' '+'anysource'+' '+lepton+';'+lX+';'+lY
            plotStackedHistosWithData(numsThisSourceThisVar,
                                      outputDir, cname, title,
                                      colors=fakeu.colorsFillSources(),
                                      verbose=verbose)

    for s in sourcesThisMode:
        for v in vars:
            groups = first(histosPerGroupPerSource).keys()
            varIs1D, varIs2D = v=='pt', v=='pt_eta'
            # effsThisSourceThisVar = dict((g, effs[v][g][s]) for g in groups)
            densThisSourceThisVar = dictSum(dict((g, histosPerGroupPerSource[v][g][s]['loose'])
                                                 for g in groups if g not in ['anygroup','data']),
                                            {'data' : histosPerGroupPerSource[v]['data']['unknown']['loose']})
            numsThisSourceThisVar = dictSum(dict((g, histosPerGroupPerSource[v][g]['unknown']['tight'])
                                                 for g in groups if g not in ['anygroup','data']),
                                            {'data' : histosPerGroupPerSource[v]['data']['unknown']['tight']})
            if varIs1D:
                # cname = 'eff_'+lepton+'_'+s
                lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#varepsilon(T|L)'
                # title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                # zoomIn = True
                # fakeu.plot1dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn)
                cname = 'stack_loose_'+lepton+'_'+s
                lT, lY = 'loose '+lepton+', denominator to #varepsilon(T|L)', '#varepsilon(T|L)'
                title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                plotStackedHistosWithData(densThisSourceThisVar,
                                          outputDir, cname, title,
                                          colors=SampleUtils.colors,
                                          verbose=verbose)
                cname = 'stack_tight_'+lepton+'_'+s
                lT, lY = 'tight '+lepton+', numerator to #varepsilon(T|L)', '#varepsilon(T|L)'
                title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                plotStackedHistosWithData(numsThisSourceThisVar,
                                          outputDir, cname, title,
                                          colors=SampleUtils.colors,
                                          verbose=verbose)

            # elif varIs2D:
            #     cname = 'eff_'+lepton+'_'+s
            #     lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#eta'
            #     title = lT+' '+s+' '+lepton+';'+lX+';'+lY
            #     fakeu.plot2dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn=zoomIn)
    # writeHistos(outputFileName, effs, verbose)
    if verbose : print "saved scale factors to %s" % outputFileName
                                          colors=SampleUtils.colors,
                                          verbose=verbose)

            # elif varIs2D:
            #     cname = 'eff_'+lepton+'_'+s
            #     lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#eta'
            #     title = lT+' '+s+' '+lepton+';'+lX+';'+lY
            #     fakeu.plot2dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn=zoomIn)
    # writeHistos(outputFileName, effs, verbose)
    if verbose : print "saved scale factors to %s" % outputFileName

#___________________________________________________

# module-level tables used by the functions of this example
leptonTypes = ['loose', 'tight']
# NOTE(review): empty at this point; presumably filled or overridden elsewhere -- confirm
leptonSources = []
# per-source drawing attributes and the enum -> source lookup, all taken from fakeu
colorsFillSources = fakeu.colorsFillSources()
colorsLineSources = fakeu.colorsLineSources()
markersSources = fakeu.markersSources()
enum2source = fakeu.enum2source

def fillHistos(chain, histosPerSource, histosPerSourceAnygroup={}, lepton='', onthefly_tight_def=None, verbose=False):
    """fill the histograms, returns the number of events
    processed. histosPerSource is required; histosPerSourceAnygroup is
    filled only when provided"""
    class Counters: # scope trick (otherwise the counters are unavailable within a nested func)
        """Holder for the nLoose/nTight counts and the totWeightLoose/totWeightTight sums."""
        # NOTE(review): presumably needed because a nested function cannot rebind
        # plain locals of the enclosing function in Python 2 -- confirm
        nLoose, nTight = 0, 0
        totWeightLoose, totWeightTight = 0.0, 0.0
        # note: a regular method named 'str', not __str__, so it has to be called explicitly
        def str(self):
            "return the four counters as 'name : value' (one decimal), comma-separated"
            counterNames = ['nLoose', 'nTight', 'totWeightLoose', 'totWeightTight']
            return ', '.join(["%s : %.1f"%(c, getattr(self, c)) for c in counterNames])
    counters = Counters()
예제 #5
0
            histos['data'] = all_histos['data'][v]['Unknown']
            plotStackedHistos(histos=histos,
                              datakey='data',
                              stackkeys=leptonSources,
                              outputDir=outputDir + '/' + region,
                              region=region,
                              colors=fakeu.colorsFillSources(),
                              verbose=verbose)
    return


#___________________________________________________

# module-level tables taken from fakeu: the lepton source lists,
# the per-source drawing attributes, and the enum -> source lookup
allLeptonSources = fakeu.allLeptonSources()
leptonSources = fakeu.leptonSources()
colorsFillSources = fakeu.colorsFillSources()
colorsLineSources = fakeu.colorsLineSources()
markersSources = fakeu.markersSources()
enum2source = fakeu.enum2source


def histoName(sample, var, selection, source):
    "histogram name 'h_<sample>_<var>_<selection>_<source>'"
    fields = ['h', sample, var, selection, source]
    return '_'.join(fields)


def histonamesOneSample(sample_name, variables, selections, sources):
    "dict of histogram names with keys [sel][var][source]"
    hn = histoName
    return dict([(se,
                  dict([(v,
                         dict([(so, hn(sample_name, v, se, so))
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/tight_variables_plots', help='dir for plots')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples')
    parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    region    = options.region
    tag       = options.tag
    verbose   = options.verbose
    if not tag : parser.error('tag is a required option')
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    filestems, treenames = utils.verticalSlice(fakeu.tupleStemsAndNames)
    regions = filestems
    assert region in regions,"invalid region '%s', must be one of %s"%(region, str(regions))
    templateInputFilename = "*_%(region)s_tuple_%(tag)s.root" % {'tag':tag, 'region':region}
    templateOutputFilename =  "%(region)s_%(l)s_tight_plots.root" % {'region':region, 'l':lepton}
    treeName = treenames[regions.index(region)]
    outputDir = outputDir+'/'+region+'/'+lepton # split the output in subdirectories, so we don't overwrite things
    mkdirIfNeeded(outputDir)
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_'+region+'_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    optionsToPrint = ['inputDir', 'outputDir', 'region', 'tag', 'doFillHistograms']
    if verbose :
        print "working from %s"%os.getcwd()
        print "being called as : %s"%' '.join(os.sys.argv)
        print "options:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint])
    # collect inputs
    if verbose : print 'input files ',os.path.join(inputDir, templateInputFilename)
    tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename))
    samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose))
    if not samples : samples = [guessSampleFromFilename(f) for f in tupleFilenames] # if the fast guess didn't work, try the slow one
    samplesPerGroup = collections.defaultdict(list)
    filenamesPerGroup = collections.defaultdict(list)
    for s, f in zip(samples, tupleFilenames) :
        samplesPerGroup[s.group].append(s)
        filenamesPerGroup[s.group].append(f)
    vars = ['pt','eta','d0sig','z0SinTheta','etCone','ptCone','etConeCorr','ptConeCorr']
    vars += ['relEtConeStd', 'relPtConeStd', 'relEtConeMod', 'relPtConeMod']
    groups = samplesPerGroup.keys()
    sources = leptonSources
    #fill histos
    if doFillHistograms :
        lepLabel = "(probe %s)"%lepton
        histosPerGroup = bookHistosPerGroup(vars, groups, lepLabel=lepLabel)
        histosPerSource = bookHistosPerSource(vars, sources, lepLabel=lepLabel)
        for group in groups:
            isData = isDataSample(group)
            filenames = filenamesPerGroup[group]
            histosThisGroup = histosPerGroup[group]
            chain = r.TChain(treeName)
            [chain.Add(fn) for fn in filenames]
            print "%s : %d entries"%(group, chain.GetEntries())
            fillHistos(chain, histosThisGroup, histosPerSource, isData, lepton, group, verbose)
        writeHistos(cacheFileName, {'perGroup':histosPerGroup, 'perSource':histosPerSource}, verbose)
    # compute scale factors
    histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, groups), verbose)
    histosPerSource = fetchHistos(cacheFileName, histoNames(vars, sources), verbose)
    plotStackedHistos(histosPerGroup,  outputDir+'/by_group',  region, colors=SampleUtils.colors, verbose=verbose)
    plotStackedHistos(histosPerSource, outputDir+'/by_source', region, colors=fakeu.colorsFillSources(), verbose=verbose)
    plotIsoComparison(histosPerSource, outputDir+'/',          region, lepton, verbose)