def plotStackedHistosWithData(histosPerGroup={}, outputDir='', canvasname='', canvastitle='', colors={}, verbose=False):
    """Draw the stacked background histograms, overlay the data, and save the canvas as a png.

    histosPerGroup : dict group -> histogram. The groups for which
                     isDataSample() is true are left out of the stack; the
                     'data' entry (when present and with entries) is drawn
                     on top with markers.
    outputDir      : directory where the png is written (created if needed).
    canvasname, canvastitle : passed to the TCanvas constructor.
    colors         : group -> fill color; kOrange is used for missing groups.
    verbose        : print why a plot is skipped, and the integrals.

    The png is named after the summed-background histogram. Nothing is
    drawn, and None is returned, when a histogram is missing, when there is
    no background group, or when the summed background has zero integral.
    The dict defaults are only read, never modified.
    """
    mkdirIfNeeded(outputDir)
    missingGroups = [g for g, h in histosPerGroup.items() if not h]
    if missingGroups:
        # The histogram name is only known once the backgrounds are summed,
        # so at this stage the plot is identified by its canvas name.
        if verbose : print("skip %s, missing histos for %s"%(canvasname, str(missingGroups)))
        return
    bkgHistos = dict((g, h) for g, h in histosPerGroup.items() if not isDataSample(g))
    if not bkgHistos:
        # without backgrounds there is nothing to sum and no pad master
        if verbose : print("no background histos, skip %s"%canvasname)
        return
    totBkg = summedHisto(list(bkgHistos.values()))
    histoname, region = totBkg.GetName(), 'emu' # tmp replacement vars, to be fixed
    if totBkg.Integral()==0:
        if verbose : print("empty backgrounds, skip %s"%histoname)
        return
    err_band = buildErrBandGraph(totBkg, computeStatErr2(totBkg))
    can = r.TCanvas(canvasname, canvastitle, 800, 600)
    can.cd()
    pm = totBkg # pad master
    pm.SetStats(False)
    pm.Draw('axis')
    can.Update() # necessary to fool root's dumb object ownership
    stack = r.THStack('stack_'+histoname,'')
    can.Update()
    r.SetOwnership(stack, False)
    for s, h in bkgHistos.items() :
        h.SetFillColor(colors[s] if s in colors else r.kOrange)
        h.SetDrawOption('bar')
        h.SetDirectory(0)
        stack.Add(h)
    stack.Draw('hist same')
    err_band.Draw('E2 same')
    data = histosPerGroup['data'] if 'data' in histosPerGroup else None
    if data and data.GetEntries():
        data.SetMarkerStyle(r.kFullDotLarge)
        data.Draw('p same')
        if verbose :
            print("integrals : {0} tot.bkg.: {1}, data: {2}".format(histoname, totBkg.Integral(), data.Integral()))
    else:
        print("no data")
    # only the maximum is needed: the y axis always starts from zero
    _, yMax = getMinMax([h for h in [totBkg, data, err_band] if h])
    pm.SetMinimum(0.0)
    pm.SetMaximum(1.1*yMax)
    can.Update()
    topRightLabel(can, "#splitline{%s}{%s}"%(histoname, region), xpos=0.15, ypos=(1.0-0.5*can.GetTopMargin()), align=13)
    drawLegendWithDictKeys(can, dictSum(bkgHistos, {'stat err':err_band}), opt='f')
    can.RedrawAxis()
    # keep references to the drawn objects on the canvas
    can._stack = stack
    can._histos = [h for h in stack.GetHists()]+[data]
    can.Update()
    filename = os.path.join(outputDir, histoname+'.png')
    rmIfExists(filename)
    can.SaveAs(filename)
def main():
    """Compute and plot the tight/loose efficiencies for one lepton flavor and one extraction mode.

    Steps:
    - parse the command line; the tag is required, and the mode must be one
      of the modes valid for the chosen lepton;
    - unless the cache file already exists (or --fill-histos is given), loop
      over the input ntuples of each group, fill the histograms, and write
      them to the cache file;
    - fetch the histograms from the cache, compute the efficiencies, make
      the plots, and write the efficiencies to the output file.
    """
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fakerate/efficiencies')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-m', '--mode', help='real, conv, hflf')
    parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    mode      = options.mode
    tag       = options.tag
    verbose   = options.verbose
    if not tag : parser.error('tag is a required option')
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    validModesEl = ['real', 'hflf'] + ['conv']
    validModesMu = ['real', 'hflf']
    if mode not in (validModesEl if lepton=='el' else validModesMu) : parser.error("invalid mode %s"%mode)
    tupleStem, treeName = {'conv' : ('mcconv_tuple', 'ConversionExtractionRegion'),
                           'hflf' : ('mcqcd_tuple', 'HfLfExtractionRegion'),
                           'real' : ('mcreal_tuple', 'RealExtractionRegion')
                           }[mode]
    templateInputFilename = "*_%(stem)s_%(tag)s.root" % {'tag':tag, 'stem':tupleStem}
    templateOutputFilename =  "%(stem)s_%(l)s_eff.root" % {'stem':tupleStem.replace('tuple','histos'), 'l':lepton}
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_'+mode+'_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    # NOTE(review): this evaluates an arbitrary expression typed on the
    # command line; acceptable for a script run by its own user, but a lookup
    # among the known tight definitions would be safer.
    onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos
    if verbose :
        # explicit (name, value) pairs rather than eval() on variable names
        optionsToPrint = [('inputDir', inputDir),
                          ('outputDir', outputDir),
                          ('mode', mode),
                          ('tag', tag),
                          ('doFillHistograms', doFillHistograms),
                          ('cacheFileName', cacheFileName),
                          ('onthefly_tight_def', onthefly_tight_def)]
        print("working from %s"%os.getcwd())
        print("being called as : %s"%' '.join(os.sys.argv))
        print("options parsed:\n"+'\n'.join(["%s : %s"%(name, value) for name, value in optionsToPrint]))
    # collect inputs
    inputPattern = os.path.join(inputDir, templateInputFilename)
    print("%s %s"%('input filenames: ', inputPattern))

    tupleFilenames = glob.glob(inputPattern)
    samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose))
    samplesPerGroup = collections.defaultdict(list)
    filenamesPerGroup = collections.defaultdict(list)
    mkdirIfNeeded(outputDir)
    for s, f in zip(samples, tupleFilenames) :
        samplesPerGroup[s.group].append(s)
        filenamesPerGroup[s.group].append(f)
    variables = ['pt', 'pt_eta']
    groups = [g for g in samplesPerGroup.keys() if not isDataSample(g) and not g=='higgs']
    if lepton=='el' : groups = [g for g in groups if g!='heavyflavor']

    sourcesThisMode = {'real' : ['real'], # use same convention as in FakeLeptonSources.h
                       'conv' : ['conv'],
                       'hflf' : ['heavy', 'light', 'qcd']
                       }[mode]
    #fill histos
    if doFillHistograms :
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroupPerSource = bookHistosPerSamplePerSource(variables, groups, sourcesThisMode, mode=mode)
        for group in groups:
            histosThisGroupPerSource = dict((v, histosPerGroupPerSource[v][group]) for v in histosPerGroupPerSource.keys())
            histosAnyGroupPerSource  = dict((v, histosPerGroupPerSource[v]['anygroup']) for v in histosPerGroupPerSource.keys())

            chain = r.TChain(treeName)
            for fn in filenamesPerGroup[group] : chain.Add(fn)
            if verbose: print("%s : %d entries"%(group, chain.GetEntries()))
            num_processed_entries += fillHistos(chain, histosThisGroupPerSource, histosAnyGroupPerSource,
                                                lepton, mode,
                                                onthefly_tight_def=onthefly_tight_def, verbose=verbose)
        writeHistos(cacheFileName, histosPerGroupPerSource, verbose)
        delta_time = time.clock() - start_time
        if verbose:
            # entries/second scaled to kHz; guard against a zero elapsed time
            rate_khz = 1.0e-3*num_processed_entries/delta_time if delta_time>0 else 0.0
            print("processed {0:d} entries ".format(num_processed_entries)
                  +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>60 else
                          "{0:.1f} s ".format(delta_time))
                  +"({0:.1f} kHz)".format(rate_khz))
    # compute efficiencies
    histosPerGroupPerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(variables, groups, sourcesThisMode, mode), verbose)
    effs = computeEfficiencies(histosPerGroupPerSource) # still [var][gr][source][l/t]
    effGroups = first(effs).keys()
    zoomIn = True # used by both the 1D and the 2D plots
    for s in sourcesThisMode:
        for v in variables:
            varIs1D, varIs2D = v=='pt', v=='pt_eta'
            effsThisSourceThisVar = dict((g, effs[v][g][s]) for g in effGroups)
            densThisSourceThisVar = dict((g, histosPerGroupPerSource[v][g][s]['loose']) for g in effGroups if g!='anygroup')
            numsThisSourceThisVar = dict((g, histosPerGroupPerSource[v][g][s]['tight']) for g in effGroups if g!='anygroup')
            if varIs1D:
                cname = 'eff_'+lepton+'_'+s
                lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#varepsilon(T|L)'
                title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                fakeu.plot1dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn)
                cname = 'stack_loose_'+lepton+'_'+s
                lT, lY = 'loose '+lepton+', denominator to #varepsilon(T|L)', '#varepsilon(T|L)'
                title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                plotParametrizedFractions.plotStackedHistos(densThisSourceThisVar, cname, outputDir, title)
                cname = 'stack_tight_'+lepton+'_'+s
                lT, lY = 'tight '+lepton+', numerator to #varepsilon(T|L)', '#varepsilon(T|L)'
                title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                plotParametrizedFractions.plotStackedHistos(numsThisSourceThisVar, cname, outputDir, title)

            elif varIs2D:
                # NOTE(review): same canvas name as the 1D efficiency plot;
                # confirm that plot2dEfficiencies does not overwrite its output.
                cname = 'eff_'+lepton+'_'+s
                lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#eta'
                title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                fakeu.plot2dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn=zoomIn)
    writeHistos(outputFileName, effs, verbose)
    if verbose : print("saved scale factors to %s" % outputFileName)
def main():
    """Compute and plot the fake-lepton compositions (heavy/light/conv) in one region.

    Steps:
    - parse the command line; tag and region are required, and the region
      must be one of the keys of fakeu.tupleStemsAndNames;
    - unless the cache file already exists (or --fill-histos is given), loop
      over the input ntuples of each group, fill the histograms, and write
      them to the cache file;
    - fetch the histograms from the cache, plot the loose stacks for each
      source, normalize them to get the fractions, and write the
      compositions to the output file.
    """
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fake_el_scale_factor', help='dir for plots')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-r', '--region', help='where we want the compositions,'
                      ' i.e. one of the regions for which we saved the fake nutples'
                      ' (eg. ssinc1j_tuple*, emu_tuple*')
    parser.add_option('-s', '--syst-fudge', help='scale down main group (el:wjets, mu:bb/cc) to evaluate fraction syst unc')
    parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    systfudge = options.syst_fudge
    region    = options.region
    tag       = options.tag
    verbose   = options.verbose
    if not tag : parser.error('tag is a required option')
    if not region : parser.error('region is a required option')
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    treeNames = dict(fakeu.tupleStemsAndNames)
    # report an unknown region as a usage error rather than a bare KeyError
    if region not in treeNames : parser.error("invalid region '%s', must be one of %s"%(region, str(sorted(treeNames.keys()))))
    outputDir = outputDir+'/'+lepton # split the output in subdirectories, so we don't overwrite things

    templateInputFilename = "*_%(region)s_tuple_%(tag)s.root" % {'tag':tag, 'region':region}
    templateOutputFilename =  "%(l)s_composition_histos.root" % {'l':lepton}
    treeName = treeNames[region]
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    if verbose :
        # explicit (name, value) pairs rather than eval() on variable names
        optionsToPrint = [('inputDir', inputDir),
                          ('outputDir', outputDir),
                          ('tag', tag),
                          ('doFillHistograms', doFillHistograms),
                          ('systfudge', systfudge)]
        print("working from %s"%os.getcwd())
        print("being called as : %s"%' '.join(os.sys.argv))
        print("options parsed:\n"+'\n'.join(["%s : %s"%(name, value) for name, value in optionsToPrint]))
    # collect inputs
    inputPattern = os.path.join(inputDir, templateInputFilename)
    print("%s %s"%('----> input files ', inputPattern))
    tupleFilenames = glob.glob(inputPattern)
    samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose))
    samplesPerGroup = collections.defaultdict(list)
    filenamesPerGroup = collections.defaultdict(list)
    mkdirIfNeeded(outputDir)
    for s, f in zip(samples, tupleFilenames) :
        samplesPerGroup[s.group].append(s)
        filenamesPerGroup[s.group].append(f)
    variables = ['pt', 'eta', 'pt_eta', 'mt', 'mdeltar']
    groups = samplesPerGroup.keys()
    if lepton=='el' : groups = [g for g in groups if g!='heavyflavor']
    selections = [region]
    #fill histos
    if doFillHistograms :
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroupPerSource = bookHistosPerSamplePerSource(variables, groups, leptonSources, selections)
        for group in groups:
            isData = isDataSample(group)
            histosThisGroupPerSource = histosPerGroupPerSource[group]
            chain = r.TChain(treeName)
            for fn in filenamesPerGroup[group] : chain.Add(fn)
            print("%s : %d entries (%d files)"%(group, chain.GetEntries(), chain.GetListOfFiles().GetEntries()))
            num_processed_entries += fillHistos(chain, histosThisGroupPerSource,
                                                isData, lepton, group, region, verbose)
        writeHistos(cacheFileName, histosPerGroupPerSource, verbose)
        delta_time = time.clock() - start_time
        if verbose:
            # entries/second scaled to kHz; guard against a zero elapsed time
            rate_khz = 1.0e-3*num_processed_entries/delta_time if delta_time>0 else 0.0
            print("processed {0:d} entries ".format(num_processed_entries)
                  +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>60 else
                          "{0:.1f} s ".format(delta_time))
                  +"({0:.1f} kHz)".format(rate_khz))
    # compute and plot fractions
    histosPerGroupPerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(variables, groups, leptonSources, selections))
    histosCompositions = dict()
    normalizeHistos   = plotParametrizedFractions.normalizeHistos
    plotStackedHistos = plotParametrizedFractions.plotStackedHistos
    hs = histosPerGroupPerSource
    mcGroups = [g for g in hs.keys() if g!='data' and g!='higgs']
    for sel in selections:
        histosCompositions[sel] = dict()
        for var in variables:
            histosHeavy = dict((g, hs[g][sel]['heavy'][var]['loose']) for g in mcGroups)
            histosLight = dict((g, hs[g][sel]['light'][var]['loose']) for g in mcGroups)
            histosConv  = dict((g, hs[g][sel]['conv' ][var]['loose']) for g in mcGroups)

            frameTitle = 'hf '+lepton+': '+sel+' loose;'+var
            canvasName = lepton+'_hf'+sel+'_'+var+'_den'
            plotStackedHistos(histosHeavy, canvasName, outputDir, frameTitle)

            # the light-flavor stack must be drawn from the light histograms
            frameTitle = 'lf '+lepton+': '+sel+' loose;'+var
            canvasName = lepton+'_lf'+sel+'_'+var+'_den'
            plotStackedHistos(histosLight, canvasName, outputDir, frameTitle)

            frameTitle = 'conv '+lepton+': '+sel+' loose;'+var
            canvasName = lepton+'_conv'+sel+'_'+var+'_den'
            plotStackedHistos(histosConv, canvasName, outputDir, frameTitle)

            # normalize and draw fractions (den only)
            histos = dict([(k+'_heavy',  h) for k,h in histosHeavy.items()] +
                          [(k+'_light',  h) for k,h in histosLight.items()] +
                          [(k+'_conv', h) for k,h in histosConv.items()])
            if systfudge: fudgeCompositions(histosHeavy, histosLight, histosConv if lepton=='el' else None)
            normalizeHistos(histos)
            anygroupCompositions = buildCompositionsAddingGroups({'heavy':histosHeavy, 'light':histosLight, 'conv':histosConv})
            histosCompositions[sel][var] = {'bygroup':histos, 'anygroup': anygroupCompositions}
            is1Dhisto = var!='pt_eta' # can only stack 1D plots
            if is1Dhisto:
                histosBySource = {'heavy':histosHeavy, 'light':histosLight, 'conv':histosConv}
                frameTitle = lepton+': '+sel+';'+var
                canvasBaseName = lepton+'_fake'+sel+'_'+var+'_frac'
                plotFractionsStacked(histosBySource, canvasBaseName+'_stack', outputDir, frameTitle)
    writeHistos(outputFileName, histosCompositions, verbose)
def main():
    """Plot the variables entering the tight-lepton definition, by group and by source, for one region.

    Steps:
    - parse the command line; the tag is required, and the region must be
      one of the file stems listed in fakeu.tupleStemsAndNames;
    - unless the cache file already exists (or --fill-histos is given), loop
      over the input ntuples of each group, fill the per-group and
      per-source histograms, and write them to the cache file;
    - fetch the histograms from the cache and make the stacked and
      isolation-comparison plots.
    """
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/tight_variables_plots', help='dir for plots')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples')
    parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    region    = options.region
    tag       = options.tag
    verbose   = options.verbose
    if not tag : parser.error('tag is a required option')
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    filestems, treenames = utils.verticalSlice(fakeu.tupleStemsAndNames)
    regions = filestems
    # a usage error rather than an assert, which would be stripped with -O
    if region not in regions : parser.error("invalid region '%s', must be one of %s"%(region, str(regions)))
    templateInputFilename = "*_%(region)s_tuple_%(tag)s.root" % {'tag':tag, 'region':region}
    templateOutputFilename =  "%(region)s_%(l)s_tight_plots.root" % {'region':region, 'l':lepton}
    treeName = treenames[regions.index(region)]
    outputDir = outputDir+'/'+region+'/'+lepton # split the output in subdirectories, so we don't overwrite things
    mkdirIfNeeded(outputDir)
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_'+region+'_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    inputPattern = os.path.join(inputDir, templateInputFilename)
    if verbose :
        # explicit (name, value) pairs rather than eval() on variable names
        optionsToPrint = [('inputDir', inputDir),
                          ('outputDir', outputDir),
                          ('region', region),
                          ('tag', tag),
                          ('doFillHistograms', doFillHistograms)]
        print("working from %s"%os.getcwd())
        print("being called as : %s"%' '.join(os.sys.argv))
        print("options:\n"+'\n'.join(["%s : %s"%(name, value) for name, value in optionsToPrint]))
    # collect inputs
    if verbose : print("%s %s"%('input files ', inputPattern))
    tupleFilenames = glob.glob(inputPattern)
    samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose))
    if not samples : samples = [guessSampleFromFilename(f) for f in tupleFilenames] # if the fast guess didn't work, try the slow one
    samplesPerGroup = collections.defaultdict(list)
    filenamesPerGroup = collections.defaultdict(list)
    for s, f in zip(samples, tupleFilenames) :
        samplesPerGroup[s.group].append(s)
        filenamesPerGroup[s.group].append(f)
    variables = ['pt','eta','d0sig','z0SinTheta','etCone','ptCone','etConeCorr','ptConeCorr']
    variables += ['relEtConeStd', 'relPtConeStd', 'relEtConeMod', 'relPtConeMod']
    groups = samplesPerGroup.keys()
    sources = leptonSources
    #fill histos
    if doFillHistograms :
        lepLabel = "(probe %s)"%lepton
        histosPerGroup = bookHistosPerGroup(variables, groups, lepLabel=lepLabel)
        histosPerSource = bookHistosPerSource(variables, sources, lepLabel=lepLabel)
        for group in groups:
            isData = isDataSample(group)
            histosThisGroup = histosPerGroup[group]
            chain = r.TChain(treeName)
            for fn in filenamesPerGroup[group] : chain.Add(fn)
            print("%s : %d entries"%(group, chain.GetEntries()))
            fillHistos(chain, histosThisGroup, histosPerSource, isData, lepton, group, verbose)
        writeHistos(cacheFileName, {'perGroup':histosPerGroup, 'perSource':histosPerSource}, verbose)
    # fetch the histograms from the cache and plot them
    histosPerGroup = fetchHistos(cacheFileName, histoNames(variables, groups), verbose)
    histosPerSource = fetchHistos(cacheFileName, histoNames(variables, sources), verbose)
    plotStackedHistos(histosPerGroup,  outputDir+'/by_group',  region, colors=SampleUtils.colors, verbose=verbose)
    plotStackedHistos(histosPerSource, outputDir+'/by_source', region, colors=fakeu.colorsFillSources(), verbose=verbose)
    plotIsoComparison(histosPerSource, outputDir+'/',          region, lepton, verbose)
def main():
    """Compute the data/simulation fake scale factors (vs. eta and vs. pt) for one lepton flavor in one region.

    Steps:
    - parse the command line; the tag is required, and the region must be
      one of the file stems listed in fakeu.tupleStemsAndNames;
    - unless the cache file already exists (or --fill-histos is given), loop
      over the input ntuples of each group, fill the histograms, and write
      them to the cache file;
    - fetch the histograms from the cache, make the stacked and efficiency
      plots, compute the scale factors, and write them to the output file.
    """
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fake_scale_factor', help='dir for plots')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples')
    parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)')
    parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    region    = options.region
    tag       = options.tag
    verbose   = options.verbose
    if not tag : parser.error('tag is a required option')
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    filestems, treenames = utils.verticalSlice(fakeu.tupleStemsAndNames)
    regions = filestems
    # a usage error rather than an assert, which would be stripped with -O
    if region not in regions : parser.error("invalid region '%s', must be one of %s"%(region, str(regions)))

    templateInputFilename = "*_%(region)s_tuple_%(tag)s.root" % {'tag':tag, 'region':region}
    templateOutputFilename =  "%(region)s_%(l)s_scale_histos.root" % {'region':region, 'l':lepton}
    treeName = treenames[regions.index(region)]
    outputDir = outputDir+'/'+region+'/'+lepton # split the output in subdirectories, so we don't overwrite things
    mkdirIfNeeded(outputDir)
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_'+region+'_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    # NOTE(review): this evaluates an arbitrary expression typed on the
    # command line; acceptable for a script run by its own user, but a lookup
    # among the known tight definitions would be safer.
    onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos
    inputPattern = os.path.join(inputDir, templateInputFilename)
    if verbose :
        # explicit (name, value) pairs rather than eval() on variable names
        optionsToPrint = [('inputDir', inputDir),
                          ('outputDir', outputDir),
                          ('region', region),
                          ('tag', tag),
                          ('doFillHistograms', doFillHistograms),
                          ('onthefly_tight_def', onthefly_tight_def)]
        print("working from %s"%os.getcwd())
        print("being called as : %s"%' '.join(os.sys.argv))
        print("options:\n"+'\n'.join(["%s : %s"%(name, value) for name, value in optionsToPrint]))
    # collect inputs
    if verbose : print("%s %s"%('input files ', inputPattern))
    tupleFilenames = glob.glob(inputPattern)
    samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose))
    samplesPerGroup = collections.defaultdict(list)
    filenamesPerGroup = collections.defaultdict(list)
    for s, f in zip(samples, tupleFilenames) :
        samplesPerGroup[s.group].append(s)
        filenamesPerGroup[s.group].append(f)
    variables = ['mt0', 'mt1', 'pt0', 'pt1', 'eta1']
    groups = samplesPerGroup.keys()
    #fill histos
    if doFillHistograms :
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroup = bookHistos(variables, groups, region=region)
        histosPerSource = bookHistosPerSource(variables, leptonSources, region=region)
        histosPerGroupPerSource = bookHistosPerSamplePerSource(variables, groups, leptonSources, region=region)
        for group in groups:
            filenames = filenamesPerGroup[group]
            if verbose:
                # %-formatting: the original mixed a %s placeholder with
                # str.format, so the group name was never printed
                print(" --- group : %s ---"%group)
                print('\n\t'.join(filenames))
            histosThisGroup = histosPerGroup[group]
            histosThisGroupPerSource = dict((v, histosPerGroupPerSource[v][group]) for v in histosPerGroupPerSource.keys())
            chain = r.TChain(treeName)
            for fn in filenames : chain.Add(fn)
            if verbose: print("%s : %d entries"%(group, chain.GetEntries()))
            num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource,
                                                histosThisGroupPerSource,
                                                lepton, group, region,
                                                onthefly_tight_def=onthefly_tight_def, verbose=verbose)
        writeHistos(cacheFileName, histosPerGroup, histosPerSource, histosPerGroupPerSource, verbose)
        delta_time = time.clock() - start_time
        if verbose:
            # entries/second scaled to kHz; guard against a zero elapsed time
            rate_khz = 1.0e-3*num_processed_entries/delta_time if delta_time>0 else 0.0
            print("processed {0:d} entries ".format(num_processed_entries)
                  +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>60 else
                          "{0:.1f} s ".format(delta_time))
                  +"({0:.1f} kHz)".format(rate_khz))
    # compute scale factors
    histosPerGroup = fetchHistos(cacheFileName, histoNames(variables, groups, region), verbose)
    histosPerSource = fetchHistos(cacheFileName, histoNamesPerSource(variables, leptonSources, region), verbose)
    histosPerSamplePerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(variables, groups, leptonSources, region), verbose)
    plotStackedHistos(histosPerGroup, outputDir+'/by_group', region, verbose)
    plotStackedHistosSources(histosPerSource, outputDir+'/by_source', region, verbose)
    plotPerSourceEff(histosPerVar=histosPerSource, outputDir=outputDir+'/by_source', lepton=lepton, region=region, verbose=verbose)
    for g in groups:
        hps = dict((v, histosPerSamplePerSource[v][g]) for v in variables)
        plotPerSourceEff(histosPerVar=hps, outputDir=outputDir, lepton=lepton, region=region, sample=g, verbose=verbose)


    hn_sf_eta = histoname_sf_vs_eta           (lepton)
    hn_sf_pt  = histoname_sf_vs_pt            (lepton)
    hn_da_eta = histoname_data_fake_eff_vs_eta(lepton)
    hn_da_pt  = histoname_data_fake_eff_vs_pt (lepton)
    objs_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'eta1', hn_sf_eta, hn_da_eta, outputDir, region, verbose)
    objs_pt  = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1',  hn_sf_pt,  hn_da_pt,  outputDir, region, verbose)
    rootUtils.writeObjectsToFile(outputFileName, dictSum(objs_eta, objs_pt), verbose)
    if verbose : print("saved scale factors to %s" % outputFileName)
def fillHistos(chain, histosThisGroup, histosPerSource, histosThisGroupPerSource,
               lepton, group, region, onthefly_tight_def=None, verbose=False):
    """Loop over the events of chain and fill the tag-and-probe histograms.

    chain                    : iterable of events, each providing pars, l0 (tag), l1 (probe), met, jets.
    histosThisGroup          : [var][lepType] histograms of this group
                               (lepType: loose, tight, real_loose, fake_loose, real_tight, fake_tight).
    histosPerSource          : [var][source][loose/tight] histograms, filled for non-data groups only.
    histosThisGroupPerSource : same structure as histosPerSource, filled for non-data groups only.
    lepton                   : 'el' or 'mu', flavor required for the probe.
    group                    : group name; determines isData and the heavyflavor normalization.
    region                   : 'hflf' and 'conv' are treated specially.
    onthefly_tight_def       : optional callable(probe) replacing probe.isTight.
    verbose                  : print the loose/tight counters at the end.

    Returns the number of events looped over (selected or not).
    """
    nLoose, nTight = 0, 0
    totWeightLoose, totWeightTight = 0.0, 0.0
    addTlv, computeMt = kin.addTlv, kin.computeMt
    isData = isDataSample(group)
    isHflf = region=='hflf'
    isConversion = region=='conv'
    normFactor = 1.0 # bb/cc hand-waving normalization factor, see notes 2014-04-17
    if group=='heavyflavor':
        if lepton=='el' and not isConversion : normFactor = 1.6
        if lepton=='mu' :                      normFactor = 0.87
    sourceReal = 3 # see FakeLeptonSources.h
    def isBjet(j, mv1_80=0.3511) : return j.mv1 > mv1_80 # see SusyDefs.h
    num_processed_entries = 0
    for event in chain :
        num_processed_entries += 1
        pars = event.pars
        weight = pars.weight*normFactor
        hasTrigmatch = pars.has2ltrigmatch==1
        tag, probe, met = addTlv(event.l0), addTlv(event.l1), addTlv(event.met)
        isSameSign = tag.charge*probe.charge > 0.
        isRightProbe = probe.isEl if lepton=='el' else probe.isMu if lepton=='mu' else False
        isTight = onthefly_tight_def(probe) if onthefly_tight_def else probe.isTight
        isReal = probe.source==sourceReal and not isData
        isFake = not isReal and not isData
        # The jet flags, isLowMt, isMuMu, passTrigBias, hasTrigmatch and
        # probeIsFromPv are used only by the alternative selections listed
        # below; they are kept so that those selections can be swapped in.
        jets = [addTlv(j) for j in event.jets] # only if needed
        hasBjets = any(isBjet(j) for j in jets) # compute only if necessary
        hasFjets = any(abs(j.p4.Eta())>2.4 and j.p4.Pt()>30. for j in jets)
        hasCLjets = any(abs(j.p4.Eta())<2.4 and j.p4.Pt()>30 and not isBjet(j) for j in jets)
        hasJ30jets = any(j.p4.Pt()>30. for j in jets)
        hasJets = hasBjets or hasFjets or hasCLjets
        tag4m, probe4m, met4m = r.TLorentzVector(), r.TLorentzVector(), r.TLorentzVector()
        tag4m.SetPxPyPzE(tag.px, tag.py, tag.pz, tag.E)
        probe4m.SetPxPyPzE(probe.px, probe.py, probe.pz, probe.E)
        met4m.SetPxPyPzE(met.px, met.py, met.pz, met.E)
        pt = probe4m.Pt()
        eta = abs(probe4m.Eta())
        mt0 = computeMt(tag4m, met4m)
        mt1 = computeMt(probe4m, met4m)
        pt0 = tag4m.Pt()
        pt1 = pt
        isLowMt = mt1 < 40.0 if region=='hflf' else True # used to reduce the contamination from real (mostly W+jets)
        isMuMu = tag.isMu and probe.isMu
        passTrigBias =  True
        probeIsFromPv = fakeu.lepIsFromPv(probe)
        if   isHflf       : passTrigBias = pt0>20.0 and pt1>20.0
        elif isConversion : passTrigBias = pt1>20.0
        # alternative selections that were tried:
        # if tag.isMu and isRightProbe and isSameSign and tag4m.Pt()>40.0 : # test 1a : harder tag
        # if tag.isMu and isRightProbe and isSameSign and not hasBjets: # test 2 : veto b-jets
        # if tag.isMu and isRightProbe and isSameSign and not hasJets: # test 3 : require no jets (cl30, bj, fj)
        # if tag.isMu and isRightProbe and isSameSign and not hasJ30jets: # test 4 : require no jets (cl30, bj, fj)
        # if tag.isMu and isRightProbe and isSameSign and not hasJets and tag4m.Pt()>40.0: # test 4 : require no jets (cl30, bj, fj)
        # --last test-- if tag.isMu and isRightProbe and isSameSign and hasTrigmatch: # test 5 : no jet req, trig match
        # if tag.isMu and (isSameSign or isConversion) and isRightProbe and isLowMt and passTrigBias:
        # if tag.isMu and isRightProbe and isSameSign and hasTrigmatch and tag4m.Pt()>40.0: # test 6 : try again pt>40
        # if tag.isMu and isRightProbe and isSameSign and hasTrigmatch and tag4m.Pt()>40.0 and abs(tag4m.DeltaPhi(probe4m))>2.3: # test 7 pt and deltaPhi
        # if tag.isMu and isRightProbe and isSameSign and hasTrigmatch and probeIsFromPv: # test 8 loose only drops iso
        # if isMuMu and isRightProbe and isLowMt and passTrigBias: # test emu mumu
        # if (isSameSign or isConversion) and isRightProbe and isLowMt: # test sf conversion (not very important for now, 2014-04)
        if tag.isMu and isRightProbe and isSameSign : # test 1 : no jet req
            if not isData : # the source is filled only for non-data groups
                leptonSource = enum2source(probe)
                for tightOrLoose in (['loose', 'tight'] if isTight else ['loose']):
                    for histos in [histosPerSource, histosThisGroupPerSource]:
                        histos['mt1' ][leptonSource][tightOrLoose].Fill(mt1, weight)
                        histos['pt1' ][leptonSource][tightOrLoose].Fill(pt,  weight)
                        histos['eta1'][leptonSource][tightOrLoose].Fill(eta, weight)
            nLoose, totWeightLoose = nLoose+1, totWeightLoose+weight
            if isTight:
                nTight, totWeightTight = nTight+1, totWeightTight+weight
            histosThisGroup['mt0']['loose'].Fill(mt0, weight)
            histosThisGroup['pt0']['loose'].Fill(pt0, weight)
            histosThisGroup['mt1']['loose'].Fill(mt1, weight)
            # every selected probe is 'loose'; the other categories depend
            # on the tight flag and on the real/fake classification
            lepTypes = ['loose']
            if isTight : lepTypes.append('tight')
            if isReal : lepTypes.append('real_loose')
            if isFake : lepTypes.append('fake_loose')
            if isReal and isTight : lepTypes.append('real_tight')
            if isFake and isTight : lepTypes.append('fake_tight')
            for lepType in lepTypes:
                histosThisGroup['pt1' ][lepType].Fill(pt, weight)
                histosThisGroup['eta1'][lepType].Fill(eta, weight)
    if verbose:
        # explicit (name, value) pairs rather than eval() on variable names
        counters = [('nLoose', nLoose), ('nTight', nTight),
                    ('totWeightLoose', totWeightLoose), ('totWeightTight', totWeightTight)]
        print(', '.join(["%s : %.1f"%(name, value) for name, value in counters]))
    return num_processed_entries