def buildCorrectionHisto(
    hndRealDataCr,
    hndFakeDataLo,
    hndFakeDataHi,
    hndFakeMcLo,
    hndFakeMcHi,
    histoname="lep_corHFRate",
    nIter=1,
    verbose=False,
    plotdir=None,
):
    hRealEff = buildRatioHistogram(hndRealDataCr["num"], hndRealDataCr["den"], "real_eff")
    corrected = dict([(nd, hndFakeDataLo[nd].Clone("corrected_" + nd)) for nd in ["num", "den"]])
    print "buildCorrectionHisto with nIter ", nIter
    can = r.TCanvas("c_" + histoname, "")
    can.Draw()
    can.cd()
    hRealEff.GetYaxis().SetRangeUser(0.0, 1.0)
    hRealEff.Draw("axis")
    tex = r.TLatex()
    tex.SetNDC(True)
    tex.DrawLatex(0.15, 0.915, histoname)
    can._tex = tex
    can._histos = [hRealEff]
    can._leg = r.TLegend(0.925, 0.25, 1.0, 0.9, "iter")
    can._leg.SetBorderSize(0)
    can._leg.SetFillColor(0)
    for iteration in range(nIter):
        print "iter ", iteration
        rate = buildRatioHistogram(corrected["num"], corrected["den"])  # temporary rate (?)
        if verbose:
            print "Iteration %d, corrected values:" % iteration
            print "  num   %s" % lf2s(getBinContents(corrected["num"]))
            print "  den   %s" % lf2s(getBinContents(corrected["den"]))
            print "  ratio %s" % lf2s(getBinContents(rate))
        dataNum, dataDen = hndFakeDataHi["num"], hndFakeDataHi["den"]
        for nd, tl in [("num", "tight"), ("den", "loose")]:
            corr, dataLow = corrected[nd], hndFakeDataLo[nd]
            mcLow, mcHi = hndFakeMcLo[nd], hndFakeMcHi[nd]
            corrFact = getCorrFactors(hRealEff, rate, dataNum, dataDen, mcHi, tl)
            corr = correctRate(corr, dataLow, mcLow, corrFact)
        rate.SetLineColor(20 + iteration)
        rate.SetMarkerColor(20 + iteration)
        rate.SetLineWidth(4)
        rate.SetMarkerStyle(r.kFullCross)
        can._histos.append(rate.DrawClone("el same"))
        can._leg.AddEntry(can._histos[-1], "%d" % iteration, "lp")
    ratio = buildRatioHistogram(corrected["num"], corrected["den"], histoname)
    can._leg.Draw()
    can.Update()
    if plotdir:
        can.SaveAs(plotdir + "/" + histoname + "_iterations.png")
    return ratio
def computeAndPlotHfSf2d(fileIter, fileHf, lepton, variable_name, outdir) :
    eff_da = fileIter.Get(lepton+'_corHFRate_eta')
    eff_mc = buildRate(fileHf, lepton+'_fakeHF_all_l_pt_eta')
    ratio = buildRatioHistogram(eff_da, eff_mc)
    ratio.Print()
    xAx, yAx = ratio.GetXaxis(), ratio.GetYaxis()
    print ratio.GetName(),": bins (%d, %d)"%(ratio.GetNbinsX(), ratio.GetNbinsY())
    nEtaBins = yAx.GetNbins()
    print 'nEtaBins: ',nEtaBins
    xMin, xMax = xAx.GetXmin(), xAx.GetXmax()
    fitFunc = r.TF1('fit_func_const_'+ratio.GetName(), '[0]', xMin, xMax)
    etaBins = range(1, 1+nEtaBins)
    slices = [ratio.ProjectionX("%s_bin%d"%(ratio.GetName(), b), b, b, 'e') for b in etaBins]
    for b, s in zip(etaBins, slices) :
        s.SetTitle(lepton+" data/mc heavyflavor : eta bin %d"%b)
        s.Fit(fitFunc.GetName(), '0RQ') # do not draw, range, quiet
        p0, p0Err, chi2, ndf = fitResults(fitFunc)
        p0, p0Err = pdgRound(p0, p0Err)
        print "bin %d :  %s +/- %s"%(b, p0, p0Err)
        can = r.TCanvas('')
        s.Draw('ep')
        fitFunc.Draw('same')
        tex = r.TLatex()
        tex.SetNDC(True)
        fitParLabel = "Const. fit : %s #pm %s"%(p0, p0Err)
        fitGoodLabel = "#chi^{2}/DOF : %.2f / %d"%(chi2, ndf)
        tex.SetTextSize(yAx.GetTitleSize())
        tex.SetTextFont(yAx.GetTitleFont())
        tex.DrawLatex(0.15, 0.45, s.GetTitle())
        tex.DrawLatex(0.15, 0.40, "#splitline{%s}{%s}"%(fitParLabel, fitGoodLabel))
        can.Update()
        for ext in ['eps','png'] : can.SaveAs(outdir+'/fit_'+lepton+"_heavyflavor_etabin%d.%s"%(b, ext))
    return p0
def buildSideBandSubRate(file, lepton, variable_name):
    """Build the num/den rate in the Z window after subtracting both sidebands.

    The 'num' and 'den' histograms of `<lepton>_realCR_<variable_name>` have the
    `_realSideLow_` and `_realSideHigh_` histograms subtracted; the bin errors
    are then overwritten with values computed from the pre-subtraction errors.
    Returns the ratio of the subtracted numerator and denominator.
    """
    window = getNumDenHistos(file, lepton + '_realCR_' + variable_name)
    sideLow = getNumDenHistos(file, lepton + '_realSideLow_' + variable_name)
    sideHigh = getNumDenHistos(file, lepton + '_realSideHigh_' + variable_name)
    keys = ('num', 'den')

    def binErrors(histo):
        "errors of the regular bins 1..N (no under/overflow)"
        return [histo.GetBinError(iBin) for iBin in range(1, 1 + histo.GetNbinsX())]

    # The errors must be read before the Add() calls below modify the histograms.
    # NOTE(review): the sideband variances are subtracted from the window
    # variance rather than added; the original author flagged this as unclear
    # ("ask Matt"). Left unchanged here -- confirm the intended formula.
    errors = {}
    for key in keys:
        triples = zip(binErrors(sideLow[key]), binErrors(window[key]), binErrors(sideHigh[key]))
        errors[key] = [sqrt(eWin * eWin - (eLow * eLow + eHigh * eHigh))
                       for eLow, eWin, eHigh in triples]

    # sum the two sidebands (in place, into the low one), then subtract from the window
    for key in keys:
        sideLow[key].Add(sideHigh[key])
    for key in keys:
        window[key].Add(sideLow[key], -1)

    for key in keys:
        histo, binErrs = window[key], errors[key]
        for iBin in range(1, 1 + histo.GetNbinsX()):
            histo.SetBinError(iBin, binErrs[iBin - 1])
    return buildRatioHistogram(window['num'], window['den'])
def computeAndPlotConvSf(fileData, fileMc, lepton, variable_name, outdir):
    "Electron conversion: simplest case, just data/mc"
    eff_da = buildRate(fileData, lepton + '_fakeConv_' + variable_name)
    eff_mc = buildRate(fileMc, lepton + '_fakeConv_' + variable_name)
    ratio = buildRatioHistogram(eff_da, eff_mc)
    fitFunc = fitWithConst(ratio)
    p0, p0Err, chi2, ndf = fitResults(fitFunc)
    p0, p0Err = pdgRound(p0, p0Err)
    print "SF for %s fake conv : %s +/- %s" % (lepton, p0, p0Err)
    graphics = {
        'xtitle': xTitle(lepton, variable_name),
        'ytitle': lepton + ' p(tight | fake conv)',
        'colors': {
            'data': r.kBlack,
            'mc': mcColor(lepton)
        },
        'markers': {
            'data': r.kFullCircle,
            'mc': mcMarker(lepton)
        },
        'labels': {
            'data': 'Data: Conversion CR',
            'mc': 'MC Comb: Conv CR'
        }
    }
    plotHistRatioAndFit({
        'data': eff_da,
        'mc': eff_mc
    }, ratio, fitFunc, outdir + lepton + '_fakeconv', graphics)
def computeAndPlotRealSf(file_data, file_mc, lepton, variable_name, outdir):
    "Scale factor from the real control region, Z tag and probe"
    eff_da = buildSideBandSubRate(file_data, lepton, variable_name)
    eff_mc = buildSideBandSubRate(file_mc, lepton, variable_name)
    ratio = buildRatioHistogram(eff_da, eff_mc)
    fitFunc = fitWithConst(ratio)
    p0, p0Err, chi2, ndf = fitResults(fitFunc)
    p0, p0Err = pdgRound(p0, p0Err)
    print "SF for %s real : %s +/- %s" % (lepton, p0, p0Err)
    graphics = {
        'xtitle': xTitle(lepton, variable_name),
        'ytitle': lepton + ' p(tight | real)',
        'colors': {
            'data': r.kBlack,
            'mc': mcColor(lepton)
        },
        'markers': {
            'data': r.kFullCircle,
            'mc': mcMarker(lepton)
        },
        'labels': {
            'data': 'Data: Z Tag and Probe',
            'mc': 'MC Comb: Z Tag and Probe'
        }
    }
    plotHistRatioAndFit({
        'data': eff_da,
        'mc': eff_mc
    }, ratio, fitFunc, outdir + lepton + '_real', graphics)
def drawBot(pad, histo_data, histo_mc, err_band_r, xaxis_title='') :
    """Draw the data/MC ratio panel on `pad`.

    The ratio of `histo_data` to `histo_mc` is drawn in a fixed 0-2 range with
    reference lines at 0.5, 1.0 and 1.5 and with `err_band_r` underneath.
    Axis text sizes are scaled by the inverse of the pad height fraction.
    """
    pad.Draw()
    pad.cd()
    ratioHisto = buildRatioHistogram(histo_data, histo_mc)
    ratioHisto.SetMinimum(0.0)
    ratioHisto.SetMaximum(2.0)
    ratioHisto.SetStats(0)
    ratioHisto.Draw('axis')
    xLow, xHigh = getXrange(ratioHisto)
    guideLines = [referenceLine(xLow, xHigh, level, level) for level in (0.5, 1.0, 1.5)]
    for line in guideLines :
        line.Draw()
    err_band_r.Draw('E2 same')
    ratioHisto.Draw('ep same')
    xAxis = ratioHisto.GetXaxis()
    yAxis = ratioHisto.GetYaxis()
    scale = 1.0/pad.GetHNDC()
    if xaxis_title :
        xAxis.SetTitle(xaxis_title)
    yAxis.SetNdivisions(-104)
    yAxis.SetTitle('Data/SM')
    yAxis.CenterTitle()
    yAxis.SetTitleOffset(yAxis.GetTitleOffset()/scale)
    for axis in (xAxis, yAxis) :
        axis.SetLabelSize(axis.GetLabelSize()*scale)
        axis.SetTitleSize(axis.GetTitleSize()*scale)
    # hold references on the pad so the drawn objects are not garbage collected
    pad._graphical_objects = [ratioHisto, err_band_r] + guideLines
    pad.Update()
def computeAndPlotHfSf(fileIter, fileHf, lepton, variable_name, outdir):
    "HF tag and probe; in this case we need to subract out the contamination"
    eff_da = fileIter.Get(lepton + '_corHFRate')
    eff_mc = buildRate(fileHf, lepton + '_fakeHF_' + variable_name)
    ratio = buildRatioHistogram(eff_da, eff_mc)
    fitFunc = fitWithConst(ratio)
    p0, p0Err, chi2, ndf = fitResults(fitFunc)
    p0, p0Err = pdgRound(p0, p0Err)
    print "SF for %s fake HF : %s +/- %s" % (lepton, p0, p0Err)
    graphics = {
        'xtitle': xTitle(lepton, variable_name),
        'ytitle': lepton + ' p(tight | fake hf)',
        'colors': {
            'data': r.kBlack,
            'mc': mcColor(lepton)
        },
        'markers': {
            'data': r.kFullCircle,
            'mc': mcMarker(lepton)
        },
        'labels': {
            'data': 'Data HF Tag and Probe (Iterative Subtraction)',
            'mc': 'b#bar{b}/c#bar{c} MC: HF Tag and Probe'
        }
    }
    plotHistRatioAndFit({
        'data': eff_da,
        'mc': eff_mc
    }, ratio, fitFunc, outdir + lepton + '_fakehf', graphics)
def drawBot(pad, histo_data, histo_mc, err_band_r, xaxis_title='') :
    """Fill the bottom pad with the data-over-MC ratio.

    Draws, in this order: an empty frame (y range 0 to 2), three horizontal
    reference lines (0.5, 1.0, 1.5), the error band `err_band_r`, and the ratio
    points. Label and title sizes are enlarged by 1/pad-height so the text
    matches a taller pad.
    """
    Y_RANGE = (0.0, 2.0)
    REFERENCE_LEVELS = [0.5, 1.0, 1.5]
    pad.Draw()
    pad.cd()
    hRatio = buildRatioHistogram(histo_data, histo_mc)
    hRatio.SetMinimum(Y_RANGE[0])
    hRatio.SetMaximum(Y_RANGE[1])
    hRatio.SetStats(0)
    hRatio.Draw('axis')
    xFirst, xLast = getXrange(hRatio)
    horizontals = [referenceLine(xFirst, xLast, yRef, yRef) for yRef in REFERENCE_LEVELS]
    for horizontal in horizontals :
        horizontal.Draw()
    err_band_r.Draw('E2 same')
    hRatio.Draw('ep same')
    axisX, axisY = hRatio.GetXaxis(), hRatio.GetYaxis()
    zoom = 1.0/pad.GetHNDC()
    if xaxis_title :
        axisX.SetTitle(xaxis_title)
    axisY.SetNdivisions(-104)
    axisY.SetTitle('Data/SM')
    axisY.CenterTitle()
    axisY.SetTitleOffset(axisY.GetTitleOffset()/zoom)
    for ax in [axisX, axisY] :
        ax.SetLabelSize(ax.GetLabelSize()*zoom)
        ax.SetTitleSize(ax.GetTitleSize()*zoom)
    pad._graphical_objects = [hRatio, err_band_r] + horizontals # avoid garbage collection
    pad.Update()
def computeAndPlotRealSf(file_data, file_mc, lepton, variable_name, outdir) :
    "Scale factor from the real control region, Z tag and probe"
    eff_da = buildSideBandSubRate(file_data, lepton, variable_name)
    eff_mc = buildSideBandSubRate(file_mc,   lepton, variable_name)
    ratio = buildRatioHistogram(eff_da, eff_mc)
    fitFunc = fitWithConst(ratio)
    p0, p0Err, chi2, ndf = fitResults(fitFunc)
    p0, p0Err = pdgRound(p0, p0Err)
    print "SF for %s real : %s +/- %s"%(lepton, p0, p0Err)
    graphics = {'xtitle' : xTitle(lepton, variable_name),
                'ytitle' : lepton+' p(tight | real)',
                'colors' : {'data' : r.kBlack, 'mc' : mcColor(lepton)},
                'markers': {'data' : r.kFullCircle, 'mc' : mcMarker(lepton)},
                'labels' : {'data' : 'Data: Z Tag and Probe',
                            'mc'   : 'MC Comb: Z Tag and Probe'}}
    plotHistRatioAndFit({'data':eff_da, 'mc':eff_mc}, ratio, fitFunc, outdir+lepton+'_real',
                        graphics)
def computeAndPlotConvSf(fileData, fileMc, lepton, variable_name, outdir) :
    "Electron conversion: simplest case, just data/mc"
    eff_da = buildRate(fileData, lepton+'_fakeConv_'+variable_name)
    eff_mc = buildRate(fileMc,   lepton+'_fakeConv_'+variable_name)
    ratio  = buildRatioHistogram(eff_da, eff_mc)
    fitFunc = fitWithConst(ratio)
    p0, p0Err, chi2, ndf = fitResults(fitFunc)
    p0, p0Err = pdgRound(p0, p0Err)
    print "SF for %s fake conv : %s +/- %s"%(lepton, p0, p0Err)
    graphics = {'xtitle' : xTitle(lepton, variable_name),
                'ytitle' : lepton+' p(tight | fake conv)',
                'colors' : {'data' : r.kBlack, 'mc' : mcColor(lepton)},
                'markers': {'data' : r.kFullCircle, 'mc' : mcMarker(lepton)},
                'labels' : {'data' : 'Data: Conversion CR',
                            'mc'   : 'MC Comb: Conv CR'}}
    plotHistRatioAndFit({'data':eff_da, 'mc':eff_mc}, ratio, fitFunc, outdir+lepton+'_fakeconv',
                        graphics)
def computeAndPlotHfSf(fileIter, fileHf, lepton, variable_name, outdir) :
    "HF tag and probe; in this case we need to subract out the contamination"
    eff_da = fileIter.Get(lepton+'_corHFRate')
    eff_mc = buildRate(fileHf, lepton+'_fakeHF_'+variable_name)
    ratio = buildRatioHistogram(eff_da, eff_mc)
    fitFunc = fitWithConst(ratio)
    p0, p0Err, chi2, ndf = fitResults(fitFunc)
    p0, p0Err = pdgRound(p0, p0Err)
    print "SF for %s fake HF : %s +/- %s"%(lepton, p0, p0Err)
    graphics = {'xtitle' : xTitle(lepton, variable_name),
                'ytitle' : lepton+' p(tight | fake hf)',
                'colors' : {'data' : r.kBlack, 'mc' : mcColor(lepton)},
                'markers': {'data' : r.kFullCircle, 'mc' : mcMarker(lepton)},
                'labels' : {'data' : 'Data HF Tag and Probe (Iterative Subtraction)',
                            'mc'   : 'b#bar{b}/c#bar{c} MC: HF Tag and Probe'}}
    plotHistRatioAndFit({'data':eff_da, 'mc':eff_mc}, ratio, fitFunc, outdir+lepton+'_fakehf',
                        graphics)
def computeAndPlotConvSf2d(fileData, fileMc, lepton, variable_name, outdir) :
    "Electron conversion: simplest case, just data/mc"
    histoname = 'elec_fakeConv_all_l_pt_eta'
    eff_da = buildRate(fileData, histoname)
    eff_mc = buildRate(fileMc,   histoname)
    print 'SF conversion: using histo ',histoname
    ratio  = buildRatioHistogram(eff_da, eff_mc)
    ratio.Print()
    xAx, yAx = ratio.GetXaxis(), ratio.GetYaxis()
    #pt_eta : check that x is pt, y is eta
    nEtaBins = yAx.GetNbins()
    xMin, xMax = xAx.GetXmin(), xAx.GetXmax()
    fitFunc = r.TF1('fit_func_const_'+ratio.GetName(), '[0]', xMin, xMax)
    etaBins = range(1, 1+nEtaBins)
    slices = [ratio.ProjectionX("%s_bin%d"%(ratio.GetName(), b), b, b, 'e') for b in etaBins]
    for b, s in zip(etaBins, slices) :
        s.SetTitle("data/mc conversion : eta bin %d"%b)
        s.Fit(fitFunc.GetName(), '0RQ') # do not draw, range, quiet
        p0, p0Err, chi2, ndf = fitResults(fitFunc)
        p0, p0Err = pdgRound(p0, p0Err)
        print "bin %d :  %s +/- %s"%(b, p0, p0Err)
        can = r.TCanvas('')
        s.Draw('ep')
        fitFunc.Draw('same')
        tex = r.TLatex()
        tex.SetNDC(True)
        fitParLabel = "Const. fit : %s #pm %s"%(p0, p0Err)
        fitGoodLabel = "#chi^{2}/DOF : %.2f / %d"%(chi2, ndf)
        tex.SetTextSize(yAx.GetTitleSize())
        tex.SetTextFont(yAx.GetTitleFont())
        tex.DrawLatex(0.15, 0.45, s.GetTitle())
        tex.DrawLatex(0.15, 0.40, "#splitline{%s}{%s}"%(fitParLabel, fitGoodLabel))
        can.Update()
        for ext in ['eps','png'] : can.SaveAs(outdir+"/fit_el_conv_etabin%d.%s"%(b, ext))
#     graphics = {'xtitle' : xTitle(lepton, variable_name),
#                 'ytitle' : lepton+' p(tight | fake conv)',
#                 'colors' : {'data' : r.kBlack, 'mc' : mcColor(lepton)},
#                 'markers': {'data' : r.kFullCircle, 'mc' : mcMarker(lepton)},
#                 'labels' : {'data' : 'Data: Conversion CR',
#                             'mc'   : 'MC Comb: Conv CR'}}
#     plotHistRatioAndFit({'data':eff_da, 'mc':eff_mc}, ratio, fitFunc, outdir+lepton+'_fakeconv',
#                         graphics)
    return p0
def computeAndPlotConvSf(fileData, fileMc, lepton, variable_name, outdir, outfile=None) :
    "Electron conversion: simplest case, just data/mc"
    eff_da = buildRate(fileData, lepton+'_fakeConv_'+variable_name)
    eff_mc = buildRate(fileMc,   lepton+'_fakeConv_'+variable_name)
    ratio  = buildRatioHistogram(eff_da, eff_mc)
    print ratio.GetName(),' : ',["%.3f"%ratio.GetBinContent(b) for b in range(1, 1+ratio.GetNbinsX())]
    fitFunc = fitWithConst(ratio)
    p0, p0Err, chi2, ndf = fitResults(fitFunc)
    p0, p0Err = pdgRound(p0, p0Err)
    print "SF for %s fake conv : %s +/- %s"%(lepton, p0, p0Err)
    graphics = {'xtitle' : xTitle(lepton, variable_name),
                'ytitle' : lepton+' p(tight | fake conv)',
                'colors' : {'data' : r.kBlack, 'mc' : mcColor(lepton)},
                'markers': {'data' : r.kFullCircle, 'mc' : mcMarker(lepton)},
                'labels' : {'data' : 'Data: Conversion CR',
                            'mc'   : 'MC Comb: Conv CR'}}
    plotHistRatioAndFit({'data':eff_da, 'mc':eff_mc}, ratio, fitFunc, outdir+'/fit_'+lepton+'_conv', graphics)
    if outfile : saveObject(outfile, ratio, 'elec_convSF_pt')
    return p0
def buildRatioHistos(histosNum={}, histosDen={}) :
    """Build num/den ratio histograms for two nested dicts `d[vtype][sample]`.

    Both inputs must have the same vtype keys and, per vtype, the same sample
    keys (checked with assert). Each ratio is named from the common prefix and
    suffix of the two input names when both are non-empty, otherwise
    `<num>_over_<den>`; `resetErrors` is called on every ratio.
    Returns a dict with the same `[vtype][sample]` layout.
    """
    def sameKeys(first, second) :
        # equal as multisets; sorting also covers the length comparison
        return sorted(first)==sorted(second)
    assert sameKeys(histosNum.keys(), histosDen.keys()),"num and den w/ different vtype keys"
    ratios = {}
    for vtype in histosDen.keys() :
        numPerSample, denPerSample = histosNum[vtype], histosDen[vtype]
        assert sameKeys(numPerSample.keys(), denPerSample.keys()),"num and den w/ different sample keys"
        ratiosPerSample = {}
        for sample in denPerSample.keys() :
            numerator, denominator = numPerSample[sample], denPerSample[sample]
            names = [numerator.GetName(), denominator.GetName()]
            prefix, suffix = commonPrefix(names), commonSuffix(names)
            if len(prefix) and len(suffix) :
                ratioName = prefix+'_ratio_'+suffix
            else :
                ratioName = '_over_'.join(names)
            ratioHisto = buildRatioHistogram(numerator, denominator, ratioName)
            resetErrors(ratioHisto)
            ratiosPerSample[sample] = ratioHisto
        ratios[vtype] = ratiosPerSample
    return ratios
def buildRatioHistos(histosNum={}, histosDen={}) :
    """Ratio histograms for inputs organized as `dict[vt][sample]`.

    The numerator and denominator dicts must share their vtype keys and, within
    each vtype, their sample keys; this is enforced with assertions. The ratio
    name is `<prefix>_ratio_<suffix>` when the two input names share a
    non-empty prefix and suffix, and `<num>_over_<den>` otherwise. Errors of
    each ratio are passed through `resetErrors`.
    """
    def haveSameItems(a, b) :
        return sorted(a)==sorted(b)

    def ratioNameFor(hNames) :
        "pick the name of the ratio histogram from the two input names"
        head, tail = commonPrefix(hNames), commonSuffix(hNames)
        if len(head) and len(tail) :
            return head+'_ratio_'+tail
        return '_over_'.join(hNames)

    assert haveSameItems(histosNum.keys(), histosDen.keys()),"num and den w/ different vtype keys"
    output = {}
    for vt in histosDen.keys() :
        numerators, denominators = histosNum[vt], histosDen[vt]
        assert haveSameItems(numerators.keys(), denominators.keys()),"num and den w/ different sample keys"
        perSample = {}
        for sampleName in denominators.keys() :
            hTop, hBottom = numerators[sampleName], denominators[sampleName]
            bothNames = [hTop.GetName(), hBottom.GetName()]
            hRatio = buildRatioHistogram(hTop, hBottom, ratioNameFor(bothNames))
            resetErrors(hRatio)
            perSample[sampleName] = hRatio
        output[vt] = perSample
    return output
def buildSideBandSubRate(file, lepton, variable_name) :
    """Sideband-subtracted num/den rate for the real-lepton control region.

    Reads the 'num'/'den' histograms for `<lepton>_realCR_`, `_realSideLow_`
    and `_realSideHigh_` + `variable_name`, subtracts the two sidebands from
    the Z-window histograms and replaces the bin errors with values computed
    from the errors before subtraction. Returns the resulting ratio.
    """
    def histosFor(region) :
        return getNumDenHistos(file, lepton+region+variable_name)
    zWindow = histosFor('_realCR_')
    lowSide = histosFor('_realSideLow_')
    highSide = histosFor('_realSideHigh_')

    def errorsOf(h) :
        return [h.GetBinError(b) for b in range(1, 1+h.GetNbinsX())]

    def combinedError(errLow, errWindow, errHigh) :
        # NOTE(review): sideband variances are subtracted from the window
        # variance, not added; the original author marked this as unclear
        # ("ask Matt"). Kept as is -- confirm the intended formula.
        return sqrt(errWindow*errWindow - (errLow*errLow + errHigh*errHigh))

    # read the errors first: the Add() calls below change the histograms
    newErrors = {}
    for nd in ['num', 'den'] :
        perBin = zip(errorsOf(lowSide[nd]), errorsOf(zWindow[nd]), errorsOf(highSide[nd]))
        newErrors[nd] = [combinedError(lo, win, hi) for lo, win, hi in perBin]

    for nd in ['num', 'den'] :
        lowSide[nd].Add(highSide[nd])
    for nd in ['num', 'den'] :
        zWindow[nd].Add(lowSide[nd], -1)

    for nd in ['num', 'den'] :
        target, values = zWindow[nd], newErrors[nd]
        for b in range(1, 1+target.GetNbinsX()) :
            target.SetBinError(b, values[b-1])
    return buildRatioHistogram(zWindow['num'], zWindow['den'])
def buildRatio(inputFile=None, histoBaseName='') :
    "Ratio of `<histoBaseName>_num` over `<histoBaseName>_den` from `inputFile`, named `<histoBaseName>_rat`"
    numerator = inputFile.Get(histoBaseName+'_num')
    denominator = inputFile.Get(histoBaseName+'_den')
    return buildRatioHistogram(numerator, denominator, histoBaseName+'_rat')
def plotHistosRatio(histosPairs=[], canvasName='') :
    "Build the num/den ratio of every pair and plot all ratios on the canvas `canvasName`"
    ratios = []
    for pair in histosPairs :
        ratios.append(buildRatioHistogram(pair['num'], pair['den']))
    plotHistos(ratios, canvasName)
def main() :
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-n', '--n_iter', type='int', default=8)
    parser.add_option('-m', '--input_mc')
    parser.add_option('-d', '--input_data')
    parser.add_option('-o', '--output')
    parser.add_option('-p', '--plot', help='plot inputs') # todo: implement sanity plot vs. n_iter
    parser.add_option('-v','--verbose', action='store_true', default=False)
    (opts, args) = parser.parse_args()
    requiredOptions = ['n_iter', 'input_mc', 'input_data', 'output']
    otherOptions = ['plot', 'verbose']
    allOptions = requiredOptions + otherOptions
    def optIsNotSpecified(o) : return not hasattr(opts, o) or getattr(opts,o) is None
    if any(optIsNotSpecified(o) for o in requiredOptions) : parser.error('Missing required option')
    nIter        = opts.n_iter
    fnameInputMc = opts.input_mc
    fnameInputDa = opts.input_data
    fnameOutput  = opts.output
    plot         = opts.plot
    verbose      = opts.verbose
    if verbose : print ('\nUsing the following options:\n'
                        +'\n'.join("%s : %s"%(o, str(getattr(opts, o))) for o in allOptions))
    fileData = r.TFile.Open(fnameInputDa)
    fileMc   = r.TFile.Open(fnameInputMc)
    assert fileData and fileMc, "Missing input files: data %s, mc %s"%(str(fileData), str(fileMc))
    correctionHistos = {}
    for lep in ['muon', 'elec'] :
        if verbose : print "Lepton: %s"%lep
        hRealDataCr = getNumDenHistos(fileData, lep+'_realCR_all_l_pt')
        hFakeDataLo = getNumDenHistos(fileData, lep+'_fakeHF_all_l_pt')
        hFakeDataHi = getNumDenHistos(fileData, lep+'_fakeHF_high_all_l_pt')
        hFakeMcLo   = getNumDenHistos(fileMc,   lep+'_fakeHF_all_l_pt')
        hFakeMcHi   = getNumDenHistos(fileMc,   lep+'_fakeHF_high_all_l_pt')
        if plot :
            hNumDen = [hFakeDataLo, hFakeDataHi, hFakeMcLo, hFakeMcHi]
            for nd in ['num','den'] : plotHistos([h[nd] for h in hNumDen], 'c_'+lep+'_'+nd)
            plotHistosRatio(hNumDen, 'c_'+lep+'_ratio')
        def missingInputHisto(ndHistos) : return any(not h for h in ndHistos.values())
        histoCollToBeChecked = ['hRealDataCr','hFakeDataLo','hFakeDataHi','hFakeMcLo','hFakeMcHi']
        missingHistos = dict([(nhc,hp) for nhc,hp in [(hc, eval(hc)) for hc in histoCollToBeChecked]
                              if missingInputHisto(hp)])
        if len(missingHistos) :
            print (lep+' : missing histograms: \n'
                   +'\n'.join(["%s: num %s den %s"%(k, v['num'], v['den'])
                               for k,v in missingHistos.iteritems()]))
            continue
        hRealEff = buildRatioHistogram(hRealDataCr['num'], hRealDataCr['den'], 'real_eff')
        corrected = dict([(nd, hFakeDataLo[nd].Clone('corrected_'+nd)) for nd in ['num', 'den']])
        for iteration in range(nIter) :
            rate = buildRatioHistogram(corrected['num'], corrected['den']) # temporary rate (?)
            if verbose :
                def lf2s(l) : return ', '.join(["%.3f"%e for e in l])
                print "Iteration %d, corrected values:"%iteration
                print "  num   %s"%lf2s(binContents(corrected['num']))
                print "  den   %s"%lf2s(binContents(corrected['den']))
                print "  ratio %s"%lf2s(binContents(rate))
            dataNum, dataDen = hFakeDataHi['num'], hFakeDataHi['den']
            for nd,tl in [('num','tight'), ('den','loose')] :
                corr, dataLow = corrected[nd], hFakeDataLo[nd]
                mcLow, mcHi = hFakeMcLo[nd], hFakeMcHi[nd]
                corrFact = getCorrFactors(hRealEff, rate, dataNum, dataDen, mcHi, tl)
                corr = correctRate(corr, dataLow, mcLow, corrFact)
        ratio = buildRatioHistogram(corrected['num'], corrected['den'], lep+'_corHFRate')
        correctionHistos[lep] = ratio
    if verbose : print "saving output to ",fnameOutput
    fileOut = r.TFile.Open(fnameOutput, 'recreate')
    fileOut.cd()
    for l,h in correctionHistos.iteritems() :
        if verbose : print "%s : writing %s\n%s"%(l, h.GetName(),histo1dToTxt(h))
        h.Write()
    fileOut.Close()
예제 #20
0
def main() :
    """Plot nominal, up and down variations of one systematic for one process.

    The process (-p) and systematic (-s) must be among those returned by
    `get_input_samples()`, otherwise StandardError is raised. For each
    histogram in `histogram_names` a canvas is drawn with the three variations
    overlaid in the top pad and the up/nominal and down/nominal ratios in the
    bottom pad. Each canvas is saved as png and eps in the output directory
    (-o) and appended to the multi-page pdf `<process>_sys_<systematic>.pdf`,
    whose first page is the sample summary. For histograms whose name starts
    with 'h_meff' the normalization change is also printed.
    """
    parser = argparse.ArgumentParser(description=description,
                                     epilog=epilog,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    add_arg = parser.add_argument
    add_arg('-o', '--output-dir', default="./")
    add_arg('-p', '--process', help='one physics process, eg. ttw')
    add_arg('-s', '--systematic', help='one of the systematic variations')
    add_arg('-v', '--verbose', action='store_true')
    add_arg('-d', '--debug', action='store_true')
    args = parser.parse_args()

    set_log(args.verbose, args.debug)
    outdir = args.output_dir
    process = args.process
    systematic = args.systematic

    available_processes = get_input_samples().keys()
    if process not in available_processes:
        raise StandardError("invalid process %s, should be one of %s"%(process, str(available_processes)))
    available_systematics = get_input_samples()[process].keys()
    if systematic not in available_systematics:
        raise StandardError("invalid systematic %s, should be one of %s"%(systematic, str(available_systematics)))

    file_label = process+'_sys_'+systematic
    plot_label = process+' sys. '+systematic

    normalize_to_unity = False # True
    luminosity = 1.0

    combiner = HistogramCombiner()

    combiner.build_samples(process=process, systematic=systematic)
    histogram_names = ['h_meff', 'h_jetN',
                       # 'h_electronPt', 'h_muonPt',
                       # 'h_bjetEmulN', 'h_bjetN',
                       # 'h_bjetEmulN_sr3b', 'h_bjetN_sr3b',
                       # 'h_bjetEmulN_sr1b', 'h_bjetN_sr1b',
                       # 'h_bjetEmulN_cr2bttV', 'h_bjetN_cr2bttV',
                       # 'h_meff_sr3b',    'h_jetN_sr3b',    'h_met_sr3b',
                       # 'h_meff_sr1b',    'h_jetN_sr1b',    'h_met_sr1b',
                       # 'h_meff_sr0b5j',  'h_jetN_sr0b5j',  'h_met_sr0b5j',
                       # 'h_meff_sr0b4j',  'h_jetN_sr0b4j',  'h_met_sr0b4j',
                       # 'h_meff_sr0b3j',  'h_jetN_sr0b3j',  'h_met_sr0b3j',
                       # 'h_meff_cr2bttV', 'h_jetN_cr2bttV', 'h_met_cr2bttV'
                       ]

    combiner.compute_normalization_factors()
    output_pdf_name = outdir+'/'+file_label+'.pdf'
    c_summary = R.TCanvas('c_summary', 'plotExplicitSamples sampes summary ')
    combiner.print_sample_summary_to_pdf(c_summary, label="%s: nominal vs. %s systematic"%(process, systematic))
    c_summary.SaveAs(output_pdf_name+'(') # '(' opens the multi-page pdf

    # helpers defined once, rather than on every loop iteration
    def pretty_scale_legend_label(h, l):
        # compare by value: identity ('is') on string literals is an interpreter detail
        return ("nominal" if l == 'nom' else
                "#mu = 2.0 #mu_{0}" if l == 'up' else
                "#mu = 0.5 #mu_{0}" if l == 'dn' else
                'unknown')
    def integral_and_error(h):
        error = R.Double(0.0)
        integral = h.IntegralAndError(1, h.GetNbinsX()+1, error)
        return integral, error
    def ratio_and_error(ave=(1.0, 0.01), bve=(2.0, 0.001)):
        "ratio a/b with relative errors added in quadrature; (0, 0) if either value is zero"
        a, sa = ave
        b, sb = bve
        if a and b:
            r = a/b
            e = r * sqrt((sa/a)*(sa/a)+(sb/b)*(sb/b))
            return r, e
        else:
            return 0.0, 0.0

    last_index = len(histogram_names) - 1
    for histogram_index, histogram_name in enumerate(histogram_names):
        rebin = 'meff' in histogram_name # and '_sr' in histogram_name # non-inclusive histos: low stats
        rebin_factor = (2 if 'meff' in histogram_name else 2 if 'jetN' in histogram_name else 1) if rebin else 1
        histograms = combiner.get_histograms(histogram_name=histogram_name)
        h_nom = histograms['nominal']
        h_up  = histograms['up']
        h_dn  = histograms['down']
        if 'h_jetFlavorMultiplicity' in histogram_name:
            h_nom = emulate_btag_multiplicity_from_truth_flavor(h_nom, 'nom')
            h_up  = emulate_btag_multiplicity_from_truth_flavor(h_up, 'up')
            h_dn  = emulate_btag_multiplicity_from_truth_flavor(h_dn, 'dn')
        histos = [h_nom, h_up, h_dn]
        for h in set(histos): # set: avoid rebinning twice when up==down
            h.Rebin(rebin_factor)
        h_nom.SetLineWidth(2*h_nom.GetLineWidth())
        h_nom.SetLineColor(R.kBlack)
        h_up.SetLineColor(R.kBlue)
        h_dn.SetLineColor(R.kRed)

        pad_master = h_nom
        pad_master.SetMaximum(1.1*max([h.GetMaximum() for h in histos]))
        pad_master.SetMinimum(1.0*min([0.0]+[h.GetMinimum() for h in histos]))
        pad_master.GetYaxis().SetTitle('Arbitrary Units')
        pad_master.SetStats(0)
        can = R.TCanvas('c_ttV_syst_'+histogram_name, 'ttV explicit variations '+pad_master.GetTitle(), 700, 700)
        botPad, topPad = ru.buildBotTopPads(can, squeezeMargins=False)
        # top
        can.cd()
        topPad.Draw()
        topPad.cd()
        topPad._po = [pad_master] # persistent objects
        pad_master.GetXaxis().SetTitleSize(0)
        pad_master.GetXaxis().SetLabelSize(0)
        pad_master.Draw('axis')
        # ru.topRightLabel(topPad, pad_master.GetTitle(), xpos=0.5)
        ru.topRightLabel(topPad, "#bf{#it{ATLAS}} Simulation Preliminary", xpos=0.85, ypos=0.9)
        ru.topRightLabel(topPad, "#sqrt{s} = 13 TeV",                   xpos=0.85, ypos=0.8)

        leg = ru.topRightLegend(can, legWidth=0.225, legHeight=0.300, hShift=-0.10, vShift=-0.25)
        leg.SetBorderSize(0)
        # leg.SetHeader(plot_label+ ("(norm=1)" if normalize_to_unity else "(lumi %.1f)"%luminosity))
        topPad._po.append(leg)
        for h,l in [(h_nom, 'nom'), (h_up, 'up'), (h_dn, 'dn')]:
            h.Draw('hist same')
            leg.AddEntry(h, pretty_scale_legend_label(h, l), 'l')
            topPad._po.append(h)
        leg.Draw('same')
        print_normalization_summary = histogram_name.startswith('h_meff')
        if print_normalization_summary:
            nom_int, nom_err = integral_and_error(h_nom)
            up_int, up_err = integral_and_error(h_up)
            dn_int, dn_err = integral_and_error(h_dn)
            rup, rupe = ratio_and_error((up_int, up_err), (nom_int, nom_err))
            rdn, rdne = ratio_and_error((dn_int, dn_err), (nom_int, nom_err))
            print ("normalization change: "
                   +"{} up {:.1%} +/- {:.1%} down {:.1%} +/- {:.1%} ".format(h_nom.GetName(), 1.0-rup, rupe, 1.0-rdn, rdne)
                   +"(integral: "
                   +"nom {:.2E}  +/- {:.2E}, up {:.2E} +/- {:.2E}, do {:.2E} +/- {:.2E})".format(nom_int, nom_err,
                                                                                                 up_int, up_err,
                                                                                                 dn_int, dn_err)
                   +" (entries nom {:.2E} up {:.2E} do {:.2E}".format(h_nom.GetEntries(), h_up.GetEntries(), h_dn.GetEntries()))
            # raw string: '\p' is not an escape sequence, the text is unchanged
            print ("tex normalization change: "
                   +r"{} up ${:.1%} \pm {:.1%}$ down ${:.1%} \pm {:.1%}$ ".format(h_nom.GetName(), 1.0-rup, rupe, 1.0-rdn, rdne)
                   )

        topPad.Update()
        # bottom
        can.cd()
        botPad.SetTopMargin(1.25*botPad.GetTopMargin())
        botPad.Draw()
        botPad.cd()
        ratio_up = ru.buildRatioHistogram(h_up, h_nom)
        ratio_dn = ru.buildRatioHistogram(h_dn, h_nom)
        yMin, yMax = 0.5, 1.5
        ratioPadMaster = pad_master.Clone(pad_master.GetName()+'_ratio')
        ratioPadMaster.SetMinimum(yMin)
        ratioPadMaster.SetMaximum(yMax)
        ratioPadMaster.SetStats(0)
        ratioPadMaster.Draw('axis')
        x_lo, x_hi = ru.getXrange(ratioPadMaster)
        refLines = [ru.referenceLine(x_lo, x_hi, y, y) for y in [0.5, 1.0, 1.5] if y>yMin and y<yMax]
        for l in refLines : l.Draw()
        ratio_up.Draw('same')
        ratio_dn.Draw('same')
        xA, yA = ratioPadMaster.GetXaxis(), ratioPadMaster.GetYaxis()
        textScaleUp = 0.75*1.0/botPad.GetHNDC()
        yA.SetNdivisions(-102)
        yA.SetTitle('Ratio')
        yA.CenterTitle()
        yA.SetTitleOffset(yA.GetTitleOffset()/textScaleUp)
        xA.SetTitleSize(yA.GetTitleSize()) # x- was set to 0 for padmaster, restore it
        xA.SetLabelSize(yA.GetLabelSize())
        xA.SetTitle(prettify_title(xA.GetTitle()))
        for a in [xA, yA] :
            a.SetLabelSize(a.GetLabelSize()*textScaleUp)
            a.SetTitleSize(a.GetTitleSize()*textScaleUp)
        botPad._graphical_objects = [ratio_up, ratio_dn, ratioPadMaster] + refLines # avoid garbage collection
        botPad.Update()
        can.Update()
        # position in the list, not object identity, decides when to close the pdf
        last_histo = histogram_index == last_index
        can.SaveAs(outdir+'/'+can.GetName()+'.png')
        can.SaveAs(outdir+'/'+can.GetName()+'.eps')
        can.SaveAs(output_pdf_name+ (')' if last_histo else '')) # ')' closes the multi-page pdf
def buildRate(file, histo_basename) :
    """
    Return the ratio histogram for `histo_basename` read from `file`.

    The 'num' and 'den' histograms are obtained with getNumDenHistos and
    passed, in this order, to buildRatioHistogram.
    """
    numAndDen = getNumDenHistos(file, histo_basename)
    numerator, denominator = numAndDen['num'], numAndDen['den']
    return buildRatioHistogram(numerator, denominator)
# Example #22
# 0
def buildRatio(inputFile=None, histoBaseName=''):
    """
    Fetch '<histoBaseName>_num' and '<histoBaseName>_den' from `inputFile`
    and return the result of buildRatioHistogram on them.

    '<histoBaseName>_rat' is passed as the third argument of
    buildRatioHistogram. Although `inputFile` defaults to None, an object
    providing Get() is needed for the call to succeed.
    """
    numerator = inputFile.Get(histoBaseName + '_num')
    denominator = inputFile.Get(histoBaseName + '_den')
    return buildRatioHistogram(numerator, denominator, histoBaseName + '_rat')
def buildRate(file, histo_basename):
    """
    Look up the 'num' and 'den' histograms of `histo_basename` in `file`
    (via getNumDenHistos) and return their ratio histogram.
    """
    histos = getNumDenHistos(file, histo_basename)
    num = histos['num']
    den = histos['den']
    return buildRatioHistogram(num, den)
def plotHistosRatio(histosPairs=[], canvasName=''):
    """
    Build the num/den ratio histogram of every entry in `histosPairs`
    and hand the resulting list to plotHistos together with `canvasName`.

    Each entry of `histosPairs` must provide the keys 'num' and 'den'.
    """
    histosRatio = []
    for pair in histosPairs:
        histosRatio.append(buildRatioHistogram(pair['num'], pair['den']))
    plotHistos(histosRatio, canvasName)
# Example #25
# 0
def main():
    """
    Compare nominal, 'up' and 'down' histograms for one process and systematic.

    The command line selects the process, the systematic variation and the
    output directory. For every entry of `histogram_names` one canvas is
    drawn: the three distributions overlaid in the top pad, and the
    up/nominal and down/nominal ratios in the bottom pad. Each canvas is
    saved as .png and .eps in the output directory and appended to one
    multi-page pdf whose first page is the sample summary.

    Raises StandardError when the requested process or systematic is not
    among those returned by get_input_samples().
    """
    parser = argparse.ArgumentParser(
        description=description,
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    add_arg = parser.add_argument
    add_arg('-o', '--output-dir', default="./")
    add_arg('-p', '--process', help='one physics process, eg. ttw')
    add_arg('-s', '--systematic', help='one of the systematic variations')
    add_arg('-v', '--verbose', action='store_true')
    add_arg('-d', '--debug', action='store_true')
    args = parser.parse_args()

    set_log(args.verbose, args.debug)
    outdir = args.output_dir
    process = args.process
    systematic = args.systematic

    available_processes = get_input_samples().keys()
    if process not in available_processes:
        raise StandardError("invalid process %s, should be one of %s" %
                            (process, str(available_processes)))
    available_systematics = get_input_samples()[process].keys()
    if systematic not in available_systematics:
        raise StandardError("invalid systematic %s, should be one of %s" %
                            (systematic, str(available_systematics)))

    file_label = process + '_sys_' + systematic

    combiner = HistogramCombiner()
    combiner.build_samples(process=process, systematic=systematic)
    histogram_names = [
        'h_meff',
        'h_jetN',
        # 'h_electronPt', 'h_muonPt',
        # 'h_bjetEmulN', 'h_bjetN',
        # 'h_bjetEmulN_sr3b', 'h_bjetN_sr3b',
        # 'h_bjetEmulN_sr1b', 'h_bjetN_sr1b',
        # 'h_bjetEmulN_cr2bttV', 'h_bjetN_cr2bttV',
        # 'h_meff_sr3b',    'h_jetN_sr3b',    'h_met_sr3b',
        # 'h_meff_sr1b',    'h_jetN_sr1b',    'h_met_sr1b',
        # 'h_meff_sr0b5j',  'h_jetN_sr0b5j',  'h_met_sr0b5j',
        # 'h_meff_sr0b4j',  'h_jetN_sr0b4j',  'h_met_sr0b4j',
        # 'h_meff_sr0b3j',  'h_jetN_sr0b3j',  'h_met_sr0b3j',
        # 'h_meff_cr2bttV', 'h_jetN_cr2bttV', 'h_met_cr2bttV'
    ]

    combiner.compute_normalization_factors()
    output_pdf_name = outdir + '/' + file_label + '.pdf'
    c_summary = R.TCanvas('c_summary', 'plotExplicitSamples sampes summary ')
    combiner.print_sample_summary_to_pdf(
        c_summary,
        label="%s: nominal vs. %s systematic" % (process, systematic))
    # the trailing '(' opens the multi-page pdf; the matching ')' is
    # appended when the last histogram is saved
    c_summary.SaveAs(output_pdf_name + '(')

    # legend text for each variation; looked up by value (==), not identity
    legend_labels = {
        'nom': "nominal",
        'up': "#mu = 2.0 #mu_{0}",
        'dn': "#mu = 0.5 #mu_{0}",
    }

    def integral_and_error(h):
        "integral of h over bins 1..nbins+1 (overflow included) and its error"
        error = R.Double(0.0)
        integral = h.IntegralAndError(1, h.GetNbinsX() + 1, error)
        return integral, error

    def ratio_and_error(ave=(1.0, 0.01), bve=(2.0, 0.001)):
        """ratio a/b of two (value, error) pairs, with the relative errors
        added in quadrature; (0.0, 0.0) if either value is zero"""
        a, sa = ave
        b, sb = bve
        if a and b:
            r = a / b
            e = r * sqrt((sa / a) * (sa / a) + (sb / b) * (sb / b))
            return r, e
        else:
            return 0.0, 0.0

    last_index = len(histogram_names) - 1
    for index, histogram_name in enumerate(histogram_names):
        # effectively: rebin by 2 the 'meff' histograms, leave the others as
        # they are (the 'jetN' factor only matters if `rebin` is extended)
        rebin = 'meff' in histogram_name  # and '_sr' in histogram_name # non-inclusive histos: low stats
        rebin_factor = (2 if 'meff' in histogram_name else
                        2 if 'jetN' in histogram_name else 1) if rebin else 1
        histograms = combiner.get_histograms(histogram_name=histogram_name)
        h_nom = histograms['nominal']
        h_up = histograms['up']
        h_dn = histograms['down']
        if 'h_jetFlavorMultiplicity' in histogram_name:
            h_nom = emulate_btag_multiplicity_from_truth_flavor(h_nom, 'nom')
            h_up = emulate_btag_multiplicity_from_truth_flavor(h_up, 'up')
            h_dn = emulate_btag_multiplicity_from_truth_flavor(h_dn, 'dn')
        histos = [h_nom, h_up, h_dn]
        for h in set(histos):  # set: avoid rebinning twice when up==down
            h.Rebin(rebin_factor)
        h_nom.SetLineWidth(2 * h_nom.GetLineWidth())
        h_nom.SetLineColor(R.kBlack)
        h_up.SetLineColor(R.kBlue)
        h_dn.SetLineColor(R.kRed)

        # the nominal histogram also provides the axes of the top pad
        pad_master = h_nom
        pad_master.SetMaximum(1.1 * max([h.GetMaximum() for h in histos]))
        pad_master.SetMinimum(1.0 *
                              min([0.0] + [h.GetMinimum() for h in histos]))
        pad_master.GetYaxis().SetTitle('Arbitrary Units')
        pad_master.SetStats(0)
        can = R.TCanvas('c_ttV_syst_' + histogram_name,
                        'ttV explicit variations ' + pad_master.GetTitle(),
                        700, 700)
        botPad, topPad = ru.buildBotTopPads(can, squeezeMargins=False)
        # top
        can.cd()
        topPad.Draw()
        topPad.cd()
        topPad._po = [pad_master]  # persistent objects
        # the x axis is only labelled on the bottom pad
        pad_master.GetXaxis().SetTitleSize(0)
        pad_master.GetXaxis().SetLabelSize(0)
        pad_master.Draw('axis')
        ru.topRightLabel(topPad,
                         "#bf{#it{ATLAS}} Simulation Preliminary",
                         xpos=0.85,
                         ypos=0.9)
        ru.topRightLabel(topPad, "#sqrt{s} = 13 TeV", xpos=0.85, ypos=0.8)

        leg = ru.topRightLegend(can,
                                legWidth=0.225,
                                legHeight=0.300,
                                hShift=-0.10,
                                vShift=-0.25)
        leg.SetBorderSize(0)
        topPad._po.append(leg)

        for h, label in [(h_nom, 'nom'), (h_up, 'up'), (h_dn, 'dn')]:
            h.Draw('hist same')
            leg.AddEntry(h, legend_labels.get(label, 'unknown'), 'l')
            topPad._po.append(h)
        leg.Draw('same')

        print_normalization_summary = histogram_name.startswith('h_meff')
        if print_normalization_summary:
            nom_int, nom_err = integral_and_error(h_nom)
            up_int, up_err = integral_and_error(h_up)
            dn_int, dn_err = integral_and_error(h_dn)
            rup, rupe = ratio_and_error((up_int, up_err), (nom_int, nom_err))
            rdn, rdne = ratio_and_error((dn_int, dn_err), (nom_int, nom_err))
            print(
                "normalization change: " +
                "{} up {:.1%} +/- {:.1%} down {:.1%} +/- {:.1%} ".format(
                    h_nom.GetName(), 1.0 - rup, rupe, 1.0 - rdn, rdne) +
                "(integral: " +
                "nom {:.2E}  +/- {:.2E}, up {:.2E} +/- {:.2E}, do {:.2E} +/- {:.2E})"
                .format(nom_int, nom_err, up_int, up_err, dn_int, dn_err) +
                " (entries nom {:.2E} up {:.2E} do {:.2E})".format(
                    h_nom.GetEntries(), h_up.GetEntries(), h_dn.GetEntries()))
            # '\\pm' prints a literal backslash followed by 'pm' (LaTeX)
            print(
                "tex normalization change: " +
                "{} up ${:.1%} \\pm {:.1%}$ down ${:.1%} \\pm {:.1%}$ ".format(
                    h_nom.GetName(), 1.0 - rup, rupe, 1.0 - rdn, rdne))

        topPad.Update()
        # bottom
        can.cd()
        botPad.SetTopMargin(1.25 * botPad.GetTopMargin())
        botPad.Draw()
        botPad.cd()
        ratio_up = ru.buildRatioHistogram(h_up, h_nom)
        ratio_dn = ru.buildRatioHistogram(h_dn, h_nom)
        yMin, yMax = 0.5, 1.5
        ratioPadMaster = pad_master.Clone(pad_master.GetName() + '_ratio')
        ratioPadMaster.SetMinimum(yMin)
        ratioPadMaster.SetMaximum(yMax)
        ratioPadMaster.SetStats(0)
        ratioPadMaster.Draw('axis')
        x_lo, x_hi = ru.getXrange(ratioPadMaster)
        # only the reference values strictly inside the y range get a line
        refLines = [
            ru.referenceLine(x_lo, x_hi, y, y) for y in [0.5, 1.0, 1.5]
            if y > yMin and y < yMax
        ]
        for line in refLines:
            line.Draw()
        ratio_up.Draw('same')
        ratio_dn.Draw('same')
        xA, yA = ratioPadMaster.GetXaxis(), ratioPadMaster.GetYaxis()
        # scale the text by the inverse of the pad height fraction
        textScaleUp = 0.75 * 1.0 / botPad.GetHNDC()
        yA.SetNdivisions(-102)
        yA.SetTitle('Ratio')
        yA.CenterTitle()
        yA.SetTitleOffset(yA.GetTitleOffset() / textScaleUp)
        xA.SetTitleSize(
            yA.GetTitleSize())  # x- was set to 0 for padmaster, restore it
        xA.SetLabelSize(yA.GetLabelSize())
        xA.SetTitle(prettify_title(xA.GetTitle()))
        for a in [xA, yA]:
            a.SetLabelSize(a.GetLabelSize() * textScaleUp)
            a.SetTitleSize(a.GetTitleSize() * textScaleUp)
        botPad._graphical_objects = [ratio_up, ratio_dn, ratioPadMaster
                                     ] + refLines  # avoid garbage collection
        botPad.Update()
        can.Update()
        # position in the list, not string identity, decides which page
        # closes the pdf
        last_histo = index == last_index
        can.SaveAs(outdir + '/' + can.GetName() + '.png')
        can.SaveAs(outdir + '/' + can.GetName() + '.eps')
        can.SaveAs(output_pdf_name + (')' if last_histo else ''))
def plotHistosRatio(histosPairs=[], canvasName="", outdir="./"):
    """
    Build the num/den ratio histogram of every entry in `histosPairs`,
    print the list, prefix each title with "ratio ", and pass the list to
    plotHistos together with `canvasName` and `outdir`.

    Each entry of `histosPairs` must provide the keys "num" and "den".
    """
    histosRatio = []
    for pair in histosPairs:
        histosRatio.append(buildRatioHistogram(pair["num"], pair["den"]))
    print(histosRatio)
    for ratio in histosRatio:
        retitled = "ratio " + ratio.GetTitle()
        ratio.SetTitle(retitled)
    plotHistos(histosRatio, canvasName, outdir)
def main():
    """
    Build the iteratively corrected rate histograms and write them to a file.

    For each lepton ('muon', 'elec') the num/den histograms
    '<lep>_realCR_all_l_pt', '<lep>_fakeHF_all_l_pt' and
    '<lep>_fakeHF_high_all_l_pt' are read from the data file, and the two
    fakeHF ones from the MC file as well. Starting from clones of the data
    '<lep>_fakeHF_all_l_pt' numerator and denominator, getCorrFactors and
    correctRate are applied n_iter times; the final num/den ratio is stored
    as '<lep>_corHFRate' in the output file. A lepton with any missing input
    histogram is reported and skipped.

    Exits through parser.error() when a required option is missing, and
    fails an assertion when an input file cannot be opened.
    """
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-n', '--n_iter', type='int', default=8)
    parser.add_option('-m', '--input_mc')
    parser.add_option('-d', '--input_data')
    parser.add_option('-o', '--output')
    parser.add_option(
        '-p', '--plot',
        help='plot inputs')  # todo: implement sanity plot vs. n_iter
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    opts, _ = parser.parse_args()
    requiredOptions = ['n_iter', 'input_mc', 'input_data', 'output']
    otherOptions = ['plot', 'verbose']
    allOptions = requiredOptions + otherOptions

    def optIsNotSpecified(o):
        return getattr(opts, o, None) is None

    def lf2s(l):
        "format a list of floats as a comma-separated string"
        return ', '.join(["%.3f" % e for e in l])

    def missingInputHisto(ndHistos):
        "true if any histogram of the collection evaluates to false"
        return any(not h for h in ndHistos.values())

    if any(optIsNotSpecified(o) for o in requiredOptions):
        parser.error('Missing required option')
    nIter = opts.n_iter
    fnameInputMc = opts.input_mc
    fnameInputDa = opts.input_data
    fnameOutput = opts.output
    plot = opts.plot
    verbose = opts.verbose
    if verbose:
        print(
            '\nUsing the following options:\n' +
            '\n'.join("%s : %s" % (o, str(getattr(opts, o)))
                      for o in allOptions))
    fileData = r.TFile.Open(fnameInputDa)
    fileMc = r.TFile.Open(fnameInputMc)
    assert fileData and fileMc, "Missing input files: data %s, mc %s" % (
        str(fileData), str(fileMc))
    correctionHistos = {}
    for lep in ['muon', 'elec']:
        if verbose:
            print("Lepton: %s" % lep)
        hRealDataCr = getNumDenHistos(fileData, lep + '_realCR_all_l_pt')
        hFakeDataLo = getNumDenHistos(fileData, lep + '_fakeHF_all_l_pt')
        hFakeDataHi = getNumDenHistos(fileData, lep + '_fakeHF_high_all_l_pt')
        hFakeMcLo = getNumDenHistos(fileMc, lep + '_fakeHF_all_l_pt')
        hFakeMcHi = getNumDenHistos(fileMc, lep + '_fakeHF_high_all_l_pt')
        if plot:
            hNumDen = [hFakeDataLo, hFakeDataHi, hFakeMcLo, hFakeMcHi]
            for nd in ['num', 'den']:
                plotHistos([h[nd] for h in hNumDen], 'c_' + lep + '_' + nd)
            plotHistosRatio(hNumDen, 'c_' + lep + '_ratio')

        # explicit (label, collection) pairs: no eval() on variable names,
        # and the report below comes out in a fixed order
        histoCollToBeChecked = [
            ('hRealDataCr', hRealDataCr),
            ('hFakeDataLo', hFakeDataLo),
            ('hFakeDataHi', hFakeDataHi),
            ('hFakeMcLo', hFakeMcLo),
            ('hFakeMcHi', hFakeMcHi),
        ]
        missingHistos = [(name, coll) for name, coll in histoCollToBeChecked
                         if missingInputHisto(coll)]
        if missingHistos:
            print(
                lep + ' : missing histograms: \n' + '\n'.join([
                    "%s: num %s den %s" % (k, v['num'], v['den'])
                    for k, v in missingHistos
                ]))
            continue
        hRealEff = buildRatioHistogram(hRealDataCr['num'], hRealDataCr['den'],
                                       'real_eff')
        corrected = dict([(nd, hFakeDataLo[nd].Clone('corrected_' + nd))
                          for nd in ['num', 'den']])
        for iteration in range(nIter):
            rate = buildRatioHistogram(corrected['num'],
                                       corrected['den'])  # temporary rate (?)
            if verbose:
                print("Iteration %d, corrected values:" % iteration)
                print("  num   %s" % lf2s(binContents(corrected['num'])))
                print("  den   %s" % lf2s(binContents(corrected['den'])))
                print("  ratio %s" % lf2s(binContents(rate)))
            dataNum, dataDen = hFakeDataHi['num'], hFakeDataHi['den']
            for nd, tl in [('num', 'tight'), ('den', 'loose')]:
                corr, dataLow = corrected[nd], hFakeDataLo[nd]
                mcLow, mcHi = hFakeMcLo[nd], hFakeMcHi[nd]
                corrFact = getCorrFactors(hRealEff, rate, dataNum, dataDen,
                                          mcHi, tl)
                # NOTE(review): the return value is not stored back into
                # `corrected`; this presumably relies on correctRate
                # updating `corr` in place -- confirm
                correctRate(corr, dataLow, mcLow, corrFact)
        ratio = buildRatioHistogram(corrected['num'], corrected['den'],
                                    lep + '_corHFRate')
        correctionHistos[lep] = ratio
    if verbose:
        print("saving output to  %s" % fnameOutput)
    fileOut = r.TFile.Open(fnameOutput, 'recreate')
    fileOut.cd()
    for l, h in correctionHistos.items():
        if verbose:
            print("%s : writing %s\n%s" % (l, h.GetName(), histo1dToTxt(h)))
        h.Write()
    fileOut.Close()
def plotIsoComparison(histosPerSource={}, outputDir='', region='', lepton='', verbose=False):
    """
    plot a comparison of eff(T|L) for real and for fake leptons
    vs. pt, where the numerator is one of the tight definitions

    histosPerSource is indexed as [source][variable][selection]; only the
    'pt' variable is used. It must contain the 'real' source, at least one
    other source, and the selections 'loose', 'tight', 'tight_std',
    'tight_minden' and 'tight_tight'. The 'fake' histograms are the sum
    over all the sources other than 'real'. The canvas is saved as a png
    under outputDir, after calling mkdirIfNeeded(outputDir).
    The `verbose` argument is currently not used.
    """
    var = 'pt'
    sources = histosPerSource.keys()
    lOrTOrTs = first(first(histosPerSource)).keys()
    histosPtPerSource = dict((s, dict((lt, histosPerSource[s][var][lt]) for lt in lOrTOrTs)) for s in sources)
    def buildTotFakeHistos():
        "add up all the non-real (fake) sources"
        notRealSources = [s for s in sources if s!='real']
        aSource = first(notRealSources)
        totFakeHistos = dict()
        for lt in ['loose', 'tight', 'tight_std', 'tight_minden', 'tight_tight']:
            # an emptied clone of one fake source provides binning and name
            template = histosPtPerSource[aSource][lt]
            h = template.Clone(template.GetName().replace(aSource, 'fake'))
            h.Reset()
            # sum only the non-real sources: 'real' must not leak into the fake total
            for s in notRealSources : h.Add(histosPtPerSource[s][lt])
            totFakeHistos[lt] = h
        return totFakeHistos
    histosPtPerSource['fake'] = buildTotFakeHistos()
    effReal_wh     = rootUtils.buildRatioHistogram(histosPtPerSource['real']['tight'       ], histosPtPerSource['real']['loose'])
    effReal_std    = rootUtils.buildRatioHistogram(histosPtPerSource['real']['tight_std'   ], histosPtPerSource['real']['loose'])
    effReal_minden = rootUtils.buildRatioHistogram(histosPtPerSource['real']['tight_minden'], histosPtPerSource['real']['loose'])
    effReal_tight  = rootUtils.buildRatioHistogram(histosPtPerSource['real']['tight_tight' ], histosPtPerSource['real']['loose'])
    effFake_wh     = rootUtils.buildRatioHistogram(histosPtPerSource['fake']['tight'       ], histosPtPerSource['fake']['loose'])
    effFake_std    = rootUtils.buildRatioHistogram(histosPtPerSource['fake']['tight_std'   ], histosPtPerSource['fake']['loose'])
    effFake_minden = rootUtils.buildRatioHistogram(histosPtPerSource['fake']['tight_minden'], histosPtPerSource['fake']['loose'])
    effFake_tight  = rootUtils.buildRatioHistogram(histosPtPerSource['fake']['tight_tight' ], histosPtPerSource['fake']['loose'])
    frameName, frameTitle = region+'_'+lepton, "fake and real efficiencies for %s in %s"%(lepton, region)
    can = r.TCanvas('c_'+frameName, frameTitle, 800, 600)
    can.cd()
    # the first histogram also provides the frame (axes, y range, y title)
    pm = effReal_wh
    pm.SetMinimum(0.0)
    pm.SetMaximum(1.1)
    pm.GetYaxis().SetTitle("#epsilon(T|L)")
    # color identifies real vs. fake, marker identifies the tight definition
    colorReal, colorFake = r.kBlue, r.kRed
    markerWh, markerStd, markerMinden, markerTight = r.kMultiply, r.kCircle, r.kOpenTriangleUp, r.kOpenSquare
    def setAttrs(h, mark, col):
        h.SetLineColor(col)
        h.SetMarkerColor(col)
        h.SetMarkerStyle(mark)
    setAttrs(effReal_wh,     markerWh,     colorReal)
    setAttrs(effReal_std,    markerStd,    colorReal)
    setAttrs(effReal_minden, markerMinden, colorReal)
    setAttrs(effReal_tight,  markerTight,  colorReal)
    setAttrs(effFake_wh,     markerWh,     colorFake)
    setAttrs(effFake_std,    markerStd,    colorFake)
    setAttrs(effFake_minden, markerMinden, colorFake)
    setAttrs(effFake_tight,  markerTight,  colorFake)
    pm.SetStats(0)
    pm.Draw('axis')
    # the 'tight_tight' efficiencies are built and styled, but currently not drawn
    drawnHistos = [effReal_wh, effReal_std, effReal_minden, effFake_wh, effFake_std, effFake_minden]
    for h in drawnHistos:
        h.Draw('same')
    leg = rightLegend(can)
    leg.SetBorderSize(0)
    # entries with an empty TObject and no draw option act as section headers
    leg.AddEntry(r.TObject(),   'Real', '')
    leg.AddEntry(effReal_std,   'std iso', 'lp')
    #leg.AddEntry(effReal_tight, 'tight iso', 'lp')
    leg.AddEntry(effReal_minden,'minden iso', 'lp')
    leg.AddEntry(effReal_wh,    'wh iso',  'lp')
    leg.AddEntry(r.TObject(),   'Fake', '')
    leg.AddEntry(effFake_std,   'std iso', 'lp')
    #leg.AddEntry(effFake_tight, 'tight iso', 'lp')
    leg.AddEntry(effFake_minden,'minden iso', 'lp')
    leg.AddEntry(effFake_wh,  '  wh iso',  'lp')
    leg.Draw()
    topRightLabel(can, "#splitline{%s}{%s}"%(lepton, region), xpos=0.125, align=13)
    can.RedrawAxis()
    # keep a reference to every drawn histogram on the canvas
    can._histos = drawnHistos
    can.Update()
    mkdirIfNeeded(outputDir)
    can.SaveAs(os.path.join(outputDir, frameTitle+'.png'))