def weightedAverage(histosEff={}, histosWeight={}, histoName='', histoTitle='', verbose=False): getBinIndices, getBinContents, getBinning = rootUtils.getBinIndices, rootUtils.getBinContents, rootUtils.getBinning assert sorted(histosEff.keys())==sorted(histosWeight.keys()),"effs and weights must have the same keys:\n\teffs %s\n\tweights %s"%(str(histosEff.keys()), str(histosWeight.keys())) hout = first(histosEff).Clone(histoName if histoName else 'weighted_average_eff') hout.SetTitle(histoTitle) hout.Reset() def validateWeights(h): bins, values = getBinIndices(h), getBinContents(h) allValid = all(v>=0.0 and v<=1.0 for v in values) if not allValid: if verbose : print "warning '%s' weights not in [0,1] : [%s]"%(h.GetName(), ', '.join(("%.3f"%v for v in values))) print "setting them to 0.0 or 1.0" for b, v in zip(bins, values) : h.SetBinContent(b, 0.0 if v<0.0 else 1.0 if v>1.0 else v) return allValid [validateWeights(hw) for hw in histosWeight.values()] bins, binning = getBinIndices(first(histosEff)), getBinning(first(histosEff)) for h in histosWeight.values()+histosEff.values(): if getBinning(h)!=binning : print "warning: %s has binning %s, expecting %s"%(h.GetName(), str(getBinning(h)), str(binning)) groups = sorted(histosEff.keys()) epsilon = 1.0e-3 binWeightNormalizations = [sum(histosWeight[g].GetBinContent(b) for g in groups) for b in bins] weightsAreNormalized = all(abs(1.0-norm)<epsilon for norm in binWeightNormalizations) if not weightsAreNormalized : print "warning, compositions are not normalized : [%s]"%', '.join(("%.3f"%v for v in binWeightNormalizations)) print '-- ',histoName,'-- ' for g in groups: bws, bcs = getBinContents(histosWeight[g]), getBinContents(histosEff[g]) print "adding %18s : %s"%(g, ' : '.join("%.4f*%.4f"%(bw, bc) for bw, bc in zip(bws, bcs))) histoEff, histoWeight = histosEff[g], histosWeight[g] histoEff.Multiply(histoWeight) hout.Add(histoEff) print "tot weight : %s"%' '.join(("%.4f"%v for v in (sum(histosWeight[g].GetBinContent(b) for 
g in groups) for b in bins))) print "weighted avg : %s"%' '.join(("%.4f"%v for v in getBinContents(hout))) return hout
def buildRatioAndScaleIt(histoPrefix='', inputFile=None, scaleFactor=1.0, verbose=False) : ratioHisto = buildRatio(inputFile, histoPrefix) def lf2s(l) : return ', '.join(["%.3f"%e for e in l]) if verbose: print ratioHisto.GetName()," before scaling: ",lf2s(getBinContents(ratioHisto)) if type(scaleFactor)==float : ratioHisto.Scale(scaleFactor) elif type(scaleFactor)==r.TH1F : ratioHisto.Multiply(scaleFactor) elif type(scaleFactor)==r.TH2F : ratioHisto.Multiply(scaleFactor) else : raise TypeError("unknown SF type %s, %s"%(type(scaleFactor), str(scaleFactor))) if verbose: print ratioHisto.GetName()," after scaling: ",lf2s(getBinContents(ratioHisto)) return ratioHisto
def buildCorrectionHisto( hndRealDataCr, hndFakeDataLo, hndFakeDataHi, hndFakeMcLo, hndFakeMcHi, histoname="lep_corHFRate", nIter=1, verbose=False, plotdir=None, ): hRealEff = buildRatioHistogram(hndRealDataCr["num"], hndRealDataCr["den"], "real_eff") corrected = dict([(nd, hndFakeDataLo[nd].Clone("corrected_" + nd)) for nd in ["num", "den"]]) print "buildCorrectionHisto with nIter ", nIter can = r.TCanvas("c_" + histoname, "") can.Draw() can.cd() hRealEff.GetYaxis().SetRangeUser(0.0, 1.0) hRealEff.Draw("axis") tex = r.TLatex() tex.SetNDC(True) tex.DrawLatex(0.15, 0.915, histoname) can._tex = tex can._histos = [hRealEff] can._leg = r.TLegend(0.925, 0.25, 1.0, 0.9, "iter") can._leg.SetBorderSize(0) can._leg.SetFillColor(0) for iteration in range(nIter): print "iter ", iteration rate = buildRatioHistogram(corrected["num"], corrected["den"]) # temporary rate (?) if verbose: print "Iteration %d, corrected values:" % iteration print " num %s" % lf2s(getBinContents(corrected["num"])) print " den %s" % lf2s(getBinContents(corrected["den"])) print " ratio %s" % lf2s(getBinContents(rate)) dataNum, dataDen = hndFakeDataHi["num"], hndFakeDataHi["den"] for nd, tl in [("num", "tight"), ("den", "loose")]: corr, dataLow = corrected[nd], hndFakeDataLo[nd] mcLow, mcHi = hndFakeMcLo[nd], hndFakeMcHi[nd] corrFact = getCorrFactors(hRealEff, rate, dataNum, dataDen, mcHi, tl) corr = correctRate(corr, dataLow, mcLow, corrFact) rate.SetLineColor(20 + iteration) rate.SetMarkerColor(20 + iteration) rate.SetLineWidth(4) rate.SetMarkerStyle(r.kFullCross) can._histos.append(rate.DrawClone("el same")) can._leg.AddEntry(can._histos[-1], "%d" % iteration, "lp") ratio = buildRatioHistogram(corrected["num"], corrected["den"], histoname) can._leg.Draw() can.Update() if plotdir: can.SaveAs(plotdir + "/" + histoname + "_iterations.png") return ratio
def print_scale_factor_dict(sf_dict): "dict is something like {conv,heavy : {flat,vs_eta : values}}" print "scale factors:" for k,v in sf_dict.iteritems(): values = getBinContents(v['vs_eta']) if 'vs_eta' in v and v['vs_eta'] else [v['flat'],] formatted_values = ', '.join("%.3f"%_ for _ in values) print "%s : %s"%(k, formatted_values)
def validateWeights(h): bins, values = getBinIndices(h), getBinContents(h) allValid = all(v>=0.0 and v<=1.0 for v in values) if not allValid: if verbose : print "warning '%s' weights not in [0,1] : [%s]"%(h.GetName(), ', '.join(("%.3f"%v for v in values))) print "setting them to 0.0 or 1.0" for b, v in zip(bins, values) : h.SetBinContent(b, 0.0 if v<0.0 else 1.0 if v>1.0 else v) return allValid
def histo1dToTxt(h):
    """Represent a TH1 as a string with name, bin edges, contents, and errors.

    The result has four lines, 'hisName', 'binEdge', 'binCont' and
    'binErr', each formatted as '<label> : <value>'; the numeric lists are
    printed with three decimals and separated by ', '.
    """
    bins = getBinIndices(h)
    hisName = h.GetName()
    binEdge = [h.GetBinLowEdge(b) for b in bins]
    if bins:
        # upper edge of the last bin closes the list of edges
        binEdge.append(binEdge[-1] + h.GetBinWidth(bins[-1]))
    binCont = getBinContents(h)
    # 'be' is defined elsewhere; presumably it returns the error of bin b
    # -- TODO confirm
    binErr = [be(h, b) for b in bins]

    def lf2s(l):
        return ", ".join(["%.3f" % e for e in l])

    # Explicit (label, value) pairs instead of eval() on local names: eval
    # inside a comprehension cannot see the function locals under Python 3.
    rows = [
        ("hisName", hisName),
        ("binEdge", lf2s(binEdge)),
        ("binCont", lf2s(binCont)),
        ("binErr", lf2s(binErr)),
    ]
    return "\n".join(["%s : %s" % (n, v) for n, v in rows])
def getBinContents(self, variable, selection):
    """Return the bin contents of the histogram for (variable, selection).

    The histogram is obtained from self.getHistogram and passed to the
    module-level getBinContents function.
    """
    # The bound method itself used to be passed, not the histogram it
    # returns. Assumes getHistogram accepts (variable, selection), as the
    # otherwise unused parameters suggest -- TODO confirm.
    return getBinContents(self.getHistogram(variable, selection))
def subtractRealAndComputeScaleFactor(histosPerGroup={}, variable='', outRatiohistoname='',outDataeffhistoname='', outputDir='./', region='', subtractReal=True, verbose=False): "efficiency scale factor" groups = histosPerGroup.keys() mkdirIfNeeded(outputDir) histosPerType = dict([(lt, dict([(g, histosPerGroup[g][variable][lt]) for g in groups])) for lt in leptonTypes]) for lt in leptonTypes : histosPerType[lt]['totSimBkg'] = summedHisto([histo for group,histo in histosPerType[lt].iteritems() if group not in ['data', 'signal']]) simuTight = histosPerType['fake_tight']['totSimBkg'] simuLoose = histosPerType['fake_loose']['totSimBkg'] dataTight = histosPerType['tight' ]['data' ] dataLoose = histosPerType['loose' ]['data' ] # subtract real contribution from data # _Note to self_: currently estimating the real contr from MC; in # the past also used iterative corr, which might be more # appropriate in cases like here, where the normalization is # so-so. Todo: investigate the normalization. dataSubTight = dataTight.Clone(dataTight.GetName().replace('data_tight','data_minus_prompt_tight')) dataSubLoose = dataLoose.Clone(dataLoose.GetName().replace('data_loose','data_minus_prompt_loose')) dataSubTight.SetDirectory(0) dataSubLoose.SetDirectory(0) dataSubTight.Add(histosPerType['real_tight']['totSimBkg'], -1.0 if subtractReal else 0.0) dataSubLoose.Add(histosPerType['real_loose']['totSimBkg'], -1.0 if subtractReal else 0.0) effData = dataSubTight.Clone(outDataeffhistoname) effData.SetDirectory(0) effData.Divide(dataSubLoose) effSimu = simuTight.Clone(simuTight.GetName().replace('fake_tight','fake_eff')) effSimu.SetDirectory(0) effSimu.Divide(simuLoose) print "eff(T|L) vs. 
",variable def formatFloat(floats): return ["%.4f"%f for f in floats] print "efficiency data : ",formatFloat(getBinContents(effData)) print "efficiency simu : ",formatFloat(getBinContents(effSimu)) ratio = effData.Clone(outRatiohistoname) ratio.SetDirectory(0) ratio.Divide(effSimu) print "sf data/simu : ",formatFloat(getBinContents(ratio)) print " +/- : ",formatFloat(getBinErrors(ratio)) can = r.TCanvas('c_'+outRatiohistoname, outRatiohistoname, 800, 600) botPad, topPad = rootUtils.buildBotTopPads(can) can.cd() topPad.Draw() topPad.cd() pm = effData pm.SetStats(0) pm.Draw('axis') xAx, yAx = pm.GetXaxis(), pm.GetYaxis() xAx.SetTitle('') xAx.SetLabelSize(0) yAx.SetRangeUser(0.0, 0.25) textScaleUp = 1.0/topPad.GetHNDC() yAx.SetLabelSize(textScaleUp*0.04) yAx.SetTitleSize(textScaleUp*0.04) yAx.SetTitle('#epsilon(T|L)') yAx.SetTitleOffset(yAx.GetTitleOffset()/textScaleUp) effSimu.SetLineColor(r.kRed) effSimu.SetMarkerStyle(r.kOpenCross) effSimu.SetMarkerColor(effSimu.GetLineColor()) effData.Draw('same') effSimu.Draw('same') leg = drawLegendWithDictKeys(topPad, {'data':effData, 'simulation':simuTight}, legWidth=0.4) leg.SetHeader('scale factor '+region+' '+('electron' if '_el_'in outRatiohistoname else 'muon' if '_mu_' in outRatiohistoname else '')) can.cd() botPad.Draw() botPad.cd() ratio.SetStats(0) ratio.Draw() textScaleUp = 1.0/botPad.GetHNDC() xAx, yAx = ratio.GetXaxis(), ratio.GetYaxis() yAx.SetRangeUser(0.0, 2.0) xAx.SetTitle({'pt1':'p_{T}', 'eta1':'|#eta|', 'pt1_eta1':'p_{T}'}[variable]) yAx.SetNdivisions(-202) yAx.SetTitle('Data/Sim') yAx.CenterTitle() xAx.SetLabelSize(textScaleUp*0.04) xAx.SetTitleSize(textScaleUp*0.04) yAx.SetLabelSize(textScaleUp*0.04) yAx.SetTitleSize(textScaleUp*0.04) refLine = rootUtils.referenceLine(xAx.GetXmin(), xAx.GetXmax()) refLine.Draw() can.Update() outFname = os.path.join(outputDir, region+'_'+outRatiohistoname) for ext in ['.eps','.png']: utils.rmIfExists(outFname+ext) can.SaveAs(outFname+ext) return {outRatiohistoname : ratio, 
outDataeffhistoname : effData, outDataeffhistoname.replace('_fake_rate_data_', '_tight_data_minus_prompt') : dataSubTight, outDataeffhistoname.replace('_fake_rate_data_', '_loose_data_minus_prompt') : dataSubLoose }
def main(): parser = optparse.OptionParser(usage=usage) parser.add_option("-n", "--n_iter", type="int", default=8) parser.add_option("-m", "--input_mc") parser.add_option("-d", "--input_data") parser.add_option("-o", "--output") parser.add_option("-p", "--plot", help="plot inputs") # todo: implement sanity plot vs. n_iter parser.add_option("-v", "--verbose", action="store_true", default=False) (opts, args) = parser.parse_args() requiredOptions = ["n_iter", "input_mc", "input_data", "output"] otherOptions = ["plot", "verbose"] allOptions = requiredOptions + otherOptions def optIsNotSpecified(o): return not hasattr(opts, o) or getattr(opts, o) is None if any(optIsNotSpecified(o) for o in requiredOptions): parser.error("Missing required option") nIter = opts.n_iter fnameInputMc = opts.input_mc fnameInputDa = opts.input_data fnameOutput = opts.output plotdir = opts.plot verbose = opts.verbose if verbose: print ( "\nUsing the following options:\n" + "\n".join("%s : %s" % (o, str(getattr(opts, o))) for o in allOptions) ) fileData = r.TFile.Open(fnameInputDa) fileMc = r.TFile.Open(fnameInputMc) if plotdir: mkdirIfNeeded(plotdir) assert fileData and fileMc, "Missing input files: data %s, mc %s" % (str(fileData), str(fileMc)) correctionHistos = {} for lep in ["muon", "elec"]: if verbose: print "Lepton: %s" % lep hRealDataCr = getNumDenHistos(fileData, lep + "_realCR_all_l_pt") hFakeDataLo = getNumDenHistos(fileData, lep + "_fakeHF_all_l_pt") hFakeDataHi = getNumDenHistos(fileData, lep + "_fakeHF_high_all_l_pt") hFakeMcLo = getNumDenHistos(fileMc, lep + "_fakeHF_all_l_pt") hFakeMcHi = getNumDenHistos(fileMc, lep + "_fakeHF_high_all_l_pt") if plotdir: hNumDen = [hFakeDataLo, hFakeDataHi, hFakeMcLo, hFakeMcHi] for nd in ["num", "den"]: plotHistos([h[nd] for h in hNumDen], "c_" + lep + "_" + nd, plotdir) plotHistosRatio(hNumDen, "c_" + lep + "_ratio", plotdir) h2dRealDataCr = getNumDenHistos(fileData, lep + "_realCR_all_l_pt_eta") h2dFakeDataLo = getNumDenHistos(fileData, lep + 
"_fakeHF_all_l_pt_eta") h2dFakeDataHi = getNumDenHistos(fileData, lep + "_fakeHF_high_all_l_pt_eta") h2dFakeMcLo = getNumDenHistos(fileMc, lep + "_fakeHF_all_l_pt_eta") h2dFakeMcHi = getNumDenHistos(fileMc, lep + "_fakeHF_high_all_l_pt_eta") def missingInputHisto(ndHistos): return any(not h for h in ndHistos.values()) histoCollToBeChecked = ["hRealDataCr", "hFakeDataLo", "hFakeDataHi", "hFakeMcLo", "hFakeMcHi"] missingHistos = dict( [(nhc, hp) for nhc, hp in [(hc, eval(hc)) for hc in histoCollToBeChecked] if missingInputHisto(hp)] ) for v in histoCollToBeChecked: print "entries 1d %s : num %d den %d (%s)" % ( v, eval(v)["num"].GetEntries(), eval(v)["den"].GetEntries(), str(eval(v)["den"]), ) histoCollToBeChecked = ["h2dRealDataCr", "h2dFakeDataLo", "h2dFakeDataHi", "h2dFakeMcLo", "h2dFakeMcHi"] missingHistos = dict( [(nhc, hp) for nhc, hp in [(hc, eval(hc)) for hc in histoCollToBeChecked] if missingInputHisto(hp)] ) for v in histoCollToBeChecked: print "entries 2d %s : num %d den %d (%s)" % ( v, eval(v)["num"].GetEntries(), eval(v)["den"].GetEntries(), str(eval(v)["den"]), ) print histoCollToBeChecked print missingHistos if len(missingHistos): print ( lep + " : missing histograms: \n" + "\n".join(["%s: num %s den %s" % (k, v["num"], v["den"]) for k, v in missingHistos.iteritems()]) ) continue correctionHistos[lep] = buildCorrectionHisto( hRealDataCr, hFakeDataLo, hFakeDataHi, hFakeMcLo, hFakeMcHi, nIter=nIter, verbose=verbose, histoname=lep + "_corHFRate", plotdir=plotdir, ) # here do the 2d ones print 10 * "--", " now doing the 2d ones ", 10 * "--" dummy = h2dRealDataCr["num"] xAx, yAx = dummy.GetXaxis(), dummy.GetYaxis() print dummy.GetName(), ": bins (%d, %d)" % (dummy.GetNbinsX(), dummy.GetNbinsY()) nEtaBins = yAx.GetNbins() print "nEtaBins: ", nEtaBins xMin, xMax = xAx.GetXmin(), xAx.GetXmax() etaBins = range(1, 1 + nEtaBins) for eb in etaBins: def etaSlice(h, b, p): return h.ProjectionX(p + h.GetName() + "_eta%d" % b, b, b) # prefix needed to avoid 
overwriting hRealDataCr = dict((k, etaSlice(h, eb, "rdc")) for k, h in h2dRealDataCr.iteritems()) hFakeDataLo = dict((k, etaSlice(h, eb, "fdl")) for k, h in h2dFakeDataLo.iteritems()) hFakeDataHi = dict((k, etaSlice(h, eb, "fdh")) for k, h in h2dFakeDataHi.iteritems()) hFakeMcLo = dict((k, etaSlice(h, eb, "fml")) for k, h in h2dFakeMcLo.iteritems()) hFakeMcHi = dict((k, etaSlice(h, eb, "fmh")) for k, h in h2dFakeMcHi.iteritems()) print "eta bin ", eb for k, h in hFakeDataLo.iteritems(): print "fakeDataLo %s : %s" % (k, lf2s(getBinContents(h))) correctionHistos[lep + "_eta%d" % eb] = buildCorrectionHisto( hRealDataCr, hFakeDataLo, hFakeDataHi, hFakeMcLo, hFakeMcHi, nIter=nIter, verbose=verbose, histoname=lep + "_corHFRate" + "_eta_bin%d" % eb, ) correctionHistos[lep + "_eta"] = combineEtaSlices( template2d=h2dRealDataCr["num"], etaSlicedRates=dict((k, h) for k, h in correctionHistos.iteritems() if (lep + "_eta") in k), histoname=lep + "_corHFRate_eta", ) print 10 * "--", " done ", 10 * "--" if verbose: print "saving output to ", fnameOutput fileOut = r.TFile.Open(fnameOutput, "recreate") fileOut.cd() print "keys ", correctionHistos.keys() for l, h in correctionHistos.iteritems(): if verbose: print "%s : writing %s\n%s" % (l, h.GetName(), histo1dToTxt(h)) h.Write() fileOut.Close()
def subtractRealAndComputeScaleFactor(histosPerGroup={}, variable='', outRatiohistoname='', outDataeffhistoname='', outputDir='./', region='', subtractReal=True, verbose=False): "efficiency scale factor" groups = histosPerGroup.keys() mkdirIfNeeded(outputDir) histosPerType = dict([(lt, dict([(g, histosPerGroup[g][variable][lt]) for g in groups])) for lt in leptonTypes]) for lt in leptonTypes: histosPerType[lt]['totSimBkg'] = summedHisto([ histo for group, histo in histosPerType[lt].iteritems() if group not in ['data', 'signal'] ]) simuTight = histosPerType['fake_tight']['totSimBkg'] simuLoose = histosPerType['fake_loose']['totSimBkg'] dataTight = histosPerType['tight']['data'] dataLoose = histosPerType['loose']['data'] # subtract real contribution from data # _Note to self_: currently estimating the real contr from MC; in # the past also used iterative corr, which might be more # appropriate in cases like here, where the normalization is # so-so. Todo: investigate the normalization. dataSubTight = dataTight.Clone(dataTight.GetName().replace( 'data_tight', 'data_minus_prompt_tight')) dataSubLoose = dataLoose.Clone(dataLoose.GetName().replace( 'data_loose', 'data_minus_prompt_loose')) dataSubTight.SetDirectory(0) dataSubLoose.SetDirectory(0) dataSubTight.Add(histosPerType['real_tight']['totSimBkg'], -1.0 if subtractReal else 0.0) dataSubLoose.Add(histosPerType['real_loose']['totSimBkg'], -1.0 if subtractReal else 0.0) effData = dataSubTight.Clone(outDataeffhistoname) effData.SetDirectory(0) effData.Divide(dataSubLoose) effSimu = simuTight.Clone(simuTight.GetName().replace( 'fake_tight', 'fake_eff')) effSimu.SetDirectory(0) effSimu.Divide(simuLoose) print "eff(T|L) vs. 
", variable def formatFloat(floats): return ["%.4f" % f for f in floats] print "efficiency data : ", formatFloat(getBinContents(effData)) print "efficiency simu : ", formatFloat(getBinContents(effSimu)) ratio = effData.Clone(outRatiohistoname) ratio.SetDirectory(0) ratio.Divide(effSimu) print "sf data/simu : ", formatFloat(getBinContents(ratio)) print " +/- : ", formatFloat(getBinErrors(ratio)) can = r.TCanvas('c_' + outRatiohistoname, outRatiohistoname, 800, 600) botPad, topPad = rootUtils.buildBotTopPads(can) can.cd() topPad.Draw() topPad.cd() pm = effData pm.SetStats(0) pm.Draw('axis') xAx, yAx = pm.GetXaxis(), pm.GetYaxis() xAx.SetTitle('') xAx.SetLabelSize(0) yAx.SetRangeUser(0.0, 0.25) textScaleUp = 1.0 / topPad.GetHNDC() yAx.SetLabelSize(textScaleUp * 0.04) yAx.SetTitleSize(textScaleUp * 0.04) yAx.SetTitle('#epsilon(T|L)') yAx.SetTitleOffset(yAx.GetTitleOffset() / textScaleUp) effSimu.SetLineColor(r.kRed) effSimu.SetMarkerStyle(r.kOpenCross) effSimu.SetMarkerColor(effSimu.GetLineColor()) effData.Draw('same') effSimu.Draw('same') leg = drawLegendWithDictKeys(topPad, { 'data': effData, 'simulation': simuTight }, legWidth=0.4) leg.SetHeader('scale factor ' + region + ' ' + ('electron' if '_el_' in outRatiohistoname else 'muon' if '_mu_' in outRatiohistoname else '')) can.cd() botPad.Draw() botPad.cd() ratio.SetStats(0) ratio.Draw() textScaleUp = 1.0 / botPad.GetHNDC() xAx, yAx = ratio.GetXaxis(), ratio.GetYaxis() yAx.SetRangeUser(0.0, 2.0) xAx.SetTitle({ 'pt1': 'p_{T}', 'eta1': '|#eta|', 'pt1_eta1': 'p_{T}' }[variable]) yAx.SetNdivisions(-202) yAx.SetTitle('Data/Sim') yAx.CenterTitle() xAx.SetLabelSize(textScaleUp * 0.04) xAx.SetTitleSize(textScaleUp * 0.04) yAx.SetLabelSize(textScaleUp * 0.04) yAx.SetTitleSize(textScaleUp * 0.04) refLine = rootUtils.referenceLine(xAx.GetXmin(), xAx.GetXmax()) refLine.Draw() can.Update() outFname = os.path.join(outputDir, region + '_' + outRatiohistoname) for ext in ['.eps', '.png']: utils.rmIfExists(outFname + ext) 
can.SaveAs(outFname + ext) return { outRatiohistoname: ratio, outDataeffhistoname: effData, outDataeffhistoname.replace('_fake_rate_data_', '_tight_data_minus_prompt'): dataSubTight, outDataeffhistoname.replace('_fake_rate_data_', '_loose_data_minus_prompt'): dataSubLoose }
def getBinContents(self, variable, selection):
    """Return the bin contents of the histogram for (variable, selection).

    The histogram is obtained from self.getHistogram and passed to the
    module-level getBinContents function.
    """
    # The bound method itself used to be passed, not the histogram it
    # returns. Assumes getHistogram accepts (variable, selection), as the
    # otherwise unused parameters suggest -- TODO confirm.
    return getBinContents(self.getHistogram(variable, selection))