Ejemplo n.º 1
0
def get_yield_histogram(list_of_file_names, regions, labels=None, hsuffix="_cutflow", sfs=None):
    """Sum per-region yields over several ROOT files into one histogram.

    Each entry of ``regions`` is either ``"name"`` or ``"name(N)"``.  The
    histogram ``name + hsuffix`` is read from every file and one bin of it is
    accumulated:

    * ``"name"``      -> the last regular bin (``GetNbinsX()``),
    * ``"name(N)"``   -> bin ``N``; a negative ``N`` counts from the end, so
      ``-1`` maps to ``GetNbinsX()``.

    Args:
        list_of_file_names: paths of the ROOT files to read.
        regions: region specifications as described above.
        labels: optional bin labels, indexed like ``regions``.
        hsuffix: suffix appended to the region name to form the histogram name.
        sfs: scale-factor mapping forwarded to ``apply_sf``.

    Returns:
        A ``TH1F`` named ``"yields"`` with one bin per region, holding the
        summed value and error.

    A region that cannot be read from a given file (missing histogram,
    malformed specification, ...) is skipped silently for that file.
    """
    labels = [] if labels is None else labels
    sfs = {} if sfs is None else sfs

    final_h = r.TH1F("yields", "", len(regions), 0, len(regions))
    final_h.Sumw2()
    yields = [E(0, 0) for _ in regions]

    for file_name in list_of_file_names:
        f = r.TFile(file_name)
        try:
            for index, region in enumerate(regions):
                try:
                    parts = region.split("(")
                    prefix = parts[0]
                    h = f.Get(prefix + hsuffix)
                    # NOTE(review): apply_sf modifies h in place; if f.Get hands
                    # back the same object for a prefix that appears in several
                    # regions, the scale factor may be applied more than once
                    # -- confirm against ROOT's object caching.
                    apply_sf(h, sfs, file_name, prefix)
                    if len(parts) > 1:
                        binoffset = int(parts[1].split(")")[0])
                    else:
                        binoffset = h.GetNbinsX()
                    if binoffset < 0:
                        binoffset = h.GetNbinsX() + 1 + binoffset
                    yields[index] += E(h.GetBinContent(binoffset), h.GetBinError(binoffset))
                except Exception:
                    # Best effort: this region is simply not counted for this
                    # file.  KeyboardInterrupt/SystemExit still propagate.
                    continue
        finally:
            f.Close()

    for i, total in enumerate(yields):
        final_h.SetBinContent(i + 1, total.val)
        final_h.SetBinError(i + 1, total.err)
        if len(labels):
            final_h.GetXaxis().SetBinLabel(i + 1, labels[i])
            final_h.SetCanExtend(False)
    return final_h
Ejemplo n.º 2
0
def print_table(d_yields):
    nbins = len(d_yields["ttz"]["central"])
    # colnames = ["","$\\ttW$","$\\ttZ$","$\\ttH$","$\\ttVV$","X+$\\gamma$","Rares","Flips","Fakes","Total","Data","$\\tttt$"]
    # procs = ["ttw","ttz","tth","ttvv","xg","rares","flips","fakes","total_background","data","tttt"]

    colnames = ["","$\\ttW$","$\\ttZ$","$\\ttH$","$\\ttVV$","X+$\\gamma$","Rares","Flips","Fakes","Total","Data","$\\tttt$", "tot s+b"]
    allprocs = [["ttw"],["ttz"],["tth"],["ttvv"],["xg"],["rares"],["flips"],["fakes"],["total_background"],["data"],["tttt"],["total"]]

    # colnames = ["","$\\ttW$","$\\ttZ$","$\\ttH$","Others","Total","Data","$\\tttt$"]
    # allprocs = [["ttw"],["ttz"],["tth"],["ttvv","xg","rares","flips","fakes"],["total_background"],["data"],["tttt"]]

    srnames = ["CRZ","CRW","SR1","SR2","SR3","SR4","SR5","SR6","SR7","SR8"]
    for ibin in range(nbins):
        # print ibin
        if ibin == 0:
            print "&".join(map(lambda x: "{0:12s}".format(x),colnames)),
            print r"\\"
            print r"\hline\hline"

        tojoin = [srnames[ibin]]
        for procs in allprocs:
            tot_ve = E(0.,0.)
            for subproc in procs:
                ve = E(max(d_yields[subproc]["central"][ibin],0.), d_yields[subproc]["error"][ibin])
                tot_ve += ve
            cent,err = tot_ve
            if "data" in procs:
                tojoin.append("{0:.0f}".format(cent))
            else:
                tojoin.append("{0:5.2f}$\\pm${1:5.2f}".format(cent,err))
        print " & ".join(tojoin),
        print r"\\"
Ejemplo n.º 3
0
def apply_sf(h, sfs, file_name, hist_name):
    """Scale histogram ``h`` in place by every matching scale factor.

    Args:
        h: histogram to modify.
        sfs: nested mapping ``{file_pattern: {hist_pattern: (sf, sf_err)}}``.
            A scale factor applies when ``file_pattern`` is a substring of
            ``file_name`` and ``hist_pattern`` is a substring of ``hist_name``.
        file_name: name of the file the histogram was read from.
        hist_name: name used for histogram-pattern matching.

    Every bin from underflow (0) to overflow (``GetNbinsX() + 1``) is
    multiplied by the scale factor using ``E`` arithmetic, and both the
    content and the propagated error are written back.  If the x axis carries
    labels, its range is first reset to all bins and axis extension is
    switched off.
    """
    labels = h.GetXaxis().GetLabels()
    if labels:
        h.GetXaxis().SetRange(1, h.GetXaxis().GetNbins())
        h.GetXaxis().SetCanExtend(False)
    for key in sfs:
        if key not in file_name:
            continue
        for rptn in sfs[key]:
            if rptn not in hist_name:
                continue
            for i in range(h.GetNbinsX() + 2):
                bc, be = h.GetBinContent(i), h.GetBinError(i)
                nb = E(bc, be) * E(sfs[key][rptn][0], sfs[key][rptn][1])
                h.SetBinContent(i, nb.val)
                # The propagated error was previously computed but dropped.
                h.SetBinError(i, nb.err)
Ejemplo n.º 4
0
def rebin36(h):
    """Rebin ``h`` by a factor of 36 and fold bin 6 into bin 5.

    Used to bring the 180-bin emu-channel histograms down to 5 bins.  After
    the rebin, the content and error of bins 5 and 6 are combined with ``E``
    arithmetic, stored in bin 5, and bin 6 is zeroed.  The histogram is
    modified in place and also returned.
    """
    h.Rebin(36)

    merged = E(h.GetBinContent(5), h.GetBinError(5)) + E(h.GetBinContent(6), h.GetBinError(6))

    # Clear bin 6 first, then store the combined value in bin 5.
    for ibin, content, error in ((6, 0, 0), (5, merged.val, merged.err)):
        h.SetBinContent(ibin, content)
        h.SetBinError(ibin, error)

    return h
Ejemplo n.º 5
0
def print_yield_table(procs, rates, output_name):
    """Write text and TeX yield tables for a list of processes.

    ``procs`` holds names of the form ``"<prefix>_<procname>"`` and ``rates``
    the matching values exposing ``.val`` and ``.err``.  One single-bin
    histogram is built per process, preceded by an empty ``"obs"`` histogram
    and a ``"Total"`` histogram that sums every process except ``sig``,
    ``wzz`` and ``zzz``.  The tables go to ``output_name + ".txt"`` and
    ``output_name + ".tex"``.
    """
    # NOTE(review): the original condition compared against "wzz" twice; the
    # duplicate is redundant as written, but a different name may have been
    # intended -- confirm.
    excluded_from_total = ("sig", "wzz", "zzz")

    bkgh = r.TH1F("Total", "Total", 1, 0, 1)
    background_sum = E(0, 0)
    process_hists = []
    for proc, rate in zip(procs, rates):
        procname = proc.split("_")[1]
        h = r.TH1F(procname, procname, 1, 0, 1)
        if procname not in excluded_from_total:
            background_sum += rate
        h.SetBinContent(1, rate.val)
        h.SetBinError(1, rate.err)
        process_hists.append(h)

    bkgh.SetBinContent(1, background_sum.val)
    bkgh.SetBinError(1, background_sum.err)

    # The observation slot is an emptied copy of the total histogram.
    obsh = bkgh.Clone("obs")
    obsh.Reset()

    hists = [obsh, bkgh] + process_hists

    p.print_yield_table_from_list(hists, output_name + ".txt", prec=2, binrange=[1])
    p.print_yield_tex_table_from_list(hists, output_name + ".tex", prec=2)
Ejemplo n.º 6
0
def get_alpha(process, numerator_region, denominator_region, valopt="eff"):
    """Return single-bin histograms of alpha for the nominal case and each systematic.

    The nominal value comes from ``run_alpha`` with an empty systematic name.
    For every entry of ``syst_list_all[1:]`` the ``Up`` and ``Down`` variations
    are evaluated and the uncertainty is
    ``sqrt(|(up - nominal) * (down - nominal)|)`` around the nominal value.
    The ``METPileup`` relative uncertainty is then added in quadrature to the
    ``Pileup`` one and the ``METPileup`` entry is dropped.

    Returns:
        A list with one ``TH1F`` per key of ``syst_list_all`` (except
        ``METPileup``), named after the key, whose single bin holds the value
        and its uncertainty.  The final lookup only works if
        ``syst_list_all[0]`` is ``"Nominal"``.
    """
    variations = syst_list_all[1:]

    nominal = run_alpha(process, numerator_region, denominator_region, "", valopt)
    results = {"Nominal": nominal}

    for syst in variations:
        central = E(nominal.val, 0)
        shifted_up = run_alpha(process, numerator_region, denominator_region, syst + "Up", valopt)
        shifted_down = run_alpha(process, numerator_region, denominator_region, syst + "Down", valopt)
        product = (shifted_up - central) * (shifted_down - central)
        central.err = math.sqrt(abs(product.val))
        results[syst] = central

    # Not an entirely correct treatment, but a workaround: fold the
    # METPileup relative uncertainty into Pileup.
    rel_pileup = results["Pileup"].err / results["Pileup"].val
    rel_met_pileup = results["METPileup"].err / results["METPileup"].val
    combined_rel = math.sqrt(rel_pileup**2 + rel_met_pileup**2)
    results["Pileup"] = E(results["Pileup"].val, results["Pileup"].val * combined_rel)
    del results["METPileup"]

    hists = []
    for key in syst_list_all:
        if key != "METPileup":
            h = r.TH1F("{}".format(key), "", 1, 0, 1)
            h.SetBinContent(1, results[key].val)
            h.SetBinError(1, results[key].err)
            hists.append(h)

    return hists
Ejemplo n.º 7
0
def read_table(fname):
    """Parse a text yield table into ``{category: [E(value, error), ...]}``.

    The header row is the line containing ``"Bin#"``; the column names are
    taken from its ``|``-separated cells after the first three
    whitespace-separated tokens.  Every other line containing ``"Bin"`` is a
    data row whose cells look like ``value ± error`` and are appended, in
    column order, to the matching category.

    Args:
        fname: path of the table file.

    Returns:
        A dict mapping each category name to a list of ``E`` objects, one per
        data row.
    """
    # Read everything up front so the file handle is released immediately.
    with open(fname) as f:
        lines = [l.strip() for l in f.readlines()]

    categories = []
    yields = {}
    for line in lines:
        if "Bin#" in line:
            header = "".join(line.split()[3:]).replace("|", "\t")
            categories = header.split()
            for category in categories:
                yields[category] = []
        elif "Bin" in line:
            # "elif": a header must never be re-parsed as a data row, even if
            # one of its category names happens to contain "Bin".
            plus_minus = u"\u00B1"
            if isinstance(line, bytes):
                # Python 2 reads byte strings; match the UTF-8 encoded sign.
                plus_minus = plus_minus.encode("utf-8")
            row = "".join(line.split()[3:]).replace("|", "\t")
            row = row.replace(plus_minus, ",")
            for category, item in zip(categories, row.split()):
                val, err = item.split(",")
                yields[category].append(E(float(val), float(err)))

    return yields
Ejemplo n.º 8
0
    def print_yields(self, detail=False):
        systs_lines = []
        for syst in self.systs:
            systname = ""
            systvals = []
            if self.check_gmN(syst):
                systs_lines.append(self.get_syst_str(syst))
        systs_lines += self.get_stats_str().split('\n')[:-1]

        systs_data = {}
        for syst_line in systs_lines:
            systname = ""
            systvals = []
            syst_data = syst_line.split()
            systname = syst_data[0]
            if syst_data[1] == 'gmN':
                dataN = int(syst_data[2])
                systvals = syst_data[3:]
                systvals_new = []
                for systval in systvals:
                    if systval != "-":
                        systvals_new.append(
                            "{:.4f}".format(1. + 1. / math.sqrt(float(dataN))))
                    else:
                        systvals_new.append(systval)
                systvals = systvals_new
            else:
                systvals = syst_data[2:]
            systvals_in_float = []
            for systval in systvals:
                systval_in_float = 0
                if '/' in systval:
                    up = abs(float(systval.split('/')[0]) - 1)
                    down = abs(float(systval.split('/')[1]) - 1)
                    systval_in_float = math.sqrt(up * down)
                elif '-' in systval:
                    systval_in_float = 0
                else:
                    systval_in_float = abs(float(systval) - 1)
                systvals_in_float.append(systval_in_float)
            systs_data[systname] = systvals_in_float

        if detail:

            rates_errs = {}
            print_str = "{:<40s}".format("systematics")
            for procname in self.proc_names:
                print_str += "& " + "{:<20s}".format(procname)
            print print_str
            print_str = ""
            for systname in sorted(systs_data.keys()):
                print_str = "{:<40s}".format(systname)
                rates_errs[systname] = {}
                for index, (rate, procname) in enumerate(
                        zip(self.rates, self.proc_names)):
                    # rates_errs[systname][procname] = E(rate, 0)
                    rates_errs[systname][
                        procname] = systs_data[systname][index] * 100.
                    if rates_errs[systname][procname] == 0:
                        print_str += "& " + "{:<20s}".format("-")
                    else:
                        print_str += "& " + "{:<20.1f}".format(
                            rates_errs[systname][procname])
                print print_str
                print_str = ""

            # rates_errs = []
            # for index, _ in enumerate(self.rates):
            #     rate_err = E(self.rates[index], 0)
            #     for systname in systs_data:
            #         rate_err *= E(1, systs_data[systname][index])
            #     rates_errs.append(rate_err)

            # for proc, rate_err in zip(self.proc_names, rates_errs):
            #     if rate_err.val != 0:
            #         print proc, rate_err, rate_err.err / rate_err.val
            #     else:
            #         print proc, rate_err, 0

        else:

            rates_errs = []
            for index, _ in enumerate(self.rates):
                rate_err = E(self.rates[index], 0)
                for systname in systs_data:
                    rate_err *= E(1, systs_data[systname][index])
                rates_errs.append(rate_err)

            for proc, rate_err in zip(self.proc_names, rates_errs):
                if rate_err.val != 0:
                    print proc, rate_err, rate_err.err / rate_err.val
                else:
                    print proc, rate_err, 0

            return self.proc_names, rates_errs
Ejemplo n.º 9
0
print cmd
output = commands.getoutput(cmd)
print output
d_sfs = ast.literal_eval(output.replace("GREP", "").strip())

sf_wz, sf_err_wz = d_sfs["totals"]["wz"]
sf_ttz, sf_err_ttz = d_sfs["totals"]["ttz"]
sf_fakes, sf_err_fakes = d_sfs["totals"]["fakes"]
sf_rares, sf_err_rares = d_sfs["totals"]["rares"]

# calculate chi2
counts_wz, errs_wz = d_sfs["postfit_totals"]["wz"]
counts_ttz, errs_ttz = d_sfs["postfit_totals"]["ttz"]
counts_fakes, errs_fakes = d_sfs["postfit_totals"]["fakes"]
counts_rares, errs_rares = d_sfs["postfit_totals"]["rares"]
bins_wz = map(lambda x: E(*x), zip(counts_wz, errs_wz))
bins_ttz = map(lambda x: E(*x), zip(counts_ttz, errs_ttz))
bins_fakes = map(lambda x: E(*x), zip(counts_fakes, errs_fakes))
bins_rares = map(lambda x: E(*x), zip(counts_rares, errs_rares))

# if we are missing a wz bin at the end
if len(bins_ttz) == 4 and len(bins_wz) < 4:
    while len(bins_wz) < 4:
        bins_wz.append(E(0.0, 0.0))
bins_pred = map(sum, zip(*[bins_wz, bins_ttz, bins_fakes, bins_rares]))
bins_data = list(h_data)[1:-1]  # drop underflow and overflow bins
chi2sum = 0.
for data, pred_ce in zip(bins_data, bins_pred):
    chi2sum += ((data - pred_ce[0]) / pred_ce[1])**2.0
    print data, pred_ce[0], pred_ce[1]
Ejemplo n.º 10
0
def _data_minus_prompt(table):
    """Return the per-bin ``data - Total`` differences of a yield table."""
    return [datacount - bkgcount for datacount, bkgcount in zip(table["data"], table["Total"])]


def _safe_ratios(denominators, numerators):
    """Return ``num / den`` per bin, with ``E(0, 0)`` where ``den.val`` is zero."""
    ratios = []
    for den, num in zip(denominators, numerators):
        ratios.append(num / den if den.val != 0 else E(0, 0))
    return ratios


def _drop_edge_bins(rates):
    """Remove the first two entries and the last entry of ``rates`` in place."""
    rates.pop(0)
    rates.pop(0)
    rates.pop(-1)


def main_fake_rate_measurement(prefix, output_name, etaregion="", procname="ttbar6"):
    """Measure the fake rate in data and in MC, plot both, and return them.

    The ntuple version and tag are read from ``sys.argv[1]`` and
    ``sys.argv[2]``; yield tables are read from
    ``plots/<ntuple_version>/<tag>/lin/``.

    * Data: (data - Total) in the ``TightPrompt`` table divided by
      (data - Total) in the ``Prompt`` table.
    * MC: the ``procname`` column of the ``TightFake`` table divided by the
      same column of the ``Fake`` table.

    Bins with a zero denominator get ``E(0, 0)``.  The first two entries and
    the last entry of each list are dropped, and the rest are written into
    histograms starting at bin 2.

    Args:
        prefix: prefix of the table file names.
        output_name: base name of the output PDF (written inside the table
            directory).
        etaregion: suffix appended to the variable name in the file names.
        procname: column of the MC tables to use.

    Returns:
        ``(h_fr_mc, h_fr_data)`` as cloned histograms.

    Raises:
        ValueError: if the ntuple version contains none of 2016/2017/2018.
    """

    # Parse the input arguments
    try:
        ntuple_version = sys.argv[1]
        tag = sys.argv[2]
    except IndexError:
        # NOTE(review): usage() is presumably expected to exit -- confirm.
        usage()

    # Later years win if several appear in the version string.
    lumi = None
    if "2016" in ntuple_version: lumi = 35.9
    if "2017" in ntuple_version: lumi = 41.3
    if "2018" in ntuple_version: lumi = 59.74
    if lumi is None:
        raise ValueError(
            "Cannot determine luminosity: no known year (2016/2017/2018) in "
            "ntuple version '{}'".format(ntuple_version))

    basedir = "plots/{}/{}/lin/".format(ntuple_version, tag)
    variable = "__lepFakeCand2PtFineVarBin" + etaregion + ".txt"

    # Denominator : fake from data (i.e. data - prompt)
    ddfake_loose = _data_minus_prompt(rt.read_table(basedir + prefix + "Prompt" + variable))

    # Numerator : tight fake from data (i.e. data - prompt)
    ddfake_tight = _data_minus_prompt(rt.read_table(basedir + prefix + "TightPrompt" + variable))

    fr_data = _safe_ratios(ddfake_loose, ddfake_tight)
    # First entry is the underflow bin, last the overflow bin.  The second
    # entry is dropped as well, matching the fill below that starts at bin 2.
    _drop_edge_bins(fr_data)
    print(fr_data)

    # Denominator: fake directly from MC
    yields_ttbar = rt.read_table(basedir + prefix + "Fake" + variable)

    # Numerator: tight fake directly from MC
    yields_ttbar_tight = rt.read_table(basedir + prefix + "TightFake" + variable)

    fr_mc = _safe_ratios(yields_ttbar[procname], yields_ttbar_tight[procname])
    print(fr_mc)
    _drop_edge_bins(fr_mc)

    # bin boundaries
    bounds = [0., 10., 20., 30., 50., 70.]

    # NOTE(review): both histograms share the name "FR" (kept so the returned
    # clones keep their name); ROOT may warn about replacing the first one.
    h_fr_data = r.TH1F("FR","",len(bounds)-1,array('d',bounds))
    h_fr_mc = r.TH1F("FR","",len(bounds)-1,array('d',bounds))

    for idx, fr in enumerate(fr_data):
        h_fr_data.SetBinContent(idx+2, fr.val)
        h_fr_data.SetBinError(idx+2, fr.err)

    for idx, fr in enumerate(fr_mc):
        h_fr_mc.SetBinContent(idx+2, fr.val)
        h_fr_mc.SetBinError(idx+2, fr.err)

    # Options
    alloptions= {
               "ratio_range":[0.0,2.0],
               "nbins": 180,
               "autobin": False,
               "legend_scalex": 0.8,
               "legend_scaley": 0.8,
               "output_name": basedir + "/"+output_name+".pdf",
               "bkg_sort_method": "unsorted",
               "no_ratio": False,
               "print_yield": True,
               "yield_prec": 3,
               "draw_points": True,
               "hist_line_none": True,
               "show_bkg_errors": True,
               "lumi_value" : lumi,
               # "yaxis_range": [0., 1],
               }

    p.plot_hist(
           sigs = [],
           bgs = [h_fr_mc.Clone()],
           data = h_fr_data.Clone(),
           syst = None,
           colors=[2001],
           legend_labels=["MC t#bar{t}"],
           options=alloptions)

    return h_fr_mc.Clone(), h_fr_data.Clone()
Ejemplo n.º 11
0
def write_table(data, bgs, outname=None, signal=None, extra_hists=None,precision=2,sep = u"\u00B1".encode("utf-8"), binedge_fmt="{}-{}", fix_negative=True, binlabels=None, show_errors=True, cell_callback=None):
    """Build a per-bin yield table and optionally write it to a file.

    Columns are: each background, "Total bkg", "Data", "Data/bkg", then
    "tttt" if ``signal`` is given, then one column per entry of
    ``extra_hists``.  One row is produced per bin of ``data`` plus a final
    "total" row.

    Args:
        data: data histogram (provides ``edges``, ``counts``, ``errors``).
        bgs: list of background histograms; labels come from
            ``get_attr("label")``.
        outname: if given, the table is written to this path and ``sep`` is
            forced to ``"+-"``.
        signal: optional signal histogram.
        extra_hists: optional additional histograms appended as columns.
        precision: number of decimals.  In the total row the "Data/bkg" cell
            uses at least 2 decimals unless ``precision`` is 0.
        sep: separator between value and error.
        binedge_fmt: format of the bin-edge label, wrapped in ``[...]``.
        fix_negative: clamp negative per-bin central values at zero (the
            total row is not clamped).
        binlabels: optional row labels replacing the bin-edge label.
        show_errors: include the error in each cell.
        cell_callback: optional function applied to every formatted cell.

    Returns:
        The ``Table`` object when ``outname`` is empty; otherwise a dict with
        the keys ``"header"``, ``"bins"`` and ``"total"`` built from the
        written text.
    """
    extra_hists = [] if extra_hists is None else extra_hists
    binlabels = [] if binlabels is None else binlabels

    tab = Table()
    sumbgs = sum(bgs)
    databg = data/sumbgs

    procs = bgs+[sumbgs,data,databg]
    cnames = [bg.get_attr("label") for bg in bgs] + ["Total bkg","Data", "Data/bkg"]
    # Position of the Data/bkg column, which is recomputed in the total row.
    ratio_index = len(bgs) + 2
    if signal is not None:
        procs.append(signal)
        cnames.append("tttt")
    for eh in extra_hists:
        procs.append(eh)
        cnames.append(eh.get_attr("label"))
    tab.set_column_names(["bin"]+cnames)

    if outname:
        sep = "+-"

    def format_cell(cent, err, prec):
        # One formatting path for bin rows and the total row, so that
        # cell_callback is applied consistently.
        if show_errors:
            text = ("{0:5.%if} {1}{2:%i.%if}" % (prec,prec+3,prec)).format(cent,sep,err)
        else:
            text = ("{0:5.%if}" % (prec)).format(cent)
        if cell_callback:
            text = cell_callback(text)
        return text

    binpairs = zip(data.edges[:-1],data.edges[1:])
    tab.set_theme_basic()
    for ibin,binrow in enumerate(binpairs):
        if ibin < len(binlabels):
            row = [binlabels[ibin]]
        else:
            row = [("[%s]"%binedge_fmt).format(binrow[0],binrow[1])]
        for proc in procs:
            cent = proc.counts[ibin]
            if fix_negative:
                cent = max(cent,0.)
            row.append(format_cell(cent, proc.errors[ibin], precision))
        tab.add_row(row)
    tab.add_line()

    row = ["total"]
    for iproc,proc in enumerate(procs):
        cell_precision = precision
        if iproc == ratio_index:
            # Ratio of the summed data to the summed background, not the sum
            # of the per-bin ratios.
            totbg = E(sum(sumbgs.counts), np.sum(sumbgs.errors**2.)**0.5)
            totdata = E(sum(data.counts))
            ratio = totdata/totbg
            cent, err = ratio[0], ratio[1]
            # Kept local so that later columns keep the requested precision.
            cell_precision = max(precision, 2) if precision != 0 else 0
        else:
            cent = sum(proc.counts)
            err = np.sum(proc.errors**2.)**0.5
        row.append(format_cell(cent, err, cell_precision))
    tab.add_row(row)

    if outname:
        with open(outname,"w") as fhout:
            towrite = "".join(tab.get_table_strings(show_row_separators=False,show_alternating=False))
            fhout.write(towrite)
        parts = towrite.split("\n")
        header = parts[:3]
        binparts = parts[3:-4]
        total = parts[-4:-1]
        table_info = { "header":"<br>".join(header), "bins":binparts, "total":"<br>".join(total) }
        return table_info
    return tab
Ejemplo n.º 12
0
def plot(histnames, ps=0, sf=None, sfqcd=None, output_suffix="", dd_qcd=None):
    """Plot data against stacked backgrounds and return a data-driven QCD estimate.

    Histograms named ``histnames`` are summed per process from ROOT files under
    the module-level ``output_dirpath``.  The data files are chosen from the
    substrings "Mu" / "El" in ``histnames`` (both channels when neither
    matches).

    Args:
        histnames: histogram name to read; also used in the output file name.
        ps: if > 0, the data histogram is scaled by this value.
        sf: scale factor for the W, DY, ttbar and VV histograms.  A list is
            applied bin by bin starting at bin 1; any other value is passed to
            ``Scale`` when it is > 0.
        sfqcd: same as ``sf`` but for the three MC QCD histograms.
        output_suffix: appended to the output file name.
        dd_qcd: if given, drawn instead of the MC QCD(mu)/QCD(e) histogram.

    Returns:
        ``(h_ddqcd, h_data, h_bkg, h_qcd_mu, h_qcd_el, h_qcd_bc, h_wjets, h_dy,
        h_ttbar, h_vv)``.  ``h_data`` is the histogram that was plotted; all
        others are read again after plotting.  ``h_bkg`` and the four
        electroweak histograms have ``sf`` applied; the returned QCD MC
        histograms are not rescaled by ``sfqcd``.

    Relies on module-level names that are not defined in this function:
    ``output_dirpath``, ``ru``, ``p``, ``E``, ``lumi``, ``isSS``,
    ``input_ntup_tag`` and ``analysis_tag``.
    """

    # Glob the file lists
    bkg_list_wjets = [output_dirpath + "/wj_incl.root"]
    bkg_list_dy = [output_dirpath + "/dy.root"]
    bkg_list_ttbar = [output_dirpath + "/tt_incl.root"]
    bkg_list_vv = [output_dirpath + "/ww.root", output_dirpath + "/wz.root"]
    bkg_list_qcd_mu = [output_dirpath + "/qcd_mu.root"]
    bkg_list_qcd_el = [output_dirpath + "/qcd_em.root"]
    bkg_list_qcd_bc = [output_dirpath + "/qcd_bc.root"]
    # All non-QCD backgrounds; used below for the data-driven QCD subtraction.
    bkg_list_all = bkg_list_wjets + bkg_list_dy + bkg_list_ttbar + bkg_list_vv

    # Glob the data file list depending on the region
    if "Mu" in histnames:
        data_list = [output_dirpath + "/data_mu.root"]
    elif "El" in histnames:
        data_list = [output_dirpath + "/data_el.root"]
    else:
        data_list = [
            output_dirpath + "/data_mu.root", output_dirpath + "/data_el.root"
        ]

    # Get all the histogram objects
    h_wjets = ru.get_summed_histogram(bkg_list_wjets, histnames)
    h_dy = ru.get_summed_histogram(bkg_list_dy, histnames)
    h_ttbar = ru.get_summed_histogram(bkg_list_ttbar, histnames)
    h_vv = ru.get_summed_histogram(bkg_list_vv, histnames)
    h_qcd_mu = ru.get_summed_histogram(bkg_list_qcd_mu, histnames)
    h_qcd_el = ru.get_summed_histogram(bkg_list_qcd_el, histnames)
    h_qcd_bc = ru.get_summed_histogram(bkg_list_qcd_bc, histnames)
    h_data = ru.get_summed_histogram(data_list, histnames)

    # Set the names of the histograms
    h_wjets.SetName("W")
    h_dy.SetName("Z")
    h_ttbar.SetName("Top")
    h_vv.SetName("VV")
    h_qcd_mu.SetName("QCD(#mu)")
    h_qcd_el.SetName("QCD(e)")
    h_qcd_bc.SetName("QCD(bc)")
    h_data.SetName("Data")

    # print h_wjets.Integral() + h_dy.Integral() + h_ttbar.Integral() + h_vv.Integral()
    # print h_qcd_el.Integral() + h_qcd_bc.Integral()

    # Scale the histograms appropriately from SF from the EWKCR
    if sf:
        if isinstance(sf, list):
            # Per-bin scale factors: entry ii is applied to bin ii + 1.
            hists = [h_wjets, h_dy, h_ttbar, h_vv]
            for h in hists:
                for ii, s in enumerate(sf):
                    bc = h.GetBinContent(ii + 1)
                    be = h.GetBinError(ii + 1)
                    h.SetBinContent(ii + 1, bc * s)
                    h.SetBinError(ii + 1, be * s)
        else:
            # A non-positive scalar is ignored.
            if sf > 0:
                h_wjets.Scale(sf)
                h_dy.Scale(sf)
                h_ttbar.Scale(sf)
                h_vv.Scale(sf)
    if sfqcd:
        if isinstance(sfqcd, list):
            # Same per-bin treatment for the MC QCD histograms.
            hists = [h_qcd_mu, h_qcd_el, h_qcd_bc]
            for h in hists:
                for ii, s in enumerate(sfqcd):
                    bc = h.GetBinContent(ii + 1)
                    be = h.GetBinError(ii + 1)
                    h.SetBinContent(ii + 1, bc * s)
                    h.SetBinError(ii + 1, be * s)
        else:
            if sfqcd > 0:
                h_qcd_mu.Scale(sfqcd)
                h_qcd_el.Scale(sfqcd)
                h_qcd_bc.Scale(sfqcd)

    # If the data needs some additional correction for the prescale
    if ps > 0:
        h_data.Scale(ps)

    # print h_wjets.Integral() + h_dy.Integral() + h_ttbar.Integral() + h_vv.Integral()
    # print h_qcd_el.Integral() + h_qcd_bc.Integral()
    # print h_data.Integral()

    # Color settings
    colors = [2007, 2005, 2003, 2001, 920, 921]

    # Options
    # NOTE(review): "yaxis_log" evaluates to False in both branches of its
    # conditional expression.
    alloptions = {
        "ratio_range": [0.0, 2.0],
        "nbins":
        30,
        "autobin":
        False,
        "legend_scalex":
        1.8,
        "legend_scaley":
        1.1,
        "output_name":
        "plots/{}/{}/{}/plot/{}{}.pdf".format(input_ntup_tag, analysis_tag,
                                              "ss" if isSS else "3l",
                                              histnames, output_suffix),
        "bkg_sort_method":
        "unsorted",
        "no_ratio":
        False,
        "print_yield":
        True,
        "yaxis_log":
        False if "ptcorr" in histnames else False,
        #"yaxis_log": False,
        #"yaxis_log": False,
        "divide_by_bin_width":
        True,
        "legend_smart":
        False if "ptcorr" in histnames else True,
        "lumi_value":
        lumi,
    }

    # The bkg histogram list
    h_qcd = h_qcd_mu if "Mu" in histnames else h_qcd_el
    # A caller-supplied QCD histogram replaces the MC one in the stack; the
    # legend label chosen below is unchanged.
    if dd_qcd:
        h_qcd = dd_qcd
    bgs_list = [h_vv, h_ttbar, h_dy, h_wjets, h_qcd]

    legend_labels = ["VV", "t#bar{t}", "DY", "W", "QCD(#mu)"
                     ] if "Mu" in histnames else [
                         "VV", "t#bar{t}", "DY", "W", "QCD(e)", "QCD(HF)"
                     ]
    # Outside the muon channel the heavy-flavour QCD sample is stacked too,
    # matching the sixth legend label above.
    if "Mu" not in histnames:
        bgs_list.append(h_qcd_bc)

    # # For 2018 merge the last two bins in the central
    # if "ptcorretarolledcoarse" in histnames:
    #     def merge_4_5(h):
    #         bc4 = h.GetBinContent(4)
    #         bc5 = h.GetBinContent(5)
    #         be4 = h.GetBinError(4)
    #         be5 = h.GetBinError(5)
    #         nb = E(bc4, be4) + E(bc5, be5)
    #         nbc = nb.val
    #         nbe = nb.err
    #         h.SetBinContent(4, nbc)
    #         h.SetBinError(4, nbe)
    #         h.SetBinContent(5, nbc)
    #         h.SetBinError(5, nbe)
    #     merge_4_5(h_vv)
    #     merge_4_5(h_ttbar)
    #     merge_4_5(h_dy)
    #     merge_4_5(h_wjets)
    #     merge_4_5(h_qcd_mu)
    #     merge_4_5(h_qcd_el)
    #     merge_4_5(h_qcd_bc)
    #     merge_4_5(h_data)

    # Plot them
    p.plot_hist(bgs=bgs_list,
                data=h_data.Clone("Data"),
                colors=colors,
                syst=None,
                legend_labels=legend_labels,
                options=alloptions)

    # print h_wjets.Integral() + h_dy.Integral() + h_ttbar.Integral() + h_vv.Integral()
    # print h_qcd_el.Integral() + h_qcd_bc.Integral()
    # print h_data.Integral()

    # Obtain the histogram again to return the object for further calculations

    # Data-driven QCD = data - bkg
    h_ddqcd = ru.get_summed_histogram(data_list, histnames)
    h_bkg = ru.get_summed_histogram(bkg_list_all, histnames)
    h_wjets = ru.get_summed_histogram(bkg_list_wjets, histnames)
    h_dy = ru.get_summed_histogram(bkg_list_dy, histnames)
    h_ttbar = ru.get_summed_histogram(bkg_list_ttbar, histnames)
    h_vv = ru.get_summed_histogram(bkg_list_vv, histnames)
    # The freshly read data gets the same prescale correction as above.
    if ps > 0:
        h_ddqcd.Scale(ps)
    # Scale the histograms appropriately from SF from the EWKCR
    if sf:
        if isinstance(sf, list):
            # Here the summed background h_bkg is rescaled as well.
            hists = [h_bkg, h_wjets, h_dy, h_ttbar, h_vv]
            for h in hists:
                for ii, s in enumerate(sf):
                    bc = h.GetBinContent(ii + 1)
                    be = h.GetBinError(ii + 1)
                    h.SetBinContent(ii + 1, bc * s)
                    h.SetBinError(ii + 1, be * s)
        else:
            if sf > 0:
                h_bkg.Scale(sf)
                h_wjets.Scale(sf)
                h_dy.Scale(sf)
                h_ttbar.Scale(sf)
                h_vv.Scale(sf)

    if "ptcorretarolled" in histnames:

        # print h_ddqcd.GetBinContent(6), h_ddqcd.GetBinContent(7)
        # d6 = E(h_ddqcd.GetBinContent(6), h_ddqcd.GetBinError(6)) + E(h_ddqcd.GetBinContent(7), h_ddqcd.GetBinError(7))
        # d13 = E(h_ddqcd.GetBinContent(13), h_ddqcd.GetBinError(13)) + E(h_ddqcd.GetBinContent(14), h_ddqcd.GetBinError(14))
        # b6 = E(h_bkg.GetBinContent(6), h_bkg.GetBinError(6)) + E(h_bkg.GetBinContent(7), h_bkg.GetBinError(7))
        # b13 = E(h_bkg.GetBinContent(13), h_bkg.GetBinError(13)) + E(h_bkg.GetBinContent(14), h_bkg.GetBinError(14))
        # h_ddqcd.SetBinContent(6, d6.val)
        # h_ddqcd.SetBinContent(7, d6.val)
        # h_ddqcd.SetBinError(6, d6.err)
        # h_ddqcd.SetBinError(7, d6.err)
        # h_ddqcd.SetBinContent(13, d13.val)
        # h_ddqcd.SetBinContent(14, d13.val)
        # h_ddqcd.SetBinError(13, d13.err)
        # h_ddqcd.SetBinError(14, d13.err)
        # h_bkg.SetBinContent(6, b6.val)
        # h_bkg.SetBinContent(7, b6.val)
        # h_bkg.SetBinError(6, b6.err)
        # h_bkg.SetBinError(7, b6.err)
        # h_bkg.SetBinContent(13, b13.val)
        # h_bkg.SetBinContent(14, b13.val)
        # h_bkg.SetBinError(13, b13.err)
        # h_bkg.SetBinError(14, b13.err)

        # Bin-by-bin subtraction over the regular bins (1..NbinsX) using E
        # arithmetic for the error.
        for ii in xrange(1, h_ddqcd.GetNbinsX() + 1):
            data_bc = h_ddqcd.GetBinContent(ii)
            data_be = h_ddqcd.GetBinError(ii)
            bkg_bc = h_bkg.GetBinContent(ii)
            bkg_be = h_bkg.GetBinError(ii)
            d = E(data_bc, data_be)
            b = E(bkg_bc, bkg_be)
            n = d - b
            # When isSS is set, the estimate is floored at the data error.
            if isSS:
                if d.err > n.val:
                    n.val = d.err

            h_ddqcd.SetBinContent(ii, n.val)
            h_ddqcd.SetBinError(ii, n.err)

    else:
        # Plain histogram subtraction for every other histogram name.
        h_ddqcd.Add(h_bkg, -1)

    # MC QCD
    # Read again and cloned under new names; sfqcd is not re-applied here.
    h_qcd_mu = ru.get_summed_histogram(bkg_list_qcd_mu,
                                       histnames).Clone("QCD(#mu)")
    h_qcd_el = ru.get_summed_histogram(bkg_list_qcd_el,
                                       histnames).Clone("QCD(EM)")
    h_qcd_bc = ru.get_summed_histogram(bkg_list_qcd_bc,
                                       histnames).Clone("QCD(HF)")

    return h_ddqcd, h_data, h_bkg, h_qcd_mu, h_qcd_el, h_qcd_bc, h_wjets, h_dy, h_ttbar, h_vv
Ejemplo n.º 13
0
def yield_str(hist, i, prec=3):
    """Return bin ``i`` of ``hist`` as an ``E(content, error)`` rounded to ``prec``."""
    content = hist.GetBinContent(i)
    error = hist.GetBinError(i)
    return E(content, error).round(prec)
Ejemplo n.º 14
0
def get_sfs(infile, lnNsig=2.0, lnNbg=1.5, shapeUnc=0.1):
    outfile = "forCard.root"
    card_filename = "card.txt"
    variations = ["btag","jes"]
    # variations = ["jes"] # FIXME
    # variations = []
    procs = ["data", "wz", "ttz", "fakes", "rares"]
    # procs = ["data", "wz", "ttz", "fakes"]

    variationsud = sum([[v+"_up",v+"_dn"] for v in variations],[])

    files = {}
    files["central"] = r.TFile(infile)

    for var in variationsud:
        # print "%s_%s_%s.root" % (infile.split(".root")[0], var, ud)
        files[var] = r.TFile("%s_%s.root" % (infile.split(".root")[0], var))



    # return 

    print ">>> Reading input histograms from %s" % infile


    procs_nodata = procs[1:]
    procs_signodata = ["sig"] + procs_nodata

    hnames = {}

    for var in ["central"] + variationsud:
        keys = files[var].GetListOfKeys()
        histnames = [key.GetName() for key in keys if key.ReadObj().InheritsFrom(r.TH1F.Class())]
        hnames[var] = {}
        hnames[var]["data"] = [hn for hn in histnames if "Data" in hn]
        hnames[var]["wz"] = [hn for hn in histnames if "WZ" in hn]
        hnames[var]["ttz"] = [hn for hn in histnames if "ttZ" in hn]
        hnames[var]["fakes"] = [hn for hn in histnames if "Fakes" in hn]
        hnames[var]["rares"] = [hn for hn in histnames if ("Fakes" not in hn) and ("WZ" not in hn) and ("Data" not in hn) and ("ttZ" not in hn)]

    hists = {}
    # for k,v in hnames.items():
    for proc in procs:
        # tmp = [files["central"].Get(hn) for hn in hnames["central"][proc]]
        # hists["central"][proc] = tmp[0].Clone("h_"+proc)
        # for h in tmp[1:]: hists["central"][proc].Add(h)
        for var in ["central"] + variationsud:
            if var not in hists: hists[var] = {}
            tmp = [files[var].Get(hn) for hn in hnames[var][proc]]
            # name = "h_"+proc
            name = ""
            if "cent" not in var: name += var.replace("_up","Up").replace("_dn","Down")
            if name == "": name = proc+"shape"
            # print name
            hists[var][proc] = tmp[0].Clone(name)
            for h in tmp[1:]: hists[var][proc].Add(h)


    d_prefit = {}
    for proc in procs:
        d_prefit[proc] = get_bin_yields_and_errors(hists["central"][proc])


    print ">>> Writing output histograms for combine into %s" % outfile

    d_fouts = {}
    hists["central"]["sig"] = hists["central"]["data"].Clone("sigshape")
    for ix in range(1,hists["central"]["sig"].GetNbinsX()+1): hists["central"]["sig"].SetBinContent(ix,1.0)
    for proc in procs + ["sig"]:
        d_fouts[proc] = r.TFile(outfile.replace(".root","_%s.root" % proc), "RECREATE")
        for var in ["central"] + variationsud:
            if proc == "sig":
                if "cent" not in var:
                    hists["central"]["sig"].SetName(var.replace("_up","Up").replace("_dn","Down"))
                map(lambda x: x.Write(), get_variations(hists["central"]["sig"], syst=0.0))
            else:
                map(lambda x: x.Write(), get_variations(hists[var][proc], syst=(shapeUnc if "data" not in proc.lower() else 0.0), do_var=not("cent" in var)))

    # # fake signal with 1 event in each bin (arbitrary since we only care about BG only fit)
    # d_fouts["sig"] = r.TFile(outfile.replace(".root","_%s.root" % "sig"), "RECREATE")
    # for ix in range(1,hists["central"]["sig"].GetNbinsX()+1): hists["central"]["sig"].SetBinContent(ix,1.0)

    Nbins = hists["central"]["sig"].GetNbinsX()
    Nproc = len(procs)
    counts  = {}
    for proc in procs_signodata + ["data"]:
        counts[proc] = [hists["central"][proc].GetBinContent(ix) for ix in range(1,Nbins+1)]

    bin_str = " ".join([("ch%i "%i) * Nproc for i in range(1,Nbins+1)])
    proc1_str = (" ".join(procs_signodata) + " ") * Nbins
    proc2_str = " ".join([" ".join(map(str,range(0,Nproc))) for _ in range(0,Nbins)])
    rate_list = sum(map(list,zip(*[counts[proc] for proc in procs_signodata])),[])

    all_correlated = True

    buff = ""
    buff += "imax %i\n" % (Nbins)
    buff += "jmax %i\n" % (Nproc-1)
    if all_correlated:
        buff += "kmax %i\n" % ((Nbins+1+len(variations))*(Nproc-1)-((Nproc-2)*len(variations)))
    else:
        buff += "kmax %i\n" % ((Nbins+1+len(variations))*(Nproc-1))
    buff += "------------\n"

    for i in range(1,Nbins+1):
        for proc in ["data_obs"] + procs_signodata:
            proc2 = proc.replace("_obs","")
            ofile = outfile.replace(".root","_%s.root" % proc2)
            buff += "shapes {0} ch{1} {2} bin{1}_{4}shape{1} bin{1}_{3}\n".format(
                    proc,
                    i,
                    ofile,
                    (("datashape"+str(i)) if "data" in proc else "$SYSTEMATIC"),
                    proc2,
                    )

    buff += "------------\n"

    buff += "bin " + " ".join(["ch%i" % i for i in range(1,Nbins+1)]) + "\n"
    buff += "observation " + " ".join(["%.2f" % e for e in counts["data"]]) + "\n"

    buff += "------------\n"

    buff += "bin " + bin_str + "\n"
    buff += "process " + proc1_str + "\n"
    buff += "process " + proc2_str + "\n"
    buff += "rate " + " ".join(["%.2f" % e for e in rate_list]) + "\n"

    buff += "------------\n"

    # for ithing,thing in enumerate(["wz","ttz","fakes","rares"]):
    for ithing,thing in enumerate(procs_nodata):
        norm = lnNbg
        if thing in ["wz","ttz"]: norm = lnNsig
        # FIXME
        # if thing in ["ttz"]: norm = 1.12
        # if thing in ["rares"]: norm = 1.25
        
        for ibin in range(Nbins):
            buff += get_nuisance_line(thing+"shape"+str(ibin+1), "shape", len(rate_list), 1.0, [ibin*(Nproc)+ithing+1])
        buff += get_nuisance_line(thing, "lnN", len(rate_list), norm, range(ithing+1,(Nbins+1)*Nproc,Nproc))

        for var in variations:
            if all_correlated:
                if ithing == 0:
                    buff += get_nuisance_line(var, "shape", len(rate_list), 1.0, range(1,(Nbins+1)*Nproc))
            else:
                buff += get_nuisance_line(thing+var, "shape", len(rate_list), 1.0, range(ithing+1,(Nbins+1)*Nproc,Nproc))

    card_txt = buff
    # print card_txt

    for fout in d_fouts.values():
        fout.Close()


    print ">>> Writing card %s" % card_filename
    # write card
    with open(card_filename, "w") as fhout:
        fhout.write(card_txt)

    # sys.exit()

    # run combine
    print ">>> Running combine with card %s" % card_filename
    cmd = "combine -M MaxLikelihoodFit %s --saveNorm --saveWithUncertainties" % card_filename 
    output = commands.getoutput(cmd)
    print output
    if "Done in" in output:
        print ">>> Combine finished successfully"
    else:
        print ">>> [!] ERROR with combine. Output below:"
        print "-"*40
        print output
        print "-"*40
        sys.exit()

    print ">>> Examining fit results"
    # examine output
    fin = r.TFile.Open("mlfit.root");
    prefit = fin.Get("norm_prefit")
    fit_s = fin.Get("norm_fit_s")
    fit_b = fin.Get("norm_fit_b")
    iter = fit_b.createIterator()
    # print "{:>10} {:>21} {:>21} {:>21}".format("process", "prefit", "postfit", "SF")
    # print "-"*80
    d_sfs = {}
    d_cerrs = {}
    while True:
        norm_s = iter.Next()
        if norm_s == None: break;
        norm_b = fit_b.find(norm_s.GetName())
        norm_p = prefit.find(norm_s.GetName()) if prefit else None
        # title = norm_s.GetName().split("/")[-1]
        title = norm_s.GetName()
        binzi = int(title.split("/")[0].split("ch")[-1])-1 # zero indexed
        name = title.split("/")[-1]
        if "sig" in title: continue
        p_val, p_err = d_prefit[name][binzi][0],d_prefit[name][binzi][1]
        try:
            Epostfit = E(norm_b.getVal(), norm_b.getError())
            Eprefit = E(p_val, p_err)
            # Eprefit = E(norm_p.getVal(),norm_p.getError())
            # print "prefit,postfit",name,binzi,Eprefit,Epostfit, norm_p.getVal(), norm_p.getError()
            if name not in  d_cerrs: d_cerrs[name] = {"prefit": [], "postfit": []}
            d_cerrs[name]["prefit"].append(Eprefit)
            d_cerrs[name]["postfit"].append(Epostfit)

            sf, sferr = Epostfit/Eprefit
        except:
            sf, sferr = -1.0, 0.0
        # print "%s \t\t %.2f +- %.2f \t\t %.2f +- %.2f \t\t %.2f +- %.2f" % (title, d_prefit[title][0],d_prefit[title][1],norm_b.getVal(),norm_b.getError(), sf, sferr)
        # print "{:>10} {:>15,.2f} +-{:>6,.2f} {:>15,.2f} +-{:>6,.2f} {:>15,.2f} +-{:>6,.2f}".format(
        #         title, 
        #         p_val, p_err,
        #         norm_b.getVal(),norm_b.getError(),
        #         sf, sferr
        #         )
        d_sfs[title] = (sf, sferr)

    print "total SFs:"
    d_sfs["totals"] = {}
    d_sfs["postfit_totals"] = {}
    for proc in d_cerrs:
        sf = sum(d_cerrs[proc]["postfit"])/sum(d_cerrs[proc]["prefit"])
        d_sfs["postfit_totals"][proc.split("/")[-1]] = (
                map(lambda x:x[0], d_cerrs[proc]["postfit"]),
                map(lambda x:x[1], d_cerrs[proc]["postfit"])
                )
        d_sfs["totals"][proc.split("/")[-1]] = (sf[0],sf[1])
        print "{:>10} {:>15,.2f} +-{:>6,.2f}".format(proc, sf[0], sf[1])


    return d_sfs
Ejemplo n.º 15
0
def get_alpha_hists(proc, num, den):
    """Assemble the summary histograms for one ``get_alpha`` measurement.

    ``get_alpha`` is queried three times ("num", "den", "eff") and the
    three result lists are walked in lockstep.  Each entry is identified
    by the name of its "num" histogram, and only bin 1 of every input
    histogram is read.  All histograms built here have three bins, filled
    in the order 1 = eff, 2 = num, 3 = den.

    For the entry named "Nominal" three histograms are appended:
      * "Ratio" -- value and error of eff in bin 1, bins 2 and 3 zeroed
      * "Yield" -- value and error of num/den in bins 2/3, bin 1 zeroed
      * "Stat"  -- relative bin error (error / content) in percent
    For every other entry one histogram, named after the entry, holding
    the relative bin error in percent is appended.

    Finally a "Total" histogram is inserted at index 2.  Its bins are
    ``.err * 100`` of the running product of ``E(1, relative error)`` over
    all entries (presumably the combined relative uncertainty; that
    depends on how ``E`` multiplies -- TODO confirm).

    Returns the list of histograms.
    """
    nums = get_alpha(proc, num, den, "num")
    dens = get_alpha(proc, num, den, "den")
    effs = get_alpha(proc, num, den, "eff")

    hists = []
    totalerrors = [E(1, 0), E(1, 0), E(1, 0)]

    for h_n, h_d, h_e in zip(nums, dens, effs):
        syst = h_n.GetName()
        # Bin order used by every histogram below: eff, num, den.
        sources = (h_e, h_n, h_d)

        if syst == "Nominal":
            nominal = r.TH1F("{}".format(syst), "", 3, 0, 3)
            for ibin, src in enumerate(sources, 1):
                nominal.SetBinContent(ibin, src.GetBinContent(1))
                nominal.SetBinError(ibin, src.GetBinError(1))

            # Keep only the efficiency bin.
            ratio = nominal.Clone("Ratio")
            ratio.SetBinContent(2, 0)
            ratio.SetBinContent(3, 0)
            ratio.SetBinError(2, 0)
            ratio.SetBinError(3, 0)

            # Keep only the numerator/denominator bins.
            yields = nominal.Clone("Yield")
            yields.SetBinContent(1, 0)
            yields.SetBinError(1, 0)

            hists.append(ratio)
            hists.append(yields)
            unc_name = "Stat"
        else:
            unc_name = "{}".format(syst)

        # Relative uncertainty of this entry, in percent.
        unc = r.TH1F(unc_name, "", 3, 0, 3)
        for ibin, src in enumerate(sources, 1):
            unc.SetBinContent(
                ibin, src.GetBinError(1) / src.GetBinContent(1) * 100.)
        hists.append(unc)

        # Fold the same relative uncertainty into the running totals.
        for idx, src in enumerate(sources):
            totalerrors[idx] *= E(1, src.GetBinError(1) / src.GetBinContent(1))

    total = r.TH1F("Total", "", 3, 0, 3)
    for ibin, combined in enumerate(totalerrors, 1):
        total.SetBinContent(ibin, combined.err * 100.)
    hists.insert(2, total)
    return hists