Esempio n. 1
0
def Direct_Estimator(var, cut, year):
    from root_numpy import root2array, fill_hist, array2root
    import numpy.lib.recfunctions as rfn
    ### Preliminary Operations ###
    treeRead = not cut in [
        "nnqq", "en", "enqq", "mn", "mnqq", "ee", "eeqq", "mm", "mmqq", "em",
        "emqq", "qqqq"
    ]  # Read from tree
    channel = cut
    unit = ''
    if "GeV" in variable[var]['title']: unit = ' GeV'
    isBlind = BLIND and 'SR' in channel
    isAH = False  #'qqqq' in channel or 'hp' in channel or 'lp' in channel
    showSignal = False if 'SB' in cut or 'TR' in cut else True  #'SR' in channel or channel=='qqqq'#or len(channel)==5
    stype = "HVT model B"
    if len(sign) > 0 and 'AZh' in sign[0]: stype = "2HDM"
    elif len(sign) > 0 and 'monoH' in sign[0]: stype = "Z'-2HDM m_{A}=300 GeV"
    if treeRead:
        for k in sorted(alias.keys(), key=len, reverse=True):
            if BTAGGING == 'semimedium':
                if k in cut:
                    cut = cut.replace(k, aliasSM[k])

            else:
                if k in cut:
                    cut = cut.replace(
                        k, alias[k].format(WP=working_points[BTAGGING]))

    print "Plotting from", ("tree" if treeRead else
                            "file"), var, "in", channel, "channel with:"
    print "  cut    :", cut

    if var == 'jj_deltaEta_widejet':
        if "jj_deltaEta_widejet<1.1 && " in cut:
            print
            print "omitting jj_deltaEta_widejet<1.1 cut to draw the deltaEta distribution"
            print
            cut = cut.replace("jj_deltaEta_widejet<1.1 && ", "")
        else:
            print
            print "no 'jj_deltaEta_widejet<1.1 && ' in the cut string detected, so it cannot be ommited explicitly"
            print

    ### Create and fill MC histograms ###
    # Create dict
    file = {}
    tree = {}
    hist = {}

    ### Create and fill MC histograms ###
    for i, s in enumerate(back + sign):
        if True:  #FIXME

            if variable[var]['nbins'] > 0:
                hist[s] = TH1F(
                    s, ";" + variable[var]['title'] + ";Events / ( " + str(
                        (variable[var]['max'] - variable[var]['min']) /
                        variable[var]['nbins']) + unit + " );" +
                    ('log' if variable[var]['log'] else ''),
                    variable[var]['nbins'], variable[var]['min'],
                    variable[var]['max'])
            else:
                hist[s] = TH1F(
                    s, ";" + variable[var]['title'] + ";Events" +
                    ('log' if variable[var]['log'] else ''),
                    len(variable[var]['bins']) - 1,
                    array('f', variable[var]['bins']))
            hist[s].Sumw2()

            for j, ss in enumerate(sample[s]['files']):
                if not 'data' in s:
                    if year == "run2" or year in ss:
                        arr = root2array(
                            NTUPLEDIR + ss + ".root",
                            branches=[
                                var, "jpt_1", "jpt_2", "eventWeightLumi",
                                "TMath::Abs(jflavour_1)==5 && TMath::Abs(jflavour_2)==5",
                                "TMath::Abs(jflavour_1)==5 && TMath::Abs(jflavour_2)!=5",
                                "TMath::Abs(jflavour_1)!=5 && TMath::Abs(jflavour_2)==5",
                                "TMath::Abs(jflavour_1)!=5 && TMath::Abs(jflavour_2)!=5"
                            ],
                            selection=cut if len(cut) > 0 else "")
                        print "imported " + NTUPLEDIR + ss + ".root"
                        arr.dtype.names = [
                            var, "jpt_1", "jpt_2", "eventWeightLumi", "bb",
                            "bq", "qb", "qq"
                        ]
                        MANtag_eff1 = np.array(map(MANtag_eff, arr["jpt_1"]))
                        MANtag_eff2 = np.array(map(MANtag_eff, arr["jpt_2"]))
                        MANtag_mis1 = np.array(map(MANtag_mis, arr["jpt_1"]))
                        MANtag_mis2 = np.array(map(MANtag_mis, arr["jpt_2"]))
                        MANtag_weight = np.multiply(
                            arr["eventWeightLumi"],
                            np.multiply(arr['bb'],
                                        np.multiply(MANtag_eff1, MANtag_eff2))
                            + np.multiply(
                                arr['bq'], np.multiply(MANtag_eff1,
                                                       MANtag_mis2)) +
                            np.multiply(arr['qb'],
                                        np.multiply(MANtag_mis1,
                                                    MANtag_eff2)) +
                            np.multiply(arr['qq'],
                                        np.multiply(MANtag_mis1, MANtag_mis2)))
                        fill_hist(hist[s], arr[var], weights=MANtag_weight)
                        deepCSV_eff1 = np.array(map(deepCSV_eff, arr["jpt_1"]))
                        deepCSV_eff2 = np.array(map(deepCSV_eff, arr["jpt_2"]))
                        deepCSV_mis1 = np.array(map(deepCSV_mis, arr["jpt_1"]))
                        deepCSV_mis2 = np.array(map(deepCSV_mis, arr["jpt_2"]))
                        deepCSV_weight = np.multiply(
                            arr["eventWeightLumi"],
                            np.multiply(
                                arr['bb'],
                                np.multiply(deepCSV_eff1, deepCSV_eff2)) +
                            np.multiply(
                                arr['bq'],
                                np.multiply(deepCSV_eff1, deepCSV_mis2)) +
                            np.multiply(
                                arr['qb'],
                                np.multiply(deepCSV_mis1, deepCSV_eff2)) +
                            np.multiply(
                                arr['qq'],
                                np.multiply(deepCSV_mis1, deepCSV_mis2)))

                        if var == "jj_mass_widejet" and options.save and not "data" in ss:
                            arr = rfn.append_fields(arr,
                                                    "MANtag_weight",
                                                    MANtag_weight,
                                                    usemask=False)
                            arr = rfn.append_fields(arr,
                                                    "deepCSV_weight",
                                                    deepCSV_weight,
                                                    usemask=False)
                            array2root(arr,
                                       NTUPLEDIR + "MANtag/" + ss + "_" +
                                       BTAGGING + ".root",
                                       treename="tree",
                                       mode='recreate')
                            print "saved as", NTUPLEDIR + "MANtag/" + ss + "_" + BTAGGING + ".root"
                        arr = None

        hist[s].Scale(sample[s]['weight'] if hist[s].Integral() >= 0 else 0)
        hist[s].SetFillColor(sample[s]['fillcolor'])
        hist[s].SetFillStyle(sample[s]['fillstyle'])
        hist[s].SetLineColor(sample[s]['linecolor'])
        hist[s].SetLineStyle(sample[s]['linestyle'])

    if channel.endswith('TR') and channel.replace('TR', '') in topSF:
        hist['TTbarSL'].Scale(topSF[channel.replace('TR', '')][0])
        hist['ST'].Scale(topSF[channel.replace('TR', '')][0])

    hist['BkgSum'] = hist['data_obs'].Clone(
        "BkgSum") if 'data_obs' in hist else hist[back[0]].Clone("BkgSum")
    hist['BkgSum'].Reset("MICES")
    hist['BkgSum'].SetFillStyle(3003)
    hist['BkgSum'].SetFillColor(1)
    for i, s in enumerate(back):
        hist['BkgSum'].Add(hist[s])

    # Create data and Bkg sum histograms
    if options.blind or 'SR' in channel:
        hist['data_obs'] = hist['BkgSum'].Clone("data_obs")
        hist['data_obs'].Reset("MICES")
    # Set histogram style
    hist['data_obs'].SetMarkerStyle(20)
    hist['data_obs'].SetMarkerSize(1.25)

    for i, s in enumerate(back + sign + ['BkgSum']):
        addOverflow(hist[s], False)  # Add overflow
    for i, s in enumerate(sign):
        hist[s].SetLineWidth(3)
    for i, s in enumerate(sign):
        sample[s][
            'plot'] = True  #sample[s]['plot'] and s.startswith(channel[:2])

    if isAH:
        for i, s in enumerate(back):
            hist[s].SetFillStyle(3005)
            hist[s].SetLineWidth(2)
        #for i, s in enumerate(sign):
        #    hist[s].SetFillStyle(0)
        if not var == "Events":
            sfnorm = hist[data[0]].Integral() / hist['BkgSum'].Integral()
            print "Applying SF:", sfnorm
            for i, s in enumerate(back + ['BkgSum']):
                hist[s].Scale(sfnorm)
        if BLIND and var.endswith("Mass"):
            for i, s in enumerate(data + back + ['BkgSum']):
                first, last = hist[s].FindBin(65), hist[s].FindBin(135)
                for j in range(first, last):
                    hist[s].SetBinContent(j, -1.e-4)
        if BLIND and var.endswith("Tau21"):
            for i, s in enumerate(data):
                first, last = hist[s].FindBin(0), hist[s].FindBin(0.6)
                for j in range(first, last):
                    hist[s].SetBinContent(j, -1.e-4)

    # Create stack
    if variable[var]['nbins'] > 0:
        bkg = THStack(
            "Bkg",
            ";" + hist['BkgSum'].GetXaxis().GetTitle() + ";Events / ( " + str(
                (variable[var]['max'] - variable[var]['min']) /
                variable[var]['nbins']) + unit + " )")
    else:
        bkg = THStack("Bkg",
                      ";" + hist['BkgSum'].GetXaxis().GetTitle() + ";Events; ")
    for i, s in enumerate(back):
        bkg.Add(hist[s])

    # Legend
    leg = TLegend(0.65, 0.6, 0.95, 0.9)
    leg.SetBorderSize(0)
    leg.SetFillStyle(0)  #1001
    leg.SetFillColor(0)
    if len(data) > 0:
        leg.AddEntry(hist[data[0]], sample[data[0]]['label'], "pe")
    for i, s in reversed(list(enumerate(['BkgSum'] + back))):
        leg.AddEntry(hist[s], sample[s]['label'], "f")
    if showSignal:
        for i, s in enumerate(sign):
            if sample[s]['plot']:
                leg.AddEntry(hist[s], sample[s]['label'], "fl")

    leg.SetY1(0.9 - leg.GetNRows() * 0.05)

    # --- Display ---
    c1 = TCanvas("c1",
                 hist.values()[0].GetXaxis().GetTitle(), 800,
                 800 if RATIO else 600)

    if RATIO:
        c1.Divide(1, 2)
        setTopPad(c1.GetPad(1), RATIO)
        setBotPad(c1.GetPad(2), RATIO)
    c1.cd(1)
    c1.GetPad(bool(RATIO)).SetTopMargin(0.06)
    c1.GetPad(bool(RATIO)).SetRightMargin(0.05)
    c1.GetPad(bool(RATIO)).SetTicks(1, 1)

    log = variable[var]['log']  #"log" in hist['BkgSum'].GetZaxis().GetTitle()
    if log: c1.GetPad(bool(RATIO)).SetLogy()

    # Draw
    bkg.Draw("HIST")  # stack
    hist['BkgSum'].Draw("SAME, E2")  # sum of bkg
    if not isBlind and len(data) > 0: hist['data_obs'].Draw("SAME, PE")  # data
    if 'sync' in hist: hist['sync'].Draw("SAME, PE")
    #data_graph.Draw("SAME, PE")
    if showSignal:
        smagn = 1.  #if treeRead else 1.e2 #if log else 1.e2
        for i, s in enumerate(sign):
            #        if sample[s]['plot']:
            hist[s].Scale(smagn)
            hist[s].Draw(
                "SAME, HIST"
            )  # signals Normalized, hist[s].Integral()*sample[s]['weight']
        textS = drawText(0.80, 0.9 - leg.GetNRows() * 0.05 - 0.02,
                         stype + " (x%d)" % smagn, True)
    #bkg.GetYaxis().SetTitleOffset(bkg.GetYaxis().GetTitleOffset()*1.075)
    bkg.GetYaxis().SetTitleOffset(0.9)
    #bkg.GetYaxis().SetTitleOffset(2.)
    bkg.SetMaximum((5. if log else 1.25) * max(
        bkg.GetMaximum(),
        hist['data_obs'].GetBinContent(hist['data_obs'].GetMaximumBin()) +
        hist['data_obs'].GetBinError(hist['data_obs'].GetMaximumBin())))
    #if bkg.GetMaximum() < max(hist[sign[0]].GetMaximum(), hist[sign[-1]].GetMaximum()): bkg.SetMaximum(max(hist[sign[0]].GetMaximum(), hist[sign[-1]].GetMaximum())*1.25)
    bkg.SetMinimum(
        max(
            min(hist['BkgSum'].GetBinContent(hist['BkgSum'].GetMinimumBin(
            )), hist['data_obs'].GetMinimum()), 5.e-1) if log else 0.)
    if log:
        bkg.GetYaxis().SetNoExponent(bkg.GetMaximum() < 1.e4)
        #bkg.GetYaxis().SetMoreLogLabels(True)
    bkg.GetXaxis().SetRangeUser(variable[var]['min'], variable[var]['max'])

    #if log: bkg.SetMinimum(1)
    leg.Draw()
    #drawCMS(LUMI[year], "Preliminary")
    drawCMS(LUMI[year], "Work in Progress", suppressCMS=True)
    drawRegion('XVH' + channel, True)
    drawAnalysis(channel)

    setHistStyle(bkg, 1.2 if RATIO else 1.1)
    setHistStyle(hist['BkgSum'], 1.2 if RATIO else 1.1)

    if RATIO:
        c1.cd(2)
        err = hist['BkgSum'].Clone("BkgErr;")
        err.SetTitle("")
        err.GetYaxis().SetTitle("Data / MC")
        err.GetYaxis().SetTitleOffset(0.9)

        err.GetXaxis().SetRangeUser(variable[var]['min'], variable[var]['max'])
        for i in range(1, err.GetNbinsX() + 1):
            err.SetBinContent(i, 1)
            if hist['BkgSum'].GetBinContent(i) > 0:
                err.SetBinError(
                    i, hist['BkgSum'].GetBinError(i) /
                    hist['BkgSum'].GetBinContent(i))
        setBotStyle(err)
        errLine = err.Clone("errLine")
        errLine.SetLineWidth(1)
        errLine.SetFillStyle(0)
        res = hist['data_obs'].Clone("Residues")
        for i in range(0, res.GetNbinsX() + 1):
            if hist['BkgSum'].GetBinContent(i) > 0:
                res.SetBinContent(
                    i,
                    res.GetBinContent(i) / hist['BkgSum'].GetBinContent(i))
                res.SetBinError(
                    i,
                    res.GetBinError(i) / hist['BkgSum'].GetBinContent(i))
        if 'sync' in hist:
            res.SetMarkerColor(2)
            res.SetMarkerStyle(31)
            res.Reset()
            for i in range(0, res.GetNbinsX() + 1):
                x = hist['data_obs'].GetXaxis().GetBinCenter(i)
                if hist['sync'].GetBinContent(hist['sync'].FindBin(x)) > 0:
                    res.SetBinContent(
                        i, hist['data_obs'].GetBinContent(
                            hist['data_obs'].FindBin(x)) /
                        hist['sync'].GetBinContent(hist['sync'].FindBin(x)))
                    res.SetBinError(
                        i, hist['data_obs'].GetBinError(
                            hist['data_obs'].FindBin(x)) /
                        hist['sync'].GetBinContent(hist['sync'].FindBin(x)))
        setBotStyle(res)
        #err.GetXaxis().SetLabelOffset(err.GetXaxis().GetLabelOffset()*5)
        #err.GetXaxis().SetTitleOffset(err.GetXaxis().GetTitleOffset()*2)
        err.Draw("E2")
        errLine.Draw("SAME, HIST")
        if not isBlind and len(data) > 0:
            res.Draw("SAME, PE0")
            #res_graph.Draw("SAME, PE0")
            if len(err.GetXaxis().GetBinLabel(
                    1)) == 0:  # Bin labels: not a ordinary plot
                drawRatio(hist['data_obs'], hist['BkgSum'])
                drawStat(hist['data_obs'], hist['BkgSum'])

    c1.Update()

    if gROOT.IsBatch():
        if channel == "": channel = "nocut"
        varname = var.replace('.', '_').replace('()', '')
        if not os.path.exists("plots/" + channel):
            os.makedirs("plots/" + channel)
        suffix = ''
        if "b" in channel or 'mu' in channel: suffix += "_" + BTAGGING
        c1.Print("plots/MANtag_study/" + channel + "/" + varname + "_" + year +
                 suffix + ".png")
        c1.Print("plots/MANtag_study/" + channel + "/" + varname + "_" + year +
                 suffix + ".pdf")

    # Print table
    printTable(hist, sign)
Esempio n. 2
0
def plot(var, cut, year, norm=False, nm1=False):
    ### Preliminary Operations ###
    treeRead = not cut in [
        "nnqq", "en", "enqq", "mn", "mnqq", "ee", "eeqq", "mm", "mmqq", "em",
        "emqq", "qqqq"
    ]  # Read from tree
    channel = cut
    unit = ''
    if "GeV" in variable[var]['title']: unit = ' GeV'
    isBlind = BLIND and 'SR' in channel
    isAH = False  #'qqqq' in channel or 'hp' in channel or 'lp' in channel
    showSignal = False if 'SB' in cut or 'TR' in cut else True  #'SR' in channel or channel=='qqqq'#or len(channel)==5
    stype = "HVT model B"
    if len(sign) > 0 and 'AZh' in sign[0]: stype = "2HDM"
    elif len(sign) > 0 and 'monoH' in sign[0]: stype = "Z'-2HDM m_{A}=300 GeV"
    if treeRead:
        for k in sorted(alias.keys(), key=len, reverse=True):
            if BTAGGING == 'semimedium':
                if k in cut:
                    if ADDSELECTION:
                        cut = cut.replace(
                            k, aliasSM[k] + SELECTIONS[options.selection])
                    else:
                        cut = cut.replace(k, aliasSM[k])

            else:
                if k in cut:
                    if ADDSELECTION:
                        cut = cut.replace(
                            k, alias[k].format(WP=working_points[BTAGGING]) +
                            SELECTIONS[options.selection])
                    else:
                        cut = cut.replace(
                            k, alias[k].format(WP=working_points[BTAGGING]))

    # Determine Primary Dataset
    pd = sample['data_obs']['files']

    print "Plotting from", ("tree" if treeRead else
                            "file"), var, "in", channel, "channel with:"
    print "  dataset:", pd
    print "  cut    :", cut

    if var == 'jj_deltaEta_widejet':
        if "jj_deltaEta_widejet<1.1 && " in cut:
            print
            print "omitting jj_deltaEta_widejet<1.1 cut to draw the deltaEta distribution"
            print
            cut = cut.replace("jj_deltaEta_widejet<1.1 && ", "")
        else:
            print
            print "no 'jj_deltaEta_widejet<1.1 && ' in the cut string detected, so it cannot be ommited explicitly"
            print

    ### Create and fill MC histograms ###
    # Create dict
    file = {}
    tree = {}
    hist = {}

    ### Create and fill MC histograms ###
    for i, s in enumerate(data + back + sign):
        if treeRead:  # Project from tree
            tree[s] = TChain("tree")
            for j, ss in enumerate(sample[s]['files']):
                if not 'data' in s or ('data' in s and ss in pd):
                    if year == "run2" or year in ss:
                        tree[s].Add(NTUPLEDIR + ss + ".root")
            if variable[var]['nbins'] > 0:
                hist[s] = TH1F(
                    s, ";" + variable[var]['title'] + ";Events / ( " + str(
                        (variable[var]['max'] - variable[var]['min']) /
                        variable[var]['nbins']) + unit + " );" +
                    ('log' if variable[var]['log'] else ''),
                    variable[var]['nbins'], variable[var]['min'],
                    variable[var]['max'])
            else:
                hist[s] = TH1F(
                    s, ";" + variable[var]['title'] + ";Events" +
                    ('log' if variable[var]['log'] else ''),
                    len(variable[var]['bins']) - 1,
                    array('f', variable[var]['bins']))
            hist[s].Sumw2()
            cutstring = "(eventWeightLumi)" + ("*(" + cut +
                                               ")" if len(cut) > 0 else "")
            tree[s].Project(s, var, cutstring)
            if not tree[s].GetTree() == None:
                hist[s].SetOption("%s" % tree[s].GetTree().GetEntriesFast())
        else:  # Histogram written to file
            for j, ss in enumerate(sample[s]['files']):
                if not 'data' in s or ('data' in s and ss in pd):
                    file[ss] = TFile(NTUPLEDIR + ss + ".root", "R")
                    if file[ss].IsZombie():
                        print "WARNING: file", NTUPLEDIR + ss + ".root", "does not exist"
                        continue
                    tmphist = file[ss].Get(cut + "/" + var)
                    if tmphist == None: continue
                    if not s in hist.keys(): hist[s] = tmphist
                    else: hist[s].Add(tmphist)
        hist[s].Scale(sample[s]['weight'] if hist[s].Integral() >= 0 else 0)
        hist[s].SetFillColor(sample[s]['fillcolor'])
        hist[s].SetFillStyle(sample[s]['fillstyle'])
        hist[s].SetLineColor(sample[s]['linecolor'])
        hist[s].SetLineStyle(sample[s]['linestyle'])

    if channel.endswith('TR') and channel.replace('TR', '') in topSF:
        hist['TTbarSL'].Scale(topSF[channel.replace('TR', '')][0])
        hist['ST'].Scale(topSF[channel.replace('TR', '')][0])

    hist['BkgSum'] = hist['data_obs'].Clone(
        "BkgSum") if 'data_obs' in hist else hist[back[0]].Clone("BkgSum")
    hist['BkgSum'].Reset("MICES")
    hist['BkgSum'].SetFillStyle(3003)
    hist['BkgSum'].SetFillColor(1)
    for i, s in enumerate(back):
        hist['BkgSum'].Add(hist[s])

    if options.norm:
        for i, s in enumerate(back + ['BkgSum']):
            hist[s].Scale(hist[data[0]].Integral() / hist['BkgSum'].Integral())

    # Create data and Bkg sum histograms
    if options.blind or 'SR' in channel:
        hist['data_obs'] = hist['BkgSum'].Clone("data_obs")
        hist['data_obs'].Reset("MICES")
    # Set histogram style
    hist['data_obs'].SetMarkerStyle(20)
    hist['data_obs'].SetMarkerSize(1.25)

    for i, s in enumerate(data + back + sign + ['BkgSum']):
        addOverflow(hist[s], False)  # Add overflow
    for i, s in enumerate(sign):
        hist[s].SetLineWidth(3)
    for i, s in enumerate(sign):
        sample[s][
            'plot'] = True  #sample[s]['plot'] and s.startswith(channel[:2])

    if isAH:
        for i, s in enumerate(back):
            hist[s].SetFillStyle(3005)
            hist[s].SetLineWidth(2)
        #for i, s in enumerate(sign):
        #    hist[s].SetFillStyle(0)
        if not var == "Events":
            sfnorm = hist[data[0]].Integral() / hist['BkgSum'].Integral()
            print "Applying SF:", sfnorm
            for i, s in enumerate(back + ['BkgSum']):
                hist[s].Scale(sfnorm)
        if BLIND and var.endswith("Mass"):
            for i, s in enumerate(data + back + ['BkgSum']):
                first, last = hist[s].FindBin(65), hist[s].FindBin(135)
                for j in range(first, last):
                    hist[s].SetBinContent(j, -1.e-4)
        if BLIND and var.endswith("Tau21"):
            for i, s in enumerate(data):
                first, last = hist[s].FindBin(0), hist[s].FindBin(0.6)
                for j in range(first, last):
                    hist[s].SetBinContent(j, -1.e-4)

    if SYNC and var == "jj_mass_widejet" and year in ["2016", "2017", "2018"]:
        #iFile = TFile("sync/JetHT_run" + year + "_red_cert_scan.root", "READ")
        #hist['sync'] = iFile.Get("Mjj")
        if year == '2016':
            iFile = TFile("sync/2016/2016_07Aug2017_1246_1p1.root", "READ")
            hist['sync'] = iFile.Get("h_mjj_data")
        elif year == '2017':
            iFile = TFile(
                "sync/2017/histos_Run2017BCDEF_17Nov2017_JEC2017_mjj1530_cemf_lt_0p8_deltaETA_lt_1p1.root",
                "READ")
            hist['sync'] = iFile.Get("h_mjj_data")
        elif year == '2018':
            iFile = TFile(
                "sync/2018/Double_sideband_inputs_18v10_preliminary_v2.root",
                "READ")
            hist['sync'] = iFile.Get("h_mjj")

#        hist['sync'] = tmp.Rebin(len(dijet_bins)-1, "sync", array('d', dijet_bins))
#        hist['sync'] = tmp.Rebin(100, "sync")
        hist['sync'].SetMarkerStyle(31)
        hist['sync'].SetMarkerSize(1.25)
        hist['sync'].SetMarkerColor(2)
        print "Imported and drawing sync file"

    # Create stack
    if variable[var]['nbins'] > 0:
        bkg = THStack(
            "Bkg",
            ";" + hist['BkgSum'].GetXaxis().GetTitle() + ";Events / ( " + str(
                (variable[var]['max'] - variable[var]['min']) /
                variable[var]['nbins']) + unit + " )")
    else:
        bkg = THStack("Bkg",
                      ";" + hist['BkgSum'].GetXaxis().GetTitle() + ";Events; ")
    for i, s in enumerate(back):
        bkg.Add(hist[s])

    # Legend
    leg = TLegend(0.65, 0.6, 0.95, 0.9)
    leg.SetBorderSize(0)
    leg.SetFillStyle(0)  #1001
    leg.SetFillColor(0)
    if len(data) > 0:
        leg.AddEntry(hist[data[0]], sample[data[0]]['label'], "pe")
    for i, s in reversed(list(enumerate(['BkgSum'] + back))):
        leg.AddEntry(hist[s], sample[s]['label'], "f")
    if showSignal:
        for i, s in enumerate(sign):
            if sample[s]['plot']:
                leg.AddEntry(hist[s], sample[s]['label'], "fl")

    leg.SetY1(0.9 - leg.GetNRows() * 0.05)

    # --- Display ---
    c1 = TCanvas("c1",
                 hist.values()[0].GetXaxis().GetTitle(), 800,
                 800 if RATIO else 600)

    if RATIO:
        c1.Divide(1, 2)
        setTopPad(c1.GetPad(1), RATIO)
        setBotPad(c1.GetPad(2), RATIO)
    c1.cd(1)
    c1.GetPad(bool(RATIO)).SetTopMargin(0.06)
    c1.GetPad(bool(RATIO)).SetRightMargin(0.05)
    c1.GetPad(bool(RATIO)).SetTicks(1, 1)

    log = variable[var]['log']  #"log" in hist['BkgSum'].GetZaxis().GetTitle()
    if log: c1.GetPad(bool(RATIO)).SetLogy()

    # Draw
    bkg.Draw("HIST")  # stack
    hist['BkgSum'].Draw("SAME, E2")  # sum of bkg
    if not isBlind and len(data) > 0: hist['data_obs'].Draw("SAME, PE")  # data
    if 'sync' in hist: hist['sync'].Draw("SAME, PE")
    #data_graph.Draw("SAME, PE")
    if showSignal:
        smagn = 1.  #if treeRead else 1.e2 #if log else 1.e2
        for i, s in enumerate(sign):
            #        if sample[s]['plot']:
            hist[s].Scale(smagn)
            hist[s].Draw(
                "SAME, HIST"
            )  # signals Normalized, hist[s].Integral()*sample[s]['weight']
        textS = drawText(0.80, 0.9 - leg.GetNRows() * 0.05 - 0.02,
                         stype + " (x%d)" % smagn, True)
    #bkg.GetYaxis().SetTitleOffset(bkg.GetYaxis().GetTitleOffset()*1.075)
    bkg.GetYaxis().SetTitleOffset(0.9)
    #bkg.GetYaxis().SetTitleOffset(2.)
    bkg.SetMaximum((5. if log else 1.25) * max(
        bkg.GetMaximum(),
        hist['data_obs'].GetBinContent(hist['data_obs'].GetMaximumBin()) +
        hist['data_obs'].GetBinError(hist['data_obs'].GetMaximumBin())))
    #if bkg.GetMaximum() < max(hist[sign[0]].GetMaximum(), hist[sign[-1]].GetMaximum()): bkg.SetMaximum(max(hist[sign[0]].GetMaximum(), hist[sign[-1]].GetMaximum())*1.25)
    bkg.SetMinimum(
        max(
            min(hist['BkgSum'].GetBinContent(hist['BkgSum'].GetMinimumBin(
            )), hist['data_obs'].GetMinimum()), 5.e-1) if log else 0.)
    if log:
        bkg.GetYaxis().SetNoExponent(bkg.GetMaximum() < 1.e4)
        #bkg.GetYaxis().SetMoreLogLabels(True)
    bkg.GetXaxis().SetRangeUser(variable[var]['min'], variable[var]['max'])

    #if log: bkg.SetMinimum(1)
    leg.Draw()
    #drawCMS(LUMI[year], "Preliminary")
    #drawCMS(LUMI[year], "Work in Progress", suppressCMS=True)
    drawCMS(LUMI[year], "", suppressCMS=True)
    drawRegion('XVH' + channel, True)
    drawAnalysis(channel)

    setHistStyle(bkg, 1.2 if RATIO else 1.1)
    setHistStyle(hist['BkgSum'], 1.2 if RATIO else 1.1)

    if RATIO:
        c1.cd(2)
        err = hist['BkgSum'].Clone("BkgErr;")
        err.SetTitle("")
        if SYNC:
            err.GetYaxis().SetTitle("Nano/Mini")
        else:
            err.GetYaxis().SetTitle("Data / MC")
        err.GetYaxis().SetTitleOffset(0.9)

        err.GetXaxis().SetRangeUser(variable[var]['min'], variable[var]['max'])
        for i in range(1, err.GetNbinsX() + 1):
            err.SetBinContent(i, 1)
            if hist['BkgSum'].GetBinContent(i) > 0:
                err.SetBinError(
                    i, hist['BkgSum'].GetBinError(i) /
                    hist['BkgSum'].GetBinContent(i))
        setBotStyle(err)
        errLine = err.Clone("errLine")
        errLine.SetLineWidth(1)
        errLine.SetFillStyle(0)
        res = hist['data_obs'].Clone("Residues")
        for i in range(0, res.GetNbinsX() + 1):
            if hist['BkgSum'].GetBinContent(i) > 0:
                res.SetBinContent(
                    i,
                    res.GetBinContent(i) / hist['BkgSum'].GetBinContent(i))
                res.SetBinError(
                    i,
                    res.GetBinError(i) / hist['BkgSum'].GetBinContent(i))
        if 'sync' in hist:
            res.SetMarkerColor(1)
            res.SetMarkerStyle(20)
            res.Reset()
            for i in range(0, res.GetNbinsX() + 1):
                x = hist['data_obs'].GetXaxis().GetBinCenter(i)
                if hist['sync'].GetBinContent(hist['sync'].FindBin(x)) > 0:
                    res.SetBinContent(
                        i, hist['data_obs'].GetBinContent(
                            hist['data_obs'].FindBin(x)) /
                        hist['sync'].GetBinContent(hist['sync'].FindBin(x)))
                    res.SetBinError(
                        i, hist['data_obs'].GetBinError(
                            hist['data_obs'].FindBin(x)) /
                        hist['sync'].GetBinContent(hist['sync'].FindBin(x)))
        setBotStyle(res)
        #err.GetXaxis().SetLabelOffset(err.GetXaxis().GetLabelOffset()*5)
        #err.GetXaxis().SetTitleOffset(err.GetXaxis().GetTitleOffset()*2)
        err.Draw("E2")
        errLine.Draw("SAME, HIST")
        if not isBlind and len(data) > 0:
            res.Draw("SAME, PE0")
            #res_graph.Draw("SAME, PE0")
            if len(err.GetXaxis().GetBinLabel(
                    1)) == 0:  # Bin labels: not a ordinary plot
                drawRatio(hist['data_obs'], hist['BkgSum'])
                drawStat(hist['data_obs'], hist['BkgSum'])
        if SYNC: err.GetYaxis().SetRangeUser(0.9, 1.1)

    c1.Update()

    if gROOT.IsBatch():
        if channel == "": channel = "nocut"
        varname = var.replace('.', '_').replace('()', '')
        if not os.path.exists("plots/" + channel):
            os.makedirs("plots/" + channel)
        suffix = ''
        if "b" in channel or 'mu' in channel: suffix += "_" + BTAGGING
        if ADDSELECTION: suffix += "_" + options.selection
        c1.Print("plots/" + channel + "/" + varname + "_" + year + suffix +
                 ".png")
        c1.Print("plots/" + channel + "/" + varname + "_" + year + suffix +
                 ".pdf")

    # Print table
    printTable(hist, sign)

    #    if True:
    #        sFile = TFile("sync/data_2016.root", "RECREATE")
    #        sFile.cd()
    #        hist['data_obs'].

    if not gROOT.IsBatch(): raw_input("Press Enter to continue...")
Esempio n. 3
0
def plot(var, cut, nm1=False):
    ### Preliminary Operations ###
    treeRead = True if not FILE else False  # Read from tree
    channel = cut
    isBlind = BLIND
    showSignal = False if 'SB' in cut or 'TR' in cut else True

    # Determine explicit cut
    if treeRead:
        for k in sorted(alias.keys(), key=len, reverse=True):
            if k in cut: cut = cut.replace(k, alias[k])

    # Determine Primary Dataset
    pd = []
    if "isSingleMuonPhotonTrigger" in cut:
        pd = [x for x in sample['data_obs']['files'] if "MuonEG" in x]
    elif "isJPsiTrigger" in cut:
        pd = [x for x in sample['data_obs']['files'] if "Charmonium" in x]
    else:
        print "Cannot determine Primary Dataset."
        exit()

    print "Plotting from", ("tree" if treeRead else
                            "file"), var, "in", channel, "channel with:"
    print "  dataset:", pd
    print "  cut    :", cut

    if isBlind and "SR" in channel and var in ["H_mass"]:
        cut += " && ( isMC ? 1 : !(H_mass > 86 && H_mass < 96) && !(H_mass > 120 && H_mass < 130) )"

    ### Create and fill MC histograms ###
    # Create dict
    file = {}
    tree = {}
    hist = {}
    cutstring = "(eventWeightLumi)" + ("*(" + cut +
                                       ")" if len(cut) > 0 else "")

    ### Create and fill MC histograms ###
    for i, s in enumerate(data + back + sign):
        hist[s] = TH1F(
            s, ";" + variable[var]['title'] + ";Events;" +
            ('logx' if variable[var]['logx'] else '') +
            ('logy' if variable[var]['logy'] else ''), variable[var]['nbins'],
            variable[var]['min'], variable[var]['max'])
        hist[s].Sumw2()
        tree[s] = TChain("Events")
        for j, ss in enumerate(sample[s]['files']):
            if s in data and not ss in pd: continue
            if YEAR == 2016 and not ('Run2016' in ss or 'Summer16' in ss):
                continue
            if YEAR == 2017 and not ('Run2017' in ss or 'Fall17' in ss):
                continue
            if YEAR == 2018 and not ('Run2018' in ss or 'Autumn18' in ss):
                continue
            for f in os.listdir(NTUPLEDIR + '/' + ss):
                tree[s].Add(NTUPLEDIR + '/' + ss + '/' + f)
        tree[s].Project(s, var, cutstring)
        if not tree[s].GetTree() == None:
            hist[s].SetOption("%s" % tree[s].GetTree().GetEntriesFast())

#    jobs = []
#    queue = multiprocessing.Queue()
#    for i, s in enumerate(data+back+sign):
#        for j, ss in enumerate(sample[s]['files']):
#            if s in data and not ss in pd: continue
#            if YEAR == 2016 and not ('Run2016' in ss or 'Summer16' in ss): continue
#            if YEAR == 2017 and not ('Run2017' in ss or 'Fall17' in ss): continue
#            if YEAR == 2018 and not ('Run2018' in ss or 'Autumn18' in ss): continue
#            if treeRead: # Project from tree
##                hist[s] = loopProject(s, ss, variable[var], cutstring, True)
#                p = multiprocessing.Process(target=parallelProject, args=(queue, s, ss, variable[var], cutstring, ))
#                jobs.append(p)
#                p.start()
#            else: # Histogram written to file
#                hist[s] = readhist(FILE, s, var, cut)
#
#    # Wait for all jobs to finish
#    for job in jobs:
#        h = queue.get()
#        if not h.GetOption() in hist: hist[h.GetOption()] = h
#        else: hist[h.GetOption()].Add(h)
#    for job in jobs:
#        job.join()

# Histogram style
    for i, s in enumerate(data + back + sign):
        hist[s].Scale(sample[s]['weight'] if hist[s].Integral() >= 0 else 0)
        hist[s].SetFillColor(sample[s]['fillcolor'])
        hist[s].SetFillStyle(sample[s]['fillstyle'] if not options.norm else 0)
        hist[s].SetLineColor(sample[s]['linecolor'])
        hist[s].SetLineStyle(sample[s]['linestyle'])
        hist[s].SetLineWidth(sample[s]['linewidth'])

    ### Create Bkg Sum histogram ###
    hist['BkgSum'] = hist['data_obs'].Clone(
        "BkgSum") if 'data_obs' in hist else hist[back[0]].Clone("BkgSum")
    hist['BkgSum'].Reset("MICES")
    hist['BkgSum'].SetFillStyle(3003)
    hist['BkgSum'].SetFillColor(1)
    for i, s in enumerate(back):
        hist['BkgSum'].Add(hist[s])

    if options.norm:
        for i, s in enumerate(back + ['BkgSum']):
            hist[s].Scale(hist[data[0]].Integral() / hist['BkgSum'].Integral())
        for i, s in enumerate(sign):
            hist[s].Scale(hist[data[0]].Integral() / hist[s].Integral())

    # Create data and Bkg sum histograms
#    if BLIND: # or 'SR' in channel:
#        hist['data_obs'] = hist['BkgSum'].Clone("data_obs")
#        hist['data_obs'].Reset("MICES")
# Set histogram style
    hist['data_obs'].SetMarkerStyle(20)
    hist['data_obs'].SetMarkerSize(1.25)

    #    for i, s in enumerate(data+back+sign+['BkgSum']): addOverflow(hist[s], False) # Add overflow
    for i, s in enumerate(sign):
        hist[s].SetLineWidth(3)
    for i, s in enumerate(sign):
        sample[s]['plot'] = True

    # Create stack
    bkg = THStack("Bkg",
                  ";" + hist['BkgSum'].GetXaxis().GetTitle() + ";Events")
    for i, s in enumerate(back):
        bkg.Add(hist[s])

    # Legend
    leg = TLegend(0.65, 0.6, 0.95, 0.9)
    leg.SetBorderSize(0)
    leg.SetFillStyle(0)  #1001
    leg.SetFillColor(0)
    if len(data) > 0:
        leg.AddEntry(hist[data[0]], sample[data[0]]['label'], "pe")
    for i, s in reversed(list(enumerate(['BkgSum'] + back))):
        leg.AddEntry(hist[s], sample[s]['label'], "f")
    if showSignal:
        for i, s in enumerate(sign):
            if sample[s]['plot']:
                leg.AddEntry(hist[s], sample[s]['label'], "fl")

    leg.SetY1(0.9 - leg.GetNRows() * 0.04)

    # --- Display ---
    c1 = TCanvas("c1",
                 hist.values()[0].GetXaxis().GetTitle(), 800,
                 800 if RATIO else 600)

    if RATIO:
        c1.Divide(1, 2)
        setTopPad(c1.GetPad(1), RATIO)
        setBotPad(c1.GetPad(2), RATIO)
    c1.cd(1)
    c1.GetPad(bool(RATIO)).SetTopMargin(0.06)
    c1.GetPad(bool(RATIO)).SetRightMargin(0.05)
    c1.GetPad(bool(RATIO)).SetTicks(1, 1)

    logX, logY = "logx" in hist['BkgSum'].GetZaxis().GetTitle(
    ), "logy" in hist['BkgSum'].GetZaxis().GetTitle()
    if logY: c1.GetPad(bool(RATIO)).SetLogy()
    if logX: c1.GetPad(bool(RATIO)).SetLogx()

    # Draw
    bkg.Draw("HIST")  # stack
    hist['BkgSum'].Draw("SAME, E2")  # sum of bkg
    if len(data) > 0: hist['data_obs'].Draw("SAME, PE")  # data
    #data_graph.Draw("SAME, PE")
    #    if showSignal:
    #        smagn = 1. #if treeRead else 1.e2 #if logY else 1.e2
    for i, s in enumerate(sign):
        if sample[s]['plot']: hist[s].Draw("SAME, HIST")


#                hist[s].Scale(smagn)
#                hist[s].Draw("SAME, HIST") # signals Normalized, hist[s].Integral()*sample[s]['weight']
#        #textS = drawText(0.80, 0.9-leg.GetNRows()*0.05 - 0.02, stype+" (x%d)" % smagn, True)
    bkg.GetYaxis().SetTitleOffset(bkg.GetYaxis().GetTitleOffset() * 1.075)
    bkg.SetMaximum((5. if logY else 1.25) * max(
        bkg.GetMaximum(),
        hist['data_obs'].GetBinContent(hist['data_obs'].GetMaximumBin()) +
        hist['data_obs'].GetBinError(hist['data_obs'].GetMaximumBin())))
    #if bkg.GetMaximum() < max(hist[sign[0]].GetMaximum(), hist[sign[-1]].GetMaximum()): bkg.SetMaximum(max(hist[sign[0]].GetMaximum(), hist[sign[-1]].GetMaximum())*1.25)
    bkg.SetMinimum(
        max(
            min(hist['BkgSum'].GetBinContent(hist['BkgSum'].GetMinimumBin(
            )), hist['data_obs'].GetMinimum()), 5.e-1) if logY else 0.)
    if logY:
        bkg.GetYaxis().SetNoExponent(bkg.GetMaximum() < 1.e4)
        bkg.GetYaxis().SetMoreLogLabels(True)

    #if logY: bkg.SetMinimum(1)
    leg.Draw()
    drawCMS(LUMI[YEAR], "Preliminary")
    if channel in aliasNames: drawRegion(aliasNames[channel], True)
    #drawAnalysis(channel)

    #if nm1 and not cutValue is None: drawCut(cutValue, bkg.GetMinimum(), bkg.GetMaximum()) #FIXME
    #if len(sign) > 0:
    #    if channel.startswith('X') and len(sign)>0: drawNorm(0.9-0.05*(leg.GetNRows()+1), "#sigma(X) = %.1f pb" % 1.)

    setHistStyle(bkg, 1.2 if RATIO else 1.1)
    setHistStyle(hist['BkgSum'], 1.2 if RATIO else 1.1)

    if RATIO:
        c1.cd(2)
        if logX: c1.GetPad(2).SetLogx()
        err = hist['BkgSum'].Clone("BkgErr;")
        err.SetTitle("")
        err.GetYaxis().SetTitle("Data / Bkg")
        for i in range(1, err.GetNbinsX() + 1):
            err.SetBinContent(i, 1)
            if hist['BkgSum'].GetBinContent(i) > 0:
                err.SetBinError(
                    i, hist['BkgSum'].GetBinError(i) /
                    hist['BkgSum'].GetBinContent(i))
        setBotStyle(err)
        errLine = err.Clone("errLine")
        errLine.SetLineWidth(1)
        errLine.SetFillStyle(0)
        res = hist['data_obs'].Clone("Residues")
        for i in range(0, res.GetNbinsX() + 1):
            if hist['BkgSum'].GetBinContent(i) > 0:
                res.SetBinContent(
                    i,
                    res.GetBinContent(i) / hist['BkgSum'].GetBinContent(i))
                res.SetBinError(
                    i,
                    res.GetBinError(i) / hist['BkgSum'].GetBinContent(i))
        setBotStyle(res)
        #err.GetXaxis().SetLabelOffset(err.GetXaxis().GetLabelOffset()*5)
        #err.GetXaxis().SetTitleOffset(err.GetXaxis().GetTitleOffset()*2)
        err.Draw("E2")
        errLine.Draw("SAME, HIST")
        if len(data) > 0:
            res.Draw("SAME, PE0")
            #res_graph.Draw("SAME, PE0")
            if len(err.GetXaxis().GetBinLabel(
                    1)) == 0:  # Bin labels: not a ordinary plot
                drawRatio(hist['data_obs'], hist['BkgSum'])
                drawStat(hist['data_obs'], hist['BkgSum'])

    if var in ["H_mass"]:
        c1.cd(bool(RATIO))
        boxZ = drawBox(XZMIN, hist['data_obs'].GetMinimum(), XZMAX,
                       hist['data_obs'].GetMaximum() / 1.30, "Z")
        boxH = drawBox(XHMIN, hist['data_obs'].GetMinimum(), XHMAX,
                       hist['data_obs'].GetMaximum() / 1.30, "H")

    c1.Update()

    if True:  #gROOT.IsBatch():
        varname = var.replace('.', '_').replace('()', '')
        if not os.path.exists("plots/" + channel):
            os.makedirs("plots/" + channel)
        c1.Print("plots/" + channel + "/" + varname + ".png")
        c1.Print("plots/" + channel + "/" + varname + ".pdf")

    # Print table
    printTable(hist, sign)

    if not gROOT.IsBatch(): raw_input("Press Enter to continue...")
Esempio n. 4
0
def btag_efficiency(cut, year, pT_range=None):
    ### Preliminary Operations ###
    from root_numpy import hist2array
    from sklearn.metrics import roc_curve
    import numpy as np
    genPoints = [
        1600, 1800, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 7000,
        8000
    ]
    signal = ['ZpBB_M' + str(x) for x in genPoints]
    btag_vars = BTAGGEFFVARS
    nbins = 100
    min_, max_ = 0., 1.

    treeRead = True
    channel = cut
    isAH = False
    for k in sorted(alias.keys(), key=len, reverse=True):
        if k in cut:
            cut = cut.replace(k, aliasSM[k])

    ### Create and fill MC histograms ###
    # Create dict
    file = {}
    tree = {}
    hist = {}

    ### Create and fill MC histograms ###
    for i, s in enumerate(back + signal):
        tree[s] = TChain("tree")
        for j, ss in enumerate(sample[s]['files']):
            if year == "run2" or year in ss:
                ### to run on big ntuples:
                #k = 0
                #while True:
                #    if os.path.exists("/eos/user/m/msommerh/Zprime_to_bb_analysis/" + ss + "/" + ss+ "_flatTuple_{}.root".format(k)):
                #        tree[s].Add("/eos/user/m/msommerh/Zprime_to_bb_analysis/" + ss + "/" + ss + "_flatTuple_{}.root".format(k))
                #        k += 1
                #    else:
                #        print "found {} files for sample:".format(k), ss
                #        break
                #if k == 0:
                #    print '  WARNING: files for sample', ss , 'do not exist, continuing'
                #    return True
                ### end big ntuples
                tree[s].Add(NTUPLEDIR + ss +
                            ".root")  ## to run on skimmed ntuples
        for var in btag_vars:
            hist[s + "_" + var] = TH1F(s + "_" + var,
                                       ";efficiency; mistag rate", nbins, min_,
                                       max_)
            for suf in ["_1", "_2"]:
                temp_hist = TH1F(s + "_" + var + suf,
                                 ";efficiency; mistag rate", nbins, min_, max_)
                temp_hist.Sumw2()
                if pT_range is not None:
                    pT_cut = " && jpt" + suf + ">=" + str(
                        pT_range[0]) + " && jpt" + suf + "<" + str(pT_range[1])
                else:
                    pT_cut = ""
                if s in signal:
                    flavourcut = " && abs(jflavour" + suf + ")==5"
                else:
                    flavourcut = " && (abs(jflavour" + suf + ")<4 || abs(jflavour" + suf + ")==9 || abs(jflavour" + suf + ")==21)"  ## FIXME this currently excludes charms
                if len(cut) == 0: flavourcut = flavourcut[4:]
                cutstring = "(eventWeightLumi)" + "*(" + cut + flavourcut + pT_cut + ")"
                tree[s].Project(s + "_" + var + suf, var + suf, cutstring)
                if not tree[s].GetTree() == None:
                    hist[s + "_" + var].SetOption(
                        "%s" % tree[s].GetTree().GetEntriesFast())
                #hist[s][var+suf].Scale(sample[s]['weight'] if hist[s].Integral() >= 0 else 0)
                hist[s + "_" + var].Add(temp_hist)
                temp_hist.Delete()

    fpr = {}
    tpr = {}
    thr = {}
    for var in btag_vars:
        hist_sig = TH1F(var + "_sig", ";efficiency; mistag rate", nbins, min_,
                        max_)
        hist_bkg = TH1F(var + "_bkg", ";efficiency; mistag rate", nbins, min_,
                        max_)
        for i, s in enumerate(signal):
            hist_sig.Add(hist[s + "_" + var])
        for i, s in enumerate(back):
            hist_bkg.Add(hist[s + "_" + var])
        sig_arr, sig_edges = hist2array(hist_sig, return_edges=True)
        bkg_arr, bkg_edges = hist2array(hist_bkg, return_edges=True)
        assert len(sig_edges[0]) == len(bkg_edges[0])
        vals = []
        sig_weights = []
        bkg_weights = []
        for j, entry in enumerate(sig_edges[0][:-1]):
            vals.append(0.5 * (sig_edges[0][j] + sig_edges[0][j + 1]))
            sig_weights.append(sig_arr[j])
            bkg_weights.append(bkg_arr[j])
        sig_labels = np.ones(len(sig_weights))
        bkg_labels = np.zeros(len(bkg_weights))
        fpr[var], tpr[var], thr[var] = roc_curve(np.concatenate(
            (sig_labels, bkg_labels)),
                                                 np.array(vals + vals),
                                                 sample_weight=np.concatenate(
                                                     (sig_weights,
                                                      bkg_weights)))

    canv = TCanvas('c', 'c', 500, 650)
    canv.SetGrid()

    graphs = {}
    for j, var in enumerate(btag_vars):
        graphs[var] = TGraph(len(tpr[var]), tpr[var], fpr[var])
        graphs[var].SetLineColor(btag_colors[var])
        graphs[var].SetMarkerStyle(1)
        graphs[var].SetMarkerColor(btag_colors[var])
        graphs[var].SetLineWidth(2)

        graphs[var].SetTitle(";b tagging efficiency;mistag rate (udsg jets)")
        graphs[var].GetXaxis().SetLimits(0., 1.)
        graphs[var].GetHistogram().SetMinimum(1e-4)
        graphs[var].GetHistogram().SetMaximum(1.)
        graphs[var].GetYaxis().SetTitleOffset(1.4)
    leg = TLegend(0.65, 0.15, 0.9, 0.35)
    for j, var in enumerate(btag_vars):
        leg.AddEntry(graphs[var], btag_titles[var])
        if j == 0:
            graphs[var].Draw("APL")
        else:
            graphs[var].Draw("PL SAME")

    latex = TLatex(0.05, 0.5,
                   str(pT_range[0]) + '<p_{T}<' + str(pT_range[1]) + ' GeV')
    latex.SetTextSize(0.043)
    latex.Draw()

    leg.Draw()
    canv.SetLogy()

    if pT_range is not None:
        pt_suff = "pT{}to{}".format(pT_range[0], pT_range[1])
    else:
        pt_suff = "incl_pT"
    canv.Print("plots/btag_eff/ROC_{}_{}.png".format(year, pt_suff))
    canv.Print("plots/btag_eff/ROC_{}_{}.pdf".format(year, pt_suff))