Example #1
0
def plot1D(variable, binnings, cutstring, histos, lumi, pdffile, ymin=1e-1, ymax=1e5, showdata = False):
    """Draw a stacked 1D comparison with a ratio panel and save it as a PDF.

    Histograms are classified by their label: a label containing "Data" is
    the data histogram, a label containing "Signal" is overlaid on top of the
    stack, and everything else is a background that goes into the stack.

    NOTE: every non-data histogram in ``histos`` is scaled by ``lumi`` in
    place, so calling this twice on the same objects scales them twice.

    Args:
        variable: name of the plotted variable; used as x-axis title after a
            few known names are replaced by LaTeX labels.
        binnings: unused here; kept for interface compatibility.
        cutstring: unused here; kept for interface compatibility.
        histos: dict mapping label -> histogram object (must provide Scale,
            SetLineWidth, GetYaxis, Draw, GetTitle, SetTitle).
        lumi: scale factor applied to all non-data histograms; lumi/1e3,
            rounded to two decimals, is forwarded to shared_utils.FabDraw.
        pdffile: output path without the ".pdf" extension.
        ymin, ymax: y-axis range applied to every histogram.
        showdata: if True and a data histogram exists, compare data to the
            stack; otherwise the last background is passed in the data slot.

    Raises:
        ValueError: if no data is drawn and there is no background histogram
            to put into the stack.
    """
    # Classify by label instead of relying on the (arbitrary) position of the
    # data histogram inside the dict.
    data_label = None
    for label in histos:
        if "Data" in label and data_label is None:
            data_label = label
    backgrounds = [
        histos[label] for label in histos
        if "Data" not in label and "Signal" not in label
    ]

    draw_data = data_label is not None and showdata
    if not draw_data and not backgrounds:
        raise ValueError("plot1D: no background histograms to draw for %s" % pdffile)

    for label in histos:
        if "Data" not in label:
            histos[label].Scale(lumi)

    for label in histos:
        histos[label].SetLineWidth(2)
        shared_utils.histoStyler(histos[label])

    legend = shared_utils.mklegend(x1 = 0.6, y1 = 0.4, x2 = 0.9, y2 = 0.8)

    canvas = shared_utils.mkcanvas()
    canvas.SetFillStyle(4000)

    # From here on ``lumi`` is only the value forwarded to FabDraw.
    lumi = float("%.2f" % (lumi/1e3))

    if draw_data:
        hratio, pad1, pad2 = shared_utils.FabDraw(canvas, legend, histos[data_label], backgrounds, lumi = lumi, datamc = 'Data')
    else:
        print("No data")
        # Only backgrounds go into the stack; the last one fills the data slot.
        hratio, pad1, pad2 = shared_utils.FabDraw(canvas, legend, backgrounds[-1], backgrounds, lumi = lumi, datamc = 'Data')
        backgrounds[-1].SetTitle("")

    for label in histos:
        histos[label].GetYaxis().SetRangeUser(ymin, ymax)

    # Overlay the signal histograms on top of the stack.
    for label in histos:
        if "Signal" in label:
            histos[label].Draw("same")
            legend.AddEntry(histos[label], histos[label].GetTitle())

    hratio.GetYaxis().SetRangeUser(-0.1,2.6)
    hratio.GetYaxis().SetTitle('Data/MC')

    xlabel = variable
    xlabel = xlabel.replace("leptons_mt", "m_{T}^{lepton} (GeV)")
    xlabel = xlabel.replace("leadinglepton_mt", "m_{T}^{lepton} (GeV)")
    xlabel = xlabel.replace("dilepton_invmass", "m_{ll} (GeV)")
    hratio.GetXaxis().SetTitle(xlabel)

    # Move empty ratio bins far below the visible range so no marker is shown.
    for ibin in range(1,hratio.GetXaxis().GetNbins()+1):
        if hratio.GetBinContent(ibin)==0:
            hratio.SetBinContent(ibin,-999)
    hratio.SetMarkerColor(kBlack)

    # Create the output directory without going through a shell.
    outdir = os.path.dirname(pdffile)
    if outdir and not os.path.isdir(outdir):
        os.makedirs(outdir)
    canvas.SaveAs(pdffile + ".pdf")
Example #2
0
    def makeplot(stacked_histograms, datahist, plotlabel, ratiovalues = False, ratiolabel = False, ratio_limits = False, header = False):
        """Draw data against stacked histograms with a ratio panel and save a PNG.

        Besides its arguments this function reads the following names from the
        enclosing scope: outputfolder, data_period, category, region,
        fakeratevariable, use_prompt_fakesubtraction, lumi, dataid, variable
        and pdffile.

        Args:
            stacked_histograms: list of histograms forming the stack.
            datahist: histogram passed to FabDraw in the data slot.
            plotlabel: string that becomes part of the output folder name.
            ratiovalues: optional histogram whose bin contents replace the
                contents of the ratio histogram bin by bin.
            ratiolabel: optional y-axis title of the ratio panel; overrides
                the default title derived from ``dataid``.
            ratio_limits: optional (low, high) y-range of the ratio panel;
                defaults to (-0.1, 2.6).
            header: optional legend header; defaults to
                "<region>, <category> tracks".
        """

        foldername = outputfolder + "/%s/%s%s%s_%sFR" % (data_period, category.replace("short", "Short").replace("long", "Long"), region, plotlabel, fakeratevariable.replace("_", "").replace(":", "-"))

        if use_prompt_fakesubtraction:
            foldername += "_FakeSubtraction"

        canvas = shared_utils.mkcanvas()
        legend = shared_utils.mklegend(x1 = 0.6, y1 = 0.4, x2 = 0.9, y2 = 0.8)

        if header:
            legend.SetHeader(header)
        else:
            legend.SetHeader("%s, %s tracks" % (region, category))

        ymin = 1e0; ymax = 1e2
        datahist.GetYaxis().SetRangeUser(ymin, ymax)
        for stacked_histogram in stacked_histograms:
            stacked_histogram.GetYaxis().SetRangeUser(ymin, ymax)
            stacked_histogram.GetYaxis().SetLimits(ymin, ymax)

        hratio, pads = shared_utils.FabDraw(canvas, legend, datahist, stacked_histograms, datamc = 'Data', lumi = lumi/1e3)
        stacked_histograms[-1].SetTitle("")

        if ratio_limits:
            hratio.GetYaxis().SetRangeUser(ratio_limits[0], ratio_limits[1])
        else:
            hratio.GetYaxis().SetRangeUser(-0.1,2.6)

        # An explicit ratiolabel wins over the title derived from dataid.
        if ratiolabel:
            ratio_title = ratiolabel
        elif "Run201" in dataid:
            ratio_title = 'Data/prediction'
        else:
            ratio_title = 'Fake pred./truth'
        hratio.GetYaxis().SetTitle(ratio_title)

        xlabel = variable
        xlabel = xlabel.replace("leptons_mt", "m_{T}^{lepton} (GeV)")
        xlabel = xlabel.replace("leadinglepton_mt", "m_{T}^{lepton} (GeV)")
        hratio.GetXaxis().SetTitle(xlabel)

        for ibin in range(1,hratio.GetXaxis().GetNbins()+1):
            if ratiovalues:
                hratio.SetBinContent(ibin, ratiovalues.GetBinContent(ibin))
            else:
                # Move empty ratio bins far below the visible range.
                if hratio.GetBinContent(ibin)==0:
                    hratio.SetBinContent(ibin,-999)
        hratio.SetMarkerColor(kBlack)

        # Create the folder directly rather than via a shell command, so that
        # spaces or shell metacharacters in the name cannot break it.
        if not os.path.isdir(foldername):
            os.makedirs(foldername)
        canvas.SaveAs(foldername + "/" + pdffile + ".png")
# Global ROOT setup: hide the statistics box and enable sum-of-weights-squared
# storage by default for histograms created from here on.
gStyle.SetOptStat(0)
TH1D.SetDefaultSumw2()

# One TMVA output file per "optimize-*" directory.
tmva_folders = glob.glob("optimize-*/output.root")

# Summary histograms, keyed by "<phase>, <category> tracks".
rocarea = {}
ks_signal = {}
ks_background = {}

for phase in ["2016", "2017"]:
    for category in ["short", "long"]:

        histos = {}
        colors = range(209, 270)

        canvas = shared_utils.mkcanvas("c1")
        legend = shared_utils.mklegend(x1=.17, y1=.17, x2=.75, y2=.6)
        legend.SetTextSize(0.035)

        # Book the three summary histograms for this phase/category.
        summary_key = "%s, %s tracks" % (phase, category)
        rocarea[summary_key] = TH1F(
            "rocarea", ";depth, number of trees;area under ROC", 55, 0, 55)
        ks_signal[summary_key] = TH1F(
            "ks_signal", ";depth, number of trees;KS test (signal)", 55, 0, 55)
        ks_background[summary_key] = TH1F(
            "ks_background", ";depth, number of trees;KS test (background)",
            55, 0, 55)

        for summary in (rocarea, ks_signal, ks_background):
            shared_utils.histoStyler(summary[summary_key])
Example #4
0
def get_reweighting_factor(histofolder, suffix):
    """Derive per-run-period data/simulation shape weights and store them.

    For every histogram label and every data period ("Run2016B", ...), the
    data histogram and the histogram of the matching simulation campaign
    (Summer16, Fall17 or Autumn18) are each normalised to unit integral and
    divided. The ratios of one year are drawn on a common canvas saved as
    "plots/hweights_<label>_<year>.pdf", and all ratios are written to
    "hweights.root" in the current working directory.

    Args:
        histofolder: folder containing "histograms<suffix>_<period>.root";
            each file must hold the histograms under "Histograms/<label>".
        suffix: string inserted between "histograms" and "_<period>.root".

    Returns:
        None. The results are only written to disk.
    """

    periods = [
        "Summer16",
        "Fall17",
        "Autumn18",
        "Run2016B",
        "Run2016C",
        "Run2016D",
        "Run2016E",
        "Run2016F",
        "Run2016G",
        "Run2016H",
        "Run2017B",
        "Run2017C",
        "Run2017D",
        "Run2017E",
        "Run2017F",
        "Run2018A",
        "Run2018B",
        "Run2018C",
        "Run2018D",
    ]

    histolabels = [
        "h_muonPtCand",
        "h_muonPt",
        "track_pt_short",
        "track_pt_long",
    ]

    # Simulation campaign used as denominator for each data-taking year.
    mc_by_year = {"2016": "Summer16", "2017": "Fall17", "2018": "Autumn18"}

    hists = {}
    for period in periods:
        hists[period] = {}
        # Open each input file once instead of once per histogram label.
        fin = TFile(
            "%s/histograms%s_%s.root" % (histofolder, suffix, period),
            "open")
        try:
            for label in histolabels:
                print("%s %s %s %s" % (label, histofolder, suffix, period))
                hist = fin.Get("Histograms/" + label)
                # Detach from the file so the histogram survives fin.Close().
                hist.SetDirectory(0)
                hist.SetLineWidth(2)
                shared_utils.histoStyler(hist)
                hists[period][label] = hist
        finally:
            fin.Close()

    print("all loaded")

    hweight = {}

    for label in histolabels:

        hweight[label] = {}

        for year in ["2016", "2017", "2018"]:
            canvas = shared_utils.mkcanvas()
            legend = shared_utils.mklegend(x1=0.6, y1=0.6, x2=0.85, y2=0.85)
            legend.SetTextSize(0.035)
            colors = list(range(209, 250))[::3]
            mc = mc_by_year[year]

            # Number of histograms already drawn on this canvas. The first
            # one must be drawn without "same" so that it creates the frame.
            n_drawn = 0

            for period in sorted(periods):

                # Only the data periods of this year get a weight.
                if "Run" + year not in period:
                    continue

                num = hists[period][label].Clone()
                denom = hists[mc][label].Clone()

                # Normalise both shapes; an empty histogram is reported and
                # left unscaled.
                if num.Integral() > 0:
                    num.Scale(1.0 / num.Integral())
                else:
                    print("%s %s %s %s" % (label, year, period, num.Integral()))

                if denom.Integral() > 0:
                    denom.Scale(1.0 / denom.Integral())
                else:
                    print("%s %s %s %s" % (label, year, period, denom.Integral()))

                weight = num
                weight.Divide(denom)
                weight.SetName(period + "_" + label)
                shared_utils.histoStyler(weight)
                weight.SetLineColor(colors[n_drawn])
                weight.GetYaxis().SetRangeUser(0, 5)
                weight.SetTitle(";track p_{T} (GeV);weight")
                weight.GetXaxis().SetRangeUser(0, 200)
                hweight[label][period] = weight

                if n_drawn == 0:
                    weight.Draw("hist e")
                else:
                    weight.Draw("hist e same")
                n_drawn += 1

                # Fold the overflow bin into the last regular bin.
                last_bin = weight.GetNbinsX() + 1
                overflow = weight.GetBinContent(last_bin)
                print(overflow)
                weight.AddBinContent((last_bin - 1), overflow)

                legend.AddEntry(weight, period)

            legend.Draw()
            shared_utils.stamp()
            canvas.SaveAs("plots/hweights_%s_%s.pdf" % (label, year))

    # save weights:
    fout = TFile("hweights.root", "recreate")
    for label in histolabels:
        for period in hweight[label]:
            hweight[label][period].Write()
    fout.Close()
Example #5
0
                #"aug21v3-noDeltaPt",
                #"aug21v3-noPixelHits",
                #"aug21v3-noPixelHits-noDeltaPt",
                "jul21-noDeltaPt",
               ]

extralabel = "commited"

# ROC plot:

histos = {}

for category in ["short", "long"]:
        
    canvas = shared_utils.mkcanvas("c1")
    legend = shared_utils.mklegend(x1=.17, y1=.17, x2=.6, y2=.6)
    legend.SetTextSize(0.035)
    
    drawhists = []

    for i_phase, phase in enumerate(["2016", "2017"]):

        #colors = range(209,270)
        #colors = [kBlack, kBlue, kBlue-3, kBlue-9, kCyan, kCyan-3, kGreen, kGreen-3, kOrange, kRed, kPink, kMagenta, kMagenta-3]
        colors = [kAzure, kOrange,  kCyan, kGreen-3, kRed, kMagenta, kRed-9, kAzure+2]
        
        color = 0
        dicke = len(tmva_folders)+1
        
        for i_tmva_folder, tmva_folder in enumerate(sorted(tmva_folders)):
            
Example #6
0
def plot2D(variable,
           cutstring,
           histos,
           lumi,
           pdffile,
           ymin=1e-1,
           ymax=1e5,
           showdata=False,
           drawoption="colz"):
    """Draw 2D histograms (BDT sideband region plot) and save them as PDFs.

    All histograms in ``histos`` are styled. Those whose label contains
    neither "Signal" nor "CombinedBg" are summed into a new entry
    ``histos["CombinedBg"]``, so the passed dict is modified. Each signal
    histogram and the combined background is then saved twice: as
    "<pdffile>_<label>.pdf" and, with region lines and "SR"/"CR" labels on
    top, as "<pdffile>_<label>_lines.pdf". Which lines are drawn depends on
    whether "short" and/or "long" occurs in ``pdffile``.

    Args:
        variable: "<y>:<x>" expression; used for the axis titles after known
            variable names are replaced by readable labels.
        cutstring, lumi, ymin, ymax, showdata: not used by this function.
        histos: dict mapping label -> 2D histogram.
        pdffile: output path prefix. The output directory is not created
            here.
        drawoption: draw option passed to each histogram's Draw().
    """
    z_text_size = 0.059
    text_font = 132

    for label in histos:
        hist = histos[label]
        hist.SetLineWidth(2)
        shared_utils.histoStyler(hist)

        zaxis = hist.GetZaxis()
        zaxis.SetLabelFont(text_font)
        zaxis.SetTitleFont(text_font)
        zaxis.SetTitleSize(z_text_size)
        zaxis.SetLabelSize(z_text_size)
        zaxis.SetTitleOffset(1.2)

        hist.GetXaxis().SetNdivisions(5)

        hist.SetMarkerStyle(20)
        hist.SetMarkerSize(0.2)
        hist.SetMarkerColorAlpha(hist.GetFillColor(), 0.5)

        # The variable string is "<y>:<x>", hence the swapped order below.
        variable = variable.replace("tracks_mva_loose", "MVA score").replace(
            "tracks_dxyVtx", "d_{xy} (cm)")
        axis_names = variable.split(":")
        hist.SetTitle(";%s;%s;Events" % (axis_names[1], axis_names[0]))

    legend = shared_utils.mklegend(x1=0.6, y1=0.4, x2=0.9, y2=0.8)

    canvas = shared_utils.mkcanvas()
    canvas.SetLogz(True)
    canvas.SetRightMargin(0.2)

    # Sum all backgrounds; 0 marks "nothing accumulated yet".
    histos["CombinedBg"] = 0
    for label in histos:
        if "Signal" in label or "CombinedBg" in label:
            continue
        if histos["CombinedBg"] == 0:
            histos["CombinedBg"] = histos[label].Clone()
            continue
        histos["CombinedBg"].Add(histos[label])
    if histos["CombinedBg"] == 0:
        del histos["CombinedBg"]

    # (tag searched in pdffile, TLine coordinates of the upper line,
    #  x position of the "SR" label)
    region_lines = (
        ("short", (0, -0.5, (0.8 + 0.5) / (0.65 / 0.01), 0.8), 0.005),
        ("long", (0, -0.05, (1 + 0.05) / (0.7 / 0.01), 1), 0.003),
    )

    for label in histos:

        if "CombinedBg" not in label and "Signal" not in label:
            continue

        outbase = pdffile + "_" + label.replace(":", "_").replace(" ", "_")

        histos[label].Draw(drawoption)
        shared_utils.stamp()
        canvas.SaveAs(outbase + ".pdf")

        # Second version of the plot with the region boundaries on top.
        text = TLatex()
        text.SetTextFont(132)
        text.SetTextSize(0.059)

        for tag, upper_coords, sr_x in region_lines:
            if tag not in pdffile:
                continue

            # The lines stay bound to local names so that they are still
            # alive when the canvas is saved below.
            upper_line = TLine(*upper_coords)
            upper_line.SetLineWidth(2)
            upper_line.Draw("same")
            lower_line = TLine(0.02, -1, 0.02, 1)
            lower_line.SetLineWidth(2)
            lower_line.Draw("same")

            text.DrawLatex(sr_x, 0.75, "SR")
            text.DrawLatex(0.03, 0.75, "CR")

        canvas.SaveAs(outbase + "_lines.pdf")
Example #7
0
def plot1D(variable,
           cutstring,
           histos,
           lumi,
           pdffile,
           xlabel=False,
           ymin=1e-1,
           ymax=1e5,
           showdata=True):
    """Draw a stacked 1D comparison with a ratio panel and save it as a PDF.

    Histograms are classified by their label: a label containing "Data" is
    the data histogram, a label containing "Signal" is overlaid on top of the
    stack, and everything else is a background. Backgrounds are stacked in
    order of increasing integral.

    NOTE: every non-data histogram in ``histos`` is scaled by ``lumi`` in
    place, so calling this twice on the same objects scales them twice.

    Args:
        variable: name of the plotted variable; default x-axis title.
        cutstring: unused here; kept for interface compatibility.
        histos: dict mapping label -> histogram object.
        lumi: scale factor applied to all non-data histograms; lumi/1e3,
            rounded to two decimals, is forwarded to shared_utils.FabDraw.
        pdffile: output path without the ".pdf" extension.
        xlabel: x-axis title; False or None means "use ``variable``".
        ymin, ymax: y-axis range applied to every histogram.
        showdata: if True and a data histogram exists, compare data to the
            stack; otherwise an empty "Data" histogram is used instead.
    """

    # Pick the data histogram by label instead of by its position in the dict.
    data_label = None
    for label in histos:
        if "Data" in label:
            if data_label is None:
                data_label = label
        else:
            histos[label].Scale(lumi)
            print("scaling with %s" % (lumi,))
        if "Signal" in label:
            histos[label].SetLineWidth(2)

    backgrounds = [
        histos[label] for label in histos
        if "Data" not in label and "Signal" not in label
    ]
    backgrounds.sort(key=lambda item: item.Integral())

    legend = shared_utils.mklegend(x1=0.6, y1=0.4, x2=0.9, y2=0.8)

    canvas = shared_utils.mkcanvas()
    canvas.SetFillStyle(4000)

    # From here on ``lumi`` is only the value forwarded to FabDraw.
    lumi = float("%.2f" % (lumi / 1e3))

    if data_label is not None and showdata:
        hratio, pads = shared_utils.FabDraw(canvas,
                                            legend,
                                            histos[data_label],
                                            backgrounds,
                                            lumi=lumi,
                                            datamc='Data')
    else:
        print("No data")

        # Take the axis range from the first histogram; no copy is needed
        # just to read it.
        xaxis = list(histos.values())[0].GetXaxis()
        nbins = xaxis.GetNbins()
        xlow = xaxis.GetBinLowEdge(1)
        xhigh = xaxis.GetBinLowEdge(nbins + 1)
        print("example.GetXaxis().GetNbins() %s" % (nbins,))
        print("example.GetXaxis().GetBinLowEdge(1) %s" % (xlow,))
        print("example.GetXaxis().GetBinLowEdge(example.GetXaxis().GetNbins()+1) %s" % (xhigh,))

        # Empty stand-in for the data histogram with the same axis range.
        empty_histo = TH1D("Data", "Data", nbins, xlow, xhigh)
        shared_utils.histoStyler(empty_histo)

        print("%s %s %s %s %s" % (canvas, legend, empty_histo, backgrounds, lumi))

        # Only backgrounds go into the stack, even if a data histogram
        # exists but is not shown.
        hratio, pads = shared_utils.FabDraw(canvas,
                                            legend,
                                            empty_histo,
                                            backgrounds,
                                            lumi=lumi,
                                            datamc='MC')
        if backgrounds:
            backgrounds[-1].SetTitle("")

    for label in histos:
        histos[label].GetYaxis().SetRangeUser(ymin, ymax)

    # Overlay the signal histograms on top of the stack.
    for label in histos:
        if "Signal" in label:
            histos[label].Draw("same")
            legend.AddEntry(histos[label], histos[label].GetTitle())

    hratio.GetYaxis().SetRangeUser(-0.1, 2.6)
    hratio.GetYaxis().SetTitle('Data/MC')

    if xlabel is False or xlabel is None:
        xlabel = variable
    hratio.GetXaxis().SetTitle(str(xlabel))

    # Move empty ratio bins far below the visible range so no marker is shown.
    for ibin in range(1, hratio.GetXaxis().GetNbins() + 1):
        if hratio.GetBinContent(ibin) == 0:
            hratio.SetBinContent(ibin, -999)
    hratio.SetMarkerColor(kBlack)

    # Create the output directory without going through a shell.
    outdir = os.path.dirname(pdffile)
    if outdir and not os.path.isdir(outdir):
        os.makedirs(outdir)
    canvas.SaveAs(pdffile + ".pdf")
Example #8
0
def significances(sg_filelist, bg_filelist, phase, batchname, signalcut=""):
    """Plot signal/background efficiency and significance versus BDT score.

    For short and long tracks, histograms of the BDT score are filled for
    signal and background, scaled by a luminosity factor, and scanned from
    -1 to 1 in steps of 0.005. For every threshold the signal efficiency,
    the background efficiency and N_sg / sqrt(N_sg + N_bg) are computed and
    drawn on one canvas per category, saved as
    "plots/significance_<batchname>_<category>_phase<phase>_completestats"
    with the extensions ".pdf" and ".root".

    Args:
        sg_filelist: signal input files, passed to plotting.get_all_histos.
        bg_filelist: background input files, passed the same way.
        phase: detector phase; 1 selects the scale factor 137000 and replaces
            the dE/dx cut by a dummy cut, anything else selects 35000.
        batchname: string used in the output file names.
        signalcut: extra selection appended verbatim to the signal cut
            string, so it has to start with " && " if it is not empty.
    """

    bdt_short = " && ".join(cutflow.cuts["BDT_short"][:-2])
    bdt_long = " && ".join(cutflow.cuts["BDT_long"][:-2])

    #FIXME phase 1 dE/dx
    if phase == 1:
        bdt_short = bdt_short.replace("tracks_deDxHarmonic2pixel>2.0",
                                      "MHT>=0")
        bdt_long = bdt_long.replace("tracks_deDxHarmonic2pixel>2.0", "MHT>=0")
        lumi = 137000
    else:
        lumi = 35000

    histos = collections.OrderedDict()

    def fill_histos(label, variable, shortcuts, longcuts):
        """Fill the bg/sg x short/long histograms of ``variable``."""
        short_sel = "tracks_is_pixel_track==1 && "
        long_sel = "tracks_is_pixel_track==0 && tracks_nMissingOuterHits>=2 && "
        genmatch = "tracks_chiCandGenMatchingDR<0.01 && "
        requests = [
            ("bg_short", bg_filelist, short_sel + shortcuts),
            ("bg_long", bg_filelist, long_sel + longcuts),
            ("sg_short", sg_filelist,
             short_sel + genmatch + shortcuts + signalcut),
            ("sg_long", sg_filelist,
             long_sel + genmatch + longcuts + signalcut),
        ]
        for prefix, filelist, cuts in requests:
            histos["%s_%s" % (prefix, label)] = plotting.get_all_histos(
                filelist,
                "Events",
                variable,
                cuts,
                nBinsX=200,
                xmin=-1,
                xmax=1)

    # numerator ("study") and common denominator ("denom"):
    fill_histos(
        "study", "tracks_mva_tight_may20_chi2_pt15",
        "tracks_pt>15 && tracks_matchedCaloEnergy/tracks_p<0.20 && " +
        bdt_short,
        "tracks_pt>40 && tracks_matchedCaloEnergy/tracks_p<0.20 && " +
        bdt_long)
    fill_histos("denom", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>15",
                "tracks_pt>40")

    # scale with lumi
    for label in histos:
        shared_utils.histoStyler(histos[label])
        histos[label].Scale(lumi)

    # get efficiencies: one [score, efficiency, yield] entry per threshold
    scores = numpy.arange(-1.0, 1.0, 0.005)
    efficiencies = {}

    for label in histos:

        if "denom" in label:
            continue

        hist = histos[label]
        denominator = histos[label.replace("study", "denom")].Integral()
        xaxis = hist.GetXaxis()
        upper_bin = xaxis.FindBin(1)

        points = []
        for i_score in scores:
            numerator = hist.Integral(xaxis.FindBin(i_score), upper_bin)
            if denominator > 0:
                points.append([i_score, numerator / denominator, numerator])
            else:
                points.append([i_score, 0, numerator])
        efficiencies[label] = points

    # build TGraphs
    graphs_sgeff = {}
    graphs_bgeff = {}
    graphs_significance = {}
    signlabel = "significance = N_{sg} / #sqrt{N_{sg} + N_{bg}}"

    for label in efficiencies:

        if "bg" in label:
            continue

        graphs_sgeff[label] = TGraph()
        graphs_bgeff[label] = TGraph()
        graphs_significance[label] = TGraph()

        sg_points = efficiencies[label]
        bg_points = efficiencies[label.replace("sg", "bg")]

        for (score, eff_sg, N_sg), (_, eff_bg, N_bg) in zip(sg_points,
                                                            bg_points):
            graphs_sgeff[label].SetPoint(graphs_sgeff[label].GetN(), score,
                                         eff_sg)
            graphs_bgeff[label].SetPoint(graphs_bgeff[label].GetN(), score,
                                         eff_bg)

            try:
                significance = N_sg / math.sqrt(N_sg + N_bg)
            except (ZeroDivisionError, ValueError):
                # no events at all (or a negative sum) above this threshold
                significance = 0

            graphs_significance[label].SetPoint(
                graphs_significance[label].GetN(), score, significance)

    for category in ["short", "long"]:

        # plot significances:
        #####################

        canvas = shared_utils.mkcanvas()

        # Empty frame defining the axis ranges for the graphs.
        histo = TH2F("empty", "empty", 1, -1, 1, 1, 0, 1)
        shared_utils.histoStyler(histo)
        histo.Draw()
        histo.SetTitle(";BDT response;efficiency, significance")
        legend = shared_utils.mklegend(x1=0.17, y1=0.2, x2=0.65, y2=0.45)

        for label in graphs_significance:

            if category not in label:
                continue

            graphs_significance[label].Draw("same")
            graphStyler(graphs_significance[label])
            graphs_significance[label].SetLineColor(210)

            graphs_sgeff[label].Draw("same")
            graphStyler(graphs_sgeff[label])
            graphs_sgeff[label].SetLineColor(kBlue)
            graphs_sgeff[label].SetFillColor(0)

            graphs_bgeff[label].Draw("same")
            graphStyler(graphs_bgeff[label])
            graphs_bgeff[label].SetLineColor(kRed)
            graphs_bgeff[label].SetFillColor(0)

        # Legend entries refer to the graphs actually drawn for this category.
        legend_key = "sg_%s_study" % category
        legend.SetHeader("Phase %s, %s tracks" % (phase, category))
        legend.AddEntry(graphs_sgeff[legend_key],
                        "signal efficiency #epsilon_{sg}")
        legend.AddEntry(graphs_bgeff[legend_key],
                        "background efficiency #epsilon_{bg}")

        legend.AddEntry(graphs_significance[legend_key], signlabel)

        legend.Draw()
        shared_utils.stamp()
        canvas.Print("plots/significance_%s_%s_phase%s_completestats.pdf" %
                     (batchname, category, phase))
        canvas.Print("plots/significance_%s_%s_phase%s_completestats.root" %
                     (batchname, category, phase))
Example #9
0
def roc_and_efficiencies(sg_filelist,
                         bg_filelist,
                         phase,
                         batchname,
                         style="A"):

    mt2_short = " && ".join(cutflow.cuts["MT2_short"])
    mt2_long = " && ".join(cutflow.cuts["MT2_long"])
    exo_short = " && ".join(cutflow.cuts["EXO_short"])
    exo_long = " && ".join(cutflow.cuts["EXO_long"])
    exo_pt15_short = " && ".join(cutflow.cuts["EXO_pt15_short"])
    exo_pt15_long = " && ".join(cutflow.cuts["EXO_pt15_long"])
    exo_noeta_short = " && ".join(cutflow.cuts["EXO_noeta_short"])
    exo_noeta_long = " && ".join(cutflow.cuts["EXO_noeta_long"])
    exo_noetapt_short = " && ".join(cutflow.cuts["EXO_noetapt_short"])
    exo_noetapt_long = " && ".join(cutflow.cuts["EXO_noetapt_long"])
    bdt_short = " && ".join(cutflow.cuts["BDT_short"][:-2])
    bdt_long = " && ".join(cutflow.cuts["BDT_long"][:-2])
    bdt_nojets_short = " && ".join(cutflow.cuts["BDT_noJetVeto_short"][:-2])
    bdt_nojets_long = " && ".join(cutflow.cuts["BDT_noJetVeto_long"][:-2])

    #FIXME phase 1 dE/dx
    if phase == 1:
        bdt_short = bdt_short.replace("tracks_deDxHarmonic2pixel>2.0",
                                      "MHT>=0")
        bdt_long = bdt_long.replace("tracks_deDxHarmonic2pixel>2.0", "MHT>=0")
        bdt_nojets_short = bdt_nojets_short.replace(
            "tracks_deDxHarmonic2pixel>2.0", "MHT>=0")
        bdt_nojets_long = bdt_nojets_long.replace(
            "tracks_deDxHarmonic2pixel>2.0", "MHT>=0")
        lumi = 137000
    else:
        bdt_short = bdt_short.replace("tracks_deDxHarmonic2pixel>2.0",
                                      "MHT>=0")
        bdt_long = bdt_long.replace("tracks_deDxHarmonic2pixel>2.0", "MHT>=0")
        bdt_nojets_short = bdt_nojets_short.replace(
            "tracks_deDxHarmonic2pixel>2.0", "MHT>=0")
        bdt_nojets_long = bdt_nojets_long.replace(
            "tracks_deDxHarmonic2pixel>2.0", "MHT>=0")
        lumi = 35000

    histos = collections.OrderedDict()
    drawoptions = collections.OrderedDict()

    # get histograms:

    def fill_histos(label, variable, shortcuts, longcuts):
        histos["bg_short_%s" % label] = plotting.get_all_histos(
            bg_filelist,
            "Events",
            variable,
            "tracks_is_pixel_track==1 && " + shortcuts,
            nBinsX=200,
            xmin=-1,
            xmax=1)
        histos["bg_long_%s" % label] = plotting.get_all_histos(
            bg_filelist,
            "Events",
            variable,
            "tracks_is_pixel_track==0 && tracks_nMissingOuterHits>=2 && " +
            longcuts,
            nBinsX=200,
            xmin=-1,
            xmax=1)
        histos["sg_short_%s" % label] = plotting.get_all_histos(
            sg_filelist,
            "Events",
            variable,
            "tracks_is_pixel_track==1 && tracks_chiCandGenMatchingDR<0.01 && "
            + shortcuts,
            nBinsX=200,
            xmin=-1,
            xmax=1)
        histos["sg_long_%s" % label] = plotting.get_all_histos(
            sg_filelist,
            "Events",
            variable,
            "tracks_is_pixel_track==0 && tracks_nMissingOuterHits>=2 && tracks_chiCandGenMatchingDR<0.01 && "
            + longcuts,
            nBinsX=200,
            xmin=-1,
            xmax=1)

    # get common denominator:
    #fill_histos("denom", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>15", "tracks_pt>40")
    fill_histos("denom", "tracks_mva_nov20_noEdep", "tracks_pt>15",
                "tracks_pt>40")

    #fill_histos("denom", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>15 && tracks_matchedCaloEnergy/tracks_p<0.20 && " + bdt_short, "tracks_pt>30 && tracks_matchedCaloEnergy/tracks_p<0.20 && " + bdt_long)

    # get numerator, ROC curve scans:
    #fill_histos("pt10", "tracks_mva_tight_may20_chi2_pt10", "tracks_pt>10 && " + bdt_short, "tracks_pt>30 && " + bdt_long)
    #drawoptions["pt10"] = ["BDT (track p_{T}>10 GeV)", "same", kRed, 1, True, False]

    #fill_histos("noBLpt15", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>15 && " + bdt_short.replace("tracks_passleptonveto==1", "MHT>=0"), "tracks_pt>30 && " + bdt_long.replace("tracks_passleptonveto==1", "MHT>=0"))
    #drawoptions["noBLpt15"] = ["BDT (track p_{T}>15 GeV, no leptonveto)", "same", kYellow, 1, True, True]
    #
    #fill_histos("noBLpt15b", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>15 && " + bdt_short.replace("tracks_passpionveto==1", "MHT>=0"), "tracks_pt>30 && " + bdt_long.replace("tracks_passpionveto==1", "MHT>=0"))
    #drawoptions["noBLpt15b"] = ["BDT (track p_{T}>15 GeV, no pionveto)", "same", kYellow+1, 1, True, True]
    #
    #fill_histos("noBLpt15c", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>15 && " + bdt_short.replace("tracks_passjetveto==1", "MHT>=0"), "tracks_pt>30 && " + bdt_long.replace("tracks_passjetveto==1", "MHT>=0"))
    #drawoptions["noBLpt15c"] = ["BDT (track p_{T}>15 GeV, no jetveto)", "same", kYellow+2, 1, True, True]
    #
    #fill_histos("noBLpt15d", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>15 && " + bdt_short.replace("tracks_deDxHarmonic2pixel>2.0", "MHT>=0"), "tracks_pt>30 && " + bdt_long.replace("tracks_deDxHarmonic2pixel>2.0", "MHT>=0"))
    #drawoptions["noBLpt15d"] = ["BDT (track p_{T}>15 GeV, no dedx)", "same", kYellow+3, 1, True, True]

    if style == "A":

        fill_histos("pt15noEdep", "tracks_mva_nov20_noEdep",
                    "tracks_pt>15 && " + bdt_short,
                    "tracks_pt>40 && " + bdt_long)
        drawoptions["pt15noEdep"] = [
            "BDT (track p_{T}>15 GeV)", "same", kOrange, 1, True, False
        ]

        fill_histos("pt30noEdep", "tracks_mva_nov20_noEdep",
                    "tracks_pt>40 && " + bdt_short,
                    "tracks_pt>40 && " + bdt_long)
        drawoptions["pt30noEdep"] = [
            "BDT (track p_{T}>40 GeV)", "same", kOrange, 1, False, True
        ]

        fill_histos(
            "pt30noEdep", "tracks_mva_nov20_noEdep", "tracks_pt>40 && " +
            bdt_short.replace("tracks_passjetveto==1 &&", " "),
            "tracks_pt>40 && " +
            bdt_long.replace("tracks_passjetveto==1 &&", " "))
        drawoptions["pt30noEdep"] = [
            "BDT (track p_{T}>40 GeV, no jet veto)", "same", kOrange, 2, False,
            True
        ]

        fill_histos(
            "pt15_ratio12", "tracks_mva_nov20_noEdep",
            "tracks_pt>15 && tracks_matchedCaloEnergy<15 && " + bdt_short,
            "tracks_pt>40 && tracks_matchedCaloEnergy/tracks_p<0.15 && " +
            bdt_long)
        drawoptions["pt15_ratio12"] = [
            "BDT (track p_{T}>15 GeV, with cut on E_{dep}", "same", kRed, 1,
            True, True
        ]

    if style == "B":

        #fill_histos("pt15", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>15 && " + bdt_short, "tracks_pt>40 && " + bdt_long)
        #drawoptions["pt15"] = ["BDT (track p_{T}>15 GeV), trained with E_{dep}", "same", kOrange, 1, True, True]

        fill_histos("pt15noEdep", "tracks_mva_nov20_noEdep",
                    "tracks_pt>15 && " + bdt_short,
                    "tracks_pt>40 && " + bdt_long)
        drawoptions["pt15noEdep"] = [
            "BDT (track p_{T}>15 GeV)", "same", kOrange, 1, True, True
        ]

        fill_histos(
            "pt15_ratio12", "tracks_mva_nov20_noEdep",
            "tracks_pt>15 && tracks_matchedCaloEnergy<15 && " + bdt_short,
            "tracks_pt>40 && tracks_matchedCaloEnergy/tracks_p<0.15 && " +
            bdt_long)
        drawoptions["pt15_ratio12"] = [
            "BDT (track p_{T}>15 GeV, E_{Dep}<15 GeV (<0.15/p))", "same", kRed,
            1, True, True
        ]

    #fill_histos("noBLpt15c", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>15 && " + bdt_short.replace("tracks_passjetveto==1", "MHT>=0"), "tracks_pt>30 && " + bdt_long.replace("tracks_passjetveto==1", "MHT>=0"))
    #drawoptions["noBLpt15c"] = ["BDT (track p_{T}>15 GeV, no jet veto)", "same", kOrange, 2, True, True]

    #fill_histos("pt30", "tracks_mva_tight_may20_chi2", "tracks_pt>30 && " + bdt_short, "tracks_pt>30 && " + bdt_long)
    #drawoptions["pt30"] = ["BDT (track p_{T}>30 GeV)", "same", kOrange, 2, True, False]

    #fill_histos("pt15_nojets", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>15 && " + bdt_nojets_short, "tracks_pt>30 && " + bdt_nojets_long)
    #drawoptions["pt15_nojets"] = ["BDT (track p_{T}>15 GeV, no jet veto)", "same", kOrange, 2, True, True]

    #fill_histos("pt15_ratio12", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>15 && tracks_matchedCaloEnergy/tracks_p<0.12 && " + bdt_short, "tracks_pt>30 && tracks_matchedCaloEnergy/tracks_p<0.12 && " + bdt_long)
    #drawoptions["pt15_ratio12"] = ["BDT (track p_{T}>15 GeV, EDep/p<0.12)", "same", kOrange, 3, True, True]

    #fill_histos("pt15_ratio20", "tracks_mva_nov20_noEdep", "tracks_pt>15 && tracks_matchedCaloEnergy/tracks_p<0.20 && " + bdt_short, "tracks_pt>40 && tracks_matchedCaloEnergy/tracks_p<0.20 && " + bdt_long)
    #drawoptions["pt15_ratio20"] = ["BDT (track p_{T}>15 GeV, not tr. on E_{dep}, E_{Dep}/p<20%)", "same", kRed, 1, True, True]

    #fill_histos("pt15_ratio30", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>15 && tracks_matchedCaloEnergy/tracks_p<0.30 && " + bdt_short, "tracks_pt>30 && tracks_matchedCaloEnergy/tracks_p<0.30 && " + bdt_long)
    #drawoptions["pt15_ratio30"] = ["BDT (track p_{T}>15 GeV, EDep/p<0.30)", "same", kOrange, 5, True, True]

    #fill_histos("pt15_moreiso", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>15 && tracks_trkRelIso<0.1 && " + bdt_short, "tracks_pt>30 && tracks_trkRelIso<0.1 && " + bdt_long)
    #drawoptions["pt15_moreiso"] = ["BDT (track p_{T}>15 GeV, relIso<0.1)", "same", kGreen-3, 1, True, True]

    #fill_histos("pt30", "tracks_mva_tight_may20_chi2", "tracks_pt>30 && " + bdt_short, "tracks_pt>30 && " + bdt_long)
    #drawoptions["pt30"] = ["BDT (track p_{T}>30 GeV)", "same", kPink+4, 1, True, False]

    # get numerator, working points:
    if phase == 0:
        #fill_histos("wpA", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>30 && tracks_mva_tight_may20_chi2>0.13 && tracks_matchedCaloEnergy/tracks_p<0.12 && " + bdt_short, "tracks_pt>30 && tracks_mva_tight_may20_chi2>0.13 && tracks_matchedCaloEnergy/tracks_p<0.12 && " + bdt_long)
        fill_histos(
            "wpA", "tracks_mva_nov20_noEdep",
            "tracks_pt>15 && tracks_mva_nov20_noEdep>0.1 && tracks_matchedCaloEnergy<15 && "
            + bdt_short,
            "tracks_pt>40 && tracks_mva_nov20_noEdep>0.1 && tracks_matchedCaloEnergy/tracks_p<0.15 && "
            + bdt_long)
        #fill_histos("wpB", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>15 && tracks_mva_tight_may20_chi2_pt15>-0.02 && tracks_matchedCaloEnergy/tracks_p<0.20 && " + bdt_short, "tracks_pt>30 && tracks_mva_tight_may20_chi2_pt15>0 && tracks_matchedCaloEnergy/tracks_p<0.20 && " + bdt_long)
        #fill_histos("wpC", "tracks_mva_nov20_noEdep", "tracks_pt>15 && tracks_mva_nov20_noEdep>0 && tracks_matchedCaloEnergy/tracks_p<0.20 && " + bdt_short, "tracks_pt>40 && tracks_mva_nov20_noEdep>0.05 && tracks_matchedCaloEnergy/tracks_p<0.20 && " + bdt_long)
        #fill_histos("wpB_pt150", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>15 && tracks_mva_tight_may20_chi2_pt15>-0.02 && tracks_matchedCaloEnergy/tracks_p<0.20 && " + bdt_short, "tracks_pt>150 && tracks_mva_tight_may20_chi2_pt15>0 && tracks_matchedCaloEnergy/tracks_p<0.20 && " + bdt_long)
        #fill_histos("wpB", "tracks_mva_tight_may20_chi2", "tracks_pt>15 && tracks_mva_tight_may20_chi2_pt15>0.05 && tracks_matchedCaloEnergy/tracks_p<0.30 && " + bdt_short, "tracks_pt>30 && tracks_mva_tight_may20_chi2_pt15>0.1 && tracks_matchedCaloEnergy/tracks_p<0.30 && " + bdt_long)
    elif phase == 1:
        #fill_histos("wpA", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>30 && tracks_mva_tight_may20_chi2>0 && tracks_matchedCaloEnergy/tracks_p<0.12 && " + bdt_short, "tracks_pt>30 && tracks_mva_tight_may20_chi2>0 && tracks_matchedCaloEnergy/tracks_p<0.12 && " + bdt_long)
        fill_histos(
            "wpA", "tracks_mva_nov20_noEdep",
            "tracks_pt>15 && tracks_mva_nov20_noEdep>0.12 && tracks_matchedCaloEnergy<15 && "
            + bdt_short,
            "tracks_pt>40 && tracks_mva_nov20_noEdep>0.15 && tracks_matchedCaloEnergy/tracks_p<0.15 && "
            + bdt_long)
        #fill_histos("wpB", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>15 && tracks_mva_tight_may20_chi2_pt15>-0.1 && tracks_matchedCaloEnergy/tracks_p<0.20 && " + bdt_short, "tracks_pt>30 && tracks_mva_tight_may20_chi2_pt15>-0.14 && tracks_matchedCaloEnergy/tracks_p<0.20 && " + bdt_long)
        #fill_histos("wpC", "tracks_mva_nov20_noEdep", "tracks_pt>15 && tracks_mva_nov20_noEdep>-0.1 && tracks_matchedCaloEnergy/tracks_p<0.20 && " + bdt_short, "tracks_pt>40 && tracks_mva_nov20_noEdep>0 && tracks_matchedCaloEnergy/tracks_p<0.20 && " + bdt_long)
        #fill_histos("wpB_pt150", "tracks_mva_tight_may20_chi2_pt15", "tracks_pt>15 && tracks_mva_tight_may20_chi2_pt15>-0.1 && tracks_matchedCaloEnergy/tracks_p<0.20 && " + bdt_short, "tracks_pt>150 && tracks_mva_tight_may20_chi2_pt15>-0.14 && tracks_matchedCaloEnergy/tracks_p<0.20 && " + bdt_long)
        #fill_histos("wpB", "tracks_mva_tight_may20_chi2", "tracks_pt>15 && tracks_mva_tight_may20_chi2_pt15>0.1 && tracks_matchedCaloEnergy/tracks_p<0.30 && " + bdt_short, "tracks_pt>30 && tracks_mva_tight_may20_chi2_pt15>0.1 && tracks_matchedCaloEnergy/tracks_p<0.30 && " + bdt_long)
    drawoptions["wpA"] = ["working point", "same p", kRed, 21, True, True]
    #drawoptions["wpA2"] = ["working point A, pT>15", "same p", kRed, 22, True, False]
    #drawoptions["wpB"] = ["working point B", "same p", kMagenta, 20, True, True]
    #drawoptions["wpC"] = ["working point (loose)", "same p", kRed, 20, True, True]

    if style == "A":
        fill_histos("mt2", "tracks_mva_tight_may20_chi2_pt15", mt2_short,
                    mt2_long)
        drawoptions["mt2"] = [
            "SUS-19-005 tag", "same p", kTeal, 20, True, True
        ]

        #fill_histos("mt2_pt150", "tracks_mva_tight_may20_chi2_pt15", mt2_short, mt2_long + " && tracks_pt>150")
        #drawoptions["mt2_pt150"] = ["SUS-19-005 tag (track p_{T}>150 GeV)", "same p", kTeal, 22, False, True]

        fill_histos("exo", "tracks_mva_tight_may20_chi2_pt15", exo_short,
                    exo_long)
        drawoptions["exo"] = [
            "EXO-19-010 tag", "same p", kAzure, 20, True, True
        ]

        #fill_histos("exo_pt15", "tracks_mva_tight_may20_chi2", exo_pt15_short, exo_pt15_long)
        #drawoptions["exo_pt15"] = ["EXO-19-010 tag (p_{T}>15 GeV)", "same p", kAzure+7, 20, True, True]

        fill_histos("exo_noeta", "tracks_mva_tight_may20_chi2_pt15",
                    exo_noeta_short, exo_noeta_long)
        drawoptions["exo_noeta"] = [
            "EXO-19-010 tag (no #eta cuts)", "same p", kAzure + 5, 20, True,
            True
        ]

        fill_histos("exo_noetapt", "tracks_mva_tight_may20_chi2_pt15",
                    exo_noetapt_short, exo_noetapt_long)
        drawoptions["exo_noetapt"] = [
            "EXO-19-010 tag (no p_{T} and #eta cuts)", "same p", kAzure + 5,
            21, True, True
        ]

    # apply the common histogram style and scale every histogram with lumi
    for label in histos:
        shared_utils.histoStyler(histos[label])
        histos[label].Scale(lumi)

    # get efficiencies:
    efficiencies = {}
    for label in histos:

        if "denom" in label: continue

        efficiencies[label] = []

        denom_label = label.split("_")[0] + "_" + label.split(
            "_")[1] + "_denom"
        denominator = histos[denom_label].Integral()

        if "mt2" in label or "exo" in label or "wp" in label:
            numerator = histos[label].Integral()
            if denominator > 0:
                efficiencies[label].append(
                    [0, numerator / denominator, numerator])
            else:
                efficiencies[label].append([0, 0, numerator])
        else:
            for i_score in numpy.arange(-1.0, 1.0, 0.005):
                numerator = histos[label].Integral(
                    histos[label].GetXaxis().FindBin(i_score),
                    histos[label].GetXaxis().FindBin(1))
                if denominator > 0:
                    efficiencies[label].append(
                        [i_score, numerator / denominator, numerator])
                else:
                    efficiencies[label].append([i_score, 0, numerator])

    #efffile = "plots/roc_%s_phase%s.dat" % (batchname, phase)
    #with open(efffile, "w+") as fout:
    #    for label in efficiencies:
    #        fout.write("Label: " + label + "\n**************\n")
    #        for item in efficiencies[label]:
    #            fout.write("%s, %s, %s\n" % (item[0], item[1], item[2]))
    #        fout.write("\n")

    # build TGraphs
    graphs_roc = {}
    graphs_sgeff = {}
    graphs_bgeff = {}
    graphs_significance = {}
    for label in efficiencies:

        if "bg" in label: continue
        graphs_roc[label] = TGraph()
        graphs_sgeff[label] = TGraph()
        graphs_bgeff[label] = TGraph()
        graphs_significance[label] = TGraph()

        for i in range(len(efficiencies[label])):
            score = efficiencies[label][i][0]
            eff_sg = efficiencies[label][i][1]
            eff_bg = efficiencies[label.replace("sg", "bg")][i][1]
            N_sg = efficiencies[label][i][2]
            N_bg = efficiencies[label.replace("sg", "bg")][i][2]
            graphs_roc[label].SetPoint(graphs_roc[label].GetN(), eff_sg,
                                       1 - eff_bg)
            graphs_sgeff[label].SetPoint(graphs_sgeff[label].GetN(), score,
                                         eff_sg)
            graphs_bgeff[label].SetPoint(graphs_bgeff[label].GetN(), score,
                                         eff_bg)

            try:
                significance = N_sg / math.sqrt(N_sg + N_bg)
                #significance = N_sg / math.sqrt( N_bg + (0.1*N_bg)**2 )
            except:
                significance = 0

            graphs_significance[label].SetPoint(
                graphs_significance[label].GetN(), score, significance)

    for category in ["short", "long"]:

        # plot ROC curves:
        ##################

        canvas = shared_utils.mkcanvas()

        if category == "short":
            if phase == 0:
                histo = TH2F("empty", "empty", 1, 0, 1, 1, 0.9, 1)
            else:
                histo = TH2F("empty", "empty", 1, 0, 1, 1, 0.98, 1)
        else:
            if phase == 0:
                histo = TH2F("empty", "empty", 1, 0, 1, 1, 0.97, 1)
            else:
                histo = TH2F("empty", "empty", 1, 0, 1, 1, 0.97, 1)

        shared_utils.histoStyler(histo)
        histo.Draw()
        histo.SetTitle(";#epsilon_{  sg};1 - #epsilon_{  bg}")

        if category == "short":
            legend = shared_utils.mklegend(x1=0.17, y1=0.2, x2=0.65, y2=0.6)
        else:
            legend = shared_utils.mklegend(x1=0.17, y1=0.2, x2=0.65, y2=0.6)

        for label in sorted(graphs_roc):

            if category not in label:
                continue

            graphStyler(graphs_roc[label])

            # e.g. drawoptions["pt30"] = ["BDT (track p_{T}>30 GeV)", "same", kPink+4, 1, True, True]

            optionslabel = "_".join(label.split("_")[2:])

            if category == "short" and not drawoptions[optionslabel][4]:
                continue
            if category == "long" and not drawoptions[optionslabel][5]:
                continue

            legendlabel = drawoptions[optionslabel][0]
            if drawoptions[optionslabel][1] == "same p":
                graphs_roc[label].SetLineColor(kWhite)
                graphs_roc[label].SetMarkerStyle(drawoptions[optionslabel][3])
                graphs_roc[label].SetMarkerColor(drawoptions[optionslabel][2])
            else:
                graphs_roc[label].SetLineColor(drawoptions[optionslabel][2])
                graphs_roc[label].SetLineStyle(drawoptions[optionslabel][3])
                print label, optionslabel, drawoptions[optionslabel][3]
            graphs_roc[label].Draw(drawoptions[optionslabel][1])

            if category == "long":
                legendlabel = legendlabel.replace("p_{T}>15 GeV",
                                                  "p_{T}>40 GeV")

            legend.AddEntry(graphs_roc[label], legendlabel)

        #legend.SetTextSize(0.045)
        legend.SetTextSize(0.04)
        legend.SetHeader("Phase %s, %s tracks" % (phase, category))
        legend.Draw()
        #shared_utils.stamp()

        pdfname = "plots/roc_%s_%s_phase%s_noedep.pdf" % (batchname, category,
                                                          phase)

        if style == "A":
            pdfname = pdfname.replace(".pdf", "_styleA.pdf")

        canvas.Print(pdfname)
Example #10
0
                        else:
                            label = "%s_%s_%s_%s_%s" % (
                                dataset, variable.replace(
                                    ":", "_"), region, fakeratetype, category)

                        print label

                        histos[label].SetLineColor(color)
                        histos[label].SetTitle(category + " tracks, " +
                                               dataset)

                        if category == "combined":
                            break

                    legend = shared_utils.mklegend(x1=0.15,
                                                   y1=0.7,
                                                   x2=0.5,
                                                   y2=0.83)
                    legend.SetTextSize(0.04)
                    canvas = shared_utils.mkcanvas()

                    maxvalue = 0

                    for i, label in enumerate(histos):

                        if i == 0:
                            histos[label].Draw("hist e")
                        else:
                            histos[label].Draw("hist e same")

                        histos[label].GetXaxis().SetNdivisions(6)
                        binmax = histos[label].GetMaximumBin()
                                scaledhisto.Scale(official_lumis[dataperiod] / integral)
                                if not hists[mcperiod][label + category]:
                                    hists[mcperiod][label + category] = scaledhisto.Clone()
                                else:
                                    hists[mcperiod][label + category].Add(scaledhisto)
                          

        for category in categories:
            
            for label in variables:
            
                if category == "" and "h_muon" not in label: continue
                if "h_muon" in label and category != "": continue

                canvas = shared_utils.mkcanvas()
                legend = shared_utils.mklegend(x1=0.6, y1=0.55, x2=0.85, y2=0.85)
            
                pad1 = TPad("pad1", "pad1", 0, 0.3, 1, 1.0)
                pad1.SetBottomMargin(0.0)
                pad1.SetLeftMargin(0.12)
                pad1.SetGridx()
                pad1.Draw()
                pad2 = TPad("pad2", "pad2", 0, 0.05, 1, 0.3)
                pad2.SetTopMargin(0.0)
                pad2.SetBottomMargin(0.4)
                pad2.SetLeftMargin(0.12)
                pad2.SetGridx()
                pad2.SetGridy()
                pad2.Draw()
                pad1.cd()
                
def main(options):

    histofolder = options.histofolder
    suffix = options.suffix
    plotfolder = "plots_%s" % options.suffix

    os.system("mkdir -p %s" % plotfolder)

    if options.mc_reweighted:
        periods = [
            "Run2016B",
            "Run2016C",
            "Run2016D",
            "Run2016E",
            "Run2016F",
            "Run2016G",
            "Run2016H",
            "Run2017B",
            "Run2017C",
            "Run2017D",
            "Run2017E",
            "Run2017F",
            "Run2018A",
            "Run2018B",
            "Run2018C",
            "Run2018D",
            "Summer16rwRun2016B",
            "Summer16rwRun2016C",
            "Summer16rwRun2016D",
            "Summer16rwRun2016E",
            "Summer16rwRun2016F",
            "Summer16rwRun2016G",
            "Summer16rwRun2016H",
            "Fall17rwRun2017B",
            "Fall17rwRun2017C",
            "Fall17rwRun2017D",
            "Fall17rwRun2017E",
            "Fall17rwRun2017F",
            "Autumn18rwRun2018A",
            "Autumn18rwRun2018B",
            "Autumn18rwRun2018C",
            "Autumn18rwRun2018D",
        ]
    else:
        periods = [
            "Run2016B",
            "Run2016C",
            "Run2016D",
            "Run2016E",
            "Run2016F",
            "Run2016G",
            "Run2016H",
            "Run2017B",
            "Run2017C",
            "Run2017D",
            "Run2017E",
            "Run2017F",
            "Run2018A",
            "Run2018B",
            "Run2018C",
            "Run2018D",
            "Summer16",
            "Fall17",
            "Autumn18",
        ]

    exact = "layers_remaining==track_trackerLayersWithMeasurement && "

    cuts = {
        "baseline": {
            "base_cuts": "layers_remaining>=3 && ",
            "taggedextra": "",
            "legendheader": "baseline",
        },
        #"lowdxydz": {
        #              "base_cuts":    "layers_remaining>=3 && ",
        #              "taggedextra":  "track_dxyVtx<0.01 && track_dzVtx<0.01 && ",
        #              "legendheader": "lowdxydz",
        #            },
    }

    if not options.get_from_tree:
        cuts = {
            "baseline": {
                "base_cuts": "",
                "taggedextra": "",
                "legendheader": "baseline",
            },
        }

    for cut_label in cuts:

        if options.weightkinematic:
            cuts[cut_label]["base_cuts"] += " weight_kinematicMLP2>0 && "
        elif options.weighttrackprop:
            cuts[cut_label]["base_cuts"] += " weight_trackpropMLP2>0 && "

        histolabels = {
            "h_tracks_reco": [
                "track_reco", cuts[cut_label]["base_cuts"] + "track_reco==1",
                1, 1, 2
            ],
            "h_tracks_rereco_short": [
                "track_reco",
                cuts[cut_label]["base_cuts"] + cuts[cut_label]["taggedextra"] +
                exact + " track_rereco==1 && track_is_pixel_track==1", 1, 1, 2
            ],
            "h_tracks_rereco_long": [
                "track_reco",
                cuts[cut_label]["base_cuts"] + cuts[cut_label]["taggedextra"] +
                exact + " track_rereco==1 && track_is_pixel_track==0", 1, 1, 2
            ],
            "h_tracks_tagged_short": [
                "track_reco", cuts[cut_label]["base_cuts"] +
                cuts[cut_label]["taggedextra"] + exact +
                " track_preselected==1 && track_mva>0.1 && track_pt>25 && track_is_pixel_track==1 && (track_matchedCaloEnergy<15 || track_matchedCaloEnergy/track_p<0.15)",
                1, 1, 2
            ],
            "h_tracks_tagged_long": [
                "track_reco", cuts[cut_label]["base_cuts"] +
                cuts[cut_label]["taggedextra"] + exact +
                " track_preselected==1 && track_mva>0.1 && track_pt>40 && track_is_pixel_track==0 && (track_matchedCaloEnergy<15 || track_matchedCaloEnergy/track_p<0.15)",
                1, 1, 2
            ],
            #"h_tracks_tagged_short":    ["track_reco", cuts[cut_label]["base_cuts"] + cuts[cut_label]["taggedextra"] + exact + " track_preselected==1 && track_tagged==1 && track_is_pixel_track==1", 1, 1, 2],
            #"h_tracks_tagged_long":     ["track_reco", cuts[cut_label]["base_cuts"] + cuts[cut_label]["taggedextra"] + exact + " track_preselected==1 && track_tagged==1 && track_is_pixel_track==0", 1, 1, 2],
            #"h_tracks_tagged_short":    ["track_reco", base_cutstagged + "track_tagged==1 && track_is_pixel_track==1", 1, 1, 2],
            #"h_tracks_tagged_short":   ["track_reco", base_cutstagged + "track_is_pixel_track==1", 1, 1, 2],
            #"h_tracks_tagged_short":   ["track_reco", base_cuts + exact + " abs(track_dxyVtx)<0.005 && abs(track_dzVtx)<0.005 && track_tagged==1 && track_pt>25 && track_is_pixel_track==1", 1, 1, 2],
            #"h_tracks_tagged_short":   ["track_reco", base_cuts + exact + base_cutstagged + " track_is_pixel_track==1", 1, 1, 2],
            #"h_tracks_tagged_short":   ["track_reco", exo + " && track_is_pixel_track==1", 1, 1, 2],
            #"h_tracks_tagged_short":   ["track_reco", base_cuts + "track_pt>25 && track_dxyVtx<0.001 && track_dzVtx<0.001 && track_tagged==1 && track_is_pixel_track==1", 1, 1, 2],
            #"h_tracks_tagged_short":   ["track_reco", "track_rereco==1 && track_is_pixel_track==1 && " + exo, 1, 1, 2],
        }

        categories = ["_short", "_long"]

        # get all histos:
        hists = {}
        for period in periods:

            if "Run201" in period and "rw" not in period:
                is_data = False
            else:
                is_data = True

            hists[period] = {}
            for label in histolabels:
                print period, label

                filename = "%s/histograms%s_%s.root" % (histofolder, suffix,
                                                        period)

                if options.get_from_tree:
                    tree = TChain("Events")
                    tree.Add(filename)

                    # apply weights before getting the histograms from the tree:
                    treecuts = "(%s)" % histolabels[label][1]
                    if not is_data and options.weightkinematic:
                        treecuts += "*weight_kinematicMLP2/(1.0-weight_kinematicMLP2)"
                    if not is_data and options.weighttrackprop:
                        treecuts += "*weight_trackpropMLP2/(1.0-weight_trackpropMLP2)"
                    if options.mc_reweighted:
                        treecuts += "*weight_ptreweighting"

                    hists[period][label] = plotting.get_histogram_from_tree(
                        tree,
                        histolabels[label][0],
                        cutstring=treecuts,
                        nBinsX=histolabels[label][2],
                        xmin=histolabels[label][3],
                        xmax=histolabels[label][4])
                    hists[period][label].SetDirectory(0)
                    hists[period][label].SetLineWidth(2)
                else:
                    fin = TFile(filename, "open")
                    print "%s/histograms%s_%s.root" % (histofolder, suffix,
                                                       period)
                    hists[period][label] = fin.Get("Histograms/" + label)
                    hists[period][label].SetDirectory(0)
                    hists[period][label].SetLineWidth(2)
                    fin.Close()

                shared_utils.histoStyler(hists[period][label])

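        # denominator fix: the reco histogram is the denominator for both
        # categories, so clone it under the "_short" and "_long" keys: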
        for period in periods:
            hists[period]["h_tracks_reco_short"] = hists[period][
                "h_tracks_reco"].Clone()
            hists[period]["h_tracks_reco_long"] = hists[period][
                "h_tracks_reco"].Clone()

        # scale factors (SF) and their uncertainties for the global, reco and
        # tagging efficiencies:
        fitresults = {}
        fitresults["fit_sf"] = {}
        fitresults["fit_uncert"] = {}
        fitresults["fit_sfreco"] = {}
        fitresults["fit_uncertreco"] = {}
        fitresults["fit_sftag"] = {}
        fitresults["fit_uncerttag"] = {}

        # calculated histograms:
        finaleff_global = {}
        finaleff_reco = {}
        finaleff_tag = {}

        finaleff_global_num = {}
        finaleff_reco_num = {}
        finaleff_tag_num = {}
        finaleff_global_denom = {}
        finaleff_reco_denom = {}
        finaleff_tag_denom = {}

        h_sf_global = {}
        h_sf_reco = {}
        h_sf_tag = {}
        g1_global = {}
        g1_reco = {}
        g1_tag = {}

        for category in categories:

            # first, get efficiencies:

            finaleff_global[category] = {}
            finaleff_reco[category] = {}
            finaleff_tag[category] = {}

            finaleff_global_num[category] = {}
            finaleff_reco_num[category] = {}
            finaleff_tag_num[category] = {}

            finaleff_global_denom[category] = {}
            finaleff_reco_denom[category] = {}
            finaleff_tag_denom[category] = {}

            for period in periods:

                finaleff_global_num[category][period] = hists[period][
                    "h_tracks_tagged" + category].Clone()
                finaleff_global_denom[category][period] = hists[period][
                    "h_tracks_reco" + category].Clone()
                finaleff_global[category][period] = finaleff_global_num[
                    category][period].Clone()
                finaleff_global[category][period].Divide(
                    finaleff_global_denom[category][period])

                finaleff_reco_num[category][period] = hists[period][
                    "h_tracks_rereco" + category].Clone()
                finaleff_reco_denom[category][period] = hists[period][
                    "h_tracks_reco" + category].Clone()
                finaleff_reco[category][period] = finaleff_reco_num[category][
                    period].Clone()
                finaleff_reco[category][period].Divide(
                    finaleff_reco_denom[category][period])

                finaleff_tag_num[category][period] = hists[period][
                    "h_tracks_tagged" + category].Clone()
                finaleff_tag_denom[category][period] = hists[period][
                    "h_tracks_rereco" + category].Clone()
                finaleff_tag[category][period] = finaleff_tag_num[category][
                    period].Clone()
                finaleff_tag[category][period].Divide(
                    finaleff_tag_denom[category][period])

            # get SF:

            h_sf_global[category] = {}
            h_sf_reco[category] = {}
            h_sf_tag[category] = {}
            g1_global[category] = {}
            g1_reco[category] = {}
            g1_tag[category] = {}

            for period in periods:

                print category, period
                if "rw" in period: continue
                if "Run201" not in period: continue

                if not options.mc_reweighted:
                    if "Run2016" in period:
                        mcperiod = "Summer16"
                    elif "Run2017" in period:
                        mcperiod = "Fall17"
                    elif "Run2018" in period:
                        mcperiod = "Autumn18"
                else:
                    if "Run2016" in period:
                        mcperiod = "Summer16rw" + period
                    elif "Run2017" in period:
                        mcperiod = "Fall17rw" + period
                    elif "Run2018" in period:
                        mcperiod = "Autumn18rw" + period

                if not options.get_from_tree:

                    print "fitting global SF..."
                    g1_global[category][period] = TF1('g1_global', '[0]', 3,
                                                      20)
                    h_sf_global[category][period] = finaleff_global[category][
                        period].Clone()
                    h_sf_global[category][period].Divide(
                        finaleff_global[category][mcperiod])
                    fit = h_sf_global[category][period].Fit(
                        g1_global[category][period], "", "same", 3, 20)
                    fitresults["fit_sf"][
                        period +
                        category] = g1_global[category][period].GetParameter(0)
                    if "short" in category:
                        fitresults["fit_uncert"][
                            period + category] = h_sf_global[category][
                                period].GetBinError(4)
                    else:
                        fitresults["fit_uncert"][
                            period + category] = h_sf_global[category][
                                period].GetBinError(6)

                    print "fitting reco SF..."
                    g1_reco[category][period] = TF1('g1_reco', '[0]', 3, 20)
                    h_sf_reco[category][period] = finaleff_reco[category][
                        period].Clone()
                    h_sf_reco[category][period].Divide(
                        finaleff_reco[category][mcperiod])
                    fit = h_sf_reco[category][period].Fit(
                        g1_reco[category][period], "", "same", 3, 20)
                    fitresults["fit_sfreco"][
                        period +
                        category] = g1_reco[category][period].GetParameter(0)
                    if "short" in category:
                        fitresults["fit_uncertreco"][
                            period + category] = h_sf_reco[category][
                                period].GetBinError(4)
                    else:
                        fitresults["fit_uncertreco"][
                            period + category] = h_sf_reco[category][
                                period].GetBinError(6)

                    print "fitting tagging SF..."
                    g1_tag[category][period] = TF1('g1_tag', '[0]', 3, 20)
                    h_sf_tag[category][period] = finaleff_tag[category][
                        period].Clone()
                    h_sf_tag[category][period].Divide(
                        finaleff_tag[category][mcperiod])
                    fit = h_sf_tag[category][period].Fit(
                        g1_tag[category][period], "", "same", 3, 20)
                    fitresults["fit_sftag"][
                        period +
                        category] = g1_tag[category][period].GetParameter(0)
                    if "short" in category:
                        fitresults["fit_uncerttag"][
                            period +
                            category] = h_sf_tag[category][period].GetBinError(
                                4)
                    else:
                        fitresults["fit_uncerttag"][
                            period +
                            category] = h_sf_tag[category][period].GetBinError(
                                6)

                else:

                    # using the tree: the histograms are booked with a single
                    # bin, so read the SF and its uncertainty from bin 1:
                    h_sf_global[category][period] = finaleff_global[category][
                        period].Clone()
                    h_sf_global[category][period].Divide(
                        finaleff_global[category][mcperiod])
                    fitresults["fit_sf"][period + category] = h_sf_global[
                        category][period].GetBinContent(1)
                    fitresults["fit_uncert"][period + category] = h_sf_global[
                        category][period].GetBinError(1)

                    h_sf_reco[category][period] = finaleff_reco[category][
                        period].Clone()
                    h_sf_reco[category][period].Divide(
                        finaleff_reco[category][mcperiod])
                    fitresults["fit_sfreco"][period + category] = h_sf_reco[
                        category][period].GetBinContent(1)
                    fitresults["fit_uncertreco"][
                        period +
                        category] = h_sf_reco[category][period].GetBinError(1)

                    h_sf_tag[category][period] = finaleff_tag[category][
                        period].Clone()
                    h_sf_tag[category][period].Divide(
                        finaleff_tag[category][mcperiod])
                    fitresults["fit_sftag"][
                        period +
                        category] = h_sf_tag[category][period].GetBinContent(1)
                    fitresults["fit_uncerttag"][
                        period +
                        category] = h_sf_tag[category][period].GetBinError(1)

            # Lumi-weighting:
            if options.lumiweighted:

                official_lumis = {
                    "Run2016B": 5.8,
                    "Run2016C": 2.6,
                    "Run2016D": 4.2,
                    "Run2016E": 4.0,
                    "Run2016F": 3.1,
                    "Run2016G": 7.5,
                    "Run2016H": 8.6,
                    "Run2017B": 4.8,
                    "Run2017C": 9.7,
                    "Run2017D": 4.3,
                    "Run2017E": 9.3,
                    "Run2017F": 13.5,
                    "Run2018A": 14,
                    "Run2018B": 7.1,
                    "Run2018C": 6.94,
                    "Run2018D": 31.93,
                }

                for runyear in ["Run2016", "Run2017", "Run2018"]:

                    print "runyear", runyear

                    fitresults["fit_sf"][runyear + category] = 0
                    fitresults["fit_uncert"][runyear + category] = 0
                    fitresults["fit_sfreco"][runyear + category] = 0
                    fitresults["fit_uncertreco"][runyear + category] = 0
                    fitresults["fit_sftag"][runyear + category] = 0
                    fitresults["fit_uncerttag"][runyear + category] = 0

                    sum_lumi = 0.0

                    for lumiyear in official_lumis:
                        if runyear in lumiyear:

                            sum_lumi += official_lumis[lumiyear]

                            fitresults["fit_sf"][
                                runyear + category] += fitresults["fit_sf"][
                                    lumiyear +
                                    category] * official_lumis[lumiyear]
                            fitresults["fit_uncert"][
                                runyear +
                                category] += fitresults["fit_uncert"][
                                    lumiyear +
                                    category] * official_lumis[lumiyear]
                            fitresults["fit_sfreco"][
                                runyear +
                                category] += fitresults["fit_sfreco"][
                                    lumiyear +
                                    category] * official_lumis[lumiyear]
                            fitresults["fit_uncertreco"][
                                runyear +
                                category] += fitresults["fit_uncertreco"][
                                    lumiyear +
                                    category] * official_lumis[lumiyear]
                            fitresults["fit_sftag"][
                                runyear + category] += fitresults["fit_sftag"][
                                    lumiyear +
                                    category] * official_lumis[lumiyear]
                            fitresults["fit_uncerttag"][
                                runyear +
                                category] += fitresults["fit_uncerttag"][
                                    lumiyear +
                                    category] * official_lumis[lumiyear]

                    print runyear, sum_lumi

                    fitresults["fit_sf"][runyear + category] /= sum_lumi
                    fitresults["fit_uncert"][runyear + category] /= sum_lumi
                    fitresults["fit_sfreco"][runyear + category] /= sum_lumi
                    fitresults["fit_uncertreco"][runyear +
                                                 category] /= sum_lumi
                    fitresults["fit_sftag"][runyear + category] /= sum_lumi
                    fitresults["fit_uncerttag"][runyear + category] /= sum_lumi

        # efficiencies:

        print "plot SF..."

        output_rootfile = TFile(
            "%s/allperiods_sf_combined.root" % (plotfolder), "recreate")

        for category in ["short", "long"]:

            canvas = shared_utils.mkcanvas()
            legend = shared_utils.mklegend(x1=0.47, y1=0.65, x2=0.85, y2=0.85)
            legend.SetHeader("%s tracks %s" %
                             (category, cuts[cut_label]["legendheader"]))
            legend.SetTextSize(0.035)

            h_sf_short = {}
            h_sf_long = {}

            for label in ["fit_sf", "fit_sfreco", "fit_sftag"]:

                if options.lumiweighted:
                    h_sf_short[label] = TH1F("h_sf_short[label]", "", 3, 0, 3)
                    h_sf_long[label] = TH1F("h_sf_long[label]", "", 3, 0, 3)
                else:
                    h_sf_short[label] = TH1F("h_sf_short[label]", "", 16, 0,
                                             16)
                    h_sf_long[label] = TH1F("h_sf_long[label]", "", 16, 0, 16)

                shared_utils.histoStyler(h_sf_short[label])
                shared_utils.histoStyler(h_sf_long[label])

                i_short = 0
                i_long = 0
                binlabels_short = []
                binlabels_long = []

                for i, period in enumerate(sorted(fitresults[label])):

                    sf = fitresults[label][period]
                    uncert = fitresults[label.replace("_sf",
                                                      "_uncert")][period]

                    if options.lumiweighted:
                        if period.split("_")[0] == "Run2016" or period.split(
                                "_")[0] == "Run2017" or period.split(
                                    "_")[0] == "Run2018":
                            pass
                        else:
                            continue

                    print category, period

                    if "short" in period:
                        h_sf_short[label].SetBinContent(i_short + 1, sf)
                        h_sf_short[label].SetBinError(i_short + 1, uncert)
                        i_short += 1
                        binlabels_short.append(
                            period.replace("Run", "").replace("_short",
                                                              "").replace(
                                                                  "_long", ""))
                    elif "long" in period:
                        h_sf_long[label].SetBinContent(i_long + 1, sf)
                        h_sf_long[label].SetBinError(i_long + 1, uncert)
                        i_long += 1
                        binlabels_long.append(
                            period.replace("Run", "").replace("_short",
                                                              "").replace(
                                                                  "_long", ""))

                if category == "short":
                    if label == "fit_sf":
                        h_sf_short[label].Draw("hist e")
                    else:
                        h_sf_short[label].Draw("hist e same")

                    outhist = h_sf_short[label].Clone()
                    outhist.SetDirectory(0)
                    outhist.SetName(label + "_" + category)
                    outhist.Write()

                    if "reco" in label:
                        h_sf_short[label].SetTitle(
                            ";;fitted track reconstruction scale factor")
                    elif "tag" in label:
                        h_sf_short[label].SetTitle(
                            ";;fitted track tagging scale factor")
                    else:
                        h_sf_short[label].SetTitle(";;fitted scale factor")
                    h_sf_short[label].GetYaxis().SetRangeUser(0.4, 1.6)
                else:
                    if label == "fit_sf":
                        h_sf_long[label].Draw("hist e")
                    else:
                        h_sf_long[label].Draw("hist e same")

                    outhist = h_sf_long[label].Clone()
                    outhist.SetDirectory(0)
                    outhist.SetName(label + "_" + category)
                    outhist.Write()

                    if "reco" in label:
                        h_sf_long[label].SetTitle(
                            ";;fitted track reconstruction scale factor")
                    elif "tag" in label:
                        h_sf_long[label].SetTitle(
                            ";;fitted track tagging scale factor")
                    else:
                        h_sf_long[label].SetTitle(";;fitted scale factor")
                    h_sf_long[label].GetYaxis().SetRangeUser(0.4, 1.6)

                if label == "fit_sf":
                    #h_sf_short[label].SetLineStyle(1)
                    #h_sf_long[label].SetLineStyle(1)
                    h_sf_short[label].SetLineWidth(3)
                    h_sf_long[label].SetLineWidth(3)
                    h_sf_short[label].SetLineColor(kRed)
                    h_sf_long[label].SetLineColor(kBlue)
                    if category == "short":
                        legend.AddEntry(h_sf_short[label], "combined SF")
                    else:
                        legend.AddEntry(h_sf_long[label], "combined SF")
                if label == "fit_sfreco":
                    #h_sf_short[label].SetLineStyle(2)
                    #h_sf_long[label].SetLineStyle(2)
                    h_sf_short[label].SetLineColor(97)
                    h_sf_long[label].SetLineColor(62)
                    if category == "short":
                        legend.AddEntry(h_sf_short[label], "reconstruction SF")
                    else:
                        legend.AddEntry(h_sf_long[label], "reconstruction SF")
                elif label == "fit_sftag":
                    h_sf_short[label].SetLineStyle(2)
                    h_sf_long[label].SetLineStyle(2)
                    h_sf_short[label].SetLineColor(97)
                    h_sf_long[label].SetLineColor(62)
                    if category == "short":
                        legend.AddEntry(h_sf_short[label], "tagging SF")
                    else:
                        legend.AddEntry(h_sf_long[label], "tagging SF")

                for i, i_binlabel in enumerate(binlabels_short):
                    h_sf_short[label].GetXaxis().SetBinLabel(i + 1, i_binlabel)
                for i, i_binlabel in enumerate(binlabels_long):
                    h_sf_long[label].GetXaxis().SetBinLabel(i + 1, i_binlabel)

                if options.lumiweighted:
                    h_sf_short[label].GetXaxis().SetLabelSize(0.09)
                    h_sf_short[label].GetXaxis().SetTitleSize(0.09)
                    h_sf_long[label].GetXaxis().SetLabelSize(0.09)
                    h_sf_long[label].GetXaxis().SetTitleSize(0.09)
                else:
                    h_sf_short[label].GetXaxis().SetTitleSize(0.045)
                    h_sf_short[label].GetXaxis().SetLabelSize(0.045)
                    h_sf_long[label].GetXaxis().SetTitleSize(0.045)
                    h_sf_long[label].GetXaxis().SetLabelSize(0.045)

            legend.Draw()

            shared_utils.stamp()

            pdfname = "%s/allperiods_sf_combined_%s_%s.pdf" % (
                plotfolder, category, cut_label)

            if options.get_from_tree:
                pdfname = pdfname.replace(".pdf", "_tree.pdf")
            if options.mc_reweighted:
                pdfname = pdfname.replace(".pdf", "_mcreweighted.pdf")
            if options.lumiweighted:
                pdfname = pdfname.replace(".pdf", "_lumiweighted.pdf")
            if options.weightkinematic:
                pdfname = pdfname.replace(".pdf", "_weightkinematic.pdf")
            if options.weighttrackprop:
                pdfname = pdfname.replace(".pdf", "_weighttrackprop.pdf")

            canvas.SaveAs(pdfname)
            #canvas.SaveAs(pdfname.replace(".pdf", ".root"))

            #fout = TFile(pdfname.replace(".pdf", ".root"), "recreate")
            #canvas.Write()
            #h_sf_short.SetName("h_scalefactor_short")
            #h_sf_short.Write()
            #h_sf_long.SetName("h_scalefactor_long")
            #h_sf_long.Write()
            #fout.Close()

        output_rootfile.Close()

        if options.lumiweighted:
            #this_periods = finaleff_global_num[category].keys()
            this_periods = [
                "Run2016",
                "Run2017",
                "Run2018",
            ]
        else:
            this_periods = periods

        # plot underlying efficiencies and SFs:

        if not options.lumiweighted and not options.get_from_tree:

            for i_finaleff, finaleff in enumerate(
                [finaleff_global, finaleff_reco, finaleff_tag]):

                for year in ["2016", "2017", "2018"]:

                    for category in categories:

                        canvas = shared_utils.mkcanvas()
                        legend = shared_utils.mklegend(x1=0.6,
                                                       y1=0.6,
                                                       x2=0.85,
                                                       y2=0.85)
                        legend.SetHeader("%s tracks (%s)" %
                                         (category.replace("_", ""), year))
                        legend.SetTextSize(0.035)

                        colors = [kBlack, 97, 94, 91, 86, 81, 70, 65, 61, 51]

                        for i_period, period in enumerate(this_periods):

                            if "rw" in period: continue

                            if year == "2016":
                                mcperiod = "Summer16"
                            elif year == "2017":
                                mcperiod = "Fall17"
                            elif year == "2018":
                                mcperiod = "Autumn18"

                            if period == mcperiod or year in period:

                                color = colors.pop(0)

                                if i_period == 0:
                                    drawoption = "p e"
                                else:
                                    drawoption = "hist e same"

                                print finaleff[category].keys()

                                if period != mcperiod:
                                    finaleff[category][period].SetMarkerStyle(
                                        20)
                                    finaleff[category][period].SetMarkerColor(
                                        color)

                                finaleff[category][period].Draw(drawoption)
                                finaleff[category][period].SetLineColor(color)
                                finaleff[category][period].SetLineStyle(1)
                                finaleff[category][period].SetTitle(
                                    ";number of remaining tracker layers;efficiency, scale factor"
                                )
                                finaleff[category][period].GetXaxis(
                                ).SetRangeUser(0, 20)
                                finaleff[category][period].GetYaxis(
                                ).SetRangeUser(0.4, 1.6)

                                legend.AddEntry(finaleff[category][period],
                                                period)

                                # include scale factor with fit result...:

                                if "Run" in period and "rw" not in period:

                                    if i_finaleff == 0:
                                        h_sf_global[category][period].Draw(
                                            "e same")
                                        h_sf_global[category][
                                            period].SetLineColor(color)
                                        h_sf_global[category][
                                            period].SetLineWidth(2)
                                        g1_global[category][period].Draw(
                                            "same")
                                        g1_global[category][
                                            period].SetLineColor(color)
                                        g1_global[category][
                                            period].SetLineWidth(2)
                                        #legend.AddEntry(g1_global[category][period], "scale factor")
                                    elif i_finaleff == 1:
                                        h_sf_reco[category][period].Draw(
                                            "e same")
                                        h_sf_reco[category][
                                            period].SetLineColor(color)
                                        h_sf_reco[category][
                                            period].SetLineWidth(2)
                                        g1_reco[category][period].Draw("same")
                                        g1_reco[category][period].SetLineColor(
                                            color)
                                        g1_reco[category][period].SetLineWidth(
                                            2)
                                        #legend.AddEntry(g1_reco[category][period], "scale factor")
                                    elif i_finaleff == 2:
                                        h_sf_tag[category][period].Draw(
                                            "e same")
                                        h_sf_tag[category][
                                            period].SetLineColor(color)
                                        h_sf_tag[category][
                                            period].SetLineWidth(2)
                                        g1_tag[category][period].Draw("same")
                                        g1_tag[category][period].SetLineColor(
                                            color)
                                        g1_tag[category][period].SetLineWidth(
                                            2)
                                        #legend.AddEntry(g1_tag[category][period], "scale factor")

                                    #finaleff_global_num[category][period].Draw("e same")
                                    #finaleff_global_num[category][period].SetLineColor(color)
                                    #finaleff_global_num[category][period].SetLineWidth(2)
                                    #finaleff_global_denom[category][period].Draw("e same")
                                    #finaleff_global_denom[category][period].SetLineColor(color)
                                    #finaleff_global_denom[category][period].SetLineWidth(2)
                                    #finaleff_global_denom[category][period].SetLineStyle(2)

                        legend.Draw()
                        shared_utils.stamp()

                        if i_finaleff == 0:
                            pdfname = "%s/underlying%s%s.pdf" % (
                                plotfolder, category, year)
                        elif i_finaleff == 1:
                            pdfname = "%s/underlying_reco%s%s.pdf" % (
                                plotfolder, category, year)
                        elif i_finaleff == 2:
                            pdfname = "%s/underlying_tag%s%s.pdf" % (
                                plotfolder, category, year)

                        if options.get_from_tree:
                            pdfname = pdfname.replace(".pdf", "_tree.pdf")
                        if options.mc_reweighted:
                            pdfname = pdfname.replace(".pdf",
                                                      "_mcreweighted.pdf")
                        if options.lumiweighted:
                            pdfname = pdfname.replace(".pdf",
                                                      "_lumiweighted.pdf")
                        #if not plot_sf:
                        #    pdfname = pdfname.replace(".pdf", "_numdenom.pdf")

                        canvas.SaveAs(pdfname)
Example #13
0
# Ordered (substring, axis label) pairs used to shorten cut expressions.
# Order matters: the first substring found in a cut wins, so the more
# specific patterns (e.g. "tracks_neutralPtSum/tracks_pt") have to come
# before the shorter patterns they contain (e.g. "tracks_neutralPtSum").
_CUTFLOW_BINLABELS = (
    ("tracks_is_pixel_track", "category"),
    ("tracks_ptErrOverPt2", "#Delta p_{T}"),
    ("tracks_neutralPtSum/tracks_pt", "nt. #Sigma p_{T}/pT"),
    ("tracks_neutralPtSum", "nt. pTSum"),
    ("tracks_chargedPtSum/tracks_pt", "ch. #Sigma p_{T}/pT"),
    ("tracks_chargedPtSum", "ch. #Sigma p_{T}"),
    ("tracks_pt", "p_{T}"),
    ("tracks_passmask", "mask"),
    ("tracks_trackQualityHighPurity", "purity"),
    ("tracks_eta", "#eta"),
    ("tracks_dzVtx", "d_{z}"),
    ("tracks_dxyVtx", "d_{xy}"),
    ("tracks_trkRelIso", "relIso"),
    ("tracks_trackerLayersWithMeasurement", "layers"),
    ("tracks_nValidTrackerHits", "tracker hits"),
    ("tracks_nMissingInnerHits", "miss. inner hits"),
    ("tracks_nValidPixelHits", "pixel hits"),
    ("tracks_passPFCandVeto", "PFCand"),
    ("tracks_passleptonveto", "lepton veto"),
    ("tracks_passpionveto", "pion veto"),
    ("tracks_passjetveto", "jet veto"),
    ("tracks_deDxHarmonic2pixel", "dE/dx"),
    ("tracks_mva_tight_may20_chi2", "BDT"),
    ("tracks_matchedCaloEnergy/tracks_p", "EDep/track p"),
    ("tracks_nMissingOuterHits", "miss. outer hits"),
    ("tracks_nMissingMiddleHits", "miss. middle hits"),
    ("tracks_exo_leptoniso", "lepton iso"),
    ("tracks_exo_trackiso", "track iso"),
    ("tracks_exo_jetiso", "jet iso"),
    ("tracks_matchedCaloEnergy", "EDep"),
    ("tracks_mt2_leptoniso", "lepton iso"),
    ("tracks_mt2_trackiso", "track iso"),
    ("tracks_pixelLayersWithMeasurement", "pixel layers"),
)


def _cutflow_binlabel(cut):
    """Return the short axis label for one cut expression.

    The first pattern of ``_CUTFLOW_BINLABELS`` that occurs as a substring
    of ``cut`` decides the label; a cut matching no pattern is returned
    unchanged.
    """
    for pattern, binlabel in _CUTFLOW_BINLABELS:
        if pattern in cut:
            return binlabel
    return cut


def plot_cutflow(files, header, is_signal, prefix):
    """Draw the normalised cut flow of short and long tracks for each tag.

    For every entry of the module-level ``cuts`` mapping (label -> list of
    cut strings) the cuts are applied cumulatively.  The yield after each
    stage is the integral of the histogram returned by
    ``plotting.get_all_histos`` (0 if that call returns something falsy) and
    is stored in a 20-bin histogram, which is then scaled so that its first
    bin equals one (skipped when the first bin is not positive).

    Every label that does not contain "long" is drawn in red together with
    the histogram whose label is obtained by replacing "short" with "long"
    (blue); that counterpart has to exist in ``cuts``.  The canvas is
    printed to ``plots/cutflow_<batchname>_<prefix>_<label>.pdf`` with
    "_short" removed from the label.

    Args:
        files: forwarded to ``plotting.get_all_histos``.  The third
            "/"-separated component of ``files[0]`` is used as the batch
            name of the output file, so ``files[0]`` needs at least two
            "/" characters.
        header: text placed at the beginning of the legend header.
        is_signal: if true, " && tracks_chiCandGenMatchingDR<0.01" is
            appended to every cut string and a different legend position
            is used.
        prefix: tag used in the output file name.  If it contains "p1" the
            "tracks_deDxHarmonic2pixel>2.0" requirement is replaced by
            "MHT>=0" when counting; if it contains "bg" the y axis is
            logarithmic with range [1e-4, 2].

    Returns:
        None.
    """
    if is_signal:
        signal_cutstring = " && tracks_chiCandGenMatchingDR<0.01"
    else:
        signal_cutstring = ""

    histos = {}
    for label in cuts:
        histos[label] = TH1D(label, label, 20, 0, 20)

    # Fill the yield after the first i + 1 cuts at x = i, i.e. into axis
    # bin i + 1 of the [0, 20) histogram.
    for label in cuts:
        for i in range(len(cuts[label])):
            cut = " && ".join(cuts[label][:i + 1])

            if "p1" in prefix:
                #FIXME phase 1 dE/dx
                cut = cut.replace("tracks_deDxHarmonic2pixel>2.0", "MHT>=0")

            h_tmp = plotting.get_all_histos(files,
                                            "Events",
                                            "tracks_is_pixel_track",
                                            cut + signal_cutstring,
                                            nBinsX=2,
                                            xmin=0,
                                            xmax=2)
            count = h_tmp.Integral() if h_tmp else 0
            histos[label].Fill(i, count)

    # Normalise to the yield after the first cut.
    for label in histos:
        normalization = histos[label].GetBinContent(1)
        if normalization > 0:
            histos[label].Scale(1.0 / normalization)

    # Alphanumeric x-axis labels.  ROOT numbers axis bins from 1 (bin 0 is
    # the underflow bin and is rejected by SetBinLabel), and cut number
    # ibin - 1 was filled into bin ibin above, so iterate over the bins
    # 1..min(number of bins, number of cuts) inclusive.
    for label in histos:
        xaxis = histos[label].GetXaxis()
        n_labelled = min(histos[label].GetNbinsX(), len(cuts[label]))
        for ibin in range(1, n_labelled + 1):
            xaxis.SetBinLabel(ibin, _cutflow_binlabel(cuts[label][ibin - 1]))

    for label in histos:

        # "long" histograms are drawn on top of their "short" partner below.
        if "long" in label:
            continue

        canvas = TCanvas("c1", "c1", 1000, 630)
        canvas.SetBottomMargin(.16)
        canvas.SetLeftMargin(.14)
        canvas.SetGrid()

        if "bg" in prefix:
            canvas.SetLogy()

        if is_signal:
            legend = shared_utils.mklegend(x1=0.17, y1=0.17, x2=0.4, y2=0.4)
        else:
            legend = shared_utils.mklegend(x1=0.6, y1=0.7, x2=0.9, y2=0.9)
        legend.SetTextSize(0.045)

        shared_utils.histoStyler(histos[label])
        histos[label].Draw("hist")
        histos[label].SetLineColor(kRed)
        histos[label].SetTitle(";;percentage of tracks remaining")
        legend.AddEntry(histos[label], "short tracks")

        if "bg" in prefix:
            histos[label].GetYaxis().SetRangeUser(1e-4, 2e0)
        else:
            histos[label].GetYaxis().SetRangeUser(0, 1.1)

        label_long = label.replace("short", "long")
        shared_utils.histoStyler(histos[label_long])
        histos[label_long].Draw("hist same")
        histos[label_long].SetLineColor(kBlue)
        histos[label_long].SetTitle(";cut stage;tracks")
        legend.AddEntry(histos[label_long], "long tracks")

        legend.SetHeader(header + ", " +
                         label.replace("_short", "").replace("_", " ") +
                         " tag")
        legend.Draw()

        shared_utils.stamp()

        # NOTE(review): assumes paths of the form "<a>/<b>/<batchname>/..."
        batchname = files[0].split("/")[2]
        canvas.Print("plots/cutflow_" + batchname + "_" + prefix + "_" +
                     label.replace("_short", "") + ".pdf")