Exemple #1
0
def getEfficiency(datasets, numerator="Numerator", denominator="Denominator"):

    #    statOption = ROOT.TEfficiency.kFNormal
    statOption = ROOT.TEfficiency.kFCP  # Clopper-Pearson
    #    statOption = ROOT.TEfficiency.kFFC # Feldman-Cousins

    first = True
    isData = False

    teff = ROOT.TEfficiency()
    for dataset in datasets:
        n = dataset.getDatasetRootHisto(numerator).getHistogram()
        d = dataset.getDatasetRootHisto(denominator).getHistogram()

        if d.GetEntries() == 0:
            continue

        checkNegatives(n, d)

        #        removeNegatives(n)
        #        removeNegatives(d)
        print dataset.getName(), "entries", n.GetEntries(), d.GetEntries()
        print "    bins", n.GetNbinsX(), d.GetNbinsX()
        print "    lowedge", n.GetBinLowEdge(1), d.GetBinLowEdge(1)

        eff = ROOT.TEfficiency(n, d)
        eff.SetStatisticOption(statOption)

        weight = 1
        if dataset.isMC():
            weight = dataset.getCrossSection() / d.GetEntries()
            for i in range(1, d.GetNbinsX() + 1):
                print "    bin", i, d.GetBinLowEdge(i), n.GetBinContent(
                    i), d.GetBinContent(i)
        eff.SetWeight(weight)

        if first:
            teff = eff
            if dataset.isData():
                tn = n
                td = d
            first = False
        else:
            teff.Add(eff)
            if dataset.isData():
                tn.Add(n)
                td.Add(d)
    if isData:
        teff = ROOT.TEfficiency(tn, td)
        teff.SetStatisticOption(self.statOption)

    return teff
def getEfficiency(datasets,numerator="Numerator",denominator="Denominator"):

#    statOption = ROOT.TEfficiency.kFNormal
    statOption = ROOT.TEfficiency.kFCP # Clopper-Pearson
#    statOption = ROOT.TEfficiency.kFFC # Feldman-Cousins

    first = True
    isData = False
     
    teff = ROOT.TEfficiency()
    for dataset in datasets:
        n = dataset.getDatasetRootHisto(numerator).getHistogram()                                               
        d = dataset.getDatasetRootHisto(denominator).getHistogram()

        if d.GetEntries() == 0:
            continue

        checkNegatives(n,d)

#        removeNegatives(n)
#        removeNegatives(d)
        print dataset.getName(),"entries",n.GetEntries(),d.GetEntries()
        print "    bins",n.GetNbinsX(),d.GetNbinsX()
        print "    lowedge",n.GetBinLowEdge(1),d.GetBinLowEdge(1)
        
        eff = ROOT.TEfficiency(n,d)
        eff.SetStatisticOption(statOption)

        weight = 1
        if dataset.isMC():
            weight = dataset.getCrossSection()/d.GetEntries()
            for i in range(1,d.GetNbinsX()+1):
                print "    bin",i,d.GetBinLowEdge(i),n.GetBinContent(i),d.GetBinContent(i)
        eff.SetWeight(weight)

        if first:
            teff = eff
            if dataset.isData():
                tn = n
                td = d
            first = False
        else:
            teff.Add(eff)
            if dataset.isData():
                tn.Add(n)
                td.Add(d)
    if isData:
        teff = ROOT.TEfficiency(tn, td)
        teff.SetStatisticOption(self.statOption)

    return teff
def GetEfficiency(datasetsMgr, datasets, numPath, denPath, intLumi):     
    # Definitions
    myList  = []
    myList_MC  = []
    myList_Data = []
    index   = 0
    _kwargs = GetHistoKwargs(numPath, opts)        
    # For-loop: All datasets
    for dataset in datasets:
        if dataset.isMC():
            n = dataset.getDatasetRootHisto(numPath)
            n.normalizeToLuminosity(intLumi)                                                                                                                       
            num = n.getHistogram()
            d = dataset.getDatasetRootHisto(denPath)
            d.normalizeToLuminosity(intLumi)                                                                                                                       
            den = d.getHistogram()

        else:
            num = dataset.getDatasetRootHisto(numPath).getHistogram()
            den = dataset.getDatasetRootHisto(denPath).getHistogram()


        total = den.Integral(0, den.GetXaxis().GetNbins()+1)
        selected = num.Integral(0, num.GetXaxis().GetNbins()+1)

        print "Numerical Efficiency", numPath, dataset.getName(), ":", round(selected/total, 3)
        print "Pass :"******" events"
        if "binList" in _kwargs:
            #if len(_kwargs["binList"]) == 1:
            #    continue
            xBins   = _kwargs["binList"]
            nx      = len(xBins)-1
            num     = num.Rebin(nx, "", xBins)
            den     = den.Rebin(nx, "", xBins)
        #elif "Eta" in numPath or "Phi" in numPath:
        #    num     = num.Rebin(2)
        #    den     = den.Rebin(2)
        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den) # fixme: investigate warnings
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP) #FCP
                
        # Convert to TGraph
        eff = convert2TGraph(eff)
    return eff
Exemple #4
0
def GetEfficiency(datasetsMgr, datasets, numPath, denPath, intLumi):
    # Definitions
    myList = []
    myList_MC = []
    myList_Data = []
    index = 0
    _kwargs = GetHistoKwargs(numPath, opts)
    # For-loop: All datasets
    for dataset in datasets:
        if dataset.isMC():
            n = dataset.getDatasetRootHisto(numPath)
            n.normalizeToLuminosity(intLumi)
            num = n.getHistogram()
            d = dataset.getDatasetRootHisto(denPath)
            d.normalizeToLuminosity(intLumi)
            den = d.getHistogram()

        else:
            num = dataset.getDatasetRootHisto(numPath).getHistogram()
            den = dataset.getDatasetRootHisto(denPath).getHistogram()

        total = den.Integral(0, den.GetXaxis().GetNbins() + 1)
        selected = num.Integral(0, num.GetXaxis().GetNbins() + 1)

        print "Numerical Efficiency", numPath, dataset.getName(), ":", round(
            selected / total, 3)
        print "Pass :"******" events"
        if "binList" in _kwargs:
            #if len(_kwargs["binList"]) == 1:
            #    continue
            xBins = _kwargs["binList"]
            nx = len(xBins) - 1
            num = num.Rebin(nx, "", xBins)
            den = den.Rebin(nx, "", xBins)
        #elif "Eta" in numPath or "Phi" in numPath:
        #    num     = num.Rebin(2)
        #    den     = den.Rebin(2)
        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den)  # fixme: investigate warnings
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP)  #FCP

        # Convert to TGraph
        eff = convert2TGraph(eff)
    return eff
def PlotEfficiency(datasetsMgr, numPath, denPath, intLumi):  
    # Definitions
    myList  = []
    myList_MC  = []
    myList_Data = []
    index   = 0
    _kwargs = GetHistoKwargs(numPath, opts)        
    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():

        if "Fake" in numPath and "TT" in dataset.getName():
            continue

        if dataset.isMC():
            n = dataset.getDatasetRootHisto(numPath)
            n.normalizeToLuminosity(intLumi)                                                                                                                       
            num = n.getHistogram()
            d = dataset.getDatasetRootHisto(denPath)
            d.normalizeToLuminosity(intLumi)                                                                                                                       
            den = d.getHistogram()

        else:
            num = dataset.getDatasetRootHisto(numPath).getHistogram()
            den = dataset.getDatasetRootHisto(denPath).getHistogram()


        total = den.Integral(0, den.GetXaxis().GetNbins()+1)
        selected = num.Integral(0, num.GetXaxis().GetNbins()+1)
        print "Pass :"******" events"
        print "Numerical Efficiency", numPath, dataset.getName(), ":", round(selected/total, 3)

        if "binList" in _kwargs:
            #if len(_kwargs["binList"]) == 1:
            #    continue
            xBins   = _kwargs["binList"]
            nx      = len(xBins)-1
            num     = num.Rebin(nx, "", xBins)
            den     = den.Rebin(nx, "", xBins)
        elif "Eta" in numPath or "Phi" in numPath:
            num     = num.Rebin(2)
            den     = den.Rebin(2)
        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue


        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den) # fixme: investigate warnings
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP) #FCP
                
        # Convert to TGraph
        eff = convert2TGraph(eff)
        # Apply default style (according to dataset name)
        plots._plotStyles[dataset.getName()].apply(eff)

        # Append in list
        myList.append(histograms.HistoGraph(eff, plots._legendLabels[dataset.getName()], "lp", "P"))
        if dataset.isMC():
            eff_MC = eff
            if "QCD" in dataset.getName():
                eff_QCD = eff
            elif "TT" in dataset.getName():
                eff_TT= eff
            myList_MC.append(histograms.HistoGraph(eff, plots._legendLabels[dataset.getName()], "lp", "P"))
        else:
            eff_Data = eff
            plots._plotStyles[dataset.getName()].apply(eff_Data)
            #styles.dataStyle.apply(eff_Data)
            eff_Data.SetMarkerSize(1.2)
            myList_Data.append(histograms.HistoGraph(eff_Data, plots._legendLabels[dataset.getName()], "p", "P"))

    numPath = numPath.replace("AfterAllSelections_","")
    # Define save name
    saveName = "Eff_" + numPath.split("/")[-1]

    # Plot the efficiency
    p = plots.PlotBase(datasetRootHistos=myList, saveFormats=[])
    plots.drawPlot(p, saveName, **_kwargs)

    p1 = plots.ComparisonManyPlot(histograms.HistoGraph(eff_Data, "Data",  drawStyle="P"), 
                                  myList_MC, saveFormats=[])

    # Save plot in all formats
    savePath = os.path.join(opts.saveDir, "HplusMasses", numPath.split("/")[0], opts.optMode)
    save_path = savePath + opts.MVAcut
    print "save_path", save_path
    # Draw and save the plot                                                                                                                                                     
    p1.setLuminosity(intLumi)
    _kwargs["ratio"] = True
    _kwargs["ratioInvert"] = True
    _kwargs["cutBoxY"] = {"cutValue": 1.0, "fillColor": 16, "box": False, "line": True, "greaterThan": True, "mainCanvas": True, "ratioCanvas": True}
    plots.drawPlot(p1, save_path1, **_kwargs)
    SavePlot(p1, saveName, save_path, saveFormats = [".png", ".pdf", ".C"])
    return
def main(opts):

    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setOptStat(True)
    style.setGridX(False)
    style.setGridY(False)
    
    # If user does not define optimisation mode do all of them
    if opts.optMode == None:
        if len(optList) < 1:
            optList.append("")
        else:
            pass
        optModes = optList
    else:
        optModes = [opts.optMode]

    # For-loop: All optimisation modes
    for opt in optModes:
        opts.optMode = opt

        # Setup & configure the dataset manager 
        datasetsMgr = GetDatasetsFromDir(opts)
        datasetsMgr.updateNAllEventsToPUWeighted()
        datasetsMgr.loadLuminosities() # from lumi.json

        if opts.verbose:
            datasetsMgr.PrintCrossSections()
            datasetsMgr.PrintLuminosities()

        # Set/Overwrite cross-sections
        for d in datasetsMgr.getAllDatasets():
            if "ChargedHiggs" in d.getName():
                datasetsMgr.getDataset(d.getName()).setCrossSection(1.0)

        # Print dataset information before removing anything?
        datasetsMgr.PrintInfo()

        # Determine integrated Lumi before removing data
        if "Data" in datasetsMgr.getAllDatasetNames():
            intLumi = datasetsMgr.getDataset("Data").getLuminosity()

        # Remove datasets
        filterKeys = ["TTW"]
        for key in filterKeys:
            datasetsMgr.remove(filter(lambda name: key in name, datasetsMgr.getAllDatasetNames()))
        else:
            intLumi = 35920

        for key in filterKeys:
            datasetsMgr.remove(filter(lambda name: key in name, datasetsMgr.getAllDatasetNames()))
        
        # Re-order datasets
        datasetOrder = []
        haveQCD = False
        for d in datasetsMgr.getAllDatasets():
            if "QCD" in d.getName():
                haveQCD = True
            datasetOrder.append(d.getName())
        datasetsMgr.selectAndReorder(datasetOrder)

        # Define the mapping histograms in numerator->denominator pairs
        VariableList = [
            "LdgTop_Pt",
            ]

        minRunRange, maxRunRange, runRange = GetRunRange(datasetsMgr)

        # Merge histograms (see NtupleAnalysis/python/tools/plots.py) 
        plots.mergeRenameReorderForDataMC(datasetsMgr) 
        
        datasets_  = datasetsMgr.getAllDatasets()
        dataset_Data = datasetsMgr.getDataDatasets()
        dataset_MC   = datasetsMgr.getMCDatasets()

        # Print dataset information
        datasetsMgr.PrintInfo()

        # For-loop: All numerator-denominator pairs
        counter =  0
        nPlots  = len(VariableList)

        for var in VariableList:
            histoN = "AfterAllSelections_"+var
            histoD = "AfterStandardSelections_"+var
            numerator   = os.path.join(opts.folder, histoN)
            denominator = os.path.join(opts.folder, histoD)

            counter+=1
            msg = "{:<9} {:>3} {:<1} {:<3} {:<50}".format("Histogram", "%i" % counter, "/", "%s:" % (nPlots), "%s" % (var))
            Print(ShellStyles.SuccessStyle() + msg + ShellStyles.NormalStyle(), counter==1)
            
            for dataset in datasets_:

                if dataset.isMC():
                    n = dataset.getDatasetRootHisto(numerator)
                    n.normalizeToLuminosity(intLumi)
                    num = n.getHistogram()
                    d = dataset.getDatasetRootHisto(denominator)
                    d.normalizeToLuminosity(intLumi)
                    den = d.getHistogram()
                    
                else:
                    num = dataset.getDatasetRootHisto(numerator).getHistogram()
                    den = dataset.getDatasetRootHisto(denominator).getHistogram()
                                                            
                x_bins = num.GetXaxis().GetNbins()
                
                i = 1
                while i < x_bins:
                    xvalue = num.GetXaxis().GetBinLowEdge(i)+0.5*num.GetXaxis().GetBinWidth(i)
                    if xvalue < 20:
                        my_bin = i
                    i+=1
                        
                my_xvalue = num.GetXaxis().GetBinUpEdge(my_bin)+0.5*num.GetXaxis().GetBinWidth(my_bin)
                total = den.Integral(0, x_bins) #my_bin
                selected = num.Integral(0, x_bins) #my_bin
                            
            plotName     = "Eff_%s" % (var)
            # Get Efficiency Plots  
            _kwargs  = GetHistoKwargs(var, opts)
            eff_Data = GetEfficiency(datasetsMgr, dataset_Data, numerator, denominator, intLumi)
            eff_MC   = GetEfficiency(datasetsMgr, dataset_MC, numerator, denominator, intLumi)

            # Apply Styles 
            styles.dataStyle.apply(eff_Data)
            styles.qcdStyle.apply(eff_MC)
            # Create the plot
            p = plots.ComparisonPlot(histograms.HistoGraph(eff_Data, "eff_Data", "p", "P"),
                                     histograms.HistoGraph(eff_MC,   "eff_MC"  , "p", "P"),
                                     saveFormats=[])
            # Define the legend entries 
            p.histoMgr.setHistoLegendLabelMany(
                {
                    "eff_Data": "Data",
                    "eff_MC"  : "QCD"
                    }
                )

            # Append in list
            myList = []
            myList.append(histograms.HistoGraph(eff_Data, plots._legendLabels["Data"], "lp", "P"))

            p.setLuminosity(intLumi)
            _kwargs["ratio"] = True
            _kwargs["opts"]   = {"xmin": 0.0, "xmax": 600.0, "ymin": 0.0, "ymax": 0.16, "ymaxfactor": 1.8}
            _kwargs["cutBoxY"] = {"cutValue": 1.10, "fillColor": ROOT.kGray+1, "fillStyle": 3001, "box": False, "line": True, "greaterThan": True, "mainCanvas": False, 
                                  "ratioCanvas": True, "mirror": True}
            plots.drawPlot(p, plotName, **_kwargs)

            # Draw 
            savePath = os.path.join(opts.saveDir, opts.optMode)
            SavePlot(p, plotName, savePath, saveFormats = [".png", ".pdf", ".C"])
        
    # Save results in JSON
    name = opts.mcrab.split("_")[-3]
    name = name.replace(opts.analysisName, "")
    jsonName = "topMisID_"+ name +"_TopMassCut400.json"
    analysis = opts.analysisName
    label = "2016"
    plotDir =  os.path.join(opts.folder, jsonName)
    pythonWriter.addParameters(plotDir, label, runRange, opts.intLumi, eff_Data)
    pythonWriter.addMCParameters(label, eff_MC)
    fileName_json = jsonName
    pythonWriter.writeJSON(fileName_json)


        
    return
def PlotEfficiency(datasetsMgr, numPath, denPath, eff_def):  
    
    # Definitions
    myList       = []
    
    default_eff    = None
    datasetList    = []
    ttVariationEff = []

    
    _kwargs     = GetHistoKwargs(numPath, opts)        
    nx          = 0
    if len(_kwargs["binList"]) > 0:
        xBins   = _kwargs["binList"]
        nx      = len(xBins)-1
    counter     = 0

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        
        if dataset.isMC():
            n   = dataset.getDatasetRootHisto(numPath)
            d   = dataset.getDatasetRootHisto(denPath)
            num = n.getHistogram()
            den = d.getHistogram()

            if nx > 0:
                num = num.Rebin(nx, "", xBins)
                den = den.Rebin(nx, "", xBins)
        else:
            num = dataset.getDatasetRootHisto(numPath).getHistogram()
            den = dataset.getDatasetRootHisto(denPath).getHistogram()
            if nx > 0:
                num = num.Rebin(nx, "", xBins)
                den = den.Rebin(nx, "", xBins)

        # Calculations    
        total    = den.Integral(0, den.GetXaxis().GetNbins()+1)
        selected = num.Integral(0, num.GetXaxis().GetNbins()+1)

        if 0:
            print "Numerical Efficiency", numPath, dataset.getName(), ":", round(selected/total, 3)
            
        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue
        
        
        
        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den) 
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP) #FCP
        
        datasetTT = datasetsMgr.getDataset("TT")
        # Get the histograms
        numTT = datasetTT.getDatasetRootHisto(numPath).getHistogram()
        denTT = datasetTT.getDatasetRootHisto(denPath).getHistogram()
        if nx > 0:
            numTT = numTT.Rebin(nx, "", xBins) #num.Rebin(nx, "", xBins)
            denTT = denTT.Rebin(nx, "", xBins) #den.Rebin(nx, "", xBins)

        '''
        for i in range(1, num.GetNbinsX()+1):
            nbin = num.GetBinContent(i)
            dbin = den.GetBinContent(i)
            nbinTT = numTT.GetBinContent(i)
            dbinTT = denTT.GetBinContent(i)
            eps = nbin/dbin
            epsTT = nbinTT/dbinTT
            ratioTT = eps/epsTT
            if ratioTT > 1:
                ratioTT = 1/ratioTT
            #print "bin: ", i, "eps: ", round(eps,5) , "epsTT: ", round(epsTT,5)
            #print "bin: ", i, "eps/epsTT: ", (1.0 - round(ratioTT, 3))*100
        '''
        eff_ref = ROOT.TEfficiency(numTT, denTT) 
        eff_ref.SetStatisticOption(ROOT.TEfficiency.kFCP) #FCP
        
        # Convert to TGraph
        gEff    = convert2TGraph(eff)
        gEffRef = convert2TGraph(eff_ref)
        
        # Keep the default tt and variations tt efficiency plots 
        if dataset.getName() == "TT":
            default_eff = gEffRef.Clone()
        else:
            datasetList.append(dataset.getName())
            ttVariationEff.append(gEff)

        # Style definitions
        stylesDef = styles.ttStyle
        styles0 = styles.signalStyleHToTB300                                            
        styles1 = styles.signalStyleHToTB500
        styles2 = styles.signalStyleHToTB800
        styles3 = styles.signalStyleHToTB500
        styles4 = styles.signalStyleHToTB1000
        styles5 = styles.signalStyleHToTB2000
        styles6 = styles.signalStyleHToTB180
        styles7 = styles.signalStyleHToTB3000
        styles8 = styles.signalStyleHToTB200

        


        if dataset.getName() == "TT":
            styles.ttStyle.apply(gEffRef)
            legend_ref = "t#bar{t}"
            if opts.type == "partonShower":
                legend_ref = "t#bar{t} (Pythia8)"
            elif opts.type == "evtGen": 
                legend_ref = "t#bar{t} (Powheg)"
            refGraph = histograms.HistoGraph(gEffRef, legend_ref, "p", "P")
        else:
            styles.markerStyles[counter].apply(gEff)
            legend  = dataset.getName().replace("TT_", "t#bar{t} (").replace("isr", "ISR ").replace("fsr", "FSR ")
            legend  = legend.replace("hdamp", "hdamp ").replace("DOWN", "down").replace("UP", "up")
            legend  = legend.replace("mtop1665", "m_{t} = 166.5 GeV")
            legend  = legend.replace("mtop1695", "m_{t} = 169.5 GeV")
            legend  = legend.replace("mtop1715", "m_{t} = 171.5 GeV")
            legend  = legend.replace("mtop1735", "m_{t} = 173.5 GeV")
            legend  = legend.replace("mtop1755", "m_{t} = 175.5 GeV")
            legend  = legend.replace("mtop1785", "m_{t} = 178.5 GeV")
            legend  = legend.replace("TuneEE5C", "Herwig++")
            legend += ")"
            counter+=1
            #myList.append(histograms.HistoGraph(gEff, legend, "lp", "P"))
            myList.append(histograms.HistoGraph(gEff, legend, "p", "P"))
         
   
    units = "GeV/c"
    if eff_def == "fakeTop":
        _kwargs["xlabel"]  = "candidate p_{T} (%s)" % (units)
    elif eff_def == "inclusiveTop" or eff_def == "genuineTop":
        _kwargs["xlabel"]  = "generated top p_{T} (%s)" % (units)
    else:
        _kwargs["xlabel"]  = "p_{T} (%s)" % (units)



    # Define stuff
    numPath  = numPath.replace("AfterAllSelections_","")
    saveName = "Efficiency_%s_%s" % (eff_def, opts.type) 
    #saveName = saveName.replace("__", "_Inclusive_")

    # Plot the efficiency
    p = plots.ComparisonManyPlot(refGraph, myList, saveFormats=[])
    savePath = os.path.join(opts.saveDir, opts.optMode)    
    plots.drawPlot(p, savePath, **_kwargs)

    # Save plot in all formats
    SavePlot(p, saveName, savePath, saveFormats = [".png", ".pdf", ".C"])
    
    # ==============================================================================
    #   I need the uncertainties from the ratio of all plots (ONLY for Genuine)
    # ==============================================================================
    
    if eff_def == "genuineTop":
        
        uncWriter = UncertaintyWriter()
        jsonName = "uncertainties_%s_BDT_%s.json" % (opts.type, opts.BDT)
        analysis = opts.analysisName
        saveDir  =  os.path.join("", jsonName)
        
        for i in range(0, len(datasetList)):
            uncWriter.addParameters(datasetList[i], analysis, saveDir, default_eff, ttVariationEff[i])

            #print "i = ", i, " Dataset = ",  datasetList[i]
            #for iBin in range(1, len(xBins)):
            #ratio  = float(default_eff.GetEfficiency(iBin))/float(ttVariationEff[i].GetEfficiency(iBin))
            #unc = 0.5*(1.0 - ratio) 
            #print "iBin = ", iBin, " Default TT=", default_eff.GetEfficiency(iBin), "    Variation (", datasetList[i], ") =", ttVariationEff[i].GetEfficiency(iBin), "   Uncertainty =", unc
        
        uncWriter.writeJSON(jsonName)


    return
def PlotProb(datasets, numPath, denPath):


    EfficiencyList = []
    index = 0
    for dataset in datasets:
        
        datasetName = dataset.getName()
        print "Dataset = ", datasetName
        
        
        statOption = ROOT.TEfficiency.kFNormal        
##        n = dataset.getDatasetRootHisto(numPath).getHistogram()
#        n.normalizeToOne()
##        d = dataset.getDatasetRootHisto(denPath).getHistogram()
        nn = dataset.getDatasetRootHisto(numPath)
        nn.normalizeToLuminosity(35.8*(10**3))
        n = nn.getHistogram()
        dd = dataset.getDatasetRootHisto(denPath)
#        dd.normalizeToOne()                                                                                                                                            
#        dd.normalizeToLuminosity(36.3*(10**3))
        dd.normalizeToLuminosity(35.8*(10**3))                                                                                                                                   
#        dd.normalizeByCrossSection()
        d = dd.getHistogram()


#        if "TT" in datasetName and ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath):
#            continue
#        elif "M_"  in datasetName and not ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath):
#            continue
        
        if "Event" in numPath:
            n.Rebin(10)
            d.Rebin(10)
        else:
            n.Rebin(5)
            d.Rebin(5)
        
        if d.GetEntries() == 0 or n.GetEntries() == 0:
            continue

        if n.GetEntries() > d.GetEntries():
            continue
        # Check Negatives
        CheckNegatives(n, d, True)
        
        # Remove Negatives 
        RemoveNegatives(n)
                
        nBins = d.GetNbinsX()
        xMin  = d.GetXaxis().GetXmin()
        xMax  = d.GetXaxis().GetXmax()
        
        binwidth = int(n.GetBinWidth(0))
                
        # ----------------------------------------------------------------------------------------- #
        #      Ugly hack to ignore EMPTY (in the wanted range) histograms with overflows/underflows
        # ----------------------------------------------------------------------------------------- #
        if (0):
            print "\n"
            print "=========== getEfficiency:"
            print "Dataset             = ", dataset.getName()
            print "Numerator:   entries=", n.GetEntries(), " Bins=", n.GetNbinsX(), " Low edge=", n.GetBinLowEdge(1)
            print "Denominator: entries=", d.GetEntries(), " Bins=", d.GetNbinsX(), " Low edge=", d.GetBinLowEdge(1)
            print "\n"
            
            print ">>>>>>  Sanity Check:  <<<<<<"
            print "Numerator Mean       = ", n.GetMean()
            print "Numerator RMS        = ", n.GetRMS()                                                                                                                
            print "Numerator Integral   = ", n.Integral(1, nBins)
            print "Denominator Mean     = ", d.GetMean()
            print "Denominator RMS      = ", d.GetRMS()
            print "Denominator Integral = ", d.Integral(1, nBins)
            
        if (n.GetMean() == 0 or d.GetMean() == 0): continue
        if (n.GetRMS()  == 0 or d.GetRMS()  == 0): continue
        if (n.Integral(1,nBins) == 0 or d.Integral(1,nBins) == 0): continue
        
#        if not (ROOT.TEfficiency.CheckConsistency(n,d)): continue;
        effic = ROOT.TEfficiency(n,d)
        effic.SetStatisticOption(statOption)
        
        weight = 1
        if dataset.isMC():
            weight = dataset.getCrossSection()
            effic.SetWeight(weight)
            
        eff = convert2TGraph(effic)
    

        
        # Apply Styles
        if "TT" in datasetName:
            if index == 0:
                styles.signalStyleHToTB500.apply(eff)
#            styles.ttStyle.apply(eff)
                eff.SetLineStyle(1)
                eff.SetLineWidth(3)
                eff.SetLineColor(619)
                legend = "Default: t#bar{t}"
                index = 1
            else:
                styles.signalStyleHToTB500.apply(eff)
                #            styles.ttStyle.apply(eff)
                eff.SetLineStyle(1)
                eff.SetLineWidth(3)
                eff.SetLineColor(417)
                legend = "#Delta R(q,q')>0.8: t#bar{t}"

        elif "M_500" in datasetName:
            styles.signalStyleHToTB500.apply(eff)
            legend = "H^{+} m_{H^{+}} = 500 GeV"
        elif "M_300" in datasetName:
            styles.signalStyleHToTB300.apply(eff)
            legend = "H^{+} m_{H^{+}} = 300 GeV"
        elif "M_1000" in datasetName:
            styles.signalStyleHToTB1000.apply(eff)
            legend = "H^{+} m_{H^{+}} = 1000 GeV"
        elif "M_800" in datasetName:
            styles.signalStyleHToTB800.apply(eff)
            legend = "H^{+} m_{H^{+}} = 800 GeV"
        elif "M_200" in datasetName:
            styles.signalStyleHToTB200.apply(eff)
            legend = "H^{+} m_{H^{+}} = 200 GeV"
        else:
            styles.ttStyle.apply(eff)
            legend = "other"


        EfficiencyList.append(histograms.HistoGraph(eff, legend, "lp", "P"))
            
    saveName = "Eff_"+numPath.split("/")[-1]+"Over"+denPath.split("/")[-1]
    if "Pt" in numPath:
        xMin = 0.0
#        rebinX = 2
        xMax = 805.0
#        xMax = 555.0 # For topPt < 500GeV
        xTitle = "p_{T} (GeV/c)"
        units = "GeV/c"
        _format = "%0.1f" + units
        yTitle = "Efficiency / "   + str(binwidth) + " "+units
        yMin = 0.0
        yMax = 1.1

    elif "_Eta" in numPath:
        xMin = -3.0
        xMax = +3.0
        xTitle = "#eta"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 1.1

    elif "_Mass" in numPath:
        xMin = 50.0
        xMax = 300
        xTitle = "M (GeV/c^{2})"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 1.1

    elif "_Phi" in numPath:
        xMin = -3
        xMax = +3
        xTitle = "#phi"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 1.1

    else:
        xMin = 0.0
        xMax = 250.0
        xTitle = "xTitle"
        yTitle = "yTitle"
        yMin = 0.0
        yMax = 1.1

    if "Fake" in numPath:
#        xMin = 95.0
#        rebinX = 4                                                                                                                                                  
        xMax = 805.0
        xTitle = "candidate p_{T} (GeV/c)"
        units = "GeV/c"
        _format = "%0.1f" + units
        yTitle = "Misid rate / "  + str(binwidth) + " " +units
        yMin = 0.0
        yMax = 0.11

    if "Event" in numPath:
        rebinX = 2
#        xMin = 95.0
        xMax = 805.0
        xTitle = "candidate p_{T} (GeV/c)"
        units = "GeV/c"
        _format = "%0.1f" + units
        yTitle = "Efficiency  / "  + str(binwidth) + " "+ units
        yMin = 0.0
        yMax = 1.1
        
    if "NonMatched" in numPath:
        xMin = 90.0
        rebinX = 4
        xMax = 700.0
        xMax = 555.0 # For topPt < 500GeV
        xTitle = "p_{T} (GeV)"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 0.15

    if "AllTopQuarkPt_MatchedBDT" in numPath and "TopQuarkPt" in denPath:
        xMin = 0.0
#        rebinX = 4
        xMax = 805.0 #705
        units = "GeV/c"
        xTitle = "generated top p_{T} (GeV/c)"
        yTitle = "Efficiency / "  + str(binwidth) + " " + units
        yMin = 0.0
        yMax = 1.1

    if "SameFake" in numPath:
        xMin = 95.0
        rebinX = 4
        xMax = 705.0
        xMax = 555.0 # For topPt < 500GeV
        xTitle = "p_{T} [GeV]"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 1.1


    options = {"ymin": yMin  , "ymax": yMax, "xmin":xMin, "xMax":xMax}

#    if "TT" in datasetName and ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath):
#        return
#    if "M_"  in datasetName and not ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath):
#        return

    p = plots.PlotBase(datasetRootHistos=EfficiencyList, saveFormats=kwargs.get("saveFormats"))

    #p = plots.ComparisonManyPlot(refEff, EfficiencyList, saveFormats=[])
    
    p.createFrame(saveName, opts=options)

#    p.histoMgr.forEachHisto(lambda h: h.getRootHisto().RebinX(kwargs.get("rebinX")))

     # Set Titles                                                                                                                                                                                                
#    p.getFrame().GetYaxis().SetTitle(kwargs.get("ylabel"))  #"ylabel"
    p.getFrame().GetXaxis().SetTitle(xTitle)
    p.getFrame().GetYaxis().SetTitle(yTitle)
    
    # Set range
    p.getFrame().GetXaxis().SetRangeUser(xMin, xMax)
    
    
    moveLegend = {"dx": -0.55, "dy": -0.02, "dh": -0.2}
#    moveLegend = {"dx": -0.55, "dy": -0.01, "dh": -0.1}
    p.setLegend(histograms.moveLegend(histograms.createLegend(), **moveLegend))
    
    # Add Standard Texts to plot                                                                                                                                                                
    histograms.addStandardTexts()

    p.draw()

    # Save plot in all formats
    savePath = os.path.join(opts.saveDir, "HplusMasses", numPath.split("/")[0], opts.optMode)
    save_path = savePath + opts.MVAcut

#    SavePlot(p, saveName, savePath)
    SavePlot(p, saveName, save_path)
    return
def GetCutEfficiencyHisto(dataset, histoName, statOpt, **kwargs):
    '''
    See https://root.cern.ch/doc/master/classTEfficiency.html
    '''
    HasKeys(["verbose", "normalizeTo", "cutDirection"], **kwargs)
    verbose = kwargs.get("verbose")
    normalizeTo = kwargs.get("normalizeTo")
    cutDirection = kwargs.get("cutDirection")
    Verbose("Calculating the cut-efficiency (%s) for histo with name %s" %
            (cutDirection, histoName))

    # Choose statistics options
    statOpts = [
        "kFCP", "kFNormal", "KFWilson", "kFAC", "kFFC", "kBJeffrey",
        "kBUniform", "kBayesian"
    ]
    if statOpt not in statOpts:
        raise Exception(
            "Invalid statistics option \"%s\". Please choose one from the following:\n\t%s"
            % (statOpt, "\n\t".join(statOpts)))

    if statOpt == "kFCP":
        statOption = ROOT.TEfficiency.kFCP  # Clopper-Pearson
    elif statOpt == "kFNormal":
        statOption = ROOT.TEfficiency.kFNormal  # Normal Approximation
    elif statOpt == "kFWilson":
        statOption = ROOT.TEfficiency.kFWilson  # Wilson
    elif statOpt == "kFAC":
        statOption = ROOT.TEfficiency.kFAC  # Agresti-Coull
    elif statOpt == "kFFC":
        statOption = ROOT.TEfficiency.kFFC  # Feldman-Cousins
    elif statOpt == "kBJeffrey":
        statOption = ROOT.TEfficiency.kBJeffrey  # Jeffrey
    elif statOpt == "kBUniform":
        statOption = ROOT.TEfficiency.kBUniform  # Uniform Prior
    elif statOpt == "kBayesian":
        statOption = ROOT.TEfficiency.kBayesian  # Custom Prior
    else:
        raise Exception("This should never be reached")

    # Declare variables & options
    first = True
    isData = False
    teff = ROOT.TEfficiency()

    # Get the ROOT histogram
    rootHisto = dataset.getDatasetRootHisto(histoName)

    # Normalise the histogram
    NormalizeRootHisto(datasetsMgr, rootHisto, dataset.isMC(), normalizeTo)
    #NormalizeRootHisto(datasetsMgr, rootHisto, d.isMC(), normalizeTo)

    ## Get a clone of the wrapped histogram normalized as requested.
    h = rootHisto.getHistogram()
    titleX = h.GetXaxis().GetTitle()
    binWidth = h.GetXaxis().GetBinWidth(0)
    titleY = "efficiency (%s) / %s" % (cutDirection,
                                       GetBinwidthDecimals(binWidth) %
                                       (binWidth))

    # If empty return
    if h.GetEntries() == 0:
        return

    # Create the numerator/denominator histograms
    numerator = h.Clone("Numerator")
    denominator = h.Clone("Denominator")

    # Reset the numerator/denominator histograms
    numerator.Reset()
    denominator.Reset()

    # Calculate the instances passing a given cut (all bins)
    nBinsX = h.GetNbinsX() + 1
    for iBin in range(1, nBinsX):

        nTotal = h.Integral(0, nBinsX)

        if cutDirection == ">":
            nPass = h.Integral(iBin + 1, nBinsX)
        elif cutDirection == "<":
            nPass = nTotal - h.Integral(iBin + 1, nBinsX)
        else:
            raise Exception(
                "Invalid cut direction  \"%s\". Please choose either \">\" or \"<\""
                % (cutDirection))

        # Sanity check
        if nPass < 0:
            nPass = 0

        # Fill the numerator/denominator histograms
        # print "iBin = %s, nPass = %s, nTotal = %s" % (iBin, nPass, nTotal)
        numerator.SetBinContent(iBin, nPass)
        numerator.SetBinError(iBin, math.sqrt(nPass) / 10)
        #
        denominator.SetBinContent(iBin, nTotal)
        denominator.SetBinError(iBin, math.sqrt(nTotal) / 10)

    # Check for negative values
    CheckNegatives(numerator, denominator)

    # Create TEfficiency object using the two histos
    eff = ROOT.TEfficiency(numerator, denominator)
    eff.SetStatisticOption(statOption)
    Verbose("The statistic option was set to %s" % (eff.GetStatisticOption()))

    # Save info in a table (debugging)
    table = []
    hLine = "=" * 70
    msgAlign = '{:<5} {:<20} {:<20} {:<20}'
    title = msgAlign.format("Bin", "Efficiency", "Error-Low", "Error-Up")
    table.append("\n" + hLine)
    table.append(title)
    table.append(hLine)
    for iBin in range(1, nBinsX):
        e = eff.GetEfficiency(iBin)
        errLow = eff.GetEfficiencyErrorLow(iBin)
        errUp = eff.GetEfficiencyErrorUp(iBin)
        values = msgAlign.format(iBin, e, errLow, errUp)
        table.append(values)
    table.append(hLine)

    # Verbose mode
    if verbose:
        for l in table:
            print l

    weight = 1
    if dataset.isMC():
        weight = dataset.getCrossSection()
    eff.SetWeight(weight)

    if first:
        teff = eff
        if dataset.isData():
            tn = numerator
            td = denominator
        first = False
    else:
        teff.Add(eff)
        if dataset.isData():
            tn.Add(numerator)
            td.Add(denominator)
    if isData:
        teff = ROOT.TEfficiency(tn, td)
        teff.SetStatisticOption(self.statOption)

    style = styleDict[dataset.getName()]
    return Convert2TGraph(teff, dataset, style, titleX, titleY)
def PlotEfficiency(datasetsMgr, numPath, denPath, intLumi):

    # Definitions
    myList = []
    index = 0
    _kwargs = GetHistoKwargs(numPath, opts)

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        #if "Fake" in numPath and "TT" in dataset.getName():
        #    continue
        # Get the histograms
        #num = dataset.getDatasetRootHisto(numPath).getHistogram()
        #den = dataset.getDatasetRootHisto(denPath).getHistogram()

        n = dataset.getDatasetRootHisto(numPath)
        n.normalizeToLuminosity(intLumi)
        num = n.getHistogram()
        d = dataset.getDatasetRootHisto(denPath)
        d.normalizeToLuminosity(intLumi)
        den = d.getHistogram()

        if "binList" in _kwargs:
            xBins = _kwargs["binList"]
            nx = len(xBins) - 1
            num = num.Rebin(nx, "", xBins)
            den = den.Rebin(nx, "", xBins)

        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Remove negative bins and ensure numerator bin <= denominator bin
        CheckNegatives(num, den, True)
        # RemoveNegatives(num)
        # RemoveNegatives(den)

        # Sanity check (Histograms are valid and consistent) - Always false!
        # if not ROOT.TEfficiency.CheckConsistency(num, den):
        #    continue

        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den)  # fixme: investigate warnings
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP)  #
        # Set the weights - Why is this needed?
        if 0:
            weight = 1
            if dataset.isMC():
                weight = dataset.getCrossSection()
                eff.SetWeight(weight)

        # Convert to TGraph
        eff = convert2TGraph(eff)
        # Apply default style (according to dataset name)
        plots._plotStyles[dataset.getName()].apply(eff)

        # Append in list
        myList.append(
            histograms.HistoGraph(eff, plots._legendLabels[dataset.getName()],
                                  "lp", "P"))

    # Define save name
    saveName = "Eff_" + numPath.split("/")[-1] + "Over" + denPath.split(
        "/")[-1]

    # Plot the efficiency
    p = plots.PlotBase(datasetRootHistos=myList, saveFormats=[])
    plots.drawPlot(p, saveName, **_kwargs)

    # Save plot in all formats
    savePath = os.path.join(opts.saveDir, "HplusMasses",
                            numPath.split("/")[0], opts.optMode)
    #savePath = os.path.join(opts.saveDir, numPath.split("/")[0], opts.optMode)
    save_path = savePath + opts.MVAcut
    SavePlot(p, saveName, save_path, saveFormats=[".png", ".pdf", ".C"])
    return
Exemple #11
0
def PlotEfficiency(datasetsMgr, numPath, denPath, intLumi):
    # Definitions
    myList = []
    myList_MC = []
    myList_Data = []
    index = 0
    _kwargs = GetHistoKwargs(numPath, opts)
    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():

        if "Fake" in numPath and "TT" in dataset.getName():
            continue

        if dataset.isMC():
            n = dataset.getDatasetRootHisto(numPath)
            n.normalizeToLuminosity(intLumi)
            num = n.getHistogram()
            d = dataset.getDatasetRootHisto(denPath)
            d.normalizeToLuminosity(intLumi)
            den = d.getHistogram()

        else:
            num = dataset.getDatasetRootHisto(numPath).getHistogram()
            den = dataset.getDatasetRootHisto(denPath).getHistogram()

        total = den.Integral(0, den.GetXaxis().GetNbins() + 1)
        selected = num.Integral(0, num.GetXaxis().GetNbins() + 1)
        print "Pass :"******" events"
        print "Numerical Efficiency", numPath, dataset.getName(), ":", round(
            selected / total, 3)

        if "binList" in _kwargs:
            #if len(_kwargs["binList"]) == 1:
            #    continue
            xBins = _kwargs["binList"]
            nx = len(xBins) - 1
            num = num.Rebin(nx, "", xBins)
            den = den.Rebin(nx, "", xBins)
        elif "Eta" in numPath or "Phi" in numPath:
            num = num.Rebin(2)
            den = den.Rebin(2)
        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den)  # fixme: investigate warnings
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP)  #FCP

        # Convert to TGraph
        eff = convert2TGraph(eff)
        # Apply default style (according to dataset name)
        plots._plotStyles[dataset.getName()].apply(eff)

        # Append in list
        myList.append(
            histograms.HistoGraph(eff, plots._legendLabels[dataset.getName()],
                                  "lp", "P"))
        if dataset.isMC():
            eff_MC = eff
            if "QCD" in dataset.getName():
                eff_QCD = eff
            elif "TT" in dataset.getName():
                eff_TT = eff
            myList_MC.append(
                histograms.HistoGraph(eff,
                                      plots._legendLabels[dataset.getName()],
                                      "lp", "P"))
        else:
            eff_Data = eff
            plots._plotStyles[dataset.getName()].apply(eff_Data)
            #styles.dataStyle.apply(eff_Data)
            eff_Data.SetMarkerSize(1.2)
            myList_Data.append(
                histograms.HistoGraph(eff_Data,
                                      plots._legendLabels[dataset.getName()],
                                      "p", "P"))

    numPath = numPath.replace("AfterAllSelections_", "")
    # Define save name
    saveName = "Eff_" + numPath.split("/")[-1]

    # Plot the efficiency
    p = plots.PlotBase(datasetRootHistos=myList, saveFormats=[])
    plots.drawPlot(p, saveName, **_kwargs)

    p1 = plots.ComparisonManyPlot(histograms.HistoGraph(eff_Data,
                                                        "Data",
                                                        drawStyle="P"),
                                  myList_MC,
                                  saveFormats=[])

    # Save plot in all formats
    savePath = os.path.join(opts.saveDir, "HplusMasses",
                            numPath.split("/")[0], opts.optMode)
    save_path = savePath + opts.MVAcut
    print "save_path", save_path
    # Draw and save the plot
    p1.setLuminosity(intLumi)
    _kwargs["ratio"] = True
    _kwargs["ratioInvert"] = True
    _kwargs["cutBoxY"] = {
        "cutValue": 1.0,
        "fillColor": 16,
        "box": False,
        "line": True,
        "greaterThan": True,
        "mainCanvas": True,
        "ratioCanvas": True
    }
    plots.drawPlot(p1, save_path1, **_kwargs)
    SavePlot(p1, saveName, save_path, saveFormats=[".png", ".pdf", ".C"])
    return
Exemple #12
0
def main(opts):

    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setOptStat(True)
    style.setGridX(False)
    style.setGridY(False)

    # If user does not define optimisation mode do all of them
    if opts.optMode == None:
        if len(optList) < 1:
            optList.append("")
        else:
            pass
        optModes = optList
    else:
        optModes = [opts.optMode]

    # For-loop: All optimisation modes
    for opt in optModes:
        opts.optMode = opt

        # Setup & configure the dataset manager
        datasetsMgr = GetDatasetsFromDir(opts)
        datasetsMgr.updateNAllEventsToPUWeighted()
        datasetsMgr.loadLuminosities()  # from lumi.json

        if opts.verbose:
            datasetsMgr.PrintCrossSections()
            datasetsMgr.PrintLuminosities()

        # Set/Overwrite cross-sections
        for d in datasetsMgr.getAllDatasets():
            if "ChargedHiggs" in d.getName():
                datasetsMgr.getDataset(d.getName()).setCrossSection(1.0)

        # Print dataset information before removing anything?
        datasetsMgr.PrintInfo()

        # Determine integrated Lumi before removing data
        if "Data" in datasetsMgr.getAllDatasetNames():
            intLumi = datasetsMgr.getDataset("Data").getLuminosity()

        # Remove datasets
        filterKeys = ["TTW"]
        for key in filterKeys:
            datasetsMgr.remove(
                filter(lambda name: key in name,
                       datasetsMgr.getAllDatasetNames()))
        else:
            intLumi = 35920

        for key in filterKeys:
            datasetsMgr.remove(
                filter(lambda name: key in name,
                       datasetsMgr.getAllDatasetNames()))

        # Re-order datasets
        datasetOrder = []
        haveQCD = False
        for d in datasetsMgr.getAllDatasets():
            if "QCD" in d.getName():
                haveQCD = True
            datasetOrder.append(d.getName())
        datasetsMgr.selectAndReorder(datasetOrder)

        # Define the mapping histograms in numerator->denominator pairs
        VariableList = [
            "LdgTop_Pt",
        ]

        minRunRange, maxRunRange, runRange = GetRunRange(datasetsMgr)

        # Merge histograms (see NtupleAnalysis/python/tools/plots.py)
        plots.mergeRenameReorderForDataMC(datasetsMgr)

        datasets_ = datasetsMgr.getAllDatasets()
        dataset_Data = datasetsMgr.getDataDatasets()
        dataset_MC = datasetsMgr.getMCDatasets()

        # Print dataset information
        datasetsMgr.PrintInfo()

        # For-loop: All numerator-denominator pairs
        counter = 0
        nPlots = len(VariableList)

        for var in VariableList:
            histoN = "AfterAllSelections_" + var
            histoD = "AfterStandardSelections_" + var
            numerator = os.path.join(opts.folder, histoN)
            denominator = os.path.join(opts.folder, histoD)

            counter += 1
            msg = "{:<9} {:>3} {:<1} {:<3} {:<50}".format(
                "Histogram", "%i" % counter, "/", "%s:" % (nPlots),
                "%s" % (var))
            Print(ShellStyles.SuccessStyle() + msg + ShellStyles.NormalStyle(),
                  counter == 1)

            for dataset in datasets_:

                if dataset.isMC():
                    n = dataset.getDatasetRootHisto(numerator)
                    n.normalizeToLuminosity(intLumi)
                    num = n.getHistogram()
                    d = dataset.getDatasetRootHisto(denominator)
                    d.normalizeToLuminosity(intLumi)
                    den = d.getHistogram()

                else:
                    num = dataset.getDatasetRootHisto(numerator).getHistogram()
                    den = dataset.getDatasetRootHisto(
                        denominator).getHistogram()

                x_bins = num.GetXaxis().GetNbins()

                i = 1
                while i < x_bins:
                    xvalue = num.GetXaxis().GetBinLowEdge(
                        i) + 0.5 * num.GetXaxis().GetBinWidth(i)
                    if xvalue < 20:
                        my_bin = i
                    i += 1

                my_xvalue = num.GetXaxis().GetBinUpEdge(
                    my_bin) + 0.5 * num.GetXaxis().GetBinWidth(my_bin)
                total = den.Integral(0, x_bins)  #my_bin
                selected = num.Integral(0, x_bins)  #my_bin

            plotName = "Eff_%s" % (var)
            # Get Efficiency Plots
            _kwargs = GetHistoKwargs(var, opts)
            eff_Data = GetEfficiency(datasetsMgr, dataset_Data, numerator,
                                     denominator, intLumi)
            eff_MC = GetEfficiency(datasetsMgr, dataset_MC, numerator,
                                   denominator, intLumi)

            # Apply Styles
            styles.dataStyle.apply(eff_Data)
            styles.qcdStyle.apply(eff_MC)
            # Create the plot
            p = plots.ComparisonPlot(
                histograms.HistoGraph(eff_Data, "eff_Data", "p", "P"),
                histograms.HistoGraph(eff_MC, "eff_MC", "p", "P"),
                saveFormats=[])
            # Define the legend entries
            p.histoMgr.setHistoLegendLabelMany({
                "eff_Data": "Data",
                "eff_MC": "QCD"
            })

            # Append in list
            myList = []
            myList.append(
                histograms.HistoGraph(eff_Data, plots._legendLabels["Data"],
                                      "lp", "P"))

            p.setLuminosity(intLumi)
            _kwargs["ratio"] = True
            _kwargs["opts"] = {
                "xmin": 0.0,
                "xmax": 600.0,
                "ymin": 0.0,
                "ymax": 0.16,
                "ymaxfactor": 1.8
            }
            _kwargs["cutBoxY"] = {
                "cutValue": 1.10,
                "fillColor": ROOT.kGray + 1,
                "fillStyle": 3001,
                "box": False,
                "line": True,
                "greaterThan": True,
                "mainCanvas": False,
                "ratioCanvas": True,
                "mirror": True
            }
            plots.drawPlot(p, plotName, **_kwargs)

            # Draw
            savePath = os.path.join(opts.saveDir, opts.optMode)
            SavePlot(p, plotName, savePath, saveFormats=[".png", ".pdf", ".C"])

    # Save results in JSON
    name = opts.mcrab.split("_")[-3]
    name = name.replace(opts.analysisName, "")
    jsonName = "topMisID_" + name + "_TopMassCut400.json"
    analysis = opts.analysisName
    label = "2016"
    plotDir = os.path.join(opts.folder, jsonName)
    pythonWriter.addParameters(plotDir, label, runRange, opts.intLumi,
                               eff_Data)
    pythonWriter.addMCParameters(label, eff_MC)
    fileName_json = jsonName
    pythonWriter.writeJSON(fileName_json)

    return
Exemple #13
0
def PlotEfficiency(datasetsMgr, numPath, denPath, eff_def):

    # Definitions
    myList = []

    default_eff = None
    datasetList = []
    ttVariationEff = []

    _kwargs = GetHistoKwargs(numPath, opts)
    nx = 0
    if len(_kwargs["binList"]) > 0:
        xBins = _kwargs["binList"]
        nx = len(xBins) - 1
    counter = 0

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():

        if dataset.isMC():
            n = dataset.getDatasetRootHisto(numPath)
            d = dataset.getDatasetRootHisto(denPath)
            num = n.getHistogram()
            den = d.getHistogram()

            if nx > 0:
                num = num.Rebin(nx, "", xBins)
                den = den.Rebin(nx, "", xBins)
        else:
            num = dataset.getDatasetRootHisto(numPath).getHistogram()
            den = dataset.getDatasetRootHisto(denPath).getHistogram()
            if nx > 0:
                num = num.Rebin(nx, "", xBins)
                den = den.Rebin(nx, "", xBins)

        # Calculations
        total = den.Integral(0, den.GetXaxis().GetNbins() + 1)
        selected = num.Integral(0, num.GetXaxis().GetNbins() + 1)

        if 0:
            print "Numerical Efficiency", numPath, dataset.getName(
            ), ":", round(selected / total, 3)

        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den)
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP)  #FCP

        datasetTT = datasetsMgr.getDataset("TT")
        # Get the histograms
        numTT = datasetTT.getDatasetRootHisto(numPath).getHistogram()
        denTT = datasetTT.getDatasetRootHisto(denPath).getHistogram()
        if nx > 0:
            numTT = numTT.Rebin(nx, "", xBins)  #num.Rebin(nx, "", xBins)
            denTT = denTT.Rebin(nx, "", xBins)  #den.Rebin(nx, "", xBins)
        '''
        for i in range(1, num.GetNbinsX()+1):
            nbin = num.GetBinContent(i)
            dbin = den.GetBinContent(i)
            nbinTT = numTT.GetBinContent(i)
            dbinTT = denTT.GetBinContent(i)
            eps = nbin/dbin
            epsTT = nbinTT/dbinTT
            ratioTT = eps/epsTT
            if ratioTT > 1:
                ratioTT = 1/ratioTT
            #print "bin: ", i, "eps: ", round(eps,5) , "epsTT: ", round(epsTT,5)
            #print "bin: ", i, "eps/epsTT: ", (1.0 - round(ratioTT, 3))*100
        '''
        eff_ref = ROOT.TEfficiency(numTT, denTT)
        eff_ref.SetStatisticOption(ROOT.TEfficiency.kFCP)  #FCP

        # Convert to TGraph
        gEff = convert2TGraph(eff)
        gEffRef = convert2TGraph(eff_ref)

        # Keep the default tt and variations tt efficiency plots
        if dataset.getName() == "TT":
            default_eff = gEffRef.Clone()
        else:
            datasetList.append(dataset.getName())
            ttVariationEff.append(gEff)

        # Style definitions
        stylesDef = styles.ttStyle
        styles0 = styles.signalStyleHToTB300
        styles1 = styles.signalStyleHToTB500
        styles2 = styles.signalStyleHToTB800
        styles3 = styles.signalStyleHToTB500
        styles4 = styles.signalStyleHToTB1000
        styles5 = styles.signalStyleHToTB2000
        styles6 = styles.signalStyleHToTB180
        styles7 = styles.signalStyleHToTB3000
        styles8 = styles.signalStyleHToTB200

        if dataset.getName() == "TT":
            styles.ttStyle.apply(gEffRef)
            legend_ref = "t#bar{t}"
            if opts.type == "partonShower":
                legend_ref = "t#bar{t} (Pythia8)"
            elif opts.type == "evtGen":
                legend_ref = "t#bar{t} (Powheg)"
            refGraph = histograms.HistoGraph(gEffRef, legend_ref, "p", "P")
        else:
            styles.markerStyles[counter].apply(gEff)
            legend = dataset.getName().replace("TT_", "t#bar{t} (").replace(
                "isr", "ISR ").replace("fsr", "FSR ")
            legend = legend.replace("hdamp", "hdamp ").replace("DOWN",
                                                               "down").replace(
                                                                   "UP", "up")
            legend = legend.replace("mtop1665", "m_{t} = 166.5 GeV")
            legend = legend.replace("mtop1695", "m_{t} = 169.5 GeV")
            legend = legend.replace("mtop1715", "m_{t} = 171.5 GeV")
            legend = legend.replace("mtop1735", "m_{t} = 173.5 GeV")
            legend = legend.replace("mtop1755", "m_{t} = 175.5 GeV")
            legend = legend.replace("mtop1785", "m_{t} = 178.5 GeV")
            legend = legend.replace("TuneEE5C", "Herwig++")
            legend += ")"
            counter += 1
            #myList.append(histograms.HistoGraph(gEff, legend, "lp", "P"))
            myList.append(histograms.HistoGraph(gEff, legend, "p", "P"))

    units = "GeV/c"
    if eff_def == "fakeTop":
        _kwargs["xlabel"] = "candidate p_{T} (%s)" % (units)
    elif eff_def == "inclusiveTop" or eff_def == "genuineTop":
        _kwargs["xlabel"] = "generated top p_{T} (%s)" % (units)
    else:
        _kwargs["xlabel"] = "p_{T} (%s)" % (units)

    # Define stuff
    numPath = numPath.replace("AfterAllSelections_", "")
    saveName = "Efficiency_%s_%s" % (eff_def, opts.type)
    #saveName = saveName.replace("__", "_Inclusive_")

    # Plot the efficiency
    p = plots.ComparisonManyPlot(refGraph, myList, saveFormats=[])
    savePath = os.path.join(opts.saveDir, opts.optMode)
    plots.drawPlot(p, savePath, **_kwargs)

    # Save plot in all formats
    SavePlot(p, saveName, savePath, saveFormats=[".png", ".pdf", ".C"])

    # ==============================================================================
    #   I need the uncertainties from the ratio of all plots (ONLY for Genuine)
    # ==============================================================================

    if eff_def == "genuineTop":

        uncWriter = UncertaintyWriter()
        jsonName = "uncertainties_%s_BDT_%s.json" % (opts.type, opts.BDT)
        analysis = opts.analysisName
        saveDir = os.path.join("", jsonName)

        for i in range(0, len(datasetList)):
            uncWriter.addParameters(datasetList[i], analysis, saveDir,
                                    default_eff, ttVariationEff[i])

            #print "i = ", i, " Dataset = ",  datasetList[i]
            #for iBin in range(1, len(xBins)):
            #ratio  = float(default_eff.GetEfficiency(iBin))/float(ttVariationEff[i].GetEfficiency(iBin))
            #unc = 0.5*(1.0 - ratio)
            #print "iBin = ", iBin, " Default TT=", default_eff.GetEfficiency(iBin), "    Variation (", datasetList[i], ") =", ttVariationEff[i].GetEfficiency(iBin), "   Uncertainty =", unc

        uncWriter.writeJSON(jsonName)

    return
def PlotProb(datasets, numPath, denPath):

    EfficiencyList = []
    index = 0
    for dataset in datasets:

        datasetName = dataset.getName()
        print "Dataset = ", datasetName

        statOption = ROOT.TEfficiency.kFNormal
        ##        n = dataset.getDatasetRootHisto(numPath).getHistogram()
        #        n.normalizeToOne()
        ##        d = dataset.getDatasetRootHisto(denPath).getHistogram()
        nn = dataset.getDatasetRootHisto(numPath)
        nn.normalizeToLuminosity(35.8 * (10**3))
        n = nn.getHistogram()
        dd = dataset.getDatasetRootHisto(denPath)
        #        dd.normalizeToOne()
        #        dd.normalizeToLuminosity(36.3*(10**3))
        dd.normalizeToLuminosity(35.8 * (10**3))
        #        dd.normalizeByCrossSection()
        d = dd.getHistogram()

        #        if "TT" in datasetName and ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath):
        #            continue
        #        elif "M_"  in datasetName and not ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath):
        #            continue

        if "Event" in numPath:
            n.Rebin(10)
            d.Rebin(10)
        else:
            n.Rebin(5)
            d.Rebin(5)

        if d.GetEntries() == 0 or n.GetEntries() == 0:
            continue

        if n.GetEntries() > d.GetEntries():
            continue
        # Check Negatives
        CheckNegatives(n, d, True)

        # Remove Negatives
        RemoveNegatives(n)

        nBins = d.GetNbinsX()
        xMin = d.GetXaxis().GetXmin()
        xMax = d.GetXaxis().GetXmax()

        binwidth = int(n.GetBinWidth(0))

        # ----------------------------------------------------------------------------------------- #
        #      Ugly hack to ignore EMPTY (in the wanted range) histograms with overflows/underflows
        # ----------------------------------------------------------------------------------------- #
        if (0):
            print "\n"
            print "=========== getEfficiency:"
            print "Dataset             = ", dataset.getName()
            print "Numerator:   entries=", n.GetEntries(
            ), " Bins=", n.GetNbinsX(), " Low edge=", n.GetBinLowEdge(1)
            print "Denominator: entries=", d.GetEntries(
            ), " Bins=", d.GetNbinsX(), " Low edge=", d.GetBinLowEdge(1)
            print "\n"

            print ">>>>>>  Sanity Check:  <<<<<<"
            print "Numerator Mean       = ", n.GetMean()
            print "Numerator RMS        = ", n.GetRMS()
            print "Numerator Integral   = ", n.Integral(1, nBins)
            print "Denominator Mean     = ", d.GetMean()
            print "Denominator RMS      = ", d.GetRMS()
            print "Denominator Integral = ", d.Integral(1, nBins)

        if (n.GetMean() == 0 or d.GetMean() == 0): continue
        if (n.GetRMS() == 0 or d.GetRMS() == 0): continue
        if (n.Integral(1, nBins) == 0 or d.Integral(1, nBins) == 0): continue

        #        if not (ROOT.TEfficiency.CheckConsistency(n,d)): continue;
        effic = ROOT.TEfficiency(n, d)
        effic.SetStatisticOption(statOption)

        weight = 1
        if dataset.isMC():
            weight = dataset.getCrossSection()
            effic.SetWeight(weight)

        eff = convert2TGraph(effic)

        # Apply Styles
        if "TT" in datasetName:
            if index == 0:
                styles.signalStyleHToTB500.apply(eff)
                #            styles.ttStyle.apply(eff)
                eff.SetLineStyle(1)
                eff.SetLineWidth(3)
                eff.SetLineColor(619)
                legend = "Default: t#bar{t}"
                index = 1
            else:
                styles.signalStyleHToTB500.apply(eff)
                #            styles.ttStyle.apply(eff)
                eff.SetLineStyle(1)
                eff.SetLineWidth(3)
                eff.SetLineColor(417)
                legend = "#Delta R(q,q')>0.8: t#bar{t}"

        elif "M_500" in datasetName:
            styles.signalStyleHToTB500.apply(eff)
            legend = "H^{+} m_{H^{+}} = 500 GeV"
        elif "M_300" in datasetName:
            styles.signalStyleHToTB300.apply(eff)
            legend = "H^{+} m_{H^{+}} = 300 GeV"
        elif "M_1000" in datasetName:
            styles.signalStyleHToTB1000.apply(eff)
            legend = "H^{+} m_{H^{+}} = 1000 GeV"
        elif "M_800" in datasetName:
            styles.signalStyleHToTB800.apply(eff)
            legend = "H^{+} m_{H^{+}} = 800 GeV"
        elif "M_200" in datasetName:
            styles.signalStyleHToTB200.apply(eff)
            legend = "H^{+} m_{H^{+}} = 200 GeV"
        else:
            styles.ttStyle.apply(eff)
            legend = "other"

        EfficiencyList.append(histograms.HistoGraph(eff, legend, "lp", "P"))

    saveName = "Eff_" + numPath.split("/")[-1] + "Over" + denPath.split(
        "/")[-1]
    if "Pt" in numPath:
        xMin = 0.0
        #        rebinX = 2
        xMax = 805.0
        #        xMax = 555.0 # For topPt < 500GeV
        xTitle = "p_{T} (GeV/c)"
        units = "GeV/c"
        _format = "%0.1f" + units
        yTitle = "Efficiency / " + str(binwidth) + " " + units
        yMin = 0.0
        yMax = 1.1

    elif "_Eta" in numPath:
        xMin = -3.0
        xMax = +3.0
        xTitle = "#eta"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 1.1

    elif "_Mass" in numPath:
        xMin = 50.0
        xMax = 300
        xTitle = "M (GeV/c^{2})"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 1.1

    elif "_Phi" in numPath:
        xMin = -3
        xMax = +3
        xTitle = "#phi"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 1.1

    else:
        xMin = 0.0
        xMax = 250.0
        xTitle = "xTitle"
        yTitle = "yTitle"
        yMin = 0.0
        yMax = 1.1

    if "Fake" in numPath:
        #        xMin = 95.0
        #        rebinX = 4
        xMax = 805.0
        xTitle = "candidate p_{T} (GeV/c)"
        units = "GeV/c"
        _format = "%0.1f" + units
        yTitle = "Misid rate / " + str(binwidth) + " " + units
        yMin = 0.0
        yMax = 0.11

    if "Event" in numPath:
        rebinX = 2
        #        xMin = 95.0
        xMax = 805.0
        xTitle = "candidate p_{T} (GeV/c)"
        units = "GeV/c"
        _format = "%0.1f" + units
        yTitle = "Efficiency  / " + str(binwidth) + " " + units
        yMin = 0.0
        yMax = 1.1

    if "NonMatched" in numPath:
        xMin = 90.0
        rebinX = 4
        xMax = 700.0
        xMax = 555.0  # For topPt < 500GeV
        xTitle = "p_{T} (GeV)"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 0.15

    if "AllTopQuarkPt_MatchedBDT" in numPath and "TopQuarkPt" in denPath:
        xMin = 0.0
        #        rebinX = 4
        xMax = 805.0  #705
        units = "GeV/c"
        xTitle = "generated top p_{T} (GeV/c)"
        yTitle = "Efficiency / " + str(binwidth) + " " + units
        yMin = 0.0
        yMax = 1.1

    if "SameFake" in numPath:
        xMin = 95.0
        rebinX = 4
        xMax = 705.0
        xMax = 555.0  # For topPt < 500GeV
        xTitle = "p_{T} [GeV]"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 1.1

    options = {"ymin": yMin, "ymax": yMax, "xmin": xMin, "xMax": xMax}

    #    if "TT" in datasetName and ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath):
    #        return
    #    if "M_"  in datasetName and not ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath):
    #        return

    p = plots.PlotBase(datasetRootHistos=EfficiencyList,
                       saveFormats=kwargs.get("saveFormats"))

    #p = plots.ComparisonManyPlot(refEff, EfficiencyList, saveFormats=[])

    p.createFrame(saveName, opts=options)

    #    p.histoMgr.forEachHisto(lambda h: h.getRootHisto().RebinX(kwargs.get("rebinX")))

    # Set Titles
    #    p.getFrame().GetYaxis().SetTitle(kwargs.get("ylabel"))  #"ylabel"
    p.getFrame().GetXaxis().SetTitle(xTitle)
    p.getFrame().GetYaxis().SetTitle(yTitle)

    # Set range
    p.getFrame().GetXaxis().SetRangeUser(xMin, xMax)

    moveLegend = {"dx": -0.55, "dy": -0.02, "dh": -0.2}
    #    moveLegend = {"dx": -0.55, "dy": -0.01, "dh": -0.1}
    p.setLegend(histograms.moveLegend(histograms.createLegend(), **moveLegend))

    # Add Standard Texts to plot
    histograms.addStandardTexts()

    p.draw()

    # Save plot in all formats
    savePath = os.path.join(opts.saveDir, "HplusMasses",
                            numPath.split("/")[0], opts.optMode)
    save_path = savePath + opts.MVAcut

    #    SavePlot(p, saveName, savePath)
    SavePlot(p, saveName, save_path)
    return
Exemple #15
0
def GetEfficiency(datasetsMgr, datasets, numerator="Numerator",denominator="Denominator", **kwargs):
    '''
    TEfficiency method:
    See https://root.cern.ch/doc/master/classTEfficiency.html    
    
    '''
    lumi = GetLumi(datasetsMgr)

    # Select Statistic Options
    statOption = ROOT.TEfficiency.kFCP
    '''
    statOption = ROOT.TEfficiency.kFCP      # Clopper-Pearson
    statOption = ROOT.TEfficiency.kFNormal  # Normal Approximation
    statOption = ROOT.TEfficiency.kFWilson  # Wilson
    statOption = ROOT.TEfficiency.kFAC      # Agresti-Coull
    statOption = ROOT.TEfficiency.kFFC      # Feldman-Cousins
    statOption = ROOT.TEfficiency.kBBJeffrey # Jeffrey
    statOption = ROOT.TEfficiency.kBBUniform # Uniform Prior
    statOption = ROOT.TEfficiency.kBBayesian # Custom Prior
    '''
    
    first  = True
    teff   = ROOT.TEfficiency()
    #    teff.SetStatisticOption(statOption)

    # For-loop: All datasets
    for dataset in datasets:
        
        num = dataset.getDatasetRootHisto(numerator)
        den = dataset.getDatasetRootHisto(denominator)

        # 
        if dataset.isMC():
            num.normalizeToLuminosity(lumi)
            den.normalizeToLuminosity(lumi) 

        # Get Numerator and Denominator
        n = num.getHistogram()
        d = den.getHistogram()
        
        if d.GetEntries() == 0 or n.GetEntries() == 0:
            msg =  "Denominator Or Numerator has no entries"
            Print(ErrorStyle() + msg + NormalStyle(), True)
            continue
        
        # Check Negatives
        CheckNegatives(n, d, True)
        
        # Remove Negatives
        RemoveNegatives(n)
        #RemoveNegatives(d)
       
        NumeratorBins   = n.GetNbinsX()
        DenominatorBins = d.GetNbinsX()


        # Sanity Check
        if (NumeratorBins != DenominatorBins) :
            raise Exception("Numerator and Denominator Bins are NOT equal!")
        
        nBins = d.GetNbinsX()
        xMin  = d.GetXaxis().GetXmin()
        xMax  = d.GetXaxis().GetXmax()
        
        # ----------------------------------------------------------------------------------------- # 
        #      Ugly hack to ignore EMPTY (in the wanted range) histograms with overflows/underflows
        # ----------------------------------------------------------------------------------------- #
        if 0:
            print "\n"
            print "=========== getEfficiency:"
            print "Dataset             = ", dataset.getName()
            
            print "Numerator  :", n.GetName(), "   entries=", n.GetEntries(), " Bins=", n.GetNbinsX(), " Low edge=", n.GetBinLowEdge(1)
            print "Denominator:", d.GetName(), "   entries=", d.GetEntries(), " Bins=", d.GetNbinsX(), " Low edge=", d.GetBinLowEdge(1)
            print "\n"
            print ">>>>>>  Sanity Check:  <<<<<<"
            print "Numerator Mean       = ", n.GetMean()
            print "Numerator RMS        = ", n.GetRMS()
            print "Numerator Integral   = ", n.Integral(1, nBins)
            print "Denominator Mean     = ", d.GetMean()
            print "Denominator RMS      = ", d.GetRMS()
            print "Denominator Integral = ", d.Integral(1, nBins)
        
        if (n.GetMean() == 0 or d.GetMean() == 0): continue
        if (n.GetRMS()  == 0 or d.GetRMS()  == 0): continue
        if (n.Integral(1,nBins) == 0 or d.Integral(1,nBins) == 0): continue

        Verbose("Passed the sanity check", True)
        
        eff = ROOT.TEfficiency(n, d)
        eff.SetStatisticOption(statOption)
        
        # For-loop: All bins
        if 0:
            for iBin in range(1, nBins+1):
                print iBin, "x=", n.GetBinLowEdge(iBin), " Num=", n.GetBinContent(iBin),  " Den=", d.GetBinContent(iBin)," Eff=", eff.GetEfficiency(iBin)
            
        weight = 1
        if dataset.isMC():
            weight = dataset.getCrossSection()
        eff.SetWeight(weight)
        
        if first:
            teff  = eff
            first = False
            if dataset.isData():
                tn = n
                td = d
        else:
            teff.Add(eff)
            
            if dataset.isData():
                tn.Add(n)
                td.Add(d)
                
        if dataset.isData():
            teff = ROOT.TEfficiency(tn, td)
            teff.SetStatisticOption(statOption)
        
    Verbose("Final tEff", True)
    if 0:
        for iBin in range(1,nBins+1):
            print iBin, "x=", n.GetBinLowEdge(iBin)," Efficiency=", teff.GetEfficiency(iBin), " Weight = ", teff.GetWeight()
    return convert2TGraph(teff)
def GetEfficiency(datasetsMgr, datasets, numerator="Numerator",denominator="Denominator", **kwargs):
    '''
    TEfficiency method:
    See https://root.cern.ch/doc/master/classTEfficiency.html    
    
    '''
    lumi = GetLumi(datasetsMgr)

    # Select Statistic Options
    statOption = ROOT.TEfficiency.kFCP
    '''
    statOption = ROOT.TEfficiency.kFCP      # Clopper-Pearson
    statOption = ROOT.TEfficiency.kFNormal  # Normal Approximation
    statOption = ROOT.TEfficiency.kFWilson  # Wilson
    statOption = ROOT.TEfficiency.kFAC      # Agresti-Coull
    statOption = ROOT.TEfficiency.kFFC      # Feldman-Cousins
    statOption = ROOT.TEfficiency.kBBJeffrey # Jeffrey
    statOption = ROOT.TEfficiency.kBBUniform # Uniform Prior
    statOption = ROOT.TEfficiency.kBBayesian # Custom Prior
    '''
    
    first  = True
    teff   = ROOT.TEfficiency()
    #    teff.SetStatisticOption(statOption)

    # For-loop: All datasets
    for dataset in datasets:
        
        num = dataset.getDatasetRootHisto(numerator)
        den = dataset.getDatasetRootHisto(denominator)

        # 
        if dataset.isMC():
            num.normalizeToLuminosity(lumi)
            den.normalizeToLuminosity(lumi) 

        # Get Numerator and Denominator
        n = num.getHistogram()
        d = den.getHistogram()
        
        if d.GetEntries() == 0 or n.GetEntries() == 0:
            msg =  "Denominator Or Numerator has no entries"
            Print(ErrorStyle() + msg + NormalStyle(), True)
            continue
        
        # Check Negatives
        CheckNegatives(n, d, True)
        
        # Remove Negatives
        RemoveNegatives(n)
        #RemoveNegatives(d)
       
        NumeratorBins   = n.GetNbinsX()
        DenominatorBins = d.GetNbinsX()


        # Sanity Check
        if (NumeratorBins != DenominatorBins) :
            raise Exception("Numerator and Denominator Bins are NOT equal!")
        
        nBins = d.GetNbinsX()
        xMin  = d.GetXaxis().GetXmin()
        xMax  = d.GetXaxis().GetXmax()
        
        # ----------------------------------------------------------------------------------------- # 
        #      Ugly hack to ignore EMPTY (in the wanted range) histograms with overflows/underflows
        # ----------------------------------------------------------------------------------------- #
        if 0:
            print "\n"
            print "=========== getEfficiency:"
            print "Dataset             = ", dataset.getName()
            
            print "Numerator  :", n.GetName(), "   entries=", n.GetEntries(), " Bins=", n.GetNbinsX(), " Low edge=", n.GetBinLowEdge(1)
            print "Denominator:", d.GetName(), "   entries=", d.GetEntries(), " Bins=", d.GetNbinsX(), " Low edge=", d.GetBinLowEdge(1)
            print "\n"
            print ">>>>>>  Sanity Check:  <<<<<<"
            print "Numerator Mean       = ", n.GetMean()
            print "Numerator RMS        = ", n.GetRMS()
            print "Numerator Integral   = ", n.Integral(1, nBins)
            print "Denominator Mean     = ", d.GetMean()
            print "Denominator RMS      = ", d.GetRMS()
            print "Denominator Integral = ", d.Integral(1, nBins)
        
        if (n.GetMean() == 0 or d.GetMean() == 0): continue
        if (n.GetRMS()  == 0 or d.GetRMS()  == 0): continue
        if (n.Integral(1,nBins) == 0 or d.Integral(1,nBins) == 0): continue

        Verbose("Passed the sanity check", True)
        
        eff = ROOT.TEfficiency(n, d)
        eff.SetStatisticOption(statOption)
        
        # For-loop: All bins
        if 0:
            for iBin in range(1, nBins+1):
                print iBin, "x=", n.GetBinLowEdge(iBin), " Num=", n.GetBinContent(iBin),  " Den=", d.GetBinContent(iBin)," Eff=", eff.GetEfficiency(iBin)
            
        weight = 1
        if dataset.isMC():
            weight = dataset.getCrossSection()
        eff.SetWeight(weight)
        
        if first:
            teff  = eff
            first = False
            if dataset.isData():
                tn = n
                td = d
        else:
            teff.Add(eff)
            
            if dataset.isData():
                tn.Add(n)
                td.Add(d)
                
        if dataset.isData():
            teff = ROOT.TEfficiency(tn, td)
            teff.SetStatisticOption(statOption)
        
    Verbose("Final tEff", True)
    if 0:
        for iBin in range(1,nBins+1):
            print iBin, "x=", n.GetBinLowEdge(iBin)," Efficiency=", teff.GetEfficiency(iBin), " Weight = ", teff.GetWeight()
    return convert2TGraph(teff)
Exemple #17
0
def GetCutEfficiencyHisto(dataset, histoName, statOpt, **kwargs):
    '''
    See https://root.cern.ch/doc/master/classTEfficiency.html
    '''
    HasKeys(["verbose", "normalizeTo", "cutDirection"], **kwargs)
    verbose     = kwargs.get("verbose")
    normalizeTo = kwargs.get("normalizeTo")
    cutDirection= kwargs.get("cutDirection")
    Verbose("Calculating the cut-efficiency (%s) for histo with name %s" % (cutDirection, histoName) )
        
    # Choose statistics options
    statOpts = ["kFCP", "kFNormal", "KFWilson", "kFAC", "kFFC", "kBJeffrey", "kBUniform", "kBayesian"]
    if statOpt not in statOpts:
        raise Exception("Invalid statistics option \"%s\". Please choose one from the following:\n\t%s" % (statOpt, "\n\t".join(statOpts)))

    if statOpt == "kFCP":
        statOption = ROOT.TEfficiency.kFCP      # Clopper-Pearson
    elif statOpt == "kFNormal":
        statOption = ROOT.TEfficiency.kFNormal  # Normal Approximation
    elif statOpt == "kFWilson":
        statOption = ROOT.TEfficiency.kFWilson  # Wilson
    elif statOpt == "kFAC":
        statOption = ROOT.TEfficiency.kFAC      # Agresti-Coull
    elif statOpt == "kFFC":
        statOption = ROOT.TEfficiency.kFFC      # Feldman-Cousins
    elif statOpt == "kBJeffrey":
        statOption = ROOT.TEfficiency.kBJeffrey # Jeffrey
    elif statOpt == "kBUniform":
        statOption = ROOT.TEfficiency.kBUniform # Uniform Prior
    elif statOpt == "kBayesian":
        statOption = ROOT.TEfficiency.kBayesian # Custom Prior
    else:
        raise Exception("This should never be reached")    
        

    # Declare variables & options
    first  = True
    isData = False
    teff   = ROOT.TEfficiency()

    # Get the ROOT histogram
    rootHisto = dataset.getDatasetRootHisto(histoName)

    # Normalise the histogram
    NormalizeRootHisto(datasetsMgr, rootHisto, dataset.isMC(), normalizeTo)
    #NormalizeRootHisto(datasetsMgr, rootHisto, d.isMC(), normalizeTo)

    ## Get a clone of the wrapped histogram normalized as requested.
    h = rootHisto.getHistogram()
    titleX   = h.GetXaxis().GetTitle()
    binWidth = h.GetXaxis().GetBinWidth(0)
    titleY   = "efficiency (%s) / %s" % (cutDirection, GetBinwidthDecimals(binWidth) % (binWidth) )
    
    # If empty return
    if h.GetEntries() == 0:
        return

    # Create the numerator/denominator histograms
    numerator   = h.Clone("Numerator")
    denominator = h.Clone("Denominator")

    # Reset the numerator/denominator histograms
    numerator.Reset()
    denominator.Reset()

    # Calculate the instances passing a given cut (all bins)
    nBinsX = h.GetNbinsX()+1
    for iBin in range(1, nBinsX):

        nTotal = h.Integral(0, nBinsX)

        if cutDirection == ">":
            nPass  = h.Integral(iBin+1, nBinsX)
        elif cutDirection == "<":
            nPass  = nTotal - h.Integral(iBin+1, nBinsX)
        else:
            raise Exception("Invalid cut direction  \"%s\". Please choose either \">\" or \"<\"" % (cutDirection))

        # Sanity check
        if nPass < 0:
            nPass = 0
            
        # Fill the numerator/denominator histograms
        # print "iBin = %s, nPass = %s, nTotal = %s" % (iBin, nPass, nTotal)
        numerator.SetBinContent(iBin, nPass)
        numerator.SetBinError(iBin, math.sqrt(nPass)/10)
        #
        denominator.SetBinContent(iBin, nTotal)
        denominator.SetBinError(iBin, math.sqrt(nTotal)/10)
        
    # Check for negative values
    CheckNegatives(numerator, denominator)

    # Create TEfficiency object using the two histos
    eff = ROOT.TEfficiency(numerator, denominator)
    eff.SetStatisticOption(statOption)
    Verbose("The statistic option was set to %s" % (eff.GetStatisticOption()) )

    # Save info in a table (debugging)
    table    = []
    hLine    = "="*70
    msgAlign = '{:<5} {:<20} {:<20} {:<20}'
    title    = msgAlign.format("Bin", "Efficiency", "Error-Low", "Error-Up")
    table.append("\n" + hLine)
    table.append(title)
    table.append(hLine)
    for iBin in range(1, nBinsX):
        e      = eff.GetEfficiency(iBin)
        errLow = eff.GetEfficiencyErrorLow(iBin)
        errUp  = eff.GetEfficiencyErrorUp(iBin)
        values = msgAlign.format(iBin, e, errLow, errUp)
        table.append(values)
    table.append(hLine)

    # Verbose mode
    if verbose:
        for l in table:
            print l

    weight = 1
    if dataset.isMC():
        weight = dataset.getCrossSection()
    eff.SetWeight(weight)
        
    if first:
        teff = eff
        if dataset.isData():
            tn = numerator
            td = denominator
        first = False
    else:
        teff.Add(eff)
        if dataset.isData():
            tn.Add(numerator)
            td.Add(denominator)
    if isData:
        teff = ROOT.TEfficiency(tn, td)
        teff.SetStatisticOption(self.statOption)

    style = styleDict[dataset.getName()]
    return Convert2TGraph(teff, dataset, style, titleX, titleY)
Exemple #18
0
def getEfficiency2D(datasetsMgr,
                    datasets,
                    numerator="Numerator",
                    denominator="Denominator",
                    **kwargs):
    '''                                                                                                                                                               
    TEfficiency method:                                                                                                                                               
                                                                                                                                                                      
    See https://root.cern.ch/doc/master/classTEfficiency.html                                                                                                         
    
    
    '''
    HasKeys(["verbose"], **kwargs)
    verbose = True  #kwargs.get("verbose")

    lumi = GetLumi(datasetsMgr)

    # Select Statistic Options
    statOption = ROOT.TEfficiency.kFCP
    '''                                                                                                                                                               
    statOption = ROOT.TEfficiency.kFCP      # Clopper-Pearson                                                                                                         
    statOption = ROOT.TEfficiency.kFNormal  # Normal Approximation                                                                                                    
    statOption = ROOT.TEfficiency.kFWilson  # Wilson                                                                                                                  
    statOption = ROOT.TEfficiency.kFAC      # Agresti-Coull                                                                                                           
    statOption = ROOT.TEfficiency.kFFC      # Feldman-Cousins                                                                                                         
    statOption = ROOT.TEfficiency.kBBJeffrey # Jeffrey                                                                                                                
    statOption = ROOT.TEfficiency.kBBUniform # Uniform Prior                                                                                                          
    statOption = ROOT.TEfficiency.kBBayesian # Custom Prior                                                                                                           
    '''

    print "getEfficiency function"
    first = True
    teff = ROOT.TEfficiency()
    #    teff.SetStatisticOption(statOption)
    print "Loop over Datasets"
    for dataset in datasets:
        print "Datasets"

    #datasets.normalizeMCByLuminosity()
    for dataset in datasets:
        num = dataset.getDatasetRootHisto(numerator)
        den = dataset.getDatasetRootHisto(denominator)
        if dataset.isMC():
            num.normalizeToLuminosity(lumi)
            den.normalizeToLuminosity(lumi)
        #num.normalizeMCByLuminosity()
        #den.normalizeMCByLuminosity()

        # Get Numerator and Denominator
        n = num.getHistogram()
        d = den.getHistogram()

        #tn = None
        #td = None
        #n.normalizeMCByLuminosity()
        #d.normalizeMCByLuminosity()

        #n = dataset.getDatasetRootHisto(numerator).getHistogram()
        #d = dataset.getDatasetRootHisto(denominator).getHistogram()

        if d.GetEntries() == 0 or n.GetEntries() == 0:
            print "Denominator Or Numerator has no entries"
            continue

        # Check Negatives
        CheckNegatives(n, d, True)
        # Remove Negatives
        RemoveNegatives(n)
        #RemoveNegatives(d)

        NumeratorBins = n.GetNbinsX()
        DenominatorBins = d.GetNbinsX()

        # Sanity Check
        if (NumeratorBins != DenominatorBins):
            raise Exception("Numerator and Denominator Bins are NOT equal!")
        nBinsX = d.GetNbinsX()
        xMin = d.GetXaxis().GetXmin()
        xMax = d.GetXaxis().GetXmax()

        nBinsY = d.GetNbinsY()
        #yMin  = d.GetYaxis().GetYmin()
        #yMax  = d.GetYaxis().GetYmax()
        print("NoProblem till here asdasd...")

        # ----------------------------------------------------------------------------------------- #
        #      Ugly hack to ignore EMPTY (in the wanted range) histograms with overflows/underflows
        # ----------------------------------------------------------------------------------------- #

        print "\n"
        print "=========== getEfficiency:"
        print "Dataset             = ", dataset.getName()

        #print "Numerator  :", n.GetName(), "   entries=", n.GetEntries(), " Bins=", n.GetNbinsX(), " Low edge=", n.GetBinLowEdge(1)
        #print "Denominator:", d.GetName(), "   entries=", d.GetEntries(), " Bins=", d.GetNbinsX(), " Low edge=", d.GetBinLowEdge(1)
        print "\n"
        print ">>>>>>  Sanity Check:  <<<<<<"
        print "Numerator Mean       = ", n.GetMean()
        print "Numerator RMS        = ", n.GetRMS()
        print "Numerator Integral   = ", n.Integral()
        print "Denominator Mean     = ", d.GetMean()
        print "Denominator RMS      = ", d.GetRMS()
        print "Denominator Integral = ", d.Integral()

        if (n.GetMean() == 0 or d.GetMean() == 0): continue
        if (n.GetRMS() == 0 or d.GetRMS() == 0): continue
        if (n.Integral() == 0 or d.Integral() == 0): continue

        print "Passed the sanity check"

        eff = ROOT.TEfficiency(n, d)
        eff.SetStatisticOption(statOption)

        #        if "TT" in dataset.getName():
        #    print " "
        #    print " TT sample"
        #for iBin in range(1, nBins+1):
        #    print iBin, "x=", n.GetBinLowEdge(iBin), " Num=", n.GetBinContent(iBin),  " Den=", d.GetBinContent(iBin)," Eff=", eff.GetEfficiency(iBin)
        # "Contrib. =", d.GetBinContent(iBin)/d.Integral(1, nBins)*100.0, "Contrib. = ", n.GetBinContent(iBin)/n.Integral(1, nBins)*100.0,
        '''                                                                                                                                                           
        #if (verbose):                                                                                                                                                
        print "\n"                                                                                                                                                    
        for iBin in range(1,nBins+1):                                                                                                                                 
        #print iBin, "x=", n.GetBinLowEdge(iBin), " Numerator=", n.GetBinContent(iBin), " Denominator=", d.GetBinContent(iBin), " Efficiency=", eff.GetEfficiency(iBin\
), " Weight=", eff.GetWeight()                                                                                                                                        
        print "\n"                                                                                                                                                    
        '''

        weight = 1
        if dataset.isMC():
            weight = dataset.getCrossSection()
        eff.SetWeight(weight)
        #print "dataset=", dataset.getName(), "has weight=", weight
        #print " Efficiency plot has weight=", eff.GetWeight()

        if first:
            teff = eff
            first = False
            if dataset.isData():
                tn = n
                td = d
        else:
            teff.Add(eff)

            #print " "
            #print "Adding eff to TEfficiency="
            #for iBin in range(1, nBins+1):
            #    print iBin, "x=", n.GetBinLowEdge(iBin), " Numerator=", n.GetBinContent(iBin), "Contrib. = ", n.GetBinContent(iBin)/n.Integral(1, nBins)*100.0, " Denominator=", d.GetBinContent(iBin), "Contrib. =", d.GetBinContent(iBin)/d.Integral(1, nBins)*100.0, " Efficiency=", teff.GetEfficiency(iBin), " Weight=", teff.GetWeight()

            if dataset.isData():
                tn.Add(n)
                td.Add(d)

        if dataset.isData():
            teff = ROOT.TEfficiency(tn, td)
            teff.SetStatisticOption(statOption)
            '''                                                                                                                                                       
            print " ------------------------- Final Data Plot ------------------------- "                                                                             
            print "Integral = ", tn.Integral(1, nBins)                                                                                                                
            print "Numerator:"                                                                                                                                        
            for iBin in range(1, nBins+1):                                                                                                                            
            print iBin, "x=", tn.GetBinLowEdge(iBin), " Bin Content = ", tn.GetBinContent(iBin), " Percentage=", tn.GetBinContent(iBin)/tn.Integral(1, nBins)*100.0   
                                                                                                                                                                      
            print "Denominator:  "                                                                                                                                    
            print "Integral = ", td.Integral(1,nBins)                                                                                                                 
            for iBin in range(1, nBins+1):                                                                                                                            
            print iBin, "x=", td.GetBinLowEdge(iBin), " Bin Content = ", td.GetBinContent(iBin), " Percentage=", td.GetBinContent(iBin)/td.Integral(1, nBins)*100     
            print "-------------------------------------------------------------------- "                                                                             
            '''

    print " -----------------> Final tEff"
    #for iBin in range(1,nBins+1):
    #    print iBin, "x=", n.GetBinLowEdge(iBin)," Efficiency=", teff.GetEfficiency(iBin), " Weight = ", teff.GetWeight()

    return teff
Exemple #19
0
def PlotEfficiency(datasetsMgr, numPath, denPath):
    # Definitions
    myList = []
    _kwargs = GetHistoKwargs(numPath, opts)
    nx = 0
    if len(_kwargs["binList"]) > 0:
        xBins = _kwargs["binList"]
        nx = len(xBins) - 1
    counter = 0

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():

        if dataset.isMC():
            n = dataset.getDatasetRootHisto(numPath)
            d = dataset.getDatasetRootHisto(denPath)
            num = n.getHistogram()
            den = d.getHistogram()

            if nx > 0:
                num = num.Rebin(nx, "", xBins)
                den = den.Rebin(nx, "", xBins)
        else:
            num = dataset.getDatasetRootHisto(numPath).getHistogram()
            den = dataset.getDatasetRootHisto(denPath).getHistogram()
            if nx > 0:
                num = num.Rebin(nx, "", xBins)
                den = den.Rebin(nx, "", xBins)

        # Calculations
        total = den.Integral(0, den.GetXaxis().GetNbins() + 1)
        selected = num.Integral(0, num.GetXaxis().GetNbins() + 1)

        if 0:
            print "Numerical Efficiency", numPath, dataset.getName(
            ), ":", round(selected / total, 3)

        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den)
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP)  #FCP

        datasetTT = datasetsMgr.getDataset("TT")
        # Get the histograms
        numTT = datasetTT.getDatasetRootHisto(numPath).getHistogram()
        denTT = datasetTT.getDatasetRootHisto(denPath).getHistogram()
        if nx > 0:
            numTT = numTT.Rebin(nx, "", xBins)  #num.Rebin(nx, "", xBins)
            denTT = denTT.Rebin(nx, "", xBins)  #den.Rebin(nx, "", xBins)
        '''
        for i in range(1, num.GetNbinsX()+1):
            nbin = num.GetBinContent(i)
            dbin = den.GetBinContent(i)
            nbinTT = numTT.GetBinContent(i)
            dbinTT = denTT.GetBinContent(i)
            eps = nbin/dbin
            epsTT = nbinTT/dbinTT
            ratioTT = eps/epsTT
            if ratioTT > 1:
                ratioTT = 1/ratioTT
            #print "bin: ", i, "eps: ", round(eps,5) , "epsTT: ", round(epsTT,5)
            #print "bin: ", i, "eps/epsTT: ", (1.0 - round(ratioTT, 3))*100
        '''
        eff_ref = ROOT.TEfficiency(numTT, denTT)
        eff_ref.SetStatisticOption(ROOT.TEfficiency.kFCP)  #FCP

        # Convert to TGraph
        gEff = convert2TGraph(eff)
        gEffRef = convert2TGraph(eff_ref)

        # Style definitions
        stylesDef = styles.ttStyle
        styles0 = styles.signalStyleHToTB300
        styles1 = styles.signalStyleHToTB500
        styles2 = styles.signalStyleHToTB800
        styles3 = styles.signalStyleHToTB500
        styles4 = styles.signalStyleHToTB1000
        styles5 = styles.signalStyleHToTB2000
        styles6 = styles.signalStyleHToTB180
        styles7 = styles.signalStyleHToTB3000
        styles8 = styles.signalStyleHToTB200

        if dataset.getName() == "TT":
            styles.ttStyle.apply(gEffRef)
            legend_ref = "t#bar{t}"
            if opts.type == "partonShower":
                legend_ref = "t#bar{t} (Pythia8)"
            elif opts.type == "evtGen":
                legend_ref = "t#bar{t} (Powheg)"
            refGraph = histograms.HistoGraph(gEffRef, legend_ref, "p", "P")
        else:
            styles.markerStyles[counter].apply(gEff)
            legend = dataset.getName().replace("TT_", "t#bar{t} (").replace(
                "isr", "ISR ").replace("fsr", "FSR ")
            legend = legend.replace("hdamp", "hdamp ").replace("DOWN",
                                                               "down").replace(
                                                                   "UP", "up")
            legend = legend.replace("mtop1665", "m_{t} = 166.5 GeV")
            legend = legend.replace("mtop1695", "m_{t} = 169.5 GeV")
            legend = legend.replace("mtop1715", "m_{t} = 171.5 GeV")
            legend = legend.replace("mtop1735", "m_{t} = 173.5 GeV")
            legend = legend.replace("mtop1755", "m_{t} = 175.5 GeV")
            legend = legend.replace("mtop1785", "m_{t} = 178.5 GeV")
            legend = legend.replace("TuneEE5C", "Herwig++")
            legend += ")"
            counter += 1
            #myList.append(histograms.HistoGraph(gEff, legend, "lp", "P"))
            myList.append(histograms.HistoGraph(gEff, legend, "p", "P"))

    # Define stuff
    numPath = numPath.replace("AfterAllSelections_", "")
    saveName = "Efficiency_%s_%s" % (opts.folder, opts.type)
    saveName = saveName.replace("__", "_Inclusive_")

    # Plot the efficiency
    p = plots.ComparisonManyPlot(refGraph, myList, saveFormats=[])
    savePath = os.path.join(opts.saveDir, opts.optMode)
    plots.drawPlot(p, savePath, **_kwargs)

    # Save plot in all formats
    SavePlot(p, saveName, savePath, saveFormats=[".png", ".pdf", ".C"])
    return
def PlotEfficiency(datasetsMgr, numPath, denPath, intLumi):
  
    # Definitions
    myList  = []
    index   = 0
    _kwargs = GetHistoKwargs(numPath, opts)        

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        if "Fake" in numPath and "TT" in dataset.getName():
            continue
        # Get the histograms
        #num = dataset.getDatasetRootHisto(numPath).getHistogram()
        #den = dataset.getDatasetRootHisto(denPath).getHistogram()

        n = dataset.getDatasetRootHisto(numPath)
        n.normalizeToLuminosity(intLumi)                                                                                                                       
        num = n.getHistogram()
        d = dataset.getDatasetRootHisto(denPath)
        d.normalizeToLuminosity(intLumi)                                                                                                                       
        den = d.getHistogram()

        
        if "binList" in _kwargs:
            xBins   = _kwargs["binList"]
            nx      = len(xBins)-1
            num     = num.Rebin(nx, "", xBins)
            den     = den.Rebin(nx, "", xBins)

        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Remove negative bins and ensure numerator bin <= denominator bin
        CheckNegatives(num, den, True)
        # RemoveNegatives(num)
        # RemoveNegatives(den)
                
        # Sanity check (Histograms are valid and consistent) - Always false!
        # if not ROOT.TEfficiency.CheckConsistency(num, den):
        #    continue

        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den) # fixme: investigate warnings
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP) #
        # Set the weights - Why is this needed?
        if 0:
            weight = 1
            if dataset.isMC():
                weight = dataset.getCrossSection()
                eff.SetWeight(weight)
                
        # Convert to TGraph
        eff = convert2TGraph(eff)
        # Apply default style (according to dataset name)
        plots._plotStyles[dataset.getName()].apply(eff)

        # Append in list
        myList.append(histograms.HistoGraph(eff, plots._legendLabels[dataset.getName()], "lp", "P"))
            
    # Define save name
    saveName = "Eff_" + numPath.split("/")[-1] + "Over" + denPath.split("/")[-1]

    # Plot the efficiency
    p = plots.PlotBase(datasetRootHistos=myList, saveFormats=[])
    plots.drawPlot(p, saveName, **_kwargs)


    # Save plot in all formats
    savePath = os.path.join(opts.saveDir, numPath.split("/")[0], opts.optMode)
    #savePath = os.path.join(opts.saveDir, numPath.split("/")[0], opts.optMode)
    save_path = savePath + opts.MVAcut
    SavePlot(p, saveName, save_path, saveFormats = [".png", ".pdf", ".C"])
    return
def PlotEfficiency(datasetsMgr, numPath, denPath):  
    # Definitions
    myList      = []
    _kwargs     = GetHistoKwargs(numPath, opts)        
    nx          = 0
    if len(_kwargs["binList"]) > 0:
        xBins   = _kwargs["binList"]
        nx      = len(xBins)-1
    counter     = 0

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():

        if dataset.isMC():
            n   = dataset.getDatasetRootHisto(numPath)
            d   = dataset.getDatasetRootHisto(denPath)
            num = n.getHistogram()
            den = d.getHistogram()

            if nx > 0:
                num = num.Rebin(nx, "", xBins)
                den = den.Rebin(nx, "", xBins)
        else:
            num = dataset.getDatasetRootHisto(numPath).getHistogram()
            den = dataset.getDatasetRootHisto(denPath).getHistogram()
            if nx > 0:
                num = num.Rebin(nx, "", xBins)
                den = den.Rebin(nx, "", xBins)

        # Calculations    
        total    = den.Integral(0, den.GetXaxis().GetNbins()+1)
        selected = num.Integral(0, num.GetXaxis().GetNbins()+1)

        if 0:
            print "Numerical Efficiency", numPath, dataset.getName(), ":", round(selected/total, 3)
            
        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue
        
        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den) 
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP) #FCP
        
        datasetTT = datasetsMgr.getDataset("TT")
        # Get the histograms
        numTT = datasetTT.getDatasetRootHisto(numPath).getHistogram()
        denTT = datasetTT.getDatasetRootHisto(denPath).getHistogram()
        if nx > 0:
            numTT = numTT.Rebin(nx, "", xBins) #num.Rebin(nx, "", xBins)
            denTT = denTT.Rebin(nx, "", xBins) #den.Rebin(nx, "", xBins)


        '''
        for i in range(1, num.GetNbinsX()+1):
            nbin = num.GetBinContent(i)
            dbin = den.GetBinContent(i)
            nbinTT = numTT.GetBinContent(i)
            dbinTT = denTT.GetBinContent(i)
            eps = nbin/dbin
            epsTT = nbinTT/dbinTT
            ratioTT = eps/epsTT
            if ratioTT > 1:
                ratioTT = 1/ratioTT
            #print "bin: ", i, "eps: ", round(eps,5) , "epsTT: ", round(epsTT,5)
            #print "bin: ", i, "eps/epsTT: ", (1.0 - round(ratioTT, 3))*100
        '''
        eff_ref = ROOT.TEfficiency(numTT, denTT) 
        eff_ref.SetStatisticOption(ROOT.TEfficiency.kFCP) #FCP

        # Convert to TGraph
        gEff    = convert2TGraph(eff)
        gEffRef = convert2TGraph(eff_ref)
            
        # Style definitions
        stylesDef = styles.ttStyle
        styles0 = styles.signalStyleHToTB300                                            
        styles1 = styles.signalStyleHToTB500
        styles2 = styles.signalStyleHToTB800
        styles3 = styles.signalStyleHToTB500
        styles4 = styles.signalStyleHToTB1000
        styles5 = styles.signalStyleHToTB2000
        styles6 = styles.signalStyleHToTB180
        styles7 = styles.signalStyleHToTB3000
        styles8 = styles.signalStyleHToTB200

        if dataset.getName() == "TT":
            styles.ttStyle.apply(gEffRef)
            legend_ref = "t#bar{t}"
            if opts.type == "partonShower":
                legend_ref = "t#bar{t} (Pythia8)"
            elif opts.type == "evtGen": 
                legend_ref = "t#bar{t} (Powheg)"
            refGraph = histograms.HistoGraph(gEffRef, legend_ref, "p", "P")
        else:
            styles.markerStyles[counter].apply(gEff)
            legend  = dataset.getName().replace("TT_", "t#bar{t} (").replace("isr", "ISR ").replace("fsr", "FSR ")
            legend  = legend.replace("hdamp", "hdamp ").replace("DOWN", "down").replace("UP", "up")
            legend  = legend.replace("mtop1665", "m_{t} = 166.5 GeV")
            legend  = legend.replace("mtop1695", "m_{t} = 169.5 GeV")
            legend  = legend.replace("mtop1715", "m_{t} = 171.5 GeV")
            legend  = legend.replace("mtop1735", "m_{t} = 173.5 GeV")
            legend  = legend.replace("mtop1755", "m_{t} = 175.5 GeV")
            legend  = legend.replace("mtop1785", "m_{t} = 178.5 GeV")
            legend  = legend.replace("TuneEE5C", "Herwig++")
            legend += ")"
            counter+=1
            #myList.append(histograms.HistoGraph(gEff, legend, "lp", "P"))
            myList.append(histograms.HistoGraph(gEff, legend, "p", "P"))

    # Define stuff
    numPath  = numPath.replace("AfterAllSelections_","")
    saveName = "Efficiency_%s_%s" % (opts.folder, opts.type) 
    saveName = saveName.replace("__", "_Inclusive_")

    # Plot the efficiency
    p = plots.ComparisonManyPlot(refGraph, myList, saveFormats=[])
    savePath = os.path.join(opts.saveDir, opts.optMode)    
    plots.drawPlot(p, savePath, **_kwargs)

    # Save plot in all formats
    SavePlot(p, saveName, savePath, saveFormats = [".png", ".pdf", ".C"])
    return
def PlotEfficiency(datasetsMgr, numPath, denPath, intLumi):
  
    # Definitions
    myList  = []
    myBckList = []
    index   = 0
    _kwargs = GetHistoKwargs(denPath, opts)        
    counter = 0
    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        name_N = numPath
        name_D = denPath
        # Get the histograms
        #num = dataset.getDatasetRootHisto(numPath).getHistogram()
        #den = dataset.getDatasetRootHisto(denPath).getHistogram()
        #if "TT" in dataset.getName():
        #    numPath = numPath.replace("HiggsTop", "AllTop")
        #    denPath = denPath.replace("HiggsTop", "AllTop")
        #    numPath = numPath.replace("AssocTop", "AllTop")
        #    denPath = denPath.replace("AssocTop", "AllTop")
                
        n = dataset.getDatasetRootHisto(numPath)
        n.normalizeToLuminosity(intLumi)
        num = n.getHistogram()
        d = dataset.getDatasetRootHisto(denPath)
        d.normalizeToLuminosity(intLumi)
        den = d.getHistogram()


        if "binList" in _kwargs:
            xBins   = _kwargs["binList"]
            nx      = len(xBins)-1
            num     = num.Rebin(nx, "", xBins)
            den     = den.Rebin(nx, "", xBins)


        for i in range(1, num.GetNbinsX()+1):
            nbin = num.GetBinContent(i)
            dbin = den.GetBinContent(i)
            #print dataset.getName(), nbin, dbin
            if (nbin > dbin):
                print "error"

        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Remove negative bins and ensure numerator bin <= denominator bin
        #CheckNegatives(num, den, False)
        #CheckNegatives(num, den, True)
        #RemoveNegatives(num)
        #RemoveNegatives(den)
        # Sanity check (Histograms are valid and consistent) - Always false!
        # if not ROOT.TEfficiency.CheckConsistency(num, den):
        #    continue
        
        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den) # fixme: investigate warnings
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP) #
        
        # Set the weights - Why is this needed?
        if 0:
            weight = 1
            if dataset.isMC():
                weight = dataset.getCrossSection()
                eff.SetWeight(weight)
                
        # Convert to TGraph
        eff = convert2TGraph(eff)
    
        # Apply default style (according to dataset name)
        plots._plotStyles[dataset.getName()].apply(eff)
        # Apply random histo styles and append
        if "charged" in dataset.getName().lower():
            counter +=1
            mass = dataset.getName().split("M_")[-1]    
            styles.markerStyles[counter].apply(eff)
            if "300" in mass or "650" in mass:
                s = styles.getSignalStyleHToTB_M(mass)
                s.apply(eff)
                eff.SetLineStyle(ROOT.kSolid)
                eff.SetLineWidth(3)
                eff.SetMarkerSize(1.2)
                '''
                mass = dataset.getName().split("M_")[-1]
                mass = mass.replace("650", "1000")
                s = styles.getSignalStyleHToTB_M(mass)
                s.apply(eff)
                '''
        '''
        ttStyle = styles.getEWKLineStyle()
        if "tt" in dataset.getName().lower():
            ttStyle.apply(eff)
        '''

        
        # Append in list
        #if "charged" in dataset.getName().lower():
        #    if "m_500" in dataset.getName().lower():
        if 1:
            #if "tt" in dataset.getName().lower():
            if "m_500" in dataset.getName().lower():
                eff_ref = histograms.HistoGraph(eff, plots._legendLabels[dataset.getName()], "lp", "P")
            else:
                myList.append(histograms.HistoGraph(eff, plots._legendLabels[dataset.getName()], "lp", "P"))
        #elif "tt" in dataset.getName().lower():
        #    eff_ref = histograms.HistoGraph(eff, plots._legendLabels[dataset.getName()], "lp", "P")
            
    # Define save name
    saveName = "Eff_" + name_N.split("/")[-1] + "Over"+ name_D.split("/")[-1]

    # Plot the efficiency
    #p = plots.PlotBase(datasetRootHistos=myList, saveFormats=[])
    p = plots.ComparisonManyPlot(eff_ref, myList, saveFormats=[])
    plots.drawPlot(p, saveName, **_kwargs)

    # Save plot in all formats
    savePath = os.path.join(opts.saveDir, name_N.split("/")[0], opts.optMode)
    SavePlot(p, saveName, savePath, saveFormats = [".png", ".C", ".pdf"])#, ".pdf"])
    return