예제 #1
0
def getEfficiency(datasets, numerator="Numerator", denominator="Denominator"):
    '''
    Combine the numerator/denominator histograms of several datasets into
    one ROOT.TEfficiency using Clopper-Pearson intervals.

    datasets    -- iterable of dataset objects providing
                   getDatasetRootHisto(), getName(), isMC(), isData() and
                   getCrossSection()
    numerator   -- name of the numerator histogram
    denominator -- name of the denominator histogram

    Datasets whose denominator histogram has no entries are skipped.
    Each remaining dataset yields a TEfficiency that is merged with
    TEfficiency.Add(); MC datasets are weighted by
    cross-section / denominator entries, all others by 1.

    If at least one data dataset was accepted, the returned object is
    rebuilt from the summed data numerator and denominator histograms
    (the histograms of the first data dataset are used as accumulators
    and are therefore modified in place).

    Returns a default-constructed ROOT.TEfficiency when no dataset was
    accepted.
    '''
    #    statOption = ROOT.TEfficiency.kFNormal
    statOption = ROOT.TEfficiency.kFCP  # Clopper-Pearson
    #    statOption = ROOT.TEfficiency.kFFC # Feldman-Cousins

    teff = None  # running combination of all accepted datasets
    tn = None    # summed data numerator
    td = None    # summed data denominator

    for dataset in datasets:
        n = dataset.getDatasetRootHisto(numerator).getHistogram()
        d = dataset.getDatasetRootHisto(denominator).getHistogram()

        if d.GetEntries() == 0:
            continue

        checkNegatives(n, d)

        #        removeNegatives(n)
        #        removeNegatives(d)
        print("%s entries %s %s" % (dataset.getName(), n.GetEntries(),
                                    d.GetEntries()))
        print("    bins %s %s" % (n.GetNbinsX(), d.GetNbinsX()))
        print("    lowedge %s %s" % (n.GetBinLowEdge(1), d.GetBinLowEdge(1)))

        eff = ROOT.TEfficiency(n, d)
        eff.SetStatisticOption(statOption)

        weight = 1
        if dataset.isMC():
            weight = dataset.getCrossSection() / d.GetEntries()
            for i in range(1, d.GetNbinsX() + 1):
                print("    bin %s %s %s %s" % (i, d.GetBinLowEdge(i),
                                               n.GetBinContent(i),
                                               d.GetBinContent(i)))
        eff.SetWeight(weight)

        if teff is None:
            teff = eff
        else:
            teff.Add(eff)

        # Track the data sums independently of the position of the data
        # dataset in the list; the original only initialised them when the
        # very first accepted dataset was data.
        if dataset.isData():
            if tn is None:
                tn = n
                td = d
            else:
                tn.Add(n)
                td.Add(d)

    if tn is not None:
        teff = ROOT.TEfficiency(tn, td)
        teff.SetStatisticOption(statOption)

    if teff is None:
        teff = ROOT.TEfficiency()

    return teff
def getEfficiency(datasets,numerator="Numerator",denominator="Denominator"):
    '''
    Build a single ROOT.TEfficiency (Clopper-Pearson) out of the
    numerator and denominator histograms of all given datasets.

    datasets    -- iterable of dataset objects (must offer
                   getDatasetRootHisto(), getName(), isMC(), isData(),
                   getCrossSection())
    numerator   -- numerator histogram name
    denominator -- denominator histogram name

    A dataset is ignored when its denominator histogram has zero entries.
    Accepted datasets are merged through TEfficiency.Add(), where MC
    datasets carry the weight cross-section / denominator entries and
    everything else the weight 1.

    When one or more data datasets were accepted the result is replaced
    by a TEfficiency constructed from the summed data histograms. The
    sums are accumulated in the histograms of the first data dataset, so
    those objects are modified.

    With no accepted dataset a default-constructed ROOT.TEfficiency is
    returned.
    '''
#    statOption = ROOT.TEfficiency.kFNormal
    statOption = ROOT.TEfficiency.kFCP # Clopper-Pearson
#    statOption = ROOT.TEfficiency.kFFC # Feldman-Cousins

    teff = None   # combination of every accepted dataset
    tn   = None   # accumulated data numerator
    td   = None   # accumulated data denominator

    for dataset in datasets:
        n = dataset.getDatasetRootHisto(numerator).getHistogram()
        d = dataset.getDatasetRootHisto(denominator).getHistogram()

        if d.GetEntries() == 0:
            continue

        checkNegatives(n,d)

#        removeNegatives(n)
#        removeNegatives(d)
        print("%s entries %s %s" % (dataset.getName(), n.GetEntries(), d.GetEntries()))
        print("    bins %s %s" % (n.GetNbinsX(), d.GetNbinsX()))
        print("    lowedge %s %s" % (n.GetBinLowEdge(1), d.GetBinLowEdge(1)))

        eff = ROOT.TEfficiency(n,d)
        eff.SetStatisticOption(statOption)

        weight = 1
        if dataset.isMC():
            weight = dataset.getCrossSection()/d.GetEntries()
            for i in range(1,d.GetNbinsX()+1):
                print("    bin %s %s %s %s" % (i, d.GetBinLowEdge(i), n.GetBinContent(i), d.GetBinContent(i)))
        eff.SetWeight(weight)

        if teff is None:
            teff = eff
        else:
            teff.Add(eff)

        # The data sums must not depend on whether the data dataset comes
        # first: previously tn/td were undefined if an MC dataset preceded it.
        if dataset.isData():
            if tn is None:
                tn = n
                td = d
            else:
                tn.Add(n)
                td.Add(d)

    # Previously guarded by a flag that was never set and referring to a
    # non-existent "self"; use the presence of data sums instead.
    if tn is not None:
        teff = ROOT.TEfficiency(tn, td)
        teff.SetStatisticOption(statOption)

    if teff is None:
        teff = ROOT.TEfficiency()

    return teff
예제 #3
0
def GetEfficiency(datasetsMgr, datasets, numPath, denPath, intLumi):
    '''
    Compute the efficiency graph numPath / denPath for the given datasets.

    datasetsMgr -- dataset manager (not used by this function; kept for
                   interface compatibility)
    datasets    -- iterable of dataset objects
    numPath     -- path of the numerator histogram
    denPath     -- path of the denominator histogram
    intLumi     -- integrated luminosity MC histograms are normalised to

    For every dataset the overall efficiency (integrals including under-
    and overflow) is printed. Histograms are rebinned to the "binList"
    entry of GetHistoKwargs(numPath, opts) when present. Datasets with an
    empty numerator or denominator, or with more numerator than
    denominator entries, are skipped.

    Returns the result of convert2TGraph() for the LAST dataset that
    passed the checks, or None if no dataset passed.
    '''
    _kwargs = GetHistoKwargs(numPath, opts)
    eff = None  # stays None when every dataset is skipped

    # For-loop: All datasets
    for dataset in datasets:
        numRootHisto = dataset.getDatasetRootHisto(numPath)
        denRootHisto = dataset.getDatasetRootHisto(denPath)
        if dataset.isMC():
            numRootHisto.normalizeToLuminosity(intLumi)
            denRootHisto.normalizeToLuminosity(intLumi)
        num = numRootHisto.getHistogram()
        den = denRootHisto.getHistogram()

        total = den.Integral(0, den.GetXaxis().GetNbins() + 1)
        selected = num.Integral(0, num.GetXaxis().GetNbins() + 1)

        # Avoid a ZeroDivisionError for an empty denominator integral
        if total != 0:
            ratio = round(selected / total, 3)
        else:
            ratio = float("nan")
        print("Numerical Efficiency %s %s : %s" % (numPath, dataset.getName(), ratio))
        # NOTE(review): the original statement on this line was corrupted in
        # the source; it is reconstructed to report both event counts.
        print("Pass : %s / %s events" % (selected, total))

        if "binList" in _kwargs:
            xBins = _kwargs["binList"]
            nx = len(xBins) - 1
            num = num.Rebin(nx, "", xBins)
            den = den.Rebin(nx, "", xBins)

        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Create Efficiency plots with Clopper-Pearson stats
        teff = ROOT.TEfficiency(num, den)  # fixme: investigate warnings
        teff.SetStatisticOption(ROOT.TEfficiency.kFCP)

        # Convert to TGraph
        eff = convert2TGraph(teff)
    return eff
def GetEfficiency(datasetsMgr, datasets, numPath, denPath, intLumi):
    '''
    Return the efficiency graph of numPath over denPath.

    datasetsMgr -- dataset manager; unused here, kept so that existing
                   callers keep working
    datasets    -- iterable of dataset objects
    numPath     -- numerator histogram path
    denPath     -- denominator histogram path
    intLumi     -- luminosity used to normalise MC histograms

    Prints the integrated efficiency (under-/overflow included) per
    dataset, applies the variable binning given by the "binList" key of
    GetHistoKwargs(numPath, opts) if there is one, and skips datasets
    whose numerator or denominator is empty or whose numerator has more
    entries than the denominator.

    The return value is convert2TGraph() of the last dataset that passed
    the checks; None is returned when none passed.
    '''
    _kwargs = GetHistoKwargs(numPath, opts)
    eff     = None   # previously unbound if all datasets were skipped

    # For-loop: All datasets
    for dataset in datasets:
        if dataset.isMC():
            n = dataset.getDatasetRootHisto(numPath)
            n.normalizeToLuminosity(intLumi)
            num = n.getHistogram()
            d = dataset.getDatasetRootHisto(denPath)
            d.normalizeToLuminosity(intLumi)
            den = d.getHistogram()
        else:
            num = dataset.getDatasetRootHisto(numPath).getHistogram()
            den = dataset.getDatasetRootHisto(denPath).getHistogram()

        total    = den.Integral(0, den.GetXaxis().GetNbins()+1)
        selected = num.Integral(0, num.GetXaxis().GetNbins()+1)

        # An empty denominator integral must not abort the whole loop
        numEff = round(selected/total, 3) if total != 0 else float("nan")
        print("Numerical Efficiency %s %s : %s" % (numPath, dataset.getName(), numEff))
        # NOTE(review): this statement was corrupted in the source file; the
        # reconstruction prints the selected and the total number of events.
        print("Pass : %s / %s events" % (selected, total))

        if "binList" in _kwargs:
            xBins   = _kwargs["binList"]
            nx      = len(xBins)-1
            num     = num.Rebin(nx, "", xBins)
            den     = den.Rebin(nx, "", xBins)

        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Create Efficiency plots with Clopper-Pearson stats
        tEff = ROOT.TEfficiency(num, den) # fixme: investigate warnings
        tEff.SetStatisticOption(ROOT.TEfficiency.kFCP)

        # Convert to TGraph
        eff = convert2TGraph(tEff)
    return eff
def PlotMC_ForEffPlots(datasetsMgr, histoName, intLumi):
    '''
    Draw the luminosity-normalised histogram histoName of every dataset
    and save the resulting plot.

    datasetsMgr -- dataset manager providing getAllDatasets()
    histoName   -- path of the histogram inside the dataset files; also
                   used to derive the save name and directory
    intLumi     -- integrated luminosity the histograms are normalised to

    NOTE(review): the plot object "p" is not created in this function;
    it has to exist in the enclosing module scope - confirm.
    '''
    index = 0
    for dataset in datasetsMgr.getAllDatasets():

        rootHisto = dataset.getDatasetRootHisto(histoName)
        rootHisto.normalizeToLuminosity(intLumi)
        histo = rootHisto.getHistogram()

        # NOTE(review): this tests the string "TT" for membership in the
        # return value of getAllDatasets(); if that is a list of dataset
        # objects rather than names the styles are never applied - confirm.
        hasTT = "TT" in datasetsMgr.getAllDatasets()
        if index == 0:
            # Apply Styles
            if hasTT:
                p.histoMgr.setHistoDrawStyle("TT", "AP")
                p.histoMgr.setHistoLegendStyle("TT", "LP")
                index = 1
        else:
            # Apply Styles
            if hasTT:
                p.histoMgr.setHistoDrawStyle("TTTT", "AP")
                p.histoMgr.setHistoLegendStyle("TTTT", "LP")

        _xlabel = "p_{T} (GeV/c)"
        logY = False
        _rebinX = 1
        units = "GeV/c"
        _format = "%0.1f" + units
        _opts   = {"ymin": 1e-3, "ymaxfactor": 1.0}
        _cutBox = False
#        _cutBox = {"cutValue": 0.5, "fillColor": 16, "box": False, "line": False, "greaterThan": True}

        plots.drawPlot(p,
                       histo,
                       xlabel       = _xlabel,
                       ylabel       = "Arbitrary Units / %s" % (_format),
                       log          = logY,
                       rebinX       = _rebinX, cmsExtraText = "Preliminary",
                       createLegend = {"x1": 0.58, "y1": 0.65, "x2": 0.92, "y2": 0.92},
                       opts         = _opts,
                       opts2        = {"ymin": 0.6, "ymax": 1.4},
                       cutBox       = _cutBox,
                       )

    # Save plot in all formats.
    # The names are derived from the histogram PATH; the original called
    # split() on the histogram object returned by getHistogram().
    saveName = histoName.split("/")[-1]
    savePath = os.path.join(opts.saveDir, "HplusMasses", histoName.split("/")[0], opts.optMode)
    SavePlot(p, saveName, savePath)
    return
def main(opts):
    '''
    Entry point: configure the plotting style and the dataset manager,
    then call PlotHistos() once per (dataset, variable) pair.

    opts -- parsed options object; reads opts.verbose, opts.folder,
            opts.saveDir and opts.url, and stores the number of datasets
            in opts.nDatasets
    '''
    # TDR style without statistics box and without grid lines
    tdr = tdrstyle.TDRStyle()
    tdr.setOptStat(False)
    tdr.setGridX(False)
    tdr.setGridY(False)

    # Dataset manager: PU-weighted event counts, luminosities from lumi.json
    datasetsMgr = GetDatasetsFromDir(opts, 0)
    datasetsMgr.updateNAllEventsToPUWeighted()
    datasetsMgr.loadLuminosities()

    if opts.verbose:
        datasetsMgr.PrintCrossSections()
        datasetsMgr.PrintLuminosities()

    # Every dataset whose name contains "ChargedHiggs" gets cross-section 1.0
    for candidate in datasetsMgr.getAllDatasets():
        if "ChargedHiggs" not in candidate.getName():
            continue
        datasetsMgr.getDataset(candidate.getName()).setCrossSection(1.0)

    # Merge histograms (see NtupleAnalysis/python/tools/plots.py)
    plots.mergeRenameReorderForDataMC(datasetsMgr)

    # Summary of the datasets
    datasetsMgr.PrintInfo()

    # Histograms to be plotted for each dataset
    VariableList = ["TetrajetMass"]

    opts.nDatasets = len(datasetsMgr.getAllDatasets())
    nPlots  = len(VariableList)*opts.nDatasets
    iPlot   = 0

    rowFormat = "{:<9} {:>3} {:<1} {:<3} {:<50}"
    for dataset in datasetsMgr.getAllDatasets():
        for hName in VariableList:
            hPath = os.path.join(opts.folder, hName)
            iPlot = iPlot + 1

            # Progress line; the second argument of aux.Print is True only
            # for the very first plot
            progress = rowFormat.format("Histogram", "%i" % iPlot, "/", "%s:" % (nPlots), "%s" % (dataset.getName()))
            isFirst  = (iPlot == 1)
            aux.Print(ShellStyles.SuccessStyle() + progress + ShellStyles.NormalStyle(), isFirst)

            # For each dataset: plot histos from different multicrabs on the same canvas
            PlotHistos(dataset.getName(), hPath)

    saveUrl = aux.convertToURL(opts.saveDir, opts.url)
    aux.Print("All plots saved under directory %s" % (ShellStyles.NoteStyle() + saveUrl + ShellStyles.NormalStyle()), True)
    return
예제 #7
0
def Convert2TGraph(tefficiency, dataset, style, titleX, titleY):
    '''
    Turn a TEfficiency into a styled ROOT.TGraphAsymmErrors.

    One point is created per bin of the total histogram: x is the bin
    centre with half the bin width as symmetric x error, y is the
    efficiency with its lower/upper errors. The upper error is forced to
    0 when the efficiency is exactly 1.0.

    tefficiency -- object providing GetCopyTotalHisto(), GetEfficiency(),
                   GetEfficiencyErrorLow() and GetEfficiencyErrorUp()
    dataset     -- its getName() becomes the graph name
    style       -- object whose apply() is called with the graph
    titleX      -- x-axis title
    titleY      -- y-axis title

    Returns the TGraphAsymmErrors.
    '''
    totalHisto = tefficiency.GetCopyTotalHisto()
    nPoints = totalHisto.GetNbinsX()

    centres = []
    effs = []
    halfWidthsLow = []
    halfWidthsHigh = []
    effErrLow = []
    effErrUp = []

    for iBin in range(1, nPoints + 1):
        halfWidth = 0.5 * totalHisto.GetBinWidth(iBin)
        centres.append(totalHisto.GetBinLowEdge(iBin) + halfWidth)
        halfWidthsLow.append(halfWidth)
        halfWidthsHigh.append(halfWidth)

        efficiency = tefficiency.GetEfficiency(iBin)
        effs.append(efficiency)
        effErrLow.append(tefficiency.GetEfficiencyErrorLow(iBin))

        # ugly hack to prevent error going above 1
        upper = tefficiency.GetEfficiencyErrorUp(iBin)
        effErrUp.append(0 if efficiency == 1.0 else upper)

    def asDoubles(values):
        return array.array("d", values)

    tgraph = ROOT.TGraphAsymmErrors(nPoints,
                                    asDoubles(centres),
                                    asDoubles(effs),
                                    asDoubles(halfWidthsLow),
                                    asDoubles(halfWidthsHigh),
                                    asDoubles(effErrLow),
                                    asDoubles(effErrUp))

    tgraph.SetName(dataset.getName())
    tgraph.GetXaxis().SetTitle(titleX)
    tgraph.GetYaxis().SetTitle(titleY)

    style.apply(tgraph)
    return tgraph
예제 #8
0
def Convert2TGraph(tefficiency, dataset, style, titleX, titleY):
    '''
    Convert a TEfficiency to a ROOT.TGraphAsymmErrors and apply a style.

    For each bin of the total histogram the graph holds the bin centre
    (x error: half the bin width on both sides) and the efficiency with
    its asymmetric errors; a bin with efficiency exactly 1.0 gets an
    upper error of 0.

    tefficiency -- source efficiency object
    dataset     -- dataset whose name is given to the graph
    style       -- style object; style.apply(graph) is called
    titleX      -- title of the x axis
    titleY      -- title of the y axis

    Returns the created graph.
    '''
    hTotal = tefficiency.GetCopyTotalHisto()
    nBins  = hTotal.GetNbinsX()
    bins   = list(range(1, nBins+1))

    # x values and their (symmetric) errors
    halfWidths = [0.5*hTotal.GetBinWidth(b) for b in bins]
    xValues    = [hTotal.GetBinLowEdge(b)+hw for b, hw in zip(bins, halfWidths)]

    # y values and their (asymmetric) errors
    yValues = [tefficiency.GetEfficiency(b) for b in bins]
    yErrLow = [tefficiency.GetEfficiencyErrorLow(b) for b in bins]
    rawUp   = [tefficiency.GetEfficiencyErrorUp(b) for b in bins]
    # ugly hack to prevent error going above 1
    yErrUp  = [0 if eff == 1.0 else up for eff, up in zip(yValues, rawUp)]

    columns = (xValues, yValues, halfWidths, halfWidths, yErrLow, yErrUp)
    buffers = [array.array("d", column) for column in columns]
    tgraph  = ROOT.TGraphAsymmErrors(nBins, *buffers)

    tgraph.SetName(dataset.getName())
    tgraph.GetXaxis().SetTitle(titleX)
    tgraph.GetYaxis().SetTitle(titleY)

    style.apply(tgraph)
    return tgraph
def GetEfficiency(datasetsMgr, datasets, numerator="Numerator",denominator="Denominator", **kwargs):
    '''
    TEfficiency method:
    See https://root.cern.ch/doc/master/classTEfficiency.html

    Combine numerator/denominator histograms of the given datasets into a
    TEfficiency (Clopper-Pearson) and return convert2TGraph() of it.

    datasetsMgr -- dataset manager, passed to GetLumi()
    datasets    -- iterable of dataset objects
    numerator   -- name of the numerator histogram
    denominator -- name of the denominator histogram
    kwargs      -- accepted for interface compatibility; not used

    MC histograms are normalised to the luminosity from GetLumi().
    A dataset is skipped when its numerator or denominator has no
    entries, or when the mean, the RMS or the in-range integral of
    either histogram is zero. Negative numerator bins are removed with
    RemoveNegatives().

    MC efficiencies are weighted by the cross-section and merged with
    TEfficiency.Add(). As soon as a data dataset has been accepted the
    combined efficiency is rebuilt from the summed data histograms (the
    histograms of the first data dataset act as accumulators and are
    modified in place).

    Raises Exception when numerator and denominator binning differ.
    '''
    lumi = GetLumi(datasetsMgr)

    # Select Statistic Options. Alternatives offered by TEfficiency:
    #   kFCP (Clopper-Pearson), kFNormal (normal approximation),
    #   kFWilson (Wilson), kFAC (Agresti-Coull), kFFC (Feldman-Cousins),
    #   kBJeffrey (Jeffrey), kBUniform (uniform prior),
    #   kBBayesian (custom prior)
    statOption = ROOT.TEfficiency.kFCP

    debug = False   # set to True for detailed per-dataset/per-bin output
    first = True
    teff  = ROOT.TEfficiency()
    tn    = None    # summed data numerator
    td    = None    # summed data denominator
    n     = None
    nBins = 0

    # For-loop: All datasets
    for dataset in datasets:

        num = dataset.getDatasetRootHisto(numerator)
        den = dataset.getDatasetRootHisto(denominator)

        if dataset.isMC():
            num.normalizeToLuminosity(lumi)
            den.normalizeToLuminosity(lumi)

        # Get Numerator and Denominator
        n = num.getHistogram()
        d = den.getHistogram()

        if d.GetEntries() == 0 or n.GetEntries() == 0:
            msg =  "Denominator Or Numerator has no entries"
            Print(ErrorStyle() + msg + NormalStyle(), True)
            continue

        # Check Negatives
        CheckNegatives(n, d, True)

        # Remove Negatives
        RemoveNegatives(n)
        #RemoveNegatives(d)

        # Sanity Check
        if n.GetNbinsX() != d.GetNbinsX():
            raise Exception("Numerator and Denominator Bins are NOT equal!")

        nBins = d.GetNbinsX()

        if debug:
            print("\n")
            print("=========== getEfficiency:")
            print("Dataset             =  %s" % dataset.getName())
            print("Numerator  : %s    entries= %s  Bins= %s  Low edge= %s" % (n.GetName(), n.GetEntries(), n.GetNbinsX(), n.GetBinLowEdge(1)))
            print("Denominator: %s    entries= %s  Bins= %s  Low edge= %s" % (d.GetName(), d.GetEntries(), d.GetNbinsX(), d.GetBinLowEdge(1)))
            print("\n")
            print(">>>>>>  Sanity Check:  <<<<<<")
            print("Numerator Mean       =  %s" % n.GetMean())
            print("Numerator RMS        =  %s" % n.GetRMS())
            print("Numerator Integral   =  %s" % n.Integral(1, nBins))
            print("Denominator Mean     =  %s" % d.GetMean())
            print("Denominator RMS      =  %s" % d.GetRMS())
            print("Denominator Integral =  %s" % d.Integral(1, nBins))

        # Ugly hack to ignore histograms that are EMPTY in the wanted
        # range but carry overflows/underflows
        if (n.GetMean() == 0 or d.GetMean() == 0): continue
        if (n.GetRMS()  == 0 or d.GetRMS()  == 0): continue
        if (n.Integral(1,nBins) == 0 or d.Integral(1,nBins) == 0): continue

        Verbose("Passed the sanity check", True)

        eff = ROOT.TEfficiency(n, d)
        eff.SetStatisticOption(statOption)

        # For-loop: All bins
        if debug:
            for iBin in range(1, nBins+1):
                print("%s x= %s  Num= %s  Den= %s  Eff= %s" % (iBin, n.GetBinLowEdge(iBin), n.GetBinContent(iBin), d.GetBinContent(iBin), eff.GetEfficiency(iBin)))

        weight = 1
        if dataset.isMC():
            weight = dataset.getCrossSection()
        eff.SetWeight(weight)

        if first:
            teff  = eff
            first = False
        else:
            teff.Add(eff)

        if dataset.isData():
            # Initialise the sums with the first DATA dataset, wherever it
            # appears; previously tn/td were unbound when an MC dataset
            # had been accepted before the first data dataset.
            if tn is None:
                tn = n
                td = d
            else:
                tn.Add(n)
                td.Add(d)
            teff = ROOT.TEfficiency(tn, td)
            teff.SetStatisticOption(statOption)

    Verbose("Final tEff", True)
    if debug and n is not None:
        for iBin in range(1,nBins+1):
            print("%s x= %s  Efficiency= %s  Weight =  %s" % (iBin, n.GetBinLowEdge(iBin), teff.GetEfficiency(iBin), teff.GetWeight()))
    return convert2TGraph(teff)
예제 #10
0
def GetCutEfficiencyHisto(dataset, histoName, statOpt, **kwargs):
    '''
    See https://root.cern.ch/doc/master/classTEfficiency.html

    Build the cut-efficiency graph of one histogram: for every bin the
    fraction of the total integral (under- and overflow included) that
    lies above (cutDirection ">") or not above (cutDirection "<") that
    bin.

    dataset   -- dataset object the histogram is read from
    histoName -- name of the histogram
    statOpt   -- name of the TEfficiency statistic option: "kFCP",
                 "kFNormal", "kFWilson", "kFAC", "kFFC", "kBJeffrey",
                 "kBUniform" or "kBBayesian". The legacy spellings
                 "KFWilson" and "kBayesian" are accepted as aliases.
    kwargs    -- must contain "verbose", "normalizeTo" and "cutDirection"
                 (checked with HasKeys)

    Returns the result of Convert2TGraph(), or None when the histogram
    has no entries.

    Raises Exception for an unknown statOpt or cutDirection.

    NOTE(review): "datasetsMgr" and "styleDict" are not defined in this
    function and must exist in the enclosing module scope - confirm.
    '''
    HasKeys(["verbose", "normalizeTo", "cutDirection"], **kwargs)
    verbose     = kwargs.get("verbose")
    normalizeTo = kwargs.get("normalizeTo")
    cutDirection= kwargs.get("cutDirection")
    Verbose("Calculating the cut-efficiency (%s) for histo with name %s" % (cutDirection, histoName) )

    # Choose statistics options. The names are those of the
    # TEfficiency::EStatOption enumerators.
    statOpts = ["kFCP", "kFNormal", "kFWilson", "kFAC", "kFFC", "kBJeffrey", "kBUniform", "kBBayesian"]
    # Spellings used by earlier versions of this function: the whitelist
    # contained "KFWilson" (so "kFWilson" was rejected) and the custom
    # prior was looked up under a name TEfficiency does not define.
    aliases  = {"KFWilson": "kFWilson", "kBayesian": "kBBayesian"}
    optName  = aliases.get(statOpt, statOpt)
    if optName not in statOpts:
        raise Exception("Invalid statistics option \"%s\". Please choose one from the following:\n\t%s" % (statOpt, "\n\t".join(statOpts)))
    statOption = getattr(ROOT.TEfficiency, optName)

    # Get the ROOT histogram
    rootHisto = dataset.getDatasetRootHisto(histoName)

    # Normalise the histogram
    NormalizeRootHisto(datasetsMgr, rootHisto, dataset.isMC(), normalizeTo)

    ## Get a clone of the wrapped histogram normalized as requested.
    h = rootHisto.getHistogram()
    titleX   = h.GetXaxis().GetTitle()
    binWidth = h.GetXaxis().GetBinWidth(0)
    titleY   = "efficiency (%s) / %s" % (cutDirection, GetBinwidthDecimals(binWidth) % (binWidth) )

    # If empty return
    if h.GetEntries() == 0:
        return

    if cutDirection not in (">", "<"):
        raise Exception("Invalid cut direction  \"%s\". Please choose either \">\" or \"<\"" % (cutDirection))

    # Create empty numerator/denominator histograms with the same binning
    numerator   = h.Clone("Numerator")
    denominator = h.Clone("Denominator")
    numerator.Reset()
    denominator.Reset()

    # Index of the overflow bin; the total does not depend on the bin and
    # is therefore computed once
    overflowBin = h.GetNbinsX()+1
    nTotal      = h.Integral(0, overflowBin)

    # Calculate the instances passing a given cut (all bins)
    for iBin in range(1, overflowBin):
        nAbove = h.Integral(iBin+1, overflowBin)
        if cutDirection == ">":
            nPass = nAbove
        else:
            nPass = nTotal - nAbove

        # Sanity check
        if nPass < 0:
            nPass = 0

        # Fill the numerator/denominator histograms
        numerator.SetBinContent(iBin, nPass)
        numerator.SetBinError(iBin, math.sqrt(nPass)/10)
        #
        denominator.SetBinContent(iBin, nTotal)
        denominator.SetBinError(iBin, math.sqrt(nTotal)/10)

    # Check for negative values
    CheckNegatives(numerator, denominator)

    # Create TEfficiency object using the two histos
    eff = ROOT.TEfficiency(numerator, denominator)
    eff.SetStatisticOption(statOption)
    Verbose("The statistic option was set to %s" % (eff.GetStatisticOption()) )

    # Print info in a table (debugging)
    if verbose:
        hLine    = "="*70
        msgAlign = '{:<5} {:<20} {:<20} {:<20}'
        table    = ["\n" + hLine,
                    msgAlign.format("Bin", "Efficiency", "Error-Low", "Error-Up"),
                    hLine]
        for iBin in range(1, overflowBin):
            table.append(msgAlign.format(iBin,
                                         eff.GetEfficiency(iBin),
                                         eff.GetEfficiencyErrorLow(iBin),
                                         eff.GetEfficiencyErrorUp(iBin)))
        table.append(hLine)
        for l in table:
            print(l)

    weight = 1
    if dataset.isMC():
        weight = dataset.getCrossSection()
    eff.SetWeight(weight)

    # Only one dataset is handled per call, so there is nothing to merge:
    # the former first/isData bookkeeping was dead code.
    style = styleDict[dataset.getName()]
    return Convert2TGraph(eff, dataset, style, titleX, titleY)
def PlotEfficiency(datasetsMgr, numPath, denPath, intLumi):
    '''
    Plot the efficiency numPath / denPath of all datasets and save a
    Data-vs-MC comparison plot with a ratio pad.

    datasetsMgr -- dataset manager providing getAllDatasets()
    numPath     -- path of the numerator histogram
    denPath     -- path of the denominator histogram
    intLumi     -- integrated luminosity (MC normalisation and plot label)

    Datasets whose name contains "TT" are skipped when numPath contains
    "Fake". Histograms are rebinned to the "binList" entry of
    GetHistoKwargs(numPath, opts) when present, otherwise by a factor 2
    if numPath contains "Eta" or "Phi". Datasets with an empty numerator
    or denominator, or with more numerator than denominator entries, are
    skipped.

    Raises Exception when no data efficiency is available, since the
    comparison plot needs it as reference.
    '''
    myList    = []
    myList_MC = []
    eff_Data  = None
    _kwargs   = GetHistoKwargs(numPath, opts)

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        name = dataset.getName()

        if "Fake" in numPath and "TT" in name:
            continue

        if dataset.isMC():
            n = dataset.getDatasetRootHisto(numPath)
            n.normalizeToLuminosity(intLumi)
            num = n.getHistogram()
            d = dataset.getDatasetRootHisto(denPath)
            d.normalizeToLuminosity(intLumi)
            den = d.getHistogram()
        else:
            num = dataset.getDatasetRootHisto(numPath).getHistogram()
            den = dataset.getDatasetRootHisto(denPath).getHistogram()

        total    = den.Integral(0, den.GetXaxis().GetNbins()+1)
        selected = num.Integral(0, num.GetXaxis().GetNbins()+1)
        # NOTE(review): this statement was corrupted in the source file; the
        # reconstruction prints the selected and the total number of events.
        print("Pass : %s / %s events" % (selected, total))
        # An empty denominator integral must not abort the loop
        numEff = round(selected/total, 3) if total != 0 else float("nan")
        print("Numerical Efficiency %s %s : %s" % (numPath, name, numEff))

        if "binList" in _kwargs:
            xBins   = _kwargs["binList"]
            nx      = len(xBins)-1
            num     = num.Rebin(nx, "", xBins)
            den     = den.Rebin(nx, "", xBins)
        elif "Eta" in numPath or "Phi" in numPath:
            num     = num.Rebin(2)
            den     = den.Rebin(2)

        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Create Efficiency plots with Clopper-Pearson stats
        tEff = ROOT.TEfficiency(num, den) # fixme: investigate warnings
        tEff.SetStatisticOption(ROOT.TEfficiency.kFCP)

        # Convert to TGraph
        eff = convert2TGraph(tEff)
        # Apply default style (according to dataset name)
        plots._plotStyles[name].apply(eff)

        # Append in list
        label = plots._legendLabels[name]
        myList.append(histograms.HistoGraph(eff, label, "lp", "P"))
        if dataset.isMC():
            myList_MC.append(histograms.HistoGraph(eff, label, "lp", "P"))
        else:
            eff_Data = eff
            eff_Data.SetMarkerSize(1.2)

    if eff_Data is None:
        raise Exception("No data efficiency available for \"%s\"; cannot draw the Data/MC comparison" % (numPath))

    numPath = numPath.replace("AfterAllSelections_","")
    # Define save name
    saveName = "Eff_" + numPath.split("/")[-1]

    # Plot the efficiency
    p = plots.PlotBase(datasetRootHistos=myList, saveFormats=[])
    plots.drawPlot(p, saveName, **_kwargs)

    p1 = plots.ComparisonManyPlot(histograms.HistoGraph(eff_Data, "Data",  drawStyle="P"),
                                  myList_MC, saveFormats=[])

    # Save plot in all formats
    savePath  = os.path.join(opts.saveDir, "HplusMasses", numPath.split("/")[0], opts.optMode)
    save_path = savePath + opts.MVAcut
    print("save_path %s" % save_path)

    # Draw and save the plot
    p1.setLuminosity(intLumi)
    _kwargs["ratio"] = True
    _kwargs["ratioInvert"] = True
    _kwargs["cutBoxY"] = {"cutValue": 1.0, "fillColor": 16, "box": False, "line": True, "greaterThan": True, "mainCanvas": True, "ratioCanvas": True}
    # The original passed the undefined name "save_path1" here; the plot
    # name is used, as in the drawPlot() call above.
    plots.drawPlot(p1, saveName, **_kwargs)
    SavePlot(p1, saveName, save_path, saveFormats = [".png", ".pdf", ".C"])
    return
def PlotProb(datasets, numPath, denPath):


    EfficiencyList = []
    index = 0
    for dataset in datasets:
        
        datasetName = dataset.getName()
        print "Dataset = ", datasetName
        
        
        statOption = ROOT.TEfficiency.kFNormal        
##        n = dataset.getDatasetRootHisto(numPath).getHistogram()
#        n.normalizeToOne()
##        d = dataset.getDatasetRootHisto(denPath).getHistogram()
        nn = dataset.getDatasetRootHisto(numPath)
        nn.normalizeToLuminosity(35.8*(10**3))
        n = nn.getHistogram()
        dd = dataset.getDatasetRootHisto(denPath)
#        dd.normalizeToOne()                                                                                                                                            
#        dd.normalizeToLuminosity(36.3*(10**3))
        dd.normalizeToLuminosity(35.8*(10**3))                                                                                                                                   
#        dd.normalizeByCrossSection()
        d = dd.getHistogram()


#        if "TT" in datasetName and ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath):
#            continue
#        elif "M_"  in datasetName and not ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath):
#            continue
        
        if "Event" in numPath:
            n.Rebin(10)
            d.Rebin(10)
        else:
            n.Rebin(5)
            d.Rebin(5)
        
        if d.GetEntries() == 0 or n.GetEntries() == 0:
            continue

        if n.GetEntries() > d.GetEntries():
            continue
        # Check Negatives
        CheckNegatives(n, d, True)
        
        # Remove Negatives 
        RemoveNegatives(n)
                
        nBins = d.GetNbinsX()
        xMin  = d.GetXaxis().GetXmin()
        xMax  = d.GetXaxis().GetXmax()
        
        binwidth = int(n.GetBinWidth(0))
                
        # ----------------------------------------------------------------------------------------- #
        #      Ugly hack to ignore EMPTY (in the wanted range) histograms with overflows/underflows
        # ----------------------------------------------------------------------------------------- #
        if (0):
            print "\n"
            print "=========== getEfficiency:"
            print "Dataset             = ", dataset.getName()
            print "Numerator:   entries=", n.GetEntries(), " Bins=", n.GetNbinsX(), " Low edge=", n.GetBinLowEdge(1)
            print "Denominator: entries=", d.GetEntries(), " Bins=", d.GetNbinsX(), " Low edge=", d.GetBinLowEdge(1)
            print "\n"
            
            print ">>>>>>  Sanity Check:  <<<<<<"
            print "Numerator Mean       = ", n.GetMean()
            print "Numerator RMS        = ", n.GetRMS()                                                                                                                
            print "Numerator Integral   = ", n.Integral(1, nBins)
            print "Denominator Mean     = ", d.GetMean()
            print "Denominator RMS      = ", d.GetRMS()
            print "Denominator Integral = ", d.Integral(1, nBins)
            
        if (n.GetMean() == 0 or d.GetMean() == 0): continue
        if (n.GetRMS()  == 0 or d.GetRMS()  == 0): continue
        if (n.Integral(1,nBins) == 0 or d.Integral(1,nBins) == 0): continue
        
#        if not (ROOT.TEfficiency.CheckConsistency(n,d)): continue;
        effic = ROOT.TEfficiency(n,d)
        effic.SetStatisticOption(statOption)
        
        weight = 1
        if dataset.isMC():
            weight = dataset.getCrossSection()
            effic.SetWeight(weight)
            
        eff = convert2TGraph(effic)
    

        
        # Apply Styles
        if "TT" in datasetName:
            if index == 0:
                styles.signalStyleHToTB500.apply(eff)
#            styles.ttStyle.apply(eff)
                eff.SetLineStyle(1)
                eff.SetLineWidth(3)
                eff.SetLineColor(619)
                legend = "Default: t#bar{t}"
                index = 1
            else:
                styles.signalStyleHToTB500.apply(eff)
                #            styles.ttStyle.apply(eff)
                eff.SetLineStyle(1)
                eff.SetLineWidth(3)
                eff.SetLineColor(417)
                legend = "#Delta R(q,q')>0.8: t#bar{t}"

        elif "M_500" in datasetName:
            styles.signalStyleHToTB500.apply(eff)
            legend = "H^{+} m_{H^{+}} = 500 GeV"
        elif "M_300" in datasetName:
            styles.signalStyleHToTB300.apply(eff)
            legend = "H^{+} m_{H^{+}} = 300 GeV"
        elif "M_1000" in datasetName:
            styles.signalStyleHToTB1000.apply(eff)
            legend = "H^{+} m_{H^{+}} = 1000 GeV"
        elif "M_800" in datasetName:
            styles.signalStyleHToTB800.apply(eff)
            legend = "H^{+} m_{H^{+}} = 800 GeV"
        elif "M_200" in datasetName:
            styles.signalStyleHToTB200.apply(eff)
            legend = "H^{+} m_{H^{+}} = 200 GeV"
        else:
            styles.ttStyle.apply(eff)
            legend = "other"


        EfficiencyList.append(histograms.HistoGraph(eff, legend, "lp", "P"))
            
    saveName = "Eff_"+numPath.split("/")[-1]+"Over"+denPath.split("/")[-1]
    if "Pt" in numPath:
        xMin = 0.0
#        rebinX = 2
        xMax = 805.0
#        xMax = 555.0 # For topPt < 500GeV
        xTitle = "p_{T} (GeV/c)"
        units = "GeV/c"
        _format = "%0.1f" + units
        yTitle = "Efficiency / "   + str(binwidth) + " "+units
        yMin = 0.0
        yMax = 1.1

    elif "_Eta" in numPath:
        xMin = -3.0
        xMax = +3.0
        xTitle = "#eta"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 1.1

    elif "_Mass" in numPath:
        xMin = 50.0
        xMax = 300
        xTitle = "M (GeV/c^{2})"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 1.1

    elif "_Phi" in numPath:
        xMin = -3
        xMax = +3
        xTitle = "#phi"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 1.1

    else:
        xMin = 0.0
        xMax = 250.0
        xTitle = "xTitle"
        yTitle = "yTitle"
        yMin = 0.0
        yMax = 1.1

    if "Fake" in numPath:
#        xMin = 95.0
#        rebinX = 4                                                                                                                                                  
        xMax = 805.0
        xTitle = "candidate p_{T} (GeV/c)"
        units = "GeV/c"
        _format = "%0.1f" + units
        yTitle = "Misid rate / "  + str(binwidth) + " " +units
        yMin = 0.0
        yMax = 0.11

    if "Event" in numPath:
        rebinX = 2
#        xMin = 95.0
        xMax = 805.0
        xTitle = "candidate p_{T} (GeV/c)"
        units = "GeV/c"
        _format = "%0.1f" + units
        yTitle = "Efficiency  / "  + str(binwidth) + " "+ units
        yMin = 0.0
        yMax = 1.1
        
    if "NonMatched" in numPath:
        xMin = 90.0
        rebinX = 4
        xMax = 700.0
        xMax = 555.0 # For topPt < 500GeV
        xTitle = "p_{T} (GeV)"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 0.15

    if "AllTopQuarkPt_MatchedBDT" in numPath and "TopQuarkPt" in denPath:
        xMin = 0.0
#        rebinX = 4
        xMax = 805.0 #705
        units = "GeV/c"
        xTitle = "generated top p_{T} (GeV/c)"
        yTitle = "Efficiency / "  + str(binwidth) + " " + units
        yMin = 0.0
        yMax = 1.1

    if "SameFake" in numPath:
        xMin = 95.0
        rebinX = 4
        xMax = 705.0
        xMax = 555.0 # For topPt < 500GeV
        xTitle = "p_{T} [GeV]"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 1.1


    options = {"ymin": yMin  , "ymax": yMax, "xmin":xMin, "xMax":xMax}

#    if "TT" in datasetName and ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath):
#        return
#    if "M_"  in datasetName and not ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath):
#        return

    p = plots.PlotBase(datasetRootHistos=EfficiencyList, saveFormats=kwargs.get("saveFormats"))

    #p = plots.ComparisonManyPlot(refEff, EfficiencyList, saveFormats=[])
    
    p.createFrame(saveName, opts=options)

#    p.histoMgr.forEachHisto(lambda h: h.getRootHisto().RebinX(kwargs.get("rebinX")))

     # Set Titles                                                                                                                                                                                                
#    p.getFrame().GetYaxis().SetTitle(kwargs.get("ylabel"))  #"ylabel"
    p.getFrame().GetXaxis().SetTitle(xTitle)
    p.getFrame().GetYaxis().SetTitle(yTitle)
    
    # Set range
    p.getFrame().GetXaxis().SetRangeUser(xMin, xMax)
    
    
    moveLegend = {"dx": -0.55, "dy": -0.02, "dh": -0.2}
#    moveLegend = {"dx": -0.55, "dy": -0.01, "dh": -0.1}
    p.setLegend(histograms.moveLegend(histograms.createLegend(), **moveLegend))
    
    # Add Standard Texts to plot                                                                                                                                                                
    histograms.addStandardTexts()

    p.draw()

    # Save plot in all formats
    savePath = os.path.join(opts.saveDir, "HplusMasses", numPath.split("/")[0], opts.optMode)
    save_path = savePath + opts.MVAcut

#    SavePlot(p, saveName, savePath)
    SavePlot(p, saveName, save_path)
    return
예제 #13
0
def GetCutEfficiencyHisto(dataset, histoName, statOpt, **kwargs):
    '''
    See https://root.cern.ch/doc/master/classTEfficiency.html
    '''
    HasKeys(["verbose", "normalizeTo", "cutDirection"], **kwargs)
    verbose = kwargs.get("verbose")
    normalizeTo = kwargs.get("normalizeTo")
    cutDirection = kwargs.get("cutDirection")
    Verbose("Calculating the cut-efficiency (%s) for histo with name %s" %
            (cutDirection, histoName))

    # Choose statistics options
    statOpts = [
        "kFCP", "kFNormal", "KFWilson", "kFAC", "kFFC", "kBJeffrey",
        "kBUniform", "kBayesian"
    ]
    if statOpt not in statOpts:
        raise Exception(
            "Invalid statistics option \"%s\". Please choose one from the following:\n\t%s"
            % (statOpt, "\n\t".join(statOpts)))

    if statOpt == "kFCP":
        statOption = ROOT.TEfficiency.kFCP  # Clopper-Pearson
    elif statOpt == "kFNormal":
        statOption = ROOT.TEfficiency.kFNormal  # Normal Approximation
    elif statOpt == "kFWilson":
        statOption = ROOT.TEfficiency.kFWilson  # Wilson
    elif statOpt == "kFAC":
        statOption = ROOT.TEfficiency.kFAC  # Agresti-Coull
    elif statOpt == "kFFC":
        statOption = ROOT.TEfficiency.kFFC  # Feldman-Cousins
    elif statOpt == "kBJeffrey":
        statOption = ROOT.TEfficiency.kBJeffrey  # Jeffrey
    elif statOpt == "kBUniform":
        statOption = ROOT.TEfficiency.kBUniform  # Uniform Prior
    elif statOpt == "kBayesian":
        statOption = ROOT.TEfficiency.kBayesian  # Custom Prior
    else:
        raise Exception("This should never be reached")

    # Declare variables & options
    first = True
    isData = False
    teff = ROOT.TEfficiency()

    # Get the ROOT histogram
    rootHisto = dataset.getDatasetRootHisto(histoName)

    # Normalise the histogram
    NormalizeRootHisto(datasetsMgr, rootHisto, dataset.isMC(), normalizeTo)
    #NormalizeRootHisto(datasetsMgr, rootHisto, d.isMC(), normalizeTo)

    ## Get a clone of the wrapped histogram normalized as requested.
    h = rootHisto.getHistogram()
    titleX = h.GetXaxis().GetTitle()
    binWidth = h.GetXaxis().GetBinWidth(0)
    titleY = "efficiency (%s) / %s" % (cutDirection,
                                       GetBinwidthDecimals(binWidth) %
                                       (binWidth))

    # If empty return
    if h.GetEntries() == 0:
        return

    # Create the numerator/denominator histograms
    numerator = h.Clone("Numerator")
    denominator = h.Clone("Denominator")

    # Reset the numerator/denominator histograms
    numerator.Reset()
    denominator.Reset()

    # Calculate the instances passing a given cut (all bins)
    nBinsX = h.GetNbinsX() + 1
    for iBin in range(1, nBinsX):

        nTotal = h.Integral(0, nBinsX)

        if cutDirection == ">":
            nPass = h.Integral(iBin + 1, nBinsX)
        elif cutDirection == "<":
            nPass = nTotal - h.Integral(iBin + 1, nBinsX)
        else:
            raise Exception(
                "Invalid cut direction  \"%s\". Please choose either \">\" or \"<\""
                % (cutDirection))

        # Sanity check
        if nPass < 0:
            nPass = 0

        # Fill the numerator/denominator histograms
        # print "iBin = %s, nPass = %s, nTotal = %s" % (iBin, nPass, nTotal)
        numerator.SetBinContent(iBin, nPass)
        numerator.SetBinError(iBin, math.sqrt(nPass) / 10)
        #
        denominator.SetBinContent(iBin, nTotal)
        denominator.SetBinError(iBin, math.sqrt(nTotal) / 10)

    # Check for negative values
    CheckNegatives(numerator, denominator)

    # Create TEfficiency object using the two histos
    eff = ROOT.TEfficiency(numerator, denominator)
    eff.SetStatisticOption(statOption)
    Verbose("The statistic option was set to %s" % (eff.GetStatisticOption()))

    # Save info in a table (debugging)
    table = []
    hLine = "=" * 70
    msgAlign = '{:<5} {:<20} {:<20} {:<20}'
    title = msgAlign.format("Bin", "Efficiency", "Error-Low", "Error-Up")
    table.append("\n" + hLine)
    table.append(title)
    table.append(hLine)
    for iBin in range(1, nBinsX):
        e = eff.GetEfficiency(iBin)
        errLow = eff.GetEfficiencyErrorLow(iBin)
        errUp = eff.GetEfficiencyErrorUp(iBin)
        values = msgAlign.format(iBin, e, errLow, errUp)
        table.append(values)
    table.append(hLine)

    # Verbose mode
    if verbose:
        for l in table:
            print l

    weight = 1
    if dataset.isMC():
        weight = dataset.getCrossSection()
    eff.SetWeight(weight)

    if first:
        teff = eff
        if dataset.isData():
            tn = numerator
            td = denominator
        first = False
    else:
        teff.Add(eff)
        if dataset.isData():
            tn.Add(numerator)
            td.Add(denominator)
    if isData:
        teff = ROOT.TEfficiency(tn, td)
        teff.SetStatisticOption(self.statOption)

    style = styleDict[dataset.getName()]
    return Convert2TGraph(teff, dataset, style, titleX, titleY)
def PlotProb(datasets, numPath, denPath):

    EfficiencyList = []
    index = 0
    for dataset in datasets:

        datasetName = dataset.getName()
        print "Dataset = ", datasetName

        statOption = ROOT.TEfficiency.kFNormal
        ##        n = dataset.getDatasetRootHisto(numPath).getHistogram()
        #        n.normalizeToOne()
        ##        d = dataset.getDatasetRootHisto(denPath).getHistogram()
        nn = dataset.getDatasetRootHisto(numPath)
        nn.normalizeToLuminosity(35.8 * (10**3))
        n = nn.getHistogram()
        dd = dataset.getDatasetRootHisto(denPath)
        #        dd.normalizeToOne()
        #        dd.normalizeToLuminosity(36.3*(10**3))
        dd.normalizeToLuminosity(35.8 * (10**3))
        #        dd.normalizeByCrossSection()
        d = dd.getHistogram()

        #        if "TT" in datasetName and ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath):
        #            continue
        #        elif "M_"  in datasetName and not ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath):
        #            continue

        if "Event" in numPath:
            n.Rebin(10)
            d.Rebin(10)
        else:
            n.Rebin(5)
            d.Rebin(5)

        if d.GetEntries() == 0 or n.GetEntries() == 0:
            continue

        if n.GetEntries() > d.GetEntries():
            continue
        # Check Negatives
        CheckNegatives(n, d, True)

        # Remove Negatives
        RemoveNegatives(n)

        nBins = d.GetNbinsX()
        xMin = d.GetXaxis().GetXmin()
        xMax = d.GetXaxis().GetXmax()

        binwidth = int(n.GetBinWidth(0))

        # ----------------------------------------------------------------------------------------- #
        #      Ugly hack to ignore EMPTY (in the wanted range) histograms with overflows/underflows
        # ----------------------------------------------------------------------------------------- #
        if (0):
            print "\n"
            print "=========== getEfficiency:"
            print "Dataset             = ", dataset.getName()
            print "Numerator:   entries=", n.GetEntries(
            ), " Bins=", n.GetNbinsX(), " Low edge=", n.GetBinLowEdge(1)
            print "Denominator: entries=", d.GetEntries(
            ), " Bins=", d.GetNbinsX(), " Low edge=", d.GetBinLowEdge(1)
            print "\n"

            print ">>>>>>  Sanity Check:  <<<<<<"
            print "Numerator Mean       = ", n.GetMean()
            print "Numerator RMS        = ", n.GetRMS()
            print "Numerator Integral   = ", n.Integral(1, nBins)
            print "Denominator Mean     = ", d.GetMean()
            print "Denominator RMS      = ", d.GetRMS()
            print "Denominator Integral = ", d.Integral(1, nBins)

        if (n.GetMean() == 0 or d.GetMean() == 0): continue
        if (n.GetRMS() == 0 or d.GetRMS() == 0): continue
        if (n.Integral(1, nBins) == 0 or d.Integral(1, nBins) == 0): continue

        #        if not (ROOT.TEfficiency.CheckConsistency(n,d)): continue;
        effic = ROOT.TEfficiency(n, d)
        effic.SetStatisticOption(statOption)

        weight = 1
        if dataset.isMC():
            weight = dataset.getCrossSection()
            effic.SetWeight(weight)

        eff = convert2TGraph(effic)

        # Apply Styles
        if "TT" in datasetName:
            if index == 0:
                styles.signalStyleHToTB500.apply(eff)
                #            styles.ttStyle.apply(eff)
                eff.SetLineStyle(1)
                eff.SetLineWidth(3)
                eff.SetLineColor(619)
                legend = "Default: t#bar{t}"
                index = 1
            else:
                styles.signalStyleHToTB500.apply(eff)
                #            styles.ttStyle.apply(eff)
                eff.SetLineStyle(1)
                eff.SetLineWidth(3)
                eff.SetLineColor(417)
                legend = "#Delta R(q,q')>0.8: t#bar{t}"

        elif "M_500" in datasetName:
            styles.signalStyleHToTB500.apply(eff)
            legend = "H^{+} m_{H^{+}} = 500 GeV"
        elif "M_300" in datasetName:
            styles.signalStyleHToTB300.apply(eff)
            legend = "H^{+} m_{H^{+}} = 300 GeV"
        elif "M_1000" in datasetName:
            styles.signalStyleHToTB1000.apply(eff)
            legend = "H^{+} m_{H^{+}} = 1000 GeV"
        elif "M_800" in datasetName:
            styles.signalStyleHToTB800.apply(eff)
            legend = "H^{+} m_{H^{+}} = 800 GeV"
        elif "M_200" in datasetName:
            styles.signalStyleHToTB200.apply(eff)
            legend = "H^{+} m_{H^{+}} = 200 GeV"
        else:
            styles.ttStyle.apply(eff)
            legend = "other"

        EfficiencyList.append(histograms.HistoGraph(eff, legend, "lp", "P"))

    saveName = "Eff_" + numPath.split("/")[-1] + "Over" + denPath.split(
        "/")[-1]
    if "Pt" in numPath:
        xMin = 0.0
        #        rebinX = 2
        xMax = 805.0
        #        xMax = 555.0 # For topPt < 500GeV
        xTitle = "p_{T} (GeV/c)"
        units = "GeV/c"
        _format = "%0.1f" + units
        yTitle = "Efficiency / " + str(binwidth) + " " + units
        yMin = 0.0
        yMax = 1.1

    elif "_Eta" in numPath:
        xMin = -3.0
        xMax = +3.0
        xTitle = "#eta"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 1.1

    elif "_Mass" in numPath:
        xMin = 50.0
        xMax = 300
        xTitle = "M (GeV/c^{2})"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 1.1

    elif "_Phi" in numPath:
        xMin = -3
        xMax = +3
        xTitle = "#phi"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 1.1

    else:
        xMin = 0.0
        xMax = 250.0
        xTitle = "xTitle"
        yTitle = "yTitle"
        yMin = 0.0
        yMax = 1.1

    if "Fake" in numPath:
        #        xMin = 95.0
        #        rebinX = 4
        xMax = 805.0
        xTitle = "candidate p_{T} (GeV/c)"
        units = "GeV/c"
        _format = "%0.1f" + units
        yTitle = "Misid rate / " + str(binwidth) + " " + units
        yMin = 0.0
        yMax = 0.11

    if "Event" in numPath:
        rebinX = 2
        #        xMin = 95.0
        xMax = 805.0
        xTitle = "candidate p_{T} (GeV/c)"
        units = "GeV/c"
        _format = "%0.1f" + units
        yTitle = "Efficiency  / " + str(binwidth) + " " + units
        yMin = 0.0
        yMax = 1.1

    if "NonMatched" in numPath:
        xMin = 90.0
        rebinX = 4
        xMax = 700.0
        xMax = 555.0  # For topPt < 500GeV
        xTitle = "p_{T} (GeV)"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 0.15

    if "AllTopQuarkPt_MatchedBDT" in numPath and "TopQuarkPt" in denPath:
        xMin = 0.0
        #        rebinX = 4
        xMax = 805.0  #705
        units = "GeV/c"
        xTitle = "generated top p_{T} (GeV/c)"
        yTitle = "Efficiency / " + str(binwidth) + " " + units
        yMin = 0.0
        yMax = 1.1

    if "SameFake" in numPath:
        xMin = 95.0
        rebinX = 4
        xMax = 705.0
        xMax = 555.0  # For topPt < 500GeV
        xTitle = "p_{T} [GeV]"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 1.1

    options = {"ymin": yMin, "ymax": yMax, "xmin": xMin, "xMax": xMax}

    #    if "TT" in datasetName and ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath):
    #        return
    #    if "M_"  in datasetName and not ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath):
    #        return

    p = plots.PlotBase(datasetRootHistos=EfficiencyList,
                       saveFormats=kwargs.get("saveFormats"))

    #p = plots.ComparisonManyPlot(refEff, EfficiencyList, saveFormats=[])

    p.createFrame(saveName, opts=options)

    #    p.histoMgr.forEachHisto(lambda h: h.getRootHisto().RebinX(kwargs.get("rebinX")))

    # Set Titles
    #    p.getFrame().GetYaxis().SetTitle(kwargs.get("ylabel"))  #"ylabel"
    p.getFrame().GetXaxis().SetTitle(xTitle)
    p.getFrame().GetYaxis().SetTitle(yTitle)

    # Set range
    p.getFrame().GetXaxis().SetRangeUser(xMin, xMax)

    moveLegend = {"dx": -0.55, "dy": -0.02, "dh": -0.2}
    #    moveLegend = {"dx": -0.55, "dy": -0.01, "dh": -0.1}
    p.setLegend(histograms.moveLegend(histograms.createLegend(), **moveLegend))

    # Add Standard Texts to plot
    histograms.addStandardTexts()

    p.draw()

    # Save plot in all formats
    savePath = os.path.join(opts.saveDir, "HplusMasses",
                            numPath.split("/")[0], opts.optMode)
    save_path = savePath + opts.MVAcut

    #    SavePlot(p, saveName, savePath)
    SavePlot(p, saveName, save_path)
    return
예제 #15
0
def CalcEfficiency(datasetsMgr, numPath, denPath, intLumi):
    '''
    Plot the bin-by-bin ratio numPath/denPath for every dataset.

    For each dataset both histograms are normalised to intLumi and, if the
    histogram kwargs contain "binList", rebinned to those bin edges. The
    ratio of the bin contents is evaluated at each bin centre, clamped to
    the range [0, 1], and stored in a ROOT.TGraph (no error bars). All graphs
    are drawn together and saved as .png through SavePlot.

    \\param datasetsMgr  dataset manager; getAllDatasets() is iterated
    \\param numPath      path of the numerator histogram
    \\param denPath      path of the denominator histogram
    \\param intLumi      luminosity passed to normalizeToLuminosity

    Returns None.

    NOTE(review): relies on the module-level name "opts" - confirm it is defined.
    '''
    # Definitions
    myList  = []
    _kwargs = GetHistoKwargs(numPath, opts)

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        x = []
        y = []

        numRootHisto = dataset.getDatasetRootHisto(numPath)
        numRootHisto.normalizeToLuminosity(intLumi)
        num = numRootHisto.getHistogram()
        denRootHisto = dataset.getDatasetRootHisto(denPath)
        denRootHisto.normalizeToLuminosity(intLumi)
        den = denRootHisto.getHistogram()

        if "binList" in _kwargs:
            xBins   = _kwargs["binList"]
            nx      = len(xBins)-1
            num     = num.Rebin(nx, "", xBins)
            den     = den.Rebin(nx, "", xBins)

        nPoints = num.GetNbinsX()
        for i in range(1, nPoints+1):
            nbin = num.GetBinContent(i)
            dbin = den.GetBinContent(i)

            if nbin < 0:
                nbin = 0
            # An empty or negative denominator bin has no defined efficiency:
            # report 0 instead of dividing by zero
            if dbin <= 0:
                nbin = 0
                dbin = 1
            # Efficiency cannot exceed one
            if nbin > dbin:
                nbin = dbin

            x.append(num.GetBinLowEdge(i)+0.5*num.GetBinWidth(i))
            y.append(nbin/dbin)

        eff = ROOT.TGraph(nPoints, array.array("d",x), array.array("d",y))

        # Apply default style (according to dataset name)
        plots._plotStyles[dataset.getName()].apply(eff)

        # Signal datasets ("charged" in the name) get the style of their mass point,
        # the mass being the text after the last "M_" in the dataset name
        if "charged" in dataset.getName().lower():
            mass = dataset.getName().split("M_")[-1]
            s = styles.getSignalStyleHToTB_M(mass)
            s.apply(eff)

        # Append in list
        myList.append(histograms.HistoGraph(eff, plots._legendLabels[dataset.getName()], "lp", "P"))

    # Define save name
    saveName = "Eff_" + numPath.split("/")[-1] + "Over" + denPath.split("/")[-1]

    # Plot the efficiency
    p = plots.PlotBase(datasetRootHistos=myList, saveFormats=[])
    plots.drawPlot(p, saveName, **_kwargs)

    # Save plot in all formats
    savePath = os.path.join(opts.saveDir, numPath.split("/")[0], opts.optMode)
    SavePlot(p, saveName, savePath, saveFormats = [".png"])#, ".pdf"])
    return
def PlotEfficiency(datasetsMgr, numPath, denPath, intLumi):
    '''
    Draw the efficiency numPath/denPath of every dataset on a single plot.

    Both histograms of a dataset are normalised to intLumi, optionally
    rebinned to the "binList" of the histogram kwargs, and turned into a
    ROOT.TEfficiency with Clopper-Pearson statistics, which is converted to
    a graph with convert2TGraph. Datasets with an empty numerator or
    denominator, or with more numerator than denominator entries, are
    skipped, as are "TT" datasets when numPath contains "Fake".
    The plot is saved as .png, .pdf and .C through SavePlot.

    Returns None. Uses the module-level "opts".
    '''
    histoKwargs = GetHistoKwargs(numPath, opts)
    graphs      = []

    # For-loop: All datasets
    for dset in datasetsMgr.getAllDatasets():
        if "Fake" in numPath and "TT" in dset.getName():
            continue

        # Numerator first, then denominator; both normalised to the luminosity
        numWrapper = dset.getDatasetRootHisto(numPath)
        numWrapper.normalizeToLuminosity(intLumi)
        hNum = numWrapper.getHistogram()
        denWrapper = dset.getDatasetRootHisto(denPath)
        denWrapper.normalizeToLuminosity(intLumi)
        hDen = denWrapper.getHistogram()

        # Optional variable binning
        if "binList" in histoKwargs:
            edges  = histoKwargs["binList"]
            nEdges = len(edges)-1
            hNum   = hNum.Rebin(nEdges, "", edges)
            hDen   = hDen.Rebin(nEdges, "", edges)

        # Sanity checks
        isEmpty = hDen.GetEntries() == 0 or hNum.GetEntries() == 0
        if isEmpty:
            continue
        if hNum.GetEntries() > hDen.GetEntries():
            continue

        # Remove negative bins and ensure numerator bin <= denominator bin
        CheckNegatives(hNum, hDen, True)

        # Efficiency with Clopper-Pearson stats, converted to a graph
        tEff = ROOT.TEfficiency(hNum, hDen) # fixme: investigate warnings
        tEff.SetStatisticOption(ROOT.TEfficiency.kFCP)
        graph = convert2TGraph(tEff)

        # Default style and legend label are looked up by dataset name
        plots._plotStyles[dset.getName()].apply(graph)
        label = plots._legendLabels[dset.getName()]
        graphs.append(histograms.HistoGraph(graph, label, "lp", "P"))

    # Define save name
    numName  = numPath.split("/")[-1]
    denName  = denPath.split("/")[-1]
    saveName = "Eff_" + numName + "Over" + denName

    # Plot the efficiency
    p = plots.PlotBase(datasetRootHistos=graphs, saveFormats=[])
    plots.drawPlot(p, saveName, **histoKwargs)

    # Save plot in all formats
    baseDir = os.path.join(opts.saveDir, numPath.split("/")[0], opts.optMode)
    SavePlot(p, saveName, baseDir + opts.MVAcut, saveFormats = [".png", ".pdf", ".C"])
    return
예제 #17
0
def GetEfficiency(datasetsMgr, datasets, numerator="Numerator",denominator="Denominator", **kwargs):
    '''
    Return the combined efficiency numerator/denominator of the given datasets.

    For every dataset a ROOT.TEfficiency (Clopper-Pearson) is built from the
    two histograms; MC histograms are first normalised to the luminosity
    returned by GetLumi(datasetsMgr). MC efficiencies are weighted with the
    dataset cross section and added up. For data the numerator and
    denominator histograms are summed over the data datasets and the total
    efficiency is rebuilt from those sums, replacing whatever was
    accumulated before.

    Datasets failing the sanity checks (no entries, zero mean, RMS or
    integral) are skipped.

    \\param datasetsMgr  dataset manager, used only to obtain the luminosity
    \\param datasets     iterable of dataset objects
    \\param numerator    path of the numerator histogram
    \\param denominator  path of the denominator histogram
    \\param kwargs       accepted but not used

    Returns convert2TGraph(total efficiency). Raises Exception if numerator
    and denominator have a different number of bins.

    TEfficiency method:
    See https://root.cern.ch/doc/master/classTEfficiency.html
    '''
    lumi = GetLumi(datasetsMgr)

    # Select Statistic Options. Alternatives offered by ROOT.TEfficiency:
    #   kFCP      Clopper-Pearson
    #   kFNormal  Normal Approximation
    #   kFWilson  Wilson
    #   kFAC      Agresti-Coull
    #   kFFC      Feldman-Cousins
    #   kBJeffrey Jeffrey
    #   kBUniform Uniform Prior
    #   kBayesian Custom Prior
    statOption = ROOT.TEfficiency.kFCP

    first = True
    teff  = ROOT.TEfficiency()
    # Running sums of the data histograms. They start as None so that a data
    # dataset following an MC dataset does not hit an unbound name.
    tn = None
    td = None

    # For-loop: All datasets
    for dataset in datasets:

        num = dataset.getDatasetRootHisto(numerator)
        den = dataset.getDatasetRootHisto(denominator)

        if dataset.isMC():
            num.normalizeToLuminosity(lumi)
            den.normalizeToLuminosity(lumi)

        # Get Numerator and Denominator
        n = num.getHistogram()
        d = den.getHistogram()

        if d.GetEntries() == 0 or n.GetEntries() == 0:
            msg =  "Denominator Or Numerator has no entries"
            Print(ErrorStyle() + msg + NormalStyle(), True)
            continue

        # Check Negatives
        CheckNegatives(n, d, True)

        # Remove Negatives
        RemoveNegatives(n)

        # Sanity Check
        if n.GetNbinsX() != d.GetNbinsX():
            raise Exception("Numerator and Denominator Bins are NOT equal!")

        nBins = d.GetNbinsX()

        # Ugly hack to ignore EMPTY (in the wanted range) histograms with overflows/underflows
        if (n.GetMean() == 0 or d.GetMean() == 0): continue
        if (n.GetRMS()  == 0 or d.GetRMS()  == 0): continue
        if (n.Integral(1,nBins) == 0 or d.Integral(1,nBins) == 0): continue

        Verbose("Passed the sanity check", True)

        eff = ROOT.TEfficiency(n, d)
        eff.SetStatisticOption(statOption)

        # MC efficiencies are weighted with the cross section, data with 1
        weight = 1
        if dataset.isMC():
            weight = dataset.getCrossSection()
        eff.SetWeight(weight)

        if first:
            teff  = eff
            first = False
        else:
            teff.Add(eff)

        if dataset.isData():
            # Sum the data histograms and rebuild the total efficiency from the sums
            if tn is None:
                tn = n
                td = d
            else:
                tn.Add(n)
                td.Add(d)
            teff = ROOT.TEfficiency(tn, td)
            teff.SetStatisticOption(statOption)

    Verbose("Final tEff", True)
    return convert2TGraph(teff)
예제 #18
0
def Fit(datasets, histo, function):

    FitList = []
    for dataset in datasets:

        datasetName = dataset.getName()
        print "Dataset = ", datasetName
        hh = dataset.getDatasetRootHisto(histo)

        hh.normalizeToOne()
        h = hh.getHistogram()

        #h = dataset.getDatasetRootHisto(histo).getHistogram()
        xMin = h.GetXaxis().GetXmin()
        xMax = h.GetXaxis().GetXmax()
        yMin = 0
        yMax = 1.2
        #statOption = ROOT.TEfficiency.kFNormal
        if "TT" in datasetName:
            if function == "gaus":
                fitGauss = ROOT.TF1("fitGauss", "gaus", -2.5, 2.5)
                #                TF1 *fitBoFreq = new TF1("fitBoFreq","[0]*x+[1]",0,20);
                #                h.Fit("gaus")
                #fitTest = ROOT.TF1("fitTest", "0.01", -2.5, 2.5)

                h.Fit("fitGauss", "SRBM")
                #h.GetListOfFunctions().Add(fitTest)
                legend = "TT"

        legend = "a legend"
        print "Legend", legend
        saveName = histo.split("/")[-1] + "_Fit"

        print saveName

        xTitle = "fixXTitle"
        yTitle = "fixYTitle"

        yMin = 0.
        yMax = 0.03
        xMin = -2.3
        xMax = 2.3
        kwargs = {}

        options = {"ymin": yMin, "ymax": yMax, "xmin": xMin, "xMax": xMax}
        FitList.append(h)
        #p = plots.MCPlot(dataset, h, normalizeToLumi=0, saveFormats=[], **kwargs)

        p = plots.PlotBase(datasetRootHistos=FitList,
                           saveFormats=kwargs.get("saveFormats"))
        p.createFrame(saveName, opts=options)

        p.getFrame().GetXaxis().SetTitle(xTitle)
        p.getFrame().GetYaxis().SetTitle(yTitle)
        #p.histoMgr.setHistoDrawStyle(datasetName, "AP")

        # Set range
        p.getFrame().GetXaxis().SetRangeUser(xMin, xMax)

        moveLegend = {"dx": -0.55, "dy": -0.01, "dh": -0.1}

        p.setLegend(
            histograms.moveLegend(histograms.createLegend(), **moveLegend))
        # Add Standard Texts to plot
        histograms.addStandardTexts()

        p.draw()

        # Save plot in all formats
        savePath = os.path.join(opts.saveDir, "HplusMasses",
                                histo.split("/")[0], opts.optMode)
        save_path = savePath
        SavePlot(p, saveName, save_path)
    return
예제 #19
0
def PlotEfficiency(datasetsMgr, numPath, denPath, intLumi):
    """Plot per-dataset efficiencies and a data-vs-MC comparison with ratio pad.

    For every dataset the numerator (`numPath`) and denominator (`denPath`)
    histograms are fetched; for MC datasets they are first normalised to
    `intLumi`. A Clopper-Pearson TEfficiency is built from each pair, converted
    to a TGraph, styled by dataset name and collected. All graphs are drawn
    together, then data is compared against the MC graphs in a second plot,
    which is saved as .png/.pdf/.C.

    Arguments:
    datasetsMgr -- dataset manager providing getAllDatasets()
    numPath     -- path of the numerator histogram
    denPath     -- path of the denominator histogram
    intLumi     -- luminosity passed to normalizeToLuminosity()/setLuminosity()

    Reads the module-level `opts` (saveDir, optMode, MVAcut).

    NOTE(review): as written this function cannot run; see the notes at the
    garbled print statement and at the use of `save_path1` below.
    """
    # Definitions
    myList = []
    myList_MC = []
    myList_Data = []
    # NOTE(review): `index` is never read in this function.
    index = 0
    _kwargs = GetHistoKwargs(numPath, opts)
    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():

        # Datasets whose name contains "TT" are skipped for "Fake" numerators
        if "Fake" in numPath and "TT" in dataset.getName():
            continue

        if dataset.isMC():
            # MC: normalise both histograms to the requested luminosity
            n = dataset.getDatasetRootHisto(numPath)
            n.normalizeToLuminosity(intLumi)
            num = n.getHistogram()
            d = dataset.getDatasetRootHisto(denPath)
            d.normalizeToLuminosity(intLumi)
            den = d.getHistogram()

        else:
            # Data: histograms are used without normalisation
            num = dataset.getDatasetRootHisto(numPath).getHistogram()
            den = dataset.getDatasetRootHisto(denPath).getHistogram()

        # Integrals include the underflow (0) and overflow (N+1) bins
        total = den.Integral(0, den.GetXaxis().GetNbins() + 1)
        selected = num.Integral(0, num.GetXaxis().GetNbins() + 1)
        # NOTE(review): the next line looks garbled (masked text) and is not
        # valid Python; the original print statement must be restored.
        print "Pass :"******" events"
        # NOTE(review): this division happens before the emptiness checks
        # below, so an empty denominator (total == 0) raises ZeroDivisionError.
        print "Numerical Efficiency", numPath, dataset.getName(), ":", round(
            selected / total, 3)

        if "binList" in _kwargs:
            #if len(_kwargs["binList"]) == 1:
            #    continue
            # Rebin to the bin edges given in "binList"
            xBins = _kwargs["binList"]
            nx = len(xBins) - 1
            num = num.Rebin(nx, "", xBins)
            den = den.Rebin(nx, "", xBins)
        elif "Eta" in numPath or "Phi" in numPath:
            # No explicit binning: merge pairs of bins for Eta/Phi histograms
            num = num.Rebin(2)
            den = den.Rebin(2)
        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den)  # fixme: investigate warnings
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP)  #FCP

        # Convert to TGraph
        eff = convert2TGraph(eff)
        # Apply default style (according to dataset name)
        plots._plotStyles[dataset.getName()].apply(eff)

        # Append in list
        myList.append(
            histograms.HistoGraph(eff, plots._legendLabels[dataset.getName()],
                                  "lp", "P"))
        if dataset.isMC():
            # NOTE(review): eff_MC, eff_QCD and eff_TT are assigned but never
            # read in this function.
            eff_MC = eff
            if "QCD" in dataset.getName():
                eff_QCD = eff
            elif "TT" in dataset.getName():
                eff_TT = eff
            myList_MC.append(
                histograms.HistoGraph(eff,
                                      plots._legendLabels[dataset.getName()],
                                      "lp", "P"))
        else:
            # Keep the data graph; it is the reference of the comparison plot.
            # If several non-MC datasets pass, only the last one is kept here.
            eff_Data = eff
            plots._plotStyles[dataset.getName()].apply(eff_Data)
            #styles.dataStyle.apply(eff_Data)
            eff_Data.SetMarkerSize(1.2)
            myList_Data.append(
                histograms.HistoGraph(eff_Data,
                                      plots._legendLabels[dataset.getName()],
                                      "p", "P"))

    numPath = numPath.replace("AfterAllSelections_", "")
    # Define save name
    saveName = "Eff_" + numPath.split("/")[-1]

    # Plot the efficiency
    p = plots.PlotBase(datasetRootHistos=myList, saveFormats=[])
    plots.drawPlot(p, saveName, **_kwargs)

    # Data versus all MC graphs.
    # NOTE(review): eff_Data is only bound if a non-MC dataset passed the
    # sanity checks above; otherwise this raises UnboundLocalError.
    p1 = plots.ComparisonManyPlot(histograms.HistoGraph(eff_Data,
                                                        "Data",
                                                        drawStyle="P"),
                                  myList_MC,
                                  saveFormats=[])

    # Save plot in all formats
    savePath = os.path.join(opts.saveDir, "HplusMasses",
                            numPath.split("/")[0], opts.optMode)
    # The MVA cut label is appended directly to the path (no separator added)
    save_path = savePath + opts.MVAcut
    print "save_path", save_path
    # Draw and save the plot
    p1.setLuminosity(intLumi)
    _kwargs["ratio"] = True
    _kwargs["ratioInvert"] = True
    _kwargs["cutBoxY"] = {
        "cutValue": 1.0,
        "fillColor": 16,
        "box": False,
        "line": True,
        "greaterThan": True,
        "mainCanvas": True,
        "ratioCanvas": True
    }
    # NOTE(review): `save_path1` is not assigned anywhere in this function;
    # unless it is a module-level global this raises NameError. Possibly
    # `saveName` or `save_path` was meant -- confirm against the original.
    plots.drawPlot(p1, save_path1, **_kwargs)
    SavePlot(p1, saveName, save_path, saveFormats=[".png", ".pdf", ".C"])
    return
def main(opts):
    """Merge the ROOT files of several pseudo-multicrab directories into one.

    The plain files found at the top level of the first pseudo-multicrab
    (opts.mcrabs[0]) are copied to opts.outputDir. Then, for every dataset, a
    new file <outputDir>/<dataset>/res/histograms-<dataset>.root is created and
    each top-level key of the corresponding input file of every
    pseudo-multicrab is passed to MergeFiles(), unless the output file already
    lists that name among its keys.
    """
    # Setup & configure the dataset manager
    datasetsMgr = GetDatasetsFromDir(opts)
    datasetsMgr.updateNAllEventsToPUWeighted()
    datasetsMgr.loadLuminosities()  # from lumi.json

    # Print dataset information
    if opts.verbose:
        datasetsMgr.PrintCrossSections()
        datasetsMgr.PrintLuminosities()
        datasetsMgr.PrintInfo()

    # Make sure the output directory exists
    if not os.path.exists(opts.outputDir):
        os.makedirs(opts.outputDir)

    # Get all entries under the first input pseudo-multicrab
    firstMcrab = opts.mcrabs[0]
    src_files = os.listdir(firstMcrab)
    nDatasets = len(datasetsMgr.getAllDatasets())
    nDirs = len(opts.mcrabs)

    # Copy the plain files (e.g. multicrab.cfg, lumi.json); skip directories
    for fileName in src_files:
        filePath = os.path.join(firstMcrab, fileName)
        if os.path.isfile(filePath):
            shutil.copy(filePath, opts.outputDir)

    # Column layout of the progress message
    progressFormat = "{:<10} {:<2} {:>1} {:<2} {:<10} {:<2} {:>1} {:<2} {:<10} {:<2} {:>1} {:<2}"

    # For-loop: All datasets
    for i, dataset in enumerate(datasetsMgr.getAllDatasets(), 1):
        dsetName = dataset.getName()

        # Re-create the pseudo-multicrab directory structure
        dirTree = os.path.join(opts.outputDir, dsetName, "res")
        if not os.path.exists(dirTree):
            os.makedirs(dirTree)

        outputFileName = dirTree + "/histograms-%s.root" % (dsetName)
        Verbose("Creating ROOT file %s" % (outputFileName), True)
        outputFile = ROOT.TFile.Open(outputFileName, "RECREATE")

        # For-loop: All pseudo-multicrabs in list
        for j, dirName in enumerate(opts.mcrabs, 1):

            # Open this dataset's input file, then make the output file the
            # current directory again
            resDir = os.path.join(dirName, dsetName, "res")
            inputFile = ROOT.TFile.Open("%s/histograms-%s.root" % (resDir, dsetName))
            outputFile.cd()
            # NOTE(review): the input file is never closed explicitly; whether
            # it can be closed here depends on what MergeFiles() keeps alive.

            keys = inputFile.GetListOfKeys()
            nFolders = len(keys)

            # For-loop: All folders in ROOT file
            for k, key in enumerate(keys, 1):
                folderName = key.GetName()

                # Inform user of progress
                fields = (
                    "%sDataset" % (ShellStyles.HighlightAltStyle()),
                    "%d" % i,
                    "/",
                    "%d" % (nDatasets),
                    "%sPseudomulticrab" % (ShellStyles.NoteStyle()),
                    "%d" % j,
                    "/",
                    "%d" % (nDirs),
                    "%sFolder" % (ShellStyles.HighlightStyle()),
                    "%d" % k,
                    "/",
                    "%d%s" % (nFolders, ShellStyles.NormalStyle()),
                )
                msg = progressFormat.format(*fields)
                # The flag is True only for the very first message
                aux.PrintFlushed(msg, i*j*k == 1)

                # Merge only objects not yet present in the output file
                if folderName not in outputFile.GetListOfKeys():
                    Verbose("Merging %s" % folderName, False)
                    MergeFiles(inputFile.Get(folderName), outputFile, msg)

        Verbose("Writing & closing file %s" % (outputFile.GetName()), True)
        outputFile.Write()
        outputFile.Close()

    print
    Print("Results saved in %s" % (ShellStyles.SuccessStyle() + opts.outputDir + ShellStyles.NormalStyle()), True)
    return
예제 #21
0
def PlotEfficiency(datasetsMgr, numPath, denPath, eff_def):
    """Compare the efficiency of every dataset with that of the default "TT" sample.

    For each dataset a Clopper-Pearson TEfficiency is built from the numerator
    (`numPath`) and denominator (`denPath`) histograms and converted to a
    TGraph. The dataset named "TT" provides the reference graph; all other
    datasets are drawn against it in a ComparisonManyPlot, which is saved as
    .png/.pdf/.C. When `eff_def` is "genuineTop", the reference graph and the
    variation graphs are also handed to an UncertaintyWriter and a JSON file
    is written.

    Arguments:
    datasetsMgr -- dataset manager providing getAllDatasets()
    numPath     -- path of the numerator histogram
    denPath     -- path of the denominator histogram
    eff_def     -- efficiency definition; selects the x-axis label and is part
                   of the save name ("fakeTop", "inclusiveTop", "genuineTop")

    Reads the module-level `opts` (type, saveDir, optMode, BDT, analysisName).

    Raises:
    RuntimeError -- if no "TT" dataset passed the sanity checks, so that there
                    is no reference graph to compare against
    """
    # (old, new) substitutions, applied in this order, that turn a dataset
    # name into a legend entry; a closing ")" is appended afterwards.
    legendSubstitutions = (
        ("TT_", "t#bar{t} ("),
        ("isr", "ISR "),
        ("fsr", "FSR "),
        ("hdamp", "hdamp "),
        ("DOWN", "down"),
        ("UP", "up"),
        ("mtop1665", "m_{t} = 166.5 GeV"),
        ("mtop1695", "m_{t} = 169.5 GeV"),
        ("mtop1715", "m_{t} = 171.5 GeV"),
        ("mtop1735", "m_{t} = 173.5 GeV"),
        ("mtop1755", "m_{t} = 175.5 GeV"),
        ("mtop1785", "m_{t} = 178.5 GeV"),
        ("TuneEE5C", "Herwig++"),
    )

    _kwargs = GetHistoKwargs(numPath, opts)

    # Rebinning is optional: a missing or empty "binList" leaves the
    # histograms untouched.
    xBins = _kwargs.get("binList", [])
    nx = len(xBins) - 1 if len(xBins) > 0 else 0

    myList = []          # HistoGraph objects of the variation samples
    datasetList = []     # names of the variation samples
    ttVariationEff = []  # efficiency graphs of the variation samples
    default_eff = None   # unstyled clone of the reference "TT" graph
    refGraph = None      # HistoGraph of the reference "TT" sample
    counter = 0          # index into styles.markerStyles

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        num = dataset.getDatasetRootHisto(numPath).getHistogram()
        den = dataset.getDatasetRootHisto(denPath).getHistogram()
        if nx > 0:
            num = num.Rebin(nx, "", xBins)
            den = den.Rebin(nx, "", xBins)

        # Sanity checks: skip empty histograms and numerator > denominator
        nNum = num.GetEntries()
        nDen = den.GetEntries()
        if nDen == 0 or nNum == 0 or nNum > nDen:
            continue

        # Create Efficiency plots with Clopper-Pearson stats, as a TGraph
        eff = ROOT.TEfficiency(num, den)
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP)
        gEff = convert2TGraph(eff)

        name = dataset.getName()
        if name == "TT":
            # The current dataset is the reference sample, so its own
            # histograms are used instead of fetching "TT" a second time.
            # Clone before styling: the copy kept for the uncertainties is
            # the unstyled graph.
            default_eff = gEff.Clone()
            styles.ttStyle.apply(gEff)
            legend_ref = "t#bar{t}"
            if opts.type == "partonShower":
                legend_ref = "t#bar{t} (Pythia8)"
            elif opts.type == "evtGen":
                legend_ref = "t#bar{t} (Powheg)"
            refGraph = histograms.HistoGraph(gEff, legend_ref, "p", "P")
        else:
            datasetList.append(name)
            ttVariationEff.append(gEff)

            styles.markerStyles[counter].apply(gEff)
            counter += 1

            legend = name
            for old, new in legendSubstitutions:
                legend = legend.replace(old, new)
            legend += ")"
            myList.append(histograms.HistoGraph(gEff, legend, "p", "P"))

    if refGraph is None:
        raise RuntimeError("PlotEfficiency: no usable \"TT\" reference efficiency "
                           "for numerator \"%s\" and denominator \"%s\""
                           % (numPath, denPath))

    units = "GeV/c"
    if eff_def == "fakeTop":
        _kwargs["xlabel"] = "candidate p_{T} (%s)" % (units)
    elif eff_def == "inclusiveTop" or eff_def == "genuineTop":
        _kwargs["xlabel"] = "generated top p_{T} (%s)" % (units)
    else:
        _kwargs["xlabel"] = "p_{T} (%s)" % (units)

    # Define stuff
    saveName = "Efficiency_%s_%s" % (eff_def, opts.type)

    # Plot the efficiency
    p = plots.ComparisonManyPlot(refGraph, myList, saveFormats=[])
    savePath = os.path.join(opts.saveDir, opts.optMode)
    plots.drawPlot(p, savePath, **_kwargs)

    # Save plot in all formats
    SavePlot(p, saveName, savePath, saveFormats=[".png", ".pdf", ".C"])

    # The reference and variation graphs are handed to the uncertainty
    # writer for the genuine-top definition only.
    if eff_def == "genuineTop":
        uncWriter = UncertaintyWriter()
        jsonName = "uncertainties_%s_BDT_%s.json" % (opts.type, opts.BDT)
        analysis = opts.analysisName
        saveDir = os.path.join("", jsonName)

        for variationName, variationEff in zip(datasetList, ttVariationEff):
            uncWriter.addParameters(variationName, analysis, saveDir,
                                    default_eff, variationEff)

        uncWriter.writeJSON(jsonName)

    return
def PlotEfficiency(datasetsMgr, numPath, denPath, intLumi):
    """Plot the efficiency numPath/denPath of every dataset in a single figure.

    Both histograms of each dataset are normalised to `intLumi`, optionally
    rebinned to the edges in the "binList" plot option, and turned into a
    Clopper-Pearson TEfficiency that is converted to a TGraph. The graphs are
    styled by dataset name, drawn together and saved as .png/.pdf/.C.

    Arguments:
    datasetsMgr -- dataset manager providing getAllDatasets()
    numPath     -- path of the numerator histogram
    denPath     -- path of the denominator histogram
    intLumi     -- luminosity passed to normalizeToLuminosity()

    Reads the module-level `opts` (saveDir, optMode, MVAcut).
    """
    _kwargs = GetHistoKwargs(numPath, opts)
    myList = []

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        # Get the histograms, normalised to the requested luminosity
        numRootHisto = dataset.getDatasetRootHisto(numPath)
        numRootHisto.normalizeToLuminosity(intLumi)
        num = numRootHisto.getHistogram()

        denRootHisto = dataset.getDatasetRootHisto(denPath)
        denRootHisto.normalizeToLuminosity(intLumi)
        den = denRootHisto.getHistogram()

        # Optional rebinning to user-defined bin edges
        if "binList" in _kwargs:
            xBins = _kwargs["binList"]
            nx = len(xBins) - 1
            num = num.Rebin(nx, "", xBins)
            den = den.Rebin(nx, "", xBins)

        # Sanity checks: skip empty histograms and numerator > denominator
        nDen = den.GetEntries()
        nNum = num.GetEntries()
        if nDen == 0 or nNum == 0 or nNum > nDen:
            continue

        # Presumably removes negative bins and enforces numerator bin <=
        # denominator bin (per the original comment; CheckNegatives is
        # defined elsewhere).
        CheckNegatives(num, den, True)

        # Create Efficiency plots with Clopper-Pearson stats
        tEff = ROOT.TEfficiency(num, den)  # fixme: investigate warnings
        tEff.SetStatisticOption(ROOT.TEfficiency.kFCP)

        # Convert to TGraph and apply default style (according to dataset name)
        graph = convert2TGraph(tEff)
        name = dataset.getName()
        plots._plotStyles[name].apply(graph)

        # Append in list
        myList.append(
            histograms.HistoGraph(graph, plots._legendLabels[name], "lp", "P"))

    # Define save name
    saveName = "Eff_%sOver%s" % (numPath.split("/")[-1], denPath.split("/")[-1])

    # Plot the efficiency
    p = plots.PlotBase(datasetRootHistos=myList, saveFormats=[])
    plots.drawPlot(p, saveName, **_kwargs)

    # Save plot in all formats; the MVA cut label is appended to the path
    # directly, without a separator
    savePath = os.path.join(opts.saveDir, "HplusMasses",
                            numPath.split("/")[0], opts.optMode)
    save_path = savePath + opts.MVAcut
    SavePlot(p, saveName, save_path, saveFormats=[".png", ".pdf", ".C"])
    return
def PlotEfficiency(datasetsMgr, numPath, denPath, eff_def):
    """Compare the efficiency of every dataset with that of the default "TT" sample.

    For each dataset a Clopper-Pearson TEfficiency is built from the numerator
    (`numPath`) and denominator (`denPath`) histograms and converted to a
    TGraph. The dataset named "TT" provides the reference graph; all other
    datasets are drawn against it in a ComparisonManyPlot, which is saved as
    .png/.pdf/.C. When `eff_def` is "genuineTop", the reference graph and the
    variation graphs are also handed to an UncertaintyWriter and a JSON file
    is written.

    Arguments:
    datasetsMgr -- dataset manager providing getAllDatasets()
    numPath     -- path of the numerator histogram
    denPath     -- path of the denominator histogram
    eff_def     -- efficiency definition; selects the x-axis label and is part
                   of the save name ("fakeTop", "inclusiveTop", "genuineTop")

    Reads the module-level `opts` (type, saveDir, optMode, BDT, analysisName).

    Raises:
    RuntimeError -- if no "TT" dataset passed the sanity checks, so that there
                    is no reference graph to compare against
    """
    # (old, new) substitutions, applied in this order, that turn a dataset
    # name into a legend entry; a closing ")" is appended afterwards.
    legendSubstitutions = (
        ("TT_", "t#bar{t} ("),
        ("isr", "ISR "),
        ("fsr", "FSR "),
        ("hdamp", "hdamp "),
        ("DOWN", "down"),
        ("UP", "up"),
        ("mtop1665", "m_{t} = 166.5 GeV"),
        ("mtop1695", "m_{t} = 169.5 GeV"),
        ("mtop1715", "m_{t} = 171.5 GeV"),
        ("mtop1735", "m_{t} = 173.5 GeV"),
        ("mtop1755", "m_{t} = 175.5 GeV"),
        ("mtop1785", "m_{t} = 178.5 GeV"),
        ("TuneEE5C", "Herwig++"),
    )

    _kwargs = GetHistoKwargs(numPath, opts)

    # Rebinning is optional: a missing or empty "binList" leaves the
    # histograms untouched.
    xBins = _kwargs.get("binList", [])
    nx = len(xBins) - 1 if len(xBins) > 0 else 0

    myList = []          # HistoGraph objects of the variation samples
    datasetList = []     # names of the variation samples
    ttVariationEff = []  # efficiency graphs of the variation samples
    default_eff = None   # unstyled clone of the reference "TT" graph
    refGraph = None      # HistoGraph of the reference "TT" sample
    counter = 0          # index into styles.markerStyles

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        num = dataset.getDatasetRootHisto(numPath).getHistogram()
        den = dataset.getDatasetRootHisto(denPath).getHistogram()
        if nx > 0:
            num = num.Rebin(nx, "", xBins)
            den = den.Rebin(nx, "", xBins)

        # Sanity checks: skip empty histograms and numerator > denominator
        nNum = num.GetEntries()
        nDen = den.GetEntries()
        if nDen == 0 or nNum == 0 or nNum > nDen:
            continue

        # Create Efficiency plots with Clopper-Pearson stats, as a TGraph
        eff = ROOT.TEfficiency(num, den)
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP)
        gEff = convert2TGraph(eff)

        name = dataset.getName()
        if name == "TT":
            # The current dataset is the reference sample, so its own
            # histograms are used instead of fetching "TT" a second time.
            # Clone before styling: the copy kept for the uncertainties is
            # the unstyled graph.
            default_eff = gEff.Clone()
            styles.ttStyle.apply(gEff)
            legend_ref = "t#bar{t}"
            if opts.type == "partonShower":
                legend_ref = "t#bar{t} (Pythia8)"
            elif opts.type == "evtGen":
                legend_ref = "t#bar{t} (Powheg)"
            refGraph = histograms.HistoGraph(gEff, legend_ref, "p", "P")
        else:
            datasetList.append(name)
            ttVariationEff.append(gEff)

            styles.markerStyles[counter].apply(gEff)
            counter += 1

            legend = name
            for old, new in legendSubstitutions:
                legend = legend.replace(old, new)
            legend += ")"
            myList.append(histograms.HistoGraph(gEff, legend, "p", "P"))

    if refGraph is None:
        raise RuntimeError("PlotEfficiency: no usable \"TT\" reference efficiency "
                           "for numerator \"%s\" and denominator \"%s\""
                           % (numPath, denPath))

    units = "GeV/c"
    if eff_def == "fakeTop":
        _kwargs["xlabel"] = "candidate p_{T} (%s)" % (units)
    elif eff_def == "inclusiveTop" or eff_def == "genuineTop":
        _kwargs["xlabel"] = "generated top p_{T} (%s)" % (units)
    else:
        _kwargs["xlabel"] = "p_{T} (%s)" % (units)

    # Define stuff
    saveName = "Efficiency_%s_%s" % (eff_def, opts.type)

    # Plot the efficiency
    p = plots.ComparisonManyPlot(refGraph, myList, saveFormats=[])
    savePath = os.path.join(opts.saveDir, opts.optMode)
    plots.drawPlot(p, savePath, **_kwargs)

    # Save plot in all formats
    SavePlot(p, saveName, savePath, saveFormats=[".png", ".pdf", ".C"])

    # The reference and variation graphs are handed to the uncertainty
    # writer for the genuine-top definition only.
    if eff_def == "genuineTop":
        uncWriter = UncertaintyWriter()
        jsonName = "uncertainties_%s_BDT_%s.json" % (opts.type, opts.BDT)
        analysis = opts.analysisName
        saveDir = os.path.join("", jsonName)

        for variationName, variationEff in zip(datasetList, ttVariationEff):
            uncWriter.addParameters(variationName, analysis, saveDir,
                                    default_eff, variationEff)

        uncWriter.writeJSON(jsonName)

    return
예제 #24
0
def PlotEfficiency(datasetsMgr, numPath, denPath):
    """Compare the efficiency of every dataset with that of the default "TT" sample.

    For each dataset a Clopper-Pearson TEfficiency is built from the numerator
    (`numPath`) and denominator (`denPath`) histograms and converted to a
    TGraph. The dataset named "TT" provides the reference graph; all other
    datasets are drawn against it in a ComparisonManyPlot, which is saved as
    .png/.pdf/.C.

    Arguments:
    datasetsMgr -- dataset manager providing getAllDatasets()
    numPath     -- path of the numerator histogram
    denPath     -- path of the denominator histogram

    Reads the module-level `opts` (type, folder, saveDir, optMode).

    Raises:
    RuntimeError -- if no "TT" dataset passed the sanity checks, so that there
                    is no reference graph to compare against
    """
    # (old, new) substitutions, applied in this order, that turn a dataset
    # name into a legend entry; a closing ")" is appended afterwards.
    legendSubstitutions = (
        ("TT_", "t#bar{t} ("),
        ("isr", "ISR "),
        ("fsr", "FSR "),
        ("hdamp", "hdamp "),
        ("DOWN", "down"),
        ("UP", "up"),
        ("mtop1665", "m_{t} = 166.5 GeV"),
        ("mtop1695", "m_{t} = 169.5 GeV"),
        ("mtop1715", "m_{t} = 171.5 GeV"),
        ("mtop1735", "m_{t} = 173.5 GeV"),
        ("mtop1755", "m_{t} = 175.5 GeV"),
        ("mtop1785", "m_{t} = 178.5 GeV"),
        ("TuneEE5C", "Herwig++"),
    )

    _kwargs = GetHistoKwargs(numPath, opts)

    # Rebinning is optional: a missing or empty "binList" leaves the
    # histograms untouched.
    xBins = _kwargs.get("binList", [])
    nx = len(xBins) - 1 if len(xBins) > 0 else 0

    myList = []      # HistoGraph objects of the variation samples
    refGraph = None  # HistoGraph of the reference "TT" sample
    counter = 0      # index into styles.markerStyles

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        num = dataset.getDatasetRootHisto(numPath).getHistogram()
        den = dataset.getDatasetRootHisto(denPath).getHistogram()
        if nx > 0:
            num = num.Rebin(nx, "", xBins)
            den = den.Rebin(nx, "", xBins)

        # Sanity checks: skip empty histograms and numerator > denominator
        nNum = num.GetEntries()
        nDen = den.GetEntries()
        if nDen == 0 or nNum == 0 or nNum > nDen:
            continue

        # Create Efficiency plots with Clopper-Pearson stats, as a TGraph
        eff = ROOT.TEfficiency(num, den)
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP)
        gEff = convert2TGraph(eff)

        name = dataset.getName()
        if name == "TT":
            # The current dataset is the reference sample, so its own
            # histograms are used instead of fetching "TT" a second time.
            styles.ttStyle.apply(gEff)
            legend_ref = "t#bar{t}"
            if opts.type == "partonShower":
                legend_ref = "t#bar{t} (Pythia8)"
            elif opts.type == "evtGen":
                legend_ref = "t#bar{t} (Powheg)"
            refGraph = histograms.HistoGraph(gEff, legend_ref, "p", "P")
        else:
            styles.markerStyles[counter].apply(gEff)
            counter += 1

            legend = name
            for old, new in legendSubstitutions:
                legend = legend.replace(old, new)
            legend += ")"
            myList.append(histograms.HistoGraph(gEff, legend, "p", "P"))

    if refGraph is None:
        raise RuntimeError("PlotEfficiency: no usable \"TT\" reference efficiency "
                           "for numerator \"%s\" and denominator \"%s\""
                           % (numPath, denPath))

    # Define stuff: an empty opts.folder leaves a double underscore, which is
    # rewritten to "_Inclusive_"
    saveName = "Efficiency_%s_%s" % (opts.folder, opts.type)
    saveName = saveName.replace("__", "_Inclusive_")

    # Plot the efficiency
    p = plots.ComparisonManyPlot(refGraph, myList, saveFormats=[])
    savePath = os.path.join(opts.saveDir, opts.optMode)
    plots.drawPlot(p, savePath, **_kwargs)

    # Save plot in all formats
    SavePlot(p, saveName, savePath, saveFormats=[".png", ".pdf", ".C"])
    return
def Fit (datasets, histo, function):
    
    
    FitList = []
    for dataset in datasets:

        datasetName = dataset.getName()
        print "Dataset = ", datasetName
        hh = dataset.getDatasetRootHisto(histo)
 
        hh.normalizeToOne()
        h = hh.getHistogram()

        #h = dataset.getDatasetRootHisto(histo).getHistogram()
        xMin  = h.GetXaxis().GetXmin()
        xMax  = h.GetXaxis().GetXmax()
        yMin  = 0
        yMax  = 1.2
        #statOption = ROOT.TEfficiency.kFNormal
        if "TT" in datasetName:
            if function == "gaus":
                fitGauss = ROOT.TF1("fitGauss", "gaus", -2.5, 2.5)
#                TF1 *fitBoFreq = new TF1("fitBoFreq","[0]*x+[1]",0,20);
#                h.Fit("gaus")
                #fitTest = ROOT.TF1("fitTest", "0.01", -2.5, 2.5)
                
                h.Fit("fitGauss","SRBM")
                #h.GetListOfFunctions().Add(fitTest)
                legend = "TT"

        legend = "a legend"
        print "Legend", legend
        saveName = histo.split("/")[-1]+"_Fit"

        print saveName

        xTitle = "fixXTitle"
        yTitle = "fixYTitle"
    
        yMin = 0.
        yMax = 0.03
        xMin = -2.3
        xMax = 2.3
        kwargs = {}

        options = {"ymin": yMin  , "ymax": yMax, "xmin":xMin, "xMax":xMax}
        FitList.append(h)
        #p = plots.MCPlot(dataset, h, normalizeToLumi=0, saveFormats=[], **kwargs)

        p = plots.PlotBase(datasetRootHistos=FitList, saveFormats=kwargs.get("saveFormats"))
        p.createFrame(saveName, opts=options)
        
        p.getFrame().GetXaxis().SetTitle(xTitle)
        p.getFrame().GetYaxis().SetTitle(yTitle)
        #p.histoMgr.setHistoDrawStyle(datasetName, "AP")
        
# Set range                                                                                                                                                                          
        p.getFrame().GetXaxis().SetRangeUser(xMin, xMax)

        
        moveLegend = {"dx": -0.55, "dy": -0.01, "dh": -0.1}

        p.setLegend(histograms.moveLegend(histograms.createLegend(), **moveLegend))
        # Add Standard Texts to plot        
        histograms.addStandardTexts()
    
        p.draw()
    
    # Save plot in all formats                                                                                                                                                           
        savePath = os.path.join(opts.saveDir, "HplusMasses", histo.split("/")[0], opts.optMode)
        save_path = savePath 
        SavePlot(p, saveName, save_path)
    return
예제 #26
0
def getEfficiency2D(datasetsMgr,
                    datasets,
                    numerator="Numerator",
                    denominator="Denominator",
                    **kwargs):
    '''                                                                                                                                                               
    TEfficiency method:                                                                                                                                               
                                                                                                                                                                      
    See https://root.cern.ch/doc/master/classTEfficiency.html                                                                                                         
    
    
    '''
    HasKeys(["verbose"], **kwargs)
    verbose = True  #kwargs.get("verbose")

    lumi = GetLumi(datasetsMgr)

    # Select Statistic Options
    statOption = ROOT.TEfficiency.kFCP
    '''                                                                                                                                                               
    statOption = ROOT.TEfficiency.kFCP      # Clopper-Pearson                                                                                                         
    statOption = ROOT.TEfficiency.kFNormal  # Normal Approximation                                                                                                    
    statOption = ROOT.TEfficiency.kFWilson  # Wilson                                                                                                                  
    statOption = ROOT.TEfficiency.kFAC      # Agresti-Coull                                                                                                           
    statOption = ROOT.TEfficiency.kFFC      # Feldman-Cousins                                                                                                         
    statOption = ROOT.TEfficiency.kBBJeffrey # Jeffrey                                                                                                                
    statOption = ROOT.TEfficiency.kBBUniform # Uniform Prior                                                                                                          
    statOption = ROOT.TEfficiency.kBBayesian # Custom Prior                                                                                                           
    '''

    print "getEfficiency function"
    first = True
    teff = ROOT.TEfficiency()
    #    teff.SetStatisticOption(statOption)
    print "Loop over Datasets"
    for dataset in datasets:
        print "Datasets"

    #datasets.normalizeMCByLuminosity()
    for dataset in datasets:
        num = dataset.getDatasetRootHisto(numerator)
        den = dataset.getDatasetRootHisto(denominator)
        if dataset.isMC():
            num.normalizeToLuminosity(lumi)
            den.normalizeToLuminosity(lumi)
        #num.normalizeMCByLuminosity()
        #den.normalizeMCByLuminosity()

        # Get Numerator and Denominator
        n = num.getHistogram()
        d = den.getHistogram()

        #tn = None
        #td = None
        #n.normalizeMCByLuminosity()
        #d.normalizeMCByLuminosity()

        #n = dataset.getDatasetRootHisto(numerator).getHistogram()
        #d = dataset.getDatasetRootHisto(denominator).getHistogram()

        if d.GetEntries() == 0 or n.GetEntries() == 0:
            print "Denominator Or Numerator has no entries"
            continue

        # Check Negatives
        CheckNegatives(n, d, True)
        # Remove Negatives
        RemoveNegatives(n)
        #RemoveNegatives(d)

        NumeratorBins = n.GetNbinsX()
        DenominatorBins = d.GetNbinsX()

        # Sanity Check
        if (NumeratorBins != DenominatorBins):
            raise Exception("Numerator and Denominator Bins are NOT equal!")
        nBinsX = d.GetNbinsX()
        xMin = d.GetXaxis().GetXmin()
        xMax = d.GetXaxis().GetXmax()

        nBinsY = d.GetNbinsY()
        #yMin  = d.GetYaxis().GetYmin()
        #yMax  = d.GetYaxis().GetYmax()
        print("NoProblem till here asdasd...")

        # ----------------------------------------------------------------------------------------- #
        #      Ugly hack to ignore EMPTY (in the wanted range) histograms with overflows/underflows
        # ----------------------------------------------------------------------------------------- #

        print "\n"
        print "=========== getEfficiency:"
        print "Dataset             = ", dataset.getName()

        #print "Numerator  :", n.GetName(), "   entries=", n.GetEntries(), " Bins=", n.GetNbinsX(), " Low edge=", n.GetBinLowEdge(1)
        #print "Denominator:", d.GetName(), "   entries=", d.GetEntries(), " Bins=", d.GetNbinsX(), " Low edge=", d.GetBinLowEdge(1)
        print "\n"
        print ">>>>>>  Sanity Check:  <<<<<<"
        print "Numerator Mean       = ", n.GetMean()
        print "Numerator RMS        = ", n.GetRMS()
        print "Numerator Integral   = ", n.Integral()
        print "Denominator Mean     = ", d.GetMean()
        print "Denominator RMS      = ", d.GetRMS()
        print "Denominator Integral = ", d.Integral()

        if (n.GetMean() == 0 or d.GetMean() == 0): continue
        if (n.GetRMS() == 0 or d.GetRMS() == 0): continue
        if (n.Integral() == 0 or d.Integral() == 0): continue

        print "Passed the sanity check"

        eff = ROOT.TEfficiency(n, d)
        eff.SetStatisticOption(statOption)

        #        if "TT" in dataset.getName():
        #    print " "
        #    print " TT sample"
        #for iBin in range(1, nBins+1):
        #    print iBin, "x=", n.GetBinLowEdge(iBin), " Num=", n.GetBinContent(iBin),  " Den=", d.GetBinContent(iBin)," Eff=", eff.GetEfficiency(iBin)
        # "Contrib. =", d.GetBinContent(iBin)/d.Integral(1, nBins)*100.0, "Contrib. = ", n.GetBinContent(iBin)/n.Integral(1, nBins)*100.0,
        '''                                                                                                                                                           
        #if (verbose):                                                                                                                                                
        print "\n"                                                                                                                                                    
        for iBin in range(1,nBins+1):                                                                                                                                 
        #print iBin, "x=", n.GetBinLowEdge(iBin), " Numerator=", n.GetBinContent(iBin), " Denominator=", d.GetBinContent(iBin), " Efficiency=", eff.GetEfficiency(iBin\
), " Weight=", eff.GetWeight()                                                                                                                                        
        print "\n"                                                                                                                                                    
        '''

        weight = 1
        if dataset.isMC():
            weight = dataset.getCrossSection()
        eff.SetWeight(weight)
        #print "dataset=", dataset.getName(), "has weight=", weight
        #print " Efficiency plot has weight=", eff.GetWeight()

        if first:
            teff = eff
            first = False
            if dataset.isData():
                tn = n
                td = d
        else:
            teff.Add(eff)

            #print " "
            #print "Adding eff to TEfficiency="
            #for iBin in range(1, nBins+1):
            #    print iBin, "x=", n.GetBinLowEdge(iBin), " Numerator=", n.GetBinContent(iBin), "Contrib. = ", n.GetBinContent(iBin)/n.Integral(1, nBins)*100.0, " Denominator=", d.GetBinContent(iBin), "Contrib. =", d.GetBinContent(iBin)/d.Integral(1, nBins)*100.0, " Efficiency=", teff.GetEfficiency(iBin), " Weight=", teff.GetWeight()

            if dataset.isData():
                tn.Add(n)
                td.Add(d)

        if dataset.isData():
            teff = ROOT.TEfficiency(tn, td)
            teff.SetStatisticOption(statOption)
            '''                                                                                                                                                       
            print " ------------------------- Final Data Plot ------------------------- "                                                                             
            print "Integral = ", tn.Integral(1, nBins)                                                                                                                
            print "Numerator:"                                                                                                                                        
            for iBin in range(1, nBins+1):                                                                                                                            
            print iBin, "x=", tn.GetBinLowEdge(iBin), " Bin Content = ", tn.GetBinContent(iBin), " Percentage=", tn.GetBinContent(iBin)/tn.Integral(1, nBins)*100.0   
                                                                                                                                                                      
            print "Denominator:  "                                                                                                                                    
            print "Integral = ", td.Integral(1,nBins)                                                                                                                 
            for iBin in range(1, nBins+1):                                                                                                                            
            print iBin, "x=", td.GetBinLowEdge(iBin), " Bin Content = ", td.GetBinContent(iBin), " Percentage=", td.GetBinContent(iBin)/td.Integral(1, nBins)*100     
            print "-------------------------------------------------------------------- "                                                                             
            '''

    print " -----------------> Final tEff"
    #for iBin in range(1,nBins+1):
    #    print iBin, "x=", n.GetBinLowEdge(iBin)," Efficiency=", teff.GetEfficiency(iBin), " Weight = ", teff.GetWeight()

    return teff
def PlotMC_ForEffPlots(datasetsMgr, histoName, intLumi):
    '''
    Loop over all datasets of the manager, normalise the histogram
    "histoName" of each one to the luminosity "intLumi" and pass it to
    plots.drawPlot(); afterwards call SavePlot().

    \param datasetsMgr  Dataset manager providing getAllDatasets()
    \param histoName    Path of the histogram to fetch from each dataset
    \param intLumi      Luminosity passed to normalizeToLuminosity()

    Uses the module-level "opts" object (saveDir, optMode). Returns nothing.

    NOTE(review): "p" is never assigned inside this function. Unless a
    module-level "p" exists, the first use raises a NameError - confirm
    which plot object was meant to be created here.
    '''

    index = 0
    for dataset in datasetsMgr.getAllDatasets():

        datasetName = dataset.getName()
        rootHisto = dataset.getDatasetRootHisto(histoName)
        rootHisto.normalizeToLuminosity(intLumi)
        histo = rootHisto.getHistogram()

        kwargs = {}

        # NOTE(review): the two membership tests below compare the string
        # "TT" against the elements returned by getAllDatasets(), which look
        # like dataset objects rather than names - presumably a test on the
        # dataset names was intended. Verify.
        if index == 0:
            # Apply Styles
            if "TT" in datasetsMgr.getAllDatasets():
                p.histoMgr.setHistoDrawStyle("TT", "AP")
                p.histoMgr.setHistoLegendStyle("TT", "LP")
                index = 1

        else:
            # Apply Styles
            if "TT" in datasetsMgr.getAllDatasets():
                p.histoMgr.setHistoDrawStyle("TTTT", "AP")
                p.histoMgr.setHistoLegendStyle("TTTT", "LP")

        # Plot cosmetics; the y-axis label becomes
        # "Arbitrary Units / %0.1fGeV/c"
        _xlabel = "p_{T} (GeV/c)"
        logY = False
        _rebinX = 1
        units = "GeV/c"
        _format = "%0.1f" + units
        _opts = {"ymin": 1e-3, "ymaxfactor": 1.0}
        _cutBox = False
        #        _cutBox = {"cutValue": 0.5, "fillColor": 16, "box": False, "line": False, "greaterThan": True}

        # NOTE(review): elsewhere in this file the second argument of
        # plots.drawPlot() is a name/path string; here the histogram object
        # itself is passed - confirm.
        plots.drawPlot(
            p,
            histo,
            xlabel=_xlabel,
            ylabel="Arbitrary Units / %s" % (_format),
            log=logY,
            rebinX=_rebinX,
            cmsExtraText="Preliminary",
            createLegend={
                "x1": 0.58,
                "y1": 0.65,
                "x2": 0.92,
                "y2": 0.92
            },
            opts=_opts,
            opts2={
                "ymin": 0.6,
                "ymax": 1.4
            },
            cutBox=_cutBox,
        )

    # Save plot in all formats
    # NOTE(review): "histo" is the object returned by getHistogram() for the
    # last dataset (and is unbound if there were no datasets); split("/")
    # suggests the string "histoName" was intended - verify.
    saveName = histo.split("/")[-1]
    savePath = os.path.join(opts.saveDir, "HplusMasses",
                            histo.split("/")[0], opts.optMode)
    SavePlot(p, saveName, savePath)
    return
def PlotEfficiency(datasetsMgr, numPath, denPath):
    '''
    Draw the efficiency (numPath / denPath) of every dataset and compare all
    of them against the efficiency of the "TT" dataset.

    \param datasetsMgr  Dataset manager providing getAllDatasets()
    \param numPath      Path of the numerator histogram
    \param denPath      Path of the denominator histogram

    Uses the module-level "opts" object (folder, type, saveDir, optMode).
    Histograms are rebinned with the "binList" entry of the histogram kwargs
    when that list is not empty. Datasets with an empty numerator or
    denominator, or with more numerator than denominator entries, are skipped.

    Raises Exception if no dataset named "TT" passes these checks, because
    the comparison plot needs it as reference. Returns nothing.
    '''
    # Definitions
    myList = []
    refGraph = None
    _kwargs = GetHistoKwargs(numPath, opts)
    nx = 0
    if len(_kwargs["binList"]) > 0:
        xBins = _kwargs["binList"]
        nx = len(xBins) - 1
    counter = 0

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():

        # Data and MC are treated in exactly the same way
        num = dataset.getDatasetRootHisto(numPath).getHistogram()
        den = dataset.getDatasetRootHisto(denPath).getHistogram()
        if nx > 0:
            num = num.Rebin(nx, "", xBins)
            den = den.Rebin(nx, "", xBins)

        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den)
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP)

        # Convert to TGraph
        gEff = convert2TGraph(eff)

        if dataset.getName() == "TT":
            # The TT dataset itself is the reference of the comparison, so
            # its own efficiency graph is used directly
            styles.ttStyle.apply(gEff)
            legend_ref = "t#bar{t}"
            if opts.type == "partonShower":
                legend_ref = "t#bar{t} (Pythia8)"
            elif opts.type == "evtGen":
                legend_ref = "t#bar{t} (Powheg)"
            refGraph = histograms.HistoGraph(gEff, legend_ref, "p", "P")
        else:
            styles.markerStyles[counter].apply(gEff)
            # Turn the dataset name into a ROOT-formatted legend entry
            legend = dataset.getName().replace("TT_", "t#bar{t} (")
            legend = legend.replace("isr", "ISR ").replace("fsr", "FSR ")
            legend = legend.replace("hdamp", "hdamp ")
            legend = legend.replace("DOWN", "down").replace("UP", "up")
            for tag, mass in (("mtop1665", "166.5"), ("mtop1695", "169.5"),
                              ("mtop1715", "171.5"), ("mtop1735", "173.5"),
                              ("mtop1755", "175.5"), ("mtop1785", "178.5")):
                legend = legend.replace(tag, "m_{t} = %s GeV" % mass)
            legend = legend.replace("TuneEE5C", "Herwig++")
            legend += ")"
            counter += 1
            myList.append(histograms.HistoGraph(gEff, legend, "p", "P"))

    if refGraph is None:
        # Previously this surfaced as an unbound local variable further down
        raise Exception("No usable \"TT\" dataset found; it is needed as the reference of the efficiency comparison")

    # Define stuff
    saveName = "Efficiency_%s_%s" % (opts.folder, opts.type)
    saveName = saveName.replace("__", "_Inclusive_")

    # Plot the efficiency
    p = plots.ComparisonManyPlot(refGraph, myList, saveFormats=[])
    savePath = os.path.join(opts.saveDir, opts.optMode)
    plots.drawPlot(p, savePath, **_kwargs)

    # Save plot in all formats
    SavePlot(p, saveName, savePath, saveFormats=[".png", ".pdf", ".C"])
    return
예제 #29
0
def PlotEfficiency(datasetsMgr, numPath, denPath, intLumi):
    '''
    Draw the efficiency (numPath / denPath) of every dataset, normalised to
    the given luminosity, and compare all of them against the dataset whose
    name contains "m_500" (case-insensitive).

    \param datasetsMgr  Dataset manager providing getAllDatasets()
    \param numPath      Path of the numerator histogram
    \param denPath      Path of the denominator histogram
    \param intLumi      Luminosity passed to normalizeToLuminosity()

    Uses the module-level "opts" object (saveDir, optMode). Datasets with an
    empty numerator or denominator, or with more numerator than denominator
    entries, are skipped.

    Raises Exception if no "m_500" dataset passes these checks, because the
    comparison plot needs it as reference. Returns nothing.
    '''
    # Definitions
    myList = []
    eff_ref = None
    _kwargs = GetHistoKwargs(denPath, opts)
    counter = 0

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        datasetName = dataset.getName()

        # Get the histograms, normalised to the luminosity
        n = dataset.getDatasetRootHisto(numPath)
        n.normalizeToLuminosity(intLumi)
        num = n.getHistogram()
        d = dataset.getDatasetRootHisto(denPath)
        d.normalizeToLuminosity(intLumi)
        den = d.getHistogram()

        if "binList" in _kwargs:
            xBins = _kwargs["binList"]
            nx = len(xBins) - 1
            num = num.Rebin(nx, "", xBins)
            den = den.Rebin(nx, "", xBins)

        # Report bins in which the numerator exceeds the denominator
        for i in range(1, num.GetNbinsX() + 1):
            if num.GetBinContent(i) > den.GetBinContent(i):
                print "error"

        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den)  # fixme: investigate warnings
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP)

        # Convert to TGraph
        gEff = convert2TGraph(eff)

        # Apply default style (according to dataset name)
        plots._plotStyles[datasetName].apply(gEff)

        # Signal samples get a dedicated marker style on top of the default
        if "charged" in datasetName.lower():
            counter += 1
            mass = datasetName.split("M_")[-1]
            styles.markerStyles[counter].apply(gEff)
            if "300" in mass or "650" in mass:
                s = styles.getSignalStyleHToTB_M(mass)
                s.apply(gEff)
                gEff.SetLineStyle(ROOT.kSolid)
                gEff.SetLineWidth(3)
                gEff.SetMarkerSize(1.2)

        # The "m_500" dataset is the reference, everything else is compared
        # against it
        graph = histograms.HistoGraph(gEff, plots._legendLabels[datasetName], "lp", "P")
        if "m_500" in datasetName.lower():
            eff_ref = graph
        else:
            myList.append(graph)

    if eff_ref is None:
        # Previously this surfaced as an unbound local variable further down
        raise Exception("No usable \"m_500\" dataset found; it is needed as the reference of the efficiency comparison")

    # Define save name
    saveName = "Eff_" + numPath.split("/")[-1] + "Over" + denPath.split("/")[-1]

    # Plot the efficiency
    p = plots.ComparisonManyPlot(eff_ref, myList, saveFormats=[])
    plots.drawPlot(p, saveName, **_kwargs)

    # Save plot in all formats
    savePath = os.path.join(opts.saveDir, numPath.split("/")[0], opts.optMode)
    SavePlot(p, saveName, savePath, saveFormats=[".png", ".C", ".pdf"])
    return