Ejemplo n.º 1
0
def getHistFromPkl(subdirs, plotName, sys, *selectors):
    """Return the summed histogram matching ``selectors`` from a cached plot pickle.

    The pickle is expected at ``plotDir/<subdirs...>/<plotName>.pkl``. Its
    content is kept in the module-level ``loadedPkls`` dict, so each file is
    unpickled at most once.

    Args:
        subdirs: Tuple of directory names below ``plotDir`` (it is concatenated
            with other tuples to build the path).
        plotName: Plot name; also the stem of the pickle file.
        sys: Suffix appended to ``plotName`` to form the first-level key of the
            pickle ('' is used for the nominal entry in the recursive calls).
        *selectors: Each selector is an iterable of substrings. A cached
            histogram matches a selector when its key contains all of them.
            A selector with exactly one match contributes that histogram to
            the sum; a selector with several matches is reported as an error
            and contributes nothing.

    Returns:
        The histogram accumulated through ``addHist`` (passed through
        ``applySysToOtherHist`` for 'Scale' systematics), or None when nothing
        was found. Problems are reported through ``log.error`` only.
    """
    hist = None
    resultFile = os.path.join(*((plotDir, ) + subdirs + (plotName + '.pkl', )))

    if os.path.isdir(os.path.dirname(resultFile)):
        if resultFile not in loadedPkls:
            with lock(resultFile, 'rb') as f:
                loadedPkls[resultFile] = pickle.load(f)
        for selector in selectors:
            filtered = {
                s: h
                for s, h in loadedPkls[resultFile][plotName + sys].items()
                if all(sel in s for sel in selector)
            }
            if len(filtered) == 1:
                # Exactly one candidate: take its histogram, whatever its key is
                hist = addHist(hist, next(iter(filtered.values())))
            elif len(filtered) > 1:
                log.error('Multiple possibilities to look for ' +
                          str(selector) + ': ' + str(list(filtered.keys())))
    else:
        log.error('Missing cache file ' + resultFile)

    # 'Scale' systematics are taken from the data entries (nominal vs. varied)
    # and applied to the selected histogram. The recursive calls pass 'MuonEG'
    # selectors themselves, so they do not enter this branch again.
    if 'Scale' in sys and not any('MuonEG' in sel for sel in selectors):
        data = getHistFromPkl(subdirs, plotName, '', ['MuonEG'], ['DoubleEG'],
                              ['DoubleMuon'])
        dataSys = getHistFromPkl(subdirs, plotName, sys, ['MuonEG'],
                                 ['DoubleEG'], ['DoubleMuon'])
        hist = applySysToOtherHist(data, dataSys, hist)
    if not hist:
        log.error('Missing ' + str(selectors) + ' for plot ' + plotName +
                  ' in ' + resultFile)
    return hist
Ejemplo n.º 2
0
    def saveToCache(self, dir, sys):
        """Store this plot's histograms in the pickle cache ``dir/<name>.pkl``.

        The cache file holds a dict keyed by ``self.name + sys``; each value is
        a dict mapping ``sample.name + sample.texName`` to the histogram. An
        existing file is read and updated; if it cannot be read, a fresh
        dict containing only this plot is written instead.

        Args:
            dir: Directory of the cache file; created when missing.
            sys: Systematic suffix appended to the plot name, or a falsy value
                for the nominal plot.
        """
        try:
            os.makedirs(dir)
        except OSError:
            # Best effort: typically the directory exists already. A genuine
            # failure surfaces below when the cache file is opened.
            pass

        resultFile = os.path.join(dir, self.name + '.pkl')
        histos = {s.name + s.texName: h for s, h in self.histos.items()}
        plotName = self.name + (sys or '')
        # NOTE(review): keepLock/existingLock presumably keep one lock across
        # the read-modify-write cycle; confirm against the lock helper.
        try:
            with lock(resultFile, 'rb', keepLock=True) as f:
                allPlots = pickle.load(f)
            allPlots.update({plotName: histos})
        except Exception:
            # Missing or unreadable cache: start over with this plot only.
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            allPlots = {plotName: histos}
        with lock(resultFile, 'wb', existingLock=True) as f:
            pickle.dump(allPlots, f)
        log.info("Plot " + plotName + " saved to cache")
Ejemplo n.º 3
0
 def loadFromCache(self, resultsDir):
     """Replace the histograms in ``self.histos`` by the cached nominal ones.

     Reads ``resultsDir/<name>.pkl`` and looks up, for every sample key of
     ``self.histos``, the entry ``[self.name][sample.name + sample.texName]``.

     Args:
         resultsDir: Directory containing the pickle cache file.

     Returns:
         True when the cache could not be used (a warning is logged and
         ``self.histos`` is left untouched), None on success.
     """
     resultsFile = os.path.join(resultsDir, self.name + '.pkl')
     try:
         with lock(resultsFile, 'rb') as f:
             allPlots = pickle.load(f)
         cached = allPlots[self.name]
         # Resolve every sample first so a missing entry cannot leave
         # self.histos partly overwritten.
         loaded = {s: cached[s.name + s.texName] for s in self.histos}
     except Exception:
         # KeyboardInterrupt/SystemExit are deliberately not swallowed.
         log.warning('No resultsfile for ' + self.name + '.pkl')
         return True
     for s, h in loaded.items():
         self.histos[s] = h
Ejemplo n.º 4
0
    def calcSystematics(self,
                        stackForSys,
                        systematics,
                        linearSystematics,
                        resultsDir,
                        postFitInfo=None,
                        addMCStat=True):
        """Return the relative systematic uncertainty per bin, for up and down.

        For the nominal case and for every systematic variation, the cached
        histograms of all samples in ``stackForSys`` are summed. The per-bin
        differences to the nominal sum are added in quadrature, separately for
        upward and downward shifts, together with the linear (flat percentage)
        systematics. The result is divided by the nominal sum.

        Args:
            stackForSys: Iterable of samples; ``name + texName`` of each sample
                is the second-level key in the cache.
            systematics: Mapping whose keys are the variation names. Unless a
                name contains 'Stat', 'sideBand' or 'Scale', the cache is
                expected to hold an entry ``self.name + <key>``.
            linearSystematics: Mapping whose values are ``(sampleFilter, unc)``
                pairs. ``unc`` is a percentage applied to the histograms in
                ``self.histos`` whose sample name contains any element of
                ``sampleFilter`` (all histograms when the filter is falsy).
            resultsDir: Directory holding ``<self.name>.pkl``.
            postFitInfo: Optional mapping from a substring of the sample key to
                a scale factor applied to the matching histograms.
            addMCStat: When true, add per-sample 'StatUp'/'StatDown' variations
                built from the bin errors.

        Returns:
            Dict with keys 'Up' and 'Down', each a histogram of relative
            uncertainties.
        """
        resultsFile = os.path.join(resultsDir, self.name + '.pkl')
        with lock(resultsFile, 'rb') as f:
            allPlots = pickle.load(f)

        # In the 2D cache, the second key is name+texName of the sample
        histNames = [s.name + s.texName for s in stackForSys]

        # list(...) yields a private list that can be extended below
        sysKeys = list(systematics.keys())
        if addMCStat:
            sysKeys += [n + 'StatUp' for n in histNames]
            sysKeys += [n + 'StatDown' for n in histNames]

        histos_summed = {}
        for sys in [None] + sysKeys:
            if sys and not any(x in sys
                               for x in ['Stat', 'sideBand', 'Scale']):
                plotName = self.name + sys  # in the 2D cache, the first key is plotname+sys
            else:
                plotName = self.name  # for nominal and some exceptions

            if plotName not in allPlots:  # check if sys variation has been run already
                # str(): sys is None for the nominal entry
                log.error('No ' + str(sys) + ' variation found for ' +
                          self.name)

            histos_summed[sys] = None
            for histName in histNames:
                if sys and 'Scale' in sys and 'noData' not in resultsDir:
                    # Ugly hack to apply scale systematics on MC instead of
                    # data (only when data is available)
                    data, dataSys = None, None
                    # Data entries (availability depends on ee, mumu, emu, SF)
                    for d in [
                            d for d in allPlots[self.name] if d.count('data')
                    ]:
                        # Nominal for data
                        data = addHist(data, allPlots[self.name][d])
                        # The eScale or phScale sys for data
                        dataSys = addHist(dataSys,
                                          allPlots[self.name + sys][d])
                    # Apply the eScale or phScale sys on MC
                    h = applySysToOtherHist(
                        data, dataSys, allPlots[plotName][histName].Clone())
                elif sys and 'sideBand' in sys:
                    # Ugly hack to apply side band uncertainty
                    h = applySidebandUnc(allPlots[self.name][histName].Clone(),
                                         self.name, resultsDir, 'Up' in sys)
                else:  # normal case, simply taken from cache
                    h = allPlots[plotName][histName].Clone()

                # MC statistics: shift the bin contents of the matching sample
                # by its bin errors (bins 0..GetNbinsX())
                if sys and 'StatUp' in sys and sys.replace(
                        'StatUp', '') in histName:
                    for i in range(h.GetNbinsX() + 1):
                        h.SetBinContent(i,
                                        h.GetBinContent(i) + h.GetBinError(i))
                if sys and 'StatDown' in sys and sys.replace(
                        'StatDown', '') in histName:
                    for i in range(h.GetNbinsX() + 1):
                        h.SetBinContent(i,
                                        h.GetBinContent(i) - h.GetBinError(i))

                if postFitInfo:  # apply post-fit scalefactors if available
                    for i in postFitInfo:
                        if histName.count(i):
                            h.Scale(postFitInfo[i])
                if h.Integral() == 0:
                    log.debug("Found empty histogram %s:%s in %s/%s.pkl",
                              plotName, histName, resultsDir, self.name)
                if self.scaleFactor:
                    h.Scale(self.scaleFactor)
                normalizeBinWidth(h, self.normBinWidth)
                self.addOverFlowBin1D(h, self.overflowBin)

                histos_summed[sys] = addHist(histos_summed[sys], h)

        # Adding the systematics in quadrature
        relErrors = {}
        for variation in ['Up', 'Down']:  # Consider both up and down variations separately
            summedErrors = histos_summed[None].Clone()
            summedErrors.Reset()
            for sys in sysKeys:
                # Name of the opposite variation of the same systematic
                if 'Up' in sys:
                    sysOther = sys.replace('Up', 'Down')
                else:
                    sysOther = sys.replace('Down', 'Up')
                for i in range(summedErrors.GetNbinsX() + 1):
                    uncertainty = histos_summed[sys].GetBinContent(
                        i) - histos_summed[None].GetBinContent(i)
                    uncertaintyOther = histos_summed[sysOther].GetBinContent(
                        i) - histos_summed[None].GetBinContent(i)
                    if uncertainty * uncertaintyOther > 0 and abs(
                            uncertainty) < abs(uncertaintyOther):
                        continue  # Check if both up and down go to same direction, only take the maximum
                    if (variation == 'Up'
                            and uncertainty > 0) or (variation == 'Down'
                                                     and uncertainty < 0):
                        if sys.count('fsr'):
                            # Hacky, scale fsr uncertainty with 1/sqrt(2) as
                            # recommended for TOP pag (in the fit this is
                            # handled in the cards)
                            uncertainty *= 1 / sqrt(2)
                        summedErrors.SetBinContent(
                            i,
                            summedErrors.GetBinContent(i) + uncertainty**2)

            for sampleFilter, unc in linearSystematics.values():
                # Histograms the flat uncertainty applies to: those whose
                # sample name matches the filter, or all without a filter
                affected = [
                    h for s, h in self.histos.items()
                    if not sampleFilter or any(
                        s.name.count(f) for f in sampleFilter)
                ]
                for i in range(summedErrors.GetNbinsX() + 1):
                    # 100. forces true division: an integer percentage must
                    # not be truncated to 0 by Python 2 integer division
                    uncertainty = unc / 100. * sum(
                        h.GetBinContent(i) for h in affected)
                    summedErrors.SetBinContent(
                        i,
                        summedErrors.GetBinContent(i) + uncertainty**2)

            for i in range(summedErrors.GetNbinsX() + 1):
                summedErrors.SetBinContent(i,
                                           sqrt(summedErrors.GetBinContent(i)))

            # Turn the absolute uncertainties into relative ones
            summedErrors.Divide(histos_summed[None])
            relErrors[variation] = summedErrors

        return relErrors
Ejemplo n.º 5
0
Archivo: plot.py Proyecto: lwezenbe/ttg
    def getSysHistos(self,
                     stackForSys,
                     resultsDir,
                     systematics,
                     postFitInfo=None,
                     addMCStat=True):
        """Return the cached histograms for the nominal case and each variation.

        The pickle ``resultsDir/<self.name>.pkl`` is read once and kept in the
        module-level ``loadedPkls`` dict; this method works on clones of the
        cached histograms.

        Args:
            stackForSys: Iterable of samples; ``name + texName`` of each sample
                is the second-level key in the cache.
            systematics: Iterable of systematic names; 'Up' and 'Down' are
                appended to each. 'q2' and 'pdf' trigger ``constructQ2Sys`` /
                ``constructPdfSys`` on the loaded plots.
            resultsDir: Directory holding the pickle cache file.
            postFitInfo: When given, the histograms are first obtained without
                post-fit scaling and then passed, together with
                ``postFitInfo``, to ``applyPostFitScaling`` for every cached
                plot.
            addMCStat: When true, add per-sample 'StatUp'/'StatDown' variations
                built from the bin errors.

        Returns:
            Tuple ``(histos_summed, histos_splitted)``. Both are dicts keyed by
            the variation name (None for nominal). ``histos_summed[sys]`` is
            the sum over all samples; ``histos_splitted[sys]`` maps each sample
            key to its histogram.
        """
        resultsFile = os.path.join(resultsDir, self.name + '.pkl')
        if resultsFile not in loadedPkls:  # Speed optimization: check if plots already loaded
            with lock(resultsFile, 'rb') as f:
                loadedPkls[resultsFile] = pickle.load(f)
        # Clone every histogram; the entries of loadedPkls are not handed out
        allPlots = {
            plotKey: {
                histKey: hist.Clone()
                for histKey, hist in histos.items()
            }
            for plotKey, histos in loadedPkls[resultsFile].items()
        }

        if postFitInfo:  # Apply postfit scaling
            # Get first the sys histos without post-fit scaling
            _, sysHistos = self.getSysHistos(stackForSys, resultsDir,
                                             systematics)
            # Then use it to apply the same post-fit as for the central value
            for p in allPlots:
                allPlots[p] = applyPostFitScaling(allPlots[p], postFitInfo,
                                                  sysHistos)

        # In the 2D cache, the second key is name+texName of the sample
        histNames = [s.name + s.texName for s in stackForSys]

        sysKeys = [i + 'Up'
                   for i in systematics] + [i + 'Down' for i in systematics]
        if addMCStat:
            sysKeys += [n + 'StatUp' for n in histNames]
            sysKeys += [n + 'StatDown' for n in histNames]

        if 'q2' in systematics:
            constructQ2Sys(allPlots, self.name, stackForSys)
        if 'pdf' in systematics:
            constructPdfSys(allPlots, self.name, stackForSys)

        histos_summed = {}
        histos_splitted = {}
        for sys in [None] + sysKeys:
            histos_splitted[sys] = {}
            if sys and not any(x in sys
                               for x in ['Stat', 'sideBand', 'Scale']):
                plotName = self.name + sys  # in the 2D cache, the first key is plotname+sys
            else:
                plotName = self.name  # for nominal and some exceptions

            if plotName not in allPlots:  # check if sys variation has been run already
                # str(): sys is None for the nominal entry
                log.error('No ' + str(sys) + ' variation found for ' +
                          self.name)

            histos_summed[sys] = None
            for histName in histNames:
                if sys and 'Scale' in sys and 'noData' not in resultsDir:
                    # Ugly hack to apply scale systematics on MC instead of
                    # data (only when data is available)
                    data, dataSys = None, None
                    # Data entries (availability depends on ee, mumu, emu, SF)
                    for d in [
                            d for d in allPlots[self.name] if d.count('data')
                    ]:
                        # Nominal for data
                        data = addHist(data, allPlots[self.name][d])
                        # The eScale or phScale sys for data
                        dataSys = addHist(dataSys,
                                          allPlots[self.name + sys][d])
                    # Apply the eScale or phScale sys on MC
                    h = applySysToOtherHist(
                        data, dataSys, allPlots[plotName][histName].Clone())
                elif sys and 'sideBand' in sys:
                    # Ugly hack to apply side band uncertainty
                    h = applySidebandUnc(allPlots[self.name][histName].Clone(),
                                         self.name, resultsDir, 'Up' in sys)
                else:  # normal case, simply taken from cache
                    h = allPlots[plotName][histName].Clone()

                # MC statistics: shift the bin contents of the matching sample
                # by its bin errors (bins 0..GetNbinsX())
                if sys and 'StatUp' in sys and sys.replace(
                        'StatUp', '') in histName:
                    for i in range(h.GetNbinsX() + 1):
                        h.SetBinContent(i,
                                        h.GetBinContent(i) + h.GetBinError(i))
                if sys and 'StatDown' in sys and sys.replace(
                        'StatDown', '') in histName:
                    for i in range(h.GetNbinsX() + 1):
                        h.SetBinContent(i,
                                        h.GetBinContent(i) - h.GetBinError(i))

                if h.Integral() == 0:
                    log.debug("Found empty histogram %s:%s in %s/%s.pkl",
                              plotName, histName, resultsDir, self.name)
                if self.scaleFactor:
                    h.Scale(self.scaleFactor)

                histos_splitted[sys][histName] = h
                histos_summed[sys] = addHist(histos_summed[sys], h)

        return histos_summed, histos_splitted