class SamplesMerger :
    """Group histogram files by merged request id and average them.

    Files are registered with addFile(), which looks up the sample's
    (mc1, mn1) and asks `reqidProvider` for the request id under which the
    file is grouped.  mergeAndWrite() then computes, for every such id, the
    average (mc1, mn1) and the output filename, and writes the averaged
    histograms.

    Attributes read by the methods:
      overwrite  -- when False, an already existing output file is not rewritten
      verbose    -- when True, print the merged point and output name per id
      regexHist  -- only histograms whose name matches (re.search) are merged
    """
    # Filename layout, e.g. 'wA_noslep_WH_2Lep_9_May16_n0139.AnaHists'.
    # Raw string so that '\d' reaches the regex engine untouched; the dot in
    # front of 'AnaHist' is escaped so that it only matches a literal '.'.
    _filenamePattern = re.compile(r'wA_noslep_WH_2Lep_(?P<id>\d+?)_(?P<tag>.*?)\.AnaHist')
    def __init__(self) :
        self.filenamesByReqid = collections.defaultdict(list)
        self.masspointByReqid = collections.defaultdict(list)
        self.reqidProvider = ModeAWhDbMergedFake2Lreqid()
        self.parDb = ModeAWhDbPar()
        self.reqDb = ModeAWhDbReqid()
        self.overwrite = False
        self.verbose = False
        self.regexHist = '.*'
    def idTagFromFilename(self, fname) :
        """Parse something like 'wA_noslep_WH_2Lep_9_May16_n0139.AnaHists'.

        Returns the (id, tag) strings, ('9', 'May16_n0139') for the example.
        Raises ValueError when `fname` does not follow that layout.
        """
        match = self._filenamePattern.search(fname)
        if match is None :
            raise ValueError("cannot parse id and tag from filename '%s'" % fname)
        return match.group('id'), match.group('tag')
    def addFile(self, filename) :
        """Register `filename` under the request id provided for its (mc1, mn1)."""
        sample = guessSampleFromFilename(filename)
        mc1, mn1 = self.parDb.mc1Mn1ByReqid(self.reqDb.reqidBySample(sample))
        fakeReqid = self.reqidProvider.reqidByMc1Mn1(mc1, mn1)
        self.filenamesByReqid[fakeReqid].append(filename)
        self.masspointByReqid[fakeReqid].append((mc1, mn1))
    def _averagePoint(self, points) :
        "Return the component-wise mean of a non-empty list of (x, y) pairs."
        nPoints = float(len(points))
        return (sum(x for x, _ in points)/nPoints,
                sum(y for _, y in points)/nPoints)
    def _mergedBasename(self, fname, rid) :
        """Return the basename of `fname` with its id field replaced by `rid`.

        Only the characters matched as the id are substituted; a plain
        str.replace() would also rewrite the same digits elsewhere in the
        name (e.g. the '9' of 'n0139' when the id is '9').
        """
        base = os.path.basename(fname)
        match = self._filenamePattern.search(base)
        if match is None :
            raise ValueError("cannot build merged filename from '%s'" % fname)
        return base[:match.start('id')] + str(rid) + base[match.end('id'):]
    def _mergeFiles(self, targetFile, filenames, regex) :
        """Write to `targetFile` the average of the histograms in `filenames`.

        The histogram names are taken from the first input file and filtered
        with `regex`; each input contributes with weight 1/len(filenames).
        """
        infiles = [r.TFile.Open(f) for f in filenames]
        try :
            nameMatches = re.compile(regex).search
            histonames = [h for h in getAllHistoNames(infiles[0]) if nameMatches(h)]
            scale = 1.0/len(infiles)
            outFile = r.TFile.Open(targetFile, 'recreate')
            try :
                outFile.cd()
                for hn in histonames :
                    h = infiles[0].Get(hn).Clone()
                    h.Scale(scale)
                    for inf in infiles[1:] : h.Add(inf.Get(hn), scale)
                    h.Write()
            finally :
                outFile.Close()
        finally :
            # Release the inputs as well; they used to be left open.
            # NOTE(review): relies on a failed TFile.Open giving a falsy
            # object in PyROOT -- confirm for the ROOT version in use.
            for inf in infiles :
                if inf : inf.Close()
    def mergeAndWrite(self, outdir) :
        """Compute merged points and output filenames, and write merged histos.

        Fills self.mergedPointsByReqid and self.outFnameByReqid for every
        registered id.  The merged file is written to `outdir` unless it
        already exists and self.overwrite is False, in which case only the
        merged coordinates and filenames are computed.

        Raises AssertionError when an id has no points or its files carry
        different tags, and ValueError when a filename cannot be parsed.
        """
        self.mergedPointsByReqid = {}
        self.outFnameByReqid     = {}
        for rid in list(self.filenamesByReqid) :
            fnames = self.filenamesByReqid[rid]
            points = self.masspointByReqid[rid]
            assert len(points),"cannot merge 0 samples"
            point = self._averagePoint(points)
            tags = [self.idTagFromFilename(f)[1] for f in fnames]
            assert len(set(tags))==1,"cannot merge samples with different tags %s"%str(fnames)
            outfname = outdir+'/'+self._mergedBasename(fnames[0], rid)
            self.outFnameByReqid[rid], self.mergedPointsByReqid[rid] = outfname, point
            # single-argument print(...) gives the same output in python 2 and 3
            if self.verbose : print(str(point)+' : '+outfname)
            if os.path.exists(outfname) and not self.overwrite : continue
            self._mergeFiles(outfname, fnames, self.regexHist)
    def printMergedFiles(self) :
        "Print 'point  :  filename' for every id handled by mergeAndWrite()."
        for rid in self.outFnameByReqid :
            # same text as the python-2 statement `print point, ' : ', fname`
            print('%s  :  %s' % (self.mergedPointsByReqid[rid], self.outFnameByReqid[rid]))
        countBkgTot[sel] += counts
print countBkgTot

# Lookup helpers used by the loop below: reqDb gives the request id of a
# sample (reqidBySample), parDb gives the (mc1, mn1) of a request id
# (mc1Mn1ByReqid) and the lists of all mc1 / mn1 values.
reqDb = ModeAWhDbReqid()
parDb = ModeAWhDbPar()

def selIsRelevant(sel) :
    "Return True when the selection name `sel` begins with 'sr'."
    return sel.startswith('sr')
def selIsFinal(sel) :
    "Return True when `sel` is one of the module-level interestingSelections."
    isFinal = sel in interestingSelections
    return isFinal

# Axis limits of the (mc1, mn1) plane, from all values known to parDb.
mc1Range = dict(min=min(parDb.allMc1()), max=max(parDb.allMc1()))
mn1Range = dict(min=min(parDb.allMn1()), max=max(parDb.allMn1()))

# One TH2F per final selection, filled at (mc1, mn1) with the significance
# computed from the signal count, the total background and its relative error.
histos = {}
backgroundRelErr = 0.2
for sample, countsSel in countsSigSampleSel.iteritems() :
    reqid = reqDb.reqidBySample(sample)
    mc1, mn1 = parDb.mc1Mn1ByReqid(reqid)
    print("%s (%.1f, %.1f) " % (sample, mc1, mn1))
    for sel, counts in sorted(countsSel.iteritems()) :
        if not selIsFinal(sel) :
            continue
        if sel not in histos :
            # book the histogram the first time this selection shows up
            histos[sel] = r.TH2F(sel+'_zn',
                                 sel+" Z_{n} (#deltab=%.2f);mc_{1};mn_{1}"%backgroundRelErr,
                                 50, float(mc1Range['min']), float(mc1Range['max']),
                                 50, float(mn1Range['min']), float(mn1Range['max']))
        histo = histos[sel]
        sig, bkg, dBkg = counts, countBkgTot[sel], backgroundRelErr
        zn = r.RooStats.NumberCountingUtils.BinomialExpZ(sigScale*sig, bkg, dBkg)
        histo.Fill(mc1, mn1, zn)
        print("%s : %.2f / %.2f -> %.2f"%(sel, sig, bkg, zn))

# format used when bin contents are painted as text
r.gStyle.SetPaintTextFormat('.2f')
for s, h in histos.iteritems() :
예제 #3
0
def selIsRelevant(sel):
    """Tell whether the selection name `sel` starts with the 'sr' prefix."""
    srPrefix = 'sr'
    return sel.startswith(srPrefix)


def selIsFinal(sel):
    """Tell whether `sel` belongs to the module-level interestingSelections."""
    if sel in interestingSelections:
        return True
    return False


# Extremes of the mc1 and mn1 values known to parDb; used as axis limits.
mc1Range = {
    'min': min(parDb.allMc1()),
    'max': max(parDb.allMc1()),
}
mn1Range = {
    'min': min(parDb.allMn1()),
    'max': max(parDb.allMn1()),
}

# Significance maps, one per final selection, keyed by selection name.
histos = dict()
backgroundRelErr = 0.2
for sample, countsSel in countsSigSampleSel.iteritems():
    mc1, mn1 = parDb.mc1Mn1ByReqid(reqDb.reqidBySample(sample))
    print("%s (%.1f, %.1f) " % (sample, mc1, mn1))
    for sel, counts in sorted(countsSel.iteritems()):
        if not selIsFinal(sel):
            continue
        try:
            histo = histos[sel]
        except KeyError:
            # first occurrence of this selection: create and store its map
            histo = r.TH2F(
                sel + '_zn',
                sel + " Z_{n} (#deltab=%.2f);mc_{1};mn_{1}" % backgroundRelErr,
                50, float(mc1Range['min']), float(mc1Range['max']),
                50, float(mn1Range['min']), float(mn1Range['max']))
            histos[sel] = histo
        sig, bkg, dBkg = counts, countBkgTot[sel], backgroundRelErr
        zn = r.RooStats.NumberCountingUtils.BinomialExpZ(
            sigScale * sig, bkg, dBkg)
        histo.Fill(mc1, mn1, zn)
        print("%s : %.2f / %.2f -> %.2f" % (sel, sig, bkg, zn))
예제 #4
0
class SamplesMerger:
    """Collect histogram files per merged request id and average them.

    addFile() looks up the (mc1, mn1) of the file's sample and stores the
    file under the request id that `reqidProvider` returns for that point.
    mergeAndWrite() then derives, per id, the mean (mc1, mn1) and the output
    filename, and writes the averaged histograms.

    Attributes read by the methods:
      overwrite  -- when False, an existing output file is left untouched
      verbose    -- when True, print the merged point and output name per id
      regexHist  -- only histograms whose name matches (re.search) are merged
    """

    # Filename layout, e.g. 'wA_noslep_WH_2Lep_9_May16_n0139.AnaHists'.
    # Written as a raw string so '\d' is not treated as a string escape, and
    # with an escaped dot so that only a literal '.' precedes 'AnaHist'.
    _filenamePattern = re.compile(
        r'wA_noslep_WH_2Lep_(?P<id>\d+?)_(?P<tag>.*?)\.AnaHist')

    def __init__(self):
        self.filenamesByReqid = collections.defaultdict(list)
        self.masspointByReqid = collections.defaultdict(list)
        self.reqidProvider = ModeAWhDbMergedFake2Lreqid()
        self.parDb = ModeAWhDbPar()
        self.reqDb = ModeAWhDbReqid()
        self.overwrite = False
        self.verbose = False
        self.regexHist = '.*'

    def idTagFromFilename(self, fname):
        """Parse something like 'wA_noslep_WH_2Lep_9_May16_n0139.AnaHists'.

        Returns the (id, tag) strings, ('9', 'May16_n0139') for the example.
        Raises ValueError when `fname` does not follow that layout.
        """
        match = self._filenamePattern.search(fname)
        if match is None:
            raise ValueError(
                "cannot parse id and tag from filename '%s'" % fname)
        return match.group('id'), match.group('tag')

    def addFile(self, filename):
        """Store `filename` under the request id provided for its (mc1, mn1)."""
        sample = guessSampleFromFilename(filename)
        mc1, mn1 = self.parDb.mc1Mn1ByReqid(self.reqDb.reqidBySample(sample))
        fakeReqid = self.reqidProvider.reqidByMc1Mn1(mc1, mn1)
        self.filenamesByReqid[fakeReqid].append(filename)
        self.masspointByReqid[fakeReqid].append((mc1, mn1))

    def _averagePoint(self, points):
        """Return the component-wise mean of a non-empty list of (x, y)."""
        count = float(len(points))
        xs, ys = zip(*points)
        return (sum(xs) / count, sum(ys) / count)

    def _mergedBasename(self, fname, rid):
        """Return the basename of `fname` with its id field set to `rid`.

        The substitution is limited to the span matched as the id; replacing
        the id digits with str.replace() would also hit the same digits in
        the rest of the name (e.g. the '9' of 'n0139' when the id is '9').
        """
        base = os.path.basename(fname)
        match = self._filenamePattern.search(base)
        if match is None:
            raise ValueError("cannot build merged filename from '%s'" % fname)
        idStart, idEnd = match.span('id')
        return base[:idStart] + str(rid) + base[idEnd:]

    def _mergeFiles(self, targetFile, filenames, regex):
        """Write to `targetFile` the average of the histograms in `filenames`.

        Histogram names come from the first input file, filtered by `regex`;
        every input enters with weight 1/len(filenames).
        """
        infiles = [r.TFile.Open(f) for f in filenames]
        try:
            nameMatches = re.compile(regex).search
            histonames = [
                h for h in getAllHistoNames(infiles[0]) if nameMatches(h)
            ]
            scale = 1.0 / len(infiles)
            outFile = r.TFile.Open(targetFile, 'recreate')
            try:
                outFile.cd()
                for hn in histonames:
                    h = infiles[0].Get(hn).Clone()
                    h.Scale(scale)
                    for inf in infiles[1:]:
                        h.Add(inf.Get(hn), scale)
                    h.Write()
            finally:
                outFile.Close()
        finally:
            # Close the inputs too; previously they were never released.
            # NOTE(review): assumes a failed TFile.Open yields a falsy
            # object in PyROOT -- confirm for the ROOT version in use.
            for inf in infiles:
                if inf:
                    inf.Close()

    def mergeAndWrite(self, outdir):
        """Compute merged points and output filenames, and write merged histos.

        Fills self.mergedPointsByReqid and self.outFnameByReqid for every
        registered id.  The merged file is written to `outdir` unless it
        already exists and self.overwrite is False, in which case only the
        merged coordinates and filenames are computed.

        Raises AssertionError when an id has no points or its files carry
        different tags, and ValueError when a filename cannot be parsed.
        """
        self.mergedPointsByReqid = {}
        self.outFnameByReqid = {}
        for rid in list(self.filenamesByReqid):
            fnames = self.filenamesByReqid[rid]
            points = self.masspointByReqid[rid]
            assert len(points), "cannot merge 0 samples"
            point = self._averagePoint(points)
            tags = set(self.idTagFromFilename(f)[1] for f in fnames)
            assert len(tags) == 1, (
                "cannot merge samples with different tags %s" % str(fnames))
            outfname = outdir + '/' + self._mergedBasename(fnames[0], rid)
            self.outFnameByReqid[rid] = outfname
            self.mergedPointsByReqid[rid] = point
            if self.verbose:
                # single-argument print(...) behaves the same in python 2/3
                print(str(point) + ' : ' + outfname)
            if os.path.exists(outfname) and not self.overwrite:
                continue
            self._mergeFiles(outfname, fnames, self.regexHist)

    def printMergedFiles(self):
        """Print 'point  :  filename' for every id seen by mergeAndWrite()."""
        for rid in self.outFnameByReqid:
            # same text as the python-2 statement `print point, ' : ', fname`
            print('%s  :  %s' % (self.mergedPointsByReqid[rid],
                                 self.outFnameByReqid[rid]))