class SamplesMerger:
    """Group histogram files by a merged request id and write their average.

    Files are registered with `addFile`; each one is assigned to the request
    id returned by `reqidProvider` for its (mc1, mn1) mass point.
    `mergeAndWrite` then produces, for every such id, one output file holding
    the equal-weight average of the selected histograms, and records the
    averaged mass point and the output file name.
    """

    # Matches names like 'wA_noslep_WH_2Lep_9_May16_n0139.AnaHists'.
    # Raw string so that '\d' and '\.' reach the regex engine untouched.
    _filenamePattern = r'wA_noslep_WH_2Lep_(?P<id>\d+)_(?P<tag>.*?)\.AnaHist'

    def __init__(self):
        self.filenamesByReqid = collections.defaultdict(list)
        self.masspointByReqid = collections.defaultdict(list)
        self.reqidProvider = ModeAWhDbMergedFake2Lreqid()
        self.parDb = ModeAWhDbPar()
        self.reqDb = ModeAWhDbReqid()
        self.overwrite = False  # when False, existing output files are kept
        self.verbose = False
        self.regexHist = '.*'  # only histograms whose name matches are merged
        # Filled by mergeAndWrite; defined here so that printMergedFiles can
        # be called safely before any merge has happened.
        self.mergedPointsByReqid = {}
        self.outFnameByReqid = {}

    def _matchFilename(self, fname):
        """Return the regex match of `fname` against the expected file name.

        Raises:
            ValueError: if `fname` does not follow the expected pattern.
        """
        match = re.search(self._filenamePattern, fname)
        if match is None:
            raise ValueError("cannot parse id and tag from filename '%s'"
                             % fname)
        return match

    def idTagFromFilename(self, fname):
        """Parse something like wA_noslep_WH_2Lep_9_May16_n0139.AnaHists.

        Returns:
            The tuple (id, tag), both as strings, e.g. ('9', 'May16_n0139').

        Raises:
            ValueError: if `fname` does not follow the expected pattern.
        """
        match = self._matchFilename(fname)
        return match.group('id'), match.group('tag')

    def addFile(self, filename):
        """Register `filename` under the merged request id of its mass point."""
        sample = guessSampleFromFilename(filename)
        mc1, mn1 = self.parDb.mc1Mn1ByReqid(self.reqDb.reqidBySample(sample))
        fakeReqid = self.reqidProvider.reqidByMc1Mn1(mc1, mn1)
        self.filenamesByReqid[fakeReqid].append(filename)
        self.masspointByReqid[fakeReqid].append((mc1, mn1))

    def _mergedBasename(self, fname, rid):
        """Return the basename of `fname` with its id field replaced by `rid`.

        Only the id field located by the file name pattern is substituted; a
        plain str.replace would also rewrite the same digits wherever else
        they occur in the name (e.g. inside the tag).
        """
        base = os.path.basename(fname)
        start, end = self._matchFilename(base).span('id')
        return base[:start] + str(rid) + base[end:]

    @staticmethod
    def _averagePoint(points):
        """Return the component-wise mean of a list of (x, y) pairs."""
        count = float(len(points))
        return (sum(x for x, _ in points) / count,
                sum(y for _, y in points) / count)

    @staticmethod
    def _mergeFiles(targetFile, filenames, regex):
        """Write to `targetFile` the average of the histograms in `filenames`.

        The histogram names are taken from the first input file, keeping only
        those matching `regex`; each input contributes with weight 1/N.

        Raises:
            IOError: if an input file or the output file cannot be opened.
        """
        infiles = []
        outFile = None
        try:
            for fname in filenames:
                infile = r.TFile.Open(fname)
                if not infile or infile.IsZombie():
                    raise IOError("cannot open input file '%s'" % fname)
                infiles.append(infile)
            histonames = [h for h in getAllHistoNames(infiles[0])
                          if re.search(regex, h)]
            scale = 1.0 / len(infiles)
            outFile = r.TFile.Open(targetFile, 'recreate')
            if not outFile or outFile.IsZombie():
                raise IOError("cannot open output file '%s'" % targetFile)
            outFile.cd()
            for hn in histonames:
                h = infiles[0].Get(hn).Clone()
                h.Scale(scale)
                for inf in infiles[1:]:
                    h.Add(inf.Get(hn), scale)
                h.Write()
        finally:
            # Release every file handle, also when something failed halfway.
            if outFile:
                outFile.Close()
            for infile in infiles:
                infile.Close()

    def mergeAndWrite(self, outdir):
        """Write merged histos, unless !overwrite, in which case just compute
        merged coords and fnames.

        For every registered request id the averaged mass point and the output
        file name are stored in `mergedPointsByReqid` and `outFnameByReqid`.
        The histograms are actually merged only if the output file does not
        exist yet or `overwrite` is set.

        Raises:
            AssertionError: if an id has no samples, or if its files carry
                different tags.
            ValueError: if a file name does not follow the expected pattern.
        """
        self.mergedPointsByReqid = {}
        self.outFnameByReqid = {}
        for rid, fnames in self.filenamesByReqid.items():
            points = self.masspointByReqid[rid]
            # Raised explicitly (instead of `assert`) so that the checks are
            # still active when running with -O.
            if not fnames or not points:
                raise AssertionError("cannot merge 0 samples")
            tags = set(self.idTagFromFilename(f)[1] for f in fnames)
            if len(tags) != 1:
                raise AssertionError(
                    "cannot merge samples with different tags %s"
                    % str(fnames))
            point = self._averagePoint(points)
            outfname = os.path.join(outdir,
                                    self._mergedBasename(fnames[0], rid))
            self.outFnameByReqid[rid] = outfname
            self.mergedPointsByReqid[rid] = point
            if self.verbose:
                print(str(point) + ' : ' + outfname)
            if os.path.exists(outfname) and not self.overwrite:
                continue
            self._mergeFiles(outfname, fnames, self.regexHist)

    def printMergedFiles(self):
        """Print one '<merged point>  :  <output file>' line per request id."""
        for rid in self.outFnameByReqid:
            print("%s  :  %s" % (self.mergedPointsByReqid[rid],
                                 self.outFnameByReqid[rid]))
countBkgTot[sel] += counts print countBkgTot reqDb = ModeAWhDbReqid() parDb = ModeAWhDbPar() def selIsRelevant(sel) : return sel.startswith('sr') def selIsFinal(sel) : return sel in interestingSelections mc1Range = {'min': min(parDb.allMc1()), 'max' : max(parDb.allMc1())} mn1Range = {'min': min(parDb.allMn1()), 'max' : max(parDb.allMn1())} histos = dict() backgroundRelErr = 0.2 for sample, countsSel in countsSigSampleSel.iteritems() : mc1, mn1 = parDb.mc1Mn1ByReqid(reqDb.reqidBySample(sample)) print "%s (%.1f, %.1f) " % (sample, mc1, mn1) for sel, counts in sorted(countsSel.iteritems()) : if not selIsFinal(sel) : continue histo = histos[sel] if sel in histos else r.TH2F(sel+'_zn', sel+" Z_{n} (#deltab=%.2f);mc_{1};mn_{1}"%backgroundRelErr, 50, float(mc1Range['min']), float(mc1Range['max']), 50, float(mn1Range['min']), float(mn1Range['max'])) if sel not in histos : histos[sel] = histo sig, bkg, dBkg = counts, countBkgTot[sel], backgroundRelErr zn = r.RooStats.NumberCountingUtils.BinomialExpZ(sigScale*sig, bkg, dBkg) histo.Fill(mc1, mn1, zn) print "%s : %.2f / %.2f -> %.2f"%(sel, sig, bkg, zn) r.gStyle.SetPaintTextFormat('.2f') for s, h in histos.iteritems() :
def selIsRelevant(sel):
    """Tell whether the selection name starts with 'sr'."""
    return sel.startswith('sr')


def selIsFinal(sel):
    """Tell whether the selection is one of `interestingSelections`."""
    return sel in interestingSelections


# Axis limits of the (mc1, mn1) plane, from all the values known to parDb.
mc1Range = dict(min=min(parDb.allMc1()), max=max(parDb.allMc1()))
mn1Range = dict(min=min(parDb.allMn1()), max=max(parDb.allMn1()))
histos = {}
backgroundRelErr = 0.2
# One 2D histogram per final selection; each signal sample fills the bin at
# its (mc1, mn1) with the value returned by BinomialExpZ for that selection.
for sample, countsSel in countsSigSampleSel.iteritems():
    mc1, mn1 = parDb.mc1Mn1ByReqid(reqDb.reqidBySample(sample))
    print("%s (%.1f, %.1f) " % (sample, mc1, mn1))
    for sel, counts in sorted(countsSel.iteritems()):
        if not selIsFinal(sel):
            continue
        if sel not in histos:
            # Book the histogram the first time this selection shows up.
            histos[sel] = r.TH2F(
                sel + '_zn',
                sel + (" Z_{n} (#deltab=%.2f);mc_{1};mn_{1}"
                       % backgroundRelErr),
                50, float(mc1Range['min']), float(mc1Range['max']),
                50, float(mn1Range['min']), float(mn1Range['max']))
        histo = histos[sel]
        sig = counts
        bkg = countBkgTot[sel]
        dBkg = backgroundRelErr
        zn = r.RooStats.NumberCountingUtils.BinomialExpZ(sigScale * sig,
                                                         bkg, dBkg)
        histo.Fill(mc1, mn1, zn)
        print("%s : %.2f / %.2f -> %.2f" % (sel, sig, bkg, zn))
class SamplesMerger:
    """Group histogram files by a merged request id and write their average.

    Files are registered with `addFile`; each one is assigned to the request
    id returned by `reqidProvider` for its (mc1, mn1) mass point.
    `mergeAndWrite` then produces, for every such id, one output file holding
    the equal-weight average of the selected histograms, and records the
    averaged mass point and the output file name.
    """

    # Matches names like 'wA_noslep_WH_2Lep_9_May16_n0139.AnaHists'.
    # Raw string so that '\d' and '\.' reach the regex engine untouched.
    _filenamePattern = r'wA_noslep_WH_2Lep_(?P<id>\d+)_(?P<tag>.*?)\.AnaHist'

    def __init__(self):
        self.filenamesByReqid = collections.defaultdict(list)
        self.masspointByReqid = collections.defaultdict(list)
        self.reqidProvider = ModeAWhDbMergedFake2Lreqid()
        self.parDb = ModeAWhDbPar()
        self.reqDb = ModeAWhDbReqid()
        self.overwrite = False  # when False, existing output files are kept
        self.verbose = False
        self.regexHist = '.*'  # only histograms whose name matches are merged
        # Filled by mergeAndWrite; defined here so that printMergedFiles can
        # be called safely before any merge has happened.
        self.mergedPointsByReqid = {}
        self.outFnameByReqid = {}

    def _matchFilename(self, fname):
        """Return the regex match of `fname` against the expected file name.

        Raises:
            ValueError: if `fname` does not follow the expected pattern.
        """
        match = re.search(self._filenamePattern, fname)
        if match is None:
            raise ValueError("cannot parse id and tag from filename '%s'"
                             % fname)
        return match

    def idTagFromFilename(self, fname):
        """Parse something like wA_noslep_WH_2Lep_9_May16_n0139.AnaHists.

        Returns:
            The tuple (id, tag), both as strings, e.g. ('9', 'May16_n0139').

        Raises:
            ValueError: if `fname` does not follow the expected pattern.
        """
        match = self._matchFilename(fname)
        return match.group('id'), match.group('tag')

    def addFile(self, filename):
        """Register `filename` under the merged request id of its mass point."""
        sample = guessSampleFromFilename(filename)
        mc1, mn1 = self.parDb.mc1Mn1ByReqid(self.reqDb.reqidBySample(sample))
        fakeReqid = self.reqidProvider.reqidByMc1Mn1(mc1, mn1)
        self.filenamesByReqid[fakeReqid].append(filename)
        self.masspointByReqid[fakeReqid].append((mc1, mn1))

    def _mergedBasename(self, fname, rid):
        """Return the basename of `fname` with its id field replaced by `rid`.

        Only the id field located by the file name pattern is substituted; a
        plain str.replace would also rewrite the same digits wherever else
        they occur in the name (e.g. inside the tag).
        """
        base = os.path.basename(fname)
        start, end = self._matchFilename(base).span('id')
        return base[:start] + str(rid) + base[end:]

    @staticmethod
    def _averagePoint(points):
        """Return the component-wise mean of a list of (x, y) pairs."""
        count = float(len(points))
        return (sum(x for x, _ in points) / count,
                sum(y for _, y in points) / count)

    @staticmethod
    def _mergeFiles(targetFile, filenames, regex):
        """Write to `targetFile` the average of the histograms in `filenames`.

        The histogram names are taken from the first input file, keeping only
        those matching `regex`; each input contributes with weight 1/N.

        Raises:
            IOError: if an input file or the output file cannot be opened.
        """
        infiles = []
        outFile = None
        try:
            for fname in filenames:
                infile = r.TFile.Open(fname)
                if not infile or infile.IsZombie():
                    raise IOError("cannot open input file '%s'" % fname)
                infiles.append(infile)
            histonames = [h for h in getAllHistoNames(infiles[0])
                          if re.search(regex, h)]
            scale = 1.0 / len(infiles)
            outFile = r.TFile.Open(targetFile, 'recreate')
            if not outFile or outFile.IsZombie():
                raise IOError("cannot open output file '%s'" % targetFile)
            outFile.cd()
            for hn in histonames:
                h = infiles[0].Get(hn).Clone()
                h.Scale(scale)
                for inf in infiles[1:]:
                    h.Add(inf.Get(hn), scale)
                h.Write()
        finally:
            # Release every file handle, also when something failed halfway.
            if outFile:
                outFile.Close()
            for infile in infiles:
                infile.Close()

    def mergeAndWrite(self, outdir):
        """Write merged histos, unless !overwrite, in which case just compute
        merged coords and fnames.

        For every registered request id the averaged mass point and the output
        file name are stored in `mergedPointsByReqid` and `outFnameByReqid`.
        The histograms are actually merged only if the output file does not
        exist yet or `overwrite` is set.

        Raises:
            AssertionError: if an id has no samples, or if its files carry
                different tags.
            ValueError: if a file name does not follow the expected pattern.
        """
        self.mergedPointsByReqid = {}
        self.outFnameByReqid = {}
        for rid, fnames in self.filenamesByReqid.items():
            points = self.masspointByReqid[rid]
            # Raised explicitly (instead of `assert`) so that the checks are
            # still active when running with -O.
            if not fnames or not points:
                raise AssertionError("cannot merge 0 samples")
            tags = set(self.idTagFromFilename(f)[1] for f in fnames)
            if len(tags) != 1:
                raise AssertionError(
                    "cannot merge samples with different tags %s"
                    % str(fnames))
            point = self._averagePoint(points)
            outfname = os.path.join(outdir,
                                    self._mergedBasename(fnames[0], rid))
            self.outFnameByReqid[rid] = outfname
            self.mergedPointsByReqid[rid] = point
            if self.verbose:
                print(str(point) + ' : ' + outfname)
            if os.path.exists(outfname) and not self.overwrite:
                continue
            self._mergeFiles(outfname, fnames, self.regexHist)

    def printMergedFiles(self):
        """Print one '<merged point>  :  <output file>' line per request id."""
        for rid in self.outFnameByReqid:
            print("%s  :  %s" % (self.mergedPointsByReqid[rid],
                                 self.outFnameByReqid[rid]))