def __init__(self):
    """Create an empty merger: no files registered, default options."""
    # Option flags, both off by default.
    self.overwrite = self.verbose = False
    # Histogram-name filter; the default pattern matches every name.
    self.regexHist = '.*'
    # Lookup helpers (constructed in the same relative order as before).
    self.reqidProvider = ModeAWhDbMergedFake2Lreqid()
    self.parDb = ModeAWhDbPar()
    self.reqDb = ModeAWhDbReqid()
    # Per-reqid bookkeeping: reqid -> list of filenames / list of mass points.
    self.filenamesByReqid = collections.defaultdict(list)
    self.masspointByReqid = collections.defaultdict(list)
class SamplesMerger:
    """Average groups of per-sample histogram files into one file per merged reqid.

    Files are registered with addFile(), which maps the sample to its
    (mc1, mn1) mass point and to the merged ("fake") request id provided by
    ModeAWhDbMergedFake2Lreqid.  mergeAndWrite() then computes, for each merged
    reqid, the averaged mass point and the output filename, and writes the
    averaged histograms unless the output already exists and overwrite is off.

    Attributes:
        filenamesByReqid: merged reqid -> list of registered input filenames.
        masspointByReqid: merged reqid -> list of (mc1, mn1), one per file.
        mergedPointsByReqid: merged reqid -> averaged (mc1, mn1);
            filled by mergeAndWrite().
        outFnameByReqid: merged reqid -> output filename;
            filled by mergeAndWrite().
        overwrite: if False, an existing output file is not rewritten.
        verbose: if True, print each merged point and its output filename.
        regexHist: only histograms whose name matches (re.search) are merged.
    """

    # Fixed part of the filename that precedes the numeric id field.
    _fnamePrefix = 'wA_noslep_WH_2Lep_'

    def __init__(self):
        self.filenamesByReqid = collections.defaultdict(list)
        self.masspointByReqid = collections.defaultdict(list)
        self.reqidProvider = ModeAWhDbMergedFake2Lreqid()
        self.parDb = ModeAWhDbPar()
        self.reqDb = ModeAWhDbReqid()
        self.overwrite = False
        self.verbose = False
        self.regexHist = '.*'
        # Defined up front so printMergedFiles() is safe to call before
        # mergeAndWrite() (it then simply prints nothing).
        self.mergedPointsByReqid = {}
        self.outFnameByReqid = {}

    def idTagFromFilename(self, fname):
        """Return the (id, tag) strings parsed from a filename.

        Expects something like 'wA_noslep_WH_2Lep_9_May16_n0139.AnaHists',
        for which ('9', 'May16_n0139') is returned.

        Raises:
            AttributeError: if fname does not match the expected pattern
                (re.search returns None).
        """
        # Raw string so '\d' is a regex escape, and '\.' so that only a
        # literal dot separates the tag from the 'AnaHist' suffix.
        match = re.search(
            r'wA_noslep_WH_2Lep_(?P<id>\d+?)_(?P<tag>.*?)\.AnaHist', fname)
        return match.group('id'), match.group('tag')

    def addFile(self, filename):
        """Register filename under the merged reqid of its mass point."""
        sample = guessSampleFromFilename(filename)
        mc1, mn1 = self.parDb.mc1Mn1ByReqid(self.reqDb.reqidBySample(sample))
        fakeReqid = self.reqidProvider.reqidByMc1Mn1(mc1, mn1)
        self.filenamesByReqid[fakeReqid].append(filename)
        self.masspointByReqid[fakeReqid].append((mc1, mn1))

    @staticmethod
    def _avgPoint(points):
        """Return the component-wise mean of a non-empty list of (x, y)."""
        count = float(len(points))
        return (sum([x for x, y in points]) / count,
                sum([y for x, y in points]) / count)

    @staticmethod
    def _mergeFiles(targetFile, filenames, regex):
        """Write to targetFile the average of the matching histograms.

        The histogram names are taken from the first input file; each
        histogram is scaled by 1/len(filenames) and summed over all inputs.
        """
        infiles = [r.TFile.Open(f) for f in filenames]
        try:
            histonames = [h for h in getAllHistoNames(infiles[0])
                          if re.search(regex, h)]
            scale = 1.0 / len(infiles)
            outFile = r.TFile.Open(targetFile, 'recreate')
            outFile.cd()
            for hn in histonames:
                h = infiles[0].Get(hn).Clone()
                h.Scale(scale)
                for inf in infiles[1:]:
                    h.Add(inf.Get(hn), scale)
                h.Write()
            outFile.Close()
        finally:
            # Release the input files; the original left them open.
            for inf in infiles:
                if inf:
                    inf.Close()

    def mergeAndWrite(self, outdir):
        """Compute merged points and output names; write the merged histos.

        For every merged reqid the averaged mass point and the output
        filename are stored in mergedPointsByReqid / outFnameByReqid.  The
        merged file itself is written only if it does not exist yet or if
        self.overwrite is set.

        Raises:
            AssertionError: if a reqid has no samples, or if its files carry
                different tags.
        """
        self.mergedPointsByReqid = {}
        self.outFnameByReqid = {}
        for rid in list(self.filenamesByReqid):
            fnames = self.filenamesByReqid[rid]
            points = self.masspointByReqid[rid]
            assert len(points), "cannot merge 0 samples"
            point = self._avgPoint(points)
            # vertical slice: one tuple of ids, one tuple of tags
            ids, tags = zip(*[self.idTagFromFilename(f) for f in fnames])
            assert len(set(tags)) == 1, "cannot merge samples with different tags %s" % str(fnames)
            # Swap only the id field.  A blanket replace of the id digits
            # would also rewrite them elsewhere in the name (e.g. id '9'
            # inside 'n0139', or id '2' inside '2Lep').
            oldField = self._fnamePrefix + str(ids[0]) + '_'
            newField = self._fnamePrefix + str(rid) + '_'
            basename = os.path.basename(fnames[0])
            outfname = outdir + '/' + basename.replace(oldField, newField, 1)
            self.outFnameByReqid[rid] = outfname
            self.mergedPointsByReqid[rid] = point
            if self.verbose:
                print(str(point) + ' : ' + outfname)
            if os.path.exists(outfname) and not self.overwrite:
                continue
            self._mergeFiles(outfname, fnames, self.regexHist)

    def printMergedFiles(self):
        """Print 'point  :  filename' for every merged reqid."""
        for rid in self.outFnameByReqid:
            # Single pre-formatted argument: same text as the former
            # comma-separated print statement, valid on Python 2 and 3.
            print('%s  :  %s' % (self.mergedPointsByReqid[rid],
                                 self.outFnameByReqid[rid]))
def __init__(self):
    """Create an empty merger: no files registered, default options."""
    # Option flags, both off by default.
    self.overwrite = self.verbose = False
    # Histogram-name filter; the default pattern matches every name.
    self.regexHist = '.*'
    # Lookup helpers (constructed in the same relative order as before).
    self.reqidProvider = ModeAWhDbMergedFake2Lreqid()
    self.parDb = ModeAWhDbPar()
    self.reqDb = ModeAWhDbReqid()
    # Per-reqid bookkeeping: reqid -> list of filenames / list of mass points.
    self.filenamesByReqid = collections.defaultdict(list)
    self.masspointByReqid = collections.defaultdict(list)
import ROOT as r r.PyConfig.IgnoreCommandLineOptions = True r.gROOT.SetBatch(1) from PickleUtils import readFromPickle from SampleUtils import ModeAWhDbPar, ModeAWhDbReqid, ModeAWhDbMergedFake2Lreqid parser = optparse.OptionParser() parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False, help="print more details about what is going on") (options, args) = parser.parse_args() verbose = options.verbose reqDb = ModeAWhDbReqid() parDb = ModeAWhDbPar() allMc1 = [float(m) for m in parDb.allMc1()] allMn1 = [float(m) for m in parDb.allMn1()] def roundup(val) : return round(float(val)+0.5) def rounddo(val) : return round(float(val)+0.0) mc1Range = {'min': rounddo(min(allMc1)), 'max' : roundup(max(allMc1))} mn1Range = {'min': rounddo(min(allMn1)), 'max' : roundup(max(allMn1))} histo2l = r.TH2F('mc1mn1_2lep_reqids', 'ReqIds for the WH 2lep grid ;mc_{1};mn_{1}', 50, float(mc1Range['min']), float(mc1Range['max']), 50, float(mn1Range['min']), float(mn1Range['max'])) histo2lnth = r.TH2F('mc1mn1_2lep_notauhad_reqids', 'ReqIds for the WH notauhad 2lep grid ;mc_{1};mn_{1}', 50, float(mc1Range['min']), float(mc1Range['max']),
verbose = options.verbose

# Per-sample, per-selection counts for signal and background.
countsSigSampleSel = readFromPickle(sigInputFname)
countsBkgSampleSel = readFromPickle(bkgInputFname)

# Selections of interest: sr6 ... sr9.
interestingSelections = ["sr%d" % i for i in range(6, 10)]

# Total background per selection, summed over the individual samples;
# the 'data' and 'totbkg' entries are left out of the sum.
countBkgTot = collections.defaultdict(float)
for sample, countsSel in countsBkgSampleSel.iteritems():
    if sample not in ('data', 'totbkg'):
        for sel, counts in countsSel.iteritems():
            if sel in interestingSelections:
                print('adding ' + sample + ' to ' + sel + ' (' + str(counts) + ')')
                countBkgTot[sel] += counts
print(countBkgTot)

reqDb = ModeAWhDbReqid()
parDb = ModeAWhDbPar()


def selIsRelevant(sel):
    """Return True if the selection name starts with 'sr'."""
    return sel.startswith('sr')


def selIsFinal(sel):
    """Return True if the selection is one of interestingSelections."""
    return sel in interestingSelections


# Extremes of the mc1 / mn1 values known to the parameter database.
mc1Range = dict(min=min(parDb.allMc1()), max=max(parDb.allMc1()))
mn1Range = dict(min=min(parDb.allMn1()), max=max(parDb.allMn1()))

histos = {}
backgroundRelErr = 0.2
verbose = options.verbose countsSigSampleSel = readFromPickle(sigInputFname) countsBkgSampleSel = readFromPickle(bkgInputFname) interestingSelections = ["sr%d"%i for i in range(6,9+1)] countBkgTot = collections.defaultdict(float) for sample, countsSel in countsBkgSampleSel.iteritems() : if sample in ['data', 'totbkg'] : continue for sel, counts in countsSel.iteritems() : if sel not in interestingSelections : continue print 'adding '+sample+' to '+sel+' ('+str(counts)+')' countBkgTot[sel] += counts print countBkgTot reqDb = ModeAWhDbReqid() parDb = ModeAWhDbPar() def selIsRelevant(sel) : return sel.startswith('sr') def selIsFinal(sel) : return sel in interestingSelections mc1Range = {'min': min(parDb.allMc1()), 'max' : max(parDb.allMc1())} mn1Range = {'min': min(parDb.allMn1()), 'max' : max(parDb.allMn1())} histos = dict() backgroundRelErr = 0.2 for sample, countsSel in countsSigSampleSel.iteritems() : mc1, mn1 = parDb.mc1Mn1ByReqid(reqDb.reqidBySample(sample)) print "%s (%.1f, %.1f) " % (sample, mc1, mn1) for sel, counts in sorted(countsSel.iteritems()) : if not selIsFinal(sel) : continue histo = histos[sel] if sel in histos else r.TH2F(sel+'_zn',
from PickleUtils import readFromPickle
from SampleUtils import ModeAWhDbPar, ModeAWhDbReqid, ModeAWhDbMergedFake2Lreqid

# Command-line handling: a single -v/--verbose switch.
parser = optparse.OptionParser()
parser.add_option("-v", "--verbose",
                  action="store_true",
                  dest="verbose",
                  default=False,
                  help="print more details about what is going on")
options, args = parser.parse_args()
verbose = options.verbose

reqDb = ModeAWhDbReqid()
parDb = ModeAWhDbPar()

# All mc1 / mn1 values from the parameter database, as floats.
allMc1 = list(map(float, parDb.allMc1()))
allMn1 = list(map(float, parDb.allMn1()))


def roundup(val):
    """Return val shifted up by 0.5 and then rounded."""
    shifted = float(val) + 0.5
    return round(shifted)


def rounddo(val):
    """Return val rounded."""
    # NOTE(review): despite the name this rounds to the nearest value, not
    # down -- confirm that is intended for the lower axis edge.
    unshifted = float(val) + 0.0
    return round(unshifted)


# Axis ranges covering the mc1 / mn1 values.
mc1Range = dict(min=rounddo(min(allMc1)), max=roundup(max(allMc1)))
mn1Range = dict(min=rounddo(min(allMn1)), max=roundup(max(allMn1)))
class SamplesMerger:
    """Average groups of per-sample histogram files into one file per merged reqid.

    Files are registered with addFile(), which maps the sample to its
    (mc1, mn1) mass point and to the merged ("fake") request id provided by
    ModeAWhDbMergedFake2Lreqid.  mergeAndWrite() then computes, for each merged
    reqid, the averaged mass point and the output filename, and writes the
    averaged histograms unless the output already exists and overwrite is off.

    Attributes:
        filenamesByReqid: merged reqid -> list of registered input filenames.
        masspointByReqid: merged reqid -> list of (mc1, mn1), one per file.
        mergedPointsByReqid: merged reqid -> averaged (mc1, mn1);
            filled by mergeAndWrite().
        outFnameByReqid: merged reqid -> output filename;
            filled by mergeAndWrite().
        overwrite: if False, an existing output file is not rewritten.
        verbose: if True, print each merged point and its output filename.
        regexHist: only histograms whose name matches (re.search) are merged.
    """

    # Fixed part of the filename that precedes the numeric id field.
    _fnamePrefix = 'wA_noslep_WH_2Lep_'

    def __init__(self):
        self.filenamesByReqid = collections.defaultdict(list)
        self.masspointByReqid = collections.defaultdict(list)
        self.reqidProvider = ModeAWhDbMergedFake2Lreqid()
        self.parDb = ModeAWhDbPar()
        self.reqDb = ModeAWhDbReqid()
        self.overwrite = False
        self.verbose = False
        self.regexHist = '.*'
        # Defined up front so printMergedFiles() is safe to call before
        # mergeAndWrite() (it then simply prints nothing).
        self.mergedPointsByReqid = {}
        self.outFnameByReqid = {}

    def idTagFromFilename(self, fname):
        """Return the (id, tag) strings parsed from a filename.

        Expects something like 'wA_noslep_WH_2Lep_9_May16_n0139.AnaHists',
        for which ('9', 'May16_n0139') is returned.

        Raises:
            AttributeError: if fname does not match the expected pattern
                (re.search returns None).
        """
        # Raw string so '\d' is a regex escape, and '\.' so that only a
        # literal dot separates the tag from the 'AnaHist' suffix.
        match = re.search(
            r'wA_noslep_WH_2Lep_(?P<id>\d+?)_(?P<tag>.*?)\.AnaHist', fname)
        return match.group('id'), match.group('tag')

    def addFile(self, filename):
        """Register filename under the merged reqid of its mass point."""
        sample = guessSampleFromFilename(filename)
        mc1, mn1 = self.parDb.mc1Mn1ByReqid(self.reqDb.reqidBySample(sample))
        fakeReqid = self.reqidProvider.reqidByMc1Mn1(mc1, mn1)
        self.filenamesByReqid[fakeReqid].append(filename)
        self.masspointByReqid[fakeReqid].append((mc1, mn1))

    @staticmethod
    def _avgPoint(points):
        """Return the component-wise mean of a non-empty list of (x, y)."""
        count = float(len(points))
        return (sum([x for x, y in points]) / count,
                sum([y for x, y in points]) / count)

    @staticmethod
    def _mergeFiles(targetFile, filenames, regex):
        """Write to targetFile the average of the matching histograms.

        The histogram names are taken from the first input file; each
        histogram is scaled by 1/len(filenames) and summed over all inputs.
        """
        infiles = [r.TFile.Open(f) for f in filenames]
        try:
            histonames = [h for h in getAllHistoNames(infiles[0])
                          if re.search(regex, h)]
            scale = 1.0 / len(infiles)
            outFile = r.TFile.Open(targetFile, 'recreate')
            outFile.cd()
            for hn in histonames:
                h = infiles[0].Get(hn).Clone()
                h.Scale(scale)
                for inf in infiles[1:]:
                    h.Add(inf.Get(hn), scale)
                h.Write()
            outFile.Close()
        finally:
            # Release the input files; the original left them open.
            for inf in infiles:
                if inf:
                    inf.Close()

    def mergeAndWrite(self, outdir):
        """Compute merged points and output names; write the merged histos.

        For every merged reqid the averaged mass point and the output
        filename are stored in mergedPointsByReqid / outFnameByReqid.  The
        merged file itself is written only if it does not exist yet or if
        self.overwrite is set.

        Raises:
            AssertionError: if a reqid has no samples, or if its files carry
                different tags.
        """
        self.mergedPointsByReqid = {}
        self.outFnameByReqid = {}
        for rid in list(self.filenamesByReqid):
            fnames = self.filenamesByReqid[rid]
            points = self.masspointByReqid[rid]
            assert len(points), "cannot merge 0 samples"
            point = self._avgPoint(points)
            # vertical slice: one tuple of ids, one tuple of tags
            ids, tags = zip(*[self.idTagFromFilename(f) for f in fnames])
            assert len(set(tags)) == 1, "cannot merge samples with different tags %s" % str(fnames)
            # Swap only the id field.  A blanket replace of the id digits
            # would also rewrite them elsewhere in the name (e.g. id '9'
            # inside 'n0139', or id '2' inside '2Lep').
            oldField = self._fnamePrefix + str(ids[0]) + '_'
            newField = self._fnamePrefix + str(rid) + '_'
            basename = os.path.basename(fnames[0])
            outfname = outdir + '/' + basename.replace(oldField, newField, 1)
            self.outFnameByReqid[rid] = outfname
            self.mergedPointsByReqid[rid] = point
            if self.verbose:
                print(str(point) + ' : ' + outfname)
            if os.path.exists(outfname) and not self.overwrite:
                continue
            self._mergeFiles(outfname, fnames, self.regexHist)

    def printMergedFiles(self):
        """Print 'point  :  filename' for every merged reqid."""
        for rid in self.outFnameByReqid:
            # Single pre-formatted argument: same text as the former
            # comma-separated print statement, valid on Python 2 and 3.
            print('%s  :  %s' % (self.mergedPointsByReqid[rid],
                                 self.outFnameByReqid[rid]))