def main(filename1, filename2, outdir, regexp, verbose) :
    """Compare the TH1 histograms that two ROOT files have in common.

    Each selected pair of histograms is handed to plotComparison together
    with an output name of the form '<outdir>/<histogram name>'.

    Args:
        filename1, filename2: paths of the two files, opened with r.TFile.Open.
        outdir: output directory; when empty it is obtained from
            guessOutdirFromInputs(filename1, filename2).
        regexp: when not None the common histograms are selected with
            filterWithRegexp; otherwise only the default 'l_pt' num/den
            histograms of a few selections are kept.
        verbose: print progress messages; also forwarded to plotComparison.
    """
    # there are too many histos; by default compare only the ones vs. pt for few selections
    relevantHistograms = ['l_pt_den','l_pt_num']
    relevantSelections = ['CRPremT2','CR_SSInc','CR_WHSS']
    outdir = outdir if outdir else guessOutdirFromInputs(filename1, filename2)
    if verbose : print("saving output plots to '%s'"%outdir)
    file1, file2 = r.TFile.Open(filename1), r.TFile.Open(filename2)
    histonames1 = getAllHistoNames(file1, onlyTH1=True)
    histonames2 = getAllHistoNames(file2, onlyTH1=True)
    if verbose :
        print('\n'.join(["%s: %d histograms"%(f, len(hs))
                         for f, hs in [(filename1, histonames1), (filename2, histonames2)]]))
    # sets give O(1) membership tests and the two differences directly
    names1, names2 = set(histonames1), set(histonames2)
    commonHistos = [h for h in histonames1 if h in names2]
    missFrom1, missFrom2 = names2 - names1, names1 - names2
    # report a mismatch in either direction: comparing len(commonHistos) with
    # len(histonames2) only would miss histograms that exist in file1 alone
    if missFrom1 or missFrom2 :
        print('histograms not in common:\n'
              +"%d missing from %s\n"%(len(missFrom1), filename1)
              +"%d missing from %s\n"%(len(missFrom2), filename2)
              )
    label1, label2 = labelFromFilename(filename1), labelFromFilename(filename2)
    if regexp is not None :
        commonHistos = filterWithRegexp(commonHistos, regexp)
    else :
        commonHistos = [h for h in commonHistos
                        if any(s in h for s in relevantHistograms)
                        and any(s in h for s in relevantSelections)]
    canvas = r.TCanvas('diff_fakeMatrix','diff_fakeMatrix')
    for h in commonHistos :
        outname = outdir+'/'+h
        h1, h2 = file1.Get(h), file2.Get(h)
        plotComparison(h1, h2, canvas, outname, label1, label2, verbose)
def processFile(filename, outdir, label=''):
    """Draw the num/den graphs built from `filename` and save one image each.

    The '_num'/'_den' histogram name pairs found in the file are turned into
    graphs with buildProbHisto; every non-empty graph is drawn on a shared
    canvas together with a reference line and saved as
    '<outdir>/<graph name>_<basename of filename with .root -> .png>'.

    Args:
        filename: path of the input file, opened with r.TFile.Open.
        outdir: directory prefix of the saved images.
        label: optional text appended to each graph title.
    """
    rootFile = r.TFile.Open(filename)
    imageSuffix = os.path.basename(filename).replace('.root', '.png')
    outfname = outdir + '/%(histoname)s_' + imageSuffix
    histonames = getAllHistoNames(rootFile, onlyTH1=True)
    builtGraphs = [
        buildProbHisto(rootFile, num, den)
        for num, den in findHistonamePairs(histonames, '_num', '_den')
    ]
    # remove empty graphs that make TAxis complain
    probHistos = [graph for graph in builtGraphs if graph]
    canvas = r.TCanvas('p_tight', '')
    for graph in probHistos:
        canvas.cd()
        canvas.Clear()
        if label:
            # label is non-empty in this branch, so the suffix is always ", <label>"
            titleSuffix = ", %s" % label
            graph.SetTitle("%s %s" % (graph.GetTitle(), titleSuffix))
        graph.Draw('ap')
        xAxis = graph.GetXaxis()
        yAxis = graph.GetYaxis()
        yAxis.SetRangeUser(0.0, 1.1)
        # keep the line bound to a name until the canvas has been saved
        refLine = referenceLine(xAxis.GetXmin(), xAxis.GetXmax())
        refLine.Draw()
        canvas.Update()
        canvas.SaveAs(outfname % {'histoname': graph.GetName()})
# Example no. 3
# 0
def main(filename1, filename2, outdir, regexp, verbose):
    """Compare the TH1 histograms that two ROOT files have in common.

    Each selected pair of histograms is handed to plotComparison together
    with an output name of the form '<outdir>/<histogram name>'.

    Args:
        filename1, filename2: paths of the two files, opened with r.TFile.Open.
        outdir: output directory; when empty it is obtained from
            guessOutdirFromInputs(filename1, filename2).
        regexp: when not None the common histograms are selected with
            filterWithRegexp; otherwise only the default 'l_pt' num/den
            histograms of a few selections are kept.
        verbose: print progress messages; also forwarded to plotComparison.
    """
    # there are too many histos; by default compare only the ones vs. pt for few selections
    relevantHistograms = ['l_pt_den', 'l_pt_num']
    relevantSelections = ['CRPremT2', 'CR_SSInc', 'CR_WHSS']
    outdir = outdir if outdir else guessOutdirFromInputs(filename1, filename2)
    if verbose:
        print("saving output plots to '%s'" % outdir)
    file1, file2 = r.TFile.Open(filename1), r.TFile.Open(filename2)
    histonames1 = getAllHistoNames(file1, onlyTH1=True)
    histonames2 = getAllHistoNames(file2, onlyTH1=True)
    if verbose:
        print('\n'.join([
            "%s: %d histograms" % (f, len(hs))
            for f, hs in [(filename1, histonames1), (filename2, histonames2)]
        ]))
    # sets give O(1) membership tests and the two differences directly
    names1, names2 = set(histonames1), set(histonames2)
    commonHistos = [h for h in histonames1 if h in names2]
    missFrom1, missFrom2 = names2 - names1, names1 - names2

    # Report a mismatch in either direction: comparing len(commonHistos) with
    # len(histonames2) only would miss histograms that exist in file1 alone.
    if missFrom1 or missFrom2:
        print('histograms not in common:\n' + "%d missing from %s\n" %
              (len(missFrom1), filename1)
              + "%d missing from %s\n" % (len(missFrom2), filename2)
              )
    label1, label2 = labelFromFilename(filename1), labelFromFilename(filename2)
    if regexp is not None:
        commonHistos = filterWithRegexp(commonHistos, regexp)
    else:
        commonHistos = [
            h for h in commonHistos
            if any(s in h for s in relevantHistograms)
            and any(s in h for s in relevantSelections)
        ]
    canvas = r.TCanvas('diff_fakeMatrix', 'diff_fakeMatrix')
    for h in commonHistos:
        outname = outdir + '/' + h
        h1, h2 = file1.Get(h), file2.Get(h)
        plotComparison(h1, h2, canvas, outname, label1, label2, verbose)
 def mergeFiles(targetFile, filenames, regex) :
     """Average the histograms matching `regex` over `filenames` into `targetFile`.

     The histogram names are taken from the first input file only. Each
     selected histogram is cloned from the first file, scaled by
     1/len(filenames), and the same-named histogram of every other file is
     added with the same weight before the result is written out.

     Args:
         targetFile: path of the output file, opened in 'recreate' mode.
         filenames: paths of the input files (at least one is required,
             since the first one provides the histogram names).
         regex: pattern applied with re.search to the histogram names.
     """
     infiles = [r.TFile.Open(f) for f in filenames]
     try :
         pattern = re.compile(regex) # compile once rather than once per name
         histonames = [h for h in getAllHistoNames(infiles[0]) if pattern.search(h)]
         scale = 1.0/len(infiles)
         outFile = r.TFile.Open(targetFile, 'recreate')
         outFile.cd()
         for hn in histonames :
             h = infiles[0].Get(hn).Clone()
             h.Scale(scale)
             for inf in infiles[1:] : h.Add(inf.Get(hn), scale)
             h.Write()
         outFile.Close()
     finally :
         # the inputs used to be left open: release them, also when something above raised
         for inf in infiles :
             if inf : inf.Close() # skip files that could not be opened
def main(filenameOrig, filenameDest, substr1, substr2) :
    """Copy the objects of filenameOrig to filenameDest, swapping substr1 and substr2 in their names.

    A name containing substr1 gets substr1 replaced by substr2; any other
    name gets substr2 replaced by substr1. The number of names that changed
    is printed at the end.
    """
    source = r.TFile.Open(filenameOrig)
    histonames = getAllHistoNames(source)
    destination = r.TFile.Open(filenameDest, 'recreate')
    destination.cd()
    nhFixed = 0
    for oldName in histonames :
        histo = source.Get(oldName)
        # replace in one direction only, to avoid swapping twice s1->s2->s1
        if substr1 in oldName :
            newName = oldName.replace(substr1, substr2)
        else :
            newName = oldName.replace(substr2, substr1)
        histo.Write(newName)
        if newName != oldName :
            nhFixed += 1
    print("renamed %d histograms"%nhFixed)
    destination.Close()
    source.Close()
# Example no. 6
# 0
 def mergeFiles(targetFile, filenames, regex):
     """Average the histograms matching `regex` over `filenames` into `targetFile`.

     The histogram names are taken from the first input file only. Each
     selected histogram is cloned from the first file, scaled by
     1/len(filenames), and the same-named histogram of every other file is
     added with the same weight before the result is written out.

     Args:
         targetFile: path of the output file, opened in 'recreate' mode.
         filenames: paths of the input files (at least one is required,
             since the first one provides the histogram names).
         regex: pattern applied with re.search to the histogram names.
     """
     infiles = [r.TFile.Open(f) for f in filenames]
     try:
         # compile once rather than once per histogram name
         pattern = re.compile(regex)
         histonames = [
             h for h in getAllHistoNames(infiles[0]) if pattern.search(h)
         ]
         scale = 1.0 / len(infiles)
         outFile = r.TFile.Open(targetFile, 'recreate')
         outFile.cd()
         for hn in histonames:
             h = infiles[0].Get(hn).Clone()
             h.Scale(scale)
             for inf in infiles[1:]:
                 h.Add(inf.Get(hn), scale)
             h.Write()
         outFile.Close()
     finally:
         # The inputs used to be left open: release them, also when
         # something above raised.
         for inf in infiles:
             if inf:  # skip files that could not be opened
                 inf.Close()
def main(filenameOrig, filenameDest, substr1, substr2):
    """Copy the objects of filenameOrig to filenameDest, swapping two substrings in their names.

    A name containing substr1 gets substr1 replaced by substr2; any other
    name gets substr2 replaced by substr1. The number of names that changed
    is printed at the end.
    """
    inputFile = r.TFile.Open(filenameOrig)
    histonames = getAllHistoNames(inputFile)
    outputFile = r.TFile.Open(filenameDest, 'recreate')
    outputFile.cd()
    nhFixed = 0
    for name in histonames:
        histo = inputFile.Get(name)
        # replace in one direction only, to avoid swapping twice s1->s2->s1
        if substr1 in name:
            swapped = name.replace(substr1, substr2)
        else:
            swapped = name.replace(substr2, substr1)
        histo.Write(swapped)
        if swapped != name:
            nhFixed += 1
    print("renamed %d histograms" % nhFixed)
    outputFile.Close()
    inputFile.Close()
def processFile(filename, outdir, label='') :
    """Draw the num/den graphs built from `filename` and save one image each.

    The '_num'/'_den' histogram name pairs found in the file are turned into
    graphs with buildProbHisto; every non-empty graph is drawn on a shared
    canvas together with a reference line and saved as
    '<outdir>/<graph name>_<basename of filename with .root -> .png>'.

    Args:
        filename: path of the input file, opened with r.TFile.Open.
        outdir: directory prefix of the saved images.
        label: optional text appended to each graph title.
    """
    inputFile = r.TFile.Open(filename)
    pngName = os.path.basename(filename).replace('.root','.png')
    outfname = outdir+'/%(histoname)s_'+pngName
    histonames = getAllHistoNames(inputFile, onlyTH1=True)
    allGraphs = [buildProbHisto(inputFile, num, den)
                 for num,den in findHistonamePairs(histonames, '_num', '_den')]
    # remove empty graphs that make TAxis complain
    probHistos = [gr for gr in allGraphs if gr]
    can = r.TCanvas('p_tight','')
    for gr in probHistos :
        can.cd()
        can.Clear()
        if label :
            # label is non-empty in this branch, so the suffix is always ", <label>"
            gr.SetTitle("%s %s"%(gr.GetTitle(), ", %s"%label))
        gr.Draw('ap')
        xAx = gr.GetXaxis()
        yAx = gr.GetYaxis()
        yAx.SetRangeUser(0.0, 1.1)
        # keep the line bound to a name until the canvas has been saved
        refLine = referenceLine(xAx.GetXmin(), xAx.GetXmax())
        refLine.Draw()
        can.Update()
        can.SaveAs(outfname%{'histoname':gr.GetName()})
                                 'pickleFile']])
    print 'Input files:\n'+'\n'.join(inputFileNames)

# navigate the files and collect the histos
# template type: channel, variable and systematic are specified, pr is left empty
referenceType = HistoType(pr='', ch=channel, var=referenceHisto, syst=referenceSyst)
histosByType = collections.defaultdict(list)
classifier = HistoNameClassifier()

# The histogram names are listed from the first file that yields any, narrowed
# to the ones passing isRightType, and then reused for the remaining files.
histoNames = []
for fname, infile in zip(inputFileNames, inputFiles) :
    sample = guessGroupFromFilename(fname)
    setType, setSample = setHistoType, setHistoSample
    def getType(histoName) : return classifier.histoType(histoName)
    def isRightType(histo) : return referenceType.matchAllAvailabeAttrs(histo.type)
    # an empty list counts as "not cached", so the listing is retried on the next file
    histonamesCached = len(histoNames)>0
    if not histonamesCached : histoNames = getAllHistoNames(infile, onlyTH1=True, nameStem=histoname)
    # fetch each histogram, tag it with its type and sample, keep the matching ones
    # NOTE(review): relies on setHistoType/setHistoSample returning the histogram they are given -- confirm
    histos = filter(isRightType, map(lambda hn :
                                     setSample(setType(infile.Get(hn), getType(hn)), sample),
                                     histoNames))
    if not histonamesCached : histoNames = [h.GetName() for h in histos] # after filtering
    organizeHistosByType(histosByType, histos)
refHistos = histosByType # already filtered histonames, all histosByType are refHistos
# sorted, duplicate-free sample names found among the collected histograms
allSamples = sorted(list(set([h.sample for histos in refHistos.values() for h in histos])))
# optional extra entry for the total background
allSamples += ['totbkg'] if printTotBkg else []
# sorted, duplicate-free 'pr' attribute of the histogram-type keys
allSelects = sorted(list(set([k.pr for k in histosByType.keys()])))
if verbose : print 'allSamples : ',allSamples
if verbose : print 'allSelects : ',allSelects

# get the counts (adding up what needs to be merged by samplename)
sampleCountsPerSel = dict() # counts[sample][sel]
# one defaultdict(float) per sample: a key that was never filled reads as 0.0
countsSampleSel = dict([(s, collections.defaultdict(float)) for s in allSamples])
plotRegions     = options.regions.split(',')
referenceSyst   = options.syst
verbose         = options.verbose
assert channel in validChannels,"Invalid channel %s (should be one of %s)" % (channel, str(validChannels))
inputFileNames = glob.glob(inputDir+'/'+'*'+prodTag+'*.root') + glob.glob(signalFname)
inputFiles = [r.TFile.Open(f) for f in inputFileNames]
# Check the opened files themselves: comparing the lengths of the two lists could
# never fail, because the comprehension above yields one entry per file name.
assert all(f and not f.IsZombie() for f in inputFiles),"Cannot open some of the input files"


# template type: channel, variable and systematic are specified, pr is left empty
refHistoType = HistoType(pr='', ch=channel, var=referenceHisto, syst=referenceSyst)
histosByType = collections.defaultdict(list)
classifier = HistoNameClassifier()

for fname, infile in zip(inputFileNames, inputFiles) :
    samplename = guessSampleFromFilename(fname)
    # keep the names whose classified type is accepted by the template type
    histoNames = [n for n in getAllHistoNames(infile, onlyTH1=True)
                  if refHistoType.matchAllAvailabeAttrs( classifier.histoType( n ) )]
    histos = [infile.Get(hn) for hn in histoNames]
    # tag every histogram with its type and with the sample it comes from
    for h in histos :
        setHistoType(h, classifier.histoType(h.GetName()))
        setHistoSample(h, samplename)
    # keep only the requested regions
    histos = [h for h in histos if h.type.pr in plotRegions]
    organizeHistosByType(histosByType, histos)
refHistos = histosByType # already filtered histonames, all histosByType are refHistos

def isSignal(sampleName) :
    "Tell whether sampleName contains the 'WH_' tag."
    signalTag = 'WH_'
    return signalTag in sampleName
# duplicate-free sample names; going through a set leaves their order undefined
allSamples = list(set([h.sample for histos in refHistos.values() for h in histos]))
allBkgNames  = [s for s in allSamples if not isSignal(s)]
# first signal sample found; next() raises StopIteration when there is none
sigName = next(s for s in allSamples if isSignal(s))
if verbose : print '\n'.join("%s : %s" % (s,l) for s,l in zip(['bkg','sig'], [str(allBkgNames), sigName]))
                           '*.root') + glob.glob(signalFname)
inputFiles = [r.TFile.Open(f) for f in inputFileNames]
# Check the opened files themselves: comparing the lengths of the two lists could
# never fail, because the comprehension above yields one entry per file name.
assert all(f and not f.IsZombie()
           for f in inputFiles), "Cannot open some of the input files"

# template type: channel, variable and systematic are specified, pr is left empty
refHistoType = HistoType(pr='',
                         ch=channel,
                         var=referenceHisto,
                         syst=referenceSyst)
histosByType = collections.defaultdict(list)
classifier = HistoNameClassifier()

for fname, infile in zip(inputFileNames, inputFiles):
    samplename = guessSampleFromFilename(fname)
    # keep the names whose classified type is accepted by the template type
    histoNames = [
        n for n in getAllHistoNames(infile, onlyTH1=True)
        if refHistoType.matchAllAvailabeAttrs(classifier.histoType(n))
    ]
    histos = [infile.Get(hn) for hn in histoNames]
    # tag every histogram with its type and with the sample it comes from
    for h in histos:
        setHistoType(h, classifier.histoType(h.GetName()))
        setHistoSample(h, samplename)
    # keep only the requested regions
    histos = [h for h in histos if h.type.pr in plotRegions]
    organizeHistosByType(histosByType, histos)
refHistos = histosByType  # already filtered histonames, all histosByType are refHistos


def isSignal(sampleName):
    """Tell whether sampleName contains the 'WH_' tag."""
    tag = 'WH_'
    found = tag in sampleName
    return found

# Example no. 12
# 0
signalScale     = options.sigScale
justTest        = options.test
verbose         = options.verbose

# inputs: the files matching the production tag in inputDir, plus the ones matching signalFname
inputFileNames = glob.glob(inputDir+'/'+'*'+prodTag+'*.root') + glob.glob(signalFname)
print 'input files:\n'+'\n'.join(inputFileNames)
inputFiles = [r.TFile.Open(f) for f in inputFileNames]


histosByType = collections.defaultdict(list)
classifier = HistoNameClassifier()

for fname, infile in zip(inputFileNames, inputFiles) :
    print '-'*3 + fname + '-'*3
    samplename = guessSampleFromFilename(fname)
    # NOTE(review): the names are always read from the first file (inputFiles[0]), not from
    # 'infile', and the listing is repeated at every iteration -- presumably all the files
    # are expected to hold the same histograms; confirm, and consider hoisting it out of the loop
    histoNames = getAllHistoNames(inputFiles[0], onlyTH1=True)
    # keep only the histograms whose name starts with one of the sr6..sr9 prefixes
    histoNames = [h for h in histoNames if any([h.startswith(p) for p in ['sr6', 'sr7', 'sr8', 'sr9']])]
    if justTest : histoNames = histoNames[:10] # just get 10 histos to run quick tests
    histos = [infile.Get(hn) for hn in histoNames]
    # tag every histogram with its type and with the sample it comes from
    for h in histos :
        setHistoType(h, classifier.histoType(h.GetName()))
        setHistoSample(h, samplename)
    organizeHistosByType(histosByType, histos)

def isSignal(sampleName) :
    "Tell whether sampleName contains the 'WH_' tag."
    if 'WH_' in sampleName :
        return True
    return False

def cumsum(l, leftToRight=True) :
    """Return the list of cumulative sums of `l` (pure-python stand-in for numpy.cumsum).

    Args:
        l: iterable of addable values; it is copied, never modified.
        leftToRight: when True, element i of the result is sum(l[:i+1]);
            when False, element i is the sum of the tail l[i:].

    Returns:
        A list as long as `l`; an empty input gives an empty list.

    The sums are accumulated in a single pass instead of re-summing a slice
    for every element. For floats the right-to-left sums add the elements
    starting from the last one, so they can differ from sum(l[i:]) by
    rounding.
    """
    values = list(l) # also accepts iterators and other non-sliceable iterables
    if not leftToRight :
        values.reverse()
    sums, total = [], 0
    for value in values :
        total = total + value # not '+=', which could modify an already stored element in place
        sums.append(total)
    if not leftToRight :
        sums.reverse()
    return sums
def mergeOuter(bc, nOuter=2) : # add over/underflow in the first/last bin
def printHistoNames(inputFile) :
    "Print the name of inputFile, then the names returned by getAllHistoNames, one per line."
    # the header text already ends with a blank and one more separates it from the file name
    header = '%s %s' % ('histograms from ', inputFile.GetName())
    print(header)
    listing = '\n'.join(getAllHistoNames(inputFile))
    print(listing)