class FakeRateCalculator(object):
    '''
    Compute fake rates (or fake factors) from a "tight" (numerator) and a
    "loose" (denominator) set of ntuples, for data and/or Monte Carlo.

    Histograms are built through an NtuplePlotter. Every result produced by
    calculateFakeRate() is accumulated in self.outputs, which writeOutput()
    saves to the output file.
    '''
    def __init__(self, looseFilesData, tightFilesData,
                 looseFilesMC, tightFilesMC,
                 channels, outFile, intLumi=-1., fakeFactor=False):
        '''
        looseFilesData/tightFilesData: denominator/numerator data ntuples.
            Both or neither must be given.
        looseFilesMC/tightFilesMC: the same for Monte Carlo.
        channels: channel specification, passed to parseChannels and to the
            NtuplePlotter. Every channel must have exactly 3 objects.
        outFile: path of the output file; plots go in the same directory.
        intLumi: integrated luminosity handed to the plotter.
        fakeFactor: if True, outputs are f/(1-f) instead of the rate f.
        '''
        self.dataFiles = self._pairFiles(looseFilesData, tightFilesData,
                                         'num', 'denom', 'data')
        self.mcFiles = self._pairFiles(looseFilesMC, tightFilesMC,
                                       'numMC', 'denomMC', 'Monte Carlo')

        self.hasData = bool(self.dataFiles)
        self.hasMC = bool(self.mcFiles)

        self.outFileName = outFile
        self.outDir = self._outputDirectory(self.outFileName)

        self.intLumi = intLumi

        self.plotter = NtuplePlotter(channels, self.outDir, self.mcFiles,
                                     self.dataFiles, self.intLumi)

        self.channels = parseChannels(channels)
        # (channels,) keeps the message formatting safe if a tuple is passed
        assert len(self.channels) > 0 and \
            all(len(ch) == 3 for ch in self.channels), \
            "Invalid channel %s. Channels must have 3 objects."%(channels,)

        # The lists below are indexed by (number of dimensions - 1)
        self.WrappedHists = [
            self.plotter.WrappedHist,
            self.plotter.WrappedHist2,
            self.plotter.WrappedHist3,
            ]
        self.WrappedStacks = [
            self.plotter.WrappedStack,
            self.plotter.WrappedStack,
            self.plotter.WrappedHist3, # THistStack only goes up to 2-D
            ]
        self.histMakers = [
            lambda *args, **kwargs: self.plotter.makeHist(*args, perUnitWidth=False, **kwargs),
            self.plotter.makeHist2,
            self.plotter.makeHist3,
            ]
        self.stackMakers = [
            lambda *args, **kwargs: self.plotter.makeStack(*args, perUnitWidth=False, **kwargs),
            self.plotter.makeStack2, # THistStack same for 1- and 2-D
            self.plotter.makeStack3,
            ]
        self.extractHistFromStack = [
            lambda s: self.WrappedHists[0](asrootpy(s.GetStack().Last()),
                                           category=s.getCategory(),
                                           variable=s.getVariable(),
                                           selection=s.getSelection()),
            lambda s: self.WrappedHists[1](asrootpy(s.GetStack().Last()),
                                           category=s.getCategory(),
                                           variable=s.getVariable(),
                                           selection=s.getSelection()),
            lambda s: s, # THistStack only goes up to 2-D
            ]

        self.outputs = []

        self.fakeFactor = fakeFactor


    @staticmethod
    def _pairFiles(looseFiles, tightFiles, tightKey, looseKey, sampleType):
        '''
        Return {tightKey : tightFiles, looseKey : looseFiles} if both are
        given, or {} if neither is. Giving only one of them is an error.
        Empty values and None both count as "not given".
        '''
        message = "I can only make a fake rate from %s with both a tight and loose ntuple"%sampleType
        if looseFiles:
            assert tightFiles, message
            return {tightKey : tightFiles, looseKey : looseFiles}
        # truthiness test instead of len(...)==0 so that None is accepted
        assert not tightFiles, message
        return {}


    @staticmethod
    def _outputDirectory(outFile):
        '''
        Return everything in outFile up to and including the last '/'
        ('' if there is no '/').
        '''
        # Slicing rather than str.replace(basename, ''): replace would also
        # strip the basename out of any directory component containing it.
        return outFile[:outFile.rfind('/') + 1]


    @staticmethod
    def _parseVarsAndBinnings(varsAndBinnings):
        '''
        Split the (variable, binning, variable, binning, ...) sequence given
        to calculateFakeRate.

        Returns (nDims, varTemplate, binning). varTemplate is the variables
        joined by ':' with each prefixed by '{0}', the placeholder for the
        object name. binning is the flat list of binning arguments.

        Raises ValueError unless there are 1, 2 or 3 variable/binning pairs.
        '''
        nArgs = len(varsAndBinnings)
        if nArgs % 2 == 1 or nArgs < 2 or nArgs > 6:
            raise ValueError("Invalid list of variables and their binnings")

        nDims = nArgs // 2

        variables = varsAndBinnings[0::2]
        binSpecs = varsAndBinnings[1::2]

        varTemplate = ':'.join("{0}"+var for var in variables)

        binning = []
        for spec in binSpecs:
            # A 3-item spec starting with an int is read as [nbins, min, max]
            if len(spec) == 3 and isinstance(spec[0], int):
                binning.extend(spec) # uniform bins
            else:
                binning.append(spec) # variable bins

        return nDims, varTemplate, binning


    def _makeOutput(self, rate, numerator, nDims, name):
        '''
        Build the output histogram called name from the fake rate histogram.

        If self.fakeFactor is set, the output is an empty clone of numerator
        filled with f/(1-f) for each non-overflow bin; bin errors are not
        set. Otherwise it is a clone of rate with overflow bins zeroed.
        '''
        wrap = self.WrappedHists[nDims-1]

        if self.fakeFactor: # actual scale factor, f/(1-f)
            out = wrap(asrootpy(numerator.empty_clone()), name=name)
            for rateBin, outBin in zip(rate, out):
                if outBin.overflow:
                    continue
                # NOTE(review): raises ZeroDivisionError if a rate is exactly 1
                outBin.value = rateBin.value / (1. - rateBin.value)
        else:
            out = wrap(rate.clone(), name=name)
            for outBin in out:
                if outBin.overflow:
                    outBin.value = 0.
                    outBin.error = 0.

        return out


    def calculateFakeRate(self, name, channels, *varsAndBinnings, **kwargs):
        '''
        Make fake rate histograms. They are appended to self.outputs and
        returned as a list: the MC result (named name+"MC") if MC files were
        given, followed by the data result (named name) if data files were
        given.

        Channel should be one channel or a list of channels. Correct object
        to use for each channel is figured out automatically.
        The next 2, 4, or 6 arguments should be one, two, or three variables
        to bin in, each followed immediately by an iterable containing bin
        edges or [nbins, min, max] (nbins must be an int to be recognized).
        If keyword argument 'draw' evaluates to True, .png files are created
        in the same directory as the eventual output with plots of the fake
        rate, numerator and denominator. Remaining keyword arguments are
        passed to drawFakeRate.
        If a list of subtraction samples is provided by
        kwargs['subtractSamples'], the MC contributions of these samples
        are subtracted from the data numerator and denominator before the
        fake rate is calculated, and they are left out of the MC stacks.
        No data bin may be less than 0; bins with a negative numerator or
        a non-positive denominator are reset to give a rate of 0.

        Raises ValueError if the variables and binnings are malformed.
        '''
        subtractSamples = kwargs.pop('subtractSamples', [])

        nDims, varTemplate, binning = self._parseVarsAndBinnings(varsAndBinnings)
        wrap = self.WrappedHists[nDims-1]

        if isinstance(channels, str):
            channels = [channels]

        varList = [varTemplate.format(self.fakeObjectForChannel(ch)) for ch in channels]
        selecList = ["" for v in varList]

        outputs = []

        drawablesMC = {}
        if self.hasMC:
            makeStack = self.stackMakers[nDims-1]
            toHist = self.extractHistFromStack[nDims-1]

            samplesToDraw = [s for s in self.plotter.ntuples['numMC'].keys()
                             if s not in subtractSamples]

            sNum = makeStack("numMC", samplesToDraw, channels, varList,
                             selecList, binning, weight="GenWeight")
            drawablesMC['num'] = sNum
            numMC = toHist(sNum)

            sDenom = makeStack("denomMC", samplesToDraw, channels, varList,
                               selecList, binning, weight="GenWeight")
            drawablesMC['denom'] = sDenom
            denomMC = toHist(sDenom)

            fMC = wrap(asrootpy(numMC.Clone()), name=name+"MC_fakeRate",
                       isData=False)
            fMC.Divide(denomMC)
            drawablesMC['fakeRate'] = fMC

            outputs.append(self._makeOutput(fMC, numMC, nDims, name+"MC"))

        drawablesData = {}
        if self.hasData:
            makeHist = self.histMakers[nDims-1]

            num = makeHist("num", "num", channels, varList, selecList, binning)
            denom = makeHist("denom", "denom", channels, varList, selecList, binning)
            num.sumw2()
            denom.sumw2()

            for ss in subtractSamples:
                subNum = makeHist("numMC", ss, channels, varList, selecList,
                                  binning, 1., weights="GenWeight")
                num -= subNum
                subDenom = makeHist("denomMC", ss, channels, varList, selecList,
                                    binning, 1., weights="GenWeight")
                denom -= subDenom

            # Unphysical bins get numerator 0 and a tiny positive
            # denominator, so the division below gives a rate of 0
            for bNum, bDenom in zip(num, denom):
                if bNum.value < 0. or bDenom.value <= 0.:
                    bNum.value = 0.
                    bNum.error = 0.
                    bDenom.value = 0.0000001
                    bDenom.error = 0.

            num.sumw2()
            denom.sumw2()

            f = wrap(asrootpy(num.Clone()), name=name+"_fakeRate")
            f.Divide(denom)

            drawablesData['num'] = num
            drawablesData['denom'] = denom
            drawablesData['fakeRate'] = f

            outputs.append(self._makeOutput(f, num, nDims, name))

        for o in outputs:
            for i in range(nDims):
                o.axis(i).title = varsAndBinnings[i*2]

        if kwargs.pop('draw', False):
            self.drawFakeRate(name, nDims, drawablesData, drawablesMC, **kwargs)

        self.outputs += outputs

        return outputs


    def _newDrawing(self, title, size):
        '''
        Make a square plotter Drawing of the given title and side length.
        '''
        # NOTE(review): the second positional argument was missing from the
        # original call (it read "Drawing(title,, size, size)", which does
        # not parse). self.plotter.style is an assumption -- confirm it
        # against the signature of NtuplePlotter.Drawing.
        return self.plotter.Drawing(title, self.plotter.style, size, size)


    def drawFakeRate(self, name, nDims, drawablesData, drawablesMC, **kwargs):
        '''
        Create .png files with the fake rate plot, with numbers superimposed
        over each (2-D only), and for the numerator and denominator.
        For 1-D, if both are available, MC and data are drawn on the same
        plot. Otherwise each is drawn separately, with "MC" appended to the
        names of the MC plots.

        drawablesData and drawablesMC map 'num', 'denom' and 'fakeRate' to
        the objects to draw; either may be empty.
        Keyword arguments 'xTitle' and 'xUnits' are passed to the drawing.
        '''
        import os

        xTitle = kwargs.get('xTitle', '')
        xUnits = kwargs.get('xUnits', '')

        def outPath(plotType):
            # os.path.join, so an empty outDir means the current directory
            # rather than the filesystem root
            return os.path.join(self.outDir, '%s_%s.png'%(name, plotType))

        drawings = {}
        if nDims == 1 and bool(drawablesData) and bool(drawablesMC):
            for typeToPlot in ['num', 'denom', 'fakeRate']:
                drawing = self._newDrawing(name+'_'+typeToPlot, 1000)
                drawings[typeToPlot] = drawing

                mcPlot = drawablesMC[typeToPlot]
                if typeToPlot == 'fakeRate':
                    mcPlot.drawstyle = 'hist'
                    mcPlot.color = 'red'
                    drawing.addObject(mcPlot, legendName='MC', legendStyle="L")
                else:
                    drawing.addObject(mcPlot)

                # (A Data / MC ratio pad via drawing.addRatio was disabled
                # in the original code.)

                if typeToPlot == 'fakeRate':
                    fakeRateData = self.plotter.WrappedGraph(type='asymm', title='data')
                    fakeRateData.Divide(drawablesData['num'], drawablesData['denom'])
                else:
                    fakeRateData = drawablesData[typeToPlot]

                fakeRateData.color = 'black'
                fakeRateData.drawstyle = 'PE'
                drawing.addObject(fakeRateData, legendName='data', legendStyle='LPE')

                drawing.draw({'yerror_in_padding' : False},
                             outPath(typeToPlot),
                             xTitle=xTitle, xUnits=xUnits,
                             yTitle='Fake Rate',
                             intLumi=self.intLumi, perUnitWidth=False)
        else:
            for maybeMC in ['', 'MC']:
                drawables = drawablesMC if maybeMC else drawablesData
                if not drawables:
                    continue

                for typeToPlot, plot in drawables.items():
                    ttp = typeToPlot+maybeMC
                    drawing = self._newDrawing(name+'_'+ttp, 800)
                    drawings[ttp] = drawing

                    if isinstance(plot, _Hist2D):
                        plot.drawstyle = plot.drawstyle+"TEXT"
                        if "COLZ" not in plot.drawstyle:
                            plot.drawstyle = plot.drawstyle+"COLZ"
                    if isinstance(plot, _Hist) and maybeMC:
                        plot.drawstyle = 'hist'
                        plot.color = 'red'

                    drawing.addObject(plot,
                                      addToLegend=(bool(maybeMC) and typeToPlot != 'fakeRate' and nDims == 1),
                                      legendName=("MC" if maybeMC else "data"),
                                      legendStyle=("F" if maybeMC else "LPE"))
                    drawing.draw({'yerror_in_padding' : False},
                                 outPath(ttp),
                                 xTitle=xTitle, xUnits=xUnits,
                                 intLumi=self.intLumi, stackErr=(nDims==1),
                                 perUnitWidth=False)


    # Cache of channel -> fake object name, shared by all instances
    _fakeRateCalculator_channelObjMap_ = {}
    def fakeObjectForChannel(self, channel):
        '''
        Get the odd object out for this channel (the l in Z+l).
        E.g. 'm3' for channel 'mmm' or 'e' for channel 'emm'.
        Results are cached per channel.

        Raises AssertionError if the channel is not Z+l-like, and KeyError
        if no suitable object is found among those mapObjects returns.
        '''
        cache = self._fakeRateCalculator_channelObjMap_
        if channel in cache:
            return cache[channel]

        assert len(channel) == 3 and any(channel.count(ob)>1 for ob in channel),\
            "Only Z+l-like channels are allowed, %s will not work!"%channel

        for obj in mapObjects(channel):
            # Either the only object of its type (single-character name),
            # or the third of its type (Z always listed first for lll
            # channels)
            if len(obj) == 1 or obj[-1] == '3':
                cache[channel] = obj
                return obj

        # shouldn't ever happen; KeyError is what the original bare raise
        # propagated here
        raise KeyError("No fake object found for channel %s"%channel)


    def writeOutput(self):
        '''
        Write every histogram in self.outputs to the output file,
        overwriting the file if it already exists.
        '''
        with root_open(self.outFileName, "RECREATE") as f:
            for op in self.outputs:
                op.Write()
def __init__(self, looseFilesData, tightFilesData,
             looseFilesMC, tightFilesMC,
             channels, outFile, intLumi=-1., fakeFactor=False):
    '''
    Set up the plotter and the per-dimension tools used to make fake rates.

    looseFilesData/tightFilesData: denominator/numerator data ntuples.
        Both or neither must be given.
    looseFilesMC/tightFilesMC: the same for Monte Carlo.
    channels: channel specification, passed to parseChannels and to the
        NtuplePlotter. Every channel must have exactly 3 objects.
    outFile: path of the output file; plots go in the same directory.
    intLumi: integrated luminosity handed to the plotter.
    fakeFactor: stored as self.fakeFactor.

    NOTE(review): this appears to be a second copy of
    FakeRateCalculator.__init__ sitting outside the class -- confirm
    whether it is needed.
    '''
    dataMessage = "I can only make a fake rate from data with both a tight and loose ntuple"
    if looseFilesData:
        assert tightFilesData, dataMessage
        self.dataFiles = {'num' : tightFilesData, 'denom' : looseFilesData}
    else:
        # truthiness test instead of len(...)==0 so that None is accepted
        assert not tightFilesData, dataMessage
        self.dataFiles = {}

    mcMessage = "I can only make a fake rate from Monte Carlo with both a tight and loose ntuple"
    if looseFilesMC:
        assert tightFilesMC, mcMessage
        self.mcFiles = {'numMC' : tightFilesMC, 'denomMC' : looseFilesMC}
    else:
        assert not tightFilesMC, mcMessage
        self.mcFiles = {}

    self.hasData = bool(self.dataFiles)
    self.hasMC = bool(self.mcFiles)

    self.outFileName = outFile
    # Everything up to and including the last '/'. Slicing rather than
    # str.replace(basename, ''): replace would also strip the basename out
    # of any directory component containing it.
    self.outDir = self.outFileName[:self.outFileName.rfind('/') + 1]

    self.intLumi = intLumi

    self.plotter = NtuplePlotter(channels, self.outDir, self.mcFiles,
                                 self.dataFiles, self.intLumi)

    self.channels = parseChannels(channels)
    # (channels,) keeps the message formatting safe if a tuple is passed
    assert len(self.channels) > 0 and \
        all(len(ch) == 3 for ch in self.channels), \
        "Invalid channel %s. Channels must have 3 objects."%(channels,)

    # The lists below are indexed by (number of dimensions - 1)
    self.WrappedHists = [
        self.plotter.WrappedHist,
        self.plotter.WrappedHist2,
        self.plotter.WrappedHist3,
        ]
    self.WrappedStacks = [
        self.plotter.WrappedStack,
        self.plotter.WrappedStack,
        self.plotter.WrappedHist3, # THistStack only goes up to 2-D
        ]
    self.histMakers = [
        lambda *args, **kwargs: self.plotter.makeHist(*args, perUnitWidth=False, **kwargs),
        self.plotter.makeHist2,
        self.plotter.makeHist3,
        ]
    self.stackMakers = [
        lambda *args, **kwargs: self.plotter.makeStack(*args, perUnitWidth=False, **kwargs),
        self.plotter.makeStack2, # THistStack same for 1- and 2-D
        self.plotter.makeStack3,
        ]
    self.extractHistFromStack = [
        lambda s: self.WrappedHists[0](asrootpy(s.GetStack().Last()),
                                       category=s.getCategory(),
                                       variable=s.getVariable(),
                                       selection=s.getSelection()),
        lambda s: self.WrappedHists[1](asrootpy(s.GetStack().Last()),
                                       category=s.getCategory(),
                                       variable=s.getVariable(),
                                       selection=s.getSelection()),
        lambda s: s, # THistStack only goes up to 2-D
        ]

    self.outputs = []

    self.fakeFactor = fakeFactor
mcSamples = { 'mc':'/data/nawoods/ntuples/zzNtuples_mc_{1}/results_{0}/ZZTo4L_13TeV_*.root,/data/nawoods/ntuples/zzNtuples_mc_{1}/results_{0}/GluGlu*.root'.format(sampleID, ntupleSet), 'mc3P1F':'/data/nawoods/ntuples/zzNtuples_mc_{1}/results_{0}_3P1F/*.root'.format(sampleID, ntupleSet3P1F), 'mc2P2F':'/data/nawoods/ntuples/zzNtuples_mc_{1}/results_{0}_2P2F/*.root'.format(sampleID, ntupleSet2P2F), } if args.dySkim: mcSamples['mc'] += ',/data/nawoods/ntuples/zzNtuples_mc_dySkim_10feb2016_0/results_{0}/DYSkim_*.root'.format(sampleID, ntupleSet) dataSamples = { 'data':'/data/nawoods/ntuples/zzNtuples_data_2015silver_{1}/results_{0}/data*.root'.format(sampleID, ntupleSet), '3P1F':'/data/nawoods/ntuples/zzNtuples_data_2015silver_{1}/results_{0}_3P1F/data*.root'.format(sampleID, ntupleSet3P1F), '2P2F':'/data/nawoods/ntuples/zzNtuples_data_2015silver_{1}/results_{0}_2P2F/data*.root'.format(sampleID, ntupleSet2P2F), } plotter = NtuplePlotter('zz', '/afs/{0}_{1}'.format('%d%b%Y').lower(), ana), mcSamples, dataSamples, intLumi=2619.) basicSelection = '' print "" if args.printData: print "Signal (data):" plotter.printPassingEvents('data') elif not args.eventsOnly: print "Signal (data):" print ' eeee: %d'%plotter.ntuples['data']['data']['eeee'].GetEntries() print ' eemm: %d'%plotter.ntuples['data']['data']['eemm'].GetEntries() print ' mmmm: %d'%plotter.ntuples['data']['data']['mmmm'].GetEntries() print "" if args.print3P1F: