def __init__(self, analysis, sample, **kwargs):
    """Bind this flattener to one (analysis, sample) pair.

    Extra keyword arguments are forwarded verbatim to NtupleWrapper.
    Histogram/selection lists start empty and are filled later by the
    add* methods.
    """
    self.analysis = analysis
    self.sample = sample
    self.ntuple = NtupleWrapper(analysis, sample, **kwargs)
    # Registered work items (populated via addHistogram/addSelection).
    self.histParameters = []
    self.selections = []
    self.countOnly = []
def _openFile(self, sampleName, **kwargs):
    '''Verify and open a sample'''
    # Allow the caller to target a different analysis than the default.
    analysis = kwargs.pop('analysis', self.analysis)
    # Cache one NtupleWrapper per (analysis, sample) so files open once.
    perAnalysis = self.sampleFiles.setdefault(analysis, {})
    if sampleName not in perAnalysis:
        perAnalysis[sampleName] = NtupleWrapper(analysis, sampleName, new=self.new, **kwargs)
        # Return ROOT's current directory to the top level after opening.
        ROOT.gROOT.cd()
def __init__(self, analysis, sample, **kwargs):
    """Initialize state for flattening one sample of one analysis.

    All unrecognized keyword arguments pass straight through to the
    NtupleWrapper constructor.
    """
    # Identifiers for this flattening job.
    self.analysis = analysis
    self.sample = sample
    # Work queues; filled by the add* registration methods.
    self.histParameters = []
    self.selections = []
    self.countOnly = []
    # Open the underlying ntuple last, once identifiers are set.
    self.ntuple = NtupleWrapper(analysis, sample, **kwargs)
class FlattenTree(object):
    '''Produces flat histograms.

    Histograms and selections are registered with :meth:`addHistogram` and
    :meth:`addSelection`, then :meth:`flattenAll` drives the underlying
    :class:`NtupleWrapper` to produce every (histogram, selection) pair.

    Fix over the previous revision: the class defined ``__exit__`` but not
    ``__enter__``, so ``with FlattenTree(...) as ft:`` raised
    ``AttributeError``; ``__enter__`` is now provided (returns ``self``).
    '''

    def __init__(self, analysis, sample, **kwargs):
        '''Bind to one (analysis, sample); extra kwargs go to NtupleWrapper.'''
        self.analysis = analysis
        self.sample = sample
        self.ntuple = NtupleWrapper(analysis, sample, **kwargs)
        # Registered work items, filled via addHistogram/addSelection.
        self.histParameters = []
        self.selections = []
        self.countOnly = []

    def __enter__(self):
        '''Context-manager entry; pairs with the existing __exit__.'''
        return self

    def __exit__(self, type, value, traceback):
        self.__finish()

    def __del__(self):
        self.__finish()

    def __finish(self):
        # Placeholder for cleanup; currently nothing to release.
        pass

    def addHistogram(self, name, **kwargs):
        '''Add a histogram to flatten'''
        self.histParameters += [name]

    def addSelection(self, selection, **kwargs):
        '''Add selection and postfix name to flatten'''
        countOnly = kwargs.pop('countOnly', False)
        self.selections += [selection]
        # countOnly selections are later restricted to 'count' histograms.
        if countOnly:
            self.countOnly += [selection]

    def clear(self):
        '''Reset the histograms/selections'''
        self.histParameters = []
        self.selections = []
        self.countOnly = []

    def flattenAll(self, **kwargs):
        '''Flatten all selections.

        Keyword args:
            njobs/job: split the full (selection, histogram) work list into
                ``njobs`` slices and process slice number ``job``.
            multi: if True (and progressbar support is available) display a
                progress bar instead of per-item log lines.
        '''
        njobs = int(kwargs.pop('njobs', 1))
        job = int(kwargs.pop('job', 0))
        multi = kwargs.pop('multi', False)
        if hasProgress and multi:
            pbar = kwargs.pop(
                'progressbar',
                ProgressBar(widgets=[
                    '{0}: '.format(self.sample), ' ',
                    SimpleProgress(), ' histograms ',
                    Percentage(), ' ', Bar(), ' ', ETA()
                ]))
        else:
            pbar = None
        # setup all jobs
        allJobs = []
        for selName in self.selections:
            for histName in self.histParameters:
                # count-only selections skip everything but 'count' hists
                if selName in self.countOnly and 'count' not in histName:
                    continue
                allJobs += [[histName, selName]]
        # split into multiple jobs (sorted so every job sees the same order)
        totjobs = len(allJobs)
        nperjob = math.ceil(float(totjobs) / njobs)
        startjob = int(job * nperjob)
        endjob = int((job + 1) * nperjob)
        allJobs = sorted(allJobs)[startjob:endjob]
        # flatten
        if hasProgress and multi:
            for args in pbar(allJobs):
                self.ntuple.flatten(*args)
        else:
            n = len(allJobs)
            for i, args in enumerate(allJobs):
                logging.info('Processing {3} {4} plot {0} of {1}: {2}.'.format(
                    i + 1, n, ' '.join(args), self.analysis, self.sample))
                self.ntuple.flatten(*args)
# NOTE(review): this span begins mid-function — the enclosing `def` (which
# builds `histMap` from `samples`/`plotdirs`) is outside this chunk, so the
# loop and `return` below belong to that off-screen function.
for s in samples + ['data']:
    histMap[s] += ['/'.join(['4P0F'] + plotdirs)]
# ZX (reducible background) additionally gets the fake-rate control regions.
histMap['ZX'] += [
    '/'.join(['for4P0F', reg] + plotdirs) for reg in ['3P1F', '2P2F']
]
return histMap

# --- module-level driver for the MonoHZZ limit setting ---
samples = ['ggZZ', 'qqZZ', 'H']
allsamples = ['TT', 'TTV', 'Z', 'WZ', 'VVV', 'ggZZ', 'qqZZ', 'H']
# The data-driven ZX estimate is built from data plus the listed MC samples.
ddsamples = ['data'] + samples
sigMap['ZX'] = []
for s in ddsamples:
    sigMap['ZX'] += sigMap[s]
# One ntuple wrapper per physical sample needed by the limit inputs.
wrappers = {}
for proc in samples + ['data']:
    for sample in sigMap[proc]:
        wrappers[sample] = NtupleWrapper('MonoHZZ',
                                         sample,
                                         new=True,
                                         version='94X',
                                         baseDirFlat='newflat_94X',
                                         baseDirProj='newflat_94X')
hists = getDatadrivenHists('hzz4l/met', wrappers=wrappers)
limits = MonoHZZLimits(hists)
limits.setup()
limits.save()
def create_datacard(args):
    '''Build the mu-mu-tau-tau (haa) datacard/workspace from parsed CLI args.

    Reads histograms (or unbinned datasets) for every region/shift/process,
    assembles a blinded or observed "data" entry, then configures a
    HaaLimits(2D) object and saves the resulting workspace.

    Relies on module globals: xRange, yRange, project, hCut, shifts,
    hmasses, amasses, signame, skipSignal, rebinning, shift-type lists.
    '''
    doMatrix = False
    doParametric = args.parametric
    doUnbinned = args.unbinned
    do2D = len(args.fitVars) == 2
    chi2Mass = args.chi2Mass
    blind = not args.unblind
    addSignal = args.addSignal
    signalParams = {'h': args.higgs, 'a': args.pseudoscalar}
    wsname = 'w'
    var = args.fitVars

    # Guard unsupported option combinations early.
    if doUnbinned and not doParametric:
        logging.error('Unbinned only supported with parametric option')
        raise
    if chi2Mass and 'hkf' not in var:
        logging.error('Trying to use non-kinematic fit with chi2 cut')
        raise

    # This function (re)writes several module-level globals used by the
    # histogram getters.
    global xRange
    global yRange
    if do2D and var[1] == 'tt': yRange = [0.75, 30]
    #if do2D and var[1]=='tt': yRange = [0,25]
    if args.yRange: yRange = args.yRange
    xRange = args.xRange
    global project
    global hCut
    project = args.project
    if args.selection: hCut = args.selection

    #############
    ### Setup ###
    #############
    sampleMap = getSampleMap()

    backgrounds = ['datadriven']
    data = ['data']

    # Skip high-mass H with low-mass a points that were not generated.
    signals = [
        signame.format(h=h, a=a) for h in hmasses for a in amasses
        if not (h > 125 and a in ['3p6', 4, 6])
    ]
    signalToAdd = signame.format(**signalParams)

    # One wrapper per sample, plus one per systematic-shifted copy.
    wrappers = {}
    for proc in backgrounds + signals + data:
        if proc == 'datadriven': continue
        for sample in sampleMap[proc]:
            wrappers[sample] = NtupleWrapper('MuMuTauTau',
                                             sample,
                                             new=True,
                                             version='80X')
            for shift in shifts:
                wrappers[sample + shift] = NtupleWrapper('MuMuTauTau',
                                                         sample,
                                                         new=True,
                                                         version='80X',
                                                         shift=shift)

    # Separate wrappers for the dimuon control region.
    wrappers_mm = {}
    for proc in data:
        for sample in sampleMap[proc]:
            wrappers_mm[sample] = NtupleWrapper('MuMu',
                                                sample,
                                                new=True,
                                                version='80X')

    ##############################
    ### Create/read histograms ###
    ##############################
    histMap = {}
    # The definitions of which regions match to which arguments
    # PP can take a fake rate datadriven estimate from FP, but FP can only take the observed values
    regionArgs = {
        'PP': {
            'region': 'A',
            'fakeRegion': 'B',
            'source': 'B',
            'sources': ['A', 'C'],
            'fakeSources': ['B', 'D'],
        },
        'FP': {
            'region': 'B',
            'sources': ['B', 'D'],
        },
    }
    for mode in ['PP', 'FP']:
        histMap[mode] = {}
        for shift in [''] + shifts:
            histMap[mode][shift] = {}
            for proc in backgrounds + signals:
                logging.info('Getting {} {}'.format(proc, shift))
                if proc == 'datadriven':
                    # Data-driven estimate: fake-rate (PP) or observed (FP).
                    if mode == 'PP':
                        if doMatrix:
                            histMap[mode][shift][
                                proc] = getMatrixDatadrivenHist(
                                    doUnbinned=doUnbinned,
                                    var=var,
                                    wrappers=wrappers,
                                    shift=shift,
                                    do2D=do2D,
                                    chi2Mass=chi2Mass,
                                    **regionArgs[mode])
                        else:
                            histMap[mode][shift][proc] = getDatadrivenHist(
                                doUnbinned=doUnbinned,
                                var=var,
                                wrappers=wrappers,
                                shift=shift,
                                do2D=do2D,
                                chi2Mass=chi2Mass,
                                **regionArgs[mode])
                    else:
                        if doMatrix:
                            histMap[mode][shift][proc] = getMatrixHist(
                                'data',
                                doUnbinned=doUnbinned,
                                var=var,
                                wrappers=wrappers,
                                shift=shift,
                                do2D=do2D,
                                chi2Mass=chi2Mass,
                                **regionArgs[mode])
                        else:
                            histMap[mode][shift][proc] = getHist(
                                'data',
                                doUnbinned=doUnbinned,
                                var=var,
                                wrappers=wrappers,
                                shift=shift,
                                do2D=do2D,
                                chi2Mass=chi2Mass,
                                **regionArgs[mode])
                else:
                    # override xRange for signal only
                    xRange = [0, 30]
                    if doMatrix:
                        histMap[mode][shift][proc] = getMatrixHist(
                            proc,
                            doUnbinned=doUnbinned,
                            var=var,
                            wrappers=wrappers,
                            shift=shift,
                            do2D=do2D,
                            chi2Mass=chi2Mass,
                            **regionArgs[mode])
                    else:
                        histMap[mode][shift][proc] = getHist(
                            proc,
                            doUnbinned=doUnbinned,
                            var=var,
                            wrappers=wrappers,
                            shift=shift,
                            do2D=do2D,
                            chi2Mass=chi2Mass,
                            **regionArgs[mode])
                    # restore the global range after the signal override
                    xRange = args.xRange
                if do2D or doUnbinned:
                    pass  # TODO, figure out how to rebin 2D
                else:
                    histMap[mode][shift][proc].Rebin(rebinning[var[0]])
            #if shift: continue
            logging.info('Getting observed')
            # "Observed" = sum of backgrounds (optionally + one signal point).
            samples = backgrounds
            if addSignal: samples = backgrounds + [signalToAdd]
            hists = []
            histsNoSig = []
            for proc in samples:
                hists += [histMap[mode][shift][proc].Clone()]
                if proc != signalToAdd:
                    histsNoSig += [histMap[mode][shift][proc].Clone()]
            if doUnbinned:
                hist = sumDatasets('obs{}{}'.format(mode, shift), *hists)
                histNoSig = sumDatasets('obsNoSig{}{}'.format(mode, shift),
                                        *histsNoSig)
            else:
                hist = sumHists('obs{}{}'.format(mode, shift), *hists)
                histNoSig = sumHists('obsNoSig{}{}'.format(mode, shift),
                                     *histsNoSig)
            #for b in range(hist.GetNbinsX()+1):
            #    val = int(hist.GetBinContent(b))
            #    if val<0: val = 0
            #    err = val**0.5
            #    hist.SetBinContent(b,val)
            #    #hist.SetBinError(b,err)
            if blind:
                histMap[mode][shift]['data'] = hist.Clone()
                histMap[mode][shift]['dataNoSig'] = histNoSig.Clone()
            else:
                # Unblinded: replace "data" with the true observation.
                hist = getHist('data',
                               doUnbinned=doUnbinned,
                               var=var,
                               wrappers=wrappers,
                               do2D=do2D,
                               chi2Mass=chi2Mass,
                               **regionArgs[mode])
                histMap[mode][shift]['data'] = hist.Clone()
                histMap[mode][shift]['dataNoSig'] = histNoSig.Clone()
                if do2D or doUnbinned:
                    pass
                else:
                    # NOTE(review): dataNoSig was built from already-rebinned
                    # inputs, so this may rebin it twice — confirm intended.
                    histMap[mode][shift]['data'].Rebin(rebinning[var[0]])
                    histMap[mode][shift]['dataNoSig'].Rebin(rebinning[var[0]])

    # Dimuon control region: only the nominal (no-shift) histograms.
    for mode in ['control']:
        histMap[mode] = {}
        for shift in ['']:
            histMap[mode][shift] = {}
            for proc in backgrounds:
                logging.info('Getting {} {}'.format(proc, shift))
                if proc == 'datadriven':
                    histMap[mode][shift][proc] = getControlHist(
                        'data',
                        doUnbinned=doUnbinned,
                        var=var,
                        wrappers=wrappers_mm)
            if shift: continue
            logging.info('Getting observed')
            hist = getControlHist('data',
                                  doUnbinned=doUnbinned,
                                  var=var,
                                  wrappers=wrappers_mm)
            histMap[mode][shift]['data'] = hist.Clone()
            histMap[mode][shift]['dataNoSig'] = hist.Clone()

    # Build the output directory/name: "<unbinned>_<yvar>/<tag>_<wSig>".
    name = []
    if args.unbinned: name += ['unbinned']
    if do2D: name += [var[1]]
    n = '_'.join(name) if name else ''
    name = []
    if args.tag: name += [args.tag]
    if args.addSignal: name += ['wSig']
    name = n + '/' + '_'.join(name) if n else '_'.join(name)

    # Pick the 1D or 2D limits implementation.
    if var == ['mm']:
        haaLimits = HaaLimits(histMap, name)
    elif do2D and project:
        haaLimits = HaaLimits(histMap, name)
    elif do2D:
        haaLimits = HaaLimits2D(histMap, name)
    else:
        # NOTE(review): logging.error does not concatenate extra positional
        # args like print; '%s' formatting was likely intended here.
        logging.error('Unsupported fit vars: ', var)
        raise
    haaLimits.SHIFTS = shiftTypes
    haaLimits.SIGNALSHIFTS = signalShiftTypes
    haaLimits.BACKGROUNDSHIFTS = backgroundShiftTypes
    haaLimits.QCDSHIFTS = qcdShifts
    haaLimits.AMASSES = amasses
    haaLimits.HMASSES = [chi2Mass] if chi2Mass else hmasses
    haaLimits.XRANGE = xRange
    haaLimits.XBINNING = int((xRange[1] - xRange[0]) * 10)
    if do2D:
        haaLimits.YRANGE = yRange
        # NOTE(review): '* 0.02' gives very few Y bins; the sibling version
        # of this function uses '/ yBinWidth' — verify which is intended.
        haaLimits.YBINNING = int((yRange[1] - yRange[0]) * 0.02)
    if 'tt' in var: haaLimits.YLABEL = 'm_{#tau_{#mu}#tau_{h}}'
    if 'h' in var or 'hkf' in var:
        haaLimits.YLABEL = 'm_{#mu#mu#tau_{#mu}#tau_{h}}'
    haaLimits.initializeWorkspace()
    haaLimits.addControlModels()
    haaLimits.addBackgroundModels(fixAfterControl=True)
    if not skipSignal:
        haaLimits.XRANGE = [0, 30]  # override for signal splines
        if project:
            haaLimits.addSignalModels(fit=False)
        elif 'tt' in var:
            haaLimits.addSignalModels(
                fit=False, yFitFuncFP='V',
                yFitFuncPP='L')  #,cutOffFP=0.75,cutOffPP=0.75)
        elif 'h' in var or 'hkf' in var:
            haaLimits.addSignalModels(
                fit=False, yFitFuncFP='DG',
                yFitFuncPP='DG')  #,cutOffFP=0.0,cutOffPP=0.0)
        else:
            haaLimits.addSignalModels(fit=False)
        haaLimits.XRANGE = xRange
    if args.addControl: haaLimits.addControlData()
    haaLimits.addData(
        asimov=(blind and not doMatrix and doUnbinned),
        addSignal=addSignal,
        doBinned=not doUnbinned,
        **signalParams)  # this will generate a dataset based on the fitted model
    haaLimits.setupDatacard(addControl=args.addControl,
                            doBinned=not doUnbinned)
    haaLimits.addSystematics(addControl=args.addControl,
                             doBinned=not doUnbinned)
    # Final workspace name encodes fit vars and user options.
    name = 'mmmt_{}_parametric'.format('_'.join(var))
    if args.unbinned: name += '_unbinned'
    if args.tag: name += '_{}'.format(args.tag)
    if args.addSignal: name += '_wSig'
    haaLimits.save(name=name)
binning = [10, 0, 500] #binning = [10,0,250000] # setup sampleMap = getSampleMap() backgrounds = ['TT', 'TTG', 'VVG', 'Z', 'G', 'GG', 'QCD'] data = ['data'] #signals = ['HToAG_250_1','HToAG_250_30','HToAG_250_150'] signals = ['HToAG_250_150'] wrappers = {} for proc in backgrounds + signals + data: for sample in sampleMap[proc]: wrappers[sample] = NtupleWrapper('ThreePhoton', sample, new=True, version='80X') def getBinned(proc, **kwargs): scalefactor = kwargs.pop('scalefactor', '*'.join([ 'genWeight', 'pileupWeight', ])) hists = ROOT.TList() for sample in sampleMap[proc]: hist = wrappers[sample].getTempHist2D( sample, selection, '1' if proc == 'data' else scalefactor, 'gg13_mass', 'gg23_mass', binning, binning) #hist = wrappers[sample].getTempHist2D(sample,selection,'1' if proc=='data' else scalefactor,'gg13_M2','gg23_M2',binning,binning) hists.Add(hist)
def create_datacard(args):
    '''Build the mu-mu-tau-tau datacard via the generic Limits machinery.

    Gathers per-region/per-shift histograms, defines processes and expected
    yields (parametric splines or plain histograms), attaches systematics
    (stat, shape shifts, lumi, muon, tau), and prints the datacard.

    Relies on module globals: shifts, shiftTypes, hmasses, amasses,
    signame, splinename, varBinning, rebinning.
    '''
    doMatrix = False
    doParametric = args.parametric
    doUnbinned = args.unbinned
    do2D = len(args.fitVars) == 2
    blind = not args.unblind
    addSignal = args.addSignal
    signalParams = {'h': args.higgs, 'a': args.pseudoscalar}
    wsname = 'w'
    var = args.fitVars

    # Guard unsupported option combinations early.
    if do2D and doParametric:
        logging.error('Parametric 2D fits are not yet supported')
        raise
    if doUnbinned and not doParametric:
        logging.error('Unbinned only supported with parametric option')
        raise

    #############
    ### Setup ###
    #############
    sampleMap = getSampleMap()

    backgrounds = ['datadriven']
    data = ['data']

    signals = [signame.format(h=h, a=a) for h in hmasses for a in amasses]
    signalToAdd = signame.format(**signalParams)
    signalSplines = [splinename.format(h=h) for h in hmasses]

    # One wrapper per sample plus per systematic-shifted copy.
    wrappers = {}
    for proc in backgrounds + signals + data:
        if proc == 'datadriven': continue
        for sample in sampleMap[proc]:
            wrappers[sample] = NtupleWrapper('MuMuTauTau',
                                             sample,
                                             new=True,
                                             version='80X')
            for shift in shifts:
                wrappers[sample + shift] = NtupleWrapper('MuMuTauTau',
                                                         sample,
                                                         new=True,
                                                         version='80X',
                                                         shift=shift)

    ##############################
    ### Create/read histograms ###
    ##############################
    histMap = {}
    # The definitions of which regions match to which arguments
    # PP can take a fake rate datadriven estimate from PF, but PF can only take the observed values
    regionArgs = {
        'PP': {
            'region': 'A',
            'fakeRegion': 'B',
            'source': 'B',
            'sources': ['A', 'C'],
            'fakeSources': ['B', 'D'],
        },
        'PF': {
            'region': 'B',
            'sources': ['B', 'D'],
        },
    }
    for mode in ['PP', 'PF']:
        histMap[mode] = {}
        for shift in [''] + shifts:
            histMap[mode][shift] = {}
            for proc in backgrounds + signals:
                logging.info('Getting {} {}'.format(proc, shift))
                if proc == 'datadriven':
                    # TODO: unbinned, get the RooDataHist from flattenener first
                    if mode == 'PP':
                        if doMatrix:
                            histMap[mode][shift][
                                proc] = getMatrixDatadrivenHist(
                                    var=var,
                                    wrappers=wrappers,
                                    shift=shift,
                                    do2D=do2D,
                                    **regionArgs[mode])
                        else:
                            histMap[mode][shift][proc] = getDatadrivenHist(
                                var=var,
                                wrappers=wrappers,
                                shift=shift,
                                do2D=do2D,
                                **regionArgs[mode])
                    else:
                        if doMatrix:
                            histMap[mode][shift][proc] = getMatrixHist(
                                'data',
                                var=var,
                                wrappers=wrappers,
                                shift=shift,
                                do2D=do2D,
                                **regionArgs[mode])
                        else:
                            histMap[mode][shift][proc] = getHist(
                                'data',
                                var=var,
                                wrappers=wrappers,
                                shift=shift,
                                do2D=do2D,
                                **regionArgs[mode])
                else:
                    if doMatrix:
                        histMap[mode][shift][proc] = getMatrixHist(
                            proc,
                            var=var,
                            wrappers=wrappers,
                            shift=shift,
                            do2D=do2D,
                            **regionArgs[mode])
                    else:
                        histMap[mode][shift][proc] = getHist(
                            proc,
                            var=var,
                            wrappers=wrappers,
                            shift=shift,
                            do2D=do2D,
                            **regionArgs[mode])
                if do2D:
                    pass  # TODO, figure out how to rebin 2D
                else:
                    histMap[mode][shift][proc].Rebin(rebinning[var[0]])
            # Only the nominal (no-shift) pass needs an observed entry.
            if shift: continue
            logging.info('Getting observed')
            if blind:
                # Blinded: observed = sum of backgrounds (+ optional signal).
                samples = backgrounds
                if addSignal: samples = backgrounds + [signalToAdd]
                hists = []
                for proc in samples:
                    hists += [histMap[mode][shift][proc]]
                hist = sumHists('obs', *hists)
                #for b in range(hist.GetNbinsX()+1):
                #    val = int(hist.GetBinContent(b))
                #    if val<0: val = 0
                #    err = val**0.5
                #    hist.SetBinContent(b,val)
                #    #hist.SetBinError(b,err)
                histMap[mode][shift]['data'] = hist
            else:
                hist = getHist('data',
                               var=var,
                               wrappers=wrappers,
                               do2D=do2D,
                               **regionArgs[mode])
                histMap[mode][shift]['data'] = hist
                if do2D:
                    pass
                else:
                    histMap[mode][shift]['data'].Rebin(rebinning[var[0]])

    #####################
    ### Create Limits ###
    #####################
    limits = Limits(wsname)

    limits.addEra('Run2016')
    limits.addAnalysis('HAA')

    era = 'Run2016'
    analysis = 'HAA'
    reco = 'mmmt'

    for mode in ['PP', 'PF']:
        limits.addChannel(mode)
        # NOTE(review): chunk formatting is ambiguous here; the per-mode
        # process/expected/observed setup below is reconstructed as living
        # inside this loop (it uses `mode`) — confirm against the original.
        if doParametric:
            binning = varBinning[var[0]]
            limits.addMH(*binning[1:])
            limits.addX(*binning[1:], unit='GeV', label='m_{#mu#mu}')
            for h in hmasses:
                limits.addProcess(splinename.format(h=h), signal=True)
            for background in backgrounds:
                limits.addProcess(background)
            # add models
            for h in hmasses:
                model = getSpline(histMap[mode][''], h, tag=mode)
                limits.setExpected(splinename.format(h=h), era, analysis,
                                   mode, model)
            if doUnbinned:
                bg = buildModel(limits, tag=mode)
                limits.setExpected('datadriven', era, analysis, mode, bg)
            else:
                # add histograms for background if not using an unbinned model
                for bg in backgrounds:
                    limits.setExpected(bg, era, analysis, mode,
                                       histMap[mode][''][bg])
            # get roodatahist
            limits.setObserved(era, analysis, mode,
                               histMap[mode]['']['data'])
        else:
            for signal in signals:
                limits.addProcess(signal, signal=True)
            for background in backgrounds:
                limits.addProcess(background)
            for proc in backgrounds:
                limits.setExpected(proc, era, analysis, mode,
                                   histMap[mode][''][proc])
            for proc in signals:
                limits.setExpected(proc, era, analysis, mode,
                                   histMap[mode][''][proc])
            limits.setObserved(era, analysis, mode,
                               histMap[mode]['']['data'])

    #########################
    ### Add uncertainties ###
    #########################
    # Convenience process groups for the systematics tables below.
    systproc = tuple(
        [proc for proc in signals + backgrounds if 'datadriven' not in proc])
    allproc = tuple([proc for proc in signals + backgrounds])
    systsplineproc = tuple([
        proc for proc in signalSplines + backgrounds
        if 'datadriven' not in proc
    ])
    allsplineproc = tuple([proc for proc in signalSplines + backgrounds])
    bgproc = tuple([proc for proc in backgrounds])
    sigsplineproc = tuple([proc for proc in signalSplines])
    sigproc = tuple([proc for proc in signals])

    ############
    ### stat ###
    ############
    def getStat(hist, direction):
        # Shift every bin up/down by its statistical error (clamped at 0)
        # and zero the errors on the copy.
        newhist = hist.Clone('{0}{1}'.format(hist.GetName(), direction))
        nb = hist.GetNbinsX() * hist.GetNbinsY()
        for b in range(nb + 1):
            val = hist.GetBinContent(b + 1)
            err = hist.GetBinError(b + 1)
            newval = val + err if direction == 'Up' else val - err
            if newval < 0: newval = 0
            newhist.SetBinContent(b + 1, newval)
            newhist.SetBinError(b + 1, 0)
        return newhist

    logging.info('Adding stat systematic')
    statMapUp = {}
    statMapDown = {}
    # NOTE(review): `mode` here is the leftover value from the loop above,
    # so the stat maps come from a single mode — verify this is intended.
    for proc in backgrounds + signals:
        statMapUp[proc] = getStat(histMap[mode][''][proc], 'Up')
        statMapDown[proc] = getStat(histMap[mode][''][proc], 'Down')
    statsyst = {}
    for mode in ['PP', 'PF']:
        # background
        if doUnbinned:
            # TODO: add errors on params
            pass
        else:
            for proc in bgproc:
                statsyst[((proc, ), (era, ), (analysis, ),
                          (mode, ))] = (statMapUp[proc], statMapDown[proc])
        # signal
        if doParametric:
            for h in hmasses:
                statsyst[((splinename.format(h=h), ), (era, ), (analysis, ),
                          (mode, ))] = (getSpline(statMapUp,
                                                  h,
                                                  tag=mode + 'StatUp'),
                                        getSpline(statMapDown,
                                                  h,
                                                  tag=mode + 'StatDown'))
        else:
            for proc in sigproc:
                statsyst[((proc, ), (era, ), (analysis, ),
                          (mode, ))] = (statMapUp[proc], statMapDown[proc])
    limits.addSystematic('stat_{process}_{channel}',
                         'shape',
                         systematics=statsyst)

    ##############
    ### shifts ###
    ##############
    for shift in shiftTypes:
        logging.info('Adding {} systematic'.format(shift))
        shiftsyst = {}
        for mode in ['PP', 'PF']:
            # background
            if doUnbinned:
                # TODO rateParams on bg model
                pass
            else:
                for proc in bgproc:
                    shiftsyst[((proc, ), (era, ), (analysis, ),
                               (mode, ))] = (histMap[mode][shift +
                                                           'Up'][proc],
                                             histMap[mode][shift +
                                                           'Down'][proc])
            # signal
            if doParametric:
                for h in hmasses:
                    shiftsyst[((splinename.format(h=h), ), (era, ),
                               (analysis, ), (mode, ))] = (getSpline(
                                   histMap[mode][shift + 'Up'],
                                   h,
                                   tag=mode + shift + 'Up'),
                                                           getSpline(
                                                               histMap[mode]
                                                               [shift +
                                                                'Down'],
                                                               h,
                                                               tag=mode +
                                                               shift +
                                                               'Down'))
            else:
                for proc in sigproc:
                    shiftsyst[((proc, ), (era, ), (analysis, ),
                               (mode, ))] = (histMap[mode][shift +
                                                           'Up'][proc],
                                             histMap[mode][shift +
                                                           'Down'][proc])
        limits.addSystematic(shift, 'shape', systematics=shiftsyst)

    ############
    ### Lumi ###
    ############
    # lumi 2.3% for 2015 and 2.5% for 2016
    # https://twiki.cern.ch/twiki/bin/view/CMS/TWikiLUM#CurRec
    logging.info('Adding lumi systematic')
    lumiproc = systsplineproc if doParametric else systproc
    lumisyst = {
        (lumiproc, (era, ), ('all', ), ('all', )): 1.025,
    }
    limits.addSystematic('lumi', 'lnN', systematics=lumisyst)

    ############
    ### muon ###
    ############
    # from z: 1 % + 0.5 % + 0.5 % per muon for id + iso + trig (pt>20)
    logging.info('Adding mu id+iso systematic')
    muproc = systsplineproc if doParametric else systproc
    musyst = {
        (muproc, (era, ), ('all', ), ('all', )):
        1 + math.sqrt(sum([0.01**2, 0.005**2] * 2 +
                          [0.01**2])),  # 2 lead have iso, tau_mu doesnt
    }
    limits.addSystematic('muid', 'lnN', systematics=musyst)

    logging.info('Adding mu trig systematic')
    musyst = {
        (muproc, (era, ), ('all', ), ('all', )): 1.005,  # 1 triggering muon
    }
    limits.addSystematic('mutrig', 'lnN', systematics=musyst)

    ###########
    ### tau ###
    ###########
    # 5% on sf 0.99 (VL/L) or 0.97 (M)
    # NOTE(review): message says "mu id+iso" but this is the tau id section.
    logging.info('Adding mu id+iso systematic')
    tauproc = systsplineproc if doParametric else systproc
    tausyst = {
        (tauproc, (era, ), ('all', ), ('all', )): 1.05,
    }
    limits.addSystematic('tauid', 'lnN', systematics=tausyst)

    ######################
    ### Print datacard ###
    ######################
    directory = 'datacards_shape/{0}'.format('MuMuTauTau')
    python_mkdir(directory)
    datacard = '{0}/mmmt_{1}'.format(
        directory, args.tag) if args.tag else '{}/mmmt'.format(directory)
    processes = {}
    if doParametric:
        for h in hmasses:
            processes[signame.format(
                h=h, a='X')] = [splinename.format(h=h)] + backgrounds
    else:
        for signal in signals:
            processes[signal] = [signal] + backgrounds
    limits.printCard(datacard,
                     processes=processes,
                     blind=False,
                     saveWorkspace=doParametric)
def create_datacard(args):
    '''Build the mu-mu-tau-tau (haa) datacard with 2D-interpolation support.

    Variant of create_datacard that supports per-decay-mode regions,
    gg/vbf signal naming, signal cross-section rescaling, and optional
    subtraction of the signal region from the dimuon control region.

    Relies on module globals: j (unique-clone-name counter), xRange,
    yRange, project, hCut, hmasses, shifts, systLabels, skipSignal,
    subtractSR, correlation, xVar, yVar, and the various mass/name maps.
    '''
    global j

    doMatrix = False
    doParametric = args.parametric
    doUnbinned = args.unbinned
    do2D = len(args.fitVars) == 2
    chi2Mass = args.chi2Mass
    blind = not args.unblind
    addSignal = args.addSignal
    signalParams = {'h': args.higgs, 'a': args.pseudoscalar}
    wsname = 'w'
    var = args.fitVars

    # Guard unsupported option combinations early.
    if doUnbinned and not doParametric:
        logging.error('Unbinned only supported with parametric option')
        raise
    if chi2Mass and 'hkf' not in var:
        logging.error('Trying to use non-kinematic fit with chi2 cut')
        raise

    # Globals consumed by the histogram getters.
    global xRange
    global yRange
    if do2D and var[1] == 'tt': yRange = [0.75, 30]
    #if do2D and var[1]=='tt': yRange = [0,25]
    if args.yRange: yRange = args.yRange
    xRange = args.xRange
    global project
    global hCut
    project = args.project
    if args.selection: hCut = args.selection

    xBinWidth = 0.1
    if do2D:
        yBinWidth = 0.25 if var[1] == 'tt' else 10

    # Without 2D interpolation only the fully generated H masses are used.
    global hmasses
    if not args.do2DInterpolation:
        hmasses = [h for h in hmasses if h in [125, 300, 750]]

    #############
    ### Setup ###
    #############
    sampleMap = getSampleMap()

    backgrounds = ['datadriven']
    data = ['data']

    signals = [
        signame.format(h=h, a=a) for h in hmasses for a in amasses
        if a in hamap[h]
    ]
    ggsignals = [
        ggsigname.format(h=h, a=a) for h in hmasses for a in amasses
        if a in hamap[h]
    ]
    vbfsignals = [
        vbfsigname.format(h=h, a=a) for h in vbfhmasses for a in vbfamasses
    ]
    signalToAdd = signame.format(**signalParams)

    # One wrapper per sample plus per systematic-shifted copy.
    wrappers = {}
    allsamples = backgrounds
    if not skipSignal:
        allsamples = allsamples + signals + ggsignals + vbfsignals
    allsamples = allsamples + data
    for proc in allsamples:
        if proc == 'datadriven': continue
        for sample in sampleMap[proc]:
            wrappers[sample] = NtupleWrapper('MuMuTauTau',
                                             sample,
                                             new=True,
                                             version='80X')
            for shift in shifts:
                wrappers[sample + shift] = NtupleWrapper('MuMuTauTau',
                                                         sample,
                                                         new=True,
                                                         version='80X',
                                                         shift=shift)

    # Separate wrappers for the dimuon control region.
    wrappers_mm = {}
    for proc in data:
        for sample in sampleMap[proc]:
            wrappers_mm[sample] = NtupleWrapper('MuMu',
                                                sample,
                                                new=True,
                                                version='80X')

    ##############################
    ### Create/read histograms ###
    ##############################
    histMap = {}
    # The definitions of which regions match to which arguments
    # PP can take a fake rate datadriven estimate from FP, but FP can only take the observed values
    # The dmN variants restrict to a single hadronic-tau decay mode.
    regionArgs = {
        'PP': {
            'region': 'A',
            'fakeRegion': 'B',
            'source': 'B',
            'sources': ['A', 'C'],
            'fakeSources': ['B', 'D'],
        },
        'FP': {
            'region': 'B',
            'sources': ['B', 'D'],
        },
        'PPdm0': {
            'region': 'A',
            'dm': 0,
            'fakeRegion': 'B',
            'source': 'B',
            'sources': ['A', 'C'],
            'fakeSources': ['B', 'D'],
        },
        'FPdm0': {
            'region': 'B',
            'dm': 0,
            'sources': ['B', 'D'],
        },
        'PPdm1': {
            'region': 'A',
            'dm': 1,
            'fakeRegion': 'B',
            'source': 'B',
            'sources': ['A', 'C'],
            'fakeSources': ['B', 'D'],
        },
        'FPdm1': {
            'region': 'B',
            'dm': 1,
            'sources': ['B', 'D'],
        },
        'PPdm10': {
            'region': 'A',
            'dm': 10,
            'fakeRegion': 'B',
            'source': 'B',
            'sources': ['A', 'C'],
            'fakeSources': ['B', 'D'],
        },
        'FPdm10': {
            'region': 'B',
            'dm': 10,
            'sources': ['B', 'D'],
        },
    }
    if args.sumDecayModes:
        regionArgs['PP']['sumDecayModes'] = args.sumDecayModes
        regionArgs['FP']['sumDecayModes'] = args.sumDecayModes

    modes = ['PP', 'FP']
    if args.decayMode:
        modes = ['PPdm0', 'PPdm1', 'PPdm10', 'FPdm0', 'FPdm1', 'FPdm10']

    thesesamples = backgrounds
    if not skipSignal:
        thesesamples = backgrounds + signals

    for mode in modes:
        histMap[mode] = {}
        for shift in [''] + shifts:
            # Map internal shift names to datacard-facing labels.
            shiftLabel = systLabels.get(shift, shift)
            histMap[mode][shiftLabel] = {}
            for proc in thesesamples:
                logging.info('Getting {} {} {}'.format(mode, proc, shift))
                if proc == 'datadriven':
                    if 'PP' in mode:
                        if doMatrix:
                            histMap[mode][shiftLabel][
                                proc] = getMatrixDatadrivenHist(
                                    doUnbinned=True,
                                    var=var,
                                    wrappers=wrappers,
                                    shift=shift,
                                    do2D=do2D,
                                    chi2Mass=chi2Mass,
                                    **regionArgs[mode])
                        else:
                            histMap[mode][shiftLabel][
                                proc] = getDatadrivenHist(doUnbinned=True,
                                                          var=var,
                                                          wrappers=wrappers,
                                                          shift=shift,
                                                          do2D=do2D,
                                                          chi2Mass=chi2Mass,
                                                          **regionArgs[mode])
                    else:
                        if doMatrix:
                            histMap[mode][shiftLabel][proc] = getMatrixHist(
                                'data',
                                doUnbinned=True,
                                var=var,
                                wrappers=wrappers,
                                shift=shift,
                                do2D=do2D,
                                chi2Mass=chi2Mass,
                                **regionArgs[mode])
                        else:
                            histMap[mode][shiftLabel][proc] = getHist(
                                'data',
                                doUnbinned=True,
                                var=var,
                                wrappers=wrappers,
                                shift=shift,
                                do2D=do2D,
                                chi2Mass=chi2Mass,
                                **regionArgs[mode])
                else:
                    # Signal samples are stored under their gg-prefixed name.
                    if proc in signals:
                        newproc = 'gg' + proc
                    else:
                        newproc = proc
                    # override xRange for signal only
                    oldXRange = xRange
                    xRange = [0, 30]
                    if doMatrix:
                        histMap[mode][shiftLabel][proc] = getMatrixHist(
                            newproc,
                            doUnbinned=True,
                            var=var,
                            wrappers=wrappers,
                            shift=shift,
                            do2D=do2D,
                            chi2Mass=chi2Mass,
                            **regionArgs[mode])
                    else:
                        histMap[mode][shiftLabel][proc] = getHist(
                            newproc,
                            doUnbinned=True,
                            var=var,
                            wrappers=wrappers,
                            shift=shift,
                            do2D=do2D,
                            chi2Mass=chi2Mass,
                            **regionArgs[mode])
                    xRange = oldXRange
                #if do2D or doUnbinned:
                #    pass # TODO, figure out how to rebin 2D
                #else:
                #    histMap[mode][shiftLabel][proc].Rebin(rebinning[var[0]])
            #if shift: continue
            logging.info('Getting observed')
            # "Observed" = sum of backgrounds (optionally + one signal point).
            samples = backgrounds
            if addSignal: samples = backgrounds + [signalToAdd]
            hists = []
            histsNoSig = []
            for proc in samples:
                # j provides globally unique clone names for ROOT objects.
                j += 1
                hists += [
                    histMap[mode][shiftLabel][proc].Clone('hist' + str(j))
                ]
                j += 1
                if proc != signalToAdd:
                    histsNoSig += [
                        histMap[mode][shiftLabel][proc].Clone('hist' + str(j))
                    ]
            #if doUnbinned:
            hist = sumDatasets('obs{}{}'.format(mode, shift), *hists)
            histNoSig = sumDatasets('obsNoSig{}{}'.format(mode, shift),
                                    *histsNoSig)
            #else:
            #    hist = sumHists('obs{}{}'.format(mode,shift),*hists)
            #    histNoSig = sumHists('obsNoSig{}{}'.format(mode,shift),*histsNoSig)
            #for b in range(hist.GetNbinsX()+1):
            #    val = int(hist.GetBinContent(b))
            #    if val<0: val = 0
            #    err = val**0.5
            #    hist.SetBinContent(b,val)
            #    #hist.SetBinError(b,err)
            if blind:
                j += 1
                histMap[mode][shiftLabel]['data'] = hist.Clone('hist' +
                                                               str(j))
                j += 1
                histMap[mode][shiftLabel]['dataNoSig'] = histNoSig.Clone(
                    'hist' + str(j))
            else:
                hist = getHist('data',
                               doUnbinned=True,
                               var=var,
                               wrappers=wrappers,
                               do2D=do2D,
                               chi2Mass=chi2Mass,
                               **regionArgs[mode])
                j += 1
                histMap[mode][shiftLabel]['data'] = hist.Clone('hist' +
                                                               str(j))
                j += 1
                histMap[mode][shiftLabel]['dataNoSig'] = histNoSig.Clone(
                    'hist' + str(j))
            #if do2D or doUnbinned:
            #    pass
            #else:
            #    histMap[mode][shiftLabel]['data'].Rebin(rebinning[var[0]])
            #    histMap[mode][shiftLabel]['dataNoSig'].Rebin(rebinning[var[0]])

    # Dimuon control region (nominal shift only).
    for mode in ['control']:
        histMap[mode] = {}
        for shift in ['']:
            shiftLabel = systLabels.get(shift, shift)
            histMap[mode][shiftLabel] = {}
            for proc in backgrounds:
                logging.info('Getting {} {}'.format(proc, shift))
                if proc == 'datadriven':
                    hist = getControlHist('data',
                                          doUnbinned=False,
                                          var=var,
                                          wrappers=wrappers_mm)
                    if subtractSR:
                        # subtract off the signal region and sideband from the control region
                        for mode2 in modes:
                            histsub = getHist('data',
                                              doUnbinned=False,
                                              var=var,
                                              wrappers=wrappers,
                                              do2D=False,
                                              chi2Mass=chi2Mass,
                                              **regionArgs[mode2])
                            # NOTE(review): '/' here is integer division only
                            # under Python 2; under Python 3 Rebin would get
                            # a float — confirm target interpreter.
                            histsub.Rebin(histsub.GetNbinsX() /
                                          hist.GetNbinsX())
                            hist.Add(histsub, -1)
                    histMap[mode][shiftLabel][proc] = hist
            if shift: continue
            logging.info('Getting observed')
            hist = getControlHist('data',
                                  doUnbinned=False,
                                  var=var,
                                  wrappers=wrappers_mm)
            if subtractSR:
                # subtract off the signal region and sideband from the control region
                for mode2 in modes:
                    histsub = getHist('data',
                                      doUnbinned=False,
                                      var=var,
                                      wrappers=wrappers,
                                      do2D=False,
                                      chi2Mass=chi2Mass,
                                      **regionArgs[mode2])
                    histsub.Rebin(histsub.GetNbinsX() / hist.GetNbinsX())
                    hist.Add(histsub, -1)
            j += 1
            histMap[mode][shiftLabel]['data'] = hist.Clone('hist' + str(j))
            j += 1
            histMap[mode][shiftLabel]['dataNoSig'] = hist.Clone('hist' +
                                                                str(j))

    # rescale signal
    scales = {}
    for proc in signals:
        gg = getXsec(proc, 'gg')
        vbf = getXsec(proc, 'vbf')
        # divide out H cross section from sample
        # it was gg only, we will scale to gg+vbf with acceptance correction in the HaaLimits class
        scale = 1. / gg
        scales[proc] = scale
        #print proc, gg, vbf, scale

    # before doing anything print out integrals to make sure things are okay
    #h=125
    #a=7
    #SIGNAME = 'HToAAH{h}A{a}'
    #for s in ['']+[systLabels.get(shift,shift) for shift in shiftTypes]:
    #    if s:
    #        integral = histMap['PP'][s+'Up'][SIGNAME.format(h=h,a=a)].sumEntries('{0}>{1} && {0}<{2}'.format(xVar,*xRange))
    #        print s, 'Up', integral
    #        integral = histMap['PP'][s+'Down'][SIGNAME.format(h=h,a=a)].sumEntries('{0}>{1} && {0}<{2}'.format(xVar,*xRange))
    #        print s, 'Down', integral
    #    else:
    #        integral = histMap['PP'][''][SIGNAME.format(h=h,a=a)].sumEntries('{0}>{1} && {0}<{2}'.format(xVar,*xRange))
    #        print 'central', integral
    #return

    # Build the output directory/name: "<unbinned>_<yvar>/<tag>_<wSig>".
    name = []
    if args.unbinned: name += ['unbinned']
    if do2D: name += [var[1]]
    n = '_'.join(name) if name else ''
    name = []
    if args.tag: name += [args.tag]
    if args.addSignal: name += ['wSig']
    name = n + '/' + '_'.join(name) if n else '_'.join(name)

    # Pick the 1D or 2D limits implementation.
    if var == ['mm']:
        haaLimits = HaaLimits(histMap,
                              name,
                              do2DInterpolation=args.do2DInterpolation,
                              doParamFit=args.fitParams)
    elif do2D and project:
        haaLimits = HaaLimits(histMap,
                              name,
                              do2DInterpolation=args.do2DInterpolation,
                              doParamFit=args.fitParams)
    elif do2D:
        haaLimits = HaaLimits2D(histMap,
                                name,
                                do2DInterpolation=args.do2DInterpolation,
                                doParamFit=args.fitParams)
    else:
        # NOTE(review): logging.error does not concatenate extra positional
        # args like print; '%s' formatting was likely intended here.
        logging.error('Unsupported fit vars: ', var)
        raise
    if args.decayMode: haaLimits.REGIONS = modes
    if 'h' in var: haaLimits.YCORRELATION = correlation
    haaLimits.SHIFTS = [systLabels.get(shift, shift) for shift in shiftTypes]
    haaLimits.SIGNALSHIFTS = [
        systLabels.get(shift, shift) for shift in signalShiftTypes
    ]
    haaLimits.BACKGROUNDSHIFTS = [
        systLabels.get(shift, shift) for shift in backgroundShiftTypes
    ]
    haaLimits.QCDSHIFTS = [
        systLabels.get(shift, shift) for shift in qcdShifts
    ]
    haaLimits.AMASSES = amasses
    haaLimits.HMASSES = [chi2Mass] if chi2Mass else hmasses
    haaLimits.HAMAP = hamap
    haaLimits.XRANGE = xRange
    haaLimits.XBINNING = int((xRange[1] - xRange[0]) / xBinWidth)
    haaLimits.XVAR = xVar
    if do2D:
        haaLimits.YVAR = yVar
        haaLimits.YRANGE = yRange
        haaLimits.YBINNING = int((yRange[1] - yRange[0]) / yBinWidth)
    if 'tt' in var: haaLimits.YLABEL = 'm_{#tau_{#mu}#tau_{h}}'
    if 'h' in var or 'hkf' in var:
        haaLimits.YLABEL = 'm_{#mu#mu#tau_{#mu}#tau_{h}}'
    haaLimits.initializeWorkspace()
    haaLimits.addControlModels()
    haaLimits.addBackgroundModels(fixAfterControl=True)
    if not skipSignal:
        haaLimits.XRANGE = [0, 30]  # override for signal splines
        if project:
            haaLimits.addSignalModels(scale=scales)
        elif 'tt' in var:
            if args.yFitFunc:
                haaLimits.addSignalModels(
                    scale=scales,
                    yFitFuncFP=args.yFitFunc,
                    yFitFuncPP=args.yFitFunc)  #,cutOffFP=0.0,cutOffPP=0.0)
            else:
                haaLimits.addSignalModels(
                    scale=scales, yFitFuncFP='V',
                    yFitFuncPP='L')  #,cutOffFP=0.75,cutOffPP=0.75)
        elif 'h' in var or 'hkf' in var:
            if args.yFitFunc:
                haaLimits.addSignalModels(
                    scale=scales,
                    yFitFuncFP=args.yFitFunc,
                    yFitFuncPP=args.yFitFunc)  #,cutOffFP=0.0,cutOffPP=0.0)
            else:
                haaLimits.addSignalModels(
                    scale=scales, yFitFuncFP='DG',
                    yFitFuncPP='DG')  #,cutOffFP=0.0,cutOffPP=0.0)
        else:
            haaLimits.addSignalModels(scale=scales)
        haaLimits.XRANGE = xRange
    if args.addControl: haaLimits.addControlData()
    haaLimits.addData(
        blind=blind,
        asimov=args.asimov,
        addSignal=args.addSignal,
        doBinned=not doUnbinned,
        **signalParams)  # this will generate a dataset based on the fitted model
    haaLimits.setupDatacard(addControl=args.addControl,
                            doBinned=not doUnbinned)
    haaLimits.addSystematics(addControl=args.addControl,
                             doBinned=not doUnbinned)
    # Final workspace name encodes fit vars and user options.
    name = 'mmmt_{}_parametric'.format('_'.join(var))
    if args.unbinned: name += '_unbinned'
    if args.tag: name += '_{}'.format(args.tag)
    if args.addSignal: name += '_wSig'
    haaLimits.save(name=name)
def create_datacard(args):
    '''Build and save a datacard for the MuMuTauTau haa limit setting.

    Reads flattened histograms (or unbinned datasets) for the data-driven
    background and the h->aa signal grid, assembles the observed (possibly
    blinded) distributions per region, and hands everything to the
    appropriate HaaLimits class to write the datacard.

    args: parsed command-line namespace; uses parametric, unbinned, fitVars,
          unblind, addSignal, higgs, pseudoscalar.
    Raises NotImplementedError for unsupported fit configurations and
    ValueError for unrecognized fit variables.
    Relies on module-level configuration: signame, hmasses, amasses, shifts,
    rebinning, and the various getHist/sum helpers.
    '''
    doMatrix = False  # matrix-method fake estimate disabled by default
    doParametric = args.parametric
    doUnbinned = args.unbinned
    do2D = len(args.fitVars)==2
    blind = not args.unblind
    addSignal = args.addSignal
    signalParams = {'h': args.higgs, 'a': args.pseudoscalar}
    var = args.fitVars

    # Validate the requested fit configuration up front, before any I/O.
    # (These were bare `raise` statements, which outside an except block
    # just produce "RuntimeError: No active exception to re-raise".)
    if do2D and doParametric:
        logging.error('Parametric 2D fits are not yet supported')
        raise NotImplementedError('Parametric 2D fits are not yet supported')

    if doUnbinned and not doParametric:
        logging.error('Unbinned only supported with parametric option')
        raise ValueError('Unbinned only supported with parametric option')

    #############
    ### Setup ###
    #############
    sampleMap = getSampleMap()

    backgrounds = ['datadriven']
    data = ['data']

    signals = [signame.format(h=h,a=a) for h in hmasses for a in amasses]
    signalToAdd = signame.format(**signalParams)

    # Open an ntuple wrapper for every sample, plus one per systematic shift.
    wrappers = {}
    for proc in backgrounds+signals+data:
        if proc=='datadriven': continue
        for sample in sampleMap[proc]:
            wrappers[sample] = NtupleWrapper('MuMuTauTau',sample,new=True,version='80X')
            for shift in shifts:
                wrappers[sample+shift] = NtupleWrapper('MuMuTauTau',sample,new=True,version='80X',shift=shift)

    ##############################
    ### Create/read histograms ###
    ##############################
    histMap = {}
    # The definitons of which regions match to which arguments
    # PP can take a fake rate datadriven estimate from FP, but FP can only take the observed values
    regionArgs = {
        'PP': {'region':'A','fakeRegion':'B','source':'B','sources':['A','C'],'fakeSources':['B','D'],},
        'FP': {'region':'B','sources':['B','D'],},
    }
    for mode in ['PP','FP']:
        histMap[mode] = {}
        for shift in ['']+shifts:
            histMap[mode][shift] = {}
            for proc in backgrounds+signals:
                logging.info('Getting {} {}'.format(proc,shift))
                if proc=='datadriven':
                    # TODO: unbinned, get the RooDataHist from flattenener first
                    if mode=='PP':
                        if doMatrix:
                            histMap[mode][shift][proc] = getMatrixDatadrivenHist(doUnbinned=doUnbinned,var=var,wrappers=wrappers,shift=shift,do2D=do2D,**regionArgs[mode])
                        else:
                            histMap[mode][shift][proc] = getDatadrivenHist(doUnbinned=doUnbinned,var=var,wrappers=wrappers,shift=shift,do2D=do2D,**regionArgs[mode])
                    else:
                        # FP region: only the observed values are available
                        if doMatrix:
                            histMap[mode][shift][proc] = getMatrixHist('data',doUnbinned=doUnbinned,var=var,wrappers=wrappers,shift=shift,do2D=do2D,**regionArgs[mode])
                        else:
                            histMap[mode][shift][proc] = getHist('data',doUnbinned=doUnbinned,var=var,wrappers=wrappers,shift=shift,do2D=do2D,**regionArgs[mode])
                else:
                    if doMatrix:
                        histMap[mode][shift][proc] = getMatrixHist(proc,doUnbinned=doUnbinned,var=var,wrappers=wrappers,shift=shift,do2D=do2D,**regionArgs[mode])
                    else:
                        histMap[mode][shift][proc] = getHist(proc,doUnbinned=doUnbinned,var=var,wrappers=wrappers,shift=shift,do2D=do2D,**regionArgs[mode])
                if do2D or doUnbinned:
                    pass # TODO, figure out how to rebin 2D
                else:
                    histMap[mode][shift][proc].Rebin(rebinning[var[0]])
            # the observed distribution is only built for the unshifted case
            if shift: continue
            logging.info('Getting observed')
            samples = backgrounds
            if addSignal: samples = backgrounds + [signalToAdd]
            hists = []
            histsNoSig = []
            for proc in samples:
                hists += [histMap[mode][shift][proc].Clone()]
                if proc!=signalToAdd: histsNoSig += [histMap[mode][shift][proc].Clone()]
            if doUnbinned:
                hist = sumDatasets('obs{}{}'.format(mode,shift),*hists)
                histNoSig = sumDatasets('obsNoSig{}{}'.format(mode,shift),*histsNoSig)
            else:
                hist = sumHists('obs{}{}'.format(mode,shift),*hists)
                histNoSig = sumHists('obsNoSig{}{}'.format(mode,shift),*histsNoSig)
            if blind:
                # blinded: "data" is the summed background(+injected signal) expectation
                histMap[mode][shift]['data'] = hist.Clone()
                histMap[mode][shift]['dataNoSig'] = histNoSig.Clone()
            else:
                hist = getHist('data',doUnbinned=doUnbinned,var=var,wrappers=wrappers,do2D=do2D,**regionArgs[mode])
                histMap[mode][shift]['data'] = hist.Clone()
                histMap[mode][shift]['dataNoSig'] = histNoSig.Clone()
                if do2D or doUnbinned:
                    pass
                else:
                    histMap[mode][shift]['data'].Rebin(rebinning[var[0]])
                    histMap[mode][shift]['dataNoSig'].Rebin(rebinning[var[0]])

    # choose the limits class matching the requested fit variables
    if var == ['mm']:
        haaLimits = HaaLimits(histMap,'unbinned' if args.unbinned else '')
    elif var == ['h'] or var == ['hkf']:
        haaLimits = HaaLimitsHMass(histMap,'unbinned' if args.unbinned else '')
    else:
        # lazy %s formatting so the offending variables actually appear in the log
        logging.error('Unsupported fit vars: %s', var)
        raise ValueError('Unsupported fit vars: {}'.format(var))
    haaLimits.AMASSES = amasses
    haaLimits.HMASSES = hmasses
    haaLimits.initializeWorkspace()
    haaLimits.addBackgroundModels()
    haaLimits.addSignalModels()
    haaLimits.addData()
    haaLimits.setupDatacard()
    haaLimits.addSystematics()
    name = 'mmmt_{}_parametric'.format('_'.join(var))
    if args.unbinned: name += '_unbinned'
    if args.addSignal: name += '_wSig'
    haaLimits.save(name=name)
class FlattenTree(object):
    '''Produces flat histograms'''

    def __init__(self, analysis, sample, **kwargs):
        self.analysis = analysis
        self.sample = sample
        self.ntuple = NtupleWrapper(analysis, sample, **kwargs)
        self.histParameters = []
        self.selections = []
        self.countOnly = []

    def __exit__(self, type, value, traceback):
        self.__finish()

    def __del__(self):
        self.__finish()

    def __finish(self):
        # nothing to clean up at the moment
        pass

    def addHistogram(self, name, **kwargs):
        '''
        Add a histogram to flatten
        '''
        self.histParameters.append(name)

    def addSelection(self, selection, **kwargs):
        '''Add selection and postfix name to flatten'''
        self.selections.append(selection)
        if kwargs.pop('countOnly', False):
            self.countOnly.append(selection)

    def clear(self):
        '''Reset the histograms/selections'''
        self.histParameters, self.selections, self.countOnly = [], [], []

    def flattenAll(self, **kwargs):
        '''Flatten all selections'''
        njobs = int(kwargs.pop('njobs', 1))
        job = int(kwargs.pop('job', 0))
        multi = kwargs.pop('multi', False)
        useBar = hasProgress and multi
        pbar = None
        if useBar:
            pbar = kwargs.pop('progressbar', ProgressBar(widgets=['{0}: '.format(self.sample), ' ', SimpleProgress(), ' histograms ', Percentage(), ' ', Bar(), ' ', ETA()]))
        # enumerate every (histogram, selection) pair to process; count-only
        # selections are restricted to the "count" histograms
        pending = [
            [histName, selName]
            for selName in self.selections
            for histName in self.histParameters
            if 'count' in histName or selName not in self.countOnly
        ]
        # carve out this job's share of the sorted work list
        perJob = math.ceil(float(len(pending)) / njobs)
        lo = int(job * perJob)
        hi = int((job + 1) * perJob)
        chunk = sorted(pending)[lo:hi]
        if useBar:
            for jobArgs in pbar(chunk):
                self.ntuple.flatten(*jobArgs)
        else:
            total = len(chunk)
            for i, jobArgs in enumerate(chunk):
                logging.info('Processing {3} {4} plot {0} of {1}: {2}.'.format(i+1, total, ' '.join(jobArgs), self.analysis, self.sample))
                self.ntuple.flatten(*jobArgs)
def __init__(self,**kwargs): inputTreeName = kwargs.pop('inputTreeName','muTauEventTree/eventTree') mass = kwargs.pop('mass',500) nTaus = kwargs.pop('nTaus',0) super(Hpp4lTrainer,self).__init__(**kwargs) sigMap = getSigMap('Hpp4l') genRecoMap = getGenRecoChannelMap('Hpp4l') genChannels = getGenChannels('Hpp4l') selectedGenChannels = [x for x in genChannels['PP'] if x[:2].count('t')==nTaus and x[2:].count('t')==nTaus] selectedRecoChannels = [] for gen in selectedGenChannels: for reco in genRecoMap[gen]: if reco not in selectedRecoChannels: selectedRecoChannels += [reco] allsamples = ['W','T','TT','TTVall','Z','WW','VHall','WZ','VVV','ZZall'] signals = ['HppHmm{0}GeV'.format(mass)] # get the trees ntupleMap = {} for s in allsamples+signals: for sampleName in sigMap[s]: ntupleMap[sampleName] = NtupleWrapper('Hpp4l',sampleName) # add to factory for s in signals: for sig in sigMap[s]: if not ntupleMap[sig].getTree().GetEntries(): continue self.factory.AddSignalTree(ntupleMap[sig].getTree(),ntupleMap[sig].getIntLumi()/ntupleMap[sig].getSampleLumi()) for s in allsamples: for bg in sigMap[s]: if not ntupleMap[bg].getTree().GetEntries(): continue self.factory.AddBackgroundTree(ntupleMap[bg].getTree(),ntupleMap[bg].getIntLumi()/ntupleMap[bg].getSampleLumi()) # per event weight weight = 'hpp1_mediumScale*hpp2_mediumScale*hmm1_mediumScale*hmm2_mediumScale*genWeight*pileupWeight*triggerEfficiency' self.factory.SetWeightExpression(weight) # variables #self.factory.AddVariable('hppWindow := fabs(hpp_mass-{0})'.format(mass), '|m_{++}-m_{#Phi}|', 'GeV', 'F') # h++ symmetric window self.factory.AddVariable('hpp_mass','m_{++}','GeV','F') # h++ #self.factory.AddVariable('hpp_pt','p_{T}^{++}','GeV','F') # h++ pt #self.factory.AddVariable('hpp_deltaR', '#Delta R(++)', '', 'F') # h++ dR #self.factory.AddVariable('hmmWindow := fabs(hmm_mass-{0})'.format(mass), '|m_{--}-m_{#Phi}|', 'GeV', 'F') # h-- symmetric window self.factory.AddVariable('hmm_mass','m_{--}','GeV','F') # h-- 
#self.factory.AddVariable('hmm_pt','p_{T}^{--}','GeV','F') # h-- pt #self.factory.AddVariable('hmm_deltaR', '#Delta R(--)', '', 'F') # h-- dR self.factory.AddVariable('st := hpp1_pt+hpp2_pt+hmm1_pt+hmm2_pt', 's_{T}', 'GeV', 'F') # 4l st self.factory.AddVariable('zWindow := fabs(z_mass-{0})'.format(ZMASS), '|m_{+-}-m_{Z}|', 'GeV', 'F') # z symmetric window minMap = { 0 : 0.9, 1 : 0.4, 2 : 0.3, } minMass = mass * minMap[nTaus] maxMass = mass * 1.1 # preselection cut cutString = ' && '.join(['{0}_passMedium==1'.format(lep) for lep in ['hpp1','hpp2','hmm1','hmm2']]) cutString += ' && ' + '(' + ' || '.join(['genChannel=="{0}"'.format(chan) for chan in selectedGenChannels + ['a']]) + ')' cutString += ' && ' + '(' + ' || '.join(['channel=="{0}"'.format(chan) for chan in selectedRecoChannels]) + ')' passCut = ROOT.TCut(cutString) self.factory.PrepareTrainingAndTestTree( passCut, ":".join( [ "nTrain_Signal=0", "nTrain_Background=0", "SplitMode=Random", "NormMode=NumEvents", "!V", ] ) ) # options: # H : display help # V : turn on verbosity # IgnoreNegWeightsInTraining : ignore events with negative weights for training, keep for testing # book methods cuts = self.factory.BookMethod( ROOT.TMVA.Types.kCuts, "Cuts", ":".join( [ "VarProp=FSmart", #"CutRangeMin[1]=0.", # set max window for Z at 80 #"CutRangeMax[1]=80.", ] ) )