def get_list_of_syst_to_fill(opts):
    """Build the list of systematic variations selected by the options.

    Reads ``opts.syst`` and ``opts.exclude``.  The selection works as follows:

    * ``opts.syst`` is None: 'NOM' plus the fake, object and weight variations;
    * 'fake', 'object' or 'weight': 'NOM' plus that family of variations;
    * a string containing a comma: the entries of
      ``systUtils.getAllVariations()`` that appear in the comma-separated
      list (names not returned by ``getAllVariations()`` are dropped);
    * a single entry of ``systUtils.getAllVariations()``: only that entry;
    * 'NOM': only the nominal.

    Afterwards, every entry returned by
    ``filterWithRegexp(systematics, opts.exclude)`` is removed
    (only when ``opts.exclude`` is truthy).

    Returns:
        list of systematic names.

    Raises:
        ValueError: ``opts.syst`` is set, is neither a comma-separated list,
            nor a known variation, nor 'NOM', and nothing was added to the
            nominal.
    """
    sysOption = opts.syst
    excludedSyst = opts.exclude
    anySys = sysOption is None
    systematics = ['NOM']
    if sysOption == 'fake' or anySys:
        systematics += systUtils.fakeSystVariations()
    if sysOption == 'object' or anySys:
        systematics += systUtils.mcObjectVariations()
    if sysOption == 'weight' or anySys:
        systematics += systUtils.mcWeightVariations()
    if sysOption and sysOption.count(','):
        # split once instead of once per candidate variation
        requested = sysOption.split(',')
        systematics = [s for s in systUtils.getAllVariations() if s in requested]
    elif sysOption in systUtils.getAllVariations():
        systematics = [sysOption]
    elif not anySys and len(systematics) == 1 and sysOption != 'NOM':
        raise ValueError("Invalid syst %s" % str(sysOption))
    if excludedSyst:
        # evaluate the regexp filter once, not once per element
        rejected = list(filterWithRegexp(systematics, excludedSyst))
        systematics = [s for s in systematics if s not in rejected]
    return systematics
def setSyst(self, sys='NOM') :
    """Select the systematic variation to be used by this object.

    Updates ``self.isObjSys``, ``self.isWeightSys`` and ``self.isFakeSys``
    according to which ``systUtils`` list contains ``sys``, then stores in
    ``self.syst`` either ``sys`` or 'NOM':

    * object or weight variation: ``sys`` if ``self.isMc``, otherwise 'NOM';
    * fake variation: ``sys`` if ``self.isFake``, otherwise 'NOM';
    * anything else: ``sys`` unchanged.

    Returns ``self`` so that calls can be chained.
    """
    # open question carried over from the original code:
    # do we have different names for nom (mc vs fake)?
    self.isObjSys = sys in systUtils.mcObjectVariations()
    self.isWeightSys = sys in systUtils.mcWeightVariations()
    self.isFakeSys = sys in systUtils.fakeSystVariations()
    if self.isObjSys or self.isWeightSys :
        variationApplies = self.isMc
    elif self.isFakeSys :
        variationApplies = self.isFake
    else :
        variationApplies = True
    self.syst = sys if variationApplies else 'NOM'
    return self
def exploreAvailableSystematics(self, verbose=False) : systs = ['NOM'] if self.isFake : systs += systUtils.fakeSystVariations() elif self.isMc : systs += systUtils.mcObjectVariations() systs += systUtils.mcWeightVariations() self.systematics = [] for sys in systs : self.setSyst(sys) if os.path.exists(self.filenameHisto) : self.systematics.append(sys) if verbose : print "%s : found %d variations : %s"%(self.name, len(self.systematics), str(self.systematics))
def runFill(opts) : batchMode = opts.batch inputFakeDir = opts.input_fake inputGenDir = opts.input_gen outputDir = opts.output_dir sysOption = opts.syst excludedSyst = opts.exclude verbose = opts.verbose if verbose : print "filling histos" mkdirIfNeeded(outputDir) systematics = ['NOM'] anySys = sysOption==None if sysOption=='fake' or anySys : systematics += systUtils.fakeSystVariations() if sysOption=='object' or anySys : systematics += systUtils.mcObjectVariations() if sysOption=='weight' or anySys : systematics += systUtils.mcWeightVariations() if sysOption and sysOption.count(',') : systematics = [s for s in systUtils.getAllVariations() if s in sysOption.split(',')] elif sysOption in systUtils.getAllVariations() : systematics = [sysOption] elif not anySys and len(systematics)==1 and sysOption!='NOM' : raise ValueError("Invalid syst %s"%str(sysOption)) if excludedSyst : systematics = [s for s in systematics if s not in filterWithRegexp(systematics, excludedSyst)] if verbose : print "about to loop over these systematics:\n %s"%str(systematics) for syst in systematics : if batchMode : newOptions = " --input-gen %s" % opts.input_gen newOptions += " --input-fake %s" % opts.input_fake newOptions += " --output-dir %s" % opts.output_dir newOptions += " --verbose %s" % opts.verbose newOptions += " --syst %s" % syst template = 'batch/templates/check_hft_fill.sh.template' script = "batch/hft_%s.sh"%syst scriptFile = open(script, 'w') scriptFile.write(open(template).read() .replace('%(opt)s', newOptions) .replace('%(logfile)s', 'log/hft/fill_'+syst+'.log') .replace('%(jobname)s', 'fill_'+syst)) scriptFile.close() cmd = "sbatch %s"%script if verbose : print cmd out = getCommandOutput(cmd) if verbose : print out['stdout'] if out['stderr'] : print out['stderr'] continue if verbose : print '---- filling ',syst samplesPerGroup = allSamplesAllGroups() [s.setSyst(syst) for g, samples in samplesPerGroup.iteritems() for s in samples] counters, histos = 
countAndFillHistos(samplesPerGroup=samplesPerGroup, syst=syst, verbose=verbose, outdir=outputDir) printCounters(counters) saveHistos(samplesPerGroup, histos, outputDir, verbose)
def get_list_of_syst_to_fill(opts):
    """Return the systematic variations selected by ``opts.syst`` and ``opts.exclude``.

    ``opts.syst`` can be:

    * None: 'NOM' plus the fake, object and weight variations;
    * 'fake', 'object' or 'weight': 'NOM' plus that family of variations;
    * a string containing a comma: the entries of
      ``systUtils.getAllVariations()`` found in the comma-separated list
      (other names are dropped);
    * one entry of ``systUtils.getAllVariations()``: only that entry;
    * 'NOM': only the nominal.

    When ``opts.exclude`` is truthy, the entries returned by
    ``filterWithRegexp(systematics, opts.exclude)`` are removed.

    Raises:
        ValueError: ``opts.syst`` is set but matches none of the cases above
            and nothing was added to the nominal.
    """
    sysOption = opts.syst
    excludedSyst = opts.exclude
    anySys = sysOption is None
    systematics = ['NOM']
    if sysOption=='fake' or anySys:
        systematics += systUtils.fakeSystVariations()
    if sysOption=='object' or anySys:
        systematics += systUtils.mcObjectVariations()
    if sysOption=='weight' or anySys:
        systematics += systUtils.mcWeightVariations()
    if sysOption and sysOption.count(','):
        # split once instead of once per candidate variation
        requested = sysOption.split(',')
        systematics = [s for s in systUtils.getAllVariations() if s in requested]
    elif sysOption in systUtils.getAllVariations():
        systematics = [sysOption]
    elif not anySys and len(systematics)==1 and sysOption!='NOM':
        raise ValueError("Invalid syst %s"%str(sysOption))
    if excludedSyst:
        # evaluate the regexp filter once, not once per element
        rejected = list(filterWithRegexp(systematics, excludedSyst))
        systematics = [s for s in systematics if s not in rejected]
    return systematics
def runPlot(opts) : inputDir = opts.input_dir outputDir = opts.output_dir verbose = opts.verbose mkdirIfNeeded(outputDir) buildTotBkg = systUtils.buildTotBackgroundHisto buildStat = systUtils.buildStatisticalErrorBand buildSyst = systUtils.buildSystematicErrorBand selections = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions) variables = variables_to_plot() groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir) groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake()) if not skip_charge_flip : groups.append(dataset.DatasetGroup.build_qflip_from_simulated_samples(groups)) plot_groups = [systUtils.Group(g.name) for g in groups] sel_not_specified = len(regions_to_plot())==len(selections) if sel_not_specified: selections = guess_available_selections_from_histofiles(inputDir, first(plot_groups), verbose) systematics_to_use = get_list_of_syst_to_fill(opts) for group in plot_groups : group.setCurrentSelection(first(selections)) group.setHistosDir(inputDir).setCurrentSelection(first(selections)) group.exploreAvailableSystematics(verbose) group.filterAndDropSystematics(systematics_to_use, opts.exclude, verbose) available_systematics = sorted(list(set([s for g in plot_groups for s in g.systematics]))) systematics = [s for s in systematics_to_use if s in available_systematics] if verbose : print "using the following systematics : {0}".format(systematics) print "missing the following systematics : {0}".format([s for s in systematics_to_use if s not in available_systematics]) fakeSystematics = [s for s in systematics if s in systUtils.fakeSystVariations()] mcSystematics = [s for s in systematics if s in systUtils.mcObjectVariations() + systUtils.mcWeightVariations()] mkdirIfNeeded(outputDir) findByName = systUtils.findByName simBkgs = [g for g in plot_groups if g.isMcBkg] data = findByName(plot_groups, 'data') fake = findByName(plot_groups, 'fake') signal = findByName(plot_groups, 'signaltaumu') print 
'names_stacked_groups to be improved' names_stacked_groups = [g.name for g in simBkgs+[fake]] for sel in selections : if verbose : print '-- plotting ',sel for var in variables : if verbose : print '---- plotting ',var print_summary_yield = var is 'onebin' for g in plot_groups : g.setSystNominal() g.setCurrentSelection(sel) nominalHistoData = data.getHistogram(variable=var, selection=sel, cacheIt=True) nominalHistoSign = signal.getHistogram(variable=var, selection=sel, cacheIt=True) nominalHistoFakeBkg = fake.getHistogram(variable=var, selection=sel, cacheIt=True) nominalHistosSimBkg = dict([(g.name, g.getHistogram(variable=var, selection=sel, cacheIt=True)) for g in simBkgs]) nominalHistosBkg = dict([('fake', nominalHistoFakeBkg)] + [(g, h) for g, h in nominalHistosSimBkg.iteritems()]) nominalHistoTotBkg = buildTotBkg(histoFakeBkg=nominalHistoFakeBkg, histosSimBkgs=nominalHistosSimBkg) statErrBand = buildStat(nominalHistoTotBkg) systErrBand = buildSyst(fake=fake, simBkgs=simBkgs, variable=var, selection=sel, fakeVariations=fakeSystematics, mcVariations=mcSystematics, verbose=verbose, printYield=print_summary_yield) # if print_summary_yield: # print_stat_syst_yield(fake=fake, variable=var, selection=sel, fakeVariations=fakeSystematics) plotHistos(histoData=nominalHistoData, histoSignal=nominalHistoSign, histoTotBkg=nominalHistoTotBkg, histosBkg=nominalHistosBkg, statErrBand=statErrBand, systErrBand=systErrBand, stack_order=names_stacked_groups, topLabel=sel, canvasName=(sel+'_'+var), outdir=outputDir, options=opts, printYieldSummary=print_summary_yield) for group in plot_groups : group.printVariationsSummary()
def runPlot(opts): inputDir = opts.input_dir outputDir = opts.output_dir verbose = opts.verbose mkdirIfNeeded(outputDir) buildTotBkg = systUtils.buildTotBackgroundHisto buildStat = systUtils.buildStatisticalErrorBand buildSyst = systUtils.buildSystematicErrorBand selections = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions) variables = variables_to_plot() groups = dataset.DatasetGroup.build_groups_from_files_in_dir( opts.samples_dir) groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake()) if not skip_charge_flip: groups.append( dataset.DatasetGroup.build_qflip_from_simulated_samples(groups)) plot_groups = [systUtils.Group(g.name) for g in groups] sel_not_specified = len(regions_to_plot()) == len(selections) if sel_not_specified: selections = guess_available_selections_from_histofiles( inputDir, first(plot_groups), verbose) systematics_to_use = get_list_of_syst_to_fill(opts) for group in plot_groups: group.setCurrentSelection(first(selections)) group.setHistosDir(inputDir).setCurrentSelection(first(selections)) group.exploreAvailableSystematics(verbose) group.filterAndDropSystematics(systematics_to_use, opts.exclude, verbose) available_systematics = sorted( list(set([s for g in plot_groups for s in g.systematics]))) systematics = [s for s in systematics_to_use if s in available_systematics] if verbose: print "using the following systematics : {0}".format(systematics) print "missing the following systematics : {0}".format( [s for s in systematics_to_use if s not in available_systematics]) fakeSystematics = [ s for s in systematics if s in systUtils.fakeSystVariations() ] mcSystematics = [ s for s in systematics if s in systUtils.mcObjectVariations() + systUtils.mcWeightVariations() ] mkdirIfNeeded(outputDir) findByName = systUtils.findByName simBkgs = [g for g in plot_groups if g.isMcBkg] data = findByName(plot_groups, 'data') fake = findByName(plot_groups, 'fake') signal = findByName(plot_groups, 'signaltaumu') print 
'names_stacked_groups to be improved' names_stacked_groups = [g.name for g in simBkgs + [fake]] for sel in selections: if verbose: print '-- plotting ', sel for var in variables: if verbose: print '---- plotting ', var print_summary_yield = var is 'onebin' for g in plot_groups: g.setSystNominal() g.setCurrentSelection(sel) nominalHistoData = data.getHistogram(variable=var, selection=sel, cacheIt=True) nominalHistoSign = signal.getHistogram(variable=var, selection=sel, cacheIt=True) nominalHistoFakeBkg = fake.getHistogram(variable=var, selection=sel, cacheIt=True) nominalHistosSimBkg = dict([(g.name, g.getHistogram(variable=var, selection=sel, cacheIt=True)) for g in simBkgs]) nominalHistosBkg = dict( [('fake', nominalHistoFakeBkg)] + [(g, h) for g, h in nominalHistosSimBkg.iteritems()]) nominalHistoTotBkg = buildTotBkg(histoFakeBkg=nominalHistoFakeBkg, histosSimBkgs=nominalHistosSimBkg) statErrBand = buildStat(nominalHistoTotBkg) systErrBand = buildSyst(fake=fake, simBkgs=simBkgs, variable=var, selection=sel, fakeVariations=fakeSystematics, mcVariations=mcSystematics, verbose=verbose, printYield=print_summary_yield) # if print_summary_yield: # print_stat_syst_yield(fake=fake, variable=var, selection=sel, fakeVariations=fakeSystematics) plotHistos(histoData=nominalHistoData, histoSignal=nominalHistoSign, histoTotBkg=nominalHistoTotBkg, histosBkg=nominalHistosBkg, statErrBand=statErrBand, systErrBand=systErrBand, stack_order=names_stacked_groups, topLabel=sel, canvasName=(sel + '_' + var), outdir=outputDir, options=opts, printYieldSummary=print_summary_yield) for group in plot_groups: group.printVariationsSummary()
def isNeededForSys(self, sys) :
    """Tell whether this object has to be processed for the variation ``sys``.

    The nominal ('NOM') is always needed.  Weight and object variations are
    needed only when ``self.isMc``; fake variations only when ``self.isFake``.
    """
    if sys=='NOM' :
        return True
    if self.isMc and sys in systUtils.mcWeightVariations() :
        return True
    if self.isMc and sys in systUtils.mcObjectVariations() :
        return True
    # last alternative: the outcome of the fake check is returned as it is
    return self.isFake and sys in systUtils.fakeSystVariations()
def countAndFillHistos(samplesPerGroup={}, syst='', verbose=False, outdir='./') : selections = allRegions() variables = variablesToPlot() mcGroups, fakeGroups = mcDatasetids().keys(), ['fake'] objVariations, weightVariations, fakeVariations = systUtils.mcObjectVariations(), systUtils.mcWeightVariations(), systUtils.fakeSystVariations() def groupIsRelevantForSys(g, s) : isRelevant = (s=='NOM' or (g in mcGroups and s in objVariations+weightVariations) or (g in fakeGroups and s in fakeVariations)) if verbose and not isRelevant : print "skipping %s for %s"%(g, s) return isRelevant def dropIrrelevantGroupsForThisSys(groups, sys) : return dict((g, samples) for g, samples in groups.iteritems() if groupIsRelevantForSys(g, syst)) def dropSamplesWithoutTree(samples) : return [s for s in samples if s.hasInputHftTree(msg='Warning! ')] def dropGroupsWithoutSamples(groups) : return dict((g, samples) for g, samples in groups.iteritems() if len(samples)) samplesPerGroup = dropIrrelevantGroupsForThisSys(samplesPerGroup, syst) samplesPerGroup = dict((g, dropSamplesWithoutTree(samples)) for g, samples in samplesPerGroup.iteritems()) samplesPerGroup = dropGroupsWithoutSamples(samplesPerGroup) groups = samplesPerGroup.keys() counters = bookCounters(groups, selections) histos = bookHistos(variables, groups, selections) for group, samplesGroup in samplesPerGroup.iteritems() : logLine = "---->" if verbose : print 1*' ',group histosGroup = histos [group] countsGroup = counters[group] for sample in samplesGroup : if verbose : logLine +=" %s"%sample.name fillAndCount(histosGroup, countsGroup, sample, blind=False) if verbose : print logLine if verbose : print 'done' return counters, histos
def runPlot(opts) : inputDir = opts.input_dir outputDir = opts.output_dir sysOption = opts.syst excludedSyst = opts.exclude verbose = opts.verbose mkdirIfNeeded(outputDir) buildTotBkg = systUtils.buildTotBackgroundHisto buildStat = systUtils.buildStatisticalErrorBand buildSyst = systUtils.buildSystematicErrorBand groups = allGroups() selections = allRegions() variables = variablesToPlot() for group in groups : group.setHistosDir(inputDir) group.exploreAvailableSystematics(verbose) group.filterAndDropSystematics(sysOption, excludedSyst, verbose) mkdirIfNeeded(outputDir) systematics = ['NOM'] anySys = sysOption==None if sysOption=='fake' or anySys : systematics += systUtils.fakeSystVariations() if sysOption=='object' or anySys : systematics += systUtils.mcObjectVariations() if sysOption=='weight' or anySys : systematics += systUtils.mcWeightVariations() if sysOption and sysOption.count(',') : systematics = [s for s in systUtils.getAllVariations() if s in sysOption.split(',')] elif sysOption in systUtils.getAllVariations() : systematics = [sysOption] if not anySys and len(systematics)==1 and sysOption!='NOM' : raise ValueError("Invalid syst %s"%str(sysOption)) if excludedSyst : systematics = [s for s in systematics if s not in filterWithRegexp(systematics, excludedSyst)] if verbose : print "using the following systematics : %s"%str(systematics) fakeSystematics = [s for s in systematics if s in systUtils.fakeSystVariations()] mcSystematics = [s for s in systematics if s in systUtils.mcObjectVariations() + systUtils.mcWeightVariations()] simBkgs = [g for g in groups if g.isMcBkg] data, fake, signal = findByName(groups, 'data'), findByName(groups, 'fake'), findByName(groups, 'signal') for sel in selections : if verbose : print '-- plotting ',sel for var in variables : if verbose : print '---- plotting ',var for g in groups : g.setSystNominal() nominalHistoData = data.getHistogram(variable=var, selection=sel, cacheIt=True) nominalHistoSign = 
signal.getHistogram(variable=var, selection=sel, cacheIt=True) nominalHistoFakeBkg = fake.getHistogram(variable=var, selection=sel, cacheIt=True) nominalHistosSimBkg = dict([(g.name, g.getHistogram(variable=var, selection=sel, cacheIt=True)) for g in simBkgs]) nominalHistosBkg = dict([('fake', nominalHistoFakeBkg)] + [(g, h) for g, h in nominalHistosSimBkg.iteritems()]) nominalHistoTotBkg = buildTotBkg(histoFakeBkg=nominalHistoFakeBkg, histosSimBkgs=nominalHistosSimBkg) statErrBand = buildStat(nominalHistoTotBkg) systErrBand = buildSyst(fake=fake, simBkgs=simBkgs, variable=var, selection=sel, fakeVariations=fakeSystematics, mcVariations=mcSystematics, verbose=verbose) plotHistos(histoData=nominalHistoData, histoSignal=nominalHistoSign, histoTotBkg=nominalHistoTotBkg, histosBkg=nominalHistosBkg, statErrBand=statErrBand, systErrBand=systErrBand, canvasName=(sel+'_'+var), outdir=outputDir, verbose=verbose) for group in groups : summary = group.variationsSummary() for selection, summarySel in summary.iteritems() : colW = str(12) header = ' '.join([('%'+colW+'s')%colName for colName in ['variation', 'yield', 'delta[%]']]) lineTemplate = '%(sys)'+colW+'s'+'%(counts)'+colW+'s'+'%(delta)'+colW+'s' print "---- summary of variations for %s ----" % group.name print "--- %s ---" % selection print header print '\n'.join(lineTemplate%{'sys':s, 'counts':(("%.3f"%c) if type(c) is float else (str(c)+str(type(c)))), 'delta' :(("%.3f"%d) if type(d) is float else '--' if d==None else (str(d)+str(type(d)))) } for s,c,d in summarySel)