def drawBottom(pad, totBkg, bkgHistos, sigHisto, llnjvar):
    """Draw the stacked backgrounds and the signal overlay on the given pad.

    bkgHistos is a dict {sample : histo}; colors/topRightLabel/dictSum/
    drawLegendWithDictKeys come from the surrounding module.
    """
    pad.cd()
    totBkg.SetStats(False)
    totBkg.SetMinimum(0.)  # force this to avoid negative fluct due to fake
    totBkg.Draw('axis')
    pad.Update()  # necessary to fool root's dumb object ownership
    stack = r.THStack('stack_'+llnjvar, '')
    pad.Update()
    r.SetOwnership(stack, False)
    for sample, histo in bkgHistos.iteritems():
        histo.SetFillColor(colors[sample] if sample in colors else r.kOrange)
        histo.SetDrawOption('bar')
        histo.SetDirectory(0)
        stack.Add(histo)
    stack.Draw('hist same')
    pad.Update()
    sigHisto.SetLineColor(r.kRed)
    sigHisto.SetLineWidth(2*sigHisto.GetLineWidth())
    sigHisto.Draw('same')
    pad.Update()
    topRightLabel(pad, llnjvar, xpos=0.125, align=13)
    drawLegendWithDictKeys(pad, dictSum(bkgHistos, {'signal': sigHisto}), opt='f')
    pad.RedrawAxis()
    # keep python-side references so root does not garbage-collect them
    pad._stack = stack
    pad._histos = [h for h in stack.GetHists()]
    pad.Update()
def optimizeSelection() : inputdir, options = parseOptions() print 'sigreg ',options.sigreg tag = pickTag(inputdir, options) sigFiles, bkgFiles = getInputFilenames(inputdir, tag, options) # todo: filter with regexp sigFiles = dict([(s, k) for s, k in sigFiles.iteritems() if s in filterWithRegexp(sigFiles.keys(), options.sigreg)]) allSamples = dictSum(sigFiles, bkgFiles) vars = variablesToPlot() histos = bookHistos(vars, allSamples.keys(), options.ll, options.nj) counts = fillHistosAndCount(histos, dictSum(sigFiles, bkgFiles), options.ll, options.nj, options.quicktest) bkgHistos = dict((s, h) for s, h in histos.iteritems() if s in bkgFiles.keys()) sigHistos = dict((s, h) for s, h in histos.iteritems() if s in sigFiles.keys()) plotHistos(bkgHistos, sigHistos, options.plotdir) printSummary(counts, options.summary)
def scoutedScoreForMatchNum(self, match, allianceIsRed):
    """Return the scouted total score for one alliance in a match.

    Sums auto points, tele shot points, capped defense-crossing points,
    scale points, and challenge points over the alliance's TIMDs.
    NOTE: mutates each TIMD's timesSuccessfulCrossedDefensesTele in place
    (normalizing lists of crossing times into counts), as the original did.
    """
    allTIMDs = self.calculator.su.getTIMDsForMatch(match)
    allianceNumbers = self.calculator.su.getAllianceForMatch(match, allianceIsRed)
    allianceNumbers = map(lambda t: t.number, allianceNumbers)
    allianceTIMDs = [timd for timd in allTIMDs if timd.teamNumber in allianceNumbers]
    autoPts = self.calculator.getAutoPointsForMatchForAllianceIsRed(match, allianceIsRed)
    teleShotPts = (2 * sum([(timd.numLowShotsMadeTele or 0) for timd in allianceTIMDs])
                   + 5 * sum([(timd.numHighShotsMadeTele or 0) for timd in allianceTIMDs]))
    # normalize each crossing record: missing or None -> 0, list of times -> count
    for timd in allianceTIMDs:
        s = timd.timesSuccessfulCrossedDefensesTele
        for key in self.calculator.defenseList:
            if key not in s or s[key] is None:  # was '== None'; identity test is correct
                s[key] = 0
            else:
                s[key] = len(s[key])
    # robustness fix: the sum was hard-coded to allianceTIMDs[0], [1], [2] and
    # raised IndexError for alliances with fewer than 3 scouted TIMDs; fold over
    # however many are present instead (assumes utils.dictSum is a plain
    # per-key sum, hence order-independent -- TODO confirm)
    crossingDicts = [timd.timesSuccessfulCrossedDefensesTele for timd in allianceTIMDs]
    allDefenseCrossings = reduce(utils.dictSum, crossingDicts)
    # only the first two crossings of each defense score points
    allDefenseCrossings = dict((defense, min(crossings, 2))
                               for defense, crossings in allDefenseCrossings.items())
    teleDefenseCrossPts = 5 * sum(allDefenseCrossings.values())
    scalePts = 15 * sum([utils.convertFirebaseBoolean(timd.didScaleTele) for timd in allianceTIMDs])
    challengePts = 5 * sum([utils.convertFirebaseBoolean(timd.didChallengeTele) for timd in allianceTIMDs])
    return autoPts + teleShotPts + teleDefenseCrossPts + scalePts + challengePts
def plotStackedHistos(histos={}, datakey=None, stackkeys=[], outputDir='', region='', colors={}, verbose=False): "input: a dictionary of histos[group]" mkdirIfNeeded(outputDir) bkg_histos = dict([(k, h) for k, h in histos.iteritems() if k in stackkeys]) tot_bkg = summedHisto(bkg_histos.values(), label='') err_band = None # tmp disable # err_band = buildErrBandGraph(tot_bkg, computeStatErr2(tot_bkg)) empty_bkg = tot_bkg.Integral() == 0 if empty_bkg: if verbose: print "empty backgrounds, skip %s" % tot_bkg.GetName() return histoname = tot_bkg.GetName() can = r.TCanvas('c_' + histoname, histoname, 800, 600) can.cd() pm = tot_bkg # pad master pm.SetStats(False) pm.Draw('axis') can.Update() # necessary to fool root's dumb object ownership stack = r.THStack('stack_' + tot_bkg.GetName(), '') can.Update() r.SetOwnership(stack, False) for s, h in bkg_histos.iteritems(): h.SetFillColor(colors[s] if s in colors else r.kOrange) h.SetDrawOption('bar') h.SetDirectory(0) stack.Add(h) stack.Draw('hist same') # err_band.Draw('E2 same') data = histos[datakey] if datakey and datakey in histos else None if data and data.GetEntries(): data.SetMarkerStyle(r.kFullDotLarge) data.Draw('p same') if verbose: print "data : nEntries {:.1f} totWeight {:.1f} ".format( data.GetEntries(), data.Integral()) yMin, yMax = getMinMax([h for h in [tot_bkg, data, err_band] if h]) # pm.SetMinimum(0.5) pm.SetMaximum(1.1 * yMax) can.Update() # can.SetLogy() topRightLabel(can, "#splitline{%s}{%s}" % (histoname, region), xpos=0.125, align=13) drawLegendWithDictKeys(can, dictSum(bkg_histos, {'stat err': err_band}), opt='f') can.RedrawAxis() can._stack = stack can._histos = [h for h in stack.GetHists()] + [data] can.Update() if verbose: print os.path.join(outputDir, histoname + '.png') can.SaveAs(os.path.join(outputDir, histoname + '.png'))
def plotStackedHistos(histosPerGroup={}, outputDir='', region='', verbose=False): groups = histosPerGroup.keys() variables = first(histosPerGroup).keys() leptonTypes = first(first(histosPerGroup)).keys() colors = SampleUtils.colors mkdirIfNeeded(outputDir) histosPerName = dict([(region+'_'+var+'_'+lt, # one canvas for each histo, so key with histoname w/out group dict([(g, histosPerGroup[g][var][lt]) for g in groups])) for var in variables for lt in leptonTypes]) for histoname, histosPerGroup in histosPerName.iteritems(): missingGroups = [g for g, h in histosPerGroup.iteritems() if not h] if missingGroups: if verbose : print "skip %s, missing histos for %s"%(histoname, str(missingGroups)) continue bkgHistos = dict([(g, h) for g, h in histosPerGroup.iteritems() if isBkgSample(g)]) totBkg = summedHisto(bkgHistos.values()) err_band = buildErrBandGraph(totBkg, computeStatErr2(totBkg)) emptyBkg = totBkg.Integral()==0 if emptyBkg: if verbose : print "empty backgrounds, skip %s"%histoname continue can = r.TCanvas('c_'+histoname, histoname, 800, 600) can.cd() pm = totBkg # pad master pm.SetStats(False) pm.Draw('axis') can.Update() # necessary to fool root's dumb object ownership stack = r.THStack('stack_'+histoname,'') can.Update() r.SetOwnership(stack, False) for s, h in bkgHistos.iteritems() : h.SetFillColor(colors[s] if s in colors else r.kOrange) h.SetDrawOption('bar') h.SetDirectory(0) stack.Add(h) stack.Draw('hist same') err_band.Draw('E2 same') data = histosPerGroup['data'] if data and data.GetEntries(): data.SetMarkerStyle(r.kFullDotLarge) data.Draw('p same') yMin, yMax = getMinMax([h for h in [totBkg, data, err_band] if h]) pm.SetMinimum(0.0) pm.SetMaximum(1.1*yMax) can.Update() topRightLabel(can, histoname, xpos=0.125, align=13) drawLegendWithDictKeys(can, dictSum(bkgHistos, {'stat err':err_band}), opt='f') can.RedrawAxis() can._stack = stack can._histos = [h for h in stack.GetHists()]+[data] can.Update() outFname = os.path.join(outputDir, histoname+'.png') 
utils.rmIfExists(outFname) can.SaveAs(outFname)
def plotStackedHistosWithData(histosPerGroup={}, outputDir='', canvasname='', canvastitle='', colors={}, verbose=False): "histosPerGroup[group], where group=data is treated as special" groups = histosPerGroup.keys() mkdirIfNeeded(outputDir) missingGroups = [g for g, h in histosPerGroup.iteritems() if not h] if missingGroups: if verbose : print "skip %s, missing histos for %s"%(histoname, str(missingGroups)) return bkgHistos = dict([(g, h) for g, h in histosPerGroup.iteritems() if not isDataSample(g)]) totBkg = summedHisto(bkgHistos.values()) err_band = buildErrBandGraph(totBkg, computeStatErr2(totBkg)) emptyBkg = totBkg.Integral()==0 histoname, region = totBkg.GetName(), 'emu' # tmp replacement vars, to be fixed if emptyBkg: if verbose : print "empty backgrounds, skip %s"%histoname return can = r.TCanvas(canvasname, canvastitle, 800, 600) can.cd() pm = totBkg # pad master pm.SetStats(False) pm.Draw('axis') can.Update() # necessary to fool root's dumb object ownership stack = r.THStack('stack_'+histoname,'') can.Update() r.SetOwnership(stack, False) for s, h in bkgHistos.iteritems() : h.SetFillColor(colors[s] if s in colors else r.kOrange) h.SetDrawOption('bar') h.SetDirectory(0) stack.Add(h) stack.Draw('hist same') err_band.Draw('E2 same') data = histosPerGroup['data'] if 'data' in histosPerGroup else None if data and data.GetEntries(): data.SetMarkerStyle(r.kFullDotLarge) data.Draw('p same') if verbose : print "integrals : {0} tot.bkg.: {1}, data: {2}".format(histoname, totBkg.Integral(), data.Integral()) else: print "no data" yMin, yMax = getMinMax([h for h in [totBkg, data, err_band] if h]) pm.SetMinimum(0.0) pm.SetMaximum(1.1*yMax) can.Update() topRightLabel(can, "#splitline{%s}{%s}"%(histoname, region), xpos=0.15, ypos=(1.0-0.5*can.GetTopMargin()), align=13) drawLegendWithDictKeys(can, dictSum(bkgHistos, {'stat err':err_band}), opt='f') can.RedrawAxis() can._stack = stack can._histos = [h for h in stack.GetHists()]+[data] can.Update() 
filename=os.path.join(outputDir, histoname+'.png') rmIfExists(filename) can.SaveAs(filename)
def plotStackedHistosSources(histosPerVar={}, outputDir='', region='', verbose=False): variables = histosPerVar.keys() sources = first(histosPerVar).keys() colors = colorsFillSources mkdirIfNeeded(outputDir) for var in variables: for lOrT in ['loose', 'tight']: histos = dict((s, histosPerVar[var][s][lOrT]) for s in sources) canvasBasename = region+'_region_'+var+'_'+lOrT missingSources = [s for s, h in histos.iteritems() if not h] if missingSources: if verbose : print "skip %s, missing histos for %s"%(var, str(missingSources)) continue totBkg = summedHisto(histos.values()) err_band = buildErrBandGraph(totBkg, computeStatErr2(totBkg)) emptyBkg = totBkg.Integral()==0 if emptyBkg: if verbose : print "empty backgrounds, skip %s"%canvasBasename continue can = r.TCanvas('c_'+canvasBasename, canvasBasename, 800, 600) can.cd() pm = totBkg # pad master pm.SetStats(False) pm.Draw('axis') can.Update() # necessary to fool root's dumb object ownership stack = r.THStack('stack_'+canvasBasename,'') can.Update() r.SetOwnership(stack, False) for s, h in histos.iteritems() : h.SetFillColor(colors[s] if s in colors else r.kOrange) h.SetDrawOption('bar') h.SetDirectory(0) stack.Add(h) stack.Draw('hist same') err_band.Draw('E2 same') yMin, yMax = getMinMax([h for h in [totBkg, err_band] if h is not None]) pm.SetMinimum(0.0) pm.SetMaximum(1.1*yMax) can.Update() topRightLabel(can, canvasBasename, xpos=0.125, align=13) drawLegendWithDictKeys(can, dictSum(histos, {'stat err':err_band}), opt='f') can.RedrawAxis() can._stack = stack can._histos = [h for h in stack.GetHists()] can.Update() outFname = os.path.join(outputDir, canvasBasename+'.png') utils.rmIfExists(outFname) can.SaveAs(outFname)
def plotStackedHistos(histos={}, datakey=None, stackkeys=[], outputDir='', region='', colors={}, verbose=False): "input: a dictionary of histos[group]" mkdirIfNeeded(outputDir) bkg_histos = dict([(k,h) for k,h in histos.iteritems() if k in stackkeys]) tot_bkg = summedHisto(bkg_histos.values(), label='') err_band = None # tmp disable # err_band = buildErrBandGraph(tot_bkg, computeStatErr2(tot_bkg)) empty_bkg = tot_bkg.Integral()==0 if empty_bkg: if verbose : print "empty backgrounds, skip %s"%tot_bkg.GetName() return histoname = tot_bkg.GetName() can = r.TCanvas('c_'+histoname, histoname, 800, 600) can.cd() pm = tot_bkg # pad master pm.SetStats(False) pm.Draw('axis') can.Update() # necessary to fool root's dumb object ownership stack = r.THStack('stack_'+tot_bkg.GetName(),'') can.Update() r.SetOwnership(stack, False) for s, h in bkg_histos.iteritems() : h.SetFillColor(colors[s] if s in colors else r.kOrange) h.SetDrawOption('bar') h.SetDirectory(0) stack.Add(h) stack.Draw('hist same') # err_band.Draw('E2 same') data = histos[datakey] if datakey and datakey in histos else None if data and data.GetEntries(): data.SetMarkerStyle(r.kFullDotLarge) data.Draw('p same') if verbose: print "data : nEntries {:.1f} totWeight {:.1f} ".format(data.GetEntries(), data.Integral()) yMin, yMax = getMinMax([h for h in [tot_bkg, data, err_band] if h]) # pm.SetMinimum(0.5) pm.SetMaximum(1.1*yMax) can.Update() # can.SetLogy() topRightLabel(can, "#splitline{%s}{%s}"%(histoname, region), xpos=0.125, align=13) drawLegendWithDictKeys(can, dictSum(bkg_histos, {'stat err':err_band}), opt='f') can.RedrawAxis() can._stack = stack can._histos = [h for h in stack.GetHists()]+[data] can.Update() if verbose : print os.path.join(outputDir, histoname+'.png') can.SaveAs(os.path.join(outputDir, histoname+'.png'))
def getThirdTeamCalcDataKeys(calc):
    """Return the third-tier team calculated-data keys mapped to the
    lambdas that compute each value for a given team."""
    return {
        "predictedNumRPs": lambda team: calc.predictedNumberOfRPs(team),
        "actualNumRPs": lambda team: calc.getTeamRPsFromTBA(team),
        "actualSeed": lambda team: calc.getTeamSeed(team),
        "predictedSeed": lambda team: calc.cachedComp.predictedSeedings.index(team) + 1,
        # R-scores come from the cached per-competition z-score dicts, keyed by team number
        "RScoreTorque": lambda team: calc.cachedComp.torqueZScores[team.number],
        "RScoreSpeed": lambda team: calc.cachedComp.speedZScores[team.number],
        "RScoreAgility": lambda team: calc.cachedComp.agilityZScores[team.number],
        "RScoreDefense": lambda team: calc.cachedComp.defenseZScores[team.number],
        "RScoreBallControl": lambda team: calc.cachedComp.ballControlZScores[team.number],
        "RScoreDrivingAbility": lambda team: calc.cachedComp.drivingAbilityZScores[team.number],
        "avgSuccessfulTimesCrossedDefenses": lambda team: utils.dictSum(
            team.calculatedData.avgSuccessfulTimesCrossedDefensesAuto,
            team.calculatedData.avgSuccessfulTimesCrossedDefensesTele),
        "blockingAbility": lambda team: (
            (team.calculatedData.avgShotsBlocked
             - calc.averageTeam.calculatedData.avgShotsBlocked)
            * calc.averageTeam.calculatedData.highShotAccuracyTele * 5),
        "defensesCrossableAuto": lambda team: calc.defensesCrossableByTeamForDefenseDict(
            team, team.calculatedData.avgSuccessfulTimesCrossedDefensesAuto),
        "defensesCrossableTele": lambda team: calc.defensesCrossableByTeamForDefenseDict(
            team, team.calculatedData.avgSuccessfulTimesCrossedDefensesTele),
        "firstPickAbility": lambda team: calc.firstPickAbility(team),
        "overallSecondPickAbility": lambda team: calc.overallSecondPickAbility(team),
    }
def runFill(opts) : batchMode = opts.batch inputFakeDir = opts.input_fake inputGenDir = opts.input_other outputDir = opts.output_dir verbose = opts.verbose debug = opts.debug blinded = not opts.unblind tightight = opts.require_tight_tight if debug : dataset.Dataset.verbose_parsing = True groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir) if not skip_charge_flip : groups.append(dataset.DatasetGroup.build_qflip_from_simulated_samples(groups)) groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake()) groups = parse_group_option(opts, groups) if verbose : print '\n'.join("group {0} : {1} samples".format(g.name, len(g.datasets)) for g in groups) if debug : print '\n'.join("group {0} : {1} samples: {2}".format(g.name, len(g.datasets), '\n\t'+'\n\t'.join(d.name for d in g.datasets)) for g in groups) if verbose : print "filling histos" # eval will take care of aborting on typos onthefly_tight_def = eval(opts.tight_def) if opts.tight_def else None mkdirIfNeeded(outputDir) systematics = get_list_of_syst_to_fill(opts) regions = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions) if verbose : print "about to loop over these systematics:\n %s"%str(systematics) if verbose : print "about to loop over these regions:\n %s"%str(regions) if batchMode: for group in groups: for systematic in systematics: if systUtils.Group(group.name).isNeededForSys(systematic): opts.syst = systematic for selection in regions: submit_batch_fill_job_per_group_per_selection(group=group, selection=selection, opts=opts) else: for group in groups: systematics = [s for s in systematics if systUtils.Group(group.name).isNeededForSys(s)] if not systematics : print "warning, empty syst list. 
You should have at least the nominal" for systematic in systematics: # note to self: here you will want to use a modified Sample.setHftInputDir # for now we just have the fake syst that are in the nominal tree tree_name = 'hlfv_tuple' chain = IndexedChain(tree_name) input_dir = opts.input_fake if group.name=='fake' else opts.input_other for ds in group.datasets: chain.Add(os.path.join(input_dir, systUtils.Sample(ds.name, group.name).setSyst(systematic).filename)) if opts.verbose: print "{0} : {1} entries from {2} samples".format(group.name, chain.GetEntries(), len(group.datasets)) chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/') tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions] chain.retrieve_entrylists(tcuts) counters_pre, histos_pre = dict(), dict() counters_npre, histos_npre = dict(), dict() cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list() uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list() if verbose : print 'filling cached cuts: ',' '.join([c.GetName() for c in cached_tcuts]) for cut in cached_tcuts: chain.preselect(cut) c_pre, h_pre = count_and_fill(chain=chain, sample=group.name, syst=systematic, verbose=verbose, debug=debug, blinded=blinded, onthefly_tight_def=onthefly_tight_def, tightight=tightight, quicktest=opts.quick_test, cached_cut=cut) out_filename = (systUtils.Group(group.name) .setSyst(systematic) .setHistosDir(outputDir) .setCurrentSelection(cut.GetName())).filenameHisto writeObjectsToFile(out_filename, h_pre, verbose) counters_pre = dictSum(counters_pre, c_pre) histos_pre = dictSum(histos_pre, h_pre) if uncached_tcuts: if verbose : print 'filling uncached cuts: ',' '.join([c.GetName() for c in uncached_tcuts]) counters_npre, histos_npre = count_and_fill(chain=chain, sample=group.name, syst=systematic, verbose=verbose, debug=debug, blinded=blinded, onthefly_tight_def=onthefly_tight_def, tightight=tightight, quicktest=opts.quick_test, 
noncached_cuts=uncached_tcuts) for sel, histos in histos_npre.iteritems(): out_filename = (systUtils.Group(group.name) .setSyst(systematic) .setHistosDir(outputDir) .setCurrentSelection(sel)).filenameHisto writeObjectsToFile(out_filename, histos, verbose) chain.save_lists()
def runFill(opts): lepton = opts.lepton batchMode = opts.batch inputDir = opts.input_dir outputDir = opts.output_dir verbose = opts.verbose debug = opts.debug dataset.Dataset.verbose_parsing = True if debug else False groups = dataset.DatasetGroup.build_groups_from_files_in_dir( opts.samples_dir) if opts.group: groups = [g for g in groups if g.name == opts.group] if verbose: print '\n'.join( "group {0} : {1} samples".format(g.name, len(g.datasets)) for g in groups) if debug: print '\n'.join("group {0} : {1} samples: {2}".format( g.name, len(g.datasets), '\n\t' + '\n\t'.join(d.name for d in g.datasets)) for g in groups) if verbose: print "filling histos" outputDir = outputDir + '/' + lepton + '/histos' mkdirIfNeeded(outputDir) if batchMode: for group in groups: submit_batch_fill_job_per_group(group, opts) else: for group in groups: tree_name = 'ss3l_tuple' chain = IndexedChain(tree_name) for ds in group.datasets: chain.Add(os.path.join(inputDir, ds.name + '.root')) if opts.verbose: print "{0} : {1} entries from {2} samples".format( group.name, chain.GetEntries(), len(group.datasets)) chain.cache_directory = os.path.abspath('./selection_cache/' + group.name + '/') tcuts = [ r.TCut(reg, selection_formulas()[reg]) for reg in regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions) ] chain.retrieve_entrylists(tcuts) counters_pre, histos_pre = dict(), dict() counters_npre, histos_npre = dict(), dict() cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list( ) uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list( ) print 'todo: skip cuts for which the histo files are there' if verbose: print 'filling cached cuts: ', ' '.join( [c.GetName() for c in cached_tcuts]) for cut in cached_tcuts: chain.preselect(cut) c_pre, h_pre = count_and_fill(chain=chain, opts=opts, group=group, cached_cut=cut) counters_pre = dictSum(counters_pre, c_pre) histos_pre = dictSum(histos_pre, h_pre) if verbose: print 'filling 
uncached cuts: ', ' '.join( [c.GetName() for c in uncached_tcuts]) if uncached_tcuts: counters_npre, histos_npre = count_and_fill( chain=chain, opts=opts, group=group, noncached_cuts=uncached_tcuts) chain.save_lists() all_histos = dictSum(histos_pre, histos_npre) for sel, histos in all_histos.iteritems(): # write histos for each sel to a separate file (finer granularity, better caching) out_filename = os.path.join(outputDir, group.name + '_' + sel + '.root') if verbose: print 'saving to ', out_filename writeObjectsToFile(out_filename, histos, verbose)
def runFill(opts): batchMode = opts.batch inputFakeDir = opts.input_fake inputGenDir = opts.input_other outputDir = opts.output_dir verbose = opts.verbose debug = opts.debug blinded = not opts.unblind tightight = opts.require_tight_tight if debug: dataset.Dataset.verbose_parsing = True groups = dataset.DatasetGroup.build_groups_from_files_in_dir( opts.samples_dir) if not skip_charge_flip: groups.append( dataset.DatasetGroup.build_qflip_from_simulated_samples(groups)) groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake()) groups = parse_group_option(opts, groups) if verbose: print '\n'.join( "group {0} : {1} samples".format(g.name, len(g.datasets)) for g in groups) if debug: print '\n'.join("group {0} : {1} samples: {2}".format( g.name, len(g.datasets), '\n\t' + '\n\t'.join(d.name for d in g.datasets)) for g in groups) if verbose: print "filling histos" # eval will take care of aborting on typos onthefly_tight_def = eval(opts.tight_def) if opts.tight_def else None mkdirIfNeeded(outputDir) systematics = get_list_of_syst_to_fill(opts) regions = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions) if verbose: print "about to loop over these systematics:\n %s" % str(systematics) if verbose: print "about to loop over these regions:\n %s" % str(regions) if batchMode: for group in groups: for systematic in systematics: if systUtils.Group(group.name).isNeededForSys(systematic): opts.syst = systematic for selection in regions: submit_batch_fill_job_per_group_per_selection( group=group, selection=selection, opts=opts) else: for group in groups: systematics = [ s for s in systematics if systUtils.Group(group.name).isNeededForSys(s) ] if not systematics: print "warning, empty syst list. 
You should have at least the nominal" for systematic in systematics: # note to self: here you will want to use a modified Sample.setHftInputDir # for now we just have the fake syst that are in the nominal tree tree_name = 'hlfv_tuple' chain = IndexedChain(tree_name) input_dir = opts.input_fake if group.name == 'fake' else opts.input_other for ds in group.datasets: chain.Add( os.path.join( input_dir, systUtils.Sample( ds.name, group.name).setSyst(systematic).filename)) if opts.verbose: print "{0} : {1} entries from {2} samples".format( group.name, chain.GetEntries(), len(group.datasets)) chain.cache_directory = os.path.abspath('./selection_cache/' + group.name + '/') tcuts = [ r.TCut(reg, selection_formulas()[reg]) for reg in regions ] chain.retrieve_entrylists(tcuts) counters_pre, histos_pre = dict(), dict() counters_npre, histos_npre = dict(), dict() cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list( ) uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list( ) if verbose: print 'filling cached cuts: ', ' '.join( [c.GetName() for c in cached_tcuts]) for cut in cached_tcuts: chain.preselect(cut) c_pre, h_pre = count_and_fill( chain=chain, sample=group.name, syst=systematic, verbose=verbose, debug=debug, blinded=blinded, onthefly_tight_def=onthefly_tight_def, tightight=tightight, quicktest=opts.quick_test, cached_cut=cut) out_filename = (systUtils.Group( group.name).setSyst(systematic).setHistosDir( outputDir).setCurrentSelection( cut.GetName())).filenameHisto writeObjectsToFile(out_filename, h_pre, verbose) counters_pre = dictSum(counters_pre, c_pre) histos_pre = dictSum(histos_pre, h_pre) if uncached_tcuts: if verbose: print 'filling uncached cuts: ', ' '.join( [c.GetName() for c in uncached_tcuts]) counters_npre, histos_npre = count_and_fill( chain=chain, sample=group.name, syst=systematic, verbose=verbose, debug=debug, blinded=blinded, onthefly_tight_def=onthefly_tight_def, tightight=tightight, 
quicktest=opts.quick_test, noncached_cuts=uncached_tcuts) for sel, histos in histos_npre.iteritems(): out_filename = (systUtils.Group( group.name).setSyst(systematic).setHistosDir( outputDir).setCurrentSelection(sel) ).filenameHisto writeObjectsToFile(out_filename, histos, verbose) chain.save_lists()
def runFill(opts): lepton = opts.lepton batchMode = opts.batch inputDir = opts.input_dir outputDir = opts.output_dir verbose = opts.verbose debug = opts.debug dataset.Dataset.verbose_parsing = True if debug else False groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir) if opts.group : groups = [g for g in groups if g.name==opts.group] if verbose : print '\n'.join("group {0} : {1} samples".format(g.name, len(g.datasets)) for g in groups) if debug : print '\n'.join("group {0} : {1} samples: {2}".format(g.name, len(g.datasets), '\n\t'+'\n\t'.join(d.name for d in g.datasets)) for g in groups) if verbose : print "filling histos" outputDir = outputDir+'/'+lepton+'/histos' mkdirIfNeeded(outputDir) if batchMode: for group in groups: submit_batch_fill_job_per_group(group, opts) else: for group in groups: tree_name = 'hlfv_tuple' chain = IndexedChain(tree_name) for ds in group.datasets: chain.Add(os.path.join(inputDir, ds.name+'.root')) if opts.verbose: print "{0} : {1} entries from {2} samples".format(group.name, chain.GetEntries(), len(group.datasets)) chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/') tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions)] chain.retrieve_entrylists(tcuts) counters_pre, histos_pre = dict(), dict() counters_npre, histos_npre = dict(), dict() cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list() uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list() print 'todo: skip cuts for which the histo files are there' if verbose : print 'filling cached cuts: ',' '.join([c.GetName() for c in cached_tcuts]) for cut in cached_tcuts: chain.preselect(cut) c_pre, h_pre = count_and_fill(chain=chain, opts=opts, group=group, cached_cut=cut) counters_pre = dictSum(counters_pre, c_pre) histos_pre = dictSum(histos_pre, h_pre) if verbose : print 'filling uncached cuts: ',' 
'.join([c.GetName() for c in uncached_tcuts]) if uncached_tcuts: counters_npre, histos_npre = count_and_fill(chain=chain, opts=opts, group=group, noncached_cuts=uncached_tcuts) chain.save_lists() all_histos = dictSum(histos_pre, histos_npre) for sel, histos in all_histos.iteritems(): # write histos for each sel to a separate file (finer granularity, better caching) out_filename = os.path.join(outputDir, group.name+'_'+sel+'.root') if verbose : print 'saving to ',out_filename writeObjectsToFile(out_filename, histos, verbose)
def getFirstTeamCalcDataKeys(calc):
    """Return the first-pass team calculated-data keys mapped to the
    functions that compute them.

    Each value is a callable taking a team; 'defenses' maps to a list of
    such callables and 'avgNumTimesSlowed' to a dict of them.  Nothing is
    evaluated here -- the caller applies the lambdas team by team, so this
    function only builds the dispatch table.

    Fix vs. previous revision: the 'sdSuccessfulDefenseCrossesTele' entry
    had a stray comma ('lambda tm: tm.ti, mesSuccessfulCrossedDefensesTele')
    which split the attribute access into two arguments and referenced an
    undefined name; it now reads the intended attribute.
    """
    # Sum of the three category-A per-defense dicts; used as the denominator
    # for the beached/slowed/unaffected percentage entries below.
    sumCategoryADataPointDict = lambda team: utils.dictSum(
        team.calculatedData.avgNumTimesUnaffected,
        utils.dictSum(team.calculatedData.avgNumTimesBeached,
                      team.calculatedData.avgNumTimesSlowed))
    return {
        "avgTorque": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.rankTorque),  # Checked
        "avgSpeed": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.rankSpeed),
        "avgAgility": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.rankAgility),  # Checked
        "avgDefense": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.rankDefense),  # Checked
        "avgBallControl": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.rankBallControl),  # Checked
        "avgDrivingAbility": lambda team: calc.drivingAbility(team),
        "disabledPercentage": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: utils.convertFirebaseBoolean(timd.didGetDisabled)),
        "incapacitatedPercentage": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: utils.convertFirebaseBoolean(timd.didGetIncapacitated)),
        "disfunctionalPercentage": lambda team:
            team.calculatedData.disabledPercentage +
            team.calculatedData.incapacitatedPercentage,
        # Auto
        "autoAbility": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.autoAbility),
        "autoAbilityExcludeD": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.autoAbility(
                calc.timdHasDefenseExclusion(timd, calc.defenseDictionary['d']))),
        "autoAbilityExcludeLB": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.autoAbility(
                calc.timdHasDefenseExclusion(timd, calc.defenseDictionary['e']))),
        "avgHighShotsAuto": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numHighShotsMadeAuto),  # Checked
        "avgLowShotsAuto": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numLowShotsMadeAuto),  # Checked
        "reachPercentage": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: utils.convertFirebaseBoolean(timd.didReachAuto)),
        "highShotAccuracyAuto": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.TIMDShotAccuracy(
                timd.numHighShotsMadeAuto, timd.numHighShotsMissedAuto)),  # Checked
        "lowShotAccuracyAuto": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.TIMDShotAccuracy(
                timd.numLowShotsMadeAuto, timd.numLowShotsMissedAuto)),  # Checked
        "avgMidlineBallsIntakedAuto": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.numBallsIntakedOffMidlineAuto),
        "sdMidlineBallsIntakedAuto": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.numBallsIntakedOffMidlineAuto),
        "sdHighShotsAuto": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numHighShotsMadeAuto),  # Checked
        "sdLowShotsAuto": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numLowShotsMadeAuto),  # Checked
        "sdBallsKnockedOffMidlineAuto": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numBallsKnockedOffMidlineAuto),
        # Tele
        "scalePercentage": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: int(utils.convertFirebaseBoolean(timd.didScaleTele))),
        "challengePercentage": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: int(utils.convertFirebaseBoolean(timd.didChallengeTele))),
        "avgGroundIntakes": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numGroundIntakesTele),  # Checked
        "avgBallsKnockedOffMidlineAuto": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numBallsKnockedOffMidlineAuto),  # Checked
        "avgShotsBlocked": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numShotsBlockedTele),  # Checked
        "avgHighShotsTele": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numHighShotsMadeTele),  # Checked
        "avgLowShotsTele": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numLowShotsMadeTele),  # Checked
        "highShotAccuracyTele": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.TIMDShotAccuracy(
                timd.numHighShotsMadeTele, timd.numHighShotsMissedTele)),  # Checked
        "lowShotAccuracyTele": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.TIMDShotAccuracy(
                timd.numLowShotsMadeTele, timd.numLowShotsMissedTele)),
        "teleopShotAbility": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.teleopShotAbility),  # Checked
        "siegeConsistency": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: utils.convertFirebaseBoolean(timd.didChallengeTele) or
                utils.convertFirebaseBoolean(timd.didScaleTele)),  # Checked
        "siegeAbility": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.siegeAbility),  # Checked
        "sdHighShotsTele": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numHighShotsMadeTele),  # Checked
        "sdLowShotsTele": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numLowShotsMadeTele),  # Checked
        "sdGroundIntakes": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numGroundIntakesTele),  # Checked
        "sdShotsBlocked": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numShotsBlockedTele),  # Checked
        "sdTeleopShotAbility": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.teleopShotAbility),
        "sdSiegeAbility": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.siegeAbility),
        "sdAutoAbility": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.autoAbility),
        "numScaleAndChallengePoints": lambda team: calc.numScaleAndChallengePointsForTeam(team),  # Checked
        # NOTE(review): this passes a *lambda* to convertFirebaseBoolean instead of a
        # boolean, so the conversion operates on the function object; it looks like the
        # intent was convertFirebaseBoolean(calc.teamDidBreachInMatch(team,
        # calc.su.getMatchForNumber(timd.matchNumber))).  Preserved as-is -- confirm
        # against the Calculator implementation before changing.
        "breachPercentage": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: utils.convertFirebaseBoolean(
                lambda team: calc.teamDidBreachInMatch(
                    team, lambda team: calc.su.getMatchForNumber(timd.matchNumber)))),
        "avgHighShotsAttemptedTele": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.highShotsAttemptedTele),
        "avgLowShotsAttemptedTele": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.lowShotsAttemptedTele),
        "twoBallAutoTriedPercentage": lambda team: calc.twoBallAutoTriedPercentage(team),
        "twoBallAutoAccuracy": lambda team: calc.twoBallAutoAccuracy(team),
        "avgNumTimesBeached": lambda team: calc.categoryAAverageDictForDataFunction(
            team, lambda timd: timd.numTimesBeached),
        "avgNumTimesSlowed": {
            "pc": lambda team: calc.avgNumTimesSlowed(team, "pc"),
            "cdf": lambda team: calc.avgNumTimesSlowed(team, "cdf")
        },
        "avgNumTimesUnaffected": lambda team: calc.categoryAAverageDictForDataFunction(
            team, lambda timd: timd.numTimesUnaffected),
        "beachedPercentage": lambda team: utils.dictQuotient(
            team.calculatedData.avgNumTimesBeached, sumCategoryADataPointDict(team)),
        "slowedPercentage": lambda team: utils.dictQuotient(
            team.calculatedData.avgNumTimesSlowed, sumCategoryADataPointDict(team)),
        "unaffectedPercentage": lambda team: utils.dictQuotient(
            team.calculatedData.avgNumTimesUnaffected, sumCategoryADataPointDict(team)),
        "avgNumTimesCrossedDefensesAuto": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda tm: tm.calculatedData.totalNumTimesCrossedDefensesAuto),
        # Each entry fills one family of per-defense team stats; the two trailing
        # lambdas are the aggregator over crossing times and the per-match reducer.
        "defenses": [
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.avgSuccessfulTimesCrossedDefensesTele,
                lambda tm: tm.timesSuccessfulCrossedDefensesTele,
                lambda x: np.mean(x) if x is not None and len(x) > 0 else 0,
                lambda y: len(y) if y is not None else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.avgSuccessfulTimesCrossedDefensesAuto,
                lambda tm: tm.timesSuccessfulCrossedDefensesAuto,
                lambda x: np.mean(x) if x is not None and len(x) > 0 else 0,
                lambda y: len(y) if y is not None else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.avgFailedTimesCrossedDefensesTele,
                lambda tm: tm.timesFailedCrossedDefensesTele,
                lambda x: np.mean(x) if x is not None and len(x) > 0 else 0,
                lambda y: len(y) if y is not None else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.avgFailedTimesCrossedDefensesAuto,
                lambda tm: tm.timesFailedCrossedDefensesAuto,
                lambda x: np.mean(x) if x is not None and len(x) > 0 else 0,
                lambda y: len(y) if y is not None else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.avgTimeForDefenseCrossTele,
                lambda tm: tm.timesSuccessfulCrossedDefensesTele,
                lambda x: np.mean(x) if x is not None and len(x) > 0 else 0,
                lambda y: np.mean(y) if y is not None and len(y) > 0 else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.avgTimeForDefenseCrossAuto,
                lambda tm: tm.timesSuccessfulCrossedDefensesAuto,
                lambda x: np.mean(x) if x is not None and len(x) > 0 else 0,
                lambda y: np.mean(y) if y is not None and len(y) > 0 else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.sdSuccessfulDefenseCrossesAuto,
                lambda tm: tm.timesSuccessfulCrossedDefensesAuto,
                lambda x: utils.rms(x) if x is not None and len(x) > 0 else 0,
                lambda y: len(y) if y is not None else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.sdSuccessfulDefenseCrossesTele,
                # fix: attribute was split by a stray comma in the previous revision
                lambda tm: tm.timesSuccessfulCrossedDefensesTele,
                lambda x: utils.rms(x) if x is not None and len(x) > 0 else 0,
                lambda y: len(y) if y is not None else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.sdFailedDefenseCrossesAuto,
                lambda tm: tm.timesFailedCrossedDefensesAuto,
                lambda x: utils.rms(x) if x is not None and len(x) > 0 else 0,
                lambda y: len(y) if y is not None else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.sdFailedDefenseCrossesTele,
                lambda tm: tm.timesFailedCrossedDefensesTele,
                lambda x: utils.rms(x) if x is not None and len(x) > 0 else 0,
                lambda y: len(y) if y is not None else 0)
        ]
    }
def main(): parser = optparse.OptionParser(usage=usage) parser.add_option('-i', '--input-dir', default='./out/fakerate') parser.add_option('-o', '--output-dir', default='./out/fake_scale_factor', help='dir for plots') parser.add_option('-l', '--lepton', default='el', help='either el or mu') parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples') parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)') parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.') parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)') parser.add_option('-v', '--verbose', action='store_true', default=False) (options, args) = parser.parse_args() inputDir = options.input_dir outputDir = options.output_dir lepton = options.lepton region = options.region tag = options.tag verbose = options.verbose if not tag : parser.error('tag is a required option') if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton) filestems, treenames = utils.verticalSlice(fakeu.tupleStemsAndNames) regions = filestems assert region in regions,"invalid region '%s', must be one of %s"%(region, str(regions)) templateInputFilename = "*_%(region)s_tuple_%(tag)s.root" % {'tag':tag, 'region':region} templateOutputFilename = "%(region)s_%(l)s_scale_histos.root" % {'region':region, 'l':lepton} treeName = treenames[regions.index(region)] outputDir = outputDir+'/'+region+'/'+lepton # split the output in subdirectories, so we don't overwrite things mkdirIfNeeded(outputDir) outputFileName = os.path.join(outputDir, templateOutputFilename) cacheFileName = outputFileName.replace('.root', '_'+region+'_cache.root') doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName) onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos 
optionsToPrint = ['inputDir', 'outputDir', 'region', 'tag', 'doFillHistograms', 'onthefly_tight_def'] if verbose : print "working from %s"%os.getcwd() print "being called as : %s"%' '.join(os.sys.argv) print "options:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint]) # collect inputs if verbose : print 'input files ',os.path.join(inputDir, templateInputFilename) tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename)) samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose)) samplesPerGroup = collections.defaultdict(list) filenamesPerGroup = collections.defaultdict(list) mkdirIfNeeded(outputDir) for s, f in zip(samples, tupleFilenames) : samplesPerGroup[s.group].append(s) filenamesPerGroup[s.group].append(f) vars = ['mt0', 'mt1', 'pt0', 'pt1', 'eta1'] groups = samplesPerGroup.keys() #fill histos if doFillHistograms : start_time = time.clock() num_processed_entries = 0 histosPerGroup = bookHistos(vars, groups, region=region) histosPerSource = bookHistosPerSource(vars, leptonSources, region=region) histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, groups, leptonSources, region=region) for group in groups: isData = isDataSample(group) filenames = filenamesPerGroup[group] if verbose: print " --- group : %s ---".format(group) print '\n\t'.join(filenames) histosThisGroup = histosPerGroup[group] histosThisGroupPerSource = dict((v, histosPerGroupPerSource[v][group]) for v in histosPerGroupPerSource.keys()) chain = r.TChain(treeName) [chain.Add(fn) for fn in filenames] if verbose: print "%s : %d entries"%(group, chain.GetEntries()) num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource, histosThisGroupPerSource, lepton, group, region, onthefly_tight_def=onthefly_tight_def, verbose=verbose) writeHistos(cacheFileName, histosPerGroup, histosPerSource, histosPerGroupPerSource, verbose) end_time = time.clock() delta_time = end_time - start_time if verbose: print ("processed {0:d} entries 
".format(num_processed_entries) +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>60 else "{0:.1f} s ".format(delta_time)) +"({0:.1f} kHz)".format(num_processed_entries/delta_time)) # compute scale factors histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, groups, region), verbose) histosPerSource = fetchHistos(cacheFileName, histoNamesPerSource(vars, leptonSources, region), verbose) histosPerSamplePerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, groups, leptonSources, region), verbose) plotStackedHistos(histosPerGroup, outputDir+'/by_group', region, verbose) plotStackedHistosSources(histosPerSource, outputDir+'/by_source', region, verbose) plotPerSourceEff(histosPerVar=histosPerSource, outputDir=outputDir+'/by_source', lepton=lepton, region=region, verbose=verbose) for g in groups: hps = dict((v, histosPerSamplePerSource[v][g])for v in vars) plotPerSourceEff(histosPerVar=hps, outputDir=outputDir, lepton=lepton, region=region, sample=g, verbose=verbose) hn_sf_eta = histoname_sf_vs_eta (lepton) hn_sf_pt = histoname_sf_vs_pt (lepton) hn_da_eta = histoname_data_fake_eff_vs_eta(lepton) hn_da_pt = histoname_data_fake_eff_vs_pt (lepton) objs_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'eta1', hn_sf_eta, hn_da_eta, outputDir, region, verbose) objs_pt = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1', hn_sf_pt, hn_da_pt, outputDir, region, verbose) rootUtils.writeObjectsToFile(outputFileName, dictSum(objs_eta, objs_pt), verbose) if verbose : print "saved scale factors to %s" % outputFileName
def main():
    """Compute fake-lepton scale factors for one (region, lepton), per dataset group.

    Command-line driver: builds one IndexedChain per group, (re)fills the
    cached histograms using per-cut entry lists when available, then computes
    the data/MC fake scale factors vs eta, pt, and (pt, eta) and writes them
    to a ROOT file.
    """
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-g', '--group', help='group to be processed (used only in fill mode)')
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fake_scale_factor')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples')
    parser.add_option('--samples-dir', default='samples/', help='directory with the list of samples; default ./samples/')
    parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('--keep-real', action='store_true', default=False, help='do not subtract real (to get real lep efficiency)')
    parser.add_option('--debug', action='store_true')
    parser.add_option('--verbose', action='store_true')
    parser.add_option('--disable-cache', action='store_true', help='disable the entry cache')
    (options, args) = parser.parse_args()
    inputDir = options.input_dir
    outputDir = options.output_dir
    lepton = options.lepton
    region = options.region
    keepreal = options.keep_real
    debug = options.debug
    verbose = options.verbose
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    regions = kin.selection_formulas().keys()
    assert region in regions,"invalid region '%s', must be one of %s"%(region, str(sorted(regions)))
    # from here on only the requested region is processed
    regions = [region]
    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(options.samples_dir)
    if options.group : groups = [g for g in groups if g.name==options.group]
    group_names = [g.name for g in groups]
    outputDir = outputDir+'/'+region+'/'+lepton # split the output in subdirectories, so we don't overwrite things
    mkdirIfNeeded(outputDir)
    templateOutputFilename = "scale_factor_{0}.root".format(lepton)
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_cache.root')
    # only refill when forced or when the cache file is missing
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos
    if verbose : utils.print_running_conditions(parser, options)
    vars = ['mt0', 'mt1', 'pt0', 'pt1', 'eta1', 'pt1_eta1']
    #fill histos
    if doFillHistograms :
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroup = bookHistos(vars, group_names, region=region)
        histosPerSource = bookHistosPerSource(vars, leptonSources, region=region)
        histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, group_names, leptonSources, region=region)
        for group in groups:
            tree_name = 'hlfv_tuple'
            chain = IndexedChain(tree_name)
            for ds in group.datasets:
                fname = os.path.join(inputDir, ds.name+'.root')
                # skip datasets whose ntuple file has not been produced
                if os.path.exists(fname):
                    chain.Add(fname)
            if verbose:
                print "{0} : {1} entries from {2} samples".format(group.name, chain.GetEntries(), len(group.datasets))
            chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/')
            tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions]
            print 'tcuts ',[c.GetName() for c in tcuts]
            # look up which selections already have a cached entry list
            chain.retrieve_entrylists(tcuts)
            counters_pre, histos_pre = dict(), dict()
            counters_npre, histos_npre = dict(), dict()
            print 'tcuts_with_existing_list ',str([c.GetName() for c in chain.tcuts_with_existing_list()])
            print 'tcuts_without_existing_list ',str([c.GetName() for c in chain.tcuts_without_existing_list()])
            cached_tcuts = [] if options.disable_cache else chain.tcuts_with_existing_list()
            print 'cached_tcuts ',[c.GetName() for c in cached_tcuts]
            uncached_tcuts = tcuts if options.disable_cache else chain.tcuts_without_existing_list()
            print 'todo: skip cuts for which the histo files are there'
            if verbose:
                print " --- group : {0} ---".format(group.name)
                print '\n\t'.join(chain.filenames)
            if verbose : print 'filling cached cuts: ',' '.join([c.GetName() for c in cached_tcuts])
            if verbose: print "%s : %d entries"%(group.name, chain.GetEntries())
            histosThisGroup = histosPerGroup[group.name]
            histosThisGroupPerSource = dict((v, histosPerGroupPerSource[v][group.name]) for v in histosPerGroupPerSource.keys())
            for cut in cached_tcuts:
                print 'cached_tcut ',cut
                # restrict the chain to the cached entry list for this cut
                chain.preselect(cut)
                num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource,
                                                    histosThisGroupPerSource,
                                                    lepton, group,
                                                    cut, cut_is_cached=True,
                                                    onthefly_tight_def=onthefly_tight_def,
                                                    verbose=verbose)
            if verbose : print 'filling uncached cuts: ',' '.join([c.GetName() for c in uncached_tcuts])
            if uncached_tcuts:
                assert len(uncached_tcuts)==1, "expecting only one cut, got {}".format(len(uncached_tcuts))
                cut = uncached_tcuts[0]
                # full loop over the chain; the entry list built here is saved below
                chain.preselect(None)
                num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource,
                                                    histosThisGroupPerSource,
                                                    lepton, group,
                                                    cut, cut_is_cached=False,
                                                    onthefly_tight_def=onthefly_tight_def,
                                                    verbose=verbose)
                chain.save_lists()
        writeHistos(cacheFileName, histosPerGroup, histosPerSource, histosPerGroupPerSource, verbose)
        end_time = time.clock()
        delta_time = end_time - start_time
        if verbose:
            print ("processed {0:d} entries ".format(num_processed_entries)
                   +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>60 else "{0:.1f} s ".format(delta_time))
                   +"({0:.1f} kHz)".format(num_processed_entries/delta_time))
    # return
    # compute scale factors
    histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, group_names, region), verbose)
    histosPerSource = fetchHistos(cacheFileName, histoNamesPerSource(vars, leptonSources, region), verbose)
    histosPerSamplePerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, group_names, leptonSources, region), verbose)
    plotStackedHistos(histosPerGroup, outputDir+'/by_group', region, verbose)
    plotStackedHistosSources(histosPerSource, outputDir+'/by_source', region, verbose)
    plotPerSourceEff(histosPerVar=histosPerSource, outputDir=outputDir+'/by_source', lepton=lepton, region=region, verbose=verbose)
    for g in group_names:
        hps = dict((v, histosPerSamplePerSource[v][g])for v in vars)
        plotPerSourceEff(histosPerVar=hps, outputDir=outputDir, lepton=lepton, region=region, sample=g, verbose=verbose)
    hn_sf_eta = histoname_sf_vs_eta (lepton)
    hn_sf_pt = histoname_sf_vs_pt (lepton)
    hn_da_eta = histoname_data_fake_eff_vs_eta(lepton)
    hn_da_pt = histoname_data_fake_eff_vs_pt (lepton)
    subtractReal = not keepreal
    objs_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'eta1', hn_sf_eta, hn_da_eta, outputDir, region, subtractReal, verbose)
    objs_pt = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1', hn_sf_pt, hn_da_pt, outputDir, region, subtractReal, verbose)
    objs_pt_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1_eta1', histoname_sf_vs_pt_eta(lepton), histoname_data_fake_eff_vs_pt_eta(lepton), outputDir, region, subtractReal, verbose)
    rootUtils.writeObjectsToFile(outputFileName, dictSum(dictSum(objs_eta, objs_pt), objs_pt_eta), verbose)
    if verbose : print "saved scale factors to %s" % outputFileName
def main(): parser = optparse.OptionParser(usage=usage) parser.add_option('-i', '--input-dir', default='./out/fakerate') parser.add_option('-o', '--output-dir', default='./out/fakerate/efficiencies') parser.add_option('-l', '--lepton', default='el', help='either el or mu') parser.add_option('-m', '--mode', help='emu') parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)') parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)') parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.') parser.add_option('-v', '--verbose', action='store_true', default=False) (options, args) = parser.parse_args() inputDir = options.input_dir outputDir = options.output_dir lepton = options.lepton mode = options.mode tag = options.tag verbose = options.verbose if not tag : parser.error('tag is a required option') if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton) validModes = ['emu'] if mode not in validModes : parser.error("invalid mode %s"%mode) tupleStem, treeName = filter(lambda _: _[0]==mode, fakeu.tupleStemsAndNames)[0] templateInputFilename = "*_%(stem)s_tuple_%(tag)s.root" % {'tag':tag, 'stem':tupleStem} templateOutputFilename = "%(stem)s_%(l)s_eff.root" % {'stem':tupleStem.replace('tuple','histos'), 'l':lepton} outputFileName = os.path.join(outputDir, templateOutputFilename) cacheFileName = outputFileName.replace('.root', '_'+mode+'_cache.root') doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName) onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos optionsToPrint = ['inputDir', 'outputDir', 'mode', 'tag', 'doFillHistograms', 'cacheFileName', 'onthefly_tight_def'] if verbose : print "working from %s"%os.getcwd() print "being called as : %s"%' '.join(os.sys.argv) print "options parsed:\n"+'\n'.join(["%s : 
%s"%(o, eval(o)) for o in optionsToPrint]) print 'input filenames: ',os.path.join(inputDir, templateInputFilename) # collect inputs tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename)) samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose)) samplesPerGroup = collections.defaultdict(list) filenamesPerGroup = collections.defaultdict(list) mkdirIfNeeded(outputDir) for s, f in zip(samples, tupleFilenames) : samplesPerGroup[s.group].append(s) filenamesPerGroup[s.group].append(f) vars = ['pt', 'pt_eta'] groups = [g for g in samplesPerGroup.keys() if g is not 'higgs'] if lepton=='el' : groups = [g for g in groups if g is not 'heavyflavor'] sourcesThisMode = ['real', 'conv', 'heavy', 'light', 'unknown'] if lepton=='el' else ['real', 'heavy', 'light', 'unknown'] #fill histos if doFillHistograms : start_time = time.clock() num_processed_entries = 0 histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, groups, sourcesThisMode, mode=mode) for group in groups: filenames = filenamesPerGroup[group] sources = histosPerGroupPerSource.keys() histosThisGroupPerSource = dict((s, histosPerGroupPerSource[s][group]) for s in sources) histosAnyGroupPerSource = dict((s, histosPerGroupPerSource[s]['anygroup']) for s in sources) if group!='data' else {} chain = r.TChain(treeName) [chain.Add(fn) for fn in filenames] if verbose: print "%s : %d entries"%(group, chain.GetEntries()) is_data = group in ['data'] print 'is_data ',is_data num_processed_entries += fillHistos(chain=chain, histosPerSource=histosThisGroupPerSource, histosPerSourceAnygroup=histosAnyGroupPerSource, lepton=lepton, onthefly_tight_def=onthefly_tight_def, verbose=verbose) writeHistos(cacheFileName, histosPerGroupPerSource, verbose) end_time = time.clock() delta_time = end_time - start_time one_minute = 60 if verbose: print ("processed {0:d} entries ".format(num_processed_entries) +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>one_minute else "{0:.1f} s 
".format(delta_time)) +"({0:.1f} kHz)".format(num_processed_entries/delta_time)) # plot histos histosPerGroupPerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, groups, sourcesThisMode, mode), verbose) # effs = computeEfficiencies(histosPerGroupPerSource) # still [var][gr][source][l/t] for v in vars: varIs1D, varIs2D = v=='pt', v=='pt_eta' densThisSourceThisVar = dictSum(dict((s, histosPerGroupPerSource[v]['anygroup'][s]['loose']) for s in sourcesThisMode), {'data' : histosPerGroupPerSource[v]['data']['unknown']['loose']}) numsThisSourceThisVar = dictSum(dict((s, histosPerGroupPerSource[v]['anygroup'][s]['tight']) for s in sourcesThisMode), {'data' : histosPerGroupPerSource[v]['data']['unknown']['tight']}) if varIs1D: lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#varepsilon(T|L)' cname = 'stack_loose_'+lepton lT, lY = 'loose '+lepton+', denominator to #varepsilon(T|L)', '#varepsilon(T|L)' title = lT+' '+'anysource'+' '+lepton+';'+lX+';'+lY plotStackedHistosWithData(densThisSourceThisVar, outputDir, cname, title, colors=fakeu.colorsFillSources(), verbose=verbose) cname = 'stack_tight_'+lepton lT, lY = 'tight '+lepton+', numerator to #varepsilon(T|L)', '#varepsilon(T|L)' title = lT+' '+'anysource'+' '+lepton+';'+lX+';'+lY plotStackedHistosWithData(numsThisSourceThisVar, outputDir, cname, title, colors=fakeu.colorsFillSources(), verbose=verbose) for s in sourcesThisMode: for v in vars: groups = first(histosPerGroupPerSource).keys() varIs1D, varIs2D = v=='pt', v=='pt_eta' # effsThisSourceThisVar = dict((g, effs[v][g][s]) for g in groups) densThisSourceThisVar = dictSum(dict((g, histosPerGroupPerSource[v][g][s]['loose']) for g in groups if g not in ['anygroup','data']), {'data' : histosPerGroupPerSource[v]['data']['unknown']['loose']}) numsThisSourceThisVar = dictSum(dict((g, histosPerGroupPerSource[v][g]['unknown']['tight']) for g in groups if g not in ['anygroup','data']), {'data' : histosPerGroupPerSource[v]['data']['unknown']['tight']}) if 
varIs1D: # cname = 'eff_'+lepton+'_'+s lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#varepsilon(T|L)' # title = lT+' '+s+' '+lepton+';'+lX+';'+lY # zoomIn = True # fakeu.plot1dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn) cname = 'stack_loose_'+lepton+'_'+s lT, lY = 'loose '+lepton+', denominator to #varepsilon(T|L)', '#varepsilon(T|L)' title = lT+' '+s+' '+lepton+';'+lX+';'+lY plotStackedHistosWithData(densThisSourceThisVar, outputDir, cname, title, colors=SampleUtils.colors, verbose=verbose) cname = 'stack_tight_'+lepton+'_'+s lT, lY = 'tight '+lepton+', numerator to #varepsilon(T|L)', '#varepsilon(T|L)' title = lT+' '+s+' '+lepton+';'+lX+';'+lY plotStackedHistosWithData(numsThisSourceThisVar, outputDir, cname, title, colors=SampleUtils.colors, verbose=verbose) # elif varIs2D: # cname = 'eff_'+lepton+'_'+s # lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#eta' # title = lT+' '+s+' '+lepton+';'+lX+';'+lY # fakeu.plot2dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn=zoomIn) # writeHistos(outputFileName, effs, verbose) if verbose : print "saved scale factors to %s" % outputFileName
def main(): parser = optparse.OptionParser(usage=usage) parser.add_option('-g', '--group', help='group to be processed (used only in fill mode)') parser.add_option('-i', '--input-dir', default='./out/fakerate') parser.add_option('-o', '--output-dir', default='./out/fake_scale_factor') parser.add_option('-l', '--lepton', default='el', help='either el or mu') parser.add_option( '-r', '--region', help='one of the regions for which we saved the fake ntuples') parser.add_option( '--samples-dir', default='samples/', help='directory with the list of samples; default ./samples/') parser.add_option( '-T', '--tight-def', help= 'on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.' ) parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)') parser.add_option('--keep-real', action='store_true', default=False, help='do not subtract real (to get real lep efficiency)') parser.add_option('--debug', action='store_true') parser.add_option('--verbose', action='store_true') parser.add_option('--disable-cache', action='store_true', help='disable the entry cache') (options, args) = parser.parse_args() inputDir = options.input_dir outputDir = options.output_dir lepton = options.lepton region = options.region keepreal = options.keep_real debug = options.debug verbose = options.verbose if lepton not in ['el', 'mu']: parser.error("invalid lepton '%s'" % lepton) regions = kin.selection_formulas().keys() assert region in regions, "invalid region '%s', must be one of %s" % ( region, str(sorted(regions))) regions = [region] dataset.Dataset.verbose_parsing = True if debug else False groups = dataset.DatasetGroup.build_groups_from_files_in_dir( options.samples_dir) if options.group: groups = [g for g in groups if g.name == options.group] group_names = [g.name for g in groups] outputDir = outputDir + '/' + region + '/' + lepton # split the output in subdirectories, so we don't overwrite things 
mkdirIfNeeded(outputDir) templateOutputFilename = "scale_factor_{0}.root".format(lepton) outputFileName = os.path.join(outputDir, templateOutputFilename) cacheFileName = outputFileName.replace('.root', '_cache.root') doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName) onthefly_tight_def = eval( options.tight_def ) if options.tight_def else None # eval will take care of aborting on typos if verbose: utils.print_running_conditions(parser, options) vars = ['mt0', 'mt1', 'pt0', 'pt1', 'eta1', 'pt1_eta1'] #fill histos if doFillHistograms: start_time = time.clock() num_processed_entries = 0 histosPerGroup = bookHistos(vars, group_names, region=region) histosPerSource = bookHistosPerSource(vars, leptonSources, region=region) histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, group_names, leptonSources, region=region) for group in groups: tree_name = 'hlfv_tuple' chain = IndexedChain(tree_name) for ds in group.datasets: fname = os.path.join(inputDir, ds.name + '.root') if os.path.exists(fname): chain.Add(fname) if verbose: print "{0} : {1} entries from {2} samples".format( group.name, chain.GetEntries(), len(group.datasets)) chain.cache_directory = os.path.abspath('./selection_cache/' + group.name + '/') tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions] print 'tcuts ', [c.GetName() for c in tcuts] chain.retrieve_entrylists(tcuts) counters_pre, histos_pre = dict(), dict() counters_npre, histos_npre = dict(), dict() print 'tcuts_with_existing_list ', str( [c.GetName() for c in chain.tcuts_with_existing_list()]) print 'tcuts_without_existing_list ', str( [c.GetName() for c in chain.tcuts_without_existing_list()]) cached_tcuts = [] if options.disable_cache else chain.tcuts_with_existing_list( ) print 'cached_tcuts ', [c.GetName() for c in cached_tcuts] uncached_tcuts = tcuts if options.disable_cache else chain.tcuts_without_existing_list( ) print 'todo: skip cuts for which the histo files are there' if verbose: print " --- 
group : {0} ---".format(group.name) print '\n\t'.join(chain.filenames) if verbose: print 'filling cached cuts: ', ' '.join( [c.GetName() for c in cached_tcuts]) if verbose: print "%s : %d entries" % (group.name, chain.GetEntries()) histosThisGroup = histosPerGroup[group.name] histosThisGroupPerSource = dict( (v, histosPerGroupPerSource[v][group.name]) for v in histosPerGroupPerSource.keys()) for cut in cached_tcuts: print 'cached_tcut ', cut chain.preselect(cut) num_processed_entries += fillHistos( chain, histosThisGroup, histosPerSource, histosThisGroupPerSource, lepton, group, cut, cut_is_cached=True, onthefly_tight_def=onthefly_tight_def, verbose=verbose) if verbose: print 'filling uncached cuts: ', ' '.join( [c.GetName() for c in uncached_tcuts]) if uncached_tcuts: assert len(uncached_tcuts ) == 1, "expecting only one cut, got {}".format( len(uncached_tcuts)) cut = uncached_tcuts[0] chain.preselect(None) num_processed_entries += fillHistos( chain, histosThisGroup, histosPerSource, histosThisGroupPerSource, lepton, group, cut, cut_is_cached=False, onthefly_tight_def=onthefly_tight_def, verbose=verbose) chain.save_lists() writeHistos(cacheFileName, histosPerGroup, histosPerSource, histosPerGroupPerSource, verbose) end_time = time.clock() delta_time = end_time - start_time if verbose: print("processed {0:d} entries ".format(num_processed_entries) + "in " + ("{0:d} min ".format(int(delta_time / 60)) if delta_time > 60 else "{0:.1f} s ".format(delta_time)) + "({0:.1f} kHz)".format(num_processed_entries / delta_time)) # return # compute scale factors histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, group_names, region), verbose) histosPerSource = fetchHistos( cacheFileName, histoNamesPerSource(vars, leptonSources, region), verbose) histosPerSamplePerSource = fetchHistos( cacheFileName, histoNamesPerSamplePerSource(vars, group_names, leptonSources, region), verbose) plotStackedHistos(histosPerGroup, outputDir + '/by_group', region, verbose) 
plotStackedHistosSources(histosPerSource, outputDir + '/by_source', region, verbose) plotPerSourceEff(histosPerVar=histosPerSource, outputDir=outputDir + '/by_source', lepton=lepton, region=region, verbose=verbose) for g in group_names: hps = dict((v, histosPerSamplePerSource[v][g]) for v in vars) plotPerSourceEff(histosPerVar=hps, outputDir=outputDir, lepton=lepton, region=region, sample=g, verbose=verbose) hn_sf_eta = histoname_sf_vs_eta(lepton) hn_sf_pt = histoname_sf_vs_pt(lepton) hn_da_eta = histoname_data_fake_eff_vs_eta(lepton) hn_da_pt = histoname_data_fake_eff_vs_pt(lepton) subtractReal = not keepreal objs_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'eta1', hn_sf_eta, hn_da_eta, outputDir, region, subtractReal, verbose) objs_pt = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1', hn_sf_pt, hn_da_pt, outputDir, region, subtractReal, verbose) objs_pt_eta = subtractRealAndComputeScaleFactor( histosPerGroup, 'pt1_eta1', histoname_sf_vs_pt_eta(lepton), histoname_data_fake_eff_vs_pt_eta(lepton), outputDir, region, subtractReal, verbose) rootUtils.writeObjectsToFile( outputFileName, dictSum(dictSum(objs_eta, objs_pt), objs_pt_eta), verbose) if verbose: print "saved scale factors to %s" % outputFileName