def drawBottom(pad, totBkg, bkgHistos, sigHisto, llnjvar):
    """Draw the stacked backgrounds and the signal overlay on the given pad.

    bkgHistos is a dict {sample : histo}; colors/topRightLabel/dictSum/
    drawLegendWithDictKeys come from the surrounding module.
    """
    pad.cd()
    totBkg.SetStats(False)
    totBkg.SetMinimum(0.)  # force this to avoid negative fluct due to fake
    totBkg.Draw('axis')
    pad.Update()  # necessary to fool root's dumb object ownership
    stack = r.THStack('stack_'+llnjvar, '')
    pad.Update()
    r.SetOwnership(stack, False)
    for sample, histo in bkgHistos.iteritems():
        histo.SetFillColor(colors[sample] if sample in colors else r.kOrange)
        histo.SetDrawOption('bar')
        histo.SetDirectory(0)
        stack.Add(histo)
    stack.Draw('hist same')
    pad.Update()
    sigHisto.SetLineColor(r.kRed)
    sigHisto.SetLineWidth(2*sigHisto.GetLineWidth())
    sigHisto.Draw('same')
    pad.Update()
    topRightLabel(pad, llnjvar, xpos=0.125, align=13)
    drawLegendWithDictKeys(pad, dictSum(bkgHistos, {'signal': sigHisto}), opt='f')
    pad.RedrawAxis()
    # keep python-side references so root does not garbage-collect them
    pad._stack = stack
    pad._histos = [h for h in stack.GetHists()]
    pad.Update()
def optimizeSelection() : inputdir, options = parseOptions() print 'sigreg ',options.sigreg tag = pickTag(inputdir, options) sigFiles, bkgFiles = getInputFilenames(inputdir, tag, options) # todo: filter with regexp sigFiles = dict([(s, k) for s, k in sigFiles.iteritems() if s in filterWithRegexp(sigFiles.keys(), options.sigreg)]) allSamples = dictSum(sigFiles, bkgFiles) vars = variablesToPlot() histos = bookHistos(vars, allSamples.keys(), options.ll, options.nj) counts = fillHistosAndCount(histos, dictSum(sigFiles, bkgFiles), options.ll, options.nj, options.quicktest) bkgHistos = dict((s, h) for s, h in histos.iteritems() if s in bkgFiles.keys()) sigHistos = dict((s, h) for s, h in histos.iteritems() if s in sigFiles.keys()) plotHistos(bkgHistos, sigHistos, options.plotdir) printSummary(counts, options.summary)
def scoutedScoreForMatchNum(self, match, allianceIsRed):
    """Return the scouted total score for one alliance in a match.

    Sums auto points, tele shot points, capped defense-crossing points,
    scale points, and challenge points over the alliance's TIMDs.
    NOTE: mutates each TIMD's timesSuccessfulCrossedDefensesTele in place
    (normalizing lists of crossing times into counts), as the original did.
    """
    allTIMDs = self.calculator.su.getTIMDsForMatch(match)
    allianceNumbers = self.calculator.su.getAllianceForMatch(match, allianceIsRed)
    allianceNumbers = map(lambda t: t.number, allianceNumbers)
    allianceTIMDs = [timd for timd in allTIMDs if timd.teamNumber in allianceNumbers]
    autoPts = self.calculator.getAutoPointsForMatchForAllianceIsRed(match, allianceIsRed)
    teleShotPts = (2 * sum([(timd.numLowShotsMadeTele or 0) for timd in allianceTIMDs])
                   + 5 * sum([(timd.numHighShotsMadeTele or 0) for timd in allianceTIMDs]))
    # normalize each crossing record: missing or None -> 0, list of times -> count
    for timd in allianceTIMDs:
        s = timd.timesSuccessfulCrossedDefensesTele
        for key in self.calculator.defenseList:
            if key not in s or s[key] is None:  # was '== None'; identity test is correct
                s[key] = 0
            else:
                s[key] = len(s[key])
    # robustness fix: the sum was hard-coded to allianceTIMDs[0], [1], [2] and
    # raised IndexError for alliances with fewer than 3 scouted TIMDs; fold over
    # however many are present instead (assumes utils.dictSum is a plain
    # per-key sum, hence order-independent -- TODO confirm)
    crossingDicts = [timd.timesSuccessfulCrossedDefensesTele for timd in allianceTIMDs]
    allDefenseCrossings = reduce(utils.dictSum, crossingDicts)
    # only the first two crossings of each defense score points
    allDefenseCrossings = dict((defense, min(crossings, 2))
                               for defense, crossings in allDefenseCrossings.items())
    teleDefenseCrossPts = 5 * sum(allDefenseCrossings.values())
    scalePts = 15 * sum([utils.convertFirebaseBoolean(timd.didScaleTele) for timd in allianceTIMDs])
    challengePts = 5 * sum([utils.convertFirebaseBoolean(timd.didChallengeTele) for timd in allianceTIMDs])
    return autoPts + teleShotPts + teleDefenseCrossPts + scalePts + challengePts
def plotStackedHistos(histos={}, datakey=None, stackkeys=[], outputDir='', region='', colors={}, verbose=False): "input: a dictionary of histos[group]" mkdirIfNeeded(outputDir) bkg_histos = dict([(k, h) for k, h in histos.iteritems() if k in stackkeys]) tot_bkg = summedHisto(bkg_histos.values(), label='') err_band = None # tmp disable # err_band = buildErrBandGraph(tot_bkg, computeStatErr2(tot_bkg)) empty_bkg = tot_bkg.Integral() == 0 if empty_bkg: if verbose: print "empty backgrounds, skip %s" % tot_bkg.GetName() return histoname = tot_bkg.GetName() can = r.TCanvas('c_' + histoname, histoname, 800, 600) can.cd() pm = tot_bkg # pad master pm.SetStats(False) pm.Draw('axis') can.Update() # necessary to fool root's dumb object ownership stack = r.THStack('stack_' + tot_bkg.GetName(), '') can.Update() r.SetOwnership(stack, False) for s, h in bkg_histos.iteritems(): h.SetFillColor(colors[s] if s in colors else r.kOrange) h.SetDrawOption('bar') h.SetDirectory(0) stack.Add(h) stack.Draw('hist same') # err_band.Draw('E2 same') data = histos[datakey] if datakey and datakey in histos else None if data and data.GetEntries(): data.SetMarkerStyle(r.kFullDotLarge) data.Draw('p same') if verbose: print "data : nEntries {:.1f} totWeight {:.1f} ".format( data.GetEntries(), data.Integral()) yMin, yMax = getMinMax([h for h in [tot_bkg, data, err_band] if h]) # pm.SetMinimum(0.5) pm.SetMaximum(1.1 * yMax) can.Update() # can.SetLogy() topRightLabel(can, "#splitline{%s}{%s}" % (histoname, region), xpos=0.125, align=13) drawLegendWithDictKeys(can, dictSum(bkg_histos, {'stat err': err_band}), opt='f') can.RedrawAxis() can._stack = stack can._histos = [h for h in stack.GetHists()] + [data] can.Update() if verbose: print os.path.join(outputDir, histoname + '.png') can.SaveAs(os.path.join(outputDir, histoname + '.png'))
def plotStackedHistos(histosPerGroup={}, outputDir='', region='', verbose=False): groups = histosPerGroup.keys() variables = first(histosPerGroup).keys() leptonTypes = first(first(histosPerGroup)).keys() colors = SampleUtils.colors mkdirIfNeeded(outputDir) histosPerName = dict([(region+'_'+var+'_'+lt, # one canvas for each histo, so key with histoname w/out group dict([(g, histosPerGroup[g][var][lt]) for g in groups])) for var in variables for lt in leptonTypes]) for histoname, histosPerGroup in histosPerName.iteritems(): missingGroups = [g for g, h in histosPerGroup.iteritems() if not h] if missingGroups: if verbose : print "skip %s, missing histos for %s"%(histoname, str(missingGroups)) continue bkgHistos = dict([(g, h) for g, h in histosPerGroup.iteritems() if isBkgSample(g)]) totBkg = summedHisto(bkgHistos.values()) err_band = buildErrBandGraph(totBkg, computeStatErr2(totBkg)) emptyBkg = totBkg.Integral()==0 if emptyBkg: if verbose : print "empty backgrounds, skip %s"%histoname continue can = r.TCanvas('c_'+histoname, histoname, 800, 600) can.cd() pm = totBkg # pad master pm.SetStats(False) pm.Draw('axis') can.Update() # necessary to fool root's dumb object ownership stack = r.THStack('stack_'+histoname,'') can.Update() r.SetOwnership(stack, False) for s, h in bkgHistos.iteritems() : h.SetFillColor(colors[s] if s in colors else r.kOrange) h.SetDrawOption('bar') h.SetDirectory(0) stack.Add(h) stack.Draw('hist same') err_band.Draw('E2 same') data = histosPerGroup['data'] if data and data.GetEntries(): data.SetMarkerStyle(r.kFullDotLarge) data.Draw('p same') yMin, yMax = getMinMax([h for h in [totBkg, data, err_band] if h]) pm.SetMinimum(0.0) pm.SetMaximum(1.1*yMax) can.Update() topRightLabel(can, histoname, xpos=0.125, align=13) drawLegendWithDictKeys(can, dictSum(bkgHistos, {'stat err':err_band}), opt='f') can.RedrawAxis() can._stack = stack can._histos = [h for h in stack.GetHists()]+[data] can.Update() outFname = os.path.join(outputDir, histoname+'.png') 
utils.rmIfExists(outFname) can.SaveAs(outFname)
def plotStackedHistosWithData(histosPerGroup={}, outputDir='', canvasname='', canvastitle='', colors={}, verbose=False): "histosPerGroup[group], where group=data is treated as special" groups = histosPerGroup.keys() mkdirIfNeeded(outputDir) missingGroups = [g for g, h in histosPerGroup.iteritems() if not h] if missingGroups: if verbose : print "skip %s, missing histos for %s"%(histoname, str(missingGroups)) return bkgHistos = dict([(g, h) for g, h in histosPerGroup.iteritems() if not isDataSample(g)]) totBkg = summedHisto(bkgHistos.values()) err_band = buildErrBandGraph(totBkg, computeStatErr2(totBkg)) emptyBkg = totBkg.Integral()==0 histoname, region = totBkg.GetName(), 'emu' # tmp replacement vars, to be fixed if emptyBkg: if verbose : print "empty backgrounds, skip %s"%histoname return can = r.TCanvas(canvasname, canvastitle, 800, 600) can.cd() pm = totBkg # pad master pm.SetStats(False) pm.Draw('axis') can.Update() # necessary to fool root's dumb object ownership stack = r.THStack('stack_'+histoname,'') can.Update() r.SetOwnership(stack, False) for s, h in bkgHistos.iteritems() : h.SetFillColor(colors[s] if s in colors else r.kOrange) h.SetDrawOption('bar') h.SetDirectory(0) stack.Add(h) stack.Draw('hist same') err_band.Draw('E2 same') data = histosPerGroup['data'] if 'data' in histosPerGroup else None if data and data.GetEntries(): data.SetMarkerStyle(r.kFullDotLarge) data.Draw('p same') if verbose : print "integrals : {0} tot.bkg.: {1}, data: {2}".format(histoname, totBkg.Integral(), data.Integral()) else: print "no data" yMin, yMax = getMinMax([h for h in [totBkg, data, err_band] if h]) pm.SetMinimum(0.0) pm.SetMaximum(1.1*yMax) can.Update() topRightLabel(can, "#splitline{%s}{%s}"%(histoname, region), xpos=0.15, ypos=(1.0-0.5*can.GetTopMargin()), align=13) drawLegendWithDictKeys(can, dictSum(bkgHistos, {'stat err':err_band}), opt='f') can.RedrawAxis() can._stack = stack can._histos = [h for h in stack.GetHists()]+[data] can.Update() 
filename=os.path.join(outputDir, histoname+'.png') rmIfExists(filename) can.SaveAs(filename)
def plotStackedHistosSources(histosPerVar={}, outputDir='', region='', verbose=False): variables = histosPerVar.keys() sources = first(histosPerVar).keys() colors = colorsFillSources mkdirIfNeeded(outputDir) for var in variables: for lOrT in ['loose', 'tight']: histos = dict((s, histosPerVar[var][s][lOrT]) for s in sources) canvasBasename = region+'_region_'+var+'_'+lOrT missingSources = [s for s, h in histos.iteritems() if not h] if missingSources: if verbose : print "skip %s, missing histos for %s"%(var, str(missingSources)) continue totBkg = summedHisto(histos.values()) err_band = buildErrBandGraph(totBkg, computeStatErr2(totBkg)) emptyBkg = totBkg.Integral()==0 if emptyBkg: if verbose : print "empty backgrounds, skip %s"%canvasBasename continue can = r.TCanvas('c_'+canvasBasename, canvasBasename, 800, 600) can.cd() pm = totBkg # pad master pm.SetStats(False) pm.Draw('axis') can.Update() # necessary to fool root's dumb object ownership stack = r.THStack('stack_'+canvasBasename,'') can.Update() r.SetOwnership(stack, False) for s, h in histos.iteritems() : h.SetFillColor(colors[s] if s in colors else r.kOrange) h.SetDrawOption('bar') h.SetDirectory(0) stack.Add(h) stack.Draw('hist same') err_band.Draw('E2 same') yMin, yMax = getMinMax([h for h in [totBkg, err_band] if h is not None]) pm.SetMinimum(0.0) pm.SetMaximum(1.1*yMax) can.Update() topRightLabel(can, canvasBasename, xpos=0.125, align=13) drawLegendWithDictKeys(can, dictSum(histos, {'stat err':err_band}), opt='f') can.RedrawAxis() can._stack = stack can._histos = [h for h in stack.GetHists()] can.Update() outFname = os.path.join(outputDir, canvasBasename+'.png') utils.rmIfExists(outFname) can.SaveAs(outFname)
def plotStackedHistos(histos={}, datakey=None, stackkeys=[], outputDir='', region='', colors={}, verbose=False): "input: a dictionary of histos[group]" mkdirIfNeeded(outputDir) bkg_histos = dict([(k,h) for k,h in histos.iteritems() if k in stackkeys]) tot_bkg = summedHisto(bkg_histos.values(), label='') err_band = None # tmp disable # err_band = buildErrBandGraph(tot_bkg, computeStatErr2(tot_bkg)) empty_bkg = tot_bkg.Integral()==0 if empty_bkg: if verbose : print "empty backgrounds, skip %s"%tot_bkg.GetName() return histoname = tot_bkg.GetName() can = r.TCanvas('c_'+histoname, histoname, 800, 600) can.cd() pm = tot_bkg # pad master pm.SetStats(False) pm.Draw('axis') can.Update() # necessary to fool root's dumb object ownership stack = r.THStack('stack_'+tot_bkg.GetName(),'') can.Update() r.SetOwnership(stack, False) for s, h in bkg_histos.iteritems() : h.SetFillColor(colors[s] if s in colors else r.kOrange) h.SetDrawOption('bar') h.SetDirectory(0) stack.Add(h) stack.Draw('hist same') # err_band.Draw('E2 same') data = histos[datakey] if datakey and datakey in histos else None if data and data.GetEntries(): data.SetMarkerStyle(r.kFullDotLarge) data.Draw('p same') if verbose: print "data : nEntries {:.1f} totWeight {:.1f} ".format(data.GetEntries(), data.Integral()) yMin, yMax = getMinMax([h for h in [tot_bkg, data, err_band] if h]) # pm.SetMinimum(0.5) pm.SetMaximum(1.1*yMax) can.Update() # can.SetLogy() topRightLabel(can, "#splitline{%s}{%s}"%(histoname, region), xpos=0.125, align=13) drawLegendWithDictKeys(can, dictSum(bkg_histos, {'stat err':err_band}), opt='f') can.RedrawAxis() can._stack = stack can._histos = [h for h in stack.GetHists()]+[data] can.Update() if verbose : print os.path.join(outputDir, histoname+'.png') can.SaveAs(os.path.join(outputDir, histoname+'.png'))
def getThirdTeamCalcDataKeys(calc):
    """Return the third-tier team calculated-data keys mapped to the
    lambdas that compute each value for a given team."""
    return {
        "predictedNumRPs": lambda team: calc.predictedNumberOfRPs(team),
        "actualNumRPs": lambda team: calc.getTeamRPsFromTBA(team),
        "actualSeed": lambda team: calc.getTeamSeed(team),
        "predictedSeed": lambda team: calc.cachedComp.predictedSeedings.index(team) + 1,
        # R-scores come from the cached per-competition z-score dicts, keyed by team number
        "RScoreTorque": lambda team: calc.cachedComp.torqueZScores[team.number],
        "RScoreSpeed": lambda team: calc.cachedComp.speedZScores[team.number],
        "RScoreAgility": lambda team: calc.cachedComp.agilityZScores[team.number],
        "RScoreDefense": lambda team: calc.cachedComp.defenseZScores[team.number],
        "RScoreBallControl": lambda team: calc.cachedComp.ballControlZScores[team.number],
        "RScoreDrivingAbility": lambda team: calc.cachedComp.drivingAbilityZScores[team.number],
        "avgSuccessfulTimesCrossedDefenses": lambda team: utils.dictSum(
            team.calculatedData.avgSuccessfulTimesCrossedDefensesAuto,
            team.calculatedData.avgSuccessfulTimesCrossedDefensesTele),
        "blockingAbility": lambda team: (
            (team.calculatedData.avgShotsBlocked
             - calc.averageTeam.calculatedData.avgShotsBlocked)
            * calc.averageTeam.calculatedData.highShotAccuracyTele * 5),
        "defensesCrossableAuto": lambda team: calc.defensesCrossableByTeamForDefenseDict(
            team, team.calculatedData.avgSuccessfulTimesCrossedDefensesAuto),
        "defensesCrossableTele": lambda team: calc.defensesCrossableByTeamForDefenseDict(
            team, team.calculatedData.avgSuccessfulTimesCrossedDefensesTele),
        "firstPickAbility": lambda team: calc.firstPickAbility(team),
        "overallSecondPickAbility": lambda team: calc.overallSecondPickAbility(team),
    }
def runFill(opts) : batchMode = opts.batch inputFakeDir = opts.input_fake inputGenDir = opts.input_other outputDir = opts.output_dir verbose = opts.verbose debug = opts.debug blinded = not opts.unblind tightight = opts.require_tight_tight if debug : dataset.Dataset.verbose_parsing = True groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir) if not skip_charge_flip : groups.append(dataset.DatasetGroup.build_qflip_from_simulated_samples(groups)) groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake()) groups = parse_group_option(opts, groups) if verbose : print '\n'.join("group {0} : {1} samples".format(g.name, len(g.datasets)) for g in groups) if debug : print '\n'.join("group {0} : {1} samples: {2}".format(g.name, len(g.datasets), '\n\t'+'\n\t'.join(d.name for d in g.datasets)) for g in groups) if verbose : print "filling histos" # eval will take care of aborting on typos onthefly_tight_def = eval(opts.tight_def) if opts.tight_def else None mkdirIfNeeded(outputDir) systematics = get_list_of_syst_to_fill(opts) regions = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions) if verbose : print "about to loop over these systematics:\n %s"%str(systematics) if verbose : print "about to loop over these regions:\n %s"%str(regions) if batchMode: for group in groups: for systematic in systematics: if systUtils.Group(group.name).isNeededForSys(systematic): opts.syst = systematic for selection in regions: submit_batch_fill_job_per_group_per_selection(group=group, selection=selection, opts=opts) else: for group in groups: systematics = [s for s in systematics if systUtils.Group(group.name).isNeededForSys(s)] if not systematics : print "warning, empty syst list. 
You should have at least the nominal" for systematic in systematics: # note to self: here you will want to use a modified Sample.setHftInputDir # for now we just have the fake syst that are in the nominal tree tree_name = 'hlfv_tuple' chain = IndexedChain(tree_name) input_dir = opts.input_fake if group.name=='fake' else opts.input_other for ds in group.datasets: chain.Add(os.path.join(input_dir, systUtils.Sample(ds.name, group.name).setSyst(systematic).filename)) if opts.verbose: print "{0} : {1} entries from {2} samples".format(group.name, chain.GetEntries(), len(group.datasets)) chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/') tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions] chain.retrieve_entrylists(tcuts) counters_pre, histos_pre = dict(), dict() counters_npre, histos_npre = dict(), dict() cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list() uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list() if verbose : print 'filling cached cuts: ',' '.join([c.GetName() for c in cached_tcuts]) for cut in cached_tcuts: chain.preselect(cut) c_pre, h_pre = count_and_fill(chain=chain, sample=group.name, syst=systematic, verbose=verbose, debug=debug, blinded=blinded, onthefly_tight_def=onthefly_tight_def, tightight=tightight, quicktest=opts.quick_test, cached_cut=cut) out_filename = (systUtils.Group(group.name) .setSyst(systematic) .setHistosDir(outputDir) .setCurrentSelection(cut.GetName())).filenameHisto writeObjectsToFile(out_filename, h_pre, verbose) counters_pre = dictSum(counters_pre, c_pre) histos_pre = dictSum(histos_pre, h_pre) if uncached_tcuts: if verbose : print 'filling uncached cuts: ',' '.join([c.GetName() for c in uncached_tcuts]) counters_npre, histos_npre = count_and_fill(chain=chain, sample=group.name, syst=systematic, verbose=verbose, debug=debug, blinded=blinded, onthefly_tight_def=onthefly_tight_def, tightight=tightight, quicktest=opts.quick_test, 
noncached_cuts=uncached_tcuts) for sel, histos in histos_npre.iteritems(): out_filename = (systUtils.Group(group.name) .setSyst(systematic) .setHistosDir(outputDir) .setCurrentSelection(sel)).filenameHisto writeObjectsToFile(out_filename, histos, verbose) chain.save_lists()
def runFill(opts): lepton = opts.lepton batchMode = opts.batch inputDir = opts.input_dir outputDir = opts.output_dir verbose = opts.verbose debug = opts.debug dataset.Dataset.verbose_parsing = True if debug else False groups = dataset.DatasetGroup.build_groups_from_files_in_dir( opts.samples_dir) if opts.group: groups = [g for g in groups if g.name == opts.group] if verbose: print '\n'.join( "group {0} : {1} samples".format(g.name, len(g.datasets)) for g in groups) if debug: print '\n'.join("group {0} : {1} samples: {2}".format( g.name, len(g.datasets), '\n\t' + '\n\t'.join(d.name for d in g.datasets)) for g in groups) if verbose: print "filling histos" outputDir = outputDir + '/' + lepton + '/histos' mkdirIfNeeded(outputDir) if batchMode: for group in groups: submit_batch_fill_job_per_group(group, opts) else: for group in groups: tree_name = 'ss3l_tuple' chain = IndexedChain(tree_name) for ds in group.datasets: chain.Add(os.path.join(inputDir, ds.name + '.root')) if opts.verbose: print "{0} : {1} entries from {2} samples".format( group.name, chain.GetEntries(), len(group.datasets)) chain.cache_directory = os.path.abspath('./selection_cache/' + group.name + '/') tcuts = [ r.TCut(reg, selection_formulas()[reg]) for reg in regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions) ] chain.retrieve_entrylists(tcuts) counters_pre, histos_pre = dict(), dict() counters_npre, histos_npre = dict(), dict() cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list( ) uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list( ) print 'todo: skip cuts for which the histo files are there' if verbose: print 'filling cached cuts: ', ' '.join( [c.GetName() for c in cached_tcuts]) for cut in cached_tcuts: chain.preselect(cut) c_pre, h_pre = count_and_fill(chain=chain, opts=opts, group=group, cached_cut=cut) counters_pre = dictSum(counters_pre, c_pre) histos_pre = dictSum(histos_pre, h_pre) if verbose: print 'filling 
uncached cuts: ', ' '.join( [c.GetName() for c in uncached_tcuts]) if uncached_tcuts: counters_npre, histos_npre = count_and_fill( chain=chain, opts=opts, group=group, noncached_cuts=uncached_tcuts) chain.save_lists() all_histos = dictSum(histos_pre, histos_npre) for sel, histos in all_histos.iteritems(): # write histos for each sel to a separate file (finer granularity, better caching) out_filename = os.path.join(outputDir, group.name + '_' + sel + '.root') if verbose: print 'saving to ', out_filename writeObjectsToFile(out_filename, histos, verbose)
def runFill(opts): batchMode = opts.batch inputFakeDir = opts.input_fake inputGenDir = opts.input_other outputDir = opts.output_dir verbose = opts.verbose debug = opts.debug blinded = not opts.unblind tightight = opts.require_tight_tight if debug: dataset.Dataset.verbose_parsing = True groups = dataset.DatasetGroup.build_groups_from_files_in_dir( opts.samples_dir) if not skip_charge_flip: groups.append( dataset.DatasetGroup.build_qflip_from_simulated_samples(groups)) groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake()) groups = parse_group_option(opts, groups) if verbose: print '\n'.join( "group {0} : {1} samples".format(g.name, len(g.datasets)) for g in groups) if debug: print '\n'.join("group {0} : {1} samples: {2}".format( g.name, len(g.datasets), '\n\t' + '\n\t'.join(d.name for d in g.datasets)) for g in groups) if verbose: print "filling histos" # eval will take care of aborting on typos onthefly_tight_def = eval(opts.tight_def) if opts.tight_def else None mkdirIfNeeded(outputDir) systematics = get_list_of_syst_to_fill(opts) regions = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions) if verbose: print "about to loop over these systematics:\n %s" % str(systematics) if verbose: print "about to loop over these regions:\n %s" % str(regions) if batchMode: for group in groups: for systematic in systematics: if systUtils.Group(group.name).isNeededForSys(systematic): opts.syst = systematic for selection in regions: submit_batch_fill_job_per_group_per_selection( group=group, selection=selection, opts=opts) else: for group in groups: systematics = [ s for s in systematics if systUtils.Group(group.name).isNeededForSys(s) ] if not systematics: print "warning, empty syst list. 
You should have at least the nominal" for systematic in systematics: # note to self: here you will want to use a modified Sample.setHftInputDir # for now we just have the fake syst that are in the nominal tree tree_name = 'hlfv_tuple' chain = IndexedChain(tree_name) input_dir = opts.input_fake if group.name == 'fake' else opts.input_other for ds in group.datasets: chain.Add( os.path.join( input_dir, systUtils.Sample( ds.name, group.name).setSyst(systematic).filename)) if opts.verbose: print "{0} : {1} entries from {2} samples".format( group.name, chain.GetEntries(), len(group.datasets)) chain.cache_directory = os.path.abspath('./selection_cache/' + group.name + '/') tcuts = [ r.TCut(reg, selection_formulas()[reg]) for reg in regions ] chain.retrieve_entrylists(tcuts) counters_pre, histos_pre = dict(), dict() counters_npre, histos_npre = dict(), dict() cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list( ) uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list( ) if verbose: print 'filling cached cuts: ', ' '.join( [c.GetName() for c in cached_tcuts]) for cut in cached_tcuts: chain.preselect(cut) c_pre, h_pre = count_and_fill( chain=chain, sample=group.name, syst=systematic, verbose=verbose, debug=debug, blinded=blinded, onthefly_tight_def=onthefly_tight_def, tightight=tightight, quicktest=opts.quick_test, cached_cut=cut) out_filename = (systUtils.Group( group.name).setSyst(systematic).setHistosDir( outputDir).setCurrentSelection( cut.GetName())).filenameHisto writeObjectsToFile(out_filename, h_pre, verbose) counters_pre = dictSum(counters_pre, c_pre) histos_pre = dictSum(histos_pre, h_pre) if uncached_tcuts: if verbose: print 'filling uncached cuts: ', ' '.join( [c.GetName() for c in uncached_tcuts]) counters_npre, histos_npre = count_and_fill( chain=chain, sample=group.name, syst=systematic, verbose=verbose, debug=debug, blinded=blinded, onthefly_tight_def=onthefly_tight_def, tightight=tightight, 
quicktest=opts.quick_test, noncached_cuts=uncached_tcuts) for sel, histos in histos_npre.iteritems(): out_filename = (systUtils.Group( group.name).setSyst(systematic).setHistosDir( outputDir).setCurrentSelection(sel) ).filenameHisto writeObjectsToFile(out_filename, histos, verbose) chain.save_lists()
def runFill(opts): lepton = opts.lepton batchMode = opts.batch inputDir = opts.input_dir outputDir = opts.output_dir verbose = opts.verbose debug = opts.debug dataset.Dataset.verbose_parsing = True if debug else False groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir) if opts.group : groups = [g for g in groups if g.name==opts.group] if verbose : print '\n'.join("group {0} : {1} samples".format(g.name, len(g.datasets)) for g in groups) if debug : print '\n'.join("group {0} : {1} samples: {2}".format(g.name, len(g.datasets), '\n\t'+'\n\t'.join(d.name for d in g.datasets)) for g in groups) if verbose : print "filling histos" outputDir = outputDir+'/'+lepton+'/histos' mkdirIfNeeded(outputDir) if batchMode: for group in groups: submit_batch_fill_job_per_group(group, opts) else: for group in groups: tree_name = 'hlfv_tuple' chain = IndexedChain(tree_name) for ds in group.datasets: chain.Add(os.path.join(inputDir, ds.name+'.root')) if opts.verbose: print "{0} : {1} entries from {2} samples".format(group.name, chain.GetEntries(), len(group.datasets)) chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/') tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions)] chain.retrieve_entrylists(tcuts) counters_pre, histos_pre = dict(), dict() counters_npre, histos_npre = dict(), dict() cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list() uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list() print 'todo: skip cuts for which the histo files are there' if verbose : print 'filling cached cuts: ',' '.join([c.GetName() for c in cached_tcuts]) for cut in cached_tcuts: chain.preselect(cut) c_pre, h_pre = count_and_fill(chain=chain, opts=opts, group=group, cached_cut=cut) counters_pre = dictSum(counters_pre, c_pre) histos_pre = dictSum(histos_pre, h_pre) if verbose : print 'filling uncached cuts: ',' 
'.join([c.GetName() for c in uncached_tcuts]) if uncached_tcuts: counters_npre, histos_npre = count_and_fill(chain=chain, opts=opts, group=group, noncached_cuts=uncached_tcuts) chain.save_lists() all_histos = dictSum(histos_pre, histos_npre) for sel, histos in all_histos.iteritems(): # write histos for each sel to a separate file (finer granularity, better caching) out_filename = os.path.join(outputDir, group.name+'_'+sel+'.root') if verbose : print 'saving to ',out_filename writeObjectsToFile(out_filename, histos, verbose)
def getFirstTeamCalcDataKeys(calc):
    """Return the first-pass team calculated-data keys mapped to the
    functions that compute them.

    Each value is a callable taking a team; 'defenses' maps to a list of
    such callables and 'avgNumTimesSlowed' to a dict of them.  Nothing is
    evaluated here -- the caller applies the lambdas team by team, so this
    function only builds the dispatch table.

    Fix vs. previous revision: the 'sdSuccessfulDefenseCrossesTele' entry
    had a stray comma ('lambda tm: tm.ti, mesSuccessfulCrossedDefensesTele')
    which split the attribute access into two arguments and referenced an
    undefined name; it now reads the intended attribute.
    """
    # Sum of the three category-A per-defense dicts; used as the denominator
    # for the beached/slowed/unaffected percentage entries below.
    sumCategoryADataPointDict = lambda team: utils.dictSum(
        team.calculatedData.avgNumTimesUnaffected,
        utils.dictSum(team.calculatedData.avgNumTimesBeached,
                      team.calculatedData.avgNumTimesSlowed))
    return {
        "avgTorque": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.rankTorque),  # Checked
        "avgSpeed": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.rankSpeed),
        "avgAgility": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.rankAgility),  # Checked
        "avgDefense": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.rankDefense),  # Checked
        "avgBallControl": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.rankBallControl),  # Checked
        "avgDrivingAbility": lambda team: calc.drivingAbility(team),
        "disabledPercentage": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: utils.convertFirebaseBoolean(timd.didGetDisabled)),
        "incapacitatedPercentage": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: utils.convertFirebaseBoolean(timd.didGetIncapacitated)),
        "disfunctionalPercentage": lambda team:
            team.calculatedData.disabledPercentage +
            team.calculatedData.incapacitatedPercentage,
        # Auto
        "autoAbility": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.autoAbility),
        "autoAbilityExcludeD": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.autoAbility(
                calc.timdHasDefenseExclusion(timd, calc.defenseDictionary['d']))),
        "autoAbilityExcludeLB": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.autoAbility(
                calc.timdHasDefenseExclusion(timd, calc.defenseDictionary['e']))),
        "avgHighShotsAuto": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numHighShotsMadeAuto),  # Checked
        "avgLowShotsAuto": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numLowShotsMadeAuto),  # Checked
        "reachPercentage": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: utils.convertFirebaseBoolean(timd.didReachAuto)),
        "highShotAccuracyAuto": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.TIMDShotAccuracy(
                timd.numHighShotsMadeAuto, timd.numHighShotsMissedAuto)),  # Checked
        "lowShotAccuracyAuto": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.TIMDShotAccuracy(
                timd.numLowShotsMadeAuto, timd.numLowShotsMissedAuto)),  # Checked
        "avgMidlineBallsIntakedAuto": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.numBallsIntakedOffMidlineAuto),
        "sdMidlineBallsIntakedAuto": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.numBallsIntakedOffMidlineAuto),
        "sdHighShotsAuto": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numHighShotsMadeAuto),  # Checked
        "sdLowShotsAuto": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numLowShotsMadeAuto),  # Checked
        "sdBallsKnockedOffMidlineAuto": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numBallsKnockedOffMidlineAuto),
        # Tele
        "scalePercentage": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: int(utils.convertFirebaseBoolean(timd.didScaleTele))),
        "challengePercentage": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: int(utils.convertFirebaseBoolean(timd.didChallengeTele))),
        "avgGroundIntakes": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numGroundIntakesTele),  # Checked
        "avgBallsKnockedOffMidlineAuto": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numBallsKnockedOffMidlineAuto),  # Checked
        "avgShotsBlocked": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numShotsBlockedTele),  # Checked
        "avgHighShotsTele": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numHighShotsMadeTele),  # Checked
        "avgLowShotsTele": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numLowShotsMadeTele),  # Checked
        "highShotAccuracyTele": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.TIMDShotAccuracy(
                timd.numHighShotsMadeTele, timd.numHighShotsMissedTele)),  # Checked
        "lowShotAccuracyTele": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.TIMDShotAccuracy(
                timd.numLowShotsMadeTele, timd.numLowShotsMissedTele)),
        "teleopShotAbility": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.teleopShotAbility),  # Checked
        "siegeConsistency": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: utils.convertFirebaseBoolean(timd.didChallengeTele) or
                utils.convertFirebaseBoolean(timd.didScaleTele)),  # Checked
        "siegeAbility": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.siegeAbility),  # Checked
        "sdHighShotsTele": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numHighShotsMadeTele),  # Checked
        "sdLowShotsTele": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numLowShotsMadeTele),  # Checked
        "sdGroundIntakes": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numGroundIntakesTele),  # Checked
        "sdShotsBlocked": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numShotsBlockedTele),  # Checked
        "sdTeleopShotAbility": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.teleopShotAbility),
        "sdSiegeAbility": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.siegeAbility),
        "sdAutoAbility": lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.autoAbility),
        "numScaleAndChallengePoints": lambda team: calc.numScaleAndChallengePointsForTeam(team),  # Checked
        # NOTE(review): this passes a *lambda* to convertFirebaseBoolean instead of a
        # boolean, so the conversion operates on the function object; it looks like the
        # intent was convertFirebaseBoolean(calc.teamDidBreachInMatch(team,
        # calc.su.getMatchForNumber(timd.matchNumber))).  Preserved as-is -- confirm
        # against the Calculator implementation before changing.
        "breachPercentage": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: utils.convertFirebaseBoolean(
                lambda team: calc.teamDidBreachInMatch(
                    team, lambda team: calc.su.getMatchForNumber(timd.matchNumber)))),
        "avgHighShotsAttemptedTele": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.highShotsAttemptedTele),
        "avgLowShotsAttemptedTele": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.lowShotsAttemptedTele),
        "twoBallAutoTriedPercentage": lambda team: calc.twoBallAutoTriedPercentage(team),
        "twoBallAutoAccuracy": lambda team: calc.twoBallAutoAccuracy(team),
        "avgNumTimesBeached": lambda team: calc.categoryAAverageDictForDataFunction(
            team, lambda timd: timd.numTimesBeached),
        "avgNumTimesSlowed": {
            "pc": lambda team: calc.avgNumTimesSlowed(team, "pc"),
            "cdf": lambda team: calc.avgNumTimesSlowed(team, "cdf")
        },
        "avgNumTimesUnaffected": lambda team: calc.categoryAAverageDictForDataFunction(
            team, lambda timd: timd.numTimesUnaffected),
        "beachedPercentage": lambda team: utils.dictQuotient(
            team.calculatedData.avgNumTimesBeached, sumCategoryADataPointDict(team)),
        "slowedPercentage": lambda team: utils.dictQuotient(
            team.calculatedData.avgNumTimesSlowed, sumCategoryADataPointDict(team)),
        "unaffectedPercentage": lambda team: utils.dictQuotient(
            team.calculatedData.avgNumTimesUnaffected, sumCategoryADataPointDict(team)),
        "avgNumTimesCrossedDefensesAuto": lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda tm: tm.calculatedData.totalNumTimesCrossedDefensesAuto),
        # Each entry fills one family of per-defense team stats; the two trailing
        # lambdas are the aggregator over crossing times and the per-match reducer.
        "defenses": [
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.avgSuccessfulTimesCrossedDefensesTele,
                lambda tm: tm.timesSuccessfulCrossedDefensesTele,
                lambda x: np.mean(x) if x is not None and len(x) > 0 else 0,
                lambda y: len(y) if y is not None else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.avgSuccessfulTimesCrossedDefensesAuto,
                lambda tm: tm.timesSuccessfulCrossedDefensesAuto,
                lambda x: np.mean(x) if x is not None and len(x) > 0 else 0,
                lambda y: len(y) if y is not None else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.avgFailedTimesCrossedDefensesTele,
                lambda tm: tm.timesFailedCrossedDefensesTele,
                lambda x: np.mean(x) if x is not None and len(x) > 0 else 0,
                lambda y: len(y) if y is not None else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.avgFailedTimesCrossedDefensesAuto,
                lambda tm: tm.timesFailedCrossedDefensesAuto,
                lambda x: np.mean(x) if x is not None and len(x) > 0 else 0,
                lambda y: len(y) if y is not None else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.avgTimeForDefenseCrossTele,
                lambda tm: tm.timesSuccessfulCrossedDefensesTele,
                lambda x: np.mean(x) if x is not None and len(x) > 0 else 0,
                lambda y: np.mean(y) if y is not None and len(y) > 0 else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.avgTimeForDefenseCrossAuto,
                lambda tm: tm.timesSuccessfulCrossedDefensesAuto,
                lambda x: np.mean(x) if x is not None and len(x) > 0 else 0,
                lambda y: np.mean(y) if y is not None and len(y) > 0 else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.sdSuccessfulDefenseCrossesAuto,
                lambda tm: tm.timesSuccessfulCrossedDefensesAuto,
                lambda x: utils.rms(x) if x is not None and len(x) > 0 else 0,
                lambda y: len(y) if y is not None else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.sdSuccessfulDefenseCrossesTele,
                # fix: attribute was split by a stray comma in the previous revision
                lambda tm: tm.timesSuccessfulCrossedDefensesTele,
                lambda x: utils.rms(x) if x is not None and len(x) > 0 else 0,
                lambda y: len(y) if y is not None else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.sdFailedDefenseCrossesAuto,
                lambda tm: tm.timesFailedCrossedDefensesAuto,
                lambda x: utils.rms(x) if x is not None and len(x) > 0 else 0,
                lambda y: len(y) if y is not None else 0),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.sdFailedDefenseCrossesTele,
                lambda tm: tm.timesFailedCrossedDefensesTele,
                lambda x: utils.rms(x) if x is not None and len(x) > 0 else 0,
                lambda y: len(y) if y is not None else 0)
        ]
    }
def main(): parser = optparse.OptionParser(usage=usage) parser.add_option('-i', '--input-dir', default='./out/fakerate') parser.add_option('-o', '--output-dir', default='./out/fake_scale_factor', help='dir for plots') parser.add_option('-l', '--lepton', default='el', help='either el or mu') parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples') parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)') parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.') parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)') parser.add_option('-v', '--verbose', action='store_true', default=False) (options, args) = parser.parse_args() inputDir = options.input_dir outputDir = options.output_dir lepton = options.lepton region = options.region tag = options.tag verbose = options.verbose if not tag : parser.error('tag is a required option') if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton) filestems, treenames = utils.verticalSlice(fakeu.tupleStemsAndNames) regions = filestems assert region in regions,"invalid region '%s', must be one of %s"%(region, str(regions)) templateInputFilename = "*_%(region)s_tuple_%(tag)s.root" % {'tag':tag, 'region':region} templateOutputFilename = "%(region)s_%(l)s_scale_histos.root" % {'region':region, 'l':lepton} treeName = treenames[regions.index(region)] outputDir = outputDir+'/'+region+'/'+lepton # split the output in subdirectories, so we don't overwrite things mkdirIfNeeded(outputDir) outputFileName = os.path.join(outputDir, templateOutputFilename) cacheFileName = outputFileName.replace('.root', '_'+region+'_cache.root') doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName) onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos 
optionsToPrint = ['inputDir', 'outputDir', 'region', 'tag', 'doFillHistograms', 'onthefly_tight_def'] if verbose : print "working from %s"%os.getcwd() print "being called as : %s"%' '.join(os.sys.argv) print "options:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint]) # collect inputs if verbose : print 'input files ',os.path.join(inputDir, templateInputFilename) tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename)) samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose)) samplesPerGroup = collections.defaultdict(list) filenamesPerGroup = collections.defaultdict(list) mkdirIfNeeded(outputDir) for s, f in zip(samples, tupleFilenames) : samplesPerGroup[s.group].append(s) filenamesPerGroup[s.group].append(f) vars = ['mt0', 'mt1', 'pt0', 'pt1', 'eta1'] groups = samplesPerGroup.keys() #fill histos if doFillHistograms : start_time = time.clock() num_processed_entries = 0 histosPerGroup = bookHistos(vars, groups, region=region) histosPerSource = bookHistosPerSource(vars, leptonSources, region=region) histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, groups, leptonSources, region=region) for group in groups: isData = isDataSample(group) filenames = filenamesPerGroup[group] if verbose: print " --- group : %s ---".format(group) print '\n\t'.join(filenames) histosThisGroup = histosPerGroup[group] histosThisGroupPerSource = dict((v, histosPerGroupPerSource[v][group]) for v in histosPerGroupPerSource.keys()) chain = r.TChain(treeName) [chain.Add(fn) for fn in filenames] if verbose: print "%s : %d entries"%(group, chain.GetEntries()) num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource, histosThisGroupPerSource, lepton, group, region, onthefly_tight_def=onthefly_tight_def, verbose=verbose) writeHistos(cacheFileName, histosPerGroup, histosPerSource, histosPerGroupPerSource, verbose) end_time = time.clock() delta_time = end_time - start_time if verbose: print ("processed {0:d} entries 
".format(num_processed_entries) +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>60 else "{0:.1f} s ".format(delta_time)) +"({0:.1f} kHz)".format(num_processed_entries/delta_time)) # compute scale factors histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, groups, region), verbose) histosPerSource = fetchHistos(cacheFileName, histoNamesPerSource(vars, leptonSources, region), verbose) histosPerSamplePerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, groups, leptonSources, region), verbose) plotStackedHistos(histosPerGroup, outputDir+'/by_group', region, verbose) plotStackedHistosSources(histosPerSource, outputDir+'/by_source', region, verbose) plotPerSourceEff(histosPerVar=histosPerSource, outputDir=outputDir+'/by_source', lepton=lepton, region=region, verbose=verbose) for g in groups: hps = dict((v, histosPerSamplePerSource[v][g])for v in vars) plotPerSourceEff(histosPerVar=hps, outputDir=outputDir, lepton=lepton, region=region, sample=g, verbose=verbose) hn_sf_eta = histoname_sf_vs_eta (lepton) hn_sf_pt = histoname_sf_vs_pt (lepton) hn_da_eta = histoname_data_fake_eff_vs_eta(lepton) hn_da_pt = histoname_data_fake_eff_vs_pt (lepton) objs_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'eta1', hn_sf_eta, hn_da_eta, outputDir, region, verbose) objs_pt = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1', hn_sf_pt, hn_da_pt, outputDir, region, verbose) rootUtils.writeObjectsToFile(outputFileName, dictSum(objs_eta, objs_pt), verbose) if verbose : print "saved scale factors to %s" % outputFileName
def main():
    """Compute fake-lepton scale factors for one (region, lepton), per dataset group.

    Command-line driver: builds one IndexedChain per group, (re)fills the
    cached histograms using per-cut entry lists when available, then computes
    the data/MC fake scale factors vs eta, pt, and (pt, eta) and writes them
    to a ROOT file.
    """
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-g', '--group', help='group to be processed (used only in fill mode)')
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fake_scale_factor')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples')
    parser.add_option('--samples-dir', default='samples/', help='directory with the list of samples; default ./samples/')
    parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('--keep-real', action='store_true', default=False, help='do not subtract real (to get real lep efficiency)')
    parser.add_option('--debug', action='store_true')
    parser.add_option('--verbose', action='store_true')
    parser.add_option('--disable-cache', action='store_true', help='disable the entry cache')
    (options, args) = parser.parse_args()
    inputDir = options.input_dir
    outputDir = options.output_dir
    lepton = options.lepton
    region = options.region
    keepreal = options.keep_real
    debug = options.debug
    verbose = options.verbose
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    regions = kin.selection_formulas().keys()
    assert region in regions,"invalid region '%s', must be one of %s"%(region, str(sorted(regions)))
    # from here on only the requested region is processed
    regions = [region]
    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(options.samples_dir)
    if options.group : groups = [g for g in groups if g.name==options.group]
    group_names = [g.name for g in groups]
    outputDir = outputDir+'/'+region+'/'+lepton # split the output in subdirectories, so we don't overwrite things
    mkdirIfNeeded(outputDir)
    templateOutputFilename = "scale_factor_{0}.root".format(lepton)
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_cache.root')
    # only refill when forced or when the cache file is missing
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos
    if verbose : utils.print_running_conditions(parser, options)
    vars = ['mt0', 'mt1', 'pt0', 'pt1', 'eta1', 'pt1_eta1']
    #fill histos
    if doFillHistograms :
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroup = bookHistos(vars, group_names, region=region)
        histosPerSource = bookHistosPerSource(vars, leptonSources, region=region)
        histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, group_names, leptonSources, region=region)
        for group in groups:
            tree_name = 'hlfv_tuple'
            chain = IndexedChain(tree_name)
            for ds in group.datasets:
                fname = os.path.join(inputDir, ds.name+'.root')
                # skip datasets whose ntuple file has not been produced
                if os.path.exists(fname):
                    chain.Add(fname)
            if verbose:
                print "{0} : {1} entries from {2} samples".format(group.name, chain.GetEntries(), len(group.datasets))
            chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/')
            tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions]
            print 'tcuts ',[c.GetName() for c in tcuts]
            # look up which selections already have a cached entry list
            chain.retrieve_entrylists(tcuts)
            counters_pre, histos_pre = dict(), dict()
            counters_npre, histos_npre = dict(), dict()
            print 'tcuts_with_existing_list ',str([c.GetName() for c in chain.tcuts_with_existing_list()])
            print 'tcuts_without_existing_list ',str([c.GetName() for c in chain.tcuts_without_existing_list()])
            cached_tcuts = [] if options.disable_cache else chain.tcuts_with_existing_list()
            print 'cached_tcuts ',[c.GetName() for c in cached_tcuts]
            uncached_tcuts = tcuts if options.disable_cache else chain.tcuts_without_existing_list()
            print 'todo: skip cuts for which the histo files are there'
            if verbose:
                print " --- group : {0} ---".format(group.name)
                print '\n\t'.join(chain.filenames)
            if verbose : print 'filling cached cuts: ',' '.join([c.GetName() for c in cached_tcuts])
            if verbose: print "%s : %d entries"%(group.name, chain.GetEntries())
            histosThisGroup = histosPerGroup[group.name]
            histosThisGroupPerSource = dict((v, histosPerGroupPerSource[v][group.name]) for v in histosPerGroupPerSource.keys())
            for cut in cached_tcuts:
                print 'cached_tcut ',cut
                # restrict the chain to the cached entry list for this cut
                chain.preselect(cut)
                num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource,
                                                    histosThisGroupPerSource,
                                                    lepton, group,
                                                    cut, cut_is_cached=True,
                                                    onthefly_tight_def=onthefly_tight_def,
                                                    verbose=verbose)
            if verbose : print 'filling uncached cuts: ',' '.join([c.GetName() for c in uncached_tcuts])
            if uncached_tcuts:
                assert len(uncached_tcuts)==1, "expecting only one cut, got {}".format(len(uncached_tcuts))
                cut = uncached_tcuts[0]
                # full loop over the chain; the entry list built here is saved below
                chain.preselect(None)
                num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource,
                                                    histosThisGroupPerSource,
                                                    lepton, group,
                                                    cut, cut_is_cached=False,
                                                    onthefly_tight_def=onthefly_tight_def,
                                                    verbose=verbose)
                chain.save_lists()
        writeHistos(cacheFileName, histosPerGroup, histosPerSource, histosPerGroupPerSource, verbose)
        end_time = time.clock()
        delta_time = end_time - start_time
        if verbose:
            print ("processed {0:d} entries ".format(num_processed_entries)
                   +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>60 else "{0:.1f} s ".format(delta_time))
                   +"({0:.1f} kHz)".format(num_processed_entries/delta_time))
    # return
    # compute scale factors
    histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, group_names, region), verbose)
    histosPerSource = fetchHistos(cacheFileName, histoNamesPerSource(vars, leptonSources, region), verbose)
    histosPerSamplePerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, group_names, leptonSources, region), verbose)
    plotStackedHistos(histosPerGroup, outputDir+'/by_group', region, verbose)
    plotStackedHistosSources(histosPerSource, outputDir+'/by_source', region, verbose)
    plotPerSourceEff(histosPerVar=histosPerSource, outputDir=outputDir+'/by_source', lepton=lepton, region=region, verbose=verbose)
    for g in group_names:
        hps = dict((v, histosPerSamplePerSource[v][g])for v in vars)
        plotPerSourceEff(histosPerVar=hps, outputDir=outputDir, lepton=lepton, region=region, sample=g, verbose=verbose)
    hn_sf_eta = histoname_sf_vs_eta (lepton)
    hn_sf_pt = histoname_sf_vs_pt (lepton)
    hn_da_eta = histoname_data_fake_eff_vs_eta(lepton)
    hn_da_pt = histoname_data_fake_eff_vs_pt (lepton)
    subtractReal = not keepreal
    objs_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'eta1', hn_sf_eta, hn_da_eta, outputDir, region, subtractReal, verbose)
    objs_pt = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1', hn_sf_pt, hn_da_pt, outputDir, region, subtractReal, verbose)
    objs_pt_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1_eta1', histoname_sf_vs_pt_eta(lepton), histoname_data_fake_eff_vs_pt_eta(lepton), outputDir, region, subtractReal, verbose)
    rootUtils.writeObjectsToFile(outputFileName, dictSum(dictSum(objs_eta, objs_pt), objs_pt_eta), verbose)
    if verbose : print "saved scale factors to %s" % outputFileName
def main(): parser = optparse.OptionParser(usage=usage) parser.add_option('-i', '--input-dir', default='./out/fakerate') parser.add_option('-o', '--output-dir', default='./out/fakerate/efficiencies') parser.add_option('-l', '--lepton', default='el', help='either el or mu') parser.add_option('-m', '--mode', help='emu') parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)') parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)') parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.') parser.add_option('-v', '--verbose', action='store_true', default=False) (options, args) = parser.parse_args() inputDir = options.input_dir outputDir = options.output_dir lepton = options.lepton mode = options.mode tag = options.tag verbose = options.verbose if not tag : parser.error('tag is a required option') if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton) validModes = ['emu'] if mode not in validModes : parser.error("invalid mode %s"%mode) tupleStem, treeName = filter(lambda _: _[0]==mode, fakeu.tupleStemsAndNames)[0] templateInputFilename = "*_%(stem)s_tuple_%(tag)s.root" % {'tag':tag, 'stem':tupleStem} templateOutputFilename = "%(stem)s_%(l)s_eff.root" % {'stem':tupleStem.replace('tuple','histos'), 'l':lepton} outputFileName = os.path.join(outputDir, templateOutputFilename) cacheFileName = outputFileName.replace('.root', '_'+mode+'_cache.root') doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName) onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos optionsToPrint = ['inputDir', 'outputDir', 'mode', 'tag', 'doFillHistograms', 'cacheFileName', 'onthefly_tight_def'] if verbose : print "working from %s"%os.getcwd() print "being called as : %s"%' '.join(os.sys.argv) print "options parsed:\n"+'\n'.join(["%s : 
%s"%(o, eval(o)) for o in optionsToPrint]) print 'input filenames: ',os.path.join(inputDir, templateInputFilename) # collect inputs tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename)) samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose)) samplesPerGroup = collections.defaultdict(list) filenamesPerGroup = collections.defaultdict(list) mkdirIfNeeded(outputDir) for s, f in zip(samples, tupleFilenames) : samplesPerGroup[s.group].append(s) filenamesPerGroup[s.group].append(f) vars = ['pt', 'pt_eta'] groups = [g for g in samplesPerGroup.keys() if g is not 'higgs'] if lepton=='el' : groups = [g for g in groups if g is not 'heavyflavor'] sourcesThisMode = ['real', 'conv', 'heavy', 'light', 'unknown'] if lepton=='el' else ['real', 'heavy', 'light', 'unknown'] #fill histos if doFillHistograms : start_time = time.clock() num_processed_entries = 0 histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, groups, sourcesThisMode, mode=mode) for group in groups: filenames = filenamesPerGroup[group] sources = histosPerGroupPerSource.keys() histosThisGroupPerSource = dict((s, histosPerGroupPerSource[s][group]) for s in sources) histosAnyGroupPerSource = dict((s, histosPerGroupPerSource[s]['anygroup']) for s in sources) if group!='data' else {} chain = r.TChain(treeName) [chain.Add(fn) for fn in filenames] if verbose: print "%s : %d entries"%(group, chain.GetEntries()) is_data = group in ['data'] print 'is_data ',is_data num_processed_entries += fillHistos(chain=chain, histosPerSource=histosThisGroupPerSource, histosPerSourceAnygroup=histosAnyGroupPerSource, lepton=lepton, onthefly_tight_def=onthefly_tight_def, verbose=verbose) writeHistos(cacheFileName, histosPerGroupPerSource, verbose) end_time = time.clock() delta_time = end_time - start_time one_minute = 60 if verbose: print ("processed {0:d} entries ".format(num_processed_entries) +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>one_minute else "{0:.1f} s 
".format(delta_time)) +"({0:.1f} kHz)".format(num_processed_entries/delta_time)) # plot histos histosPerGroupPerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, groups, sourcesThisMode, mode), verbose) # effs = computeEfficiencies(histosPerGroupPerSource) # still [var][gr][source][l/t] for v in vars: varIs1D, varIs2D = v=='pt', v=='pt_eta' densThisSourceThisVar = dictSum(dict((s, histosPerGroupPerSource[v]['anygroup'][s]['loose']) for s in sourcesThisMode), {'data' : histosPerGroupPerSource[v]['data']['unknown']['loose']}) numsThisSourceThisVar = dictSum(dict((s, histosPerGroupPerSource[v]['anygroup'][s]['tight']) for s in sourcesThisMode), {'data' : histosPerGroupPerSource[v]['data']['unknown']['tight']}) if varIs1D: lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#varepsilon(T|L)' cname = 'stack_loose_'+lepton lT, lY = 'loose '+lepton+', denominator to #varepsilon(T|L)', '#varepsilon(T|L)' title = lT+' '+'anysource'+' '+lepton+';'+lX+';'+lY plotStackedHistosWithData(densThisSourceThisVar, outputDir, cname, title, colors=fakeu.colorsFillSources(), verbose=verbose) cname = 'stack_tight_'+lepton lT, lY = 'tight '+lepton+', numerator to #varepsilon(T|L)', '#varepsilon(T|L)' title = lT+' '+'anysource'+' '+lepton+';'+lX+';'+lY plotStackedHistosWithData(numsThisSourceThisVar, outputDir, cname, title, colors=fakeu.colorsFillSources(), verbose=verbose) for s in sourcesThisMode: for v in vars: groups = first(histosPerGroupPerSource).keys() varIs1D, varIs2D = v=='pt', v=='pt_eta' # effsThisSourceThisVar = dict((g, effs[v][g][s]) for g in groups) densThisSourceThisVar = dictSum(dict((g, histosPerGroupPerSource[v][g][s]['loose']) for g in groups if g not in ['anygroup','data']), {'data' : histosPerGroupPerSource[v]['data']['unknown']['loose']}) numsThisSourceThisVar = dictSum(dict((g, histosPerGroupPerSource[v][g]['unknown']['tight']) for g in groups if g not in ['anygroup','data']), {'data' : histosPerGroupPerSource[v]['data']['unknown']['tight']}) if 
varIs1D: # cname = 'eff_'+lepton+'_'+s lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#varepsilon(T|L)' # title = lT+' '+s+' '+lepton+';'+lX+';'+lY # zoomIn = True # fakeu.plot1dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn) cname = 'stack_loose_'+lepton+'_'+s lT, lY = 'loose '+lepton+', denominator to #varepsilon(T|L)', '#varepsilon(T|L)' title = lT+' '+s+' '+lepton+';'+lX+';'+lY plotStackedHistosWithData(densThisSourceThisVar, outputDir, cname, title, colors=SampleUtils.colors, verbose=verbose) cname = 'stack_tight_'+lepton+'_'+s lT, lY = 'tight '+lepton+', numerator to #varepsilon(T|L)', '#varepsilon(T|L)' title = lT+' '+s+' '+lepton+';'+lX+';'+lY plotStackedHistosWithData(numsThisSourceThisVar, outputDir, cname, title, colors=SampleUtils.colors, verbose=verbose) # elif varIs2D: # cname = 'eff_'+lepton+'_'+s # lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#eta' # title = lT+' '+s+' '+lepton+';'+lX+';'+lY # fakeu.plot2dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn=zoomIn) # writeHistos(outputFileName, effs, verbose) if verbose : print "saved scale factors to %s" % outputFileName
def main(): parser = optparse.OptionParser(usage=usage) parser.add_option('-g', '--group', help='group to be processed (used only in fill mode)') parser.add_option('-i', '--input-dir', default='./out/fakerate') parser.add_option('-o', '--output-dir', default='./out/fake_scale_factor') parser.add_option('-l', '--lepton', default='el', help='either el or mu') parser.add_option( '-r', '--region', help='one of the regions for which we saved the fake ntuples') parser.add_option( '--samples-dir', default='samples/', help='directory with the list of samples; default ./samples/') parser.add_option( '-T', '--tight-def', help= 'on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.' ) parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)') parser.add_option('--keep-real', action='store_true', default=False, help='do not subtract real (to get real lep efficiency)') parser.add_option('--debug', action='store_true') parser.add_option('--verbose', action='store_true') parser.add_option('--disable-cache', action='store_true', help='disable the entry cache') (options, args) = parser.parse_args() inputDir = options.input_dir outputDir = options.output_dir lepton = options.lepton region = options.region keepreal = options.keep_real debug = options.debug verbose = options.verbose if lepton not in ['el', 'mu']: parser.error("invalid lepton '%s'" % lepton) regions = kin.selection_formulas().keys() assert region in regions, "invalid region '%s', must be one of %s" % ( region, str(sorted(regions))) regions = [region] dataset.Dataset.verbose_parsing = True if debug else False groups = dataset.DatasetGroup.build_groups_from_files_in_dir( options.samples_dir) if options.group: groups = [g for g in groups if g.name == options.group] group_names = [g.name for g in groups] outputDir = outputDir + '/' + region + '/' + lepton # split the output in subdirectories, so we don't overwrite things 
mkdirIfNeeded(outputDir) templateOutputFilename = "scale_factor_{0}.root".format(lepton) outputFileName = os.path.join(outputDir, templateOutputFilename) cacheFileName = outputFileName.replace('.root', '_cache.root') doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName) onthefly_tight_def = eval( options.tight_def ) if options.tight_def else None # eval will take care of aborting on typos if verbose: utils.print_running_conditions(parser, options) vars = ['mt0', 'mt1', 'pt0', 'pt1', 'eta1', 'pt1_eta1'] #fill histos if doFillHistograms: start_time = time.clock() num_processed_entries = 0 histosPerGroup = bookHistos(vars, group_names, region=region) histosPerSource = bookHistosPerSource(vars, leptonSources, region=region) histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, group_names, leptonSources, region=region) for group in groups: tree_name = 'hlfv_tuple' chain = IndexedChain(tree_name) for ds in group.datasets: fname = os.path.join(inputDir, ds.name + '.root') if os.path.exists(fname): chain.Add(fname) if verbose: print "{0} : {1} entries from {2} samples".format( group.name, chain.GetEntries(), len(group.datasets)) chain.cache_directory = os.path.abspath('./selection_cache/' + group.name + '/') tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions] print 'tcuts ', [c.GetName() for c in tcuts] chain.retrieve_entrylists(tcuts) counters_pre, histos_pre = dict(), dict() counters_npre, histos_npre = dict(), dict() print 'tcuts_with_existing_list ', str( [c.GetName() for c in chain.tcuts_with_existing_list()]) print 'tcuts_without_existing_list ', str( [c.GetName() for c in chain.tcuts_without_existing_list()]) cached_tcuts = [] if options.disable_cache else chain.tcuts_with_existing_list( ) print 'cached_tcuts ', [c.GetName() for c in cached_tcuts] uncached_tcuts = tcuts if options.disable_cache else chain.tcuts_without_existing_list( ) print 'todo: skip cuts for which the histo files are there' if verbose: print " --- 
group : {0} ---".format(group.name) print '\n\t'.join(chain.filenames) if verbose: print 'filling cached cuts: ', ' '.join( [c.GetName() for c in cached_tcuts]) if verbose: print "%s : %d entries" % (group.name, chain.GetEntries()) histosThisGroup = histosPerGroup[group.name] histosThisGroupPerSource = dict( (v, histosPerGroupPerSource[v][group.name]) for v in histosPerGroupPerSource.keys()) for cut in cached_tcuts: print 'cached_tcut ', cut chain.preselect(cut) num_processed_entries += fillHistos( chain, histosThisGroup, histosPerSource, histosThisGroupPerSource, lepton, group, cut, cut_is_cached=True, onthefly_tight_def=onthefly_tight_def, verbose=verbose) if verbose: print 'filling uncached cuts: ', ' '.join( [c.GetName() for c in uncached_tcuts]) if uncached_tcuts: assert len(uncached_tcuts ) == 1, "expecting only one cut, got {}".format( len(uncached_tcuts)) cut = uncached_tcuts[0] chain.preselect(None) num_processed_entries += fillHistos( chain, histosThisGroup, histosPerSource, histosThisGroupPerSource, lepton, group, cut, cut_is_cached=False, onthefly_tight_def=onthefly_tight_def, verbose=verbose) chain.save_lists() writeHistos(cacheFileName, histosPerGroup, histosPerSource, histosPerGroupPerSource, verbose) end_time = time.clock() delta_time = end_time - start_time if verbose: print("processed {0:d} entries ".format(num_processed_entries) + "in " + ("{0:d} min ".format(int(delta_time / 60)) if delta_time > 60 else "{0:.1f} s ".format(delta_time)) + "({0:.1f} kHz)".format(num_processed_entries / delta_time)) # return # compute scale factors histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, group_names, region), verbose) histosPerSource = fetchHistos( cacheFileName, histoNamesPerSource(vars, leptonSources, region), verbose) histosPerSamplePerSource = fetchHistos( cacheFileName, histoNamesPerSamplePerSource(vars, group_names, leptonSources, region), verbose) plotStackedHistos(histosPerGroup, outputDir + '/by_group', region, verbose) 
plotStackedHistosSources(histosPerSource, outputDir + '/by_source', region, verbose) plotPerSourceEff(histosPerVar=histosPerSource, outputDir=outputDir + '/by_source', lepton=lepton, region=region, verbose=verbose) for g in group_names: hps = dict((v, histosPerSamplePerSource[v][g]) for v in vars) plotPerSourceEff(histosPerVar=hps, outputDir=outputDir, lepton=lepton, region=region, sample=g, verbose=verbose) hn_sf_eta = histoname_sf_vs_eta(lepton) hn_sf_pt = histoname_sf_vs_pt(lepton) hn_da_eta = histoname_data_fake_eff_vs_eta(lepton) hn_da_pt = histoname_data_fake_eff_vs_pt(lepton) subtractReal = not keepreal objs_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'eta1', hn_sf_eta, hn_da_eta, outputDir, region, subtractReal, verbose) objs_pt = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1', hn_sf_pt, hn_da_pt, outputDir, region, subtractReal, verbose) objs_pt_eta = subtractRealAndComputeScaleFactor( histosPerGroup, 'pt1_eta1', histoname_sf_vs_pt_eta(lepton), histoname_data_fake_eff_vs_pt_eta(lepton), outputDir, region, subtractReal, verbose) rootUtils.writeObjectsToFile( outputFileName, dictSum(dictSum(objs_eta, objs_pt), objs_pt_eta), verbose) if verbose: print "saved scale factors to %s" % outputFileName