Python dictSumの例

プログラミング言語: Python

名前空間/パッケージ名: utils

メソッド/関数: dictSum

hotexamples.comのコード掲載数: 18

Python dictSum - 18件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのutils.dictSumの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: optimizeSelection.py プロジェクト: gerbaudo/DileptonFakeMeasurement

def drawBottom(pad, totBkg, bkgHistos, sigHisto, llnjvar) :
    """Draw the background stack with the signal overlaid on `pad`.

    pad       -- pad to draw on; it is made current with cd()
    totBkg    -- histogram drawn first with the 'axis' option
    bkgHistos -- dictionary of background histograms keyed by sample
    sigHisto  -- signal histogram, drawn in red on top of the stack
    llnjvar   -- string used in the stack name and as the top-right label

    The stack and the histograms it holds are attached to the pad as
    `_stack` and `_histos`.
    """
    pad.cd()
    totBkg.SetStats(False)
    # pin the lower edge at zero: the fake estimate can fluctuate negative
    totBkg.SetMinimum(0.)
    totBkg.Draw('axis')
    pad.Update() # per the original note: needed to fool root's object ownership
    bkgStack = r.THStack('stack_'+llnjvar, '')
    pad.Update()
    r.SetOwnership(bkgStack, False)
    for sample, histo in bkgHistos.items() :
        # samples without a dedicated color fall back to orange
        fillColor = r.kOrange if sample not in colors else colors[sample]
        histo.SetFillColor(fillColor)
        histo.SetDrawOption('bar')
        histo.SetDirectory(0)
        bkgStack.Add(histo)
    bkgStack.Draw('hist same')
    pad.Update()
    doubledWidth = 2*sigHisto.GetLineWidth()
    sigHisto.SetLineColor(r.kRed)
    sigHisto.SetLineWidth(doubledWidth)
    sigHisto.Draw('same')
    pad.Update()
    topRightLabel(pad, llnjvar, xpos=0.125, align=13)
    legendEntries = dictSum(bkgHistos, {'signal' : sigHisto})
    drawLegendWithDictKeys(pad, legendEntries, opt='f')
    pad.RedrawAxis()
    # presumably stored on the pad so the objects outlive this call -- TODO confirm
    pad._stack = bkgStack
    pad._histos = [stacked for stacked in bkgStack.GetHists()]
    pad.Update()

コード例 #2

ファイルを表示

ファイル: optimizeSelection.py プロジェクト: gerbaudo/DileptonFakeMeasurement

def optimizeSelection() :
    """Book, fill and plot the signal and background histograms, then print the counts.

    Everything is driven by the command-line options returned by
    parseOptions(): the signal samples are restricted to those selected by
    filterWithRegexp() with `options.sigreg`, histograms are booked and
    filled for all remaining samples, and the results go to plotHistos()
    and printSummary().
    """
    inputdir, options = parseOptions()

    # same output as the Python 2 statement "print 'sigreg ',options.sigreg"
    print('sigreg  %s' % (options.sigreg,))
    tag = pickTag(inputdir, options)
    sigFiles, bkgFiles = getInputFilenames(inputdir, tag, options) # todo: filter with regexp
    # Run the regexp filter once rather than once per signal sample.
    selectedSignals = set(filterWithRegexp(sigFiles.keys(), options.sigreg))
    sigFiles = dict((s, k) for s, k in sigFiles.items() if s in selectedSignals)
    allSamples = dictSum(sigFiles, bkgFiles)
    variables = variablesToPlot()
    histos = bookHistos(variables, allSamples.keys(), options.ll, options.nj)
    counts = fillHistosAndCount(histos, allSamples, options.ll, options.nj, options.quicktest)
    bkgHistos = dict((s, h) for s, h in histos.items() if s in bkgFiles)
    sigHistos = dict((s, h) for s, h in histos.items() if s in sigFiles)
    plotHistos(bkgHistos, sigHistos, options.plotdir)
    printSummary(counts, options.summary)

コード例 #3

ファイルを表示

ファイル: scoutPerformance.py プロジェクト: head1ton/server-2016

	def scoutedScoreForMatchNum(self, match, allianceIsRed):
		"""Return an alliance's score for `match` computed from the scouted data.

		The score is the sum of the auto points (from the calculator), teleop
		shot points (2 per low shot, 5 per high shot), teleop defense-crossing
		points (5 per crossing, at most 2 crossings counted per defense for the
		whole alliance), scale points (15 each) and challenge points (5 each).

		match         -- match whose TIMDs (team-in-match data) are scored
		allianceIsRed -- selects which alliance of the match is scored

		The TIMDs are only read: crossing counts are built in fresh
		dictionaries, so the method can be called repeatedly on the same data.
		"""
		allTIMDs = self.calculator.su.getTIMDsForMatch(match)
		alliance = self.calculator.su.getAllianceForMatch(match, allianceIsRed)
		# A set can be tested repeatedly (a Python 3 map() iterator cannot).
		allianceNumbers = set(team.number for team in alliance)
		allianceTIMDs = [timd for timd in allTIMDs if timd.teamNumber in allianceNumbers]

		autoPts = self.calculator.getAutoPointsForMatchForAllianceIsRed(match, allianceIsRed)

		lowShots = sum((timd.numLowShotsMadeTele or 0) for timd in allianceTIMDs)
		highShots = sum((timd.numHighShotsMadeTele or 0) for timd in allianceTIMDs)
		teleShotPts = 2 * lowShots + 5 * highShots

		# Per-TIMD number of crossings for every known defense; a missing or
		# None entry counts as zero crossings.
		crossingCounts = []
		for timd in allianceTIMDs:
			recorded = timd.timesSuccessfulCrossedDefensesTele
			counts = dict(recorded)
			for key in self.calculator.defenseList:
				crossingTimes = recorded.get(key)
				counts[key] = 0 if crossingTimes is None else len(crossingTimes)
			crossingCounts.append(counts)

		# Combine right to left, i.e. dictSum(a, dictSum(b, c)) for the usual
		# three TIMDs, without assuming that exactly three are available.
		if crossingCounts:
			allDefenseCrossings = crossingCounts[-1]
			for counts in reversed(crossingCounts[:-1]):
				allDefenseCrossings = utils.dictSum(counts, allDefenseCrossings)
		else:
			allDefenseCrossings = {}

		# Only the first two crossings of each defense are worth points.
		cappedCrossings = dict(
			(defense, 2 if crossings > 2 else crossings)
			for defense, crossings in allDefenseCrossings.items())

		teleDefenseCrossPts = 5 * sum(cappedCrossings.values())

		scalePts = 15 * sum([utils.convertFirebaseBoolean(timd.didScaleTele) for timd in allianceTIMDs])
		challengePts = 5 * sum([utils.convertFirebaseBoolean(timd.didChallengeTele) for timd in allianceTIMDs])

		return autoPts + teleShotPts + teleDefenseCrossPts + scalePts + challengePts

コード例 #4

ファイルを表示

def plotStackedHistos(histos={},
                      datakey=None,
                      stackkeys=[],
                      outputDir='',
                      region='',
                      colors={},
                      verbose=False):
    """Draw the stacked background histograms (plus data, if any) and save a png.

    histos    -- dictionary of histograms keyed by group, i.e. histos[group]
    datakey   -- key of the data histogram in `histos`; no data is drawn when
                 it is None/empty or missing from `histos`
    stackkeys -- keys of the histograms that go in the background stack
    outputDir -- directory of the output png (passed to mkdirIfNeeded first)
    region    -- second line of the '#splitline' label drawn on the canvas
    colors    -- fill color per group; groups without an entry use r.kOrange
    verbose   -- print progress messages

    Returns None. Nothing is drawn or saved when the summed background has
    zero integral.

    NOTE(review): the {} and [] defaults are shared between calls; they are
    only read in this function.
    """
    mkdirIfNeeded(outputDir)
    # only the histograms listed in stackkeys enter the background stack
    bkg_histos = dict([(k, h) for k, h in histos.iteritems()
                       if k in stackkeys])
    tot_bkg = summedHisto(bkg_histos.values(), label='')
    err_band = None  # tmp disable
    # err_band = buildErrBandGraph(tot_bkg, computeStatErr2(tot_bkg))
    empty_bkg = tot_bkg.Integral() == 0
    if empty_bkg:
        if verbose: print "empty backgrounds, skip %s" % tot_bkg.GetName()
        return
    # the name of the summed histogram names the canvas and the output file
    histoname = tot_bkg.GetName()
    can = r.TCanvas('c_' + histoname, histoname, 800, 600)
    can.cd()
    pm = tot_bkg  # pad master
    pm.SetStats(False)
    pm.Draw('axis')
    can.Update()  # necessary to fool root's dumb object ownership
    stack = r.THStack('stack_' + tot_bkg.GetName(), '')
    can.Update()
    r.SetOwnership(stack, False)
    for s, h in bkg_histos.iteritems():
        # groups without a dedicated color fall back to orange
        h.SetFillColor(colors[s] if s in colors else r.kOrange)
        h.SetDrawOption('bar')
        h.SetDirectory(0)
        stack.Add(h)
    stack.Draw('hist same')
    # err_band.Draw('E2 same')
    data = histos[datakey] if datakey and datakey in histos else None
    if data and data.GetEntries():
        data.SetMarkerStyle(r.kFullDotLarge)
        data.Draw('p same')
        if verbose:
            print "data : nEntries {:.1f} totWeight {:.1f} ".format(
                data.GetEntries(), data.Integral())
    # falsy entries (data and err_band may be None) are left out of the range
    # computation; yMin is not used afterwards
    yMin, yMax = getMinMax([h for h in [tot_bkg, data, err_band] if h])
    # pm.SetMinimum(0.5)
    pm.SetMaximum(1.1 * yMax)
    can.Update()
    # can.SetLogy()
    topRightLabel(can,
                  "#splitline{%s}{%s}" % (histoname, region),
                  xpos=0.125,
                  align=13)
    # NOTE(review): err_band is None while the error band is disabled, so the
    # 'stat err' entry handed to the legend helper is None
    drawLegendWithDictKeys(can,
                           dictSum(bkg_histos, {'stat err': err_band}),
                           opt='f')
    can.RedrawAxis()
    # the drawn objects are attached to the canvas; `data` may be None here
    can._stack = stack
    can._histos = [h for h in stack.GetHists()] + [data]
    can.Update()
    if verbose: print os.path.join(outputDir, histoname + '.png')
    can.SaveAs(os.path.join(outputDir, histoname + '.png'))

コード例 #5

ファイルを表示

ファイル: compute_fake_scale_factor.py プロジェクト: gerbaudo/DileptonFakeMeasurement

def plotStackedHistos(histosPerGroup={}, outputDir='', region='', verbose=False):
    """Draw one stacked-background canvas per (variable, lepton type) and save it as png.

    histosPerGroup -- nested dictionary histosPerGroup[group][variable][leptonType];
                      the variables and lepton types are read from its first entry
    outputDir      -- directory of the png files (passed to mkdirIfNeeded first)
    region         -- prefix of the histogram/canvas names
    verbose        -- print why a canvas is skipped

    A canvas is skipped when some group has no histogram for it, or when the
    summed background has zero integral. Groups for which isBkgSample() is
    true go in the stack; the 'data' group, when there is one, is drawn as
    points on top of it.
    """
    groups = histosPerGroup.keys()
    variables = first(histosPerGroup).keys()
    leptonTypes = first(first(histosPerGroup)).keys()
    colors = SampleUtils.colors
    mkdirIfNeeded(outputDir)
    # one canvas for each histo, so key with histoname w/out group
    histosPerName = dict([(region+'_'+var+'_'+lt,
                           dict([(g, histosPerGroup[g][var][lt]) for g in groups]))
                          for var in variables for lt in leptonTypes])
    # The loop variable has its own name so that it does not shadow the
    # histosPerGroup parameter.
    for histoname, histosOfCanvas in histosPerName.items():
        missingGroups = [g for g, h in histosOfCanvas.items() if not h]
        if missingGroups:
            if verbose:
                print("skip %s, missing histos for %s"%(histoname, str(missingGroups)))
            continue
        bkgHistos = dict([(g, h) for g, h in histosOfCanvas.items() if isBkgSample(g)])
        totBkg = summedHisto(bkgHistos.values())
        err_band = buildErrBandGraph(totBkg, computeStatErr2(totBkg))
        emptyBkg = totBkg.Integral()==0
        if emptyBkg:
            if verbose:
                print("empty backgrounds, skip %s"%histoname)
            continue
        can = r.TCanvas('c_'+histoname, histoname, 800, 600)
        can.cd()
        pm = totBkg # pad master
        pm.SetStats(False)
        pm.Draw('axis')
        can.Update() # necessary to fool root's dumb object ownership
        stack = r.THStack('stack_'+histoname,'')
        can.Update()
        r.SetOwnership(stack, False)
        for s, h in bkgHistos.items() :
            # groups without a dedicated color fall back to orange
            h.SetFillColor(colors[s] if s in colors else r.kOrange)
            h.SetDrawOption('bar')
            h.SetDirectory(0)
            stack.Add(h)
        stack.Draw('hist same')
        err_band.Draw('E2 same')
        # Tolerate inputs without a 'data' group instead of raising KeyError.
        data = histosOfCanvas.get('data')
        if data and data.GetEntries():
            data.SetMarkerStyle(r.kFullDotLarge)
            data.Draw('p same')
        # only the maximum is needed; falsy entries (e.g. no data) are left out
        _, yMax = getMinMax([h for h in [totBkg, data, err_band] if h])
        pm.SetMinimum(0.0)
        pm.SetMaximum(1.1*yMax)
        can.Update()
        topRightLabel(can, histoname, xpos=0.125, align=13)
        drawLegendWithDictKeys(can, dictSum(bkgHistos, {'stat err':err_band}), opt='f')
        can.RedrawAxis()
        # the drawn objects are attached to the canvas; `data` may be None here
        can._stack = stack
        can._histos = [h for h in stack.GetHists()]+[data]
        can.Update()
        outFname = os.path.join(outputDir, histoname+'.png')
        utils.rmIfExists(outFname)
        can.SaveAs(outFname)

コード例 #6

ファイルを表示

ファイル: compute_fake_factor.py プロジェクト: gerbaudo/DileptonFakeMeasurement

def plotStackedHistosWithData(histosPerGroup={}, outputDir='', canvasname='', canvastitle='', colors={}, verbose=False):
    """Draw the stacked non-data histograms with the data on top and save a png.

    histosPerGroup -- dictionary histosPerGroup[group]; the 'data' group is
                      treated as special (drawn as points, not stacked)
    outputDir      -- directory of the output png (passed to mkdirIfNeeded first)
    canvasname     -- name of the canvas
    canvastitle    -- title of the canvas
    colors         -- fill color per group; groups without an entry use r.kOrange
    verbose        -- print progress messages

    Returns None. Nothing is drawn when a group has no histogram or when the
    summed background has zero integral. The png is named after the summed
    background histogram.
    """
    mkdirIfNeeded(outputDir)
    missingGroups = [g for g, h in histosPerGroup.items() if not h]
    if missingGroups:
        # The histogram name is only known once the backgrounds are summed, so
        # the canvas name identifies the plot being skipped here.
        if verbose:
            print("skip %s, missing histos for %s"%(canvasname, str(missingGroups)))
        return
    bkgHistos = dict([(g, h) for g, h in histosPerGroup.items() if not isDataSample(g)])
    totBkg = summedHisto(bkgHistos.values())
    err_band = buildErrBandGraph(totBkg, computeStatErr2(totBkg))
    emptyBkg = totBkg.Integral()==0
    histoname, region = totBkg.GetName(), 'emu' # tmp replacement vars, to be fixed
    if emptyBkg:
        if verbose:
            print("empty backgrounds, skip %s"%histoname)
        return
    can = r.TCanvas(canvasname, canvastitle, 800, 600)
    can.cd()
    pm = totBkg # pad master
    pm.SetStats(False)
    pm.Draw('axis')
    can.Update() # necessary to fool root's dumb object ownership
    stack = r.THStack('stack_'+histoname,'')
    can.Update()
    r.SetOwnership(stack, False)
    for s, h in bkgHistos.items() :
        # groups without a dedicated color fall back to orange
        h.SetFillColor(colors[s] if s in colors else r.kOrange)
        h.SetDrawOption('bar')
        h.SetDirectory(0)
        stack.Add(h)
    stack.Draw('hist same')
    err_band.Draw('E2 same')
    data = histosPerGroup['data'] if 'data' in histosPerGroup else None
    if data and data.GetEntries():
        data.SetMarkerStyle(r.kFullDotLarge)
        data.Draw('p same')
        if verbose:
            print("integrals : {0} tot.bkg.: {1}, data: {2}".format(histoname, totBkg.Integral(), data.Integral()))
    else:
        # printed regardless of `verbose`, as before
        print("no data")
    # only the maximum is needed; falsy entries (e.g. no data) are left out
    _, yMax = getMinMax([h for h in [totBkg, data, err_band] if h])
    pm.SetMinimum(0.0)
    pm.SetMaximum(1.1*yMax)
    can.Update()
    topRightLabel(can, "#splitline{%s}{%s}"%(histoname, region), xpos=0.15, ypos=(1.0-0.5*can.GetTopMargin()), align=13)
    drawLegendWithDictKeys(can, dictSum(bkgHistos, {'stat err':err_band}), opt='f')
    can.RedrawAxis()
    # the drawn objects are attached to the canvas; `data` may be None here
    can._stack = stack
    can._histos = [h for h in stack.GetHists()]+[data]
    can.Update()
    filename=os.path.join(outputDir, histoname+'.png')
    rmIfExists(filename)
    can.SaveAs(filename)

コード例 #7

ファイルを表示

ファイル: compute_fake_scale_factor.py プロジェクト: gerbaudo/DileptonFakeMeasurement

def plotStackedHistosSources(histosPerVar={}, outputDir='', region='', verbose=False):
    """Draw, for each variable, the per-source stacks of loose and tight histograms.

    histosPerVar -- nested dictionary histosPerVar[variable][source][looseOrTight],
                    where the last key is 'loose' or 'tight'; the sources are
                    read from its first entry
    outputDir    -- directory of the png files (passed to mkdirIfNeeded first)
    region       -- prefix of the canvas names
    verbose      -- print why a canvas is skipped

    One png per (variable, loose/tight) is saved, unless a source has no
    histogram or the summed histogram has zero integral.
    """
    variables = histosPerVar.keys()
    sources = first(histosPerVar).keys()
    fillColors = colorsFillSources
    mkdirIfNeeded(outputDir)
    for var in variables:
        for looseOrTight in ('loose', 'tight'):
            histos = {}
            for source in sources:
                histos[source] = histosPerVar[var][source][looseOrTight]
            canvasBasename = region+'_region_'+var+'_'+looseOrTight
            missingSources = [source for source, histo in histos.items() if not histo]
            if missingSources:
                if verbose:
                    print("skip %s, missing histos for %s"%(var, str(missingSources)))
                continue
            totBkg = summedHisto(histos.values())
            err_band = buildErrBandGraph(totBkg, computeStatErr2(totBkg))
            if totBkg.Integral()==0:
                if verbose:
                    print("empty backgrounds, skip %s"%canvasBasename)
                continue
            can = r.TCanvas('c_'+canvasBasename, canvasBasename, 800, 600)
            can.cd()
            # the summed histogram acts as pad master, providing the axes
            totBkg.SetStats(False)
            totBkg.Draw('axis')
            can.Update() # per the original note: needed to fool root's object ownership
            stack = r.THStack('stack_'+canvasBasename, '')
            can.Update()
            r.SetOwnership(stack, False)
            for source, histo in histos.items():
                # sources without a dedicated color fall back to orange
                fillColor = r.kOrange if source not in fillColors else fillColors[source]
                histo.SetFillColor(fillColor)
                histo.SetDrawOption('bar')
                histo.SetDirectory(0)
                stack.Add(histo)
            stack.Draw('hist same')
            err_band.Draw('E2 same')
            drawnObjects = [obj for obj in (totBkg, err_band) if obj is not None]
            yMin, yMax = getMinMax(drawnObjects)
            totBkg.SetMinimum(0.0)
            totBkg.SetMaximum(1.1*yMax)
            can.Update()
            topRightLabel(can, canvasBasename, xpos=0.125, align=13)
            legendEntries = dictSum(histos, {'stat err':err_band})
            drawLegendWithDictKeys(can, legendEntries, opt='f')
            can.RedrawAxis()
            # the drawn objects are attached to the canvas
            can._stack = stack
            can._histos = [stacked for stacked in stack.GetHists()]
            can.Update()
            outFname = os.path.join(outputDir, canvasBasename+'.png')
            utils.rmIfExists(outFname)
            can.SaveAs(outFname)

コード例 #8

ファイルを表示

ファイル: plot_by_source.py プロジェクト: gerbaudo/SusyntHlfv

def plotStackedHistos(histos={}, datakey=None, stackkeys=[], outputDir='', region='', colors={}, verbose=False):
    """Stack the selected histograms, overlay the data if any, and save a png.

    histos    -- dictionary of histograms keyed by group, i.e. histos[group]
    datakey   -- key of the data histogram; nothing is overlaid when it is
                 None/empty or missing from `histos`
    stackkeys -- keys of the histograms to stack
    outputDir -- directory of the output png (passed to mkdirIfNeeded first)
    region    -- second line of the '#splitline' label drawn on the canvas
    colors    -- fill color per group; groups without an entry use r.kOrange
    verbose   -- print progress messages

    Returns None; returns early, without drawing, when the summed background
    has zero integral.
    """
    mkdirIfNeeded(outputDir)
    bkg_histos = dict((key, histo) for key, histo in histos.items() if key in stackkeys)
    tot_bkg = summedHisto(bkg_histos.values(), label='')
    # the statistical error band is temporarily disabled
    # err_band = buildErrBandGraph(tot_bkg, computeStatErr2(tot_bkg))
    err_band = None
    if tot_bkg.Integral()==0:
        if verbose:
            print("empty backgrounds, skip %s"%tot_bkg.GetName())
        return
    histoname = tot_bkg.GetName()
    can = r.TCanvas('c_'+histoname, histoname, 800, 600)
    can.cd()
    # the summed background acts as pad master, providing the axes
    tot_bkg.SetStats(False)
    tot_bkg.Draw('axis')
    can.Update() # per the original note: needed to fool root's object ownership
    stack = r.THStack('stack_'+histoname, '')
    can.Update()
    r.SetOwnership(stack, False)
    for key, histo in bkg_histos.items():
        # groups without a dedicated color fall back to orange
        fillColor = r.kOrange if key not in colors else colors[key]
        histo.SetFillColor(fillColor)
        histo.SetDrawOption('bar')
        histo.SetDirectory(0)
        stack.Add(histo)
    stack.Draw('hist same')
    # err_band.Draw('E2 same')
    data = None
    if datakey and datakey in histos:
        data = histos[datakey]
    if data and data.GetEntries():
        data.SetMarkerStyle(r.kFullDotLarge)
        data.Draw('p same')
        if verbose:
            print("data : nEntries {:.1f} totWeight {:.1f} ".format(data.GetEntries(), data.Integral()))
    # falsy entries (data and err_band may be None) are left out
    drawnObjects = [obj for obj in (tot_bkg, data, err_band) if obj]
    yMin, yMax = getMinMax(drawnObjects)
    # tot_bkg.SetMinimum(0.5)
    tot_bkg.SetMaximum(1.1*yMax)
    can.Update()
    # can.SetLogy()
    label = "#splitline{%s}{%s}"%(histoname, region)
    topRightLabel(can, label, xpos=0.125, align=13)
    legendEntries = dictSum(bkg_histos, {'stat err':err_band})
    drawLegendWithDictKeys(can, legendEntries, opt='f')
    can.RedrawAxis()
    # the drawn objects are attached to the canvas; `data` may be None here
    can._stack = stack
    can._histos = [stacked for stacked in stack.GetHists()]+[data]
    can.Update()
    pngPath = os.path.join(outputDir, histoname+'.png')
    if verbose:
        print(pngPath)
    can.SaveAs(pngPath)

コード例 #9

ファイルを表示

ファイル: teamCalculatedDataKeys.py プロジェクト: head1ton/server-2016

def getThirdTeamCalcDataKeys(calc):
    """Return the calculated-data keys mapped to the functions that compute them.

    Every value of the returned dictionary is a callable taking one team.
    Attributes of `calc` are looked up when a callable is invoked, not when
    this table is built.
    """
    def zScoreFrom(tableName):
        # index the named z-score table of calc.cachedComp by team number
        return lambda team: getattr(calc.cachedComp, tableName)[team.number]

    def predictedSeed(team):
        # index() is 0-based, hence the + 1
        return calc.cachedComp.predictedSeedings.index(team) + 1

    def totalCrossings(team):
        # auto and tele crossing averages combined with utils.dictSum
        return utils.dictSum(
            team.calculatedData.avgSuccessfulTimesCrossedDefensesAuto,
            team.calculatedData.avgSuccessfulTimesCrossedDefensesTele)

    def blockingAbility(team):
        # shots blocked beyond the average team's, scaled by the average
        # team's tele high-shot accuracy and a factor of 5
        blockedAboveAverage = (team.calculatedData.avgShotsBlocked
                               - calc.averageTeam.calculatedData.avgShotsBlocked)
        return blockedAboveAverage * calc.averageTeam.calculatedData.highShotAccuracyTele * 5

    def crossableFrom(crossingsAttr):
        # defenses crossable according to the named crossing-average dict
        return lambda team: calc.defensesCrossableByTeamForDefenseDict(
            team, getattr(team.calculatedData, crossingsAttr))

    keyFunctions = {}
    keyFunctions["predictedNumRPs"] = lambda team: calc.predictedNumberOfRPs(team)
    keyFunctions["actualNumRPs"] = lambda team: calc.getTeamRPsFromTBA(team)
    keyFunctions["actualSeed"] = lambda team: calc.getTeamSeed(team)
    keyFunctions["predictedSeed"] = predictedSeed
    for ability in ("Torque", "Speed", "Agility", "Defense", "BallControl", "DrivingAbility"):
        # e.g. "RScoreBallControl" reads calc.cachedComp.ballControlZScores
        tableName = ability[0].lower() + ability[1:] + "ZScores"
        keyFunctions["RScore" + ability] = zScoreFrom(tableName)
    keyFunctions["avgSuccessfulTimesCrossedDefenses"] = totalCrossings
    keyFunctions["blockingAbility"] = blockingAbility
    keyFunctions["defensesCrossableAuto"] = crossableFrom("avgSuccessfulTimesCrossedDefensesAuto")
    keyFunctions["defensesCrossableTele"] = crossableFrom("avgSuccessfulTimesCrossedDefensesTele")
    keyFunctions["firstPickAbility"] = lambda team: calc.firstPickAbility(team)
    keyFunctions["overallSecondPickAbility"] = lambda team: calc.overallSecondPickAbility(team)
    return keyFunctions

コード例 #10

ファイルを表示

ファイル: plot_emu.py プロジェクト: gerbaudo/SusyntHlfv

def runFill(opts) :
    """Fill the histograms for every group, systematic and region, or submit batch jobs to do so.

    `opts` is the options object; the attributes read here are batch,
    input_fake, input_other, output_dir, verbose, debug, unblind,
    require_tight_tight, samples_dir, tight_def, include_regions,
    exclude_regions, regions, disable_cache and quick_test (plus whatever the
    helpers read). In batch mode `opts.syst` is set before each submission.

    In batch mode one job is submitted per (group, needed systematic, region).
    Otherwise, for each group and each systematic that the group needs, a
    chain is built from the group's datasets, the regions are split in those
    with and without an existing entry list, histograms are filled for both
    sets and written to one file per selection.
    """
    batchMode    = opts.batch
    inputFakeDir = opts.input_fake
    inputGenDir  = opts.input_other
    outputDir    = opts.output_dir
    verbose      = opts.verbose
    debug        = opts.debug
    blinded      = not opts.unblind
    tightight    = opts.require_tight_tight

    if debug : dataset.Dataset.verbose_parsing = True
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir)
    # NOTE(review): skip_charge_flip is not set in this function; presumably a module-level flag
    if not skip_charge_flip : groups.append(dataset.DatasetGroup.build_qflip_from_simulated_samples(groups))
    groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake())
    groups = parse_group_option(opts, groups)
    if verbose :
        print('\n'.join("group {0} : {1} samples".format(g.name, len(g.datasets)) for g in groups))
    if debug :
        print('\n'.join("group {0} : {1} samples: {2}".format(g.name,
                                                              len(g.datasets),
                                                              '\n\t'+'\n\t'.join(d.name for d in g.datasets))
                        for g in groups))
    if verbose : print("filling histos")
    # eval will take care of aborting on typos
    # NOTE(security): eval executes the option string as code; only acceptable
    # as long as tight_def comes from a trusted command line.
    onthefly_tight_def = eval(opts.tight_def) if opts.tight_def else None
    mkdirIfNeeded(outputDir)
    systematics = get_list_of_syst_to_fill(opts)
    regions = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions)
    if verbose : print("about to loop over these systematics:\n %s"%str(systematics))
    if verbose : print("about to loop over these regions:\n %s"%str(regions))
    if batchMode:
        for group in groups:
            for systematic in systematics:
                if systUtils.Group(group.name).isNeededForSys(systematic):
                    opts.syst = systematic
                    for selection in regions:
                        submit_batch_fill_job_per_group_per_selection(group=group, selection=selection, opts=opts)
    else:
        for group in groups:
            # Filter into a per-group list: rebinding `systematics` here would
            # leave every later group with only what the earlier groups kept.
            group_systematics = [s for s in systematics
                                 if systUtils.Group(group.name).isNeededForSys(s)]
            if not group_systematics :
                print("warning, empty syst list. You should have at least the nominal")
            input_dir = inputFakeDir if group.name=='fake' else inputGenDir
            for systematic in group_systematics:
                # note to self: here you will want to use a modified Sample.setHftInputDir
                # for now we just have the fake syst that are in the nominal tree
                tree_name = 'hlfv_tuple'
                chain = IndexedChain(tree_name)
                for ds in group.datasets:
                    chain.Add(os.path.join(input_dir, systUtils.Sample(ds.name, group.name).setSyst(systematic).filename))
                if verbose:
                    print("{0} : {1} entries from {2} samples".format(group.name,
                                                                      chain.GetEntries(),
                                                                      len(group.datasets)))
                chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/')
                formulas = selection_formulas() # looked up once, not once per region
                tcuts = [r.TCut(reg, formulas[reg]) for reg in regions]
                chain.retrieve_entrylists(tcuts)
                counters_pre, histos_pre = dict(), dict()
                counters_npre, histos_npre = dict(), dict()
                # with the cache disabled every cut is treated as uncached
                cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list()
                uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list()
                if verbose :
                    # two spaces after the colon: same output as the Python 2
                    # two-item print statement this replaces
                    print('filling cached cuts:  ' + ' '.join([c.GetName() for c in cached_tcuts]))
                for cut in cached_tcuts:
                    chain.preselect(cut)
                    c_pre, h_pre = count_and_fill(chain=chain, sample=group.name,
                                                  syst=systematic, verbose=verbose,
                                                  debug=debug, blinded=blinded,
                                                  onthefly_tight_def=onthefly_tight_def,
                                                  tightight=tightight, quicktest=opts.quick_test,
                                                  cached_cut=cut)
                    out_filename = (systUtils.Group(group.name)
                                    .setSyst(systematic)
                                    .setHistosDir(outputDir)
                                    .setCurrentSelection(cut.GetName())).filenameHisto
                    writeObjectsToFile(out_filename, h_pre, verbose)
                    counters_pre = dictSum(counters_pre, c_pre)
                    histos_pre = dictSum(histos_pre, h_pre)
                if uncached_tcuts:
                    if verbose :
                        print('filling uncached cuts:  ' + ' '.join([c.GetName() for c in uncached_tcuts]))
                    counters_npre, histos_npre = count_and_fill(chain=chain, sample=group.name,
                                                                syst=systematic, verbose=verbose,
                                                                debug=debug, blinded=blinded,
                                                                onthefly_tight_def=onthefly_tight_def,
                                                                tightight=tightight,
                                                                quicktest=opts.quick_test,
                                                                noncached_cuts=uncached_tcuts)
                    # one output file per selection
                    for sel, histos in histos_npre.items():
                        out_filename = (systUtils.Group(group.name)
                                        .setSyst(systematic)
                                        .setHistosDir(outputDir)
                                        .setCurrentSelection(sel)).filenameHisto
                        writeObjectsToFile(out_filename, histos, verbose)
                chain.save_lists()

コード例 #11

ファイルを表示

def runFill(opts):
    """Fill the histograms of every dataset group, or submit batch jobs to do so.

    `opts` is the options object; the attributes read here are lepton, batch,
    input_dir, output_dir, verbose, debug, samples_dir, group,
    include_regions, exclude_regions, regions and disable_cache (plus
    whatever the helpers read).

    The output goes to <output_dir>/<lepton>/histos. In batch mode one job is
    submitted per group. Otherwise, for each group, a chain is built from the
    group's '.root' files in input_dir, the selections are split in those
    with and without an existing entry list, histograms are filled for both
    sets, and the histograms of each selection are written to
    <group>_<selection>.root.
    """
    lepton = opts.lepton
    batchMode = opts.batch
    inputDir = opts.input_dir
    outputDir = opts.output_dir
    verbose = opts.verbose
    debug = opts.debug

    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(
        opts.samples_dir)
    # optionally restrict the processing to the one group named in opts.group
    if opts.group: groups = [g for g in groups if g.name == opts.group]
    if verbose:
        print '\n'.join(
            "group {0} : {1} samples".format(g.name, len(g.datasets))
            for g in groups)
    if debug:
        print '\n'.join("group {0} : {1} samples: {2}".format(
            g.name, len(g.datasets), '\n\t' + '\n\t'.join(d.name
                                                          for d in g.datasets))
                        for g in groups)
    if verbose: print "filling histos"
    outputDir = outputDir + '/' + lepton + '/histos'
    mkdirIfNeeded(outputDir)
    if batchMode:
        for group in groups:
            submit_batch_fill_job_per_group(group, opts)
    else:
        for group in groups:
            tree_name = 'ss3l_tuple'
            chain = IndexedChain(tree_name)
            for ds in group.datasets:
                chain.Add(os.path.join(inputDir, ds.name + '.root'))
            if opts.verbose:
                print "{0} : {1} entries from {2} samples".format(
                    group.name, chain.GetEntries(), len(group.datasets))
            # one cache directory per group, relative to the working directory
            chain.cache_directory = os.path.abspath('./selection_cache/' +
                                                    group.name + '/')
            # one TCut per region to plot, named after the region
            tcuts = [
                r.TCut(reg,
                       selection_formulas()[reg])
                for reg in regions_to_plot(opts.include_regions,
                                           opts.exclude_regions, opts.regions)
            ]
            chain.retrieve_entrylists(tcuts)
            counters_pre, histos_pre = dict(), dict()
            counters_npre, histos_npre = dict(), dict()
            # with the cache disabled every cut is treated as uncached
            cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list(
            )
            uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list(
            )
            # NOTE(review): this reminder is printed for every group, regardless of `verbose`
            print 'todo: skip cuts for which the histo files are there'
            if verbose:
                print 'filling cached cuts: ', ' '.join(
                    [c.GetName() for c in cached_tcuts])
            # cached cuts: preselect on the chain, then fill one cut at a time
            for cut in cached_tcuts:
                chain.preselect(cut)
                c_pre, h_pre = count_and_fill(chain=chain,
                                              opts=opts,
                                              group=group,
                                              cached_cut=cut)
                counters_pre = dictSum(counters_pre, c_pre)
                histos_pre = dictSum(histos_pre, h_pre)
            if verbose:
                print 'filling uncached cuts: ', ' '.join(
                    [c.GetName() for c in uncached_tcuts])
            # uncached cuts: all handed to count_and_fill in a single call
            if uncached_tcuts:
                counters_npre, histos_npre = count_and_fill(
                    chain=chain,
                    opts=opts,
                    group=group,
                    noncached_cuts=uncached_tcuts)
                chain.save_lists()
            # the counters are accumulated above but not used after this point
            all_histos = dictSum(histos_pre, histos_npre)
            for sel, histos in all_histos.iteritems():
                # write histos for each sel to a separate file (finer granularity, better caching)
                out_filename = os.path.join(outputDir,
                                            group.name + '_' + sel + '.root')
                if verbose: print 'saving to ', out_filename
                writeObjectsToFile(out_filename, histos, verbose)

コード例 #12

ファイルを表示

def runFill(opts):
    """Fill the histograms for every group, systematic and region, or submit batch jobs to do so.

    `opts` is the options object; the attributes read here are batch,
    input_fake, input_other, output_dir, verbose, debug, unblind,
    require_tight_tight, samples_dir, tight_def, include_regions,
    exclude_regions, regions, disable_cache and quick_test (plus whatever the
    helpers read). In batch mode `opts.syst` is set before each submission.

    In batch mode one job is submitted per (group, needed systematic, region).
    Otherwise, for each group and each systematic that the group needs, a
    chain is built from the group's datasets, the regions are split in those
    with and without an existing entry list, histograms are filled for both
    sets and written to one file per selection.
    """
    batchMode = opts.batch
    inputFakeDir = opts.input_fake
    inputGenDir = opts.input_other
    outputDir = opts.output_dir
    verbose = opts.verbose
    debug = opts.debug
    blinded = not opts.unblind
    tightight = opts.require_tight_tight

    if debug: dataset.Dataset.verbose_parsing = True
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(
        opts.samples_dir)
    # NOTE(review): skip_charge_flip is not set in this function; presumably a module-level flag
    if not skip_charge_flip:
        groups.append(
            dataset.DatasetGroup.build_qflip_from_simulated_samples(groups))
    groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake())
    groups = parse_group_option(opts, groups)
    if verbose:
        print('\n'.join(
            "group {0} : {1} samples".format(g.name, len(g.datasets))
            for g in groups))
    if debug:
        print('\n'.join(
            "group {0} : {1} samples: {2}".format(
                g.name, len(g.datasets),
                '\n\t' + '\n\t'.join(d.name for d in g.datasets))
            for g in groups))
    if verbose: print("filling histos")
    # eval will take care of aborting on typos
    # NOTE(security): eval executes the option string as code; only acceptable
    # as long as tight_def comes from a trusted command line.
    onthefly_tight_def = eval(opts.tight_def) if opts.tight_def else None
    mkdirIfNeeded(outputDir)
    systematics = get_list_of_syst_to_fill(opts)
    regions = regions_to_plot(opts.include_regions, opts.exclude_regions,
                              opts.regions)
    if verbose:
        print("about to loop over these systematics:\n %s" % str(systematics))
    if verbose: print("about to loop over these regions:\n %s" % str(regions))
    if batchMode:
        for group in groups:
            for systematic in systematics:
                if systUtils.Group(group.name).isNeededForSys(systematic):
                    opts.syst = systematic
                    for selection in regions:
                        submit_batch_fill_job_per_group_per_selection(
                            group=group, selection=selection, opts=opts)
    else:
        for group in groups:
            # Filter into a per-group list: rebinding `systematics` here would
            # leave every later group with only what the earlier groups kept.
            group_systematics = [
                s for s in systematics
                if systUtils.Group(group.name).isNeededForSys(s)
            ]
            if not group_systematics:
                print("warning, empty syst list. You should have at least the nominal")
            input_dir = inputFakeDir if group.name == 'fake' else inputGenDir
            for systematic in group_systematics:
                # note to self: here you will want to use a modified Sample.setHftInputDir
                # for now we just have the fake syst that are in the nominal tree
                tree_name = 'hlfv_tuple'
                chain = IndexedChain(tree_name)
                for ds in group.datasets:
                    sample = systUtils.Sample(ds.name, group.name)
                    chain.Add(
                        os.path.join(input_dir,
                                     sample.setSyst(systematic).filename))
                if verbose:
                    print("{0} : {1} entries from {2} samples".format(
                        group.name, chain.GetEntries(), len(group.datasets)))
                chain.cache_directory = os.path.abspath('./selection_cache/' +
                                                        group.name + '/')
                formulas = selection_formulas()  # looked up once, not once per region
                tcuts = [r.TCut(reg, formulas[reg]) for reg in regions]
                chain.retrieve_entrylists(tcuts)
                counters_pre, histos_pre = dict(), dict()
                counters_npre, histos_npre = dict(), dict()
                # with the cache disabled every cut is treated as uncached
                if opts.disable_cache:
                    cached_tcuts, uncached_tcuts = [], tcuts
                else:
                    cached_tcuts = chain.tcuts_with_existing_list()
                    uncached_tcuts = chain.tcuts_without_existing_list()
                if verbose:
                    # two spaces after the colon: same output as the Python 2
                    # two-item print statement this replaces
                    print('filling cached cuts:  ' +
                          ' '.join([c.GetName() for c in cached_tcuts]))
                for cut in cached_tcuts:
                    chain.preselect(cut)
                    c_pre, h_pre = count_and_fill(
                        chain=chain,
                        sample=group.name,
                        syst=systematic,
                        verbose=verbose,
                        debug=debug,
                        blinded=blinded,
                        onthefly_tight_def=onthefly_tight_def,
                        tightight=tightight,
                        quicktest=opts.quick_test,
                        cached_cut=cut)
                    out_filename = (systUtils.Group(group.name)
                                    .setSyst(systematic)
                                    .setHistosDir(outputDir)
                                    .setCurrentSelection(cut.GetName())
                                    ).filenameHisto
                    writeObjectsToFile(out_filename, h_pre, verbose)
                    counters_pre = dictSum(counters_pre, c_pre)
                    histos_pre = dictSum(histos_pre, h_pre)
                if uncached_tcuts:
                    if verbose:
                        print('filling uncached cuts:  ' +
                              ' '.join([c.GetName() for c in uncached_tcuts]))
                    counters_npre, histos_npre = count_and_fill(
                        chain=chain,
                        sample=group.name,
                        syst=systematic,
                        verbose=verbose,
                        debug=debug,
                        blinded=blinded,
                        onthefly_tight_def=onthefly_tight_def,
                        tightight=tightight,
                        quicktest=opts.quick_test,
                        noncached_cuts=uncached_tcuts)
                    # one output file per selection
                    for sel, histos in histos_npre.items():
                        out_filename = (systUtils.Group(group.name)
                                        .setSyst(systematic)
                                        .setHistosDir(outputDir)
                                        .setCurrentSelection(sel)
                                        ).filenameHisto
                        writeObjectsToFile(out_filename, histos, verbose)
                chain.save_lists()

コード例 #13

ファイルを表示

ファイル: plot_by_source.py プロジェクト: gerbaudo/SusyntHlfv

def runFill(opts):
    lepton    = opts.lepton
    batchMode = opts.batch
    inputDir  = opts.input_dir
    outputDir = opts.output_dir
    verbose   = opts.verbose
    debug     = opts.debug

    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir)
    if opts.group : groups = [g for g in groups if g.name==opts.group]
    if verbose : print '\n'.join("group {0} : {1} samples".format(g.name, len(g.datasets)) for g in groups)
    if debug :
        print '\n'.join("group {0} : {1} samples: {2}".format(g.name,
                                                              len(g.datasets),
                                                              '\n\t'+'\n\t'.join(d.name for d in g.datasets))
                        for g in groups)
    if verbose : print "filling histos"
    outputDir = outputDir+'/'+lepton+'/histos'
    mkdirIfNeeded(outputDir)
    if batchMode:
        for group in groups:
            submit_batch_fill_job_per_group(group, opts)
    else:
        for group in groups:
            tree_name = 'hlfv_tuple'
            chain = IndexedChain(tree_name)
            for ds in group.datasets:
                chain.Add(os.path.join(inputDir, ds.name+'.root'))
            if opts.verbose:
                print "{0} : {1} entries from {2} samples".format(group.name,
                                                                  chain.GetEntries(),
                                                                  len(group.datasets))
            chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/')
            tcuts = [r.TCut(reg, selection_formulas()[reg])
                     for reg in regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions)]
            chain.retrieve_entrylists(tcuts)
            counters_pre, histos_pre = dict(), dict()
            counters_npre, histos_npre = dict(), dict()
            cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list()
            uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list()
            print 'todo: skip cuts for which the histo files are there'
            if verbose : print 'filling cached cuts: ',' '.join([c.GetName() for c in cached_tcuts])
            for cut in cached_tcuts:
                chain.preselect(cut)
                c_pre, h_pre = count_and_fill(chain=chain, opts=opts,
                                              group=group,
                                              cached_cut=cut)
                counters_pre = dictSum(counters_pre, c_pre)
                histos_pre = dictSum(histos_pre, h_pre)
            if verbose : print 'filling uncached cuts: ',' '.join([c.GetName() for c in uncached_tcuts])
            if uncached_tcuts:
                counters_npre, histos_npre = count_and_fill(chain=chain, opts=opts,
                                                            group=group,
                                                            noncached_cuts=uncached_tcuts)
                chain.save_lists()
            all_histos = dictSum(histos_pre, histos_npre)
            for sel, histos in all_histos.iteritems():
                # write histos for each sel to a separate file (finer granularity, better caching)
                out_filename = os.path.join(outputDir, group.name+'_'+sel+'.root')
                if verbose : print 'saving to ',out_filename
                writeObjectsToFile(out_filename, histos, verbose)

コード例 #14

ファイルを表示

ファイル: teamCalculatedDataKeys.py プロジェクト: head1ton/server-2016

def getFirstTeamCalcDataKeys(calc):
    """Return the table of first-pass calculated-data keys for a team.

    Args:
        calc: the calculator object whose methods (e.g.
            getAverageForDataFunctionForTeam, setDefenseValuesForTeam) are
            invoked lazily by the returned callables.

    Returns:
        A dict keyed by calculated-data name. Most values are callables
        taking a `team`; "avgNumTimesSlowed" maps to a dict of such
        callables (keys "pc" and "cdf") and "defenses" maps to a list of
        such callables. Nothing on `calc` is evaluated until one of the
        callables is invoked.
    """

    def sumCategoryADataPointDict(team):
        # unaffected + beached + slowed, combined with utils.dictSum
        return utils.dictSum(
            team.calculatedData.avgNumTimesUnaffected,
            utils.dictSum(team.calculatedData.avgNumTimesBeached,
                          team.calculatedData.avgNumTimesSlowed))

    # Reducers handed to calc.setDefenseValuesForTeam. `is not None` is used
    # rather than `!= None` so that the check stays a plain boolean even when
    # the argument is a numpy array.
    def meanOrZero(x):
        return np.mean(x) if x is not None and len(x) > 0 else 0

    def rmsOrZero(x):
        return utils.rms(x) if x is not None and len(x) > 0 else 0

    def lenOrZero(y):
        return len(y) if y is not None else 0

    return {
        "avgTorque":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.rankTorque),  # Checked
        "avgSpeed":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.rankSpeed),
        "avgAgility":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.rankAgility),  # Checked
        "avgDefense":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.rankDefense),  # Checked
        "avgBallControl":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.rankBallControl),  # Checked
        "avgDrivingAbility":
        lambda team: calc.drivingAbility(team),
        "disabledPercentage":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: utils.convertFirebaseBoolean(
                timd.didGetDisabled)),
        "incapacitatedPercentage":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: utils.convertFirebaseBoolean(
                timd.didGetIncapacitated)),
        "disfunctionalPercentage":
        lambda team: (team.calculatedData.disabledPercentage +
                      team.calculatedData.incapacitatedPercentage),

        # Auto
        "autoAbility":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.autoAbility),
        "autoAbilityExcludeD":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.autoAbility(
                calc.timdHasDefenseExclusion(
                    timd, calc.defenseDictionary['d']))),
        "autoAbilityExcludeLB":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.autoAbility(
                calc.timdHasDefenseExclusion(
                    timd, calc.defenseDictionary['e']))),
        "avgHighShotsAuto":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numHighShotsMadeAuto),  # Checked
        "avgLowShotsAuto":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numLowShotsMadeAuto),  # Checked
        "reachPercentage":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: utils.convertFirebaseBoolean(
                timd.didReachAuto)),
        "highShotAccuracyAuto":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.TIMDShotAccuracy(
                timd.numHighShotsMadeAuto,
                timd.numHighShotsMissedAuto)),  # Checked
        "lowShotAccuracyAuto":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.TIMDShotAccuracy(
                timd.numLowShotsMadeAuto,
                timd.numLowShotsMissedAuto)),  # Checked
        "avgMidlineBallsIntakedAuto":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team,
            lambda timd: timd.calculatedData.numBallsIntakedOffMidlineAuto),
        "sdMidlineBallsIntakedAuto":
        lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team,
            lambda timd: timd.calculatedData.numBallsIntakedOffMidlineAuto),
        "sdHighShotsAuto":
        lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numHighShotsMadeAuto),  # Checked
        "sdLowShotsAuto":
        lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numLowShotsMadeAuto),  # Checked
        "sdBallsKnockedOffMidlineAuto":
        lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numBallsKnockedOffMidlineAuto),

        # Tele
        "scalePercentage":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: int(
                utils.convertFirebaseBoolean(timd.didScaleTele))),
        "challengePercentage":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: int(
                utils.convertFirebaseBoolean(timd.didChallengeTele))),
        "avgGroundIntakes":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numGroundIntakesTele),  # Checked
        "avgBallsKnockedOffMidlineAuto":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numBallsKnockedOffMidlineAuto),  # Checked
        "avgShotsBlocked":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numShotsBlockedTele),  # Checked
        "avgHighShotsTele":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numHighShotsMadeTele),  # Checked
        "avgLowShotsTele":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.numLowShotsMadeTele),  # Checked
        "highShotAccuracyTele":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.TIMDShotAccuracy(
                timd.numHighShotsMadeTele,
                timd.numHighShotsMissedTele)),  # Checked
        "lowShotAccuracyTele":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: calc.TIMDShotAccuracy(
                timd.numLowShotsMadeTele, timd.numLowShotsMissedTele)),
        "teleopShotAbility":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team,
            lambda timd: timd.calculatedData.teleopShotAbility),  # Checked
        "siegeConsistency":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: utils.convertFirebaseBoolean(
                timd.didChallengeTele) or utils.convertFirebaseBoolean(
                    timd.didScaleTele)),  # Checked
        "siegeAbility":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.siegeAbility),  # Checked
        "sdHighShotsTele":
        lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numHighShotsMadeTele),  # Checked
        "sdLowShotsTele":
        lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numLowShotsMadeTele),  # Checked
        "sdGroundIntakes":
        lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numGroundIntakesTele),  # Checked
        "sdShotsBlocked":
        lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.numShotsBlockedTele),  # Checked
        "sdTeleopShotAbility":
        lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.teleopShotAbility),
        "sdSiegeAbility":
        lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.siegeAbility),
        "sdAutoAbility":
        lambda team: calc.getStandardDeviationForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.autoAbility),
        "numScaleAndChallengePoints":
        lambda team: calc.numScaleAndChallengePointsForTeam(team),  # Checked
        # NOTE(review): convertFirebaseBoolean receives a lambda here (not the
        # result of teamDidBreachInMatch), and teamDidBreachInMatch would in
        # turn receive a lambda instead of a match. Left unchanged because the
        # intended behavior cannot be confirmed from this file -- TODO confirm.
        "breachPercentage":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: utils.
            convertFirebaseBoolean(lambda team: calc.teamDidBreachInMatch(
                team, lambda team: calc.su.getMatchForNumber(timd.matchNumber))
                                   )),
        "avgHighShotsAttemptedTele":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.highShotsAttemptedTele),
        "avgLowShotsAttemptedTele":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team, lambda timd: timd.calculatedData.lowShotsAttemptedTele),
        "twoBallAutoTriedPercentage":
        lambda team: calc.twoBallAutoTriedPercentage(team),
        "twoBallAutoAccuracy":
        lambda team: calc.twoBallAutoAccuracy(team),
        "avgNumTimesBeached":
        lambda team: calc.categoryAAverageDictForDataFunction(
            team, lambda timd: timd.numTimesBeached),
        "avgNumTimesSlowed": {
            "pc": lambda team: calc.avgNumTimesSlowed(team, "pc"),
            "cdf": lambda team: calc.avgNumTimesSlowed(team, "cdf")
        },
        "avgNumTimesUnaffected":
        lambda team: calc.categoryAAverageDictForDataFunction(
            team, lambda timd: timd.numTimesUnaffected),
        "beachedPercentage":
        lambda team: utils.dictQuotient(team.calculatedData.avgNumTimesBeached,
                                        sumCategoryADataPointDict(team)),
        "slowedPercentage":
        lambda team: utils.dictQuotient(team.calculatedData.avgNumTimesSlowed,
                                        sumCategoryADataPointDict(team)),
        "unaffectedPercentage":
        lambda team: utils.dictQuotient(
            team.calculatedData.avgNumTimesUnaffected,
            sumCategoryADataPointDict(team)),
        "avgNumTimesCrossedDefensesAuto":
        lambda team: calc.getAverageForDataFunctionForTeam(
            team,
            lambda tm: tm.calculatedData.totalNumTimesCrossedDefensesAuto),
        # Each entry calls calc.setDefenseValuesForTeam(team, target,
        # retrieval function, reducer, second reducer).
        "defenses": [
            lambda team: calc.setDefenseValuesForTeam(
                team,
                team.calculatedData.avgSuccessfulTimesCrossedDefensesTele,
                lambda tm: tm.timesSuccessfulCrossedDefensesTele,
                meanOrZero, lenOrZero),
            lambda team: calc.setDefenseValuesForTeam(
                team,
                team.calculatedData.avgSuccessfulTimesCrossedDefensesAuto,
                lambda tm: tm.timesSuccessfulCrossedDefensesAuto,
                meanOrZero, lenOrZero),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.avgFailedTimesCrossedDefensesTele,
                lambda tm: tm.timesFailedCrossedDefensesTele,
                meanOrZero, lenOrZero),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.avgFailedTimesCrossedDefensesAuto,
                lambda tm: tm.timesFailedCrossedDefensesAuto,
                meanOrZero, lenOrZero),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.avgTimeForDefenseCrossTele,
                lambda tm: tm.timesSuccessfulCrossedDefensesTele,
                meanOrZero, meanOrZero),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.avgTimeForDefenseCrossAuto,
                lambda tm: tm.timesSuccessfulCrossedDefensesAuto,
                meanOrZero, meanOrZero),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.sdSuccessfulDefenseCrossesAuto,
                lambda tm: tm.timesSuccessfulCrossedDefensesAuto,
                rmsOrZero, lenOrZero),
            # Fixed: the attribute name was split as
            # `tm.ti, mesSuccessfulCrossedDefensesTele`, which passed an extra
            # argument and raised NameError when this entry was called.
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.sdSuccessfulDefenseCrossesTele,
                lambda tm: tm.timesSuccessfulCrossedDefensesTele,
                rmsOrZero, lenOrZero),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.sdFailedDefenseCrossesAuto,
                lambda tm: tm.timesFailedCrossedDefensesAuto,
                rmsOrZero, lenOrZero),
            lambda team: calc.setDefenseValuesForTeam(
                team, team.calculatedData.sdFailedDefenseCrossesTele,
                lambda tm: tm.timesFailedCrossedDefensesTele,
                rmsOrZero, lenOrZero)
        ]
    }

コード例 #15

ファイルを表示

ファイル: compute_fake_scale_factor.py プロジェクト: gerbaudo/DileptonFakeMeasurement

def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fake_scale_factor', help='dir for plots')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples')
    parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)')
    parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    region    = options.region
    tag       = options.tag
    verbose   = options.verbose
    if not tag : parser.error('tag is a required option')
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    filestems, treenames = utils.verticalSlice(fakeu.tupleStemsAndNames)
    regions = filestems
    assert region in regions,"invalid region '%s', must be one of %s"%(region, str(regions))

    templateInputFilename = "*_%(region)s_tuple_%(tag)s.root" % {'tag':tag, 'region':region}
    templateOutputFilename =  "%(region)s_%(l)s_scale_histos.root" % {'region':region, 'l':lepton}
    treeName = treenames[regions.index(region)]
    outputDir = outputDir+'/'+region+'/'+lepton # split the output in subdirectories, so we don't overwrite things
    mkdirIfNeeded(outputDir)
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_'+region+'_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos
    optionsToPrint = ['inputDir', 'outputDir', 'region', 'tag', 'doFillHistograms', 'onthefly_tight_def']
    if verbose :
        print "working from %s"%os.getcwd()
        print "being called as : %s"%' '.join(os.sys.argv)
        print "options:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint])
    # collect inputs
    if verbose : print 'input files ',os.path.join(inputDir, templateInputFilename)
    tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename))
    samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose))
    samplesPerGroup = collections.defaultdict(list)
    filenamesPerGroup = collections.defaultdict(list)
    mkdirIfNeeded(outputDir)
    for s, f in zip(samples, tupleFilenames) :
        samplesPerGroup[s.group].append(s)
        filenamesPerGroup[s.group].append(f)
    vars = ['mt0', 'mt1', 'pt0', 'pt1', 'eta1']
    groups = samplesPerGroup.keys()
    #fill histos
    if doFillHistograms :
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroup = bookHistos(vars, groups, region=region)
        histosPerSource = bookHistosPerSource(vars, leptonSources, region=region)
        histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, groups, leptonSources, region=region)
        for group in groups:
            isData = isDataSample(group)
            filenames = filenamesPerGroup[group]
            if verbose:
                print " --- group : %s ---".format(group)
                print '\n\t'.join(filenames)
            histosThisGroup = histosPerGroup[group]
            histosThisGroupPerSource = dict((v, histosPerGroupPerSource[v][group]) for v in histosPerGroupPerSource.keys())
            chain = r.TChain(treeName)
            [chain.Add(fn) for fn in filenames]
            if verbose: print "%s : %d entries"%(group, chain.GetEntries())
            num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource,
                                                histosThisGroupPerSource,
                                                lepton, group, region,
                                                onthefly_tight_def=onthefly_tight_def, verbose=verbose)
        writeHistos(cacheFileName, histosPerGroup, histosPerSource, histosPerGroupPerSource, verbose)
        end_time = time.clock()
        delta_time = end_time - start_time
        if verbose:
            print ("processed {0:d} entries ".format(num_processed_entries)
                   +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>60 else
                           "{0:.1f} s ".format(delta_time))
                   +"({0:.1f} kHz)".format(num_processed_entries/delta_time))
    # compute scale factors
    histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, groups, region), verbose)
    histosPerSource = fetchHistos(cacheFileName, histoNamesPerSource(vars, leptonSources, region), verbose)
    histosPerSamplePerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, groups, leptonSources, region), verbose)
    plotStackedHistos(histosPerGroup, outputDir+'/by_group', region, verbose)
    plotStackedHistosSources(histosPerSource, outputDir+'/by_source', region, verbose)
    plotPerSourceEff(histosPerVar=histosPerSource, outputDir=outputDir+'/by_source', lepton=lepton, region=region, verbose=verbose)
    for g in groups:
        hps = dict((v, histosPerSamplePerSource[v][g])for v in vars)
        plotPerSourceEff(histosPerVar=hps, outputDir=outputDir, lepton=lepton, region=region, sample=g, verbose=verbose)


    hn_sf_eta = histoname_sf_vs_eta           (lepton)
    hn_sf_pt  = histoname_sf_vs_pt            (lepton)
    hn_da_eta = histoname_data_fake_eff_vs_eta(lepton)
    hn_da_pt  = histoname_data_fake_eff_vs_pt (lepton)
    objs_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'eta1', hn_sf_eta, hn_da_eta, outputDir, region, verbose)
    objs_pt  = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1',  hn_sf_pt,  hn_da_pt,  outputDir, region, verbose)
    rootUtils.writeObjectsToFile(outputFileName, dictSum(objs_eta, objs_pt), verbose)
    if verbose : print "saved scale factors to %s" % outputFileName

コード例 #16

ファイルを表示

ファイル: compute_fake_scale_factor.py プロジェクト: gerbaudo/SusyntHlfv

def main():
    """Fill (or fetch from the cache file) the fake histograms and compute scale factors.

    Command-line options are parsed with optparse. --lepton must be 'el' or
    'mu' and --region must be a key of kin.selection_formulas(). Histograms
    are filled when --fill-histos is given or the cache file does not exist;
    they are then read back from the cache file, plotted, and the
    scale-factor objects (vs. eta, vs. pt, and vs. pt-eta) are written to
    the output file. With --keep-real the real contribution is not
    subtracted.
    """
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-g', '--group', help='group to be processed (used only in fill mode)')
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fake_scale_factor')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples')
    parser.add_option('--samples-dir', default='samples/', help='directory with the list of samples; default ./samples/')
    parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('--keep-real', action='store_true', default=False, help='do not subtract real (to get real lep efficiency)')
    parser.add_option('--debug', action='store_true')
    parser.add_option('--verbose', action='store_true')
    parser.add_option('--disable-cache', action='store_true', help='disable the entry cache')
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    region    = options.region
    keepreal  = options.keep_real
    debug     = options.debug
    verbose   = options.verbose
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    regions = kin.selection_formulas().keys()
    assert region in regions,"invalid region '%s', must be one of %s"%(region, str(sorted(regions)))
    # from here on only the requested region is processed
    regions = [region]

    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(options.samples_dir)
    if options.group : groups = [g for g in groups if g.name==options.group]
    group_names = [g.name for g in groups]

    outputDir = outputDir+'/'+region+'/'+lepton # split the output in subdirectories, so we don't overwrite things
    mkdirIfNeeded(outputDir)
    templateOutputFilename = "scale_factor_{0}.root".format(lepton)
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    # NOTE(review): eval() runs the command-line expression in this function's
    # scope; only safe because the input comes from the trusted user.
    onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos
    if verbose : utils.print_running_conditions(parser, options)
    vars = ['mt0', 'mt1', 'pt0', 'pt1', 'eta1', 'pt1_eta1']
    #fill histos
    if doFillHistograms :
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroup = bookHistos(vars, group_names, region=region)
        histosPerSource = bookHistosPerSource(vars, leptonSources, region=region)
        histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, group_names, leptonSources, region=region)
        for group in groups:
            tree_name = 'hlfv_tuple'
            chain = IndexedChain(tree_name)
            # only input files that actually exist are added to the chain
            for ds in group.datasets:
                fname = os.path.join(inputDir, ds.name+'.root')
                if os.path.exists(fname):
                    chain.Add(fname)
            if verbose:
                print "{0} : {1} entries from {2} samples".format(group.name, chain.GetEntries(), len(group.datasets))
            chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/')
            tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions]
            print 'tcuts ',[c.GetName() for c in tcuts]
            chain.retrieve_entrylists(tcuts)
            # NOTE(review): these four dicts are not read again in this function
            counters_pre, histos_pre = dict(), dict()
            counters_npre, histos_npre = dict(), dict()
            print 'tcuts_with_existing_list ',str([c.GetName() for c in chain.tcuts_with_existing_list()])
            print 'tcuts_without_existing_list ',str([c.GetName() for c in chain.tcuts_without_existing_list()])
            # with --disable-cache every cut is treated as uncached
            cached_tcuts = [] if options.disable_cache else chain.tcuts_with_existing_list()
            print 'cached_tcuts ',[c.GetName() for c in cached_tcuts]
            uncached_tcuts = tcuts if options.disable_cache else chain.tcuts_without_existing_list()
            print 'todo: skip cuts for which the histo files are there'
            if verbose:
                print " --- group : {0} ---".format(group.name)
                print '\n\t'.join(chain.filenames)
            if verbose : print 'filling cached cuts: ',' '.join([c.GetName() for c in cached_tcuts])
            if verbose: print "%s : %d entries"%(group.name, chain.GetEntries())
            histosThisGroup = histosPerGroup[group.name]
            histosThisGroupPerSource = dict((v, histosPerGroupPerSource[v][group.name]) for v in histosPerGroupPerSource.keys())
            # cuts with an existing entry list: preselect on the cut, then fill
            for cut in cached_tcuts:
                print 'cached_tcut ',cut
                chain.preselect(cut)
                num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource,
                                                    histosThisGroupPerSource,
                                                    lepton, group,
                                                    cut, cut_is_cached=True,
                                                    onthefly_tight_def=onthefly_tight_def,
                                                    verbose=verbose)
            if verbose : print 'filling uncached cuts: ',' '.join([c.GetName() for c in uncached_tcuts])
            if uncached_tcuts:
                # `regions` holds a single region, so at most one cut is expected
                assert len(uncached_tcuts)==1, "expecting only one cut, got {}".format(len(uncached_tcuts))
                cut = uncached_tcuts[0]
                # presumably preselect(None) clears any previous preselection -- TODO confirm
                chain.preselect(None)
                num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource,
                                                    histosThisGroupPerSource,
                                                    lepton, group,
                                                    cut, cut_is_cached=False,
                                                    onthefly_tight_def=onthefly_tight_def,
                                                    verbose=verbose)
                chain.save_lists()

        writeHistos(cacheFileName, histosPerGroup, histosPerSource, histosPerGroupPerSource, verbose)
        end_time = time.clock()
        delta_time = end_time - start_time
        if verbose:
            # NOTE(review): the rate divides by delta_time without a zero check
            print ("processed {0:d} entries ".format(num_processed_entries)
                   +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>60 else
                           "{0:.1f} s ".format(delta_time))
                   +"({0:.1f} kHz)".format(num_processed_entries/delta_time))
    # return
    # compute scale factors
    # histograms are always re-read from the cache file, even right after filling
    histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, group_names, region), verbose)
    histosPerSource = fetchHistos(cacheFileName, histoNamesPerSource(vars, leptonSources, region), verbose)
    histosPerSamplePerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, group_names, leptonSources, region), verbose)
    plotStackedHistos(histosPerGroup, outputDir+'/by_group', region, verbose)
    plotStackedHistosSources(histosPerSource, outputDir+'/by_source', region, verbose)
    plotPerSourceEff(histosPerVar=histosPerSource, outputDir=outputDir+'/by_source', lepton=lepton, region=region, verbose=verbose)
    for g in group_names:
        hps = dict((v, histosPerSamplePerSource[v][g])for v in vars)
        plotPerSourceEff(histosPerVar=hps, outputDir=outputDir, lepton=lepton, region=region, sample=g, verbose=verbose)


    hn_sf_eta = histoname_sf_vs_eta           (lepton)
    hn_sf_pt  = histoname_sf_vs_pt            (lepton)
    hn_da_eta = histoname_data_fake_eff_vs_eta(lepton)
    hn_da_pt  = histoname_data_fake_eff_vs_pt (lepton)
    subtractReal = not keepreal
    objs_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'eta1', hn_sf_eta, hn_da_eta, outputDir, region, subtractReal, verbose)
    objs_pt  = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1',  hn_sf_pt,  hn_da_pt,  outputDir, region, subtractReal, verbose)
    objs_pt_eta  = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1_eta1',
                                                     histoname_sf_vs_pt_eta(lepton),
                                                     histoname_data_fake_eff_vs_pt_eta(lepton),
                                                     outputDir, region, subtractReal, verbose)
    # the three result dicts are merged with dictSum and written to one output file
    rootUtils.writeObjectsToFile(outputFileName, dictSum(dictSum(objs_eta, objs_pt), objs_pt_eta), verbose)
    if verbose : print "saved scale factors to %s" % outputFileName

コード例 #17

ファイルを表示

ファイル: compute_fake_factor.py プロジェクト: gerbaudo/DileptonFakeMeasurement

def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fakerate/efficiencies')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-m', '--mode', help='emu')
    parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    mode      = options.mode
    tag       = options.tag
    verbose   = options.verbose
    if not tag : parser.error('tag is a required option')
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    validModes = ['emu']
    if mode not in validModes : parser.error("invalid mode %s"%mode)
    tupleStem, treeName = filter(lambda _: _[0]==mode, fakeu.tupleStemsAndNames)[0]

    templateInputFilename = "*_%(stem)s_tuple_%(tag)s.root" % {'tag':tag, 'stem':tupleStem}
    templateOutputFilename =  "%(stem)s_%(l)s_eff.root" % {'stem':tupleStem.replace('tuple','histos'), 'l':lepton}
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_'+mode+'_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos
    optionsToPrint = ['inputDir', 'outputDir', 'mode', 'tag', 'doFillHistograms', 'cacheFileName', 'onthefly_tight_def']
    if verbose :
        print "working from %s"%os.getcwd()
        print "being called as : %s"%' '.join(os.sys.argv)
        print "options parsed:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint])
        print 'input filenames: ',os.path.join(inputDir, templateInputFilename)
    # collect inputs
    tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename))
    samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose))
    samplesPerGroup = collections.defaultdict(list)
    filenamesPerGroup = collections.defaultdict(list)
    mkdirIfNeeded(outputDir)
    for s, f in zip(samples, tupleFilenames) :
        samplesPerGroup[s.group].append(s)
        filenamesPerGroup[s.group].append(f)
    vars = ['pt', 'pt_eta']
    groups = [g for g in samplesPerGroup.keys() if g is not 'higgs']
    if lepton=='el' : groups = [g for g in groups if g is not 'heavyflavor']
    sourcesThisMode = ['real', 'conv', 'heavy', 'light', 'unknown'] if lepton=='el' else ['real', 'heavy', 'light', 'unknown']
    #fill histos
    if doFillHistograms :
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, groups, sourcesThisMode, mode=mode)
        for group in groups:
            filenames = filenamesPerGroup[group]
            sources = histosPerGroupPerSource.keys()
            histosThisGroupPerSource = dict((s, histosPerGroupPerSource[s][group]) for s in sources)
            histosAnyGroupPerSource  = dict((s, histosPerGroupPerSource[s]['anygroup']) for s in sources) if group!='data' else {}

            chain = r.TChain(treeName)
            [chain.Add(fn) for fn in filenames]
            if verbose: print "%s : %d entries"%(group, chain.GetEntries())
            is_data = group in ['data']
            print 'is_data ',is_data
            num_processed_entries += fillHistos(chain=chain,
                                                histosPerSource=histosThisGroupPerSource,
                                                histosPerSourceAnygroup=histosAnyGroupPerSource,
                                                lepton=lepton,
                                                onthefly_tight_def=onthefly_tight_def,
                                                verbose=verbose)
        writeHistos(cacheFileName, histosPerGroupPerSource, verbose)
        end_time = time.clock()
        delta_time = end_time - start_time
        one_minute = 60
        if verbose:
            print ("processed {0:d} entries ".format(num_processed_entries)
                   +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>one_minute else
                           "{0:.1f} s ".format(delta_time))
                   +"({0:.1f} kHz)".format(num_processed_entries/delta_time))
    # plot histos
    histosPerGroupPerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, groups, sourcesThisMode, mode), verbose)

    # effs = computeEfficiencies(histosPerGroupPerSource) # still [var][gr][source][l/t]
    for v in vars:
        varIs1D, varIs2D = v=='pt', v=='pt_eta'
        densThisSourceThisVar = dictSum(dict((s, histosPerGroupPerSource[v]['anygroup'][s]['loose']) for s in sourcesThisMode),
                                        {'data' : histosPerGroupPerSource[v]['data']['unknown']['loose']})
        numsThisSourceThisVar = dictSum(dict((s, histosPerGroupPerSource[v]['anygroup'][s]['tight']) for s in sourcesThisMode),
                                        {'data' : histosPerGroupPerSource[v]['data']['unknown']['tight']})
        if varIs1D:
            lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#varepsilon(T|L)'
            cname = 'stack_loose_'+lepton
            lT, lY = 'loose '+lepton+', denominator to #varepsilon(T|L)', '#varepsilon(T|L)'
            title = lT+' '+'anysource'+' '+lepton+';'+lX+';'+lY
            plotStackedHistosWithData(densThisSourceThisVar,
                                      outputDir, cname, title,
                                      colors=fakeu.colorsFillSources(),
                                      verbose=verbose)
            cname = 'stack_tight_'+lepton
            lT, lY = 'tight '+lepton+', numerator to #varepsilon(T|L)', '#varepsilon(T|L)'
            title = lT+' '+'anysource'+' '+lepton+';'+lX+';'+lY
            plotStackedHistosWithData(numsThisSourceThisVar,
                                      outputDir, cname, title,
                                      colors=fakeu.colorsFillSources(),
                                      verbose=verbose)

    for s in sourcesThisMode:
        for v in vars:
            groups = first(histosPerGroupPerSource).keys()
            varIs1D, varIs2D = v=='pt', v=='pt_eta'
            # effsThisSourceThisVar = dict((g, effs[v][g][s]) for g in groups)
            densThisSourceThisVar = dictSum(dict((g, histosPerGroupPerSource[v][g][s]['loose'])
                                                 for g in groups if g not in ['anygroup','data']),
                                            {'data' : histosPerGroupPerSource[v]['data']['unknown']['loose']})
            numsThisSourceThisVar = dictSum(dict((g, histosPerGroupPerSource[v][g]['unknown']['tight'])
                                                 for g in groups if g not in ['anygroup','data']),
                                            {'data' : histosPerGroupPerSource[v]['data']['unknown']['tight']})
            if varIs1D:
                # cname = 'eff_'+lepton+'_'+s
                lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#varepsilon(T|L)'
                # title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                # zoomIn = True
                # fakeu.plot1dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn)
                cname = 'stack_loose_'+lepton+'_'+s
                lT, lY = 'loose '+lepton+', denominator to #varepsilon(T|L)', '#varepsilon(T|L)'
                title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                plotStackedHistosWithData(densThisSourceThisVar,
                                          outputDir, cname, title,
                                          colors=SampleUtils.colors,
                                          verbose=verbose)
                cname = 'stack_tight_'+lepton+'_'+s
                lT, lY = 'tight '+lepton+', numerator to #varepsilon(T|L)', '#varepsilon(T|L)'
                title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                plotStackedHistosWithData(numsThisSourceThisVar,
                                          outputDir, cname, title,
                                          colors=SampleUtils.colors,
                                          verbose=verbose)

            # elif varIs2D:
            #     cname = 'eff_'+lepton+'_'+s
            #     lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#eta'
            #     title = lT+' '+s+' '+lepton+';'+lX+';'+lY
            #     fakeu.plot2dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn=zoomIn)
    # writeHistos(outputFileName, effs, verbose)
    if verbose : print "saved scale factors to %s" % outputFileName

コード例 #18

ファイルを表示

ファイル: compute_fake_scale_factor.py プロジェクト: PraderioM/SusyntHlfv

def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-g',
                      '--group',
                      help='group to be processed (used only in fill mode)')
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fake_scale_factor')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option(
        '-r',
        '--region',
        help='one of the regions for which we saved the fake ntuples')
    parser.add_option(
        '--samples-dir',
        default='samples/',
        help='directory with the list of samples; default ./samples/')
    parser.add_option(
        '-T',
        '--tight-def',
        help=
        'on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.'
    )
    parser.add_option('-f',
                      '--fill-histos',
                      action='store_true',
                      default=False,
                      help='force fill (default only if needed)')
    parser.add_option('--keep-real',
                      action='store_true',
                      default=False,
                      help='do not subtract real (to get real lep efficiency)')
    parser.add_option('--debug', action='store_true')
    parser.add_option('--verbose', action='store_true')
    parser.add_option('--disable-cache',
                      action='store_true',
                      help='disable the entry cache')
    (options, args) = parser.parse_args()
    inputDir = options.input_dir
    outputDir = options.output_dir
    lepton = options.lepton
    region = options.region
    keepreal = options.keep_real
    debug = options.debug
    verbose = options.verbose
    if lepton not in ['el', 'mu']: parser.error("invalid lepton '%s'" % lepton)
    regions = kin.selection_formulas().keys()
    assert region in regions, "invalid region '%s', must be one of %s" % (
        region, str(sorted(regions)))
    regions = [region]

    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(
        options.samples_dir)
    if options.group: groups = [g for g in groups if g.name == options.group]
    group_names = [g.name for g in groups]

    outputDir = outputDir + '/' + region + '/' + lepton  # split the output in subdirectories, so we don't overwrite things
    mkdirIfNeeded(outputDir)
    templateOutputFilename = "scale_factor_{0}.root".format(lepton)
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    onthefly_tight_def = eval(
        options.tight_def
    ) if options.tight_def else None  # eval will take care of aborting on typos
    if verbose: utils.print_running_conditions(parser, options)
    vars = ['mt0', 'mt1', 'pt0', 'pt1', 'eta1', 'pt1_eta1']
    #fill histos
    if doFillHistograms:
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroup = bookHistos(vars, group_names, region=region)
        histosPerSource = bookHistosPerSource(vars,
                                              leptonSources,
                                              region=region)
        histosPerGroupPerSource = bookHistosPerSamplePerSource(vars,
                                                               group_names,
                                                               leptonSources,
                                                               region=region)
        for group in groups:
            tree_name = 'hlfv_tuple'
            chain = IndexedChain(tree_name)
            for ds in group.datasets:
                fname = os.path.join(inputDir, ds.name + '.root')
                if os.path.exists(fname):
                    chain.Add(fname)
            if verbose:
                print "{0} : {1} entries from {2} samples".format(
                    group.name, chain.GetEntries(), len(group.datasets))
            chain.cache_directory = os.path.abspath('./selection_cache/' +
                                                    group.name + '/')
            tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions]
            print 'tcuts ', [c.GetName() for c in tcuts]
            chain.retrieve_entrylists(tcuts)
            counters_pre, histos_pre = dict(), dict()
            counters_npre, histos_npre = dict(), dict()
            print 'tcuts_with_existing_list ', str(
                [c.GetName() for c in chain.tcuts_with_existing_list()])
            print 'tcuts_without_existing_list ', str(
                [c.GetName() for c in chain.tcuts_without_existing_list()])
            cached_tcuts = [] if options.disable_cache else chain.tcuts_with_existing_list(
            )
            print 'cached_tcuts ', [c.GetName() for c in cached_tcuts]
            uncached_tcuts = tcuts if options.disable_cache else chain.tcuts_without_existing_list(
            )
            print 'todo: skip cuts for which the histo files are there'
            if verbose:
                print " --- group : {0} ---".format(group.name)
                print '\n\t'.join(chain.filenames)
            if verbose:
                print 'filling cached cuts: ', ' '.join(
                    [c.GetName() for c in cached_tcuts])
            if verbose:
                print "%s : %d entries" % (group.name, chain.GetEntries())
            histosThisGroup = histosPerGroup[group.name]
            histosThisGroupPerSource = dict(
                (v, histosPerGroupPerSource[v][group.name])
                for v in histosPerGroupPerSource.keys())
            for cut in cached_tcuts:
                print 'cached_tcut ', cut
                chain.preselect(cut)
                num_processed_entries += fillHistos(
                    chain,
                    histosThisGroup,
                    histosPerSource,
                    histosThisGroupPerSource,
                    lepton,
                    group,
                    cut,
                    cut_is_cached=True,
                    onthefly_tight_def=onthefly_tight_def,
                    verbose=verbose)
            if verbose:
                print 'filling uncached cuts: ', ' '.join(
                    [c.GetName() for c in uncached_tcuts])
            if uncached_tcuts:
                assert len(uncached_tcuts
                           ) == 1, "expecting only one cut, got {}".format(
                               len(uncached_tcuts))
                cut = uncached_tcuts[0]
                chain.preselect(None)
                num_processed_entries += fillHistos(
                    chain,
                    histosThisGroup,
                    histosPerSource,
                    histosThisGroupPerSource,
                    lepton,
                    group,
                    cut,
                    cut_is_cached=False,
                    onthefly_tight_def=onthefly_tight_def,
                    verbose=verbose)
                chain.save_lists()

        writeHistos(cacheFileName, histosPerGroup, histosPerSource,
                    histosPerGroupPerSource, verbose)
        end_time = time.clock()
        delta_time = end_time - start_time
        if verbose:
            print("processed {0:d} entries ".format(num_processed_entries) +
                  "in " +
                  ("{0:d} min ".format(int(delta_time / 60))
                   if delta_time > 60 else "{0:.1f} s ".format(delta_time)) +
                  "({0:.1f} kHz)".format(num_processed_entries / delta_time))
    # return
    # compute scale factors
    histosPerGroup = fetchHistos(cacheFileName,
                                 histoNames(vars, group_names, region),
                                 verbose)
    histosPerSource = fetchHistos(
        cacheFileName, histoNamesPerSource(vars, leptonSources, region),
        verbose)
    histosPerSamplePerSource = fetchHistos(
        cacheFileName,
        histoNamesPerSamplePerSource(vars, group_names, leptonSources, region),
        verbose)
    plotStackedHistos(histosPerGroup, outputDir + '/by_group', region, verbose)
    plotStackedHistosSources(histosPerSource, outputDir + '/by_source', region,
                             verbose)
    plotPerSourceEff(histosPerVar=histosPerSource,
                     outputDir=outputDir + '/by_source',
                     lepton=lepton,
                     region=region,
                     verbose=verbose)
    for g in group_names:
        hps = dict((v, histosPerSamplePerSource[v][g]) for v in vars)
        plotPerSourceEff(histosPerVar=hps,
                         outputDir=outputDir,
                         lepton=lepton,
                         region=region,
                         sample=g,
                         verbose=verbose)

    hn_sf_eta = histoname_sf_vs_eta(lepton)
    hn_sf_pt = histoname_sf_vs_pt(lepton)
    hn_da_eta = histoname_data_fake_eff_vs_eta(lepton)
    hn_da_pt = histoname_data_fake_eff_vs_pt(lepton)
    subtractReal = not keepreal
    objs_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'eta1',
                                                 hn_sf_eta, hn_da_eta,
                                                 outputDir, region,
                                                 subtractReal, verbose)
    objs_pt = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1',
                                                hn_sf_pt, hn_da_pt, outputDir,
                                                region, subtractReal, verbose)
    objs_pt_eta = subtractRealAndComputeScaleFactor(
        histosPerGroup, 'pt1_eta1', histoname_sf_vs_pt_eta(lepton),
        histoname_data_fake_eff_vs_pt_eta(lepton), outputDir, region,
        subtractReal, verbose)
    rootUtils.writeObjectsToFile(
        outputFileName, dictSum(dictSum(objs_eta, objs_pt), objs_pt_eta),
        verbose)
    if verbose: print "saved scale factors to %s" % outputFileName