def main():
    """
    Plot the emu/mue ratio of the 'fake' group, without and with syst errors.

    Expects two command-line arguments: inputdir (must exist; histograms are
    read from it) and outputdir (created if needed). For each region returned
    by regions_to_plot() and each variable, two ratio plots are made:
    one from the histograms as retrieved ('_wout_sys_err'), and one where
    the bin errors are taken from the stat+syst error band ('_with_sys_err').
    """
    if len(sys.argv) != 3:
        print("Usage: {0} inputdir outputdir".format(sys.argv[0]))
        return
    inputdir = sys.argv[1]
    outputdir = sys.argv[2]
    verbose = True
    if not os.path.exists(inputdir):
        print("missing input dir {0}".format(inputdir))
        return
    utils.mkdirIfNeeded(outputdir)

    fake = systUtils.Group('fake')
    fake.setHistosDir(inputdir)

    fake.setSyst() # reset to nominal (state is undetermined after 'explore')
    c = r.TCanvas('c','')
    variables = ['mcollcoarse']

    # evaluate the region map once, and unpack the (emu, mue) selection pair directly
    for jetnojet, (sel_emu, sel_mue) in regions_to_plot().items():
        for var in variables:
            h_emu = fake.getHistogram(variable=var, selection=sel_emu, cacheIt=True)
            h_mue = fake.getHistogram(variable=var, selection=sel_mue, cacheIt=True)
            h_ratio = h_emu.Clone(h_emu.GetName().replace('emu', 'emu_over_mue'))
            h_ratio.Divide(h_mue)
            plot_emu_mue_with_ratio(canvas=c, h_mue=h_mue, h_emu=h_emu, h_ratio=h_ratio,
                                    filename=outputdir+'/'+var+'_'+jetnojet+'_emu_over_mue_wout_sys_err')
            h_with_totErrBand = {} # histo with stat+syst err (to get the correct error in the ratio)
            for sel in [sel_emu, sel_mue]:
                print(">>>plotting  {0}".format(sel))
                fake.setSystNominal()
                fake.setCurrentSelection(sel)
                fake.exploreAvailableSystematics(verbose)
                fakeSystematics = [s for s in fake.systematics if s!='NOM']
                nominalHistoFakeBkg = fake.getHistogram(variable=var, selection=sel, cacheIt=True)
                # only the fake background enters the total here (no simulated backgrounds)
                nominalHistoTotBkg  = buildTotBackgroundHisto(histoFakeBkg=nominalHistoFakeBkg, histosSimBkgs={})
                statErrBand = buildStatisticalErrorBand(nominalHistoTotBkg)
                systErrBand = buildFakeSystematicErrorBand(fake=fake, nominalHistosSimBkg={}, variable=var, selection=sel,
                                                           variations=fakeSystematics, verbose=verbose)
                totErrBand = systUtils.addErrorBandsInQuadrature(statErrBand, systErrBand)
                h_with_totErrBand[sel] = systUtils.setHistErrFromErrBand(nominalHistoFakeBkg, totErrBand)
                pprint.pprint(h_with_totErrBand)
            # look the histograms up by their selection rather than by substring-matching the keys
            h_emu = h_with_totErrBand[sel_emu]
            h_mue = h_with_totErrBand[sel_mue]

            # name the ratio after the numerator (emu), consistently with the first ratio above;
            # deriving it from the mue name could yield a clone named exactly like h_mue
            h_ratio = h_emu.Clone(h_emu.GetName().replace('emu', 'emu_over_mue'))
            h_ratio.Divide(h_mue)
            plot_emu_mue_with_ratio(canvas=c, h_mue=h_mue, h_emu=h_emu, h_ratio=h_ratio,
                                    filename=outputdir+'/'+var+'_'+jetnojet+'_emu_over_mue_with_sys_err')
    return
# Example #2
# 0
def count_and_fill(chain,
                   sample='',
                   syst='',
                   verbose=False,
                   debug=False,
                   blinded=True,
                   onthefly_tight_def=None,
                   tightight=False,
                   quicktest=False,
                   cached_cut=None,
                   noncached_cuts=None):
    """
    Count and fill for one sample (or group), one syst.

    Exactly one of `cached_cut` (a single cut object) and `noncached_cuts`
    (a list of cut objects) must be provided. Each cut must provide
    GetName() (the selection name) and GetTitle() (a python expression
    that is evaluated, for each event, against the local variables of the
    event loop below).

    Parameters:
    chain -- iterable of events; also provides GetEntries(), preselect()
             and add_entry_to_list()
    sample, syst -- names of the sample (or group) and of the systematic
    verbose -- print timing and a histogram summary at the end
    debug -- print a comma-separated dump of `fields_to_print` for each
             (event, selection) that passes
    blinded -- for data, do not fill the mcoll histograms when
               100 < m_coll < 150
    onthefly_tight_def -- optional callable(lepton) replacing lepton.isTight
    tightight -- skip events where the two leptons are not both tight
    quicktest -- stop after about 1% of the entries
    cached_cut, noncached_cuts -- see above

    Returns (counters, histos), both keyed by selection name.
    """
    noncached_cuts = noncached_cuts or []
    sysGroup = systUtils.Group(sample).setSyst(syst)
    is_mc = systUtils.Group(sample).isMc
    is_data = systUtils.Group(sample).isData
    is_qflip_sample = dataset.DatasetGroup(sample).is_qflip
    assert bool(cached_cut) != bool(
        noncached_cuts
    ), "must choose either cached selection or non-cached selections: {}, {}".format(
        cached_cut, noncached_cuts)
    cuts = [cached_cut] if cached_cut else noncached_cuts
    if noncached_cuts:
        chain.preselect(None)
    selections = [c.GetName() for c in cuts]
    counters = book_counters(selections)
    histos = book_histograms(sample_name=sample,
                             variables=variables_to_fill(),
                             systematics=[syst],
                             selections=selections)[syst]
    if is_qflip_sample:  # for qflip, only fill ss histos
        selections = [s for s in selections if s.endswith('_ss')]
    weight_expr = sysGroup.weightLeafname
    qflip_expr = 'event.pars.qflipWeight'
    print('weight_expr:  {0}'.format(weight_expr))
    print('selections:  ' + '\n'.join([
        "%d) %s : %s" % (i, cut.GetName(), cut.GetTitle())
        for i, cut in enumerate(cuts)
    ]))
    # The expressions are the same for every event: compile them once instead
    # of re-parsing the strings at each eval. compile() does not strip
    # leading whitespace the way eval(str) does, hence the strip().
    weight_code = compile(str(weight_expr).strip(), '<weight_expr>', 'eval')
    qflip_code = compile(str(qflip_expr).strip(), '<qflip_expr>', 'eval')
    cut_specs = [(cut, cut.GetName(),
                  compile(str(cut.GetTitle()).strip(), '<selection>', 'eval'))
                 for cut in cuts]

    def jet_pt2(j):
        return j.px * j.px + j.py * j.py

    jet_pt2_threshold = 30. * 30.
    # time.clock was removed in python 3.8; fall back to it only where
    # perf_counter is not available (python 2)
    timer = getattr(time, 'perf_counter', None) or time.clock
    start_time = timer()
    num_total_entries = chain.GetEntries()
    num_processed_entries = 0
    fields_to_print = [
        'l0_pt', 'l1_pt', 'l0_eta', 'l1_eta', 'met_pt', 'm_ll', 'pt_ll',
        'dpt_l0_l1', 'dphi_l0_met', 'dphi_l1_met', 'dphi_l0_l1', 'mt0', 'mt1',
        'n_soft_jets', 'eta_csj0', 'phi_csj0', 'eta_csj1', 'phi_csj1'
    ]
    if debug:
        print(",".join(fields_to_print))
    # NOTE: the selection expressions (and fields_to_print) are evaluated
    # against the local variables defined in this loop. Do not rename or
    # drop any of them, even those that look unused.
    for iEntry, event in enumerate(chain):
        if quicktest and 100 * iEntry > num_total_entries: break
        run_num = event.pars.runNumber
        evt_num = event.pars.eventNumber
        l0 = addTlv(event.l0)
        l1 = addTlv(event.l1)
        met = addTlv(event.met)
        l0_is_el, l0_is_mu = l0.isEl, l0.isMu
        l1_is_el, l1_is_mu = l1.isEl, l1.isMu
        l0_is_t = onthefly_tight_def(l0) if onthefly_tight_def else l0.isTight
        l1_is_t = onthefly_tight_def(l1) if onthefly_tight_def else l1.isTight
        # veto early: no need to compute the kinematics of events we drop
        # (as before, vetoed events are not counted as processed)
        if tightight and not (l0_is_t and l1_is_t): continue
        is_emu = int(l0_is_el and l1_is_mu)
        is_mue = int(l0_is_mu and l1_is_el)
        is_mumu = int(l0_is_mu and l1_is_mu)
        is_ee = int(l0_is_el and l1_is_el)
        is_same_sign = int((l0.charge * l1.charge) > 0)
        is_opp_sign = not is_same_sign
        is_qflippable = is_opp_sign and (l0_is_el or l1_is_el) and is_mc
        weight = eval(weight_code)
        qflip_prob = eval(qflip_code)
        l0_pt, l1_pt = l0.p4.Pt(), l1.p4.Pt()
        d_pt0_pt1 = l0_pt - l1_pt
        l0_eta, l1_eta = l0.p4.Eta(), l1.p4.Eta()
        l0_phi, l1_phi = l0.p4.Phi(), l1.p4.Phi()
        met_pt = met.p4.Pt()
        m_ll = (l0.p4 + l1.p4).M()
        pt_ll = (l0.p4 + l1.p4).Pt()
        dphi_l0_met = abs(l0.p4.DeltaPhi(met.p4))
        dphi_l1_met = abs(l1.p4.DeltaPhi(met.p4))
        dphi_l0_l1 = abs(l0.p4.DeltaPhi(l1.p4))
        dpt_l0_l1 = l0.p4.Pt() - l1.p4.Pt()
        m_coll = computeCollinearMassLepTau(l0.p4, l1.p4, met.p4)
        mt0, mt1 = computeMt(l0.p4, met.p4), computeMt(l1.p4, met.p4)
        dphillbeta, mdr = computeRazor(l0.p4, l1.p4, met.p4)

        # single pass over the jets; a jet exactly at threshold goes in
        # neither list, as with the two separate strict comparisons
        cl_jets, soft_jets = [], []
        for j in event.jets:
            pt2 = jet_pt2(j)
            if pt2 > jet_pt2_threshold:
                cl_jets.append(addTlv(j))
            elif pt2 < jet_pt2_threshold:
                soft_jets.append(addTlv(j))
        n_cl_jets = len(cl_jets)
        n_b_jets = event.pars.numBjets
        n_f_jets = event.pars.numFjets
        n_bf_jets = n_b_jets + n_f_jets
        n_jets = n_cl_jets + n_b_jets + n_f_jets
        n_soft_jets = len(soft_jets)
        # closest soft jet to each lepton
        csj0 = first(sorted(soft_jets, key=lambda j: j.p4.DeltaR(l0.p4)))
        csj1 = first(sorted(soft_jets, key=lambda j: j.p4.DeltaR(l1.p4)))
        eta_csj0 = csj0.p4.Eta() if csj0 else -5.0
        phi_csj0 = csj0.p4.Phi() if csj0 else -5.0
        eta_csj1 = csj1.p4.Eta() if csj1 else -5.0
        phi_csj1 = csj1.p4.Phi() if csj1 else -5.0
        drl0csj = csj0.p4.DeltaR(l0.p4) if csj0 else None
        drl1csj = csj1.p4.DeltaR(l1.p4) if csj1 else None
        m_jj = (cl_jets[0].p4 + cl_jets[1].p4).M() if n_cl_jets > 1 else None
        deta_jj = abs(cl_jets[0].p4.Eta() -
                      cl_jets[1].p4.Eta()) if n_cl_jets > 1 else None

        for cut, sel, sel_code in cut_specs:
            pass_sel = eval(sel_code)
            is_ss_sel = sel.endswith('_ss')
            as_qflip = is_qflippable and (is_opp_sign and is_ss_sel)
            # qflip events go only to the qflip sample, and the qflip
            # sample takes only qflip events
            if bool(is_qflip_sample) != bool(as_qflip): pass_sel = False
            if not pass_sel: continue
            if not cached_cut:
                chain.add_entry_to_list(cut, iEntry)
            lltype = "{0}{1}".format('e' if l0_is_el else 'mu',
                                     'e' if l1_is_el else 'mu')
            qqtype = "{0}{1}".format('T' if l0_is_t else 'L',
                                     'T' if l1_is_t else 'L')
            if debug:
                # explicit loop: eval must see the locals of this function
                field_values = []
                for field in fields_to_print:
                    field_values.append(str(eval(field)))
                print(','.join(field_values))

            fill_weight = (weight * qflip_prob) if as_qflip else weight
            h = histos[sel]
            h['onebin'].Fill(1.0, fill_weight)
            h['njets'].Fill(n_jets, fill_weight)
            h['pt0'].Fill(l0_pt, fill_weight)
            h['pt1'].Fill(l1_pt, fill_weight)
            h['d_pt0_pt1'].Fill(d_pt0_pt1, fill_weight)
            h['eta0'].Fill(l0_eta, fill_weight)
            h['eta1'].Fill(l1_eta, fill_weight)
            h['phi0'].Fill(l0_phi, fill_weight)
            h['phi1'].Fill(l1_phi, fill_weight)
            h['mll'].Fill(m_ll, fill_weight)
            h['ptll'].Fill(pt_ll, fill_weight)
            h['met'].Fill(met_pt, fill_weight)
            h['dphil0met'].Fill(dphi_l0_met, fill_weight)
            h['dphil1met'].Fill(dphi_l1_met, fill_weight)
            h['nsj'].Fill(n_soft_jets, fill_weight)
            h['pt0_vs_pt1'].Fill(l1_pt, l0_pt, fill_weight)
            h['met_vs_pt1'].Fill(l1_pt, met.p4.Pt(), fill_weight)
            h['dphil0l1_vs_pt1'].Fill(l1_pt, dphi_l0_l1, fill_weight)
            h['dphil0met_vs_pt1'].Fill(l1_pt, dphi_l0_met, fill_weight)
            h['dphil1met_vs_pt1'].Fill(l1_pt, dphi_l1_met, fill_weight)
            if n_soft_jets:
                h['drl0csj'].Fill(drl0csj, fill_weight)
                h['drl1csj'].Fill(drl1csj, fill_weight)
            if n_jets == 2 and n_cl_jets == 2:  # fixme: f jets are not saved, but we need them for vbf
                h['m_jj'].Fill(m_jj, fill_weight)
                h['deta_jj'].Fill(deta_jj, fill_weight)
            # blinding: for data, skip the mcoll histograms in the 100-150 window
            if not (is_data and blinded and 100.0 < m_coll < 150.0):
                h['mcoll'].Fill(m_coll, fill_weight)
                h['mcollcoarse'].Fill(m_coll, fill_weight)
                h['mcoll_vs_pt1'].Fill(l1_pt, m_coll, fill_weight)
            counters[sel] += (fill_weight)
        num_processed_entries += 1
    end_time = timer()
    delta_time = end_time - start_time
    if verbose:
        if delta_time > 60:
            elapsed = "{0:d} min ".format(int(delta_time / 60))
        else:
            elapsed = "{0:.1f} s ".format(delta_time)
        # entries per second is Hz: scale by 1e-3 to match the 'kHz' label
        rate_khz = (1.0e-3 * num_processed_entries /
                    delta_time) if delta_time else 1.0e9
        print("processed {0:d} entries ".format(num_processed_entries) +
              "in " + elapsed + "({0:.1f} kHz)".format(rate_khz))
        for v in ['onebin']:  #, 'pt0', 'pt1']:
            for sel in selections:
                h = histos[sel][v]
                print("{0}: integral {1}, entries {2}".format(
                    h.GetName(), h.Integral(), h.GetEntries()))
    return counters, histos
# Example #3
# 0
def runPlot(opts):
    """
    Make the data/background plots for each selection and variable.

    Reads from `opts`: input_dir, output_dir, verbose, samples_dir,
    include_regions, exclude_regions, regions, exclude (plus whatever
    get_list_of_syst_to_fill and plotHistos read from it).

    The groups are built from the files in opts.samples_dir, plus a 'fake'
    group cloned from data and, unless the module-level `skip_charge_flip`
    is set, a charge-flip group. For each (selection, variable) the nominal
    histograms are retrieved, the statistical and systematic error bands
    are built, and everything is passed to plotHistos. A summary of the
    variations is printed for each group at the end.
    """
    inputDir = opts.input_dir
    outputDir = opts.output_dir
    verbose = opts.verbose
    mkdirIfNeeded(outputDir)
    buildTotBkg = systUtils.buildTotBackgroundHisto
    buildStat = systUtils.buildStatisticalErrorBand
    buildSyst = systUtils.buildSystematicErrorBand
    selections = regions_to_plot(opts.include_regions, opts.exclude_regions,
                                 opts.regions)
    variables = variables_to_plot()

    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(
        opts.samples_dir)
    groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake())
    if not skip_charge_flip:
        groups.append(
            dataset.DatasetGroup.build_qflip_from_simulated_samples(groups))
    plot_groups = [systUtils.Group(g.name) for g in groups]
    # if the options did not restrict the regions, use the ones for which
    # histogram files are available
    sel_not_specified = len(regions_to_plot()) == len(selections)
    if sel_not_specified:
        selections = guess_available_selections_from_histofiles(
            inputDir, first(plot_groups), verbose)
    systematics_to_use = get_list_of_syst_to_fill(opts)
    for group in plot_groups:
        group.setCurrentSelection(first(selections))
        group.setHistosDir(inputDir).setCurrentSelection(first(selections))
        group.exploreAvailableSystematics(verbose)
        group.filterAndDropSystematics(systematics_to_use, opts.exclude,
                                       verbose)
    available_systematics = sorted(
        set(s for g in plot_groups for s in g.systematics))
    systematics = [s for s in systematics_to_use if s in available_systematics]
    if verbose:
        print("using the following systematics : {0}".format(systematics))
        print("missing the following systematics : {0}".format(
            [s for s in systematics_to_use if s not in available_systematics]))
    # build the reference lists once rather than once per systematic
    fake_variations = systUtils.fakeSystVariations()
    mc_variations = (systUtils.mcObjectVariations() +
                     systUtils.mcWeightVariations())
    fakeSystematics = [s for s in systematics if s in fake_variations]
    mcSystematics = [s for s in systematics if s in mc_variations]

    findByName = systUtils.findByName
    simBkgs = [g for g in plot_groups if g.isMcBkg]
    data = findByName(plot_groups, 'data')
    fake = findByName(plot_groups, 'fake')
    signal = findByName(plot_groups, 'signaltaumu')
    print('names_stacked_groups to be improved')
    names_stacked_groups = [g.name for g in simBkgs + [fake]]
    for sel in selections:
        if verbose:
            print('-- plotting  {0}'.format(sel))
        for var in variables:
            if verbose:
                print('---- plotting  {0}'.format(var))
            # compare by value: 'is' on strings only works by interning accident
            print_summary_yield = var == 'onebin'
            for g in plot_groups:
                g.setSystNominal()
                g.setCurrentSelection(sel)
            nominalHistoData = data.getHistogram(variable=var,
                                                 selection=sel,
                                                 cacheIt=True)
            nominalHistoSign = signal.getHistogram(variable=var,
                                                   selection=sel,
                                                   cacheIt=True)
            nominalHistoFakeBkg = fake.getHistogram(variable=var,
                                                    selection=sel,
                                                    cacheIt=True)
            nominalHistosSimBkg = {
                g.name: g.getHistogram(variable=var,
                                       selection=sel,
                                       cacheIt=True)
                for g in simBkgs
            }
            # all backgrounds: fake + simulated
            nominalHistosBkg = {'fake': nominalHistoFakeBkg}
            nominalHistosBkg.update(nominalHistosSimBkg)
            nominalHistoTotBkg = buildTotBkg(histoFakeBkg=nominalHistoFakeBkg,
                                             histosSimBkgs=nominalHistosSimBkg)
            statErrBand = buildStat(nominalHistoTotBkg)
            systErrBand = buildSyst(fake=fake,
                                    simBkgs=simBkgs,
                                    variable=var,
                                    selection=sel,
                                    fakeVariations=fakeSystematics,
                                    mcVariations=mcSystematics,
                                    verbose=verbose,
                                    printYield=print_summary_yield)

            plotHistos(histoData=nominalHistoData,
                       histoSignal=nominalHistoSign,
                       histoTotBkg=nominalHistoTotBkg,
                       histosBkg=nominalHistosBkg,
                       statErrBand=statErrBand,
                       systErrBand=systErrBand,
                       stack_order=names_stacked_groups,
                       topLabel=sel,
                       canvasName=(sel + '_' + var),
                       outdir=outputDir,
                       options=opts,
                       printYieldSummary=print_summary_yield)
    for group in plot_groups:
        group.printVariationsSummary()
# Example #4
# 0
def runFill(opts):
    """
    Fill the histograms for each group, systematic, and region.

    Reads from `opts`: batch, input_fake, input_other, output_dir, verbose,
    debug, unblind, require_tight_tight, samples_dir, tight_def,
    include_regions, exclude_regions, regions, disable_cache, quick_test
    (plus whatever parse_group_option and get_list_of_syst_to_fill read).

    In batch mode, one job is submitted per (group, systematic, region),
    for the systematics the group needs. Otherwise, for each group and
    each systematic it needs, the input chain is built and count_and_fill
    is called: once per cut that already has a cached entry list, and
    once for all the cuts without one. The histograms are written to one
    file per selection, and the entry lists are saved.
    """
    batchMode = opts.batch
    inputFakeDir = opts.input_fake
    inputGenDir = opts.input_other
    outputDir = opts.output_dir
    verbose = opts.verbose
    debug = opts.debug
    blinded = not opts.unblind
    tightight = opts.require_tight_tight

    if debug: dataset.Dataset.verbose_parsing = True
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(
        opts.samples_dir)
    if not skip_charge_flip:
        groups.append(
            dataset.DatasetGroup.build_qflip_from_simulated_samples(groups))
    groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake())
    groups = parse_group_option(opts, groups)
    if verbose:
        print('\n'.join(
            "group {0} : {1} samples".format(g.name, len(g.datasets))
            for g in groups))
    if debug:
        print('\n'.join("group {0} : {1} samples: {2}".format(
            g.name, len(g.datasets), '\n\t' + '\n\t'.join(d.name
                                                          for d in g.datasets))
                        for g in groups))
    if verbose:
        print("filling histos")
    # eval will take care of aborting on typos
    # NOTE(review): this evaluates an arbitrary expression taken from the
    # options; only safe as long as the options come from a trusted user
    onthefly_tight_def = eval(opts.tight_def) if opts.tight_def else None
    mkdirIfNeeded(outputDir)
    systematics = get_list_of_syst_to_fill(opts)
    regions = regions_to_plot(opts.include_regions, opts.exclude_regions,
                              opts.regions)
    if verbose:
        print("about to loop over these systematics:\n %s" % str(systematics))
        print("about to loop over these regions:\n %s" % str(regions))
    if batchMode:
        for group in groups:
            for systematic in systematics:
                if systUtils.Group(group.name).isNeededForSys(systematic):
                    opts.syst = systematic
                    for selection in regions:
                        submit_batch_fill_job_per_group_per_selection(
                            group=group, selection=selection, opts=opts)
    else:
        for group in groups:
            # Filter into a per-group list. Rebinding `systematics` here
            # would let one group's filter drop systematics that the
            # following groups still need.
            group_systematics = [
                s for s in systematics
                if systUtils.Group(group.name).isNeededForSys(s)
            ]
            if not group_systematics:
                print("warning, empty syst list. You should have at least the nominal")
            for systematic in group_systematics:
                # note to self: here you will want to use a modified Sample.setHftInputDir
                # for now we just have the fake syst that are in the nominal tree
                tree_name = 'hlfv_tuple'
                chain = IndexedChain(tree_name)
                input_dir = inputFakeDir if group.name == 'fake' else inputGenDir
                for ds in group.datasets:
                    chain.Add(
                        os.path.join(
                            input_dir,
                            systUtils.Sample(
                                ds.name,
                                group.name).setSyst(systematic).filename))
                if verbose:
                    print("{0} : {1} entries from {2} samples".format(
                        group.name, chain.GetEntries(), len(group.datasets)))
                chain.cache_directory = os.path.abspath('./selection_cache/' +
                                                        group.name + '/')
                formulas = selection_formulas()
                tcuts = [r.TCut(reg, formulas[reg]) for reg in regions]
                chain.retrieve_entrylists(tcuts)
                counters_pre, histos_pre = dict(), dict()
                counters_npre, histos_npre = dict(), dict()
                if opts.disable_cache:
                    cached_tcuts, uncached_tcuts = [], tcuts
                else:
                    cached_tcuts = chain.tcuts_with_existing_list()
                    uncached_tcuts = chain.tcuts_without_existing_list()
                if verbose:
                    print('filling cached cuts:  ' + ' '.join(
                        [c.GetName() for c in cached_tcuts]))
                for cut in cached_tcuts:
                    chain.preselect(cut)
                    c_pre, h_pre = count_and_fill(
                        chain=chain,
                        sample=group.name,
                        syst=systematic,
                        verbose=verbose,
                        debug=debug,
                        blinded=blinded,
                        onthefly_tight_def=onthefly_tight_def,
                        tightight=tightight,
                        quicktest=opts.quick_test,
                        cached_cut=cut)
                    out_filename = (systUtils.Group(
                        group.name).setSyst(systematic).setHistosDir(
                            outputDir).setCurrentSelection(
                                cut.GetName())).filenameHisto
                    writeObjectsToFile(out_filename, h_pre, verbose)
                    counters_pre = dictSum(counters_pre, c_pre)
                    histos_pre = dictSum(histos_pre, h_pre)
                if uncached_tcuts:
                    if verbose:
                        print('filling uncached cuts:  ' + ' '.join(
                            [c.GetName() for c in uncached_tcuts]))
                    counters_npre, histos_npre = count_and_fill(
                        chain=chain,
                        sample=group.name,
                        syst=systematic,
                        verbose=verbose,
                        debug=debug,
                        blinded=blinded,
                        onthefly_tight_def=onthefly_tight_def,
                        tightight=tightight,
                        quicktest=opts.quick_test,
                        noncached_cuts=uncached_tcuts)
                    # one output file per selection
                    for sel, histos in histos_npre.items():
                        out_filename = (systUtils.Group(
                            group.name).setSyst(systematic).setHistosDir(
                                outputDir).setCurrentSelection(sel)
                                        ).filenameHisto
                        writeObjectsToFile(out_filename, histos, verbose)
                chain.save_lists()