Python Plotter.linearize_hist Examples, Utilities.Plotter.linearize_hist Python Examples

Example #1

0

Show file

File: htt_nnlo_reweighting_study.py Project: jdulemba/NanoAOD_Analyses

                                bintitle, 'mtt'
                            ]))
                            fig.savefig(figname)
                            print('%s written' % figname)
                            plt.close()

                            # plot linearized view
                        fig, (ax, rax) = plt.subplots(
                            2,
                            1,
                            gridspec_kw={"height_ratios": (3, 1)},
                            sharex=True)
                        fig.subplots_adjust(hspace=.07)

                        #set_trace()
                        hline = Plotter.linearize_hist(hslice)
                        #set_trace()

                        ax, rax = Plotter.plot_stack1d(
                            ax,
                            rax,
                            hline,
                            xlabel='%s $\otimes$ %s' % (xtitle, ytitle),
                            xlimits=(
                                0,
                                len(
                                    hline.axis(
                                        hline.dense_axes()[0].name).edges()) -
                                1),
                            **mc_opts)

Example #2

0

Show file

File: check_nnlo_reweighting_yields_plotter.py Project: jdulemba/NanoAOD_Analyses

        ] * len(mtt_binlabels)
        mtt_bin_locs = np.linspace(
            (len(ctstar_binning) - 1) / 2, (len(ctstar_binning) - 1) *
            (len(mtt_binning) - 1) - (len(ctstar_binning) - 1) / 2,
            len(mtt_binning) - 1)
        vlines = [
            len(mtt_binlabels) * ybin for ybin in range(1, len(mtt_binlabels))
        ]  # last element is hardcoded from binning

        # easier to rename sparse axis than change linearize()
        tmp_histo = histo.copy()
        tmp_histo = tmp_histo.group(
            histo.sparse_axes()[0].name, process_axis,
            {key[0]: key[0]
             for key in histo.values().keys()})
        hline = Plotter.linearize_hist(tmp_histo, no_transpose=True)
        # revert sparse axis name to original
        hline = hline.group(hline.sparse_axes()[0].name, reweighting_axis,
                            {key[0]: key[0]
                             for key in histo.values().keys()})
        histo = hline

    xtitle, rebinning, x_lims = variables[hname]
    if rebinning != 1:
        histo = histo.rebin(*axes_to_sum, rebinning)

        # orig
    fig, (ax, rax) = plt.subplots(2,
                                  1,
                                  gridspec_kw={"height_ratios": (3, 1)},
                                  sharex=True)

Example #3

0

Show file

File: plot_NNLOqcd_dists.py Project: jdulemba/NanoAOD_Analyses

    }, {
        'name': 'mtt',
        'idx': 1
    }],
)

mtt_binning = nnlo_dict[nnlo_var].axis('mtt').edges()
ctstar_binning = nnlo_dict[nnlo_var].axis('ctstar').edges()
vlines = [(len(mtt_binning) - 1) * ybin
          for ybin in range(1,
                            len(ctstar_binning) - 1)]
#set_trace()

## linearize nnlo_hist
nnlo_lin_dict = {
    name: Plotter.linearize_hist(nnlo_dict[name].integrate('dataset'))
    for name in nnlo_dict.keys()
}

png_ext = 'StatUncs'
nnlo_leg = '(Stat.)'

axis_labels_dict = {
    'mtt': '$m_{t\\bar{t}}$',
    'ctstar': 'cos($\\theta^{*}_{t_{h}}$)'
}

############## Plot hists ################

### raw histo ###
nnlo_histo = nnlo_dict[nnlo_var]

Example #4

0

Show file

File: hem_invest.py Project: jdulemba/NanoAOD_Analyses

                     #    hep.cms.label(ax=ax, data=withData, paper=False, year=args.year, lumi=round(data_lumi_year['%ss' % args.lepton]/1000., 1))
 
                     #    #set_trace()
                     #    bintitle = '%sctstar%s' % (hslice.dense_axes()[1].edges()[ybin], hslice.dense_axes()[1].edges()[ybin+1])
                     #    bintitle = bintitle.replace('.', 'p')
                     #    figname = os.path.join(pltdir, '_'.join([jmult, args.lepton, lepcat, btagregion, bintitle, 'mtt']))
                     #    fig.savefig(figname)
                     #    print('%s written' % figname)
                     #    plt.close()
 
 
                         # plot linearized view 
                     fig, (ax, rax) = plt.subplots(2, 1, gridspec_kw={"height_ratios": (3, 1)}, sharex=True)
                     fig.subplots_adjust(hspace=.07)
 
                     hline = Plotter.linearize_hist(hslice)
                     
                     Plotter.plot_stack1d(ax, rax, hline, xlabel='%s $\otimes$ %s' % (xtitle, ytitle), xlimits=(0, len(hline.axis(hline.dense_axes()[0].name).edges())-1), **mc_opts)
 
                         # draw vertical lines separating ctstar bins
                     bin_sep_lines = [hslice.values()[('EWK',)].shape[0]*ybin for ybin in range(1, hslice.values()[('EWK',)].shape[1])]
                     for binline in bin_sep_lines:
                         ax.axvline(binline, color='k', linestyle='--')
                         if rax is not None: rax.axvline(binline, color='k', linestyle='--')
 
                         # plot unrolled x and y labels for each bin
                     ## plot x labels
                     if plot_xlabels is not None:
                         rax.set_xticks(np.arange(len(plot_xlabels)))
                         rax.set_xticklabels(plot_xlabels)
                         ax.tick_params(which='minor', bottom=False, top=False)

Example #5

0

Show file

def get_bkg_templates(tmp_rname):
    '''
    Function that writes linearized mtt vs costheta distributions to root file.
    '''
    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

        ## get data lumi and scale MC by lumi
    data_lumi_year = prettyjson.loads(open('%s/inputs/lumis_data.json' % proj_dir).read())[args.year]

        # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat
    
    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
        ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
        ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    rebin_histo = histo.rebin(yaxis_name, new_ybins)
    
    nbins = (len(xrebinning)-1)*(len(yrebinning)-1)
    
        ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ['*right', '*matchable', '*unmatchable', '*other']
    names = [dataset for dataset in sorted(set([key[0] for key in hdict[hname_to_use].values().keys()]))] # get dataset names in hists
    ttJets_cats = [name for name in names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...

        # use ttJets events that don't have PS weights for dedicated sys samples in 2016    
    if bkg_ttJets_fname is not None:
        ttJets_hdict = load(bkg_ttJets_fname)
        ttJets_histo = ttJets_hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat
        
            ## rebin x axis
        ttJets_histo = ttJets_histo.rebin(xaxis_name, new_xbins)
            ## rebin y axis
        ttJets_histo = ttJets_histo.rebin(yaxis_name, new_ybins)
        
        only_ttJets_names = [dataset for dataset in sorted(set([key[0] for key in ttJets_hdict[hname_to_use].values().keys()]))] # get dataset names in hists
        only_ttJets_cats = [name for name in only_ttJets_names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...


        ## make groups based on process
    process = hist.Cat("process", "Process", sorting='placement')
    process_cat = "dataset"

        # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot.recreate(tmp_rname, compression=uproot.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot.create(tmp_rname)

    if '3Jets' in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})
    if '4PJets' in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})

    for lep in ['Muon', 'Electron']:
        lepdir = 'mujets' if lep == 'Muon' else 'ejets'
    
        ## make groups based on process
        process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict='templates')
        
        lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
                # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
        if len(ttJets_cats) > 0:
            for tt_cat in ttJets_cats:
                ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})
    
        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis='dataset')
        histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate('leptype')

            # use ttJets events that don't have PS weights for dedicated sys samples in 2016    
        if bkg_ttJets_fname is not None:
            if len(only_ttJets_cats) > 0:
                for tt_cat in only_ttJets_cats:
                    ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                    ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                    lumi_correction.update({tt_cat: ttJets_eff_lumi})

            tt_histo = ttJets_histo.copy()
            tt_histo.scale(lumi_correction, axis='dataset')
            tt_histo = tt_histo.group(process_cat, process, {'TT' : ['ttJets_right', 'ttJets_matchable', 'ttJets_unmatchable', 'ttJets_other']})[:, :, :, lep, :, :].integrate('leptype')


        for jmult in njets_to_run:
            iso_sb    = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagPass', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            btag_sb   = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Tight'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            double_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            sig_histo = Plotter.linearize_hist(histo[:, :, jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
        
            for sys in sys_to_use.keys():
                if sys not in histo.axis('sys')._sorted:
                    print('\n\n   Systematic %s not available, skipping\n\n' % sys)
                    continue

                #set_trace()
                sysname, onlyTT = sys_to_use[sys]
                if 'LEP' in sysname: sysname = sysname.replace('LEP', lepdir[0])
        
                qcd_est_histo = Plotter.QCD_Est(sig_reg=sig_histo, iso_sb=iso_sb, btag_sb=btag_sb, double_sb=double_sb, norm_type='Sideband', shape_region='BTAG', norm_region='BTAG', sys=sys)

                    ## write nominal and systematic variations for each topology to file
                for proc in sorted(set([key[0] for key in qcd_est_histo.values().keys()])):
                    if (proc != 'TT') and onlyTT: continue
                    if (proc == 'data_obs') and not (sys == 'nosys'): continue
                    name = proc+lepdir if proc == 'QCD' else proc
                    print(lep, jmult, sys, name)
                    outhname = '_'.join([jmult, lepdir, name]) if sys == 'nosys' else '_'.join([jmult, lepdir, name, sysname])
                    template_histo = qcd_est_histo[proc].integrate('process')
                    if (('ue' in sys) or ('hdamp' in sys) or ('mtop' in sys)) and (bkg_ttJets_fname is not None):
                        tt_lin_histo = Plotter.linearize_hist(tt_histo['TT', 'nosys', jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
                        tt_lin_histo = tt_lin_histo['TT', 'nosys'].integrate('process').integrate('sys')
                        template_histo = substitute_ttJets(sys_histo=template_histo, ttJets_histo=tt_lin_histo, ttJets_PS_histo=sig_histo['TT', 'nosys'].integrate('process').integrate('sys'))

                    if ((sys == 'mtop1695') or (sys == 'mtop1755')) and (templates_to_smooth[proc]):
                        template_histo = scale_mtop3gev(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo)
                        #set_trace()

                    if (sys != 'nosys') and (args.smooth) and (templates_to_smooth[proc]):
                        template_histo = smoothing(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo, nbinsx=len(xrebinning)-1, nbinsy=len(yrebinning)-1)#, debug=True if proc=='VV' else False)
                        #set_trace()

                        ## save template histos to coffea dict
                    if jmult == '3Jets':
                        histo_dict_3j[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo
                    if jmult == '4PJets':
                        histo_dict_4pj[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo

                        ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if '3Jets' in njets_to_run:
        coffea_out_3j = '%s/templates_lj_3Jets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_3Jets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_3j, coffea_out_3j)
        print("%s written" % coffea_out_3j)
    if '4PJets' in njets_to_run:
        coffea_out_4pj = '%s/templates_lj_4PJets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_4PJets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_4pj, coffea_out_4pj)
        print("%s written" % coffea_out_4pj)

    
    upfout.close()
    print('%s written' % tmp_rname)

Example #6

0

Show file

File: data_hem_comp.py Project: jdulemba/NanoAOD_Analyses

            for lepcat in sorted(set([key[3] for key in histo.values().keys()])):
                for btagregion in sorted(set([key[2] for key in histo.values().keys()])):
                    pltdir = os.path.join(outdir, args.lepton, jmult, lepcat, btagregion)
                    if not os.path.isdir(pltdir):
                        os.makedirs(pltdir)

                    if ((lepcat, btagregion) == ('Tight', 'btagPass')) and (hname == 'mtt_vs_tlep_ctstar_abs'):
                        continue
                    #else:
                    #    withData = variables[hname][-1]
                    print(', '.join([jmult, lepcat, btagregion, hname])) 

                    hslice = histo[:, jmult, btagregion, lepcat].integrate('jmult').integrate('lepcat').integrate('btag')

                    # plot linearized view 
                    hline_before = Plotter.linearize_hist(hslice['Before'].integrate('hem'))
                    hline_after = Plotter.linearize_hist(hslice['After'].integrate('hem'))
                    hline_edges = hline_before.axis(hline_before.dense_axes()[0].name).edges()

                        # plot original yields
                    fig, ax = plt.subplots()
                    fig.subplots_adjust(hspace=.07)

                    Plotter.plot_1D(hline_before.values()[()], hline_edges, xlimits=(min(hline_edges), max(hline_edges)), ax=ax, label='Runs $<$ 319077')
                    Plotter.plot_1D(hline_after.values()[()], hline_edges, xlimits=(min(hline_edges), max(hline_edges)), xlabel='%s $\otimes$ %s' % (xtitle, ytitle), color='r', ax=ax, label='Runs $\\geq$ 319077')
                    ax.legend(loc='upper right')

                        # draw vertical lines separating ctstar bins
                    bin_sep_lines = [hslice.values()[('Before',)].shape[0]*ybin for ybin in range(1, hslice.values()[('Before',)].shape[1])]
                    for binline in bin_sep_lines:
                        ax.axvline(binline, color='k', linestyle='--')

Example #7

0

Show file

File: make_raw_templates.py Project: jdulemba/NanoAOD_Analyses

def get_bkg_templates(tmp_rname):
    """
    Function that writes linearized mtt vs costheta distributions to root file.
    """
    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(
        bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use][
        Plotter.
        nonsignal_samples]  # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    rebin_histo = histo.rebin(yaxis_name, new_ybins)

    ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = [
        "*right", "*matchable", "*unmatchable", "*sl_tau", "*other"
    ]
    names = [
        dataset
        for dataset in sorted(set([key[0] for key in histo.values().keys()]))
    ]  # get dataset names in hists
    ttJets_cats = [
        name for name in names
        if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])
    ]  # gets ttJets(_PS)_other, ...

    ## make groups based on process
    process = hist.Cat("process", "Process", sorting="placement")
    process_cat = "dataset"

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(
        4)) if os.path.isfile(tmp_rname) else uproot3.create(tmp_rname)

    if "3Jets" in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })
    if "4PJets" in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })

    for lep in ["Muon", "Electron"]:
        orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

        #set_trace()
        ## make groups based on process
        process_groups = plt_tools.make_dataset_groups(lep,
                                                       args.year,
                                                       samples=names,
                                                       gdict="templates")
        #process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict="dataset")

        lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
        # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
        if len(ttJets_cats) > 0:
            for tt_cat in ttJets_cats:
                ttJets_lumi_topo = "_".join(tt_cat.split(
                    "_")[:-2]) if "sl_tau" in tt_cat else "_".join(
                        tt_cat.split("_")
                        [:-1])  # gets ttJets[SL, Had, DiLep] or ttJets_PS
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})

        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis="dataset")
        histo = histo.group(process_cat, process,
                            process_groups)[:, :, :,
                                            lep, :, :].integrate("leptype")

        #set_trace()
        systs = sorted(set([key[1] for key in histo.values().keys()]))
        systs.insert(0, systs.pop(
            systs.index("nosys")))  # move "nosys" to the front

        # loop over each jet multiplicity
        for jmult in njets_to_run:
            lepdir = orig_lepdir.replace("NJETS", jmult.lower())

            # get sideband and signal region hists
            cen_sb_histo = Plotter.linearize_hist(
                histo[:, "nosys", jmult,
                      btag_reg_names_dict["Central"]["reg"]].integrate(
                          "jmult").integrate("btag").integrate("sys"))
            #up_sb_histo = histo[:, "nosys", jmult, btag_reg_names_dict["Up"]["reg"]].integrate("jmult").integrate("btag")
            #dw_sb_histo = histo[:, "nosys", jmult, btag_reg_names_dict["Down"]["reg"]].integrate("jmult").integrate("btag")
            sig_histo = Plotter.linearize_hist(
                histo[:, :, jmult,
                      btag_reg_names_dict["Signal"]["reg"]].integrate(
                          "jmult").integrate("btag"))

            # loop over each systematic
            for sys in systs:
                if sys not in systematics.template_sys_to_name[
                        args.year].keys():
                    continue

                sys_histo = sig_histo[:, sys].integrate(
                    "sys") if sys in systematics.ttJets_sys.values(
                    ) else Plotter.BKG_Est(
                        sig_reg=sig_histo[:, sys].integrate("sys"),
                        sb_reg=cen_sb_histo,
                        norm_type="SigMC",
                        sys=sys,
                        ignore_uncs=True)

                ## write nominal and systematic variations for each topology to file
                #for proc in sorted(set([key[0] for key in sig_histo.values().keys()])):
                for proc in sorted(
                        set([key[0] for key in sys_histo.values().keys()])):
                    if ("tt" not in proc) and (
                            sys in systematics.ttJets_sys.values()):
                        continue
                    #if (proc != "tt") and (sys in systematics.ttJets_sys.values()): continue
                    if (proc == "data_obs") and not (sys == "nosys"): continue
                    if not sys_histo[proc].values().keys():
                        #if not sig_histo[proc, sys].values().keys():
                        print(
                            f"Systematic {sys} for {lep} {jmult} {proc} not found, skipping"
                        )
                        continue

                    print(args.year, lep, jmult, sys, proc)
                    #set_trace()
                    outhname = "_".join(
                        list(
                            filter(None, [
                                proc, systematics.template_sys_to_name[
                                    args.year][sys][0], lepdir,
                                (args.year)[-2:]
                            ])))
                    if "LEP" in outhname:
                        outhname = outhname.replace(
                            "LEP",
                            "muon") if lep == "Muon" else outhname.replace(
                                "LEP", "electron")

                    template_histo = sys_histo[proc].integrate("process")
                    #template_histo = sig_histo[proc, sys].integrate("process").integrate("sys")

                    #set_trace()
                    ## save template histos to coffea dict
                    if jmult == "3Jets":
                        histo_dict_3j[lep][
                            f"{proc}_{sys}"] = template_histo.copy()
                    if jmult == "4PJets":
                        histo_dict_4pj[lep][
                            f"{proc}_{sys}"] = template_histo.copy()

                        ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if "3Jets" in njets_to_run:
        coffea_out_3j = os.path.join(
            outdir,
            f"test_raw_templates_lj_3Jets_bkg_{args.year}_{jobid}.coffea")
        save(histo_dict_3j, coffea_out_3j)
        print(f"{coffea_out_3j} written")
    if "4PJets" in njets_to_run:
        coffea_out_4pj = os.path.join(
            outdir,
            f"test_raw_templates_lj_4PJets_bkg_{args.year}_{jobid}.coffea")
        save(histo_dict_4pj, coffea_out_4pj)
        print(f"{coffea_out_4pj} written")

    upfout.close()
    print(f"{tmp_rname} written")

Example #8

0

Show file

File: make_raw_templates.py Project: jdulemba/NanoAOD_Analyses

def get_sig_templates(tmp_rname):
    """
    Function that writes linearized mtt vs costheta distributions to root file.
    """
    widthTOname = lambda width: str(width).replace(".", "p")
    nameTOwidth = lambda width: str(width).replace("p", ".")

    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(
        sig_fnames) if len(sig_fnames) > 1 else load(sig_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError(f"{hname_to_use} not found in file")
    xrebinning, yrebinning = linearize_binning
    #xrebinning, yrebinning = mtt_ctstar_2d_binning
    histo = hdict[hname_to_use]  # process, sys, jmult, leptype, btag, lepcat

    #set_trace()
    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)

    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    histo = histo.rebin(yaxis_name, new_ybins)
    rebin_histo = histo[Plotter.signal_samples, :, :, :,
                        "btagPass"].integrate("btag")

    names = [
        dataset for dataset in sorted(
            set([key[0] for key in rebin_histo.values().keys()]))
    ]  # get dataset names in hists

    signals = sorted(set([key[0] for key in rebin_histo.values().keys()]))
    signals = [sig for sig in signals
               if "TTJetsSL" in sig]  # only use SL decays

    systs = sorted(set([key[1] for key in rebin_histo.values().keys()]))
    systs.insert(0,
                 systs.pop(systs.index("nosys")))  # move "nosys" to the front

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(
        4)) if os.path.isfile(tmp_rname) else uproot3.create(tmp_rname)

    if "3Jets" in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })
    if "4PJets" in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })

        # write signal dists to temp file
    for lep in ["Muon", "Electron"]:
        orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

        # scale by lumi
        lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis="dataset")
        process_groups = plt_tools.make_dataset_groups(lep,
                                                       args.year,
                                                       samples=names,
                                                       gdict="templates")
        histo = histo.group(
            "dataset", hist.Cat("process", "Process", sorting="placement"),
            process_groups)

        for jmult in njets_to_run:
            lepdir = orig_lepdir.replace("NJETS", jmult.lower())

            #set_trace()
            lin_histo = Plotter.linearize_hist(
                histo[:, :, jmult,
                      lep].integrate("jmult").integrate("leptype"))
            for signal in signals:
                if "Int" in signal:
                    boson, mass, width, pI, wt = tuple(signal.split("_"))
                else:
                    boson, mass, width, pI = tuple(signal.split("_"))
                sub_name = "_".join([
                    "%s%s" % (boson[0], mass[1:]),
                    "relw%s" % widthTOname(width).split("W")[-1],
                    pI.lower(), wt
                ]) if pI == "Int" else "_".join([
                    "%s%s" % (boson[0], mass[1:]),
                    "relw%s" % widthTOname(width).split("W")[-1],
                    pI.lower()
                ])

                #set_trace()
                for sys in systs:
                    if sys not in systematics.template_sys_to_name[
                            args.year].keys():
                        continue
                    if not lin_histo[signal, sys].values().keys():
                        print(
                            f"Systematic {sys} for {lep} {jmult} {signal} not found, skipping"
                        )
                        continue

                    print(args.year, lep, jmult, sub_name, sys)
                    outhname = "_".join(
                        list(
                            filter(None, [
                                sub_name, systematics.template_sys_to_name[
                                    args.year][sys][0], lepdir,
                                (args.year)[-2:]
                            ])))
                    if "LEP" in outhname:
                        outhname = outhname.replace(
                            "LEP",
                            "muon") if lep == "Muon" else outhname.replace(
                                "LEP", "electron")

                    template_histo = lin_histo[signal, sys].integrate(
                        "process").integrate("sys")

                    ## save template histos to coffea dict
                    if jmult == "3Jets":
                        histo_dict_3j[lep][
                            f"{signal}_{sys}"] = template_histo.copy()
                    if jmult == "4PJets":
                        histo_dict_4pj[lep][
                            f"{signal}_{sys}"] = template_histo.copy()

                        ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if "3Jets" in njets_to_run:
        coffea_out_3j = os.path.join(
            outdir,
            f"test_raw_templates_lj_3Jets_sig_{args.year}_{jobid}.coffea")
        save(histo_dict_3j, coffea_out_3j)
        print(f"{coffea_out_3j} written")
    if "4PJets" in njets_to_run:
        coffea_out_4pj = os.path.join(
            outdir,
            f"test_raw_templates_lj_4PJets_sig_{args.year}_{jobid}.coffea")
        save(histo_dict_4pj, coffea_out_4pj)
        print(f"{coffea_out_4pj} written")

    upfout.close()
    print(f"{tmp_rname} written")

Example #9

0

Show file

File: make_nnlo_reweighting_vars_plotter.py Project: jdulemba/NanoAOD_Analyses

        #True, [6*ybin for ybin in range(1, 6)]), # last element is hardcoded from binning
    ]
    nnlo_dict = Plotter.root_converters_dict_to_hist(nnlo_file, vars=[val[1] for val in variables],
        sparse_axes_list=[{"name": "dataset", "label" : "Event Process", "fill" : "nnlo"}],
        dense_axes_list=[{"name": "mtt", "idx" : 1}, {"name" : "ctstar", "idx" : 0}],
        transpose_da=True,
        #dense_axes_list=[{"name" : "ctstar", "idx" : 0}, {"name": "mtt", "idx" : 1}],
    )

    mtt_binning = nnlo_dict["xsec_central"].axis("mtt").edges()
    #mtt_binlabels = ["%s $\leq$ m($t\\bar{t}$) $\leq$ %s" % (mtt_binning[bin], mtt_binning[bin+1]) for bin in range(len(mtt_binning)-1)]
    ctstar_binning = nnlo_dict["xsec_central"].axis("ctstar").edges()
    vlines = [(len(mtt_binning)-1)*ybin for ybin in range(1, len(ctstar_binning)-1)]
    #set_trace()
    # linearize nnlo_hist
    nnlo_dict = {name: Plotter.linearize_hist(nnlo_dict[name].integrate("dataset")) for name in nnlo_dict.keys()}

    png_ext = "StatUncs"
    nnlo_leg = "(Stat.)"
else:
    variables = [
        ("thad_pt", "thadpt_cen" if nnlo_fname == "matrixhists_NNPDF.root" else "thadpth_xs", "$p_{T}$($t_{h}$) [GeV]", "$\dfrac{d \\sigma}{d p_{T}(t_{h})}$", False, None),
        ("mtt_vs_thad_ctstar", "ttm+cts_cen" if nnlo_fname == "matrixhists_NNPDF.root" else "ttm+cts_xs", "m($t\\bar{t}$) $\otimes$ cos($\\theta^{*}_{t_{h}}$)", "$\dfrac{d^{2} \\sigma}{d m(t\\bar{t}) d cos(\\theta^{*}_{t_{h}})}$", True, [6*ybin for ybin in range(1, 6)]), # last element is hardcoded from binning
    ]

    nnlo_dict = Plotter.root_converters_dict_to_hist(nnlo_file, vars=[val[1] for val in variables],
        sparse_axes_list=[{"name": "dataset", "label" : "Event Process", "fill" : "nnlo"}],
    )

    png_ext = "StatUncs" if nnlo_fname == "matrixhists_NNPDF.root" else "Scale_StatUncs"
    nnlo_leg = "(Stat.)" if nnlo_fname == "matrixhists_NNPDF.root" else "(Scale+Stat.)"