Esempio n. 1
0
def smooth_bkg_templates(fnames_to_run):
    """
    Smooth the systematic background templates and save them as coffea files.

    Every file in ``fnames_to_run`` is loaded and treated as a mapping
    ``lepton -> {template name -> histogram}``. The jet multiplicity of a file
    is taken from its basename ("3Jets" if that string is present, "4PJets"
    otherwise). Templates whose systematic is "nosys" are stored as a copy of
    the nominal template; every other template is passed through
    ``Plotter.smoothing_mttbins`` together with the "<proc>_nosys" template of
    the same process.

    One output file per jet multiplicity listed in ``njets_to_run`` is written
    to ``input_dir``. Files whose jet multiplicity is not in ``njets_to_run``
    are skipped with a message (there is no output container for them).

    Parameters
    ----------
    fnames_to_run : iterable of str
        Paths of the coffea files holding the templates to smooth.
    """
    # one output container per requested jet multiplicity, in a fixed order
    smoothed_templates = {
        jmult: processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })
        for jmult in ("3Jets", "4PJets") if jmult in njets_to_run
    }

    for bkg_file in fnames_to_run:
        jmult = "3Jets" if "3Jets" in os.path.basename(bkg_file) else "4PJets"
        if jmult not in smoothed_templates:
            # nothing would be saved for this multiplicity, so do not process it
            print(f"{bkg_file} skipped: {jmult} is not in njets_to_run")
            continue

        hdict = load(bkg_file)
        for lep in hdict.keys():
            templates = hdict[lep]
            for tname, orig_template in templates.items():
                # template names look like "<proc>_<sys>"; "data_obs" itself
                # contains an underscore so it is handled explicitly
                proc = "data_obs" if "data_obs" in tname else tname.split(
                    "_")[0]
                syst = sorted(filter(None, tname.split(f"{proc}_")))[0]
                print(lep, jmult, syst, proc)

                nominal = templates[f"{proc}_nosys"]
                if syst == "nosys":
                    # the nominal template is kept as is
                    smoothed_histo = nominal
                else:
                    # perform smoothing
                    smoothed_histo = Plotter.smoothing_mttbins(
                        nosys=nominal,
                        systematic=orig_template,
                        mtt_centers=mtt_centers,
                        nbinsx=len(linearize_binning[0]) - 1,
                        nbinsy=len(linearize_binning[1]) - 1)

                ## save template histos to coffea dict (always as a copy)
                smoothed_templates[jmult][lep][tname] = smoothed_histo.copy()

    for jmult, histo_dict in smoothed_templates.items():
        coffea_out = os.path.join(
            input_dir,
            f"test_smoothed_templates_lj_{jmult}_bkg_{args.year}_{jobid}.coffea"
        )
        save(histo_dict, coffea_out)
        print(f"{coffea_out} written")
ctstar_binlabels = ["%s $\leq$ cos($\\theta^{*}_{t_{h}}$) $\leq$ %s" % (ctstar_binning[bin], ctstar_binning[bin+1]) for bin in range(len(ctstar_binning)-1)]*len(mtt_binlabels)

    ## get values from NNLO root file
nnlo_fname = "MATRIX_ttmVStheta.root" # "xsec_central" dist has only statistical uncs
#nnlo_fname = "matrixhists_NNPDF.root" # "cen" dist has only statistical uncs
#nnlo_fname = "MATRIX_17_abs.root" # has scale and statistical uncs
nnlo_file = convert_histo_root_file(os.path.join(proj_dir, "NNLO_files", nnlo_fname))
if nnlo_fname == "MATRIX_ttmVStheta.root":
    variables = [
        ("mtt_vs_thad_ctstar", "xsec_central", "m($t\\bar{t}$) $\otimes$ cos($\\theta^{*}_{t_{h}}$)", "$\dfrac{d^{2} \\sigma}{d m(t\\bar{t}) d cos(\\theta^{*}_{t_{h}})}$",
        True),
        #True, [6*ybin for ybin in range(1, 6)]), # last element is hardcoded from binning
    ]
    nnlo_dict = Plotter.root_converters_dict_to_hist(nnlo_file, vars=[val[1] for val in variables],
        sparse_axes_list=[{"name": "dataset", "label" : "Event Process", "fill" : "nnlo"}],
        dense_axes_list=[{"name": "mtt", "idx" : 1}, {"name" : "ctstar", "idx" : 0}],
        transpose_da=True,
        #dense_axes_list=[{"name" : "ctstar", "idx" : 0}, {"name": "mtt", "idx" : 1}],
    )

    mtt_binning = nnlo_dict["xsec_central"].axis("mtt").edges()
    #mtt_binlabels = ["%s $\leq$ m($t\\bar{t}$) $\leq$ %s" % (mtt_binning[bin], mtt_binning[bin+1]) for bin in range(len(mtt_binning)-1)]
    ctstar_binning = nnlo_dict["xsec_central"].axis("ctstar").edges()
    vlines = [(len(mtt_binning)-1)*ybin for ybin in range(1, len(ctstar_binning)-1)]
    #set_trace()
    # linearize nnlo_hist
    nnlo_dict = {name: Plotter.linearize_hist(nnlo_dict[name].integrate("dataset")) for name in nnlo_dict.keys()}

    png_ext = "StatUncs"
    nnlo_leg = "(Stat.)"
else:
    variables = [
        ] * len(mtt_binlabels)
        mtt_bin_locs = np.linspace(
            (len(ctstar_binning) - 1) / 2, (len(ctstar_binning) - 1) *
            (len(mtt_binning) - 1) - (len(ctstar_binning) - 1) / 2,
            len(mtt_binning) - 1)
        vlines = [
            len(mtt_binlabels) * ybin for ybin in range(1, len(mtt_binlabels))
        ]  # last element is hardcoded from binning

        # easier to rename sparse axis than change linearize()
        tmp_histo = histo.copy()
        tmp_histo = tmp_histo.group(
            histo.sparse_axes()[0].name, process_axis,
            {key[0]: key[0]
             for key in histo.values().keys()})
        hline = Plotter.linearize_hist(tmp_histo, no_transpose=True)
        # revert sparse axis name to original
        hline = hline.group(hline.sparse_axes()[0].name, reweighting_axis,
                            {key[0]: key[0]
                             for key in histo.values().keys()})
        histo = hline

    xtitle, rebinning, x_lims = variables[hname]
    if rebinning != 1:
        histo = histo.rebin(*axes_to_sum, rebinning)

        # orig
    fig, (ax, rax) = plt.subplots(2,
                                  1,
                                  gridspec_kw={"height_ratios": (3, 1)},
                                  sharex=True)
                        }  # use SL+DL+Had events

                        fig, (ax, rax) = plt.subplots(
                            2,
                            1,
                            gridspec_kw={"height_ratios": (3, 1)},
                            sharex=True)
                        fig.subplots_adjust(hspace=.07)

                        for rewt, hslice in allTT_hslices.items():
                            vals, bins = hslice.values()[(
                            )], hslice.axis(xaxis_name).edges()
                            ax = Plotter.plot_1D(
                                vals,
                                bins,
                                xlimits=x_lims,
                                ax=ax,
                                histtype='step',
                                label=rewt_style_dict[rewt][0],
                                color=rewt_style_dict[rewt][1])

                            if (rewt != nominal_jobid) and (
                                    nominal_jobid in rewt_style_dict.keys()):
                                ratio_vals, ratio_bins = Plotter.get_ratio_arrays(
                                    num_vals=vals,
                                    denom_vals=allTT_hslices[nominal_jobid].
                                    values()[()],
                                    input_bins=bins)
                                rax.step(ratio_bins,
                                         ratio_vals,
                                         where='post',
                                         **{
Esempio n. 5
0
                            x_lims = (0., 0.1) if lepcat == 'Tight' else (0.,
                                                                          0.5)

                    #if hname == 'mass_disc':
                    #    x_lims = (3., 13.) if jmult == '3Jets' else (5., 15.)
                    #if hname == 'full_disc':
                    #    x_lims = (6., 20.) if jmult == '3Jets' else (10., 22.)

                    mc_opts = {
                        #    'mcorder' : ['QCD', 'EWK', 'singlet', 'ttJets'] if not ttJets_cats else ['QCD', 'EWK', 'singlet', 'ttJets_other', 'ttJets_unmatchable', 'ttJets_matchable', 'ttJets_right']
                    }

                    if withData:
                        ax, rax = Plotter.plot_stack1d(ax,
                                                       rax,
                                                       hslice,
                                                       xlabel=xtitle,
                                                       xlimits=x_lims,
                                                       **mc_opts)
                    else:
                        ax = Plotter.plot_mc1d(ax,
                                               hslice,
                                               xlabel=xtitle,
                                               xlimits=x_lims,
                                               **mc_opts)

                    if hname == 'Jets_njets':
                        print(jmult)
                        yields_txt, yields_json = Plotter.get_samples_yield_and_frac(
                            hslice,
                            data_lumi_year['%ss' % args.lepton] / 1000.,
                            promptmc=True)
Esempio n. 6
0
                                      **smooth_styles)  # smoothed template
                if not np.array_equal(flat_sys.values()[()],
                                      orig_sys.values()[()]):
                    hep.plot.histplot(flat_sys.values()[()],
                                      nominal.dense_axes()[0].edges(),
                                      ax=ax,
                                      histtype="step",
                                      **flat_styles)  # flattened template
                ax.legend(loc="upper right", title=f"{sys}, {proc}")
                ax.set_ylabel("Events")
                ax.autoscale()
                ax.set_ylim(ax.get_ylim()[0], ax.get_ylim()[1] * 1.15)

                # plot relative deviation
                orig_masked_vals, orig_masked_bins = Plotter.get_ratio_arrays(
                    num_vals=orig_sys.values()[()] - nominal.values()[()],
                    denom_vals=nominal.values()[()],
                    input_bins=nominal.dense_axes()[0].edges())
                rax.step(orig_masked_bins,
                         orig_masked_vals,
                         where='post',
                         **orig_styles)
                if not np.array_equal(smooth_sys.values()[()],
                                      orig_sys.values()[()]):
                    smooth_masked_vals, smooth_masked_bins = Plotter.get_ratio_arrays(
                        num_vals=smooth_sys.values()[()] -
                        nominal.values()[()],
                        denom_vals=nominal.values()[()],
                        input_bins=nominal.dense_axes()[0].edges())
                    rax.step(smooth_masked_bins,
                             smooth_masked_vals,
                             where='post',
Esempio n. 7
0
                plot.plot1d(
                    hslice,
                    overlay=hslice.axes()[0].name,
                    ax=ax,
                    clear=False,
                    line_opts={'linestyle': '-'},
                )

                # norm
                #set_trace()
                for corr in sorted(hslice.values().keys()):
                    Plotter.plot_1D(values=hslice.values()[corr] /
                                    np.sum(hslice.values()[corr]),
                                    bins=hslice.dense_axes()[0].edges(),
                                    ax=ax_norm,
                                    xlimits=x_lims,
                                    xlabel=xtitle,
                                    ylabel='Probability Density',
                                    label=corr[0],
                                    histtype='step')

                ## set legend and corresponding colors
            handles, labels = ax.get_legend_handles_labels()
            for idx, label in enumerate(labels):
                if label == '4Jets':
                    labels[idx] = jet_mults[label]
                    handles[idx].set_color('b')
                    handles[idx].set_linewidth(2)
                elif label == '5PJets':
                    labels[idx] = jet_mults[label]
                    handles[idx].set_color('g')
Esempio n. 8
0
                if not os.path.isdir(pltdir):
                    os.makedirs(pltdir)

                opts = {
                    "legend_title" : sys,
                    "maskData" : maskData,
                }

                sys_histo = hdict[(args.lepton, jmult, sys, hname)]
                sys_histo = sys_histo.group(process_cat, process, process_groups)

                fig, (ax, rax) = plt.subplots(2, 1, gridspec_kw={"height_ratios": (3, 1)}, sharex=True)
                fig.subplots_adjust(hspace=.07)

                if hname == "Jets_njets":
                    yields_txt, yields_json = Plotter.get_samples_yield_and_frac(sys_histo, data_lumi_year["%ss" % args.lepton]/1000., sys=sys)
                    frac_name = "%s_%s_yields_and_fracs_QCD_Est" % (sys, "_".join([jmult, args.lepton]))
                    plt_tools.print_table(yields_txt, filename="%s/%s.txt" % (pltdir, frac_name), print_output=True)
                    print("%s/%s.txt written" % (pltdir, frac_name))
                    with open("%s/%s.json" % (pltdir, frac_name), "w") as out:
                        out.write(prettyjson.dumps(yields_json))
                
                ax, rax = Plotter.plot_stack1d(ax, rax, sys_histo, xlabel=xtitle, xlimits=x_lims, **opts)
                
                    # add lepton/jet multiplicity label
                ax.text(
                    0.02, 0.92, "%s, %s" % (leptypes[args.lepton], jet_mults[jmult]),
                    fontsize=rcParams["font.size"]*0.9, horizontalalignment="left", verticalalignment="bottom", transform=ax.transAxes
                )
                        ## draw vertical lines for distinguishing different ctstar bins
                if vlines is not None:
                    ) + hslice['DL mu tau->l'].copy() + hslice[
                        'DL mu tau->h'].copy() + hslice['DL tau tau->ll'].copy(
                        ) + hslice['DL tau tau->lh'].copy(
                        ) + hslice['DL tau tau->hh'].copy()
                    #set_trace()

                    mc_opts = {
                        #'mcorder' : ['QCD', 'EWK', 'singlet', 'ttJets'] if not ttJets_cats else ['QCD', 'EWK', 'singlet', 'ttJets_other', 'ttJets_unmatchable', 'ttJets_matchable', 'ttJets_right']
                        #'maskData' : not withData
                    }

                    fig, ax = plt.subplots()
                    fig.subplots_adjust(hspace=.07)
                    ax = Plotter.plot_mc1d(ax,
                                           plt_histo,
                                           xlabel=xtitle,
                                           xlimits=x_lims,
                                           **mc_opts)

                    # add lepton/jet multiplicity label
                    #set_trace()
                    ax.text(0.02,
                            0.88,
                            "%s, %s\n%s" % (lep_cats[lepcat], jet_mults[jmult],
                                            btag_cats[btagregion]),
                            fontsize=rcParams['font.size'] * 0.75,
                            horizontalalignment='left',
                            verticalalignment='bottom',
                            transform=ax.transAxes)
                    hep.cms.cmslabel(
                        ax=ax,
Esempio n. 10
0
                        os.makedirs(pltdir)

                    print(', '.join([jmult, lepcat, btagregion, hname])) 
                    hslice = histo[:, jmult, btagregion, lepcat].integrate('jmult').integrate('lepcat').integrate('btag')

                    if hname == 'Lep_iso':
                        if args.lepton == 'Muon':
                            x_lims = (0., 0.15) if lepcat == 'Tight' else (0.15, 1.)
                        if args.lepton == 'Electron':
                            x_lims = (0., 0.1) if lepcat == 'Tight' else (0., 0.5)

                        # plot original yields
                    fig, ax = plt.subplots()
                    fig.subplots_adjust(hspace=.07)

                    Plotter.plot_1D(hslice.values()[('Before',)], hslice.axis(xaxis_name).edges(), xlimits=x_lims, ax=ax, label='Runs $<$ 319077')
                    Plotter.plot_1D(hslice.values()[('After',)], hslice.axis(xaxis_name).edges(), xlimits=x_lims, xlabel=xtitle, color='r', ax=ax, label='Runs $\\geq$ 319077')
                    ax.legend(loc='upper right')

                    if hname == 'Jets_njets':
                        print(jmult)
                        #set_trace() 
                        rows = [("Lumi: %s fb^-1" % format(data_lumi_year['%ss' % args.lepton]/1000., '.1f'), "Yield", "Error", "Frac")]
                        rows += [("Runs < 319077", format(sum(hslice.values(overflow='all')[('Before',)]), '.1f'), format(np.sqrt(sum(hslice.values(overflow='all', sumw2=True)[('Before',)][1])), '.1f'),
                            format(sum(hslice.values(overflow='all')[('Before',)])/sum(hslice.sum('hem').values(overflow='all')[()]), '.3f'))]
                        rows += [("Runs >= 319077", format(sum(hslice.values(overflow='all')[('After',)]), '.1f'), format(np.sqrt(sum(hslice.values(overflow='all', sumw2=True)[('After',)][1])), '.1f'),
                            format(sum(hslice.values(overflow='all')[('After',)])/sum(hslice.sum('hem').values(overflow='all')[()]), '.3f'))]
                        rows += [("Total", format(sum(hslice.sum('hem').values(overflow='all')[()]), '.1f'), format(np.sqrt(sum(hslice.sum('hem').values(overflow='all', sumw2=True)[()][1])), '.1f'), "")]

                        frac_name = '%s_yields_and_fracs.txt' % '_'.join([jmult, args.lepton, lepcat, btagregion])
                        plt_tools.print_table(rows, filename=os.path.join(pltdir, frac_name), print_output=True)
def get_bkg_templates(tmp_rname):
    """
    Build the linearized background templates and write them to disk.

    The "mtt_vs_tlep_ctstar_abs" histogram is read from ``bkg_fnames``,
    restricted to ``Plotter.nonsignal_samples`` and rebinned along both dense
    axes according to ``linearize_binning``. For every lepton flavour, every
    jet multiplicity in ``njets_to_run``, every known systematic and every
    process, the linearized template is written to the ROOT file ``tmp_rname``
    and collected into one coffea file per jet multiplicity in ``outdir``.

    Parameters
    ----------
    tmp_rname : str
        Path of the ROOT file to write (recreated if it already exists).

    Raises
    ------
    ValueError
        If the expected histogram is not present in the input.
    TypeError
        If an entry of ``linearize_binning`` is neither a numpy array of bin
        edges nor a float/int rebinning factor.
    """
    ## variables that only need to be defined/evaluated once
    if len(bkg_fnames) > 1:
        hdict = plt_tools.add_coffea_files(bkg_fnames)
    else:
        hdict = load(bkg_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    # axes: process, sys, jmult, leptype, btag, lepcat
    histo = hdict[hname_to_use][Plotter.nonsignal_samples]

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, (float, int)):
        new_xbins = xrebinning
    else:
        raise TypeError(
            f"Unsupported x rebinning of type {type(xrebinning).__name__}")
    histo = histo.rebin(xaxis_name, new_xbins)
    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, (float, int)):
        new_ybins = yrebinning
    else:
        raise TypeError(
            f"Unsupported y rebinning of type {type(yrebinning).__name__}")
    rebin_histo = histo.rebin(yaxis_name, new_ybins)

    ## ttJets events split by reconstruction type share the ttJets lumi correction
    ttJets_permcats = [
        "*right", "*matchable", "*unmatchable", "*sl_tau", "*other"
    ]
    # dataset names in hists
    names = sorted(set(key[0] for key in histo.values().keys()))
    # gets ttJets(_PS)_other, ...
    ttJets_cats = [
        name for name in names
        if any(fnmatch.fnmatch(name, cat) for cat in ttJets_permcats)
    ]

    ## make groups based on process
    process = hist.Cat("process", "Process", sorting="placement")
    process_cat = "dataset"

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    if os.path.isfile(tmp_rname):
        upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(4))
    else:
        upfout = uproot3.create(tmp_rname)

    try:
        # one coffea container per requested jet multiplicity, in a fixed order
        template_dicts = {
            jmult: processor.dict_accumulator({
                "Muon": {},
                "Electron": {}
            })
            for jmult in ("3Jets", "4PJets") if jmult in njets_to_run
        }

        for lep in ["Muon", "Electron"]:
            orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

            ## make groups based on process
            process_groups = plt_tools.make_dataset_groups(lep,
                                                           args.year,
                                                           samples=names,
                                                           gdict="templates")

            # NOTE: this is the object stored in lumi_corr_dict, so the
            # update below modifies that shared dict in place
            lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
            for tt_cat in ttJets_cats:
                # drop the category suffix ("sl_tau" spans two fields) to get
                # ttJets[SL, Had, DiLep] or ttJets_PS
                n_suffix_fields = 2 if "sl_tau" in tt_cat else 1
                ttJets_lumi_topo = "_".join(
                    tt_cat.split("_")[:-n_suffix_fields])
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})

            lep_histo = rebin_histo.copy()
            lep_histo.scale(lumi_correction, axis="dataset")
            lep_histo = lep_histo.group(
                process_cat, process,
                process_groups)[:, :, :, lep, :, :].integrate("leptype")

            systs = sorted(set(key[1] for key in lep_histo.values().keys()))
            # move "nosys" to the front
            systs.insert(0, systs.pop(systs.index("nosys")))

            # loop over each jet multiplicity
            for jmult in njets_to_run:
                lepdir = orig_lepdir.replace("NJETS", jmult.lower())

                # get sideband and signal region hists
                cen_sb_histo = Plotter.linearize_hist(
                    lep_histo[:, "nosys", jmult,
                              btag_reg_names_dict["Central"]["reg"]].integrate(
                                  "jmult").integrate("btag").integrate("sys"))
                sig_histo = Plotter.linearize_hist(
                    lep_histo[:, :, jmult,
                              btag_reg_names_dict["Signal"]["reg"]].integrate(
                                  "jmult").integrate("btag"))

                # loop over each systematic
                for syst in systs:
                    if syst not in systematics.template_sys_to_name[
                            args.year].keys():
                        continue

                    is_ttJets_sys = syst in systematics.ttJets_sys.values()
                    sig_sys_histo = sig_histo[:, syst].integrate("sys")
                    # ttJets-only systematics are used directly, everything
                    # else goes through the background estimation
                    if is_ttJets_sys:
                        sys_histo = sig_sys_histo
                    else:
                        sys_histo = Plotter.BKG_Est(sig_reg=sig_sys_histo,
                                                    sb_reg=cen_sb_histo,
                                                    norm_type="SigMC",
                                                    sys=syst,
                                                    ignore_uncs=True)

                    ## write nominal and systematic variations for each topology to file
                    procs = sorted(
                        set(key[0] for key in sys_histo.values().keys()))
                    for proc in procs:
                        if is_ttJets_sys and ("tt" not in proc):
                            continue
                        if (proc == "data_obs") and (syst != "nosys"):
                            continue
                        if not sys_histo[proc].values().keys():
                            print(
                                f"Systematic {syst} for {lep} {jmult} {proc} not found, skipping"
                            )
                            continue

                        print(args.year, lep, jmult, syst, proc)
                        outhname = "_".join(
                            filter(None, [
                                proc, systematics.template_sys_to_name[
                                    args.year][syst][0], lepdir,
                                (args.year)[-2:]
                            ]))
                        if "LEP" in outhname:
                            outhname = outhname.replace(
                                "LEP",
                                "muon" if lep == "Muon" else "electron")

                        template_histo = sys_histo[proc].integrate("process")

                        ## save template histos to coffea dict
                        if jmult in template_dicts:
                            template_dicts[jmult][lep][
                                f"{proc}_{syst}"] = template_histo.copy()

                        ## save template histo to root file
                        upfout[outhname] = hist.export1d(template_histo)

        for jmult, histo_dict in template_dicts.items():
            coffea_out = os.path.join(
                outdir,
                f"test_raw_templates_lj_{jmult}_bkg_{args.year}_{jobid}.coffea"
            )
            save(histo_dict, coffea_out)
            print(f"{coffea_out} written")
    finally:
        # always release the ROOT file, even if template building failed
        upfout.close()

    print(f"{tmp_rname} written")
def get_sig_templates(tmp_rname):
    """
    Build the linearized signal templates and write them to disk.

    The "mtt_vs_tlep_ctstar_abs" histogram is read from ``sig_fnames``,
    rebinned along both dense axes according to ``linearize_binning`` and
    restricted to ``Plotter.signal_samples`` in the "btagPass" category. For
    every lepton flavour, every jet multiplicity in ``njets_to_run``, every
    signal dataset containing "TTJetsSL" and every known systematic, the
    linearized template is written to the ROOT file ``tmp_rname`` and
    collected into one coffea file per jet multiplicity in ``outdir``.

    Parameters
    ----------
    tmp_rname : str
        Path of the ROOT file to write (recreated if it already exists).

    Raises
    ------
    ValueError
        If the expected histogram is not present in the input.
    TypeError
        If an entry of ``linearize_binning`` is neither a numpy array of bin
        edges nor a float/int rebinning factor.
    """
    ## variables that only need to be defined/evaluated once
    if len(sig_fnames) > 1:
        hdict = plt_tools.add_coffea_files(sig_fnames)
    else:
        hdict = load(sig_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError(f"{hname_to_use} not found in file")
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use]  # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, (float, int)):
        new_xbins = xrebinning
    else:
        raise TypeError(
            f"Unsupported x rebinning of type {type(xrebinning).__name__}")
    histo = histo.rebin(xaxis_name, new_xbins)

    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, (float, int)):
        new_ybins = yrebinning
    else:
        raise TypeError(
            f"Unsupported y rebinning of type {type(yrebinning).__name__}")
    histo = histo.rebin(yaxis_name, new_ybins)
    rebin_histo = histo[Plotter.signal_samples, :, :, :,
                        "btagPass"].integrate("btag")

    # dataset names in hists
    names = sorted(set(key[0] for key in rebin_histo.values().keys()))
    # only use SL decays
    signals = [sig for sig in names if "TTJetsSL" in sig]

    systs = sorted(set(key[1] for key in rebin_histo.values().keys()))
    # move "nosys" to the front
    systs.insert(0, systs.pop(systs.index("nosys")))

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    if os.path.isfile(tmp_rname):
        upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(4))
    else:
        upfout = uproot3.create(tmp_rname)

    try:
        # one coffea container per requested jet multiplicity, in a fixed order
        template_dicts = {
            jmult: processor.dict_accumulator({
                "Muon": {},
                "Electron": {}
            })
            for jmult in ("3Jets", "4PJets") if jmult in njets_to_run
        }

        # write signal dists to temp file
        for lep in ["Muon", "Electron"]:
            orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

            # scale by lumi
            lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
            lep_histo = rebin_histo.copy()
            lep_histo.scale(lumi_correction, axis="dataset")
            process_groups = plt_tools.make_dataset_groups(lep,
                                                           args.year,
                                                           samples=names,
                                                           gdict="templates")
            lep_histo = lep_histo.group(
                "dataset", hist.Cat("process", "Process",
                                    sorting="placement"), process_groups)

            for jmult in njets_to_run:
                lepdir = orig_lepdir.replace("NJETS", jmult.lower())

                lin_histo = Plotter.linearize_hist(
                    lep_histo[:, :, jmult,
                              lep].integrate("jmult").integrate("leptype"))
                for signal in signals:
                    # names are "<boson>_<mass>_<width>_<part>" with an extra
                    # trailing field when "Int" appears in the name
                    fields = signal.split("_")
                    if "Int" in signal:
                        boson, mass, width, pI, wt = fields
                    else:
                        boson, mass, width, pI = fields
                        wt = None
                    name_parts = [
                        "%s%s" % (boson[0], mass[1:]),
                        # "." is not wanted in the output name, use "p" instead
                        "relw%s" % str(width).replace(".", "p").split("W")[-1],
                        pI.lower()
                    ]
                    if pI == "Int":
                        name_parts.append(wt)
                    sub_name = "_".join(name_parts)

                    for syst in systs:
                        if syst not in systematics.template_sys_to_name[
                                args.year].keys():
                            continue
                        if not lin_histo[signal, syst].values().keys():
                            print(
                                f"Systematic {syst} for {lep} {jmult} {signal} not found, skipping"
                            )
                            continue

                        print(args.year, lep, jmult, sub_name, syst)
                        outhname = "_".join(
                            filter(None, [
                                sub_name, systematics.template_sys_to_name[
                                    args.year][syst][0], lepdir,
                                (args.year)[-2:]
                            ]))
                        if "LEP" in outhname:
                            outhname = outhname.replace(
                                "LEP",
                                "muon" if lep == "Muon" else "electron")

                        template_histo = lin_histo[signal, syst].integrate(
                            "process").integrate("sys")

                        ## save template histos to coffea dict
                        if jmult in template_dicts:
                            template_dicts[jmult][lep][
                                f"{signal}_{syst}"] = template_histo.copy()

                        ## save template histo to root file
                        upfout[outhname] = hist.export1d(template_histo)

        for jmult, histo_dict in template_dicts.items():
            coffea_out = os.path.join(
                outdir,
                f"test_raw_templates_lj_{jmult}_sig_{args.year}_{jobid}.coffea"
            )
            save(histo_dict, coffea_out)
            print(f"{coffea_out} written")
    finally:
        # always release the ROOT file, even if template building failed
        upfout.close()

    print(f"{tmp_rname} written")
        fig, ax = plt.subplots()
        fig.subplots_adjust(hspace=.07)
        ax.axhline(1, **{"linestyle": "--", "color": (0, 0, 0, 0.5), "linewidth": 1})
        ax.text(
            0.02, 0.90, "$t\\bart \\rightarrow e/\mu + jets$\nparton level",
            fontsize=rcParams["font.size"], horizontalalignment="left", verticalalignment="bottom", transform=ax.transAxes
        )
    
    #set_trace()
    for year in years_to_run:
        histo = ratios[year][var] # get histogram

        if histo._dimension == 1:
            orig_bins, orig_vals = histo._axes, histo._values
                # plot original distribution
            Plotter.plot_1D(orig_vals, orig_bins, xlabel=var_opts[var]["xtitle"], ylabel=var_opts[var]["ytitle"], color=year_opts[year]["col"], ax=ax, label="%s Original" % year_opts[year]["leg_title"])

                # get interpolated values from ROOT Interpolate
            orig_ratio_hist = Hist(orig_bins, name="", title="")
            for xbin in range(1, orig_bins.size):
                orig_ratio_hist[xbin] = orig_vals[xbin-1]

            output_bins = np.arange(min(orig_bins), max(orig_bins)+10, 10)
            interped_array = np.zeros(output_bins.size-1)
            for xbin in range(output_bins.size-1):
                interped_array[xbin] = orig_ratio_hist.Interpolate(output_bins[xbin])

            Plotter.plot_1D(interped_array, output_bins, xlabel=var_opts[var]["xtitle"], ylabel=var_opts[var]["ytitle"], color=year_opts[year]["col"], ax=ax, label="%s Interp" % (year_opts[year]["leg_title"]), linestyle="--")

            if args.save_ratios:
                lookup = dense_lookup(*(interped_array, output_bins))
                    #alpha_medians, alpha_errors = np.array(alpha_median), np.array(alpha_median_errs)
                    #binned_mtt_medians[idx] = alpha_medians
                    #binned_mtt_errors[idx] = alpha_errors
                    alpha_median = get_median_from_2d(hslice,
                                                      'norm_mthad',
                                                      xmin=mthad_fit_range[0],
                                                      xmax=mthad_fit_range[1])
                    alpha_medians = np.array(alpha_median)
                    binned_mtt_medians[idx] = alpha_medians

                Plotter.plot_2d_norm(hslice,
                                     xaxis_name='norm_mthad',
                                     yaxis_name=alpha_axis_name,
                                     values=np.ma.masked_where(
                                         hslice.values()[()] <= 0.,
                                         hslice.values()[()]),
                                     xlimits=mthad_lims,
                                     ylimits=alpha_lims,
                                     xlabel=mthad_title,
                                     ylabel=alpha_title,
                                     ax=ax,
                                     **opts)

                mtt_label = '$m_{t\\bar{t}}^{Reco}$ $\geq$ %s' % bin_min if idx == len(
                    mtt_bin_ranges
                ) - 1 else '%s $\leq$ $m_{t\\bar{t}}^{Reco}$ $<$ %s' % (
                    bin_min, bin_max)
                # add lepton/jet multiplicity label
                ax.text(
                    0.02,
                    0.84,
                    "%s\n%s" % (blurb, mtt_label),
Esempio n. 15
0
                                     for key in histo.values().keys()]))):
                        objlabel, mass_range = objects[obj]
                        new_xtitle = xtitle.replace('obj', objlabel)
                        if ('mass' in hname) and (hname != 'Reso_mass'):
                            x_lims = mass_range

                        fig, ax = plt.subplots()
                        fig.subplots_adjust(hspace=.07)

                        hslice = histo[:, jmult, btagregion, lepcat,
                                       obj].integrate('jmult').integrate(
                                           'lepcat').integrate(
                                               'btag').integrate('objtype')
                        Plotter.plot_mc1d(ax,
                                          hslice,
                                          xlabel=new_xtitle,
                                          xlimits=x_lims,
                                          ylabel='Events')

                        # add lepton/jet multiplicity label
                        ax.text(0.02,
                                0.90,
                                "%s, %s\n%s" %
                                (lep_cats[lepcat], jet_mults[jmult],
                                 btag_cats[btagregion]),
                                horizontalalignment='left',
                                verticalalignment='bottom',
                                transform=ax.transAxes)
                        hep.cms.label(ax=ax,
                                      data=False,
                                      paper=False,
    ## get values from NNLO root file
# Input ROOT file holding the NNLO prediction and the distribution read from it.
# Alternative inputs that have been used:
#   'matrixhists_NNPDF.root' -> 'cen' dist has only statistical uncs
#   'MATRIX_17_abs.root'     -> has scale and statistical uncs
nnlo_fname = 'MATRIX_ttmVStheta.root'  # 'xsec_central' dist has only statistical uncs
nnlo_var = 'xsec_central'

nnlo_file = convert_histo_root_file(
    os.path.join(proj_dir, 'NNLO_files', nnlo_fname))

# Convert the requested distribution into a hist with a single 'dataset'
# category (filled as 'nnlo') and two dense axes: index 0 -> 'ctstar',
# index 1 -> 'mtt'.
# (The transposed assignment, mtt at idx 1 / ctstar at idx 0 together with
#  transpose_da=True, was previously tried and is left disabled.)
nnlo_dict = Plotter.root_converters_dict_to_hist(
    nnlo_file,
    vars=[nnlo_var],
    sparse_axes_list=[
        dict(name='dataset', label="Event Process", fill='nnlo'),
    ],
    dense_axes_list=[
        dict(name='ctstar', idx=0),
        dict(name='mtt', idx=1),
    ],
)

# Bin edges of both dense axes of the converted histogram.
ctstar_binning = nnlo_dict[nnlo_var].axis('ctstar').edges()
mtt_binning = nnlo_dict[nnlo_var].axis('mtt').edges()

# One entry per interior ctstar bin boundary, each a multiple of the number
# of mtt bins (presumably x positions of separator lines in the linearized
# mtt x ctstar plot -- confirm against the plotting code that reads `vlines`).
vlines = [
    ybin * (len(mtt_binning) - 1)
    for ybin in range(1, len(ctstar_binning) - 1)
]
#set_trace()
def plot_bkg_templates(fnames_to_run):
    """
    Plot toy-based 68% and 95% bands for smoothed systematic templates.

    For every file in ``fnames_to_run`` and every systematic template stored
    under ``hdict[args.lepton]`` (the "BKG" process and all "nosys" templates
    are skipped):

    1. the systematic template is smoothed once with
       ``Plotter.smoothing_mttbins`` (described as LOWESS smoothing by the
       original author);
    2. ``ntoys`` toy templates are drawn, each bin sampled from a Gaussian
       with mean = bin content and sigma = sqrt(sumw2) of the systematic;
    3. every toy is smoothed the same way and converted to a relative
       deviation from the nominal ("nosys") template;
    4. per bin, the sorted toy deviations are read at the module-level
       indices ``minus/plus_one_sigma_ind`` and ``minus/plus_two_sigma_ind``
       to build the bands;
    5. the original and smoothed relative deviations are drawn together with
       the two bands and the figure is saved under
       ``outdir/<lepton>/<jmult>/<sys>/``.

    Besides the argument, the function reads these module-level names:
    ``args`` (``lepton``, ``year``), ``outdir``, ``mtt_centers``, ``nbinsx``,
    ``nbinsy``, ``nbins``, ``ntoys``, the four ``*_sigma_ind`` indices,
    ``leptypes``, ``jet_mults``, ``data_lumi_year`` and ``rcParams``.

    Parameters
    ----------
    fnames_to_run : iterable of str
        Paths of template files readable by ``load``. The jet multiplicity is
        taken from the file name: "3Jets" if that substring is present in the
        basename, otherwise "4PJets".

    Returns
    -------
    None
        Figures are written to disk and their paths printed.
    """

    for bkg_file in fnames_to_run:
        hdict = load(bkg_file)
        jmult = "3Jets" if "3Jets" in os.path.basename(bkg_file) else "4PJets"
        for tname, orig_template in hdict[args.lepton].items():

            # template names look like "<proc>_<sys>"; "data_obs" itself
            # contains an underscore and is therefore special-cased
            proc = "data_obs" if "data_obs" in tname else tname.split("_")[0]
            sys = sorted(filter(None, tname.split(f"{proc}_")))[0]

            if proc == "BKG":
                continue
            if sys == "nosys":
                continue
            print(args.lepton, jmult, sys, proc)

            nosys_hist = hdict[args.lepton][f"{proc}_nosys"].copy()
            orig_smooth_hist = Plotter.smoothing_mttbins(
                nosys=nosys_hist,
                systematic=orig_template,
                mtt_centers=mtt_centers,
                nbinsx=nbinsx,
                nbinsy=nbinsy)

            # nominal bin contents and edges are needed repeatedly below
            nosys_vals = nosys_hist.values()[()]
            nosys_edges = nosys_hist.dense_axes()[0].edges()
            x_lims = (0, nosys_hist.dense_axes()[0].centers().size)

            # get vals and errors of systematic variation
            sys_histo_vals, sys_histo_sumw2 = orig_template.values(
                sumw2=True)[()]
            sys_histo_errs = np.sqrt(sys_histo_sumw2)

            # make toys based on Gaussian distribution of mu=bin_val, sigma=bin_error
            # (row = bin, column = toy)
            toy_arrays = np.zeros((nbins, ntoys))
            for idx in range(nbins):
                toy_arrays[idx] = np.random.normal(sys_histo_vals[idx],
                                                   sys_histo_errs[idx],
                                                   size=ntoys)

            # get smoothed relative deviation distributions from toys
            # (row = toy, column = bin)
            smoothed_rel_dev_arrays = np.zeros((ntoys, nbins))
            # NOTE(review): chi2_pvals is filled but not read again in this
            # function; kept so the chisquare call (and any error it raises)
            # is unchanged.
            chi2_pvals = np.zeros((ntoys, 2))
            for idx in range(ntoys):
                smoothed_array = Plotter.smoothing_mttbins(
                    nosys=nosys_hist,
                    systematic=(toy_arrays.T)[idx],
                    mtt_centers=mtt_centers,
                    nbinsx=nbinsx,
                    nbinsy=nbinsy)
                chi2_pval = chisquare(
                    f_obs=smoothed_array, f_exp=orig_smooth_hist.values()[()]
                )  # convert to expected yields so inputs are greater than 5
                chi2_pvals[idx] = np.array(
                    [chi2_pval.statistic, chi2_pval.pvalue])
                smoothed_rel_dev_arrays[idx] = (smoothed_array -
                                                nosys_vals) / nosys_vals

            ## find 68% and 95% intervals
            # sort the toys once per bin (along the toy axis) and read every
            # band edge from the same sorted array
            sorted_rel_devs = np.sort(smoothed_rel_dev_arrays, axis=0)
            plus_one_sigma_smooth_vals = sorted_rel_devs[plus_one_sigma_ind, :]
            minus_one_sigma_smooth_vals = sorted_rel_devs[
                minus_one_sigma_ind, :]
            plus_two_sigma_smooth_vals = sorted_rel_devs[plus_two_sigma_ind, :]
            minus_two_sigma_smooth_vals = sorted_rel_devs[
                minus_two_sigma_ind, :]

            # plot relative deviation
            fig, ax = plt.subplots()
            fig.subplots_adjust(hspace=.07)

            # original relative deviations
            orig_masked_vals, orig_masked_bins = Plotter.get_ratio_arrays(
                num_vals=orig_template.values()[()] - nosys_vals,
                denom_vals=nosys_vals,
                input_bins=nosys_edges)
            ax.step(orig_masked_bins,
                    orig_masked_vals,
                    where="post",
                    color="k",
                    linestyle="-",
                    label="Original")
            # original smoothing relative deviations
            orig_smoothed_masked_vals, orig_smoothed_masked_bins = Plotter.get_ratio_arrays(
                num_vals=orig_smooth_hist.values()[()] - nosys_vals,
                denom_vals=nosys_vals,
                input_bins=nosys_edges)
            ax.step(orig_smoothed_masked_bins,
                    orig_smoothed_masked_vals,
                    where="post",
                    color="r",
                    linestyle="-",
                    label="Original Smoothing")

            # plot 68 and 95% intervals for yields; the last value is repeated
            # so each band has one entry per bin edge for step="post"
            bands = [
                (minus_one_sigma_smooth_vals, plus_one_sigma_smooth_vals,
                 "68%", "#00cc00"),
                (minus_two_sigma_smooth_vals, plus_two_sigma_smooth_vals,
                 "95%", "#ffcc00"),
            ]
            for lower_vals, upper_vals, band_label, band_color in bands:
                lower_edge_vals = np.r_[lower_vals, lower_vals[-1]]
                upper_edge_vals = np.r_[upper_vals, upper_vals[-1]]
                ax.fill_between(nosys_edges,
                                lower_edge_vals,
                                upper_edge_vals,
                                where=upper_edge_vals > lower_edge_vals,
                                step="post",
                                label=band_label,
                                facecolor=band_color,
                                alpha=0.5)

            ax.legend(loc="upper right", title=f"{sys}, {proc}")
            ax.axhline(0, linestyle="--", color=(0, 0, 0, 0.5), linewidth=1)
            ax.autoscale()
            # leave headroom above the curves for the legend and labels
            ax.set_ylim(ax.get_ylim()[0], ax.get_ylim()[1] * 1.15)
            ax.set_xlim(x_lims)
            ax.set_xlabel(
                "$m_{t\\bar{t}}$ $\\otimes$ |cos($\\theta^{*}_{t_{l}}$)|")
            ax.set_ylabel("Rel. Deviation from Nominal")

            # add lepton/jet multiplicity label
            ax.text(0.02,
                    0.94,
                    f"{leptypes[args.lepton]}, {jet_mults[jmult]}",
                    fontsize=rcParams["font.size"] * 0.9,
                    horizontalalignment="left",
                    verticalalignment="bottom",
                    transform=ax.transAxes)
            ## draw vertical lines for distinguishing different ctstar bins
            # NOTE(review): the x range is split into 5 equal parts, i.e. 5
            # ctstar bins are hard-coded here -- confirm this matches nbinsy.
            vlines = [x_lims[1] * ybin / 5 for ybin in range(1, 5)]
            for vline in vlines:
                ax.axvline(vline, color="k", linestyle="--")
            hep.cms.label(ax=ax,
                          data=False,
                          paper=False,
                          year=args.year,
                          lumi=round(data_lumi_year[f"{args.lepton}s"] / 1000.,
                                     1))

            pltdir = os.path.join(outdir, args.lepton, jmult, sys)
            # exist_ok avoids the check-then-create race of isdir + makedirs
            os.makedirs(pltdir, exist_ok=True)

            figname = os.path.join(
                pltdir, "_".join([
                    jmult, args.lepton, sys, proc,
                    "SmoothingConfidenceIntervals"
                ]))
            fig.savefig(figname)
            print(f"{figname} written")
            # close exactly the figure created above, not whichever is current
            plt.close(fig)
Esempio n. 18
0
            if not os.path.isdir(pltdir):
                os.makedirs(pltdir)
            print(hname, jmult)

            hslice = h_tot[jmult, :].integrate('jmult')
            if rebinning != 1:
                xaxis_name = hslice.dense_axes()[0].name
                hslice = hslice.rebin(xaxis_name, rebinning)

                # make histo with event all individual categories for 'Lost' and 'Merged'
            indiv_cat_histo = hslice.group(orig_axis, all_evt_cat, indiv_evt_groups)
                # hists with yields
            fig, ax = plt.subplots()
            fig.subplots_adjust(hspace=.07)

            Plotter.plot_mc1d(ax=ax, hdict=indiv_cat_histo, xlabel=xtitle, ylabel='Events', xlimits=x_lims, hist_styles=indiv_cat_styles, **{'error_opts':None, 'leg_ncols':2})
            ax.set_ylim(0, ax.get_ylim()[1]*1.1)
                # add lep category
            ax.text(
                0.02, 0.90, "$e/\mu$, %s\nParton Level" % jet_mults[jmult],
                fontsize=rcParams['font.size'], horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes
            )
                ## set axes labels and titles
            hep.cms.label(ax=ax, data=False, paper=False, year=args.year, lumi=round(lumi_to_use, 1))

            #set_trace()
            figname = os.path.join(pltdir, '_'.join([args.year, jobid, jmult, 'Indiv_Cats', hname]))
            fig.savefig(figname)
            print('%s written' % figname)

            #set_trace()
Esempio n. 19
0
def get_bkg_templates(tmp_rname):
    '''
    Function that writes linearized mtt vs costheta distributions to root file.

    The 'mtt_vs_tlep_ctstar_abs' histogram is read from the module-level
    ``bkg_fnames`` inputs, rebinned according to ``linearize_binning``, scaled
    by the MC lumi weights and grouped into processes. For every lepton
    flavour, jet multiplicity in ``njets_to_run`` and systematic in
    ``sys_to_use``, ``Plotter.QCD_Est`` is run and each resulting process
    template is (optionally) corrected/smoothed, stored in a per-jet-multiplicity
    coffea dict and exported to the ROOT file.

    Besides the argument, the function reads these module-level names:
    ``bkg_fnames``, ``bkg_ttJets_fname``, ``linearize_binning``,
    ``njets_to_run``, ``sys_to_use``, ``templates_to_smooth``, ``args``
    (``year``, ``smooth``), ``proj_dir``, ``jobid`` and ``outdir``.

    Parameters
    ----------
    tmp_rname : str
        Path of the output ROOT file. It is recreated when a file already
        exists at that path and created otherwise.

    Raises
    ------
    ValueError
        If 'mtt_vs_tlep_ctstar_abs' is not among the loaded histograms.
    TypeError
        If an entry of ``linearize_binning`` is neither a numpy array nor a
        number.
    '''

    def _rebin_spec(axis_name, rebinning):
        # array of edges -> new hist.Bin axis; plain number -> passed through
        if isinstance(rebinning, np.ndarray):
            return hist.Bin(axis_name, axis_name, rebinning)
        if isinstance(rebinning, (float, int)):
            return rebinning
        raise TypeError("Unsupported rebinning for axis %s: %r" % (axis_name, type(rebinning)))

    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

        # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict:
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
        ## rebin x axis, then y axis
    new_xbins = _rebin_spec(xaxis_name, xrebinning)
    new_ybins = _rebin_spec(yaxis_name, yrebinning)
    rebin_histo = histo.rebin(xaxis_name, new_xbins).rebin(yaxis_name, new_ybins)

        ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ['*right', '*matchable', '*unmatchable', '*other']
    names = sorted({key[0] for key in hdict[hname_to_use].values()}) # get dataset names in hists
    ttJets_cats = [name for name in names if any(fnmatch.fnmatch(name, cat) for cat in ttJets_permcats)] # gets ttJets(_PS)_other, ...

        # use ttJets events that don't have PS weights for dedicated sys samples in 2016
    if bkg_ttJets_fname is not None:
        ttJets_hdict = load(bkg_ttJets_fname)
        ttJets_histo = ttJets_hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat

            ## rebin x and y axes the same way as the main histogram
        ttJets_histo = ttJets_histo.rebin(xaxis_name, new_xbins)
        ttJets_histo = ttJets_histo.rebin(yaxis_name, new_ybins)

        only_ttJets_names = sorted({key[0] for key in ttJets_hdict[hname_to_use].values()}) # get dataset names in hists
        only_ttJets_cats = [name for name in only_ttJets_names if any(fnmatch.fnmatch(name, cat) for cat in ttJets_permcats)] # gets ttJets(_PS)_other, ...

        ## make groups based on process
    process = hist.Cat("process", "Process", sorting='placement')
    process_cat = "dataset"

        # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot.recreate(tmp_rname, compression=uproot.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot.create(tmp_rname)

    # everything that writes to upfout runs inside try/finally so the ROOT
    # file is closed even when a step below raises
    try:
        if '3Jets' in njets_to_run:
            histo_dict_3j = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})
        if '4PJets' in njets_to_run:
            histo_dict_4pj = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})

        for lep in ['Muon', 'Electron']:
            lepdir = 'mujets' if lep == 'Muon' else 'ejets'

            ## make groups based on process
            process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict='templates')

            lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
                # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
            for tt_cat in ttJets_cats:
                ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                lumi_correction.update({tt_cat: lumi_correction[ttJets_lumi_topo]})

            histo = rebin_histo.copy()
            histo.scale(lumi_correction, axis='dataset')
            histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate('leptype')

                # use ttJets events that don't have PS weights for dedicated sys samples in 2016
            if bkg_ttJets_fname is not None:
                for tt_cat in only_ttJets_cats:
                    ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                    lumi_correction.update({tt_cat: lumi_correction[ttJets_lumi_topo]})

                tt_histo = ttJets_histo.copy()
                tt_histo.scale(lumi_correction, axis='dataset')
                tt_histo = tt_histo.group(process_cat, process, {'TT' : ['ttJets_right', 'ttJets_matchable', 'ttJets_unmatchable', 'ttJets_other']})[:, :, :, lep, :, :].integrate('leptype')

            for jmult in njets_to_run:
                # three nominal sideband selections plus the selection holding
                # all systematics, passed to Plotter.QCD_Est below
                iso_sb    = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagPass', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
                btag_sb   = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Tight'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
                double_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
                sig_histo = Plotter.linearize_hist(histo[:, :, jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))

                for sys in sys_to_use.keys():
                    if sys not in histo.axis('sys')._sorted:
                        print('\n\n   Systematic %s not available, skipping\n\n' % sys)
                        continue

                    sysname, onlyTT = sys_to_use[sys]
                    # 'LEP' placeholder becomes the first letter of the lepton directory ('m' or 'e')
                    if 'LEP' in sysname:
                        sysname = sysname.replace('LEP', lepdir[0])

                    qcd_est_histo = Plotter.QCD_Est(sig_reg=sig_histo, iso_sb=iso_sb, btag_sb=btag_sb, double_sb=double_sb, norm_type='Sideband', shape_region='BTAG', norm_region='BTAG', sys=sys)

                        ## write nominal and systematic variations for each topology to file
                    for proc in sorted({key[0] for key in qcd_est_histo.values()}):
                        if (proc != 'TT') and onlyTT:
                            continue
                        if (proc == 'data_obs') and not (sys == 'nosys'):
                            continue
                        name = proc+lepdir if proc == 'QCD' else proc
                        print(lep, jmult, sys, name)
                        outhname = '_'.join([jmult, lepdir, name]) if sys == 'nosys' else '_'.join([jmult, lepdir, name, sysname])
                        template_histo = qcd_est_histo[proc].integrate('process')
                        if (('ue' in sys) or ('hdamp' in sys) or ('mtop' in sys)) and (bkg_ttJets_fname is not None):
                            tt_lin_histo = Plotter.linearize_hist(tt_histo['TT', 'nosys', jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
                            tt_lin_histo = tt_lin_histo['TT', 'nosys'].integrate('process').integrate('sys')
                            template_histo = substitute_ttJets(sys_histo=template_histo, ttJets_histo=tt_lin_histo, ttJets_PS_histo=sig_histo['TT', 'nosys'].integrate('process').integrate('sys'))

                        # The nominal template is looked up under the bare process
                        # name, which is only stored once 'nosys' has been handled,
                        # so 'nosys' must come before these systematics in sys_to_use.
                        if ((sys == 'mtop1695') or (sys == 'mtop1755')) and (templates_to_smooth[proc]):
                            template_histo = scale_mtop3gev(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo)

                        if (sys != 'nosys') and (args.smooth) and (templates_to_smooth[proc]):
                            template_histo = smoothing(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo, nbinsx=len(xrebinning)-1, nbinsy=len(yrebinning)-1)#, debug=True if proc=='VV' else False)

                            ## save template histos to coffea dict
                        dict_key = proc if sys == 'nosys' else '%s_%s' % (proc, sys)
                        if jmult == '3Jets':
                            histo_dict_3j[lep][dict_key] = template_histo
                        if jmult == '4PJets':
                            histo_dict_4pj[lep][dict_key] = template_histo

                            ## save template histo to root file
                        upfout[outhname] = hist.export1d(template_histo)

        if '3Jets' in njets_to_run:
            coffea_out_3j = '%s/templates_lj_3Jets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_3Jets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
            save(histo_dict_3j, coffea_out_3j)
            print("%s written" % coffea_out_3j)
        if '4PJets' in njets_to_run:
            coffea_out_4pj = '%s/templates_lj_4PJets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_4PJets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
            save(histo_dict_4pj, coffea_out_4pj)
            print("%s written" % coffea_out_4pj)
    finally:
        upfout.close()

    print('%s written' % tmp_rname)
Esempio n. 20
0
                     fig.subplots_adjust(hspace=.07)
 
                     hslice = histo[:, jmult, btagregion, lepcat].integrate('jmult').integrate('lepcat').integrate('btag')
 
                     if hname == 'Lep_iso':
                         if args.lepton == 'Muon':
                             x_lims = (0., 0.15) if lepcat == 'Tight' else (0.15, 1.)
                         if args.lepton == 'Electron':
                             x_lims = (0., 0.1) if lepcat == 'Tight' else (0., 0.5)
 
                     mc_opts = {
                         #'mcorder' : ['QCD', 'EWK', 'singlet', 'ttJets'] if not ttJets_cats else ['QCD', 'EWK', 'singlet', 'ttJets_other', 'ttJets_unmatchable', 'ttJets_matchable', 'ttJets_right']
                         #'maskData' : not withData
                     }
 
                     Plotter.plot_stack1d(ax, rax, hslice, xlabel=xtitle, xlimits=x_lims, **mc_opts)
 
                     #set_trace() 
                     if hname == 'Jets_njets':
                         print(jmult)
                         yields_txt, yields_json = Plotter.get_samples_yield_and_frac(hslice, data_lumi_year['%ss' % args.lepton]/1000., promptmc=True)
                         frac_name = '%s_yields_and_fracs' % '_'.join([jmult, args.lepton, lepcat, btagregion])
                         plt_tools.print_table(yields_txt, filename='%s/%s.txt' % (pltdir, frac_name), print_output=True)
                         print('%s/%s.txt written' % (pltdir, frac_name))
                         with open('%s/%s.json' % (pltdir, frac_name), 'w') as out:
                             out.write(prettyjson.dumps(yields_json))
 
                         # add lepton/jet multiplicity label
                     #set_trace()
                     ax.text(
                         0.02, 0.88, "%s, %s\n%s" % (lep_cats[lepcat], jet_mults[jmult], btag_cats[btagregion]),
def _relative_deviation(histo, nominal_vals, bin_edges):
    """
    Return the output of Plotter.get_ratio_arrays for (histo - nominal) / nominal.
    """
    return Plotter.get_ratio_arrays(
        num_vals=histo.values()[()] - nominal_vals,
        denom_vals=nominal_vals,
        input_bins=bin_edges)


def get_bkg_templates(fnames_to_run):
    """
    Plot smoothed and flattened systematic templates against the unsmoothed ones.

    For every file, lepton channel and template whose systematic is not
    "nosys", the relative deviation from the matching "{proc}_nosys" template
    is drawn for:
        * the original (unsmoothed) template,
        * Plotter.smoothing_mttbins evaluated with frac = 0.2, 0.4 and 0.6,
        * Plotter.flatten.
    One figure per template is saved in outdir/<lep>/<jmult>/<sys>/ with the
    name "<jmult>_<lep>_<sys>_<proc>_SmoothedFlatVals_Comp". Nothing is returned.

    Parameters
    ----------
    fnames_to_run : iterable of str
        Template files passed to load(). A file is treated as "3Jets" when
        that string appears in its basename and as "4PJets" otherwise.

    Notes
    -----
    Relies on names defined elsewhere in the file: load, Plotter, plt, hep,
    np, os, rcParams, args, outdir, mtt_centers, linearize_binning, leptypes,
    jet_mults and data_lumi_year.
    """
    # smoothing arguments that do not depend on the template: compute them once
    nbinsx = len(linearize_binning[0]) - 1
    nbinsy = len(linearize_binning[1]) - 1
    smoothing_fracs = [frac_val / 10. for frac_val in np.arange(2, 7, 2)]

    for bkg_file in fnames_to_run:
        hdict = load(bkg_file)
        jmult = "3Jets" if "3Jets" in os.path.basename(bkg_file) else "4PJets"
        for lep, templates in hdict.items():
            for tname, orig_template in templates.items():

                # template names look like "<proc>_<sys>"; "data_obs" itself
                # contains an underscore, so it is handled explicitly
                proc = "data_obs" if "data_obs" in tname else tname.split(
                    "_")[0]
                systype = sorted(filter(None, tname.split(f"{proc}_")))[0]

                # the nominal template has nothing to be compared with
                if systype == "nosys":
                    continue
                print(lep, jmult, systype, proc)

                nominal_hist = templates[f"{proc}_nosys"].copy()
                nominal_axis = nominal_hist.dense_axes()[0]
                nominal_vals = nominal_hist.values()[()]
                bin_edges = nominal_axis.edges()

                x_lims = (0, nominal_axis.centers().size)

                # perform smoothing for each tested frac value
                smoothed_histos_list = [(Plotter.smoothing_mttbins(
                    nosys=nominal_hist,
                    systematic=orig_template,
                    mtt_centers=mtt_centers,
                    nbinsx=nbinsx,
                    nbinsy=nbinsy,
                    frac=frac), frac) for frac in smoothing_fracs]
                # perform flattening
                flattened_histo = Plotter.flatten(nosys=nominal_hist,
                                                  systematic=orig_template)

                # plot relative deviation
                fig, ax = plt.subplots()
                fig.subplots_adjust(hspace=.07)

                # plot original dist
                orig_masked_vals, orig_masked_bins = _relative_deviation(
                    orig_template, nominal_vals, bin_edges)
                ax.fill_between(orig_masked_bins,
                                orig_masked_vals,
                                facecolor="k",
                                step="post",
                                alpha=0.5,
                                label="Unsmoothed")

                # plot smoothed versions
                for smooth_histo, frac in smoothed_histos_list:
                    smooth_masked_vals, smooth_masked_bins = _relative_deviation(
                        smooth_histo, nominal_vals, bin_edges)
                    ax.step(smooth_masked_bins,
                            smooth_masked_vals,
                            where="post",
                            linestyle="-",
                            label=f"Frac={frac}",
                            linewidth=2)

                # plot flattened val
                flat_masked_vals, flat_masked_bins = _relative_deviation(
                    flattened_histo, nominal_vals, bin_edges)
                ax.step(flat_masked_bins,
                        flat_masked_vals,
                        where="post",
                        linestyle="-",
                        label="Flat",
                        linewidth=2)

                ax.legend(loc="upper right", title=f"{systype}, {proc}")
                ax.axhline(0,
                           linestyle="--",
                           color=(0, 0, 0, 0.5),
                           linewidth=1)
                ax.autoscale()
                ax.set_xlim(x_lims)
                # backslashes are doubled so that no invalid escape sequence
                # ("\o") is left in this non-raw string
                ax.set_xlabel(
                    "$m_{t\\bar{t}}$ $\\otimes$ |cos($\\theta^{*}_{t_{l}}$)|")
                ax.set_ylabel("Rel. Deviation from Nominal")

                # add lepton/jet multiplicity label
                ax.text(0.02,
                        0.94,
                        f"{leptypes[lep]}, {jet_mults[jmult]}",
                        fontsize=rcParams["font.size"] * 0.9,
                        horizontalalignment="left",
                        verticalalignment="bottom",
                        transform=ax.transAxes)
                ## draw vertical lines for distinguishing different ctstar bins
                # NOTE(review): the 5 is hard-coded; presumably it should equal
                # nbinsy -- confirm against linearize_binning before changing
                for ybin in range(1, 5):
                    ax.axvline(x_lims[1] * ybin / 5, color="k", linestyle="--")
                hep.cms.label(ax=ax,
                              data=False,
                              paper=False,
                              year=args.year,
                              lumi=round(data_lumi_year[f"{lep}s"] / 1000., 1))

                # exist_ok avoids the separate isdir() check
                pltdir = os.path.join(outdir, lep, jmult, systype)
                os.makedirs(pltdir, exist_ok=True)

                figname = os.path.join(
                    pltdir,
                    "_".join(
                        [jmult, lep, systype, proc, "SmoothedFlatVals_Comp"]))
                fig.savefig(figname)
                print(f"{figname} written")
                # close this figure explicitly instead of whichever is current
                plt.close(fig)
                        ## normalized plots
                        fig, ax = plt.subplots()
                        fig.subplots_adjust(hspace=.07)

                        norm_values = hcat.values()[()] / np.sum(
                            hcat.values()[
                                ()])  ## normalized array of hist values
                        opts = {'cmap_label': '$P_{M}$'}
                        Plotter.plot_2d_norm(
                            hcat,
                            xaxis_name=hcat.axes()[0].name,
                            yaxis_name=hcat.axes()[1].name,
                            values=np.ma.masked_where(
                                norm_values <= 0.0, norm_values
                            ),  # mask nonzero probabilities for plotting
                            xlimits=x_lims,
                            ylimits=y_lims,
                            xlabel=xtitle,
                            ylabel=ytitle,
                            ax=ax,
                            **opts)

                        # add lep category
                        ax.text(0.02,
                                0.91,
                                "%s\n%s" %
                                (lep_cats[lepcat], jet_mults['3Jets']),
                                fontsize=rcParams['font.size'],
                                horizontalalignment='left',
                                verticalalignment='bottom',
                                    for sig in samples:
                                        boson, mass, width, shape = sig.split(
                                            '_')
                                        opts = Plotter.styles.styles[width]

                                        pos_histo = histo[
                                            '%s_pos' % sig, jmult, btagregion,
                                            lepcat].integrate(
                                                'jmult'
                                            ).integrate('lepcat').integrate(
                                                'btag').integrate('dataset')
                                        Plotter.plot_1D(
                                            pos_histo.values()[()],
                                            pos_histo.dense_axes()[0].edges(),
                                            ax=pos_ax,
                                            xlimits=x_lims,
                                            xlabel=xtitle,
                                            color=opts['color'],
                                            label=opts['name'],
                                            histtype='step')
                                        neg_histo = histo[
                                            '%s_neg' % sig, jmult, btagregion,
                                            lepcat].integrate(
                                                'jmult'
                                            ).integrate('lepcat').integrate(
                                                'btag').integrate('dataset')
                                        Plotter.plot_1D(
                                            neg_histo.values()[()],
                                            neg_histo.dense_axes()[0].edges(),
                                            ax=neg_ax,
                                            xlimits=x_lims,
Esempio n. 24
0
                                1,
                                gridspec_kw={"height_ratios": (3, 1)},
                                sharex=True)
                        else:
                            fig, ax = plt.subplots()
                        fig.subplots_adjust(hspace=.07)
                        hslice = histo[:, btag_applied, jmult, lepcat,
                                       btagregion].integrate(
                                           'jmult'
                                       ).integrate('lepcat').integrate(
                                           'btag').integrate('btagging')

                        if hname == 'Jets_njets':
                            print(jmult)
                            yields_txt, yields_json = Plotter.get_samples_yield_and_frac(
                                hslice,
                                data_lumi_year['%ss' % args.lepton] / 1000.)
                            plt_tools.print_table(
                                yields_txt,
                                filename='%s/%s_%s_yields_and_fracs.txt' %
                                (pltdir, jmult, args.lepton),
                                print_output=True)
                            with open(
                                    '%s/%s_%s_yields_and_fracs.json' %
                                (pltdir, jmult, args.lepton), 'w') as out:
                                out.write(prettyjson.dumps(yields_json))

                        if rebinning != 1:
                            xaxis_name = hslice.dense_axes()[0].name
                            hslice = hslice.rebin(xaxis_name, rebinning)
Esempio n. 25
0
                    print(', '.join([jmult, lepcat, btagregion, hname,
                                     sample]))
                    fig, ax = plt.subplots()
                    fig.subplots_adjust(hspace=.07)

                    hslice = histo[sample, jmult, btagregion,
                                   lepcat].integrate('jmult').integrate(
                                       'lepcat').integrate('btag').integrate(
                                           'dataset')

                    if hslice.values():
                        Plotter.plot_1D(
                            *hslice.values().values(),
                            histo.axis(xaxis_name).edges(),
                            xlimits=(-5., 5.)
                            if sample.startswith('QCD') else x_lims,
                            xlabel=xtitle,
                            ylabel='Events (Unweighted)',
                            ax=ax,
                            label='%s\n%s' %
                            (sample, format(lumi_correction[sample], '.3f')))
                        ax.legend(loc='upper right')

                        # add lepton/jet multiplicity label
                        ax.text(0.02,
                                0.88,
                                "%s, %s\n%s" %
                                (lep_cats[lepcat], jet_mults[jmult],
                                 btag_cats[btagregion]),
                                fontsize=rcParams['font.size'] * 0.75,
                                horizontalalignment='left',
                                verticalalignment='bottom',
Esempio n. 26
0
                        sig_hslice = hslice[Plotter.signal_samples]
                        SM_hslice = hslice[Plotter.nonsignal_samples]

                        # plot signal
                        if sig_hslice.values():
                            for signal in sig_hslice.values().keys():
                                fig, ax = plt.subplots()
                                fig.subplots_adjust(hspace=.07)

                                sig_hist = sig_hslice[signal].integrate(
                                    'process')
                                Plotter.plot_1D(
                                    *sig_hist.values().values(),
                                    sig_hist.axis(xaxis_name).edges(),
                                    xlabel=new_xtitle,
                                    xlimits=x_lims,
                                    ax=ax,
                                    label='%s, %s' %
                                    (plt_tools.get_label(
                                        signal[0], styles.styles),
                                     signal[0].split('_')[-1]))
                                ax.legend(loc='upper right')
                                # add lepton/jet multiplicity label
                                ax.text(0.02,
                                        0.85,
                                        "%s, %s\n%s" %
                                        (lep_cats[lepcat], jet_mults[jmult],
                                         btag_cats[btagregion]),
                                        horizontalalignment='left',
                                        verticalalignment='bottom',
                                        transform=ax.transAxes)
                                hep.cms.label(
Esempio n. 27
0
                        (pltdir, jmult, lep),
                        print_output=True)
                    with open(
                            '%s/%s_%s_yields_and_fracs.json' %
                        (pltdir, jmult, lep), 'w') as out:
                        out.write(prettyjson.dumps(yields_json))

                if rebinning != 1:
                    xaxis_name = hslice.dense_axes()[0].name
                    hslice = hslice.rebin(xaxis_name, rebinning)

                    ## plot mc+data
                if withData:
                    ax, rax = Plotter.plot_stack1d(ax,
                                                   rax,
                                                   hslice,
                                                   xlabel=xtitle,
                                                   xlimits=x_lims)
                else:
                    ax = Plotter.plot_mc1d(ax,
                                           hslice,
                                           xlabel=xtitle,
                                           xlimits=x_lims)

                    ## set axes labels and titles
                    # add lepton/jet multiplicity label
                ax.text(0.02,
                        0.92,
                        "%s, %s" % (objtypes['Lep'][lep], jet_mults[jmult]),
                        fontsize=hep.styles_cms.CMS['font.size'] * 0.75,
                        horizontalalignment='left',
Esempio n. 28
0
        for ttbar_type in sorted(set([key[1] for key in histo.values().keys()])):
            decay_label = "%s %s" % (plt_tools.get_label(dataset, styles), ttdecay_types[ttbar_type])
            #decay_label = ttdecay_types[ttbar_type]
            pltdir = os.path.join(outdir, dataset, ttbar_type) if isSignal(dataset) else os.path.join(outdir, "ttJets", ttbar_type)
            if not os.path.isdir(pltdir):
                os.makedirs(pltdir)
            for genobj, (objlabel, mass_range) in objects[ttbar_type].items():
                new_xtitle = xtitle.replace("obj", objlabel)
                if hname == "mass":
                    x_lims = mass_range
                tt_histo = histo[genobj, ttbar_type].integrate("objtype").integrate("ttdecay")
        
                fig, ax = plt.subplots()
                fig.subplots_adjust(hspace=.07)
    
                Plotter.plot_1D(tt_histo.values()[()], tt_histo.axis(xaxis_name).edges(), xlabel=new_xtitle, xlimits=x_lims, ax=ax, histtype="step")
                hep.cms.label(ax=ax, data=False, paper=False, year=args.year, lumi=round(lumi_to_use, 1))

                #set_trace()
                    # add lepton/jet multiplicity label
                if isSignal(dataset):
                    if "Int" in dataset:
                        sig_type = "Int, w $<$ 0" if "Int_neg" in dataset else "Int, w $>$ 0"
                    else:
                        sig_type = "Res"
                    sig_label = "%s\n%s" % (decay_label, sig_type)
                    ax.text(
                        0.95, 0.85, sig_label,
                        horizontalalignment="right", verticalalignment="bottom", transform=ax.transAxes
                    )
                else:
Esempio n. 29
0
])
# several MC files go through plt_tools.add_coffea_files, a single one is loaded directly
if len(mc_fnames) > 1:
    mc_hdict = plt_tools.add_coffea_files(mc_fnames)
else:
    mc_hdict = load(mc_fnames[0])

# MC histogram stored under the 'PU_nTrueInt' key
mc_nTrueInt_histo = mc_hdict['PU_nTrueInt']

# directory holding the data pileup root files for this job
data_input_dir = os.path.join(proj_dir, 'inputs', 'data', base_jobid, 'Pileup')

# central data pileup file, converted and given a 'dataset' sparse axis filled with 'data'
data_pu_central = convert_histo_root_file(
    os.path.join(data_input_dir, '%s_data.meta.pu.root' % args.year))
data_pu_dict = Plotter.root_converters_dict_to_hist(
    data_pu_central,
    vars=['pileup'],
    sparse_axes_list=[
        {'name': 'dataset', 'label': "Event Process", 'fill': 'data'},
    ])
# up
data_pu_up = convert_histo_root_file(
    os.path.join(data_input_dir, '%s_data.meta.pu_up.root' % args.year))
data_pu_up_dict = Plotter.root_converters_dict_to_hist(data_pu_up,
                                                       vars=['pileup'],
                                                       sparse_axes_list=[{
                                                           'name':
                                                           'dataset',
                                                           'label':
                                                           "Event Process",
                                                           'fill':