Ejemplo n.º 1
0
def make_signal_plot(sigh, title=None, overflow='over', yscale='log'):
    """Draw the signal histogram ``sigh`` with one curve per dataset.

    The y axis is set to ``yscale``. For 'linear' and 'log' scales the upper
    limit is pushed up by 20% of the (linear or logarithmic) axis span so the
    curves do not touch the top of the frame.

    Returns the ``(fig, ax)`` pair of the created figure.
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from coffea import hist

    fig, ax = plt.subplots(1, 1, figsize=(8, 6))
    hist.plot1d(sigh, overlay='dataset', ax=ax, overflow=overflow)

    ax.set_yscale(yscale)
    ax.autoscale(axis='both', tight=True)

    # Add headroom above the tallest bin.
    lower, upper = ax.get_ylim()
    if yscale == 'linear':
        upper = (upper - lower) * 1.2 + lower
    elif yscale == 'log':
        log_lower = np.log10(lower)
        upper = 10**(((np.log10(upper) - log_lower) * 1.2) + log_lower)
    ax.set_ylim(lower, upper)

    # Axis labels are right/top aligned, the title left aligned.
    ax.set_xlabel(ax.get_xlabel(), x=1, ha='right')
    ax.set_ylabel(ax.get_ylabel(), y=1.0, ha="right")
    ax.set_title(title, x=0.0, ha="left")

    # Luminosity label in the top-right corner, just above the frame.
    ax.text(1, 1, '59.74/fb (13TeV)',
            ha='right', va='bottom', transform=ax.transAxes)

    return fig, ax
Ejemplo n.º 2
0
def plot_rebsmear_prediction(acc_rs,
                             h_qcd,
                             distribution='mjj',
                             dataset='JetHT_2017',
                             region='cr_vbf_qcd'):
    '''Overlay the rebalance-and-smear prediction with the histogram ``h_qcd``.

    The ``distribution`` histogram is loaded from ``acc_rs``, its datasets are
    merged, it is optionally rebinned according to ``BINNINGS``, restricted to
    ``region`` and ``dataset``, and drawn together with ``h_qcd`` on a log
    y axis. The figure is written to ``test.pdf``.
    '''
    acc_rs.load(distribution)

    # Merge the JetHT datasets together
    prediction = rs_merge_datasets(acc_rs[distribution])

    # Rebin only when a dedicated binning exists for this distribution.
    if distribution in BINNINGS:
        target_axis = BINNINGS[distribution]
        prediction = prediction.rebin(target_axis.name, target_axis)

    prediction = prediction.integrate('region', region)[dataset]

    fig, ax = plt.subplots()
    shared_opts = dict(ax=ax, binwnorm=1)
    hist.plot1d(prediction, overlay='dataset', **shared_opts)
    # clear=False keeps the first histogram on the axes.
    hist.plot1d(h_qcd, clear=False, **shared_opts)

    ax.set_yscale('log')
    ax.set_ylim(1e-4, 1e2)

    fig.savefig('test.pdf')
Ejemplo n.º 3
0
def kinematic_selection_plot(hname,
                             btype,
                             dataset_name,
                             selection_name,
                             yscale="linear"):
    """Plot one histogram for a single dataset and selection and save it as PNG.

    Args:
        hname: Name of the histogram inside ``histograms[btype]``.
        btype: Key selecting the histogram group in the module-level
            ``histograms`` mapping.
        dataset_name: Identifier on the "dataset" axis to keep.
        selection_name: Identifier on the "selection" axis to keep.
        yscale: Matplotlib y-axis scale applied to the plot.

    The output file is
    ``{figure_directory}/{hname}_{btype}_{dataset_tag}_{selection_name}.png``
    where ``dataset_tag`` is "inclusive" or "probefilter" when one of those
    substrings occurs in ``dataset_name`` and the full dataset name otherwise.
    """
    print(
        f"Making plot: {hname}, {btype}, {dataset_name}, {selection_name}, {yscale}"
    )
    source_hist = histograms[btype][hname]
    print(source_hist.axes())
    print(source_hist.axis("dataset").identifiers())
    print(source_hist.axis("selection").identifiers())

    selected = source_hist.integrate("dataset", [dataset_name]).integrate(
        "selection", [selection_name])

    fig, ax = plt.subplots(1, 1, figsize=(10, 7))
    # Draw on the axes created above so the saved figure is the one plotted
    # on, independent of what plot1d does when no axes is supplied.
    hist.plot1d(selected,
                ax=ax,
                error_opts={
                    "marker": ".",
                    "linestyle": "none",
                    "markersize": 10.,
                    "color": "k",
                    "elinewidth": 1
                })
    # The yscale argument was previously accepted but never applied.
    ax.set_yscale(yscale)

    if "inclusive" in dataset_name:
        dataset_tag = "inclusive"
    elif "probefilter" in dataset_name:
        dataset_tag = "probefilter"
    else:
        # Previously dataset_tag was left unbound here and the save crashed.
        dataset_tag = dataset_name

    fig.tight_layout()
    fig.savefig(
        f"{figure_directory}/{hname}_{btype}_{dataset_tag}_{selection_name}.png"
    )
    plt.close(fig)
Ejemplo n.º 4
0
def plot_ht_dist(acc, regex, tag):
    '''Plot the LHE HT distribution of the datasets matching ``regex``.

    The figure is saved as ``./output/{tag}_lhe_ht.pdf``.'''
    acc.load('lhe_ht')

    ht_hist = merge_extensions(acc['lhe_ht'], acc, reweight_pu=False)
    scale_xs_lumi(ht_hist)
    ht_hist = merge_datasets(ht_hist)

    # Keep only the dataset(s) selected by the regular expression
    ht_hist = ht_hist[re.compile(regex)]

    ht_hist = ht_hist.rebin('ht',
                            hist.Bin('ht', r'$H_T \ (GeV)$', 50, 0, 4000))

    # Draw the distribution on a log y axis with a fixed range
    y_low, y_high = 1e-3, 1e6
    fig, ax = plt.subplots(1, 1)
    hist.plot1d(ht_hist,
                ax=ax,
                overflow='all',
                binwnorm=True,
                overlay='dataset')
    ax.set_yscale('log')
    ax.set_ylim(y_low, y_high)
    if 'gjets' in tag:
        # Vertical marker line at HT = 600 for gjets tags
        ax.plot([600, 600], [y_low, y_high])

    if not os.path.exists('./output'):
        os.mkdir('output')

    fig.savefig(f'./output/{tag}_lhe_ht.pdf')
Ejemplo n.º 5
0
def plot(hist_mc=None, hist_data=None, xlim=[], xscale="", xlabel="", ylim=[], yscale="", ylabel="", data_selection="", mc_selection="", savetag=""):
    """Plot data and MC overlaid (top pad) and their ratio (bottom pad).

    Args:
        hist_mc: MC histogram with "selection" and "dataset" axes (required).
        hist_data: Data histogram with the same axes (required).
        xlim, ylim: Axis limits; left untouched when empty.
        xscale, yscale: Matplotlib scale names; left untouched when empty.
        xlabel, ylabel: Axis labels.
        data_selection: Identifier integrated on the data "selection" axis.
        mc_selection: Identifier integrated on the MC "selection" axis.
        savetag: File stem; the figure goes to
            ``{figure_directory}/{savetag}.png``.

    The ratio uses the hard-coded datasets "Run2018" (numerator) and
    "Bu2KJpsi2KMuMu_probefilter" (denominator).
    """
    hist_mc = hist_mc.integrate("selection", mc_selection)
    print(hist_data.axis("selection").identifiers())
    hist_data = hist_data.integrate("selection", data_selection)

    # NOTE(review): the original carried a "Normalize MC to data" step that
    # only computed an unused norm via `.values().sum()`, which fails because
    # `.values()` is a dict. No normalisation is applied here either.
    print(hist_data)
    print(hist_data.values())

    hist_all = copy.deepcopy(hist_data).add(hist_mc)

    # Two stacked pads are needed: the original requested a 1x1 grid and then
    # indexed the single Axes object, which raises a TypeError.
    fig, ax = plt.subplots(2, 1, figsize=(10, 12))
    main_ax, ratio_ax = ax[0], ax[1]

    has_xlim = xlim is not None and len(xlim) > 0
    has_ylim = ylim is not None and len(ylim) > 0

    hist.plot1d(hist_all, overlay="dataset", ax=main_ax)
    # Only apply optional settings that were actually provided; the empty
    # defaults are not valid arguments for the matplotlib setters.
    if has_xlim:
        main_ax.set_xlim(xlim)
    if xscale:
        main_ax.set_xscale(xscale)
    main_ax.set_xlabel(xlabel)
    if has_ylim:
        main_ax.set_ylim(ylim)
    if yscale:
        main_ax.set_yscale(yscale)
    main_ax.set_ylabel(ylabel)

    hist.plotratio(
        num=hist_all.integrate("dataset", "Run2018"),
        den=hist_all.integrate("dataset", "Bu2KJpsi2KMuMu_probefilter"),
        unc='num',
        ax=ratio_ax)
    if has_xlim:
        ratio_ax.set_xlim(xlim)
    if xscale:
        ratio_ax.set_xscale(xscale)
    ratio_ax.set_xlabel(xlabel)
    ratio_ax.set_ylabel("Data / MC")

    fig.savefig(f"{figure_directory}/{savetag}.png")
Ejemplo n.º 6
0
def test_plot1d():
    """Exercise ``hist.plot1d`` in stacked and in density mode.

    Returns the figure that holds the stacked plot.
    """
    # matplotlib, forced onto the non-interactive backend
    import matplotlib.pyplot as plt

    plt.switch_backend("agg")

    # histogram creation and manipulation
    from coffea import hist

    kinematics = fill_lepton_kinematics()

    # lepton pt summed over all eta
    pt_hist = kinematics.integrate("eta", overflow="under")

    stack_style = {
        "alpha": 0.5,
        "edgecolor": (0, 0, 0, 0.3)
    }
    stacked_ax = hist.plot1d(
        pt_hist,
        overlay="flavor",
        stack=True,
        fill_opts=stack_style,
    )

    # The yields differ a lot, so also compare the shapes as densities.
    pt_hist.label = "Density"
    hist.plot1d(pt_hist, overlay="flavor", density=True)

    return stacked_ax.figure
Ejemplo n.º 7
0
def test_plot1d():
    """Exercise ``hist.plot1d`` (3-tuple return API) in stacked and density mode."""
    # histogram creation and manipulation
    from coffea import hist
    # matplotlib, with the non-interactive backend selected before pyplot
    import matplotlib as mpl
    mpl.use('Agg')
    import matplotlib.pyplot as plt

    kinematics = fill_lepton_kinematics()

    # lepton pt summed over all eta
    pt_hist = kinematics.integrate("eta", overflow='under')

    stack_style = {'alpha': .5, 'edgecolor': (0, 0, 0, 0.3)}
    fig, ax, primitives = hist.plot1d(pt_hist,
                                      overlay="flavor",
                                      stack=True,
                                      fill_opts=stack_style)
    # The returned primitives can be tweaked after plotting; here the
    # legend's shadow attribute is switched on.
    primitives['legend'].shadow = True

    # The yields differ a lot, so also compare the shapes as densities.
    pt_hist.label = "Density"
    fig, ax, primitives = hist.plot1d(pt_hist,
                                      overlay="flavor",
                                      density=True)
Ejemplo n.º 8
0
def plot_lhe_v_pt(acc, tag, regex, outputrootfile, pttype):
    """Plot the gen-level V pT spectra and the NLO/LO ratio, and store the ratio.

    The 'gen_vpt' histogram from ``acc`` is restricted to ``pttype``, rebinned,
    merged, scaled and filtered by ``regex``. The spectra go to the upper pad;
    the lower pad shows the ratio of the '.*LHE.*' datasets to the '.*HT.*'
    datasets together with the reference curves from ``get_old_kfac(tag)``.
    The figure is saved to ``./output/{tag}_gen_vpt.pdf`` and the per-bin
    ratio with its bin edges is written to ``outputrootfile[tag]``.
    """
    outdir = './output/'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    fig, (ax, rax) = plt.subplots(2,
                                  1,
                                  figsize=(7, 7),
                                  gridspec_kw={"height_ratios": (3, 1)},
                                  sharex=True)
    vpt_edges = list(range(80, 800, 40)) + list(range(800, 2000, 100))
    vpt_axis = hist.Bin('vpt', 'LHE V $p_{T}$ (GeV)', vpt_edges)

    dist = 'gen_vpt'

    # Work on a copy so the accumulator content is left untouched.
    spectrum = copy.deepcopy(acc[dist])
    spectrum = spectrum.integrate('type', pttype)
    spectrum = spectrum.rebin(spectrum.axis('vpt'), vpt_axis)
    spectrum = merge_extensions(spectrum, acc, reweight_pu=False)
    scale_xs_lumi(spectrum)
    spectrum = merge_datasets(spectrum)
    spectrum = spectrum[re.compile(regex)]
    spectrum = spectrum.integrate('weight_type', 'nominal')
    spectrum = spectrum.integrate('weight_index', slice(-0.5, 0.5))

    hist.plot1d(spectrum,
                overlay='dataset',
                overflow='all',
                binwnorm=True,
                ax=ax)

    # Datasets are split into LO and NLO by name pattern.
    lo = spectrum[re.compile('.*HT.*')].integrate('dataset')
    nlo = spectrum[re.compile('.*LHE.*')].integrate('dataset')

    hist.plotratio(nlo,
                   lo,
                   ax=rax,
                   denom_fill_opts={},
                   guide_opts={},
                   unc='num',
                   overflow='all',
                   error_opts=data_err_opts,
                   label='2017 NLO/LO ratio')

    # Reference curves, drawn at the bin centres of the old k-factor.
    reference = get_old_kfac(tag)
    ref_centers = 0.5 * (reference.bins[:, 0] + reference.bins[:, 1])
    rax.plot(ref_centers, reference.values, 'ob-', label='2016 QCD k fac')
    rax.plot(ref_centers,
             reference.values * pdfwgt_sf(ref_centers),
             'or-',
             label='2016 x ad-hoc DY pdfwgt SF')

    ax.set_yscale('log')
    ax.set_ylim(1e-3, 1e6)
    rax.set_ylim(0, 2)
    rax.legend()

    fig.savefig(pjoin(outdir, f'{tag}_{dist}.pdf'))

    # Scale factor per bin: NLO yield over LO yield, with the bin edges.
    sf_x = lo.axis('vpt').edges()
    sf_y = nlo.values()[()] / lo.values()[()]

    outputrootfile[tag] = (sf_y, sf_x)
Ejemplo n.º 9
0
def plot_htmiss_before_and_after(outdir,
                                 infile,
                                 dataset_tag='jetht',
                                 plot_gen=True):
    '''Plot the 'htmiss_before' and 'htmiss_after' histograms from ``infile``.

    For ``dataset_tag == 'qcd'`` with ``plot_gen`` set, the GEN-level HT-miss
    distribution from a fixed coffea file is overlaid as well. The figure is
    written to ``{outdir}/htmiss_before_after_reb.pdf``.
    '''
    root_file = uproot.open(infile)
    before = root_file['htmiss_before']
    after = root_file['htmiss_after']

    fig, ax = plt.subplots()

    for histogram, legend_label in ((before, 'Before rebalancing'),
                                    (after, 'After rebalancing')):
        hep.histplot(histogram.values,
                     histogram.edges,
                     ax=ax,
                     label=legend_label)

    ax.set_xlabel(r'$H_T^{miss} \ (GeV)$', fontsize=14)
    ax.set_ylabel(r'Counts', fontsize=14)
    ax.set_yscale('log')
    ax.set_ylim(1e-1, 1e7)

    ax.legend(title='Rebalancing')

    # Dataset label in the top-left corner, just above the frame.
    ax.text(0.,
            1.,
            f'{tag_to_plottag(dataset_tag)} 2017',
            fontsize=14,
            ha='left',
            va='bottom',
            transform=ax.transAxes)

    # For QCD, optionally overlay the GEN HT-miss distribution
    if dataset_tag == 'qcd' and plot_gen:
        # Coffea file providing the GEN HT-miss distribution
        acc = load('./input/qcd_QCD_HT700to1000-mg_new_pmx_2017.coffea')

        gen_hist = acc['gen_htmiss_noweight']
        gen_hist = gen_hist.integrate('dataset')
        gen_hist = gen_hist.integrate('region', 'inclusive')

        hist.plot1d(gen_hist, ax=ax, clear=False)

    # Give entries whose label is the string 'None' a readable name, then
    # rebuild the legend from the handles.
    handles, labels = ax.get_legend_handles_labels()
    for handle, label in zip(handles, labels):
        if label == 'None':
            handle.set_label(r'GEN $H_T^{miss}$')

    ax.legend(handles=handles)

    outpath = pjoin(outdir, 'htmiss_before_after_reb.pdf')
    fig.savefig(outpath)
    plt.close(fig)
    print(f'File saved: {outpath}')
Ejemplo n.º 10
0
def make_full_prediction_distribution(trainoutdir, trainingdata, key):
    """Plot the BDT score distributions of two saved models on the full dataset.

    The dataset is read from the HDF file ``trainingdata`` under ``key``; all
    columns except 'label' are used as features. The models stored under
    ``model_default`` and ``model_optimized`` in ``trainoutdir`` are evaluated
    and, for each, a signal/background density plot is written to
    ``trainoutdir``.
    """
    print(
        f"Make predictions on full training dataset: {trainingdata} and model in {trainoutdir}"
    )
    from coffea import hist
    import xgboost as xgb

    # (model file, plot title, output file) for each model, in processing order
    variants = (
        ("model_default/model.bin",
         'default BDT response on full dataset',
         "prediction_fulldist_default.pdf"),
        ("model_optimized/model.bin",
         'optimized BDT response on full dataset',
         "prediction_fulldist_optimized.pdf"),
    )

    ## full dataset
    df = pd.read_hdf(trainingdata, key)
    featurecols = [col for col in df.columns if col != 'label']
    dfull = xgb.DMatrix(df[featurecols], label=df['label'])

    ## load both models before predicting with either
    boosters = []
    for model_file, _, _ in variants:
        booster = xgb.Booster({"nthread": 16})
        booster.load_model(join(trainoutdir, model_file))
        boosters.append(booster)

    ## predictions
    predictions = [booster.predict(dfull) for booster in boosters]

    ## fill one S/B histogram per model
    label_axis = hist.Cat('label', 'S/B')
    bdt_axis = hist.Bin('score', 'BDT score', 50, -10, 10)
    is_signal = df['label'].values.astype(bool)

    score_hists = []
    for scores in predictions:
        score_hist = hist.Hist("norm. counts", label_axis, bdt_axis)
        score_hist.fill(label='signal', score=scores[is_signal])
        score_hist.fill(label='background', score=scores[~is_signal])
        score_hists.append(score_hist)

    ## making plots
    for score_hist, (_, plot_title, plot_file) in zip(score_hists, variants):
        fig, ax = plt.subplots(figsize=(8, 6))
        hist.plot1d(score_hist, overlay='label', ax=ax, density=True)
        ax.set_ylim(0, None)
        ax.set_title(plot_title, x=0.0, ha="left")
        ax.set_xlabel(ax.get_xlabel(), x=1.0, ha="right")
        ax.set_ylabel(ax.get_ylabel(), y=1.0, ha="right")
        plt.savefig(join(trainoutdir, plot_file), bbox_inches='tight')
        plt.close()
Ejemplo n.º 11
0
def pdf_plot(acc):
    """Plot the 'gen_vpt' spectrum per 'pdf' identifier for two fixed datasets.

    For each dataset the spectra of all 'pdf' identifiers other than 'none'
    are drawn on the main pad, and their ratio to the 'none' entry on the
    ratio pad. One file ``./output/pdfstudy/{dataset}.pdf`` is written per
    dataset.

    Note: the module-level ``data_err_opts`` dict is modified in place (its
    'color' entry is overwritten for every pdf identifier).
    """
    outdir = './output/pdfstudy/'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Datasets to process; one output figure each.
    datasets = [
        'WJetsToLNu_HT_MLM_2017',
        'DYJetsToLL_M-50_HT_MLM_2017',
    ]
    for ds in datasets:
        fig, ax, rax = fig_ratio()
        h = acc['gen_vpt']
        # Rebin to 10 uniform bins between 0 and 2000.
        h = h.rebin(h.axis('vpt'),
                    hist.Bin("vpt", r"$p_{T}^{V}$ (GeV)", 10, 0, 2000))
        h = merge_extensions(h, acc, reweight_pu=False)
        scale_xs_lumi(h)
        h = merge_datasets(h)

        # Restrict to the current dataset.
        h = h.project(h.axis('dataset'), ds)

        for pdf in h.axis('pdf').identifiers():

            # 'none' is the reference used as ratio denominator below; it is
            # not drawn on its own.
            if str(pdf) == 'none':
                continue
            # Colour for this pdf, taken from the module-level `colors` map
            # and written into the shared error-bar options.
            data_err_opts['color'] = colors[str(pdf)]
            hist.plot1d(
                h.project('pdf', pdf),
                # overlay='pdf',
                error_opts=data_err_opts,
                ax=ax,
                overflow='all',
                clear=False)

            # Ratio of this pdf to the 'none' reference.
            hist.plotratio(
                h.project('pdf', pdf),
                h.project('pdf', 'none'),
                ax=rax,
                denom_fill_opts={},
                guide_opts={},
                unc='num',
                overflow='all',
                error_opts=data_err_opts,
                clear=False,
            )
        ax.set_ylim(1e-3, 1e8)
        rax.set_ylim(0.9, 1.6)
        ax.set_yscale('log')
        leg = ax.legend()
        # Relabel the legend entries with the pdf names.
        # NOTE(review): `i` counts all identifiers including 'none', while
        # 'none' was never plotted; unless 'none' is the last identifier the
        # index may be off by one relative to the legend texts - confirm.
        for i, pdf in enumerate(h.axis('pdf').identifiers()):
            if str(pdf) == 'none':
                continue
            leg.get_texts()[i].set_text(str(pdf))
        fig.savefig(pjoin(outdir, f'{ds}.pdf'))
        plt.close(fig)
Ejemplo n.º 12
0
def reduce(folder):
    """Plot every histogram file found in ``folder``.

    Each directory entry is loaded with ``load`` and drawn with
    ``hist.plot1d`` on its own figure, which is saved as ``{filename}.png``
    in the current working directory.

    Args:
        folder: Directory whose entries are loaded and plotted.
    """
    filenames = os.listdir(folder)
    print(filenames)

    for filename in filenames:
        histogram = load(os.path.join(folder, filename))
        # One fresh figure per file: drawing on implicit pyplot state can
        # stack successive histograms on the same axes and leaks figures.
        fig, ax = plt.subplots()
        try:
            hist.plot1d(histogram, ax=ax)
            fig.savefig(filename + '.png')
        finally:
            plt.close(fig)
Ejemplo n.º 13
0
def pdf_plot(acc):
    """Plot the recoil spectrum split by photon pT range for 2017 and 2018.

    For each year the 'photon_pt0_recoil' histogram is rebinned, merged and
    scaled, restricted to the ``GJets_HT_MLM_{year}`` dataset and the
    'tr_g_notrig_num' region, and drawn with one curve per photon-pT bin.
    Output goes to ``./output/photon_pt_cut/photon_pt_cut_{year}.pdf``.
    """
    outdir = './output/photon_pt_cut/'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    for year in [2017, 2018]:
        # Reuse (and wipe) the current pyplot figure.
        fig = plt.gcf()
        fig.clf()
        ax = plt.gca()

        # Work on a copy so the accumulator content is left untouched.
        recoil_hist = copy.deepcopy(acc['photon_pt0_recoil'])

        pt_axis = hist.Bin("pt", r"$p_{T}^{\gamma}$ (GeV)",
                           [0, 175, 215, 10000])
        recoil_hist = recoil_hist.rebin(recoil_hist.axis('pt'), pt_axis)

        recoil_edges = (list(range(200, 500, 50))
                        + list(range(500, 1000, 100))
                        + list(range(1000, 2000, 250)))
        recoil_axis = hist.Bin('recoil', 'recoil', recoil_edges)
        recoil_hist = recoil_hist.rebin(recoil_hist.axis('recoil'),
                                        recoil_axis)

        recoil_hist = merge_extensions(recoil_hist, acc, reweight_pu=False)
        scale_xs_lumi(recoil_hist)
        recoil_hist = merge_datasets(recoil_hist)

        pprint(recoil_hist.axis('dataset').identifiers())
        recoil_hist = recoil_hist.integrate(recoil_hist.axis('dataset'),
                                            f'GJets_HT_MLM_{year}')
        recoil_hist = recoil_hist.integrate(recoil_hist.axis('region'),
                                            'tr_g_notrig_num')
        pprint(recoil_hist)

        hist.plot1d(recoil_hist,
                    overlay='pt',
                    ax=ax,
                    overflow='all',
                    clear=False)

        ax.set_ylim(0, 2e5)
        ax.set_xlim(200, 500)
        ax.set_ylabel('Expected GJets events (a.u.)')
        ax.legend(['< 175', '175 - 215', '> 215'], title='Photon $p_{T}$')

        ax.text(0.97,
                0.65,
                'Photon CR, no trigger applied',
                fontsize=10,
                horizontalalignment='right',
                verticalalignment='bottom',
                transform=ax.transAxes)
        # Dashed vertical marker at recoil = 250
        ax.plot([250, 250], [0, 1e8], '--', color='grey')

        fig.savefig(pjoin(outdir, f'photon_pt_cut_{year}.pdf'))
        plt.close(fig)
Ejemplo n.º 14
0
def draw(h1, Closure_bin):
    """Draw ``h1`` for one 'Closure_bin', summed over datasets, and save a PNG.

    Axis ranges, the luminosity value and the output file name are taken from
    the module-level names ``xmin``/``xmax``/``ymin``/``ymax``,
    ``lumi_factor``, ``histname`` and ``file_name``.
    """
    # -- Global plot style
    plt.style.use(hep.style.CMS)
    font_settings = {
        "font.size": 14,
        "axes.titlesize": 18,
        "axes.labelsize": 18,
        "xtick.labelsize": 12,
        "ytick.labelsize": 12,
    }
    plt.rcParams.update(font_settings)

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 7), sharex=True)

    marker_style = {
        "linestyle": "none",
        "marker": "+",
        "markersize": 10.0,
        "color": "royalblue",
        "elinewidth": 1,
    }

    # -- Draw hist
    selected = h1.integrate('Closure_bin', Closure_bin).sum('dataset')
    hist.plot1d(selected, ax=ax, clear=False, error_opts=marker_style)

    np.set_printoptions(suppress=True)

    # -- Axis ranges and scale
    ax.autoscale(axis="x", tight=True)
    ax.set_ylim(ymin, ymax)
    ax.set_xlim(xmin, xmax)
    ax.set_yscale('log')

    # -- Luminosity label in the top-right corner, just above the frame
    plt.text(
        1.0,
        1.0,
        r"%.2f fb$^{-1}$ (13 TeV)" % (lumi_factor),
        fontsize=16,
        horizontalalignment="right",
        verticalalignment="bottom",
        transform=ax.transAxes,
    )

    plt.savefig(histname + "_" + file_name + ".png")
Ejemplo n.º 15
0
def make_mc_plot(bkgh, sigh=None, title=None, overflow='over', yscale='log'):
    """Draw stacked backgrounds from ``bkgh`` and, optionally, signal ``sigh``.

    Backgrounds are stacked per 'cat'; when ``sigh`` is truthy its datasets
    are overlaid on the same axes. For 'linear' and 'log' scales the upper
    y limit is extended by 20% of the axis span. Returns ``(fig, ax)``.
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from coffea import hist

    with_signal = bool(sigh)

    fig, ax = plt.subplots(1, 1, figsize=(8, 6))
    hist.plot1d(bkgh,
                overlay='cat',
                ax=ax,
                clear=False,
                stack=True,
                overflow=overflow,
                line_opts=None,
                fill_opts=fill_opts,
                error_opts=error_opts)
    if with_signal:
        hist.plot1d(sigh,
                    overlay='dataset',
                    ax=ax,
                    overflow=overflow,
                    clear=False)

    ax.set_yscale(yscale)
    ax.autoscale(axis='both', tight=True)

    # Add headroom above the tallest bin.
    lower, upper = ax.get_ylim()
    if yscale == 'linear':
        upper = (upper - lower) * 1.2 + lower
    elif yscale == 'log':
        log_lower = np.log10(lower)
        upper = 10**(((np.log10(upper) - log_lower) * 1.2) + log_lower)
    ax.set_ylim(lower, upper)

    ax.set_xlabel(ax.get_xlabel(), x=1, ha='right')
    ax.set_ylabel(ax.get_ylabel(), y=1.0, ha="right")
    ax.set_title(title, x=0.0, ha="left")

    # Luminosity label in the top-right corner, just above the frame.
    ax.text(1, 1, '59.74/fb (13TeV)',
            ha='right', va='bottom', transform=ax.transAxes)

    if with_signal:
        # With signal present the legend is rebuilt from grouped handles.
        ax.legend(*groupHandleLabel(ax), prop={
            'size': 8,
        }, ncol=3)
    else:
        ax.legend()

    return fig, ax
Ejemplo n.º 16
0
def plot_mhiggs(tag, y1, label1, y2, label2, title=''):
    """Overlay two Higgs-mass distributions and save them as ``{tag}.pdf``.

    Args:
        tag: Output file stem.
        y1, y2: Mass values filled into the histogram for each process.
        label1, label2: Process names used for the two overlaid curves.
        title: Label of the "process" category axis.
    """
    hmh = hist.Hist("Events", hist.Cat("process", title),
                    hist.Bin("mhiggs", "Higgs mass [GeV]", 60, 0, 300))
    hmh.fill(process=label1, mhiggs=y1)
    hmh.fill(process=label2, mhiggs=y2)

    # Plot on explicitly created axes: without `ax`, plot1d picks or creates
    # its own, so the figure saved below is not guaranteed to be the one
    # that was drawn on.
    fig, ax = plt.subplots()
    hist.plot1d(hmh, ax=ax, overlay="process", stack=False)

    filename = tag + '.pdf'
    fig.savefig(filename)
    # Release the figure; repeated calls would otherwise accumulate figures.
    plt.close(fig)
    cprint('imgcat ' + filename, 'green')
    return
Ejemplo n.º 17
0
def main():
    """Run ``exampleProcessor`` over the local sample files and plot the results.

    One PDF per entry of the histogram list is written to ``tmp_plots/``.
    Returns the processor output.
    """
    # TODO: load the config and the cache; download the files from eos.

    # Inputs: dataset name -> list of files
    fileset = {
        'tW_scattering': glob.glob("data/samples/*.root"),
    }

    # Names of the output histograms to plot
    histograms = ["MET_pt"]

    executor_args = {
        'workers': 1,
        'function_args': {
            'flatten': False
        }
    }
    output = processor.run_uproot_job(
        fileset,
        treename='Events',
        processor_instance=exampleProcessor(),
        executor=processor.futures_executor,
        executor_args=executor_args,
        chunksize=500000,
    )

    # Make a few plots
    outdir = "tmp_plots/"
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    for name in histograms:
        print(name)
        histogram = output[name]
        if name == 'MET_pt':
            # Coarser binning for the MET spectrum
            met_axis = hist.Bin('pt', r'$E_T^{miss} \ (GeV)$', 20, 0, 200)
            histogram = histogram.rebin('pt', met_axis)

        # Plain (non-density, unstacked) overlay of the datasets
        ax = hist.plot1d(histogram,
                         overlay="dataset",
                         density=False,
                         stack=False)
        ax.set_yscale('linear')  # can be log
        ax.figure.savefig(os.path.join(outdir, f"{name}_shape.pdf"))
        ax.clear()

    return output
Ejemplo n.º 18
0
def truth_nMuon():
    """Overlay the normalised "nTruthMuon" histograms for Bu and Bs samples.

    The module-level ``histograms`` are scaled in place so that each dataset
    integrates to one, then the inclusive and probefilter datasets of both
    B types are drawn on one axes and saved to
    ``{figure_directory}/truth_nMuon.png``.
    """
    btypes = ["Bu", "Bs"]
    sample_prefix = {
        "Bu": "Bu2KJpsi2KMuMu",
        "Bs": "Bs2PhiJpsi2KKMuMu",
    }

    fig, ax = plt.subplots(1, 1, figsize=(10, 7))
    plt.style.use(mplhep.style.ROOT)

    # Normalize histograms: scale every dataset by the inverse of its integral
    for btype in btypes:
        truth_hist = histograms[btype]["nTruthMuon"]
        integrals = truth_hist.integrate("nTruthMuon").values()
        sfs = {key[0]: 1. / total for key, total in integrals.items()}
        truth_hist.scale(sfs, axis="dataset")

    for dataset_type in ["inclusive", "probefilter"]:
        print(dataset_type)
        for btype in btypes:
            print(btype)
            dataset_name = f"{sample_prefix[btype]}_{dataset_type}"
            truth_hist = histograms[btype]["nTruthMuon"]
            # Legend label of the form "<dataset type>, <B type>"
            dataset_id = truth_hist.axis("dataset").index(dataset_name)
            dataset_id.label = f"{dataset_type}, {btype}"
            hist.plot1d(truth_hist[[dataset_name], :],
                        ax=ax,
                        clear=False,
                        overlay="dataset")

    plt.tight_layout()
    fig.savefig(f"{figure_directory}/truth_nMuon.png")
Ejemplo n.º 19
0
def plotHist(output, variable, xlabel):
    """Plot ``output[variable]`` with the 'Data' and 'MC' datasets normalised.

    The histogram is temporarily scaled in place so that the 'Data' and 'MC'
    entries are each divided by their integral over ``variable``, drawn with
    one curve per dataset, and then scaled back by the original integrals.

    Args:
        output: Mapping from variable name to histogram.
        variable: Key in ``output``; also the name of the axis integrated to
            obtain the normalisation.
        xlabel: Label for the x axis.
    """
    histogram = output[variable]
    scales = histogram.integrate(variable).values()
    data_scale = scales['Data', ]
    mc_scale = scales['MC', ]
    # Re-key the two entries by plain dataset name and invert them.
    scales['Data'] = 1 / data_scale
    scales['MC'] = 1 / mc_scale
    del scales['Data', ]
    del scales['MC', ]
    histogram.scale(scales, axis='dataset')
    try:
        fig, ax = plt.subplots(figsize=(5, 5))
        # Draw on the axes created above instead of relying on implicit
        # pyplot state and on the return type of plot1d.
        hist.plot1d(histogram, ax=ax, overlay='dataset')
        ax.set_xlabel(xlabel)
        ax.set_ylabel('')
    finally:
        # Always undo the in-place normalisation, even if plotting failed,
        # so the caller's histogram is not left rescaled.
        scales['Data'] = data_scale
        scales['MC'] = mc_scale
        histogram.scale(scales, axis='dataset')
Ejemplo n.º 20
0
def plot_ht_stitching(acc, tag, regex):
    """Plot the 'lhe_ht' distribution of the datasets matching ``regex``.

    The figure is saved as ``./output/ht/{tag}_lhe_ht.pdf``.
    """
    outdir = './output/ht/'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    dist = 'lhe_ht'

    # Work on a copy so the accumulator content is left untouched.
    ht_hist = copy.deepcopy(acc[dist])
    ht_hist = merge_extensions(ht_hist, acc)
    scale_xs_lumi(ht_hist)
    ht_hist = merge_datasets(ht_hist)

    selected = ht_hist[re.compile(regex)]
    fig, ax, _ = hist.plot1d(selected,
                             overlay='dataset',
                             overflow='all',
                             binwnorm=True)
    # Log y axis with a fixed range, applied to the current axes
    plt.yscale('log')
    plt.ylim(1e-3, 1e6)

    fig.savefig(pjoin(outdir, f'{tag}_{dist}.pdf'))
Ejemplo n.º 21
0
def main():
    """Plot the 'weights' histogram of a coffea output file.

    The file, region, regex and output directory come from the command-line
    arguments; the figure is saved as ``weights.pdf`` in the output directory.
    """
    args = commandline()
    output = load(args.file)

    # Sum over datasets, pick the region, and keep the entries matching the
    # regular expression.
    weights = output['weights'].integrate('dataset')
    weights = weights.integrate("region", args.region)
    weights = weights[re.compile(args.regex)]

    fig, ax, _ = hist.plot1d(weights,
                             overlay='weight_type',
                             overflow='all',
                             fill_opts={
                                 'edgecolor': (0, 0, 0, 0.3),
                                 'alpha': 0.8
                             })
    fig.suptitle("Weights")
    ax.set_yscale('log')
    ax.set_ylim(0.1, 1e8)

    # Create the output directory; an already existing one is fine.
    try:
        os.makedirs(args.outpath)
    except FileExistsError:
        pass

    fig.savefig(pjoin(args.outpath, "weights.pdf"))
    plt.close(fig)
Ejemplo n.º 22
0
def plot_resolution(tag, y_pred, y_true, y_ref=None):
    """Plot the residual distribution of predictions with Gaussian fits.

    Fills ``y_pred - y_true`` (method "DNN") and, when ``y_ref`` is given,
    ``y_ref - y_true`` (method "CB") into a histogram, overlays the curve
    returned by ``fit_gauss`` for each, prints the fitted mean and standard
    deviation, and saves the figure as ``sigma_{tag}.pdf``.

    Args:
        tag: Suffix of the output file name.
        y_pred: Predicted values.
        y_true: True values. When they sum to zero, the axis is labelled
            "Higgs mass [GeV]" and spans 50 to 250.
        y_ref: Optional reference values to compare against.
    """
    has_ref = y_ref is not None

    sigma_dnn = y_pred - y_true
    if has_ref:
        sigma_cb = y_ref - y_true

    xmin, xmax, xlabel = -100, 350, "True - Predicted"
    if y_true.sum() == 0:
        xmin, xmax, xlabel = 50, 250, "Higgs mass [GeV]"

    hsigma = hist.Hist("Events", hist.Cat("method", "Reco method"),
                       hist.Bin("sigma", xlabel, 100, xmin, xmax))
    hsigma.fill(method="DNN", sigma=sigma_dnn)
    if has_ref:
        hsigma.fill(method="CB", sigma=sigma_cb)

    # Plot on explicitly created axes: without `ax`, plot1d picks or creates
    # its own, so the figure saved below is not guaranteed to be the one
    # that was drawn on.
    fig, ax = plt.subplots()
    hist.plot1d(hsigma, ax=ax, overlay="method", stack=False)

    centers = hsigma.axis('sigma').centers()

    # (method, printed name, line colour) for each fitted curve
    fits = [('DNN', 'DNN', 'maroon')]
    if has_ref:
        fits.append(('CB', 'Ref.', 'navy'))

    for method, shown_name, color in fits:
        fit_result, gauss, mask = fit_gauss(centers,
                                            hsigma.values()[(method, )])
        # The reference message used to read ",x std" (stray character).
        print('%s mean = %.2f, std = %.2f' %
              ((shown_name, ) + tuple(fit_result[1:3])))
        ax.plot(centers[mask],
                gauss,
                color=color,
                linewidth=1,
                label=r'Fitted function')

    filename = 'sigma_' + tag + '.pdf'
    fig.savefig(filename)
    # Release the figure; repeated calls would otherwise accumulate figures.
    plt.close(fig)
    cprint('imgcat ' + filename, 'green')
    return
Ejemplo n.º 23
0
def main():
    """Run ``exampleProcessor`` over two remote samples and plot four histograms.

    One PDF per histogram is written to ``./tmp_plots``.
    """
    # Inputs: dataset name -> list of files
    fileset = {
        'NonthDM': [
            "root://cms-xrd-global.cern.ch///store/mc/RunIISummer16NanoAODv4/NonthDMMonoJet_MX-1500_l1-2p_l2-0p04_13TeV-madgraph/NANOAODSIM/PUMoriond17_Nano14Dec2018_102X_mcRun2_asymptotic_v6-v1/260000/F78663A9-8E7F-B74E-8F6F-9C9A61A27AE5.root"
        ],
        "Znunu_ht600to800": [
            "root://cms-xrd-global.cern.ch///store/mc/RunIISummer16NanoAODv4/ZJetsToNuNu_HT-600To800_13TeV-madgraph/NANOAODSIM/PUMoriond17_Nano14Dec2018_102X_mcRun2_asymptotic_v6-v1/280000/F4921B81-C2E3-6546-9C00-D908A264FFD8.root",
        ]
    }

    # Run the processor
    executor_args = {
        'workers': 1,
        'function_args': {
            'flatten': False
        }
    }
    output = processor.run_uproot_job(
        fileset,
        treename='Events',
        processor_instance=exampleProcessor(),
        executor=processor.futures_executor,
        executor_args=executor_args,
        chunksize=500000,
    )

    # Make a few plots
    outdir = "./tmp_plots"
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    plot_names = ["met", "jet_pt", "new_variable", "jet_pt_met100"]
    for name in plot_names:
        fig, ax, _ = hist.plot1d(output[name], overlay="dataset")
        ax.set_yscale('log')
        ax.set_ylim(0.1, 1e5)

        fig.savefig(os.path.join(outdir, f"{name}.pdf"))
Ejemplo n.º 24
0
def debug_plot_output(output,
                      region='inclusive',
                      outdir='out',
                      logscaley=True):
    """Dump all histograms as PDF.

    Best-effort debugging helper: entries whose name starts with "_", that
    are empty, that cannot be integrated over ``region``, or that fail to
    plot are skipped silently.

    Args:
        output: Mapping from name to histogram-like object.
        region: Identifier integrated on the "region" axis.
        outdir: Directory for the PDFs (created if missing).
        logscaley: Use a log y axis with range (0.1, 1e8); otherwise a linear
            axis with range (0.1, 1e3).
    """
    os.makedirs(outdir, exist_ok=True)
    for name in output.keys():
        if name.startswith("_"):
            continue
        item = output[name]
        # `except Exception` instead of a bare `except`, so Ctrl-C and
        # SystemExit still abort the loop.
        try:
            # Sum bin contents across all entries of the values dict;
            # np.sum applied directly to the dict view does not add them up.
            total = sum(np.sum(counts) for counts in item.values().values())
            if total == 0:
                continue
        except Exception:
            continue
        try:
            h = item.integrate("region", region)
        except Exception:
            continue
        print(name)
        try:
            fig, ax, _ = hist.plot1d(
                h,
                overlay='dataset',
                overflow='all',
            )
        except Exception:
            continue
        fig.suptitle(f'{region}, {name}')
        if logscaley:
            ax.set_yscale('log')
            ax.set_ylim(0.1, 1e8)
        else:
            ax.set_ylim(0.1, 1e3)
        fig.savefig(os.path.join(outdir, f"{region}_{name}.pdf"))
        plt.close(fig)
Ejemplo n.º 25
0
def main():
    """Run the example processor (or reuse its cached output) and plot it.

    The processor output is taken from the on-disk cache only when the
    cached configuration, histogram list and fileset all equal the current
    ones and a cached output exists; otherwise the processor is run and the
    cache is rewritten. With ``overwrite = True`` the cache is never loaded
    before that comparison. For every histogram name two PDFs are written to
    ``./tmp_plots``: ``<name>.pdf`` (stacked, ``density=False``) and
    ``<name>_shape.pdf`` (unstacked, ``density=True``).

    :return: The processor output (freshly computed or taken from cache).
    """
    # Local import: pyplot is only needed here to release figures after saving.
    import matplotlib.pyplot as plt

    overwrite = True

    # load the config and the cache
    cfg = loadConfig()

    # Inputs are defined in a dictionary
    # dataset : list of files
    fileset = {
        'tW_scattering': glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p2/tW_scattering__nanoAOD/merged/*.root"),
        "TTW":           glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p2/TTWJetsToLNu_TuneCP5_13TeV-amcatnloFXFX-madspin-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20_ext1-v1/merged/*.root") \
                        + glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p2/TTWJetsToQQ_TuneCP5_13TeV-amcatnloFXFX-madspin-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20-v1/merged/*.root"),
        #        "ttbar":        glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p3/TTJets_SingleLeptFromT_TuneCP5_13TeV-madgraphMLM-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20-v1/*.root") # adding this is still surprisingly fast (20GB file!)
        "ttbar": glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p3/TTJets_SingleLeptFromTbar_TuneCP5_13TeV-madgraphMLM-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20-v1/merged/*.root")
    }

    # histograms
    histograms = [
        "MET_pt", "Jet_pt", "Jet_eta", "Jet_pt_fwd", "W_pt_notFromTop",
        "GenJet_pt_fwd", "Spectator_pt", "Spectator_eta"
    ]
    histograms += [
        "Top_pt", "Top_eta", "Antitop_pt", "Antitop_eta", "W_pt", "W_eta",
        "N_b", "N_jet", "dijet_mass", "dijet_mass_bestW", "dijet_mass_secondW",
        "digenjet_mass", "dijet_deltaR"
    ]

    # initialize cache
    cache = dir_archive(os.path.join(os.path.expandvars(cfg['caches']['base']),
                                     cfg['caches']['simpleProcessor']),
                        serialized=True)
    if not overwrite:
        cache.load()

    # Reuse the cached output only if it was produced with identical inputs.
    cache_is_valid = (cfg == cache.get('cfg')
                      and histograms == cache.get('histograms')
                      and fileset == cache.get('fileset')
                      and cache.get('simple_output'))
    if cache_is_valid:
        output = cache.get('simple_output')
    else:
        # Run the processor
        output = processor.run_uproot_job(
            fileset,
            treename='Events',
            processor_instance=exampleProcessor(),
            executor=processor.futures_executor,
            executor_args={
                'workers': 1,
                'function_args': {
                    'flatten': False
                }
            },
            chunksize=500000,
        )
        cache['fileset'] = fileset
        cache['cfg'] = cfg
        cache['histograms'] = histograms
        cache['simple_output'] = output
        cache.dump()

    # Make a few plots
    outdir = "./tmp_plots"
    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(outdir, exist_ok=True)

    # Histograms whose 'pt' axis is rebinned before plotting.
    rebinning = {
        'MET_pt': hist.Bin('pt', r'$E_T^{miss} \ (GeV)$', 20, 0, 200),
        'W_pt_notFromTop': hist.Bin('pt', r'$p_{T}(W) \ (GeV)$', 25, 0, 500),
    }

    # (file-name suffix, plot1d options) for the two plots made per histogram:
    # a stacked plot with absolute normalisation, and an unstacked density
    # ("shape") plot for which cross-section differences do not matter.
    variants = (
        ("", {"density": False, "stack": True}),
        ("_shape", {"density": True, "stack": False}),
    )

    for name in histograms:
        print(name)
        histogram = output[name]
        if name in rebinning:
            histogram = histogram.rebin('pt', rebinning[name])

        for suffix, plot_opts in variants:
            # plot1d is called without an axis, so it draws on a new figure.
            ax = hist.plot1d(histogram, overlay="dataset", **plot_opts)
            ax.set_yscale('linear')  # can be log
            ax.figure.savefig(
                os.path.join(outdir, "{}{}.pdf".format(name, suffix)))
            # Close instead of merely clearing the axes: otherwise one open
            # figure per plot stays registered with pyplot.
            plt.close(ax.figure)

    return output
Ejemplo n.º 26
0
def make_plot(acc,
              region,
              distribution,
              year,
              data,
              mc,
              signal=None,
              outdir='./output/stack/',
              integrate=None,
              ylim=None,
              xlim=None,
              rylim=None,
              tag=None,
              output_format='pdf',
              ratio=True):
    """Creates a data vs MC comparison plot

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param region: Value of the ``region`` axis to plot; also used in the
        legend title, the channel label and the output file name.
    :param distribution: Key of the histogram in ``acc`` to plot.
    :param year: Data-taking year; used for the luminosity label and the
        output file name.
    :param data: Selector used to index the histogram's datasets for data
        (e.g. a regular expression); a falsy value skips the data overlay
        and the ratio panel content.
    :param mc: Selector for the stacked MC background datasets.
    :param signal: Optional selector for signal datasets drawn as overlay.
    :param outdir: Directory for the output files; created if missing.
    :param integrate: Optional ``(axis, low, high)`` tuple; the histogram is
        integrated over ``slice(low, high)`` of that axis before plotting.
    :param ylim: ``None``, the string ``"auto"``, or a ``(low, high)`` pair
        for the main panel's y range.
    :param xlim: Optional ``(low, high)`` pair for the x range.
    :param rylim: Optional ``(low, high)`` pair for the ratio panel's y
        range; defaults to (0.5, 1.5).
    :param tag: Optional string inserted into the output file name.
    :param output_format: Comma-separated list of file extensions to save.
    :param ratio: If true, add a Data / MC ratio panel below the main plot.
    """
    # Rebin
    s = Style()
    h = copy.deepcopy(acc[distribution])
    assert (h)
    try:
        newax = s.get_binning(distribution, region)
        h = h.rebin(h.axis(newax.name), newax)
    except KeyError:
        # No dedicated binning configured: keep the original binning.
        pass

    # Integrate over an extra axis
    inte_tag = ""
    if integrate:
        (inte_axis, inte_low, inte_high) = integrate
        h = h.integrate(inte_axis,
                        slice(inte_low,
                              inte_high))  #can add an overflow option here
        inte_tag += "_" + inte_axis + "_" + str(inte_low) + "_" + str(
            inte_high)

    # Pick the region we want to look at
    # E.g. cr_2m_j = Di-Muon control region with monojet selection
    h = h.integrate(h.axis('region'), region)

    # Plotting
    # Add ratio plot at the bottom if specified (default)
    # Otherwise just plot the histogram
    if ratio:
        fig, (ax, rax) = plt.subplots(2,
                                      1,
                                      figsize=(7, 7),
                                      gridspec_kw={"height_ratios": (3, 1)},
                                      sharex=True)

    else:
        fig, ax = plt.subplots(1, 1, figsize=(7, 5))

    data_err_opts = {
        'linestyle': 'none',
        'marker': '.',
        'markersize': 10.,
        'color': 'k',
        'elinewidth': 1,
    }
    signal_err_opts = {
        'linestyle': '-',
        'color': 'crimson',
        'elinewidth': 1,
    }

    # Plot single muon data
    # Note the syntax we use to pick the data set
    if data:
        hist.plot1d(h[data],
                    overlay='dataset',
                    error_opts=data_err_opts,
                    ax=ax,
                    overflow='all',
                    binwnorm=1)

    if signal:
        hist.plot1d(h[signal],
                    overlay='dataset',
                    error_opts=signal_err_opts,
                    ax=ax,
                    overflow='all',
                    binwnorm=1,
                    clear=False)

    # Plot MC background samples
    # Here we use a regular expression to match
    # data sets we want
    hist.plot1d(h[mc],
                overlay='dataset',
                stack=True,
                clear=False,
                overflow='all',
                ax=ax,
                binwnorm=1)

    # Pick the proper legend labels for the channel, then add the common
    # ones. This is done once, on a copy: updating the channel dictionary in
    # place would modify the shared module-level legend_labels on every call.
    channel = channel_name(region)
    if channel == 'VBF':
        channel_labels = legend_labels['VBF']
    elif channel in ['Monojet', 'Mono-V']:
        channel_labels = legend_labels['Monojet/Mono-V']
    else:
        # Unknown channel: fall back to the common labels only instead of
        # failing on an unbound name.
        channel_labels = {}
    legend_labels_to_use = dict(channel_labels)
    legend_labels_to_use.update(legend_labels['Common'])

    # Apply correct colors to BG histograms
    handles, labels = ax.get_legend_handles_labels()
    new_labels = []
    for handle, label in zip(handles, labels):
        col = None
        for k, v in colors.items():
            if re.match(k, label):
                col = v
                break
        if col:
            handle.set_color(col)
            handle.set_linestyle('-')
            handle.set_edgecolor('k')

        # The last matching pattern wins (no break), as before.
        pretty_label = None
        for k, v in legend_labels_to_use.items():
            if re.match(k, label):
                pretty_label = v
        new_labels.append(pretty_label if pretty_label else label)

    # Update legend
    try:
        region_name = s.region_names[region]
    except KeyError:
        region_name = region
    ax.legend(title=region_name, ncol=2, handles=handles, labels=new_labels)

    # Ratio plot: only possible when the ratio panel was actually created
    # (rax does not exist for ratio=False).
    if data and ratio:
        hist.plotratio(h[data].integrate('dataset'),
                       h[mc].integrate('dataset'),
                       ax=rax,
                       denom_fill_opts={},
                       guide_opts={},
                       unc='num',
                       overflow='all',
                       error_opts=data_err_opts)

    ax.text(1.,
            0.,
            distribution,
            fontsize=10,
            horizontalalignment='right',
            verticalalignment='bottom',
            transform=ax.transAxes)
    fig.text(0.,
             1.,
             '$\\bf{CMS}$ internal',
             fontsize=14,
             horizontalalignment='left',
             verticalalignment='bottom',
             transform=ax.transAxes)

    fig.text(1.,
             1.,
             f'{channel}, {lumi(year):.1f} fb$^{{-1}}$ ({year})',
             fontsize=14,
             horizontalalignment='right',
             verticalalignment='bottom',
             transform=ax.transAxes)
    # Aesthetics
    ax.set_yscale("log")
    ax.set_ylabel('Events / GeV')
    # Narrow the settings down to the region and then the distribution, if
    # entries for them exist.
    plot_settings = style.plot_settings()
    if region in plot_settings.keys():
        plot_settings = plot_settings[region]
    if distribution in plot_settings.keys():
        plot_settings = plot_settings[distribution]
    if ylim:
        if ylim == "auto":
            # Bin widths of the one remaining non-dataset axis, to compare
            # bin-width-normalised contents (the plots use binwnorm=1).
            width = np.diff([x for x in h.axes()
                             if "dataset" not in x.name][0].edges())
            vmc = h[mc].integrate("dataset").values()[()] / width
            try:
                vdata = h[data].integrate("dataset").values()[()] / width
            except Exception:
                # No usable data selection: let MC alone drive the range.
                vdata = vmc
            if signal:
                vsig = h[signal].integrate("dataset").values()[()] / width
            else:
                vsig = vmc

            # Lower edge: half the smallest positive content; upper edge:
            # 100x the largest content (maximum of signal, not its minimum).
            ax.set_ylim(
                0.5 * min([
                    np.min(vmc[vmc > 0]),
                    np.min(vdata[vdata > 0]),
                    np.min(vsig[vsig > 0])
                ]),
                1e2 *
                max([np.max(vmc), np.max(vdata),
                     np.max(vsig)]),
            )

        else:
            ax.set_ylim(ylim[0], ylim[1])
    elif 'ylim' in plot_settings.keys():
        ax.set_ylim(plot_settings['ylim'])
    else:
        ax.set_ylim(1e-1, 1e6)

    if xlim:
        ax.set_xlim(xlim[0], xlim[1])
    elif 'xlim' in plot_settings.keys():
        ax.set_xlim(plot_settings['xlim'])

    if ratio:
        if rylim:
            rax.set_ylim(*rylim)
        else:
            rax.set_ylim(0.5, 1.5)
        loc1 = matplotlib.ticker.MultipleLocator(base=0.2)
        loc2 = matplotlib.ticker.MultipleLocator(base=0.1)
        rax.yaxis.set_major_locator(loc1)
        rax.yaxis.set_minor_locator(loc2)
        rax.grid(axis='y', which='minor', linestyle='--')
        rax.grid(axis='y', which='major', linestyle='--')
        rax.set_ylabel('Data / MC')

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(outdir, exist_ok=True)

    for form in output_format.split(','):
        outpath = pjoin(
            outdir,
            f"{region}_{distribution}{inte_tag}_{tag + '_' if tag else ''}{year}.{form}"
        )
        fig.savefig(outpath)
        print(f"Saved plot file in {outpath}")
    plt.close('all')
Ejemplo n.º 27
0
        "should_transfer_files": "YES",
        "when_to_transfer_output": "ON_EXIT",
        "+DaskSchedulerAddress": '"129.93.183.33:8787"',
    })

# Let the cluster scale adaptively: between 5 and 100 jobs, with a
# "4 GB" maximum_memory setting.
# NOTE(review): `cluster` is created above the visible part of this script;
# presumably a dask-jobqueue cluster -- confirm.
cluster.adapt(minimum_jobs=5, maximum_jobs=100, maximum_memory="4 GB"
              )

# Client connected to the cluster; handed to the dask executor below.
client = Client(cluster)

# Arguments passed through to the executor.
exe_args = {
    'client': client,
}

# Run METProcessor over the 'Events' tree of every file in `fileset` using
# the dask executor. `fileset` is defined outside the visible lines.
output = processor.run_uproot_job(fileset,
                                  treename='Events',
                                  processor_instance=METProcessor(),
                                  executor=processor.dask_executor,
                                  executor_args=exe_args)

# Plot the histogram stored under the 'MET' key of the output, overlaying
# the datasets. fill_opts is optional; it makes the histogram filled
# (the default is a line).
hist.plot1d(output['MET'],
            overlay='dataset',
            fill_opts={
                'edgecolor': (0, 0, 0, 0.3),
                'alpha': 0.8
            })

# Print every entry of the cutflow dictionary. A single entry can be printed
# with print(output['cutflow']["KEY_NAME"]).
for key, value in output['cutflow'].items():
    print(key, value)
    }

    outputs = {}
    outputs['data'] = processor.run_uproot_job(dataDS,
                                  treename='ffNtuplizer/ffNtuple',
                                  processor_instance=MuonTimingProcessor(region='CR', data_type='data'),
                                  executor=processor.futures_executor,
                                  executor_args=dict(workers=12, flatten=False),
                                  chunksize=500000,
                                 )


    ## CHANNEL - 2mu2e
    fig, ax = plt.subplots(1,1,figsize=(8,6))
    h = outputs['data']['ndsa'].integrate('channel', slice(1,2))
    hist.plot1d(h, overlay='cat', ax=ax, overflow='over', error_opts=data_err_opts)
    ax.set_title('[2mu2e|CR] mu-type leptonjet N(dsa)', x=0.0, ha="left")
    ax.set_yscale('symlog')
    ax.autoscale(axis='both', tight=True)
    ax.text(1,1,'59.74/fb (13TeV)', ha='right', va='bottom', transform=ax.transAxes)
    ax.get_yaxis().set_major_locator(SymmetricalLogLocator(base=10., linthresh=1, subs=range(1,10)))
    ax.set_xlabel(ax.get_xlabel(), x=1.0, ha="right")
    ax.set_ylabel(ax.get_ylabel(), y=1.0, ha="right")
    fig.savefig(join(outdir, 'ndsa_CR_2mu2e.png'))
    fig.savefig(join(outdir, 'ndsa_CR_2mu2e.pdf'))
    plt.close(fig)

    fig, ax = plt.subplots(1,1,figsize=(8,6))
    h = outputs['data']['mutiming'].integrate('channel', slice(1,2))
    hist.plot1d(h, overlay='cat', ax=ax, overflow='over', error_opts=data_err_opts)
    ax.set_title('[2mu2e|CR] mu-type leptonjet mean timing', x=0.0, ha="left")
Ejemplo n.º 29
0
from coffea import hist
from matplotlib import pyplot as plt
# Load input
# NOTE(review): `load` is not imported in the visible lines of this script
# (presumably coffea.util.load) -- confirm.
output = load('monojet.coffea')

# Take the 'genvpt_check' histogram and project it with respect to the
# "dataset" axis.
# NOTE(review): the 'vpt' and 'type' axes are used below, so this project()
# call is presumably expected to remove the dataset axis -- confirm against
# the coffea version in use.
h = output['genvpt_check'].project("dataset")
oldax = h.axis("vpt")
# New binning for the 'vpt' axis: 25 bins from 0 to 2000.
newax = hist.Bin("vpt", r"$p_{T}^{V}$ (GeV)", 25, 0, 2000)

h = h.rebin(oldax, newax)
# Two stacked panels sharing the x axis, with height ratio 3:1: `ax` for the
# distributions, `rax` for the ratio.
fig, (ax, rax) = plt.subplots(2,
                              1,
                              figsize=(7, 7),
                              gridspec_kw={"height_ratios": (3, 1)},
                              sharex=True)
# Draw one curve per 'type' category on the upper panel.
fig, ax, _ = hist.plot1d(h, overlay='type', ax=ax)

# Ratio plot
# Marker-only style: black points, no connecting line.
data_err_opts = {
    'linestyle': 'none',
    'marker': '.',
    'markersize': 10.,
    'color': 'k',
    'elinewidth': 1,
    'emarker': '_'
}

# Select the "BU" and "Nano" entries of the 'type' axis as separate
# histograms.
a = h.axis('type')
h1 = h.project(a, "BU")
h2 = h.project(a, "Nano")
hist.plotratio(h2,
Ejemplo n.º 30
0
import lz4.frame as lz4f
import cloudpickle
from coffea import hist

# Load the histogram collection from an lz4-compressed cloudpickle file.
# SECURITY NOTE(review): cloudpickle.load can execute arbitrary code while
# unpickling -- only use this on files from a trusted source.
# NOTE(review): compression_level presumably has no effect when the file is
# opened for reading -- confirm.
with lz4f.open("hists.cpkl.lz4", mode="r", compression_level=5) as fin:
    hists = cloudpickle.load(fin)



# Plot the "sr_met" histogram with one curve per dataset, on a log y axis
# with a fixed range, and save it as test.pdf.
fig, ax, _ = hist.plot1d(hists["sr_met"],overlay="dataset")
# ax.set_xscale('log')
ax.set_yscale('log')
ax.set_ylim(0.1, 1e5)
fig.savefig("test.pdf")

# print(hists)