Example #1
0
def plot_ht_dist(acc, regex, tag):
    '''Plot the LHE HT distribution for the datasets matching a regex.

    The 'lhe_ht' histogram is loaded from the accumulator, passed through
    merge_extensions / scale_xs_lumi / merge_datasets, restricted to the
    datasets whose name matches ``regex`` and rebinned to 50 bins between
    0 and 4000. It is drawn with one overlaid curve per dataset on a
    logarithmic y axis and saved as ./output/{tag}_lhe_ht.pdf.

    :param acc: Accumulator (processor output) providing 'lhe_ht'
    :type acc: coffea.processor.accumulator
    :param regex: Regular expression selecting the dataset(s) to plot
    :type regex: string
    :param tag: Prefix of the output file name. If it contains 'gjets',
                a vertical line is drawn at HT = 600.
    :type tag: string
    '''
    acc.load('lhe_ht')
    h = acc['lhe_ht']

    h = merge_extensions(h, acc, reweight_pu=False)
    scale_xs_lumi(h)
    h = merge_datasets(h)

    # Choose the relevant dataset(s)
    h = h[re.compile(regex)]

    new_ht_bins = hist.Bin('ht', r'$H_T \ (GeV)$', 50, 0, 4000)
    h = h.rebin('ht', new_ht_bins)

    # Plot the HT distribution
    fig, ax = plt.subplots(1, 1)
    hist.plot1d(h, ax=ax, overflow='all', binwnorm=True, overlay='dataset')
    ax.set_yscale('log')
    ax.set_ylim(1e-3, 1e6)
    if 'gjets' in tag:
        ax.plot([600, 600], [1e-3, 1e6])

    # exist_ok avoids the check-then-create race of exists() + mkdir()
    os.makedirs('./output', exist_ok=True)

    fig.savefig(f'./output/{tag}_lhe_ht.pdf')
    # Release the figure; pyplot otherwise keeps it alive across calls
    plt.close(fig)
Example #2
0
def plot_lhe_v_pt(acc, tag, regex, outputrootfile, pttype):
    '''Plot the LO and NLO LHE V pt spectra and record their ratio.

    The 'gen_vpt' histogram is integrated over the requested pt type,
    rebinned, merged, scaled and restricted to the datasets matching
    ``regex``. Datasets matching '.*HT.*' are taken as LO, datasets
    matching '.*LHE.*' as NLO. The upper panel shows the spectrum of each
    dataset, the lower panel the NLO/LO ratio together with the k factor
    returned by get_old_kfac(tag), with and without the pdfwgt_sf
    correction. The figure is saved as ./output/{tag}_gen_vpt.pdf.

    :param acc: Accumulator (processor output) providing 'gen_vpt'
    :type acc: coffea.processor.accumulator
    :param tag: Name used for the output file, the old k factor lookup
                and the key under which the result is stored
    :type tag: string
    :param regex: Regular expression selecting the datasets to use
    :type regex: string
    :param outputrootfile: Mapping-like output object; outputrootfile[tag]
                           is set to the tuple (NLO/LO ratio per bin,
                           vpt bin edges)
    :param pttype: Identifier on the 'type' axis to integrate over
    '''
    outdir = './output/'
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(outdir, exist_ok=True)

    fig, (ax, rax) = plt.subplots(2,
                                  1,
                                  figsize=(7, 7),
                                  gridspec_kw={"height_ratios": (3, 1)},
                                  sharex=True)
    new_ax = hist.Bin('vpt', 'LHE V $p_{T}$ (GeV)',
                      list(range(80, 800, 40)) + list(range(800, 2000, 100)))

    for dist in ['gen_vpt']:
        # Work on a copy so that the accumulator content stays untouched
        h = copy.deepcopy(acc[dist])
        h = h.integrate('type', pttype)
        h = h.rebin(h.axis('vpt'), new_ax)
        h = merge_extensions(h, acc, reweight_pu=False)
        scale_xs_lumi(h)
        h = merge_datasets(h)
        h = h[re.compile(regex)]
        h = h.integrate('weight_type', 'nominal')
        h = h.integrate('weight_index', slice(-0.5, 0.5))
        hist.plot1d(h, overlay='dataset', overflow='all', binwnorm=True, ax=ax)

        # Split the selected datasets by name into the two samples
        lo = h[re.compile('.*HT.*')].integrate('dataset')
        nlo = h[re.compile('.*LHE.*')].integrate('dataset')

        hist.plotratio(nlo,
                       lo,
                       ax=rax,
                       denom_fill_opts={},
                       guide_opts={},
                       unc='num',
                       overflow='all',
                       error_opts=data_err_opts,
                       label='2017 NLO/LO ratio')

        # Overlay the previous k factor at the centers of its bins
        old = get_old_kfac(tag)
        old_x = 0.5 * (old.bins[:, 0] + old.bins[:, 1])
        rax.plot(old_x, old.values, 'ob-', label='2016 QCD k fac')
        rax.plot(old_x,
                 old.values * pdfwgt_sf(old_x),
                 'or-',
                 label='2016 x ad-hoc DY pdfwgt SF')
        ax.set_yscale('log')
        ax.set_ylim(1e-3, 1e6)
        rax.set_ylim(0, 2)
        rax.legend()

        fig.savefig(pjoin(outdir, f'{tag}_{dist}.pdf'))

        # Bin-wise NLO/LO ratio, stored together with the bin edges
        sf_x = lo.axis('vpt').edges()
        sf_y = nlo.values()[()] / lo.values()[()]

        outputrootfile[tag] = (sf_y, sf_x)

    # Release the figure; pyplot otherwise keeps it alive across calls
    plt.close(fig)
Example #3
0
def legacy_limit_input_monojet(acc, outdir='./output', unblind=False):
    """Writes ROOT TH1s to file as a limit input

    For each of the years 2017 and 2018 a file
    legacy_limit_monojet_{year}.root is created in the output directory.
    It receives one recoil histogram per region and per dataset that
    matches the data, MC or signal selection of that region. Afterwards
    merge_legacy_inputs is run on the output directory.

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param outdir: Output directory
    :type outdir: string
    :param unblind: If True, the 'sr_j' region is exported as well.
                    If False, the 'sr_j' zjets histogram is stored
                    under the 'sr_j' data name instead.
    :type unblind: bool
    """
    distribution = 'recoil'

    regions = [
        'cr_2m_j', 'cr_1m_j', 'cr_2e_j', 'cr_1e_j', 'cr_g_j',
        'sr_j_no_veto_all'
    ]
    if unblind:
        regions.append('sr_j')

    os.makedirs(outdir, exist_ok=True)

    # Histogram prep, rebin, etc. None of this depends on the year or
    # the region, so it is done once instead of once per region.
    h = copy.deepcopy(acc[distribution])
    newax = hist.Bin('recoil', 'Recoil (GeV)', recoil_bins_2016())
    h = h.rebin(h.axis(newax.name), newax)
    h = merge_extensions(h, acc)
    scale_xs_lumi(h)
    h = merge_datasets(h)

    for year in [2017, 2018]:
        # NOTE(review): '(I|i)inv' requires the literal text 'Iinv' or
        # 'iinv'; this looks like a typo for '(H|h)inv' - confirm against
        # the actual signal dataset names before relying on it.
        signal = re.compile(f'(GluGlu|WH|ZH|ggZH|VBF).*(I|i)inv.*{year}')
        f = uproot.recreate(pjoin(outdir, f'legacy_limit_monojet_{year}.root'))
        data, mc = datasets(year, unblind=unblind)

        for region in regions:
            print(f'Region {region}')
            ih = h.integrate(h.axis('region'), region)

            for dataset in map(str, ih.axis('dataset').identifiers()):
                if not (data[region].match(dataset)
                        or mc[region].match(dataset) or signal.match(dataset)):
                    continue
                print(f"   Dataset: {dataset}")

                th1 = export1d(ih.integrate('dataset', dataset))
                try:
                    histo_name = f'{legacy_region_name(region)}_{legacy_dataset_name(dataset)}'
                except Exception:
                    # No legacy name available: skip this dataset, but do
                    # not swallow KeyboardInterrupt / SystemExit
                    print(f"Skipping {dataset}")
                    continue
                f[histo_name] = th1

        if not unblind:
            f[f'{legacy_region_name("sr_j")}_data'] = f[
                f'{legacy_region_name("sr_j")}_zjets']
    merge_legacy_inputs(outdir)
def legacy_limit_input(acc, outdir='./output'):
    """Writes ROOT TH1s to file as a limit input

    For each working point ('tau21', 'loosemd', 'tightmd', 'loose',
    'tight') a file legacy_limit_monov_{wp}_2017.root is created in the
    output directory. It receives one recoil histogram per region and per
    dataset that matches the data, MC or signal selection of that region.
    The 'sr_v' zjets histogram is additionally stored under the 'sr_v'
    data name. Afterwards merge_legacy_inputs is run on the output
    directory.

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param outdir: Output directory
    :type outdir: string
    """
    distribution = 'recoil'

    os.makedirs(outdir, exist_ok=True)

    # Histogram prep, rebin, etc. None of this depends on the working
    # point or the region, so it is done once instead of once per region.
    h = copy.deepcopy(acc[distribution])
    newax = hist.Bin('recoil', 'Recoil (GeV)', recoil_bins_2016())
    h = h.rebin(h.axis(newax.name), newax)
    h = merge_extensions(h, acc)
    scale_xs_lumi(h)
    h = merge_datasets(h)

    for wp in ['tau21', 'loosemd', 'tightmd', 'loose', 'tight']:
        year = 2017
        signal = re.compile(f'.*(Hinv|HToInvisible).*{year}')
        f = uproot.recreate(
            pjoin(outdir, f'legacy_limit_monov_{wp}_{year}.root'))
        data, mc = datasets(year)
        for region in [
                'cr_2m_v', 'cr_1m_v', 'cr_2e_v', 'cr_1e_v', 'cr_g_v', 'sr_v'
        ]:
            # The 'tau21' regions carry no working point in their name;
            # all other working points are inserted before the '_v'
            if wp == 'tau21':
                monov_region_name = region
            else:
                monov_region_name = region.replace('_v', f'_{wp}_v')
            print(f'Region {region}')

            ih = h.integrate(h.axis('region'), monov_region_name)

            for dataset in map(str, ih.axis('dataset').identifiers()):
                if not (data[region].match(dataset)
                        or mc[region].match(dataset) or signal.match(dataset)):
                    print(f"Skip dataset: {dataset}")
                    continue
                print(f"   Dataset: {dataset}")

                th1 = export1d(ih.integrate('dataset', dataset))
                try:
                    histo_name = f'{legacy_region_name(region)}_{legacy_dataset_name(dataset)}'
                except Exception:
                    # No legacy name available: skip this dataset, but do
                    # not swallow KeyboardInterrupt / SystemExit
                    print(f"Skipping {dataset}")
                    continue
                f[histo_name] = th1
        f[f'{legacy_region_name("sr_v")}_data'] = f[
            f'{legacy_region_name("sr_v")}_zjets']
    merge_legacy_inputs(outdir)
Example #5
0
def legacy_limit_input_monojet(acc, args):
    """Export the monojet recoil histograms as ROOT TH1s for the limit input

    One file legacy_limit_monojet_{year}.root is written per year (2017,
    2018). Every dataset matching the data or MC selection of a region is
    exported as one histogram; merge_legacy_inputs is run at the end.

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param args: Options object; its attributes outdir (output directory),
                 unblind and nlo are read
    """
    variable = 'recoil'

    region_list = [
        'cr_2m_j', 'cr_1m_j', 'cr_2e_j', 'cr_1e_j', 'cr_g_j',
        'sr_j_no_veto_all',
    ]
    if args.unblind:
        region_list.append('sr_j')

    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)

    # Common preparation shared by all years and regions
    prepared = copy.deepcopy(acc[variable])
    recoil_axis = hist.Bin('recoil','Recoil (GeV)', recoil_bins_2016())
    prepared = prepared.rebin(prepared.axis(recoil_axis.name), recoil_axis)
    prepared = merge_extensions(prepared, acc)
    scale_xs_lumi(prepared)
    prepared = merge_datasets(prepared)

    for year in (2017, 2018):
        outpath = pjoin(args.outdir, f'legacy_limit_monojet_{year}.root')
        rootfile = uproot.recreate(outpath)
        data, mc = datasets(year, unblind=args.unblind, nlo=args.nlo)

        for region in region_list:
            print(f'Region {region}')
            region_hist = prepared.integrate(prepared.axis('region'), region)

            for identifier in prepared.axis('dataset').identifiers():
                dataset = str(identifier)
                # Only datasets selected as data or MC for this region
                if not data[region].match(dataset) and not mc[region].match(dataset):
                    continue
                print(f"   Dataset: {dataset}")

                exported = export1d(region_hist.integrate('dataset', dataset))
                try:
                    key = f'{legacy_region_name(region)}_{legacy_dataset_name(dataset)}'
                except RuntimeError:
                    print(f"Skipping {dataset}")
                else:
                    rootfile[key] = exported

        if not args.unblind:
            # Blinded: the zjets histogram stands in for the data
            sr_name = legacy_region_name("sr_j")
            rootfile[f'{sr_name}_data'] = rootfile[f'{legacy_region_name("sr_j")}_zjets']
    merge_legacy_inputs(args.outdir)
Example #6
0
def pdf_plot(acc):
    '''Compare the V pt spectrum for the different PDF choices.

    For each of the two hard-coded HT-binned 2017 datasets, the 'gen_vpt'
    histogram is drawn once per identifier of the 'pdf' axis (except
    'none') in the upper panel, and its ratio to the 'none' histogram in
    the lower panel. One file ./output/pdfstudy/{dataset}.pdf is written
    per dataset.

    :param acc: Accumulator (processor output) providing 'gen_vpt'
    :type acc: coffea.processor.accumulator
    '''
    outdir = './output/pdfstudy/'
    os.makedirs(outdir, exist_ok=True)

    dataset_names = [
        'WJetsToLNu_HT_MLM_2017',
        'DYJetsToLL_M-50_HT_MLM_2017',
    ]

    # Histogram prep does not depend on the dataset: do it once
    h_all = acc['gen_vpt']
    h_all = h_all.rebin(h_all.axis('vpt'),
                        hist.Bin("vpt", r"$p_{T}^{V}$ (GeV)", 10, 0, 2000))
    h_all = merge_extensions(h_all, acc, reweight_pu=False)
    scale_xs_lumi(h_all)
    h_all = merge_datasets(h_all)

    for ds in dataset_names:
        fig, ax, rax = fig_ratio()

        h = h_all.project(h_all.axis('dataset'), ds)

        # 'none' is the reference of the ratio and is not drawn itself
        pdfs = [
            pdf for pdf in h.axis('pdf').identifiers() if str(pdf) != 'none'
        ]
        h_ref = h.project('pdf', 'none')

        for pdf in pdfs:
            # Per-curve copy: the shared module-level options are left
            # unmodified for other users
            err_opts = dict(data_err_opts, color=colors[str(pdf)])
            h_pdf = h.project('pdf', pdf)

            hist.plot1d(h_pdf,
                        error_opts=err_opts,
                        ax=ax,
                        overflow='all',
                        clear=False)

            hist.plotratio(
                h_pdf,
                h_ref,
                ax=rax,
                denom_fill_opts={},
                guide_opts={},
                unc='num',
                overflow='all',
                error_opts=err_opts,
                clear=False,
            )
        ax.set_ylim(1e-3, 1e8)
        rax.set_ylim(0.9, 1.6)
        ax.set_yscale('log')
        leg = ax.legend()
        # Label the legend entries in drawing order. Pairing with the list
        # of drawn PDFs keeps the labels aligned wherever 'none' sits in
        # the identifier list.
        for text, pdf in zip(leg.get_texts(), pdfs):
            text.set_text(str(pdf))
        fig.savefig(pjoin(outdir, f'{ds}.pdf'))
        plt.close(fig)
Example #7
0
def legacy_limit_input_vbf(acc, outdir='./output'):
    """Writes ROOT TH1s to file as a limit input

    For each of the years 2017 and 2018 a file
    legacy_limit_vbf_{year}.root is created in the output directory.
    It receives one mjj histogram per VBF region and per dataset that
    matches the data, MC or signal selection of that region. Afterwards
    merge_legacy_inputs is run on the output directory.

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param outdir: Output directory
    :type outdir: string
    """
    distribution = 'mjj'

    os.makedirs(outdir, exist_ok=True)

    # Histogram prep, rebin, etc. None of this depends on the year or
    # the region, so it is done once instead of once per region.
    h = copy.deepcopy(acc[distribution])
    newax = hist.Bin('mjj', '$M_{jj}$ (GeV)', mjj_bins_2016())
    h = h.rebin(h.axis(newax.name), newax)
    h = merge_extensions(h, acc)
    scale_xs_lumi(h)
    h = merge_datasets(h)

    for year in [2017, 2018]:
        signal = re.compile(f'VBF_HToInvisible.*{year}')
        f = uproot.recreate(pjoin(outdir, f'legacy_limit_vbf_{year}.root'))
        data, mc = datasets(year)
        for region in [
                'cr_2m_vbf', 'cr_1m_vbf', 'cr_2e_vbf', 'cr_1e_vbf', 'cr_g_vbf',
                'sr_vbf'
        ]:
            print(f'Region {region}')
            ih = h.integrate(h.axis('region'), region)

            for dataset in map(str, ih.axis('dataset').identifiers()):
                if not (data[region].match(dataset)
                        or mc[region].match(dataset) or signal.match(dataset)):
                    print(f"Skip dataset: {dataset}")
                    continue
                print(f"   Dataset: {dataset}")

                th1 = export1d(ih.integrate('dataset', dataset))
                try:
                    histo_name = f'{legacy_region_name(region)}_{legacy_dataset_name_vbf(dataset)}'
                except Exception:
                    # No legacy name available: skip this dataset, but do
                    # not swallow KeyboardInterrupt / SystemExit
                    print(f"Skipping {dataset}")
                    continue
                print(histo_name)
                f[histo_name] = th1
        #f[f'{legacy_region_name("sr_vbf")}_data'] = f[f'{legacy_region_name("sr_vbf")}_zjets']
    merge_legacy_inputs(outdir)
Example #8
0
def pdf_plot(acc):
    '''Plot the GJets recoil spectrum in three photon pt ranges.

    For 2017 and 2018, the 'photon_pt0_recoil' histogram of the
    GJets_HT_MLM dataset in the 'tr_g_notrig_num' region is drawn versus
    recoil with one curve per photon pt bin. The result is saved as
    ./output/photon_pt_cut/photon_pt_cut_{year}.pdf.

    :param acc: Accumulator (processor output) providing
                'photon_pt0_recoil'
    :type acc: coffea.processor.accumulator
    '''
    outdir = './output/photon_pt_cut/'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    for year in (2017, 2018):
        # Start from a clean current figure
        fig = plt.gcf()
        fig.clf()
        ax = plt.gca()

        histogram = copy.deepcopy(acc['photon_pt0_recoil'])

        # Three photon pt ranges, recoil bins widening towards high values
        pt_axis = hist.Bin("pt",r"$p_{T}^{\gamma}$ (GeV)", [0,175,215,10000])
        histogram = histogram.rebin(histogram.axis('pt'), pt_axis)
        recoil_edges = [
            *range(200,500,50),
            *range(500,1000,100),
            *range(1000,2000,250),
        ]
        recoil_axis = hist.Bin('recoil','recoil',recoil_edges)
        histogram = histogram.rebin(histogram.axis('recoil'), recoil_axis)

        histogram = merge_extensions(histogram, acc, reweight_pu=False)
        scale_xs_lumi(histogram)
        histogram = merge_datasets(histogram)

        dataset_axis = histogram.axis('dataset')
        pprint(dataset_axis.identifiers())
        histogram = histogram.integrate(dataset_axis,f'GJets_HT_MLM_{year}')
        histogram = histogram.integrate(histogram.axis('region'),'tr_g_notrig_num')
        pprint(histogram)

        hist.plot1d(histogram, overlay='pt', ax=ax, overflow='all', clear=False)

        ax.set_ylim(0,2e5)
        ax.set_xlim(200,500)
        ax.set_ylabel('Expected GJets events (a.u.)')
        ax.legend(['< 175', '175 - 215', '> 215'],title='Photon $p_{T}$')

        ax.text(0.97,
                0.65,
                'Photon CR, no trigger applied',
                fontsize=10,
                horizontalalignment='right',
                verticalalignment='bottom',
                transform=ax.transAxes)
        # Dashed vertical marker at a recoil of 250
        ax.plot([250,250],[0,1e8],'--',color='grey')

        fig.savefig(pjoin(outdir,f'photon_pt_cut_{year}.pdf'))
        plt.close(fig)
Example #9
0
def make_templates(acc, fout):
    '''Convert the coffea 'sieie' histogram into ROOT templates.

    For 2017 and 2018 three templates are built: 'good' (GJet HT MC in the
    'medium_nosieie' category), 'bad' (EGamma data minus GJet HT MC in the
    'medium_nosieie_invertiso' category) and 'data' (EGamma data in the
    'medium_nosieie' category). Each template is written to the ROOT file
    once per pt bin under the key {year}_{kind}_pt{low}-{high}.

    :param acc: Accumulator (processor output) providing 'sieie',
                'nevents' and 'sumw'
    :param fout: Path of the ROOT file to create
    '''
    # Load inputs
    for key in ('sieie', 'nevents', 'sumw'):
        acc.load(key)

    # Scaling
    sieie = merge_extensions(acc['sieie'], acc, reweight_pu=False)
    scale_xs_lumi(sieie)
    sieie = merge_datasets(sieie)

    pt_edges = [200, *range(300, 700, 100), 1000]
    sieie = sieie.rebin('pt', hist.Bin('pt', '$p_{T}$ (GeV)', pt_edges))
    isolated = sieie.integrate('cat', 'medium_nosieie')
    non_isolated = sieie.integrate('cat', 'medium_nosieie_invertiso')

    # Make templates
    templates = {}
    for year in (2017, 2018):
        mc_re = re.compile(f'(GJet).*HT.*{year}')
        data_re = re.compile(f'(EGamma).*{year}.*')

        templates[f'{year}_good'] = isolated[mc_re].integrate('dataset')

        # Non-isolated data with the MC expectation subtracted
        fake = non_isolated[data_re].integrate('dataset')
        prompt = non_isolated[mc_re].integrate('dataset')
        prompt.scale(-1)
        fake.add(prompt)
        templates[f'{year}_bad'] = fake

        templates[f'{year}_data'] = isolated[data_re].integrate('dataset')

    print(templates)

    # Save output
    rootfile = uproot.recreate(fout)
    for name, template in templates.items():
        edges = template.axis('pt').edges()

        # One exported histogram per pair of neighbouring pt edges
        for low, high in zip(edges[:-1], edges[1:]):
            in_bin = template.integrate('pt', slice(low, high))
            rootfile[f'{name}_pt{low:.0f}-{high:.0f}'] = export1d(in_bin)
Example #10
0
def main():
    """Produce the S/B scans of the 2017 recoil distribution.

    The 'recoil' histogram is read from ../../input/merged, merged and
    scaled, and S_over_B is run in the 'sr_tight_v' region for WH signal
    against the MC background list, once with unc=0.05 and once with
    unc=0.10.

    :return: -2 if a KeyError is raised during processing, None otherwise
    """
    year = 2017
    inpath = "../../input/merged"
    distribution = "recoil"

    mc = re.compile(
        f'(VDY.*HT.*|QCD.*|W.*HT.*|ST_|TTJets-FXFX_|Diboson_|GJets.*HT.*|ZJetsToNuNu.*){year}'
    )
    signal = re.compile(f'WH.*{year}')

    acc = dir_archive(inpath, serialized=True, compression=0, memsize=1e3)
    for key in (distribution, 'sumw', 'sumw_pileup', 'nevents'):
        acc.load(key)

    try:
        # Pileup reweighting is applied unless the name contains 'nopu'
        apply_pu = 'nopu' not in distribution
        acc[distribution] = merge_extensions(acc[distribution],
                                             acc,
                                             reweight_pu=apply_pu)
        scale_xs_lumi(acc[distribution])
        acc[distribution] = merge_datasets(acc[distribution])

        scans = ((0.05, "SB_unc005.png"), (0.10, "SB_unc010.png"))
        for unc, outname in scans:
            S_over_B(acc,
                     distribution,
                     'sr_tight_v',
                     mc=mc,
                     signal=signal,
                     unc=unc,
                     outname=outname,
                     cutlim=(250, 750))
    except KeyError:
        print("key error ")
        return -2
def plot_ht_stitching(acc, tag, regex):
    '''Plot the LHE HT distribution of the datasets matching a regex.

    The 'lhe_ht' histogram is merged and scaled, restricted to the
    datasets matching ``regex`` and drawn with one overlaid curve per
    dataset on a logarithmic y axis. The plot is saved as
    ./output/ht/{tag}_lhe_ht.pdf.

    :param acc: Accumulator (processor output) providing 'lhe_ht'
    :type acc: coffea.processor.accumulator
    :param tag: Prefix of the output file name
    :type tag: string
    :param regex: Regular expression selecting the datasets to plot
    :type regex: string
    '''
    outdir = './output/ht/'
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(outdir, exist_ok=True)

    for dist in ['lhe_ht']:
        # Work on a copy so that the accumulator content stays untouched
        h = copy.deepcopy(acc[dist])
        h = merge_extensions(h, acc)
        scale_xs_lumi(h)
        h = merge_datasets(h)

        fig, ax, _ = hist.plot1d(
            h[re.compile(regex)],
            overlay='dataset',
            overflow='all',
            binwnorm=True)
        # Configure the returned axes explicitly instead of relying on
        # pyplot's notion of the current axes
        ax.set_yscale('log')
        ax.set_ylim(1e-3, 1e6)

        fig.savefig(pjoin(outdir, f'{tag}_{dist}.pdf'))
        # Release the figure; pyplot otherwise keeps it alive across calls
        plt.close(fig)
Example #12
0
def eta_phi_plot_photon(inpath):
    '''Create 2D eta-phi plot for photons in VBF photon CR.

    For 2017 and 2018, the 'photon_eta_phi' histogram of the EGamma
    dataset in the 'cr_g_vbf' region is drawn with eta on the x axis and
    saved as ./output/{input directory name}/cr_g_vbf_photon_eta_phi_{year}.pdf.

    :param inpath: Directory holding the serialized accumulator
    :type inpath: string
    '''
    indir = os.path.abspath(inpath)

    acc = dir_archive(indir, serialized=True, compression=0, memsize=1e3)

    acc.load('sumw')
    acc.load('sumw_pileup')
    acc.load('sumw2')
    acc.load('nevents')

    outdir = pjoin('./output/', os.path.basename(indir))
    os.makedirs(outdir, exist_ok=True)

    for year in [2017, 2018]:
        data = {'cr_g_vbf': f'EGamma_{year}'}
        for region, datare in data.items():
            distributions = ['photon_eta_phi']

            for distribution in distributions:
                acc.load(distribution)
                h = copy.deepcopy(acc[distribution])
                # Pileup reweighting is applied unless the distribution is
                # a 'nopu' one. The flag used to be inverted here, unlike
                # the other call sites of this idiom in the file.
                h = merge_extensions(h,
                                     acc,
                                     reweight_pu='nopu' not in distribution)
                scale_xs_lumi(h)
                h = merge_datasets(h)

                h = h.integrate('dataset', datare)
                h = h.integrate(h.axis('region'), region)
                fig, ax, _ = plot2d(h, xaxis='eta')

                # Region name inside the top left corner, distribution
                # name inside the bottom right corner
                ax.text(0.,
                        1.,
                        region,
                        fontsize=10,
                        horizontalalignment='left',
                        verticalalignment='top',
                        color='white',
                        transform=ax.transAxes)
                ax.text(1.,
                        0.,
                        distribution,
                        fontsize=10,
                        horizontalalignment='right',
                        verticalalignment='bottom',
                        transform=ax.transAxes)
                # Luminosity and CMS labels sit on top of the axes frame
                fig.text(1.,
                         1.,
                         f'{lumi(year)} fb$^{{-1}}$ ({year})',
                         fontsize=14,
                         horizontalalignment='right',
                         verticalalignment='bottom',
                         transform=ax.transAxes)
                fig.text(0.,
                         1.,
                         '$\\bf{CMS}$ internal',
                         fontsize=14,
                         horizontalalignment='left',
                         verticalalignment='bottom',
                         transform=ax.transAxes)
                outname = pjoin(outdir, f'{region}_{distribution}_{year}.pdf')
                fig.savefig(outname)
                print(f'Created file {outname}')
                # Release the figure; pyplot otherwise keeps it alive
                plt.close(fig)
Example #13
0
def get_scale_variations(acc, regex, tag, scale_var, scale_var_type):
    '''Calculate the new k-factors with a scale weight variation.

    The 'gen_vpt_vbf_{pt type}' histogram is rebinned in V pt and M(jj),
    merged, scaled and restricted to the datasets matching ``regex``.
    Datasets matching '.*HT.*' are taken as LO and those matching
    '.*(LHE|amcat).*' as NLO. After integrating over the tag-dependent
    M(jj) range, the ratio of the varied to the nominal NLO/LO scale
    factor is computed as a function of V pt.

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param regex: Regular expression selecting the datasets to use
    :type regex: string
    :param tag: Process tag, one of 'wjet', 'dy' or 'gjets'
    :type tag: string
    :param scale_var: Identifier on the 'var' axis of the NLO variation
    :type scale_var: string
    :param scale_var_type: Not used by this function; kept so that
                           existing callers continue to work
    :return: Tuple ((variation ratio, V pt bin edges incl. overflow),
             (varied NLO sum of weights, nominal NLO sum of weights))
    :raises ValueError: If the tag is not one of the supported ones
    '''

    print(f'Working on: {tag}, {scale_var}')

    # Tag-dependent M(jj) binning and integration range
    if tag in ['wjet', 'dy']:
        mjj_edges = [0, 200] + list(range(500, 2500, 500))
        mjj_slice = slice(200, 2000)
    elif tag in ['gjets']:
        mjj_edges = [0, 200, 500, 1000, 1500]
        mjj_slice = slice(200, 1500)
    else:
        # Fail early with a clear message instead of an unbound-variable
        # error further down
        raise ValueError(
            f"Unknown tag '{tag}': expected one of 'wjet', 'dy', 'gjets'")

    # Define rebinning (the V pt binning is the same for all tags)
    vpt_edges = list(range(0, 400, 40)) + list(range(400, 1200, 80))
    vpt_ax = hist.Bin('vpt', 'V $p_{T}$ (GeV)', vpt_edges)
    mjj_ax = hist.Bin('mjj', 'M(jj) (GeV)', mjj_edges)

    # Set the correct pt type
    pt_tag = 'combined' if tag != 'gjets' else 'stat1'
    acc.load(f'gen_vpt_vbf_{pt_tag}')
    h = acc[f'gen_vpt_vbf_{pt_tag}']

    h = h.rebin('vpt', vpt_ax)
    h = h.rebin('mjj', mjj_ax)

    h = merge_extensions(h, acc, reweight_pu=False)
    scale_xs_lumi(h)
    h = merge_datasets(h)
    h = h[re.compile(regex)]

    lo = h[re.compile('.*HT.*')].integrate('dataset')
    nlo = h[re.compile('.*(LHE|amcat).*')].integrate('dataset')

    # Choose the relevant scale variation (relevant to NLO only)
    # For LO, choose the nominal (i.e. no variation)
    lo = lo.integrate('var', 'nominal')
    nlo_var = nlo.integrate('var', scale_var)
    nlo_nom = nlo.integrate('var', 'nominal')

    # Get 1D LO and NLO weights to calculate the variation
    lo_1d = lo.integrate('mjj', mjj_slice, overflow='over')
    nlo_var_1d = nlo_var.integrate('mjj', mjj_slice, overflow='over')
    nlo_nom_1d = nlo_nom.integrate('mjj', mjj_slice, overflow='over')

    sumw_lo_1d = lo_1d.values(overflow='over')[()]
    sumw_nlo_var_1d = nlo_var_1d.values(overflow='over')[()]
    sumw_nlo_nom_1d = nlo_nom_1d.values(overflow='over')[()]

    # Calculate 1D scale factors, nominal and varied
    # as a function of V-pt
    sf_nom_1d = sumw_nlo_nom_1d / sumw_lo_1d
    sf_var_1d = sumw_nlo_var_1d / sumw_lo_1d

    # Calculate 1D variation ratio, as a function of V-pt
    var_ratio = sf_var_1d / sf_nom_1d

    tup = (var_ratio, h.axis('vpt').edges(overflow='over'))

    # Return tuple containing the SF ratios and
    # NLO weights with and without variation
    return tup, (sumw_nlo_var_1d, sumw_nlo_nom_1d)
Example #14
0
def legacy_limit_input_monov(acc, outdir='./output', unblind=False):
    """Writes ROOT TH1s to file as a limit input

    For each working point ('tau21', 'loose', 'tight') and year (2017,
    2018) a file legacy_limit_monov_{wp}_{year}.root is created in the
    output directory. It receives one recoil histogram per region and per
    dataset that matches the data or MC selection of that region.
    Afterwards merge_legacy_inputs is run on the output directory.

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param outdir: Output directory
    :type outdir: string
    :param unblind: If True, the 'sr_v' region is exported as well.
                    If False, the 'sr_v' zjets histogram is stored
                    under the 'sr_v' data name instead.
    :type unblind: bool
    """
    distribution = 'recoil'

    regions = [
        'cr_2m_v', 'cr_1m_v', 'cr_2e_v', 'cr_1e_v', 'cr_g_v',
        'sr_v_no_veto_all'
    ]
    if unblind:
        regions.append("sr_v")

    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(outdir, exist_ok=True)

    newax = hist.Bin('recoil', 'Recoil (GeV)', recoil_bins_2016())

    # Histogram prep, rebin, etc
    h = copy.deepcopy(acc[distribution])
    h = h.rebin(h.axis(newax.name), newax)
    h = merge_extensions(h, acc)
    scale_xs_lumi(h)
    h = merge_datasets(h)

    for wp in ['tau21', 'loose', 'tight']:
        for year in [2017, 2018]:
            f = uproot.recreate(
                pjoin(outdir, f'legacy_limit_monov_{wp}_{year}.root'))
            data, mc = datasets(year, unblind)
            for region in regions:
                # The 'tau21' regions carry no working point in their
                # name; otherwise the working point is inserted before
                # the '_v' marker, which is either at the end of the name
                # or followed by a suffix
                if wp == 'tau21':
                    monov_region_name = region
                elif region.endswith("_v"):
                    monov_region_name = region.replace('_v', f'_{wp}_v')
                else:
                    monov_region_name = region.replace('_v_', f'_{wp}_v_')
                print(f'Region {region}')

                ih = h.integrate(h.axis('region'), monov_region_name)

                for dataset in map(str, ih.axis('dataset').identifiers()):
                    if not (data[region].match(dataset)
                            or mc[region].match(dataset)):
                        continue
                    print(f"   Dataset: {dataset}")
                    th1 = export1d(ih.integrate('dataset', dataset))
                    try:
                        histo_name = f'{legacy_region_name(region)}_{legacy_dataset_name(dataset)}'
                    except Exception:
                        # No legacy name available: skip this dataset, but
                        # do not swallow KeyboardInterrupt / SystemExit
                        print(f"Skipping {dataset}")
                        continue
                    f[histo_name] = th1
            if not unblind:
                f[f'{legacy_region_name("sr_v")}_data'] = f[
                    f'{legacy_region_name("sr_v")}_zjets']
    merge_legacy_inputs(outdir)
Example #15
0
from bucoffea.plot.util import merge_extensions, merge_datasets, scale_xs_lumi
# Script fragment: prepare the 'gen_vpt' histogram for the datasets
# matching '.*DY.*HT.*' from the 'input/das_lhevpt_v2' accumulator.
acc = acc_from_dir('input/das_lhevpt_v2')

# Marker style passed as error_opts to the plotting calls
data_err_opts = {
    'linestyle': 'none',
    'marker': '.',
    'markersize': 10.,
    'color': 'k',
    'elinewidth': 1,
    'emarker': '_'
}

# Work on a copy so that the accumulator content stays untouched
h = copy.deepcopy(acc['gen_vpt'])

h = merge_extensions(h, acc, reweight_pu=False)
scale_xs_lumi(h)
h = merge_datasets(h)

# Keep the nominal weight only, then sum all matching datasets
h = h.integrate('weight_type', 'nominal')
h = h.integrate('weight_index', slice(-0.5, 0.5))
h = h[re.compile('.*DY.*HT.*')].integrate('dataset')

# 40-wide bins from 80 to 800, then 100-wide bins up to 1900
new_ax = hist.Bin('vpt', 'Gen V $p_{T}$ (GeV)',
                  list(range(80, 800, 40)) + list(range(800, 2000, 100)))
h = h.rebin(h.axis('vpt'), new_ax)
print(h)

fig, (ax, rax) = plt.subplots(2,
                              1,
                              figsize=(7, 7),
                              gridspec_kw={"height_ratios": (3, 1)},
Example #16
0
def cr_ratio_plot(
        acc,
        distribution='recoil',
        regions=('cr_2m_j', 'cr_1m_j', 'cr_1e_j', 'cr_2e_j', 'cr_g_j'),
        year=2017,
        tag='',
        outdir='./output',
        mc=None,
        data=None):
    '''Plot the ratios between control regions for data and MC.

    For every ordered pair of distinct regions, the upper panel shows the
    bin-wise ratio of the two regions for data (points) and for MC (line
    with statistical uncertainty band). The lower panel shows the data
    ratio divided by the MC ratio. One file
    ratio_{tag}_{distribution}_{region 1}_over_{region 2}_{year}.pdf is
    written per pair.

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param distribution: Name of the histogram to plot
    :type distribution: string
    :param regions: Names of the regions to compare
    :type regions: sequence of strings
    :param year: Data taking year, used in dataset names and labels
    :type year: int
    :param tag: Free text inserted into the output file names
    :type tag: string
    :param outdir: Output directory
    :type outdir: string
    :param mc: Per-region selection of the MC datasets. If not given,
               a default for the monojet control regions is used.
    :type mc: dict
    :param data: Per-region selection of the data datasets. If not given,
                 a default for the monojet and VBF control regions is used.
    :type data: dict
    '''
    os.makedirs(outdir, exist_ok=True)

    # Rebin, if a binning is defined for this distribution
    s = Style()
    h = copy.deepcopy(acc[distribution])
    try:
        newax = s.get_binning(distribution)
        h = h.rebin(h.axis(newax.name), newax)
    except KeyError:
        pass

    h = merge_extensions(h, acc)

    scale_xs_lumi(h)

    h = merge_datasets(h)

    histograms = {
        region: h.integrate(h.axis('region'), region)
        for region in regions
    }

    if not mc:
        single_lepton = re.compile(
            f'(TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|W.*HT.*).*{year}'
        )
        double_lepton = re.compile(
            f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'
        )
        mc = {
            'cr_1m_j': single_lepton,
            'cr_1e_j': single_lepton,
            'cr_2m_j': double_lepton,
            'cr_2e_j': double_lepton,
            'cr_g_j': re.compile(f'(GJets.*|QCD_HT.*|W.*HT.*).*{year}'),
        }
    if not data:
        data = {
            'cr_1m_j': f'MET_{year}',
            'cr_2m_j': f'MET_{year}',
            'cr_1e_j': f'EGamma_{year}',
            'cr_2e_j': f'EGamma_{year}',
            'cr_g_j': f'EGamma_{year}',
            'cr_1m_vbf': f'MET_{year}',
            'cr_2m_vbf': f'MET_{year}',
            'cr_1e_vbf': f'EGamma_{year}',
            'cr_2e_vbf': f'EGamma_{year}',
            'cr_g_vbf': f'EGamma_{year}'
        }
    # Raw strings: '\m' and '\g' are not valid escape sequences
    name = {
        'cr_1m_j': r'1$\mu$',
        'cr_2m_j': r'2$\mu$',
        'cr_1e_j': '1e',
        'cr_2e_j': '2e',
        'cr_g_j': r'$\gamma$',
        'cr_1m_vbf': r'1$\mu$',
        'cr_2m_vbf': r'2$\mu$',
        'cr_1e_vbf': '1e',
        'cr_2e_vbf': '2e',
        'cr_g_vbf': r'$\gamma$'
    }
    data_err_opts = {
        'linestyle': 'none',
        'marker': '.',
        'markersize': 10.,
        'color': 'k',
        'elinewidth': 1,
    }

    for i, num_region in enumerate(regions):
        for j, den_region in enumerate(regions):
            if i == j:
                continue
            fig, (ax,
                  rax) = plt.subplots(2,
                                      1,
                                      figsize=(7, 7),
                                      gridspec_kw={"height_ratios": (3, 1)},
                                      sharex=True)
            h1 = histograms[num_region]
            h2 = histograms[den_region]

            print(data[num_region])
            h1_data = h1[data[num_region]].integrate('dataset')
            h1_mc = h1[mc[num_region]].integrate('dataset')
            h2_data = h2[data[den_region]].integrate('dataset')
            h2_mc = h2[mc[den_region]].integrate('dataset')

            # Region ratio in data
            centers = h1_data.axis(distribution).centers(overflow='over')
            rsumw_data, rsumw_err_data = _ratio_with_error(h1_data, h2_data)
            ax.errorbar(x=centers,
                        y=rsumw_data,
                        yerr=rsumw_err_data,
                        label='Data',
                        **data_err_opts)

            # Region ratio in MC
            rsumw_mc, rsumw_err_mc = _ratio_with_error(h1_mc, h2_mc)
            edges = h1_mc.axis(distribution).edges(overflow='over')
            ax.step(x=edges,
                    y=np.r_[rsumw_mc[0], rsumw_mc],
                    color=colors['mc'],
                    label='MC')

            # The last value is repeated so that the 'post' step band
            # covers the last bin up to its upper edge
            y1 = np.r_[rsumw_mc - rsumw_err_mc,
                       rsumw_mc[-1] - rsumw_err_mc[-1]]
            y2 = np.r_[rsumw_mc + rsumw_err_mc,
                       rsumw_mc[-1] + rsumw_err_mc[-1]]

            ax.fill_between(edges,
                            y1=y1,
                            y2=y2,
                            zorder=-1,
                            color=colors['band'],
                            step='post',
                            label='MC stat. unc')

            ax.set_ylim(0, 5)

            # Double ratio: data ratio over MC ratio
            rrsumw = rsumw_data / rsumw_mc
            rrsumw_err = rsumw_err_data / rsumw_mc
            rax.errorbar(x=centers,
                         y=rrsumw,
                         yerr=rrsumw_err,
                         **data_err_opts)

            rax.set_ylim(0.75, 1.25)

            # Unity line, drawn explicitly on the ratio panel
            rax.plot([min(edges), max(edges)], [1, 1], color=colors['mc'])

            # Relative MC uncertainty band around unity
            y1 = np.r_[(rsumw_mc - rsumw_err_mc) / rsumw_mc,
                       (rsumw_mc[-1] - rsumw_err_mc[-1]) / rsumw_mc[-1]]
            y2 = np.r_[(rsumw_mc + rsumw_err_mc) / rsumw_mc,
                       (rsumw_mc[-1] + rsumw_err_mc[-1]) / rsumw_mc[-1]]

            rax.fill_between(edges,
                             y1=y1,
                             y2=y2,
                             zorder=-1,
                             color=colors['band'],
                             step='post')
            ax.legend(title=f'{name[num_region]} over {name[den_region]}')
            fig.text(1.,
                     1.,
                     f'{lumi(year)} fb$^{{-1}}$ ({year})',
                     fontsize=14,
                     horizontalalignment='right',
                     verticalalignment='bottom',
                     transform=ax.transAxes)
            fig.text(0.,
                     1.,
                     '$\\bf{CMS}$ internal',
                     fontsize=14,
                     horizontalalignment='left',
                     verticalalignment='bottom',
                     transform=ax.transAxes)

            rax.set_xlabel(f'{distribution} (GeV)', fontsize=14)
            rax.set_ylabel('Data / MC', fontsize=14)
            ax.set_xlabel(f'{distribution} (GeV)', fontsize=14)
            ax.set_ylabel(
                f'Region ratio: {name[num_region]} / {name[den_region]} (GeV)',
                fontsize=14)

            loc1 = matplotlib.ticker.MultipleLocator(base=0.2)
            loc2 = matplotlib.ticker.MultipleLocator(base=0.1)
            rax.yaxis.set_major_locator(loc1)
            rax.yaxis.set_minor_locator(loc2)
            rax.grid(axis='y', which='minor', linestyle='--')
            rax.grid(axis='y', which='major', linestyle='--')
            # Save and close
            fig.savefig(
                pjoin(
                    outdir,
                    f'ratio_{tag}_{distribution}_{num_region}_over_{den_region}_{year}.pdf'
                ))
            plt.close(fig)


def _ratio_with_error(num, den):
    '''Return the bin-wise ratio num / den and its propagated uncertainty.

    Both inputs must provide values(sumw2=True, overflow='over'). The
    uncertainties of numerator and denominator are added in quadrature.
    '''
    num_sumw, num_sumw2 = num.values(sumw2=True, overflow='over')[()]
    den_sumw, den_sumw2 = den.values(sumw2=True, overflow='over')[()]
    rsumw_err = np.hypot(
        np.sqrt(num_sumw2) / den_sumw,
        num_sumw * np.sqrt(den_sumw2) / den_sumw**2)
    rsumw = num_sumw / den_sumw

    return rsumw, rsumw_err
Example #17
0
def _select_region_total(histogram, dataset_regex, region):
    """Select datasets by regex, then integrate out 'region' and 'dataset'.

    :param histogram: Histogram with 'dataset' and 'region' axes
    :param dataset_regex: Pattern string used to pick datasets
    :type dataset_regex: string
    :param region: Identifier on the 'region' axis to integrate at
    :type region: string
    :return: Histogram without the 'region' and 'dataset' axes
    """
    selected = histogram[re.compile(dataset_regex)]
    return selected.integrate('region', region).integrate('dataset')


def _export_uncertainties(f, h_unc, prefix, year, skip):
    """Write one histogram per entry of the 'uncertainty' axis to ``f``.

    Entries whose name contains any of the substrings in ``skip`` are
    not written. Keys have the form ``{prefix}_{unc}_{year}``.
    """
    for unc in map(str, h_unc.axis('uncertainty').identifiers()):
        if any(token in unc for token in skip):
            continue
        h = h_unc.integrate(h_unc.axis('uncertainty'), unc)
        f[f'{prefix}_{unc}_{year}'] = export1d(h)


def _export_ewk_variations(f, h_nominal, h_noewk, prefix, year):
    """Write the EWK-correction down/up variations to ``f``.

    The 'mjj_noewk' histogram is written as the down variation. The up
    variation is built by mirroring it around the nominal:
    up = nominal + (nominal - down).

    ``h_noewk`` is sign-flipped in place after it has been exported;
    ``h_nominal`` is left untouched.
    """
    f[f'{prefix}_unc_w_ewkcorr_overz_common_down_{year}'] = export1d(h_noewk)

    # nominal - down
    h_noewk.scale(-1)
    h_diff = h_nominal.copy()
    h_diff.add(h_noewk)

    # nominal + (nominal - down), on a copy so the nominal is not modified
    h_up = h_nominal.copy()
    h_up.add(h_diff)
    f[f'{prefix}_unc_w_ewkcorr_overz_common_up_{year}'] = export1d(h_up)


def from_coffea(inpath, outfile):
    """Export mjj histograms from a coffea directory archive to a ROOT file.

    For 2017 and 2018, the nominal mjj histograms, the per-uncertainty
    variations ('mjj_unc') and the EWK-correction up/down variations
    (derived from 'mjj_noewk') are written for the QCD and EWK V samples.

    :param inpath: Path handed to ``dir_archive``
    :type inpath: string
    :param outfile: Path of the ROOT file to (re)create
    :type outfile: string
    """
    acc = dir_archive(
                        inpath,
                        serialized=True,
                        compression=0,
                        memsize=1e3,
                        )

    # Merging, scaling, etc
    acc.load('sumw')
    acc.load('sumw_pileup')
    acc.load('nevents')
    mjj_ax = hist.Bin('mjj', r'$M_{jj}$ (GeV)', [200, 400, 600, 900, 1200, 1500, 2000, 2750, 3500, 5000])
    for distribution in ['mjj', 'mjj_unc', 'mjj_noewk']:
        acc.load(distribution)
        acc[distribution] = merge_extensions(
                                            acc[distribution],
                                            acc,
                                            reweight_pu=not ('nopu' in distribution)
                                            )
        scale_xs_lumi(acc[distribution])
        acc[distribution] = merge_datasets(acc[distribution])
        acc[distribution] = acc[distribution].rebin(acc[distribution].axis('mjj'), mjj_ax)

    # Prints the dataset list of the last distribution handled above
    pprint(acc[distribution].axis('dataset').identifiers())

    # Context manager makes sure the output file is closed, also on error
    with uproot.recreate(outfile) as f:
        for year in [2017, 2018]:
            # QCD V
            h_z = _select_region_total(acc['mjj'], f'ZJetsToNuNu.*HT.*{year}', 'sr_vbf')
            f[f'z_qcd_mjj_nominal_{year}'] = export1d(h_z)

            h_w = _select_region_total(acc['mjj'], f'WJetsToLNu.*HT.*{year}', 'sr_vbf')
            f[f'w_qcd_mjj_nominal_{year}'] = export1d(h_w)

            h_ph = _select_region_total(acc['mjj'], f'GJets_DR-0p4.*HT.*{year}', 'cr_g_vbf')
            f[f'gjets_qcd_mjj_nominal_{year}'] = export1d(h_ph)

            # Scale + PDF variations for QCD Z
            h_z_unc = _select_region_total(acc['mjj_unc'], f'ZJ.*HT.*{year}', 'sr_vbf')
            _export_uncertainties(f, h_z_unc, 'z_qcd_mjj', year, skip=('goverz', 'ewkcorr'))

            # EWK variations for QCD Z
            h_z_noewk = _select_region_total(acc['mjj_noewk'], f'ZJetsToNuNu.*HT.*{year}', 'sr_vbf')
            _export_ewk_variations(f, h_z, h_z_noewk, 'z_qcd_mjj', year)

            # EWK variations for QCD W
            h_w_noewk = _select_region_total(acc['mjj_noewk'], f'WJetsToLNu.*HT.*{year}', 'sr_vbf')
            _export_ewk_variations(f, h_w, h_w_noewk, 'w_qcd_mjj', year)

            # Scale + PDF variations for QCD photons
            h_ph_unc = _select_region_total(acc['mjj_unc'], f'GJets_DR-0p4.*HT.*{year}', 'cr_g_vbf')
            _export_uncertainties(f, h_ph_unc, 'gjets_qcd_mjj', year, skip=('zoverw', 'ewkcorr'))

            # EWK variations for QCD photons
            h_ph_noewk = _select_region_total(acc['mjj_noewk'], f'GJets_DR-0p4.*HT.*{year}', 'cr_g_vbf')
            _export_ewk_variations(f, h_ph, h_ph_noewk, 'gjets_qcd_mjj', year)

            # EWK V
            h_z = _select_region_total(acc['mjj'], f'.*EWKZ.*{year}', 'sr_vbf')
            f[f'z_ewk_mjj_nominal_{year}'] = export1d(h_z)

            h_w = _select_region_total(acc['mjj'], f'.*EWKW.*{year}', 'sr_vbf')
            f[f'w_ewk_mjj_nominal_{year}'] = export1d(h_w)

            h_ph = _select_region_total(acc['mjj'], f'GJets_SM_5f_EWK.*{year}', 'cr_g_vbf')
            f[f'gjets_ewk_mjj_nominal_{year}'] = export1d(h_ph)
            print(h_ph.values())

            # Scale + PDF variations for EWK Z
            h_z_unc = _select_region_total(acc['mjj_unc'], f'.*EWKZ.*{year}', 'sr_vbf')
            _export_uncertainties(f, h_z_unc, 'z_ewk_mjj', year, skip=('goverz', 'ewkcorr'))

            # Scale + PDF variations for EWK photons
            h_ph_unc = _select_region_total(acc['mjj_unc'], f'GJets_SM.*{year}', 'cr_g_vbf')
            _export_uncertainties(f, h_ph_unc, 'gjets_ewk_mjj', year, skip=('zoverw', 'ewkcorr'))
Example #18
0
def legacy_limit_input_vbf(acc, outdir='./output', unblind=False):
    """Writes ROOT TH1s to file as a limit input

    One file ``legacy_limit_vbf_{year}.root`` is written per year
    (2017, 2018) into ``outdir``; afterwards ``merge_legacy_inputs`` is
    called on ``outdir``.

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param outdir: Output directory
    :type outdir: string
    :param unblind: If True, the 'sr_vbf' region is exported as well.
                    If False, the signal-region data entry is filled
                    with a copy of the 'qcdzjets' histogram.
    :type unblind: bool
    """
    distribution = 'mjj'

    regions = [
        'cr_2m_vbf', 'cr_1m_vbf', 'cr_2e_vbf', 'cr_1e_vbf', 'cr_g_vbf',
        'sr_vbf_no_veto_all'
    ]
    if unblind:
        regions.append("sr_vbf")

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Rebin
    h = copy.deepcopy(acc[distribution])
    newax = hist.Bin('mjj', '$M_{jj}$ (GeV)', mjj_bins_2016())
    h = h.rebin(h.axis(newax.name), newax)
    h = merge_extensions(h, acc)
    scale_xs_lumi(h)
    h = merge_datasets(h)

    for year in [2017, 2018]:
        # The file is closed on leaving the block, so it is complete
        # before merge_legacy_inputs is called below.
        with uproot.recreate(pjoin(outdir, f'legacy_limit_vbf_{year}.root')) as f:
            data, mc = datasets(year, unblind=unblind)
            for region in regions:
                print('=' * 20)
                print(f'Region {region}')
                print('=' * 20)

                ih = h.integrate(h.axis('region'), region)

                for dataset in map(str, h.axis('dataset').identifiers()):
                    # Only datasets selected as data or MC for this region
                    if not (data[region].match(dataset)
                            or mc[region].match(dataset)):
                        continue
                    print(f"Dataset: {dataset}")

                    th1 = export1d(ih.integrate('dataset', dataset))
                    try:
                        histo_name = f'{legacy_region_name(region)}_{legacy_dataset_name_vbf(dataset)}'
                        print(f'Saved under histogram: {histo_name}')
                    except Exception:
                        # No legacy name available for this region/dataset
                        print(f"Skipping {dataset}")
                        continue

                    print('-' * 20)
                    f[histo_name] = th1
            if not unblind:
                # Blinded: use the qcdzjets histogram in place of data
                f[f'{legacy_region_name("sr_vbf")}_data'] = f[
                    f'{legacy_region_name("sr_vbf")}_qcdzjets']
    merge_legacy_inputs(outdir)
Example #19
0
def plot(inpath):
    """Make control-region ratio plots and data / MC plots for 2017 and 2018.

    Histograms are read from the directory archive at ``inpath``. All
    output goes below ``./output/<basename of inpath>/``: ratio plots in
    ``ratios``, data / MC plots in one sub-directory per region. Every
    plot is made twice, once with the LO V samples (tag 'losf') and once
    with the NLO V samples (tag 'nlo').

    :param inpath: Directory holding the input *coffea files
    :type inpath: string
    """
    indir = os.path.abspath(inpath)
    base_outdir = f'./output/{os.path.basename(indir)}'

    # The archive finds, merges and reads all inputs in the directory;
    # histograms are pulled in on demand with acc.load.
    acc = dir_archive(inpath, serialized=True, compression=0, memsize=1e3)

    # Per-region dictionary of distributions to plot and their axis limits
    settings = plot_settings()

    # Distributions that already went through merging and scaling
    merged = set()

    for year in [2017, 2018]:
        # Data set per region: MET for muon regions, EGamma for
        # electron and photon regions, none for the signal region.
        met = f'MET_{year}'
        egamma = f'EGamma_{year}'
        data = {
            'sr_vbf': None,
            'cr_1m_vbf': met,
            'cr_2m_vbf': met,
            'cr_1e_vbf': egamma,
            'cr_2e_vbf': egamma,
            'cr_g_vbf': egamma,
        }

        # MC selection by regular expression, LO V samples (HT binned)
        lo_single_lepton = re.compile(
            f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
        )
        lo_double_lepton = re.compile(
            f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'
        )
        mc_lo = {
            'sr_vbf':
            re.compile(
                f'(ZJetsToNuNu.*|EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_1m_vbf': lo_single_lepton,
            'cr_1e_vbf': lo_single_lepton,
            'cr_2m_vbf': lo_double_lepton,
            'cr_2e_vbf': lo_double_lepton,
            'cr_g_vbf':
            re.compile(f'(GJets_(HT|SM).*|QCD_HT.*|WJetsToLNu.*HT.*).*{year}'),
        }

        # Same for the NLO V samples; non-V samples are unchanged
        nlo_single_lepton = re.compile(
            f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*|.*WJetsToLNu.*FXFX.*).*{year}'
        )
        nlo_double_lepton = re.compile(
            f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*).*{year}'
        )
        mc_nlo = {
            'sr_vbf':
            re.compile(
                f'(ZJetsToNuNu.*|EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*FXFX.*).*{year}'
            ),
            'cr_1m_vbf': nlo_single_lepton,
            'cr_1e_vbf': nlo_single_lepton,
            'cr_2m_vbf': nlo_double_lepton,
            'cr_2e_vbf': nlo_double_lepton,
            'cr_g_vbf':
            re.compile(f'(GJets_(HT|SM).*|QCD_HT.*|W.*FXFX.*).*{year}'),
        }

        # (tag, MC selection) pairs, always processed in this order
        mc_variants = (('losf', mc_lo), ('nlo', mc_nlo))

        # The signal region is not used in the ratio plots
        regions = [name for name in mc_lo if name != 'sr_vbf']

        # Load ingredients from cache
        for ingredient in ('mjj', 'sumw', 'sumw_pileup', 'nevents'):
            acc.load(ingredient)

        # Control region ratio plots, LO and NLO
        outdir = f'{base_outdir}/ratios'
        for tag, mc in mc_variants:
            cr_ratio_plot(acc,
                          year=year,
                          tag=tag,
                          outdir=outdir,
                          mc=mc,
                          regions=regions,
                          distribution='mjj')

        # Data / MC plots, region by region
        for region in mc_lo:
            # No ratio pad in the signal region
            ratio = region != 'sr_vbf'
            outdir = f'{base_outdir}/{region}'

            for distribution, options in settings[region].items():
                # Merge and scale each distribution only once
                if distribution not in merged:
                    acc.load(distribution)

                    if distribution not in acc.keys():
                        print(
                            f"WARNING: Distribution {distribution} not found in input files."
                        )
                        continue
                    acc[distribution] = merge_extensions(
                        acc[distribution],
                        acc,
                        reweight_pu='nopu' not in distribution)
                    scale_xs_lumi(acc[distribution])
                    acc[distribution] = merge_datasets(acc[distribution])
                    acc[distribution].axis('dataset').sorting = 'integral'
                    merged.add(distribution)

                try:
                    # LO first, then NLO; the output files are told
                    # apart by the tag. A KeyError in the first call
                    # also skips the second one.
                    for tag, mc in mc_variants:
                        make_plot(acc,
                                  region=region,
                                  distribution=distribution,
                                  year=year,
                                  data=data[region],
                                  mc=mc[region],
                                  ylim=options.get('ylim'),
                                  xlim=options.get('xlim'),
                                  tag=tag,
                                  outdir=outdir,
                                  output_format='pdf',
                                  ratio=ratio)
                except KeyError:
                    continue
Example #20
0
def plot_recoil(acc,
                xmax=1e3,
                ymin=0,
                ymax=1.1,
                region_tag="1m",
                dataset='SingleMuon',
                year=2018,
                tag="test",
                distribution="recoil",
                axis_name=None,
                noscale=False,
                jeteta_config=None,
                output_format='pdf'):
    """Plot numerator/denominator spectra and their ratio for one dataset.

    Three files are written to ``./output/{tag}``: the overlaid
    numerator and denominator distributions, a text table produced by
    ``content_table``, and the efficiency plot (prefixed ``eff_``).

    :param acc: Accumulator holding the histogram ``distribution``
    :param xmax: Upper x limit of the efficiency plot
    :param ymin: Lower y limit of the efficiency plot
    :param ymax: Upper y limit of the efficiency plot
    :param region_tag: Used to build the region names
                       ``tr_{region_tag}_num`` / ``tr_{region_tag}_den``
    :param dataset: Dataset name without the year suffix
    :param year: Year, appended to ``dataset`` as ``{dataset}_{year}``
    :param tag: Output sub-directory name, also passed to ``trgname``
    :param distribution: Name of the histogram to read from ``acc``
    :param axis_name: Axis to rebin; defaults to ``distribution``
    :param noscale: If True, ``scale_xs_lumi`` is skipped and the flag
                    is forwarded to ``merge_extensions``
    :param jeteta_config: Optional suffix appended to the region names
    :param output_format: File extension used for both saved figures
    """
    # Select and prepare histogram
    h = copy.deepcopy(acc[distribution])
    # NOTE(review): other callers in this file pass
    # reweight_pu=not ('nopu' in distribution); confirm that the
    # polarity used here is intended.
    h = merge_extensions(h,
                         acc,
                         reweight_pu=('nopu' in distribution),
                         noscale=noscale)
    if not noscale:
        scale_xs_lumi(h)
    h = merge_datasets(h)

    # Rebinning
    axis_name = distribution if not axis_name else axis_name
    if 'photon' in distribution:
        newbin = hist.Bin(
            axis_name, f"{axis_name} (GeV)",
            np.array(
                list(range(0, 250, 10)) + list(range(250, 400, 50)) +
                list(range(400, 1100, 100))))
    elif distribution == 'mjj':
        newbin = hist.Bin(
            axis_name, r'$M_{jj}$ (GeV)',
            np.array(
                list(range(200, 600, 200)) + list(range(600, 1500, 300)) +
                [1500, 2000, 2750, 3500]))
    else:
        newbin = hist.Bin(
            axis_name, f"{axis_name} (GeV)",
            np.array(list(range(0, 500, 25)) + list(range(500, 1100, 100))))
    h = h.rebin(h.axis(axis_name), newbin)
    ds = f'{dataset}_{year}'

    # Pick dataset and regions
    h = h.integrate(h.axis('dataset'), ds)
    region_suffix = f'_{jeteta_config}' if jeteta_config else ''
    hnum = h.integrate(h.axis('region'), f'tr_{region_tag}_num{region_suffix}')
    hden = h.integrate(h.axis('region'), f'tr_{region_tag}_den{region_suffix}')

    # Recoil plot
    try:
        fig, ax, _ = hist.plot1d(hnum, binwnorm=True)
    except KeyError:
        pprint(h.axis('region').identifiers())
        print(f'ERROR: {region_tag}, {dataset}, {year}')
        return
    hist.plot1d(hden, ax=ax, clear=False, binwnorm=True)
    ax.set_yscale('log')
    ax.set_ylim(0.1, 1e6)
    outdir = f"./output/{tag}"
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    outname = f'{region_tag}{"_noscale_" if noscale else "_"}{distribution}_{dataset}_{year}{"_"+jeteta_config if jeteta_config else ""}'

    fig.savefig(pjoin(outdir, f'{outname}.{output_format}'))
    with open(pjoin(outdir, f'table_{outname}.txt'), "w") as f:
        f.write(content_table(hnum, hden, axis_name) + "\n")
    plt.close(fig)

    # Efficiency plot
    fig, ax, _ = hist.plotratio(hnum,
                                hden,
                                guide_opts={},
                                unc='clopper-pearson',
                                error_opts=markers('data'))
    ax.set_ylim(ymin, ymax)
    ax.set_xlim(0, xmax)
    ax.set_ylabel("Efficiency")

    # All annotations are positioned in axes coordinates
    ax.text(1.,
            1.,
            r"%.1f fb$^{-1}$ (13 TeV)" % lumi_by_region(region_tag, year),
            fontsize=16,
            horizontalalignment='right',
            verticalalignment='bottom',
            transform=ax.transAxes)
    ax.text(1.,
            0.95,
            f'{jeteta_config if jeteta_config else ""}',
            fontsize=12,
            horizontalalignment='right',
            verticalalignment='bottom',
            transform=ax.transAxes)
    ax.text(0.,
            1.,
            f'{region_tag}, {year}',
            fontsize=16,
            horizontalalignment='left',
            verticalalignment='bottom',
            transform=ax.transAxes)
    ax.text(1.,
            0.,
            f'{trgname(year, tag)}',
            fontsize=10,
            horizontalalignment='right',
            verticalalignment='bottom',
            transform=ax.transAxes)

    # Vertical guide line at x = 215, drawn for region tags containing 'g_'
    if 'g_' in region_tag:
        ax.plot([215, 215], [0.8, 1.1], 'r-')

    # Horizontal guide line at an efficiency of 0.95
    ax.plot([0, xmax], [0.95, 0.95], 'r-')

    # Honour output_format here as well, as for the first figure
    fig.savefig(pjoin(outdir, f'eff_{outname}.{output_format}'))
    plt.close(fig)
Example #21
0
def sf_2d(acc, tag, regex, pt_type, outputrootfile):
    """Derive and plot 2D NLO / LO scale factors for the 'vbf' selection.

    The scale factor is the ratio of the summed weights of datasets
    matching '.*(LHE|amcat).*' to those matching '.*HT.*', including the
    'over' overflow bins. It is written to three places:

    - a PDF in ``./output/2d/``,
    - ``{tag}_kfac.pkl`` in the working directory, holding two pickled
      objects: ``(sf, dsf)`` followed by ``(x_edges, y_edges)``,
    - ``outputrootfile['2d_{tag}_vbf']`` as ``(sf, x_edges, y_edges)``.

    :param acc: Accumulator providing ``gen_vpt_vbf_{pt_type}``
    :param tag: One of 'dy', 'wjet', 'gjets'; selects binning and the
                colour scale limits
    :param regex: Pattern string used to pre-select datasets
    :param pt_type: Suffix of the histogram name to read
    :param outputrootfile: Mapping-like object the result is assigned to
    :raises ValueError: If ``tag`` is not one of the supported values
    """
    # Binning and colour scale depend on the process
    if tag in ['dy', 'wjet']:
        vpt_ax = hist.Bin('vpt','V $p_{T}$ (GeV)',[0, 40, 80, 120, 160, 200, 240, 280, 320, 400, 520, 640, 760, 880,1200])
        mjj_ax = hist.Bin('mjj','M(jj) (GeV)',list(range(0,2500,500)))
        clims = 0.5,1.5
    elif tag in ['gjets']:
        vpt_ax = hist.Bin('vpt','V $p_{T}$ (GeV)',[0, 40, 80, 120, 160, 200, 240, 280, 320, 400, 520, 640])
        mjj_ax = hist.Bin('mjj','M(jj) (GeV)',[0,200,500,1000,1500])
        clims = 1.0, 1.5
    else:
        raise ValueError(
            f"Unsupported tag {tag!r}: expected 'dy', 'wjet' or 'gjets'.")

    outdir = './output/2d/'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Start from a clean pyplot state
    plt.close('all')
    fig = plt.figure(figsize=(6,7.5))
    ax = plt.gca()

    # Text switches colour at the centre of the colour scale
    clim_center = 0.5*(clims[0]+clims[1])

    for selection in ['vbf']:
        dist = f'gen_vpt_{selection}_{pt_type}'
        acc.load(dist)
        h = copy.deepcopy(acc[dist])
        print(h)
        h = h.rebin(h.axis('vpt'), vpt_ax)
        h = h.rebin(h.axis('mjj'), mjj_ax)
        h = h.integrate(h.axis("jpt"))

        h = merge_extensions(h, acc, reweight_pu=False)
        scale_xs_lumi(h)
        h = merge_datasets(h)
        h = h[re.compile(regex)]

        lo = h[re.compile('.*HT.*')].integrate('dataset')
        nlo = h[re.compile('.*(LHE|amcat).*')].integrate('dataset')

        sumw_lo, sumw2_lo = lo.values(overflow='over', sumw2=True)[()]
        sumw_nlo, sumw2_nlo = nlo.values(overflow='over', sumw2=True)[()]

        print(sumw_nlo)
        sf = sumw_nlo / sumw_lo
        # Uncertainty on the ratio: the NLO and LO terms added in quadrature
        dsf = np.hypot(
            np.sqrt(sumw2_nlo) / sumw_lo,
            sumw_nlo * np.sqrt(sumw2_lo) / (sumw_lo**2)
        )

        # NOTE(review): x/y are taken positionally from the histogram,
        # while the axis labels below are hard-coded (x = M(jj),
        # y = pT(V)). Confirm that lo.axes()[0] is the mjj axis.
        xaxis = lo.axes()[0]
        yaxis = lo.axes()[1]
        x_edges = xaxis.edges(overflow='over')
        y_edges = yaxis.edges(overflow='over')

        # Two consecutive pickles in one file: values first, edges second
        pkl_filename = f'{tag}_kfac.pkl'
        with open(pkl_filename, 'wb') as f:
            pickle.dump((sf, dsf), f)
            pickle.dump((x_edges, y_edges), f)

        # pcolormesh expects the value array as (rows = y, columns = x)
        sf_yx = sf.T
        dsf_yx = dsf.T
        im = ax.pcolormesh(x_edges, y_edges, sf_yx)

        # Print value and uncertainty into every cell
        x_centers = xaxis.centers(overflow='over')
        y_centers = yaxis.centers(overflow='over')
        for ix, x_center in enumerate(x_centers):
            for iy, y_center in enumerate(y_centers):
                value = sf_yx[iy, ix]
                textcol = 'white' if value < clim_center else 'black'
                ax.text(
                        x_center,
                        y_center,
                        f'  {value:.3f} \n$\\pm$ {dsf_yx[iy, ix]:.2f}',
                        ha='center',
                        va='center',
                        color=textcol,
                        fontsize=6
                        )

        ax.set_ylabel('$p_{T}(V)$ (GeV)')
        ax.set_xlabel('M(jj) (GeV)')
        cb = fig.colorbar(im)
        cb.set_label('LO $\\rightarrow$ NLO SF')
        im.set_clim(*clims)
        fig.savefig(pjoin(outdir,f'2d_{tag}_{dist}.pdf'))

        tup = (sf, x_edges, y_edges)
        print(tup[0].shape)
        print(tup[1].shape)
        print(tup[2].shape)
        outputrootfile[f'2d_{tag}_{selection}'] =  tup
Example #22
0
def sf_1d(acc, tag, regex, outputrootfile):
    """Derive and plot 1D NLO / LO scale factors as a function of V pT.

    For every pt type and every selection ('inclusive', 'monojet',
    'vbf') the histogram ``gen_vpt_{selection}_{pt_type}`` is read, the
    distributions and their NLO / LO ratio are plotted to
    ``./output/{tag}_{dist}.pdf`` and the ratio is stored in
    ``outputrootfile['{tag}_{pt_type}_{selection}']`` as
    ``(values, edges)``.

    :param acc: Accumulator providing the ``gen_vpt_*`` histograms
    :param tag: Process tag; 'dy' and 'wjet' additionally use the
                'dress' pt type and a different binning
    :param regex: Pattern string used to pre-select datasets
    :param outputrootfile: Mapping-like object the results are assigned to
    """
    outdir = './output/'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # One figure is reused for all plots; plot1d / plotratio are called
    # without clear=False on each pass
    fig, (ax, rax) = plt.subplots(2, 1, figsize=(7,7), gridspec_kw={"height_ratios": (3, 1)}, sharex=True)

    pt_types = ['stat1']

    if tag in ['dy','wjet']:
        pt_types.append('dress')
        new_ax = hist.Bin('vpt','V $p_{T}$ (GeV)',list(range(100,800,100))+list(range(800,1200,200))+list(range(1200,2800,800)))
    else:
        new_ax = hist.Bin('vpt','V $p_{T}$ (GeV)',[200,250]+list(range(300,800,100))+list(range(800,1400,200)))

    overflow = 'none'
    for pt_type in pt_types:
        for selection in ['inclusive','monojet','vbf']:
            dist = f'gen_vpt_{selection}_{pt_type}'
            acc.load(dist)
            h = copy.deepcopy(acc[dist])

            h = h.rebin(h.axis('vpt'), new_ax)

            # Integrate out the extra axes of the non-inclusive selections
            if selection == 'monojet':
                h = h.integrate(h.axis("jpt"))
            if selection == 'vbf':
                h = h.integrate(h.axis("jpt"))
                h = h.integrate(h.axis("mjj"))
            h = merge_extensions(h, acc, reweight_pu=False)
            scale_xs_lumi(h)
            h = merge_datasets(h)
            h = h[re.compile(regex)]
            hist.plot1d(
                h,
                overlay='dataset',
                overflow=overflow,
                binwnorm=True,
                ax=ax)
            lo = h[re.compile('.*HT.*')].integrate('dataset')
            nlo = h[re.compile('.*(LHE|amc).*')].integrate('dataset')

            hist.plotratio(nlo, lo,
                ax=rax,
                denom_fill_opts={},
                guide_opts={},
                unc='num',
                overflow=overflow,
                error_opts=data_err_opts,
                label='2017 NLO/LO ratio'
                )

            # Overlay the values returned by get_old_kfac for comparison
            old = get_old_kfac(tag)
            old_x = 0.5*(old.bins[:,0]+old.bins[:,1])
            rax.plot(old_x, old.values,'ob-', label='2016 QCD k fac')
            rax.plot(old_x, old.values * pdfwgt_sf(old_x),'or-', label='2016 x ad-hoc DY pdfwgt SF')
            ax.set_yscale('log')
            ax.set_ylim(1e-3,1e6)
            rax.set_ylim(0,2)
            rax.legend()

            fig.savefig(pjoin(outdir,f'{tag}_{dist}.pdf'))

            sf_x = lo.axis('vpt').edges(overflow=overflow)
            sf_y = nlo.values(overflow=overflow)[()] / lo.values(overflow=overflow)[()]

            outputrootfile[f'{tag}_{pt_type}_{selection}'] = (sf_y,sf_x)

    # Release the figure; pyplot keeps it alive until it is closed
    plt.close(fig)
Example #23
0
def extract_yields_in_cr(acc, distribution, region='cr_vbf_qcd_rs', year=2017):
    '''Compute data minus the selected (non-QCD) MC in the given region.

    The data and stacked MC are drawn on the upper pad and the
    difference on the lower pad; the figure is saved to
    ./output/qcd_cr/qcd_cr_{distribution}.pdf.

    Returns the data - MC histogram.'''
    acc.load(distribution)

    histo = merge_extensions(acc[distribution], acc)
    scale_xs_lumi(histo)
    histo = merge_datasets(histo)

    # Rebin only if a binning is registered for this distribution
    if distribution in BINNINGS:
        rebin_ax = BINNINGS[distribution]
        histo = histo.rebin(rebin_ax.name, rebin_ax)

    histo = histo.integrate('region', region)

    data_name = f'MET_{year}'
    mc_regex = re.compile(
        f'(ZJetsToNuNu.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL_M-50_HT_MLM.*|WJetsToLNu.*HT.*).*{year}'
    )

    fig, ax, rax = fig_ratio()

    # Upper pad: data as black markers, MC as a stack
    data_err_opts = {
        'linestyle': 'none',
        'marker': '.',
        'markersize': 10.,
        'color': 'k',
        'elinewidth': 1,
    }
    hist.plot1d(histo[data_name],
                ax=ax,
                overlay='dataset',
                binwnorm=1,
                error_opts=data_err_opts)
    hist.plot1d(histo[mc_regex],
                ax=ax,
                overlay='dataset',
                binwnorm=1,
                stack=True,
                clear=False)

    ax.set_yscale('log')
    ax.set_ylim(1e-4, 1e6)
    ax.set_ylabel('Events / GeV')
    ax.yaxis.set_ticks_position('both')

    # Replace raw dataset names in the legend by their pretty labels
    handles, labels = ax.get_legend_handles_labels()
    for handle, label in zip(handles, labels):
        for pattern, pretty in PRETTY_LEGEND_LABELS.items():
            if re.match(pattern, label):
                handle.set_label(pretty)
    ax.legend(title='VBF QCD CR', handles=handles, ncol=2)

    # Data - MC: add the sign-flipped MC sum to the data
    residual = histo[data_name].integrate('dataset')
    mc_total = histo[mc_regex].integrate('dataset')
    mc_total.scale(-1)
    residual.add(mc_total)

    # Lower pad: data - MC
    hist.plot1d(residual, ax=rax, binwnorm=1)
    rax.set_ylabel('(Data - MC) / GeV')
    rax.set_ylim(1e-3, 1e1)
    rax.set_yscale('log')
    rax.get_legend().remove()
    rax.yaxis.set_ticks_position('both')

    # Save; an already existing output directory is fine
    outdir = './output/qcd_cr'
    try:
        os.makedirs(outdir)
    except FileExistsError:
        pass
    outpath = pjoin(outdir, f'qcd_cr_{distribution}.pdf')
    fig.savefig(outpath)
    plt.close(fig)

    print(f'File saved: {outpath}')

    return residual
Example #24
0
def eta_phi_plot(inpath):
    """Make 2D eta-phi plots of data for the lepton control regions.

    For 2017 and 2018 and each of the regions cr_1m_j, cr_2m_j, cr_1e_j
    and cr_2e_j, the 'ak4_eta_phi' histogram plus the matching lepton
    histogram ('muon_eta_phi' or 'electron_eta_phi') is plotted for the
    region's data set and saved as a PDF below
    ``./output/<basename of inpath>``.

    :param inpath: Input directory passed to ``acc_from_dir``
    :type inpath: string
    """
    # Local import so the figures can be closed without relying on a
    # module-level pyplot import
    import matplotlib.pyplot as plt

    indir = os.path.abspath(inpath)

    acc = acc_from_dir(indir)
    outdir = pjoin('./output/', os.path.basename(indir))
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    for year in [2017, 2018]:
        # Data set per region: MET for muon regions, EGamma for electrons
        data = {
            'cr_1m_j': f'MET_{year}',
            'cr_2m_j': f'MET_{year}',
            'cr_1e_j': f'EGamma_{year}',
            'cr_2e_j': f'EGamma_{year}',
            # 'cr_g_j' : f'EGamma_{year}',
        }
        for region, datare in data.items():
            distributions = ['ak4_eta_phi']

            # Add the lepton distribution matching the region
            if 'e_' in region:
                distributions.append('electron_eta_phi')
            elif 'm_' in region:
                distributions.append('muon_eta_phi')
            for distribution in distributions:
                h = copy.deepcopy(acc[distribution])
                # NOTE(review): other callers in this file pass
                # reweight_pu=not ('nopu' in distribution); confirm that
                # the polarity used here is intended.
                h = merge_extensions(h,
                                     acc,
                                     reweight_pu=('nopu' in distribution))
                scale_xs_lumi(h)
                h = merge_datasets(h)

                h = h.integrate('dataset', datare)
                h = h.integrate(h.axis('region'), region)
                fig, ax, _ = plot2d(h, xaxis='eta')

                # Region name inside the top-left corner
                ax.text(0.,
                        1.,
                        region,
                        fontsize=10,
                        horizontalalignment='left',
                        verticalalignment='top',
                        color='white',
                        transform=ax.transAxes)
                # Distribution name inside the bottom-right corner
                ax.text(1.,
                        0.,
                        distribution,
                        fontsize=10,
                        horizontalalignment='right',
                        verticalalignment='bottom',
                        transform=ax.transAxes)
                # Luminosity and CMS labels above the axes
                fig.text(1.,
                         1.,
                         f'{lumi(year)} fb$^{{-1}}$ ({year})',
                         fontsize=14,
                         horizontalalignment='right',
                         verticalalignment='bottom',
                         transform=ax.transAxes)
                fig.text(0.,
                         1.,
                         '$\\bf{CMS}$ internal',
                         fontsize=14,
                         horizontalalignment='left',
                         verticalalignment='bottom',
                         transform=ax.transAxes)
                outname = pjoin(outdir, f'{region}_{distribution}_{year}.pdf')
                fig.savefig(outname)
                # One figure per iteration: close it so they do not pile up
                plt.close(fig)
                print(f'Created file {outname}')
Example #25
0
def get_pdf_uncertainty(acc, regex, tag, nominal='pdf_0'):
    '''Given the input accumulator, calculate the
       PDF uncertainty from all PDF variations.

    The uncertainty is evaluated as a function of V-pt, printed,
    and plotted to ./output/theory_variations/pdf/<tag>_pdf_unc.pdf.

    Parameters:
        acc:     Accumulator; must support load(name) and item access.
        regex:   Regular expression used to pick the dataset(s)
                 after the datasets have been merged.
        tag:     Process tag, one of 'dy', 'wjet' or 'gjets'.
        nominal: Name of the 'var' identifier used as the nominal
                 weights (by default the first PDF variation).

    Returns:
        Tuple (nlo_nom, unc, vpt_edges, vpt_centers): the nominal NLO
        contents, the uncertainty as returned by calculate_pdf_unc,
        and the V-pt bin edges and centers (incl. the overflow bin).

    Raises:
        ValueError: If the tag is not one of the supported tags.
    '''
    # Supported tags and the plot title used for each of them
    tag_to_title = {
        'dy': r'$Z\rightarrow \ell \ell$',
        'wjet': r'$W\rightarrow \ell \nu$',
        'gjets': r'$\gamma$ + jets'
    }
    # Fail early with a clear message: the binning and the title
    # below are only defined for the supported tags
    if tag not in tag_to_title:
        raise ValueError(
            f"Unsupported tag '{tag}', expected one of {sorted(tag_to_title)}"
        )

    # Define rebinning (the V-pt binning is the same for all tags)
    vpt_ax_fine = list(range(0, 400, 40)) + list(range(400, 1200, 80))
    vpt_ax = hist.Bin('vpt', 'V $p_{T}$ (GeV)', vpt_ax_fine)

    # Set the correct pt type
    pt_tag = 'combined' if tag != 'gjets' else 'stat1'
    acc.load(f'gen_vpt_vbf_{pt_tag}')
    h = acc[f'gen_vpt_vbf_{pt_tag}']

    h = h.rebin('vpt', vpt_ax)

    h = merge_extensions(h, acc, reweight_pu=False)
    scale_xs_lumi(h)
    h = merge_datasets(h)
    h = h[re.compile(regex)]

    # Integrate out mjj to get 1D variations
    # as a function of V-pt
    mjj_slice = slice(200, 7500)
    h = h.integrate('mjj', mjj_slice, overflow='over')

    # Get NLO distribution
    nlo = h[re.compile('.*(LHE|amcat).*')].integrate('dataset')

    # Nominal NLO weights, as specified in arguments
    # By default, use first PDF variation as nominal
    nlo_nom = nlo.integrate('var', nominal).values(overflow='over')[()]

    # NLO with PDF variations
    # Use a dict to collect NLO contents with all PDF variations
    nlo_var = {}
    for var in nlo.identifiers('var'):
        var_name = var.name
        if 'pdf' not in var_name:
            continue
        nlo_var[var_name] = nlo.integrate('var',
                                          var_name).values(overflow='over')[()]

    unc, percent_unc = calculate_pdf_unc(nlo_nom, nlo_var, tag)
    print(percent_unc)

    plot_variations(nlo_nom, nlo_var, tag)

    # Plot the % uncertainty as a function of V-pt
    fig, ax = plt.subplots(1, 1)
    vpt_edges = vpt_ax.edges(overflow='over')
    # Bin centers: midpoint of each pair of neighbouring edges
    vpt_centers = (vpt_edges[:-1] + vpt_edges[1:]) / 2
    ax.plot(vpt_centers, percent_unc, 'o')
    ax.set_xlabel(r'$p_T(V) \ (GeV)$')
    ax.set_ylabel(r'$\sigma_{pdf}$ / Nominal Counts')
    ax.set_title(tag_to_title[tag])
    ax.grid(True)
    # Vertical red line at V-pt = 200 GeV, spanning the full y range
    ax.plot([200, 200], [0, 0.07], 'r')
    ax.set_ylim(0, 0.07)

    # Save the figure
    outdir = './output/theory_variations/pdf'
    os.makedirs(outdir, exist_ok=True)

    outpath = pjoin(outdir, f'{tag}_pdf_unc.pdf')
    fig.savefig(outpath)

    # Return nominal weights and uncertainty
    return nlo_nom, unc, vpt_edges, vpt_centers
Example #26
0
def plot(args):
    '''Make data / MC plots for the VBF regions, separately per year.

    Attributes read from ``args``:
        inpath:       Directory holding the input *coffea files.
        region:       Regular expression; only regions matching it
                      (via re.match) are plotted.
        distribution: Regular expression; only distributions
                      matching it (via re.match) are plotted.

    Plots are written by make_plot as PDF files into
    ./output/<basename of input directory>/<region>.
    Distributions missing from the input are skipped with a
    warning, as are plots for which make_plot raises a KeyError.
    '''
    indir = os.path.abspath(args.inpath)

    # The processor output is stored in an
    # 'accumulator', which in our case is
    # just a dictionary holding all the histograms
    # Put all your *coffea files into 'indir' and
    # pass the directory as an argument here.
    # All input files in the directory will
    # automatically be found, merged and read.
    # The merging only happens the first time
    # you run over a specific set of inputs.
    acc = dir_archive(args.inpath, serialized=True, compression=0, memsize=1e3)
    # Get a settings dictionary that details
    # which plots to make for each region,
    # what the axis limits are, etc
    # Can add plots by extending the dictionary
    # Or modify axes ranges, etc
    settings = plot_settings()

    # Distributions that were already merged and scaled,
    # so that this is done only once per distribution
    merged = set()

    # Separate plots per year
    for year in [2017, 2018]:
        # The data to be used for each region
        # Muon regions use MET,
        # electron+photon regions use EGamma
        # ( EGamma = SingleElectron+SinglePhoton for 2017)
        data = {
            'sr_vbf': f'MET_{year}',
            'cr_1m_vbf': f'MET_{year}',
            'cr_2m_vbf': f'MET_{year}',
            'cr_1e_vbf': f'EGamma_{year}',
            'cr_2e_vbf': f'EGamma_{year}',
            'cr_g_vbf': f'EGamma_{year}',
        }

        # Same for MC selection
        # Match datasets by regular expressions
        # Here for LO V samples (HT binned)
        # The single and double lepton regions share
        # one selection each between muons and electrons
        mc_single_lepton = re.compile(
            f'(EWKW.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
        )
        mc_double_lepton = re.compile(
            f'(EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'
        )
        mc_lo = {
            'sr_vbf':
            re.compile(
                f'(ZJetsToNuNu.*|EW.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_1m_vbf': mc_single_lepton,
            'cr_1e_vbf': mc_single_lepton,
            'cr_2m_vbf': mc_double_lepton,
            'cr_2e_vbf': mc_double_lepton,
            'cr_g_vbf':
            re.compile(
                f'(GJets_(DR-0p4|SM).*|QCD_data.*|WJetsToLNu.*HT.*).*{year}'),
        }

        # Load ingredients from cache
        acc.load('sumw')
        acc.load('sumw_pileup')
        acc.load('nevents')

        # Data / MC plots are made here
        # Loop over all regions
        for region in mc_lo:
            if not re.match(args.region, region):
                continue
            # Plot ratio pads for all regions (now that we're unblinded)
            ratio = True
            # Make separate output directory for each region
            outdir = f'./output/{os.path.basename(indir)}/{region}'
            # Settings for this region
            plotset = settings[region]

            # Loop over the distributions
            for distribution in plotset:
                if not re.match(args.distribution, distribution):
                    continue
                # Load from cache
                if distribution not in merged:
                    acc.load(distribution)

                    if distribution not in acc.keys():
                        print(
                            f"WARNING: Distribution {distribution} not found in input files."
                        )
                        continue
                    acc[distribution] = merge_extensions(
                        acc[distribution],
                        acc,
                        reweight_pu='nopu' not in distribution)
                    scale_xs_lumi(acc[distribution])
                    acc[distribution] = merge_datasets(acc[distribution])
                    acc[distribution].axis('dataset').sorting = 'integral'
                    merged.add(distribution)

                # In the photon region, 'QCD_data' is only used for
                # the recoil distribution; all other distributions
                # use the 'QCD.*HT' datasets instead
                imc = mc_lo[region]
                if "cr_g" in region and distribution != "recoil":
                    imc = re.compile(
                        imc.pattern.replace('QCD_data', 'QCD.*HT'))
                try:
                    # The heavy lifting of making a plot is hidden
                    # in make_plot. We call it once using the LO MC
                    make_plot(
                        acc,
                        region=region,
                        distribution=distribution,
                        year=year,
                        data=data[region],
                        mc=imc,
                        ylim=plotset[distribution].get('ylim', None),
                        xlim=plotset[distribution].get('xlim', None),
                        tag='losf',
                        outdir=outdir,
                        output_format='pdf',
                        ratio=ratio)
                except KeyError as exc:
                    # Best effort: skip this plot, but say so
                    # instead of dropping it silently
                    print(
                        f"WARNING: Skipping {distribution} in {region} ({year}), missing key: {exc}"
                    )
                    continue
Example #27
0
def eta_phi_plot(inpath):
    '''Make 2D eta-phi plots of the data in the lepton control regions.

    For each year and region, the leading-jet distribution
    ('ak4_eta0_phi0') is plotted, plus the electron or muon
    eta-phi distribution, depending on the region name.
    Each plot is saved as PDF and PNG into
    ./output/<basename of input directory>/.

    Parameters:
        inpath: Directory holding the input files, passed on
                to dir_archive as an absolute path.
    '''
    # Imported locally, only needed to release the figures below
    from matplotlib import pyplot as plt

    indir = os.path.abspath(inpath)

    acc = dir_archive(indir, serialized=True, compression=0, memsize=1e3)

    acc.load('sumw')
    acc.load('sumw_pileup')
    acc.load('sumw2')
    acc.load('nevents')

    outdir = pjoin('./output/', os.path.basename(indir))
    os.makedirs(outdir, exist_ok=True)

    for year in [2017, 2018]:
        # Data dataset to be used for each region
        data = {
            'cr_1m_j': f'MET_{year}',
            'cr_2m_j': f'MET_{year}',
            'cr_1e_j': f'EGamma_{year}',
            'cr_2e_j': f'EGamma_{year}',
            # 'cr_g_j' : f'EGamma_{year}',
        }
        for region, datare in data.items():
            distributions = ['ak4_eta0_phi0']

            # Add the lepton distribution matching the region
            if 'e_' in region:
                distributions.append('electron_eta_phi')
            elif 'm_' in region:
                distributions.append('muon_eta_phi')
            for distribution in distributions:
                acc.load(distribution)
                # Work on a copy so that the accumulator
                # content is not modified by the scaling
                h = copy.deepcopy(acc[distribution])
                # NOTE(review): other plotting code passes
                # reweight_pu=not ('nopu' in distribution), i.e. the
                # opposite of this. Confirm which one is intended.
                h = merge_extensions(h,
                                     acc,
                                     reweight_pu=('nopu' in distribution))
                scale_xs_lumi(h)
                h = merge_datasets(h)

                h = h.integrate('dataset', datare)
                h = h.integrate(h.axis('region'), region)
                ax = plot2d(h, xaxis='eta')
                fig = ax.figure

                # Region and distribution names inside the axes
                ax.text(0.,
                        1.,
                        region,
                        fontsize=10,
                        horizontalalignment='left',
                        verticalalignment='top',
                        color='white',
                        transform=ax.transAxes)
                ax.text(1.,
                        0.,
                        distribution,
                        fontsize=10,
                        horizontalalignment='right',
                        verticalalignment='bottom',
                        transform=ax.transAxes)
                # Luminosity and CMS labels on top of the axes
                fig.text(1.,
                         1.,
                         f'{lumi(year):.1f} fb$^{{-1}}$ ({year})',
                         fontsize=14,
                         horizontalalignment='right',
                         verticalalignment='bottom',
                         transform=ax.transAxes)
                fig.text(0.,
                         1.,
                         '$\\bf{CMS}$ internal',
                         fontsize=14,
                         horizontalalignment='left',
                         verticalalignment='bottom',
                         transform=ax.transAxes)
                for ext in 'pdf', 'png':
                    outname = pjoin(outdir,
                                    f'{region}_{distribution}_{year}.{ext}')
                    fig.savefig(outname)
                    print(f'Created file {outname}')
                # Release the figure: a new one is created in every
                # iteration and would otherwise stay open until exit
                plt.close(fig)