def test_accumulators():
    """Exercise the accumulator flavours exposed by ``processor``.

    Each section builds one kind of accumulator (scalar, numpy array, set,
    dict, defaultdict), updates it with ``+=`` and asserts on the contents.
    """
    # Scalar accumulator: "+=" is fed both a plain number and an accumulator.
    scalar = processor.accumulator(0.)
    scalar += 3.
    scalar += processor.accumulator(2)
    assert scalar.value == 5.
    assert scalar.identity().value == 0.

    # Same sequence, starting from a one-element numpy array.
    vector = processor.accumulator(np.array([0.]))
    vector += 3.
    vector += processor.accumulator(2)
    assert vector.value == np.array([5.])
    assert vector.identity().value == np.array([0.])

    # Set accumulator: "+=" is fed both a set and a single element.
    fruits = processor.set_accumulator({'apples', 'oranges'})
    fruits += {'pears'}
    fruits += 'grapes'
    assert fruits == {'apples', 'oranges', 'pears', 'grapes'}

    # Dict accumulator: merging is checked for a key that is new ('num2'),
    # one that only exists on the left ('num') and one present on both
    # sides ('fruit').
    nested = processor.dict_accumulator({'num': vector, 'fruit': fruits})
    nested['num'] += 2.
    extra = processor.dict_accumulator({
        'num2': processor.accumulator(0),
        'fruit': processor.set_accumulator({'apples', 'cherries'}),
    })
    nested += extra
    assert nested['num2'].value == 0
    assert nested['num'].value == 7.
    assert nested['fruit'] == {
        'apples', 'oranges', 'pears', 'grapes', 'cherries',
    }

    # Defaultdict accumulator whose factory yields a fresh scalar accumulator;
    # 'y', 'z' and 'w' are never assigned explicitly.
    counters = processor.defaultdict_accumulator(
        lambda: processor.accumulator(0.))
    counters['x'] = processor.accumulator(0.)
    counters['x'] += 4.
    counters['y'] += 5.
    counters['z'] += counters['x']
    counters['x'] += counters['y']
    assert counters['x'].value == 9.
    assert counters['y'].value == 5.
    assert counters['z'].value == 4.
    assert counters['w'].value == 0.

    # Only checked for not raising: the sum itself is never inspected.
    merged = counters + nested

    # Defaultdict accumulator whose factory yields a plain float.
    floats = processor.defaultdict_accumulator(lambda: 2.)
    floats['x'] += 4.
    assert floats['x'] == 6.

    # Adding the container to itself.
    floats += floats
    assert floats['x'] == 12.
    assert floats['y'] == 2.
Esempio n. 2
0
def get_pileup(item):
    """Read the ``Pu`` and ``SumWeights`` objects of one file into accumulators.

    Args:
        item: A ``(dataset, filename)`` pair.

    Returns:
        A ``processor.dict_accumulator`` with the keys ``'pileup'`` and
        ``'sumw'``; each maps to a ``processor.dict_accumulator`` holding a
        single entry keyed by ``dataset``.
    """
    dataset, filename = item
    rootfile = uproot.open(filename)

    # Start from zeros of the same shape and add the stored values on top.
    pu_values = rootfile["Pu"].values
    pileup = processor.accumulator(np.zeros_like(pu_values))
    pileup += pu_values

    # Only the first entry of the "SumWeights" values is accumulated.
    sumw = processor.accumulator(np.zeros(1))
    sumw += rootfile["SumWeights"].values[0]

    pileup_by_dataset = processor.dict_accumulator({dataset: pileup})
    sumw_by_dataset = processor.dict_accumulator({dataset: sumw})
    return processor.dict_accumulator({
        'pileup': pileup_by_dataset,
        'sumw': sumw_by_dataset,
    })
Esempio n. 3
0
    def __init__(self):
        """Book the output accumulator: a mass histogram and a cutflow counter."""
        # One categorical axis for the dataset and one regular axis for the
        # mass (30000 bins between 0.25 and 300).
        axes = (
            hist.Cat("dataset", "Primary dataset"),
            hist.Bin("mass", r"$m_{\mu\mu}$ [GeV]", 30000, 0.25, 300),
        )
        mass_hist = hist.Hist("Counts", *axes)
        cutflow = processor.defaultdict_accumulator(int)

        self._accumulator = processor.dict_accumulator(
            {'mass': mass_hist, 'cutflow': cutflow})
Esempio n. 4
0
    def __init__(self, columns):
        """Remember the requested columns and book the output accumulator.

        Args:
            columns: Column names; ``'dataset'`` is prepended and the result
                is stored as ``self._columns``.
        """
        self._columns = ['dataset'] + columns

        # Both kinematic axes use 30000 bins between 0.25 and 300.
        dataset_axis = hist.Cat("dataset", "Primary dataset")
        kinematic_axes = (
            hist.Bin("mass", r"$m_{\mu\mu}$ [GeV]", 30000, 0.25, 300),
            hist.Bin("pt", r"$p_{T}$ [GeV]", 30000, 0.25, 300),
        )
        mass_hist, pt_hist = (
            hist.Hist("Counts", dataset_axis, axis) for axis in kinematic_axes
        )
        cutflow = processor.defaultdict_accumulator(int)

        self._accumulator = processor.dict_accumulator(
            {'mass': mass_hist, 'pt': pt_hist, 'cutflow': cutflow})
Esempio n. 5
0
def get_pileup(item):
    """Read the ``Pu`` and ``SumWeights`` objects of one file into accumulators.

    Args:
        item: A ``(dataset, filename)`` pair.

    Returns:
        A ``processor.dict_accumulator`` with the keys ``'pileup'`` and
        ``'sumw'``; each maps to a ``processor.dict_accumulator`` holding a
        single entry keyed by ``dataset``.
    """
    dataset, filename = item
    rootfile = uproot.open(filename)

    # Start from zeros of the same shape and add the stored values on top.
    pu_values = rootfile["Pu"].values
    pileup = processor.accumulator(np.zeros_like(pu_values))
    pileup += pu_values

    # Only the first entry of the "SumWeights" values is accumulated.
    sumw = processor.accumulator(np.zeros(1))
    sumw += rootfile["SumWeights"].values[0]

    pileup_by_dataset = processor.dict_accumulator({dataset: pileup})
    sumw_by_dataset = processor.dict_accumulator({dataset: sumw})
    return processor.dict_accumulator({
        'pileup': pileup_by_dataset,
        'sumw': sumw_by_dataset,
    })


# Empty per-dataset containers, one per quantity returned by get_pileup.
final_accumulator = processor.dict_accumulator({
    key: processor.dict_accumulator() for key in ('pileup', 'sumw')
})

# The executor's return value is discarded, so the dumps below rely on the
# results having been accumulated into final_accumulator.
processor.futures_executor(
    filelist,
    get_pileup,
    final_accumulator,
    workers=8,
)

# Persist each quantity to its own lz4-compressed cloudpickle file.
with lz4f.open("correction_files/pileup_mc.cpkl.lz4", mode="wb") as fout:
    cloudpickle.dump(final_accumulator['pileup'], fout)

with lz4f.open("correction_files/sumw_mc.cpkl.lz4", mode="wb") as fout:
    cloudpickle.dump(final_accumulator['sumw'], fout)
Esempio n. 6
0
 def work_function(item):
     """Run ``process_file`` on one ``(dataset, file)`` item and wrap its result.

     Returns a ``processor.dict_accumulator`` holding the second element of
     the ``process_file`` result under ``'stats'`` and the first under
     ``'job'``.
     """
     dataset, filename = item
     result = process_file(
         dataset,
         filename,
         processor_instance,
         combined_accumulator['stats'],
         preload_items,
     )
     job_output, job_stats = result
     return processor.dict_accumulator({'stats': job_stats, 'job': job_output})
Esempio n. 7
0
    # Flatten the {dataset: files} mapping into (dataset, file) work items,
    # taking at most args.limit files from each dataset.
    filelist = []
    for dataset, files in sample.items():
        for file in files[:args.limit]:
            filelist.append((dataset, file))

    # Load the processor object from the lz4-compressed pickle at args.processor.
    # NOTE(review): unpickling runs arbitrary code from the file — the path
    # must point to a trusted artifact.
    with lz4f.open(args.processor, mode="rb") as fin:
        processor_instance = cloudpickle.load(fin)

    # Container with two entries: bookkeeping values under 'stats' and the
    # identity() of the processor's own accumulator under 'job'.
    combined_accumulator = processor.dict_accumulator({
        'stats':
        processor.dict_accumulator({
            'nentries':
            processor.accumulator(0),
            'bytesread':
            processor.accumulator(0),
            'sumworktime':
            processor.accumulator(0.),
            'columns_accessed':
            processor.set_accumulator(),
        }),
        'job':
        processor_instance.accumulator.identity(),
    })

    def work_function(item):
        """Run ``process_file`` on one ``(dataset, file)`` item and wrap its result.

        Returns a ``processor.dict_accumulator`` holding the second element of
        the ``process_file`` result under ``'stats'`` and the first under
        ``'job'``.
        """
        dataset, filename = item
        result = process_file(
            dataset,
            filename,
            processor_instance,
            combined_accumulator['stats'],
            preload_items,
        )
        job_output, job_stats = result
        return processor.dict_accumulator(
            {'stats': job_stats, 'job': job_output})

    # Wall-clock reference timestamp; presumably used to time the processing
    # that follows — confirm against the code that reads tstart.
    tstart = time.time()
    def __init__(self, corrections, debug=False):
        """Store the configuration and book every output histogram.

        Args:
            corrections: Stored as ``self._corrections``; not used further
                in this method.
            debug: Stored as ``self._debug``; not used further in this method.

        The booked histograms are collected in a ``processor.dict_accumulator``
        that is kept as ``self._accumulator``.

        Side effect: sets the class-level ``hist.Hist.DEFAULT_DTYPE`` to
        ``'f'`` before any histogram is created.
        """
        self._corrections = corrections
        self._debug = debug

        # --- Axes shared between several histograms -------------------------
        dataset_axis = hist.Cat("dataset", "Primary dataset")
        gencat_axis = hist.Bin("AK8Puppijet0_isHadronicV", "V matching index",
                               [0, 1, 2, 3, 9, 10, 11])
        jetpt_axis = hist.Bin("AK8Puppijet0_pt", r"Jet $p_T$",
                              [450, 500, 550, 600, 675, 800, 1200])
        jetmass_axis = hist.Bin("AK8Puppijet0_msd", r"Jet $m_{sd}$", 23, 40,
                                201)
        jetpt_coarse_axis = hist.Bin("AK8Puppijet0_pt", r"Jet $p_T$",
                                     [450, 1200])
        jetmass_coarse_axis = hist.Bin("AK8Puppijet0_msd", r"Jet $m_{sd}$",
                                       [40, 103, 152, 201])
        # NOTE(review): jetrho_axis is not attached to any histogram booked
        # below; kept in case it is meant for later use.
        jetrho_axis = hist.Bin("ak8jet_rho", r"Jet $\rho$", 13, -6, -2.1)
        doubleb_axis = hist.Bin("AK8Puppijet0_deepdoubleb", "Double-b", 20, 0.,
                                1)
        doublec_axis = hist.Bin("AK8Puppijet0_deepdoublec", "Double-c", 20, 0.,
                                1.)
        doublecvb_axis = hist.Bin("AK8Puppijet0_deepdoublecvb", "Double-cvb",
                                  20, 0., 1.)

        # The working-point lists are written in descending order and reversed
        # so that the bin edges handed to hist.Bin are ascending.
        doubleb_wps = [1., 0.9, 0.89, 0.85, 0.7]
        doubleb_coarse_axis = hist.Bin("AK8Puppijet0_deepdoubleb", "Double-b",
                                       doubleb_wps[::-1])
        doublec_wps = [0.87, 0.84, 0.83, 0.79, 0.69]
        doublec_coarse_axis = hist.Bin("AK8Puppijet0_deepdoublec", "Double-c",
                                       doublec_wps[::-1])
        doublecvb_wps = [0.93, 0.91, 0.6, 0.2, 0.17]
        doublecvb_coarse_axis = hist.Bin("AK8Puppijet0_deepdoublecvb",
                                         "Double-cvb", doublecvb_wps[::-1])

        hists = processor.dict_accumulator()
        # Save some space by keeping float bin counts instead of double.
        # This is assigned on the Hist class itself, not on an instance.
        hist.Hist.DEFAULT_DTYPE = 'f'
        # The defaultdict_accumulator is broken :<
        hists['sumw'] = processor.dict_accumulator()

        # --- Kinematic distributions per selection --------------------------
        hists['jetpt_preselection'] = hist.Hist(
            "Events",
            dataset_axis,
            gencat_axis,
            hist.Bin("AK8Puppijet0_pt", "Jet $p_T$", 100, 300, 1300),
        )
        hists['jeteta_preselection'] = hist.Hist(
            "Events",
            dataset_axis,
            gencat_axis,
            hist.Bin("AK8Puppijet0_eta", r"Jet $\eta$", 50, -3, 3),
        )
        hists['jetpt_muoncontrol'] = hist.Hist(
            "Events",
            dataset_axis,
            gencat_axis,
            hist.Bin("AK8Puppijet0_pt", "Jet $p_T$", 100, 300, 1300),
        )
        hists['muonpt_muoncontrol'] = hist.Hist(
            "Events",
            dataset_axis,
            gencat_axis,
            hist.Bin("vmuoLoose0_pt", "Leading muon $p_T$", 100, 0, 1000),
        )
        hists['muoneta_muoncontrol'] = hist.Hist(
            "Events",
            dataset_axis,
            gencat_axis,
            hist.Bin("vmuoLoose0_eta", r"Leading muon $\eta$", 50, -3, 3),
        )
        hists['jetpt_signalregion'] = hist.Hist(
            "Events", dataset_axis, gencat_axis,
            hist.Bin("AK8Puppijet0_pt", "Jet $p_T$", 100, 300, 1300))

        # --- Multi-dimensional tagger histograms ----------------------------
        hists['sculpt_signalregion'] = hist.Hist(
            "Events", dataset_axis, gencat_axis, jetpt_axis, jetmass_axis,
            doubleb_coarse_axis, doublec_coarse_axis, doublecvb_coarse_axis)
        hists['tagtensor_signalregion'] = hist.Hist(
            "Events", dataset_axis, gencat_axis, jetpt_coarse_axis,
            jetmass_coarse_axis, doubleb_axis, doublec_axis, doublecvb_axis)

        # --- Signal-region distributions in coarse pt/mass bins -------------
        hists['opposite_ak8_n3sdb1_signalregion'] = hist.Hist(
            "Events", dataset_axis, gencat_axis, jetpt_coarse_axis,
            jetmass_coarse_axis,
            hist.Bin("opposite_ak8_n3sdb1", r"Jet $N_{3,sd}^{\beta=1}$", 40,
                     0.5, 3))
        hists['opposite_ak8_tau32_signalregion'] = hist.Hist(
            "Events", dataset_axis, gencat_axis, jetpt_coarse_axis,
            jetmass_coarse_axis,
            hist.Bin("opposite_ak8_tau32", r"Jet $\tau_{32}$", 40, 0, 1))
        # Label uses plain "m_{sd}", matching the other soft-drop mass axes
        # ("\m" is not a TeX command).
        hists['opposite_ak8_msd_signalregion'] = hist.Hist(
            "Events", dataset_axis, gencat_axis, jetpt_coarse_axis,
            jetmass_coarse_axis,
            hist.Bin("opposite_ak8_msd", r"Jet $m_{sd}$", 40, 50, 200))
        hists['njets_ak4_signalregion'] = hist.Hist(
            "Events", dataset_axis, gencat_axis, jetpt_coarse_axis,
            jetmass_coarse_axis,
            hist.Bin("nAK4PuppijetsPt30", "Number AK4 Jets", 8, 0, 8))

        # --- N-1 distributions ----------------------------------------------
        hists['nminus1_antiak4btagMediumOppHem_signalregion'] = hist.Hist(
            "Events", dataset_axis, gencat_axis, jetpt_coarse_axis,
            jetmass_coarse_axis,
            hist.Bin("opposite_ak4_leadingDeepCSV",
                     r"Max(DeepCSV) (of $\leq4$ leading)", 40, 0, 1))
        hists['nminus1_pfmet_signalregion'] = hist.Hist(
            "Events", dataset_axis, gencat_axis, jetpt_coarse_axis,
            jetmass_coarse_axis, doubleb_coarse_axis,
            hist.Bin("pfmet", r"PF $p_{T}^{miss}$", 40, 0, 200))
        hists['nminus1_n2ddtPass_signalregion'] = hist.Hist(
            "Events", dataset_axis, gencat_axis, jetmass_coarse_axis,
            doubleb_coarse_axis,
            hist.Bin("ak8jet_n2ddt", r"Jet $N_{2,DDT}^{\beta=1}$", 40, -.25,
                     .25))
        # "\Delta R" needs the space: "\DeltaR" would be read as one
        # (undefined) TeX command.
        hists['nminus1_ak4btagMediumDR08_muoncontrol'] = hist.Hist(
            "Events", dataset_axis, gencat_axis, jetmass_coarse_axis,
            doubleb_coarse_axis,
            hist.Bin("ak4_leadingDeepCSV_dR08",
                     r"Max(DeepCSV) ($\Delta R(ak4, ak8)>0.8$)", 40, 0, 1))
        hists['nminus1_muonDphiAK8_muoncontrol'] = hist.Hist(
            "Events", dataset_axis, gencat_axis, jetmass_coarse_axis,
            doubleb_coarse_axis,
            hist.Bin("muon_dphi", r"$\Delta\phi(\mu, j)$", 40, 0, np.pi))

        # --- Templates ------------------------------------------------------
        # All four share the same layout and differ only in the coarse tagger
        # axis; each one gets its own "systematic" category axis.
        template_tagger_axes = (
            ('templates_signalregion', doubleb_coarse_axis),
            ('templates_muoncontrol', doubleb_coarse_axis),
            ('templates_hCCsignalregion', doublec_coarse_axis),
            ('templates_hCCmuoncontrol', doublec_coarse_axis),
        )
        for template_name, tagger_axis in template_tagger_axes:
            hists[template_name] = hist.Hist(
                "Events", dataset_axis, gencat_axis,
                hist.Cat("systematic", "Systematic"), jetpt_axis,
                jetmass_axis, tagger_axis)

        self._accumulator = hists