Example #1
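A processor `__init__` that books pileup histograms (`nTrueInt`, `nPU`) and, for each sample in `fileset` (defined at module scope in the original source), adds `defaultdict_accumulator(int)` counters plus `value_accumulator(list)` entries for run/lumi bookkeeping, all wrapped in a single `dict_accumulator`.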
    def __init__(self):

        ## make binning for hists
        self.dataset_axis = hist.Cat("dataset", "Event Process")
        self.pu_nTrueInt_axis = hist.Bin("pu_nTrueInt", "nTrueInt", 100, 0,
                                         100)
        self.pu_nPU_axis = hist.Bin("pu_nPU", "nPU", 100, 0, 100)

        ## make dictionary of hists
        histo_dict = {}
        histo_dict['PU_nTrueInt'] = hist.Hist("PU_nTrueInt", self.dataset_axis,
                                              self.pu_nTrueInt_axis)
        histo_dict['PU_nPU'] = hist.Hist("PU_nPU", self.dataset_axis,
                                         self.pu_nPU_axis)

        ## construct dictionary of dictionaries to hold meta info for each sample
        for sample in fileset:
            if 'Int' in sample:
                histo_dict['%s_pos' % sample] = processor.defaultdict_accumulator(int)
                histo_dict['%s_pos_runs_to_lumis' % sample] = processor.value_accumulator(list)
                histo_dict['%s_neg' % sample] = processor.defaultdict_accumulator(int)
                histo_dict['%s_neg_runs_to_lumis' % sample] = processor.value_accumulator(list)
            else:
                histo_dict[sample] = processor.defaultdict_accumulator(int)
                histo_dict['%s_runs_to_lumis' % sample] = processor.value_accumulator(list)

        self._accumulator = processor.dict_accumulator(histo_dict)
        self.sample_name = ''
Example #2
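A unit test covering every accumulator type: `value_accumulator` (with scalar and array identities), `list_accumulator`, `set_accumulator`, `dict_accumulator`, `defaultdict_accumulator`, and `column_accumulator`, verifying that `+=` merges contents and `identity()` yields a fresh zero element.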
def test_accumulators():
    a = processor.value_accumulator(float)
    a += 3.0
    assert a.value == 3.0
    assert a.identity().value == 0.0

    a = processor.value_accumulator(partial(np.array, [2.0]))
    a += 3.0
    assert np.array_equal(a.value, np.array([5.0]))
    assert np.array_equal(a.identity().value, np.array([2.0]))

    lacc = processor.list_accumulator(range(4))
    lacc += [3]
    lacc += processor.list_accumulator([1, 2])
    assert lacc == [0, 1, 2, 3, 3, 1, 2]

    b = processor.set_accumulator({"apples", "oranges"})
    b += {"pears"}
    b += "grapes"
    assert b == {"apples", "oranges", "pears", "grapes"}

    c = processor.dict_accumulator({"num": a, "fruit": b})
    c["num"] += 2.0
    c += processor.dict_accumulator({
        "num2": processor.value_accumulator(int),
        "fruit": processor.set_accumulator({"apples", "cherries"}),
    })
    assert c["num2"].value == 0
    assert np.array_equal(c["num"].value, np.array([7.0]))
    assert c["fruit"] == {"apples", "oranges", "pears", "grapes", "cherries"}

    d = processor.defaultdict_accumulator(float)
    d["x"] = 0.0
    d["x"] += 4.0
    d["y"] += 5.0
    d["z"] += d["x"]
    d["x"] += d["y"]
    assert d["x"] == 9.0
    assert d["y"] == 5.0
    assert d["z"] == 4.0
    assert d["w"] == 0.0

    f = processor.defaultdict_accumulator(lambda: 2.0)
    f["x"] += 4.0
    assert f["x"] == 6.0

    f += f
    assert f["x"] == 12.0
    assert f["y"] == 2.0

    a = processor.column_accumulator(np.arange(6).reshape(2, 3))
    b = processor.column_accumulator(np.arange(12).reshape(4, 3))
    a += b
    assert a.value.sum() == 81
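The property these tests establish is that per-chunk outputs can be combined with plain addition, which is what lets processors run in parallel and merge at the end. A minimal sketch of that pattern (the accumulator layout and names here are illustrative, not taken from any example on this page):

from coffea import processor

# Shared layout; identity() clones it with zeroed contents.
total = processor.dict_accumulator({
    'nevents': processor.value_accumulator(int),
    'columns': processor.set_accumulator(),
})

for chunk_size in (100, 250):   # stand-ins for two file chunks
    out = total.identity()      # independent accumulator for this chunk
    out['nevents'] += chunk_size
    out['columns'] += {'Muon_pt'}
    total += out                # merging is just addition

assert total['nevents'].value == 350
assert total['columns'] == {'Muon_pt'}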
Example #3
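A standalone work function: it opens one file with uproot, accumulates the `Pu` histogram contents into a `value_accumulator` whose identity is a zero array of matching size, sums the first bin of `SumWeights` into an integer accumulator, and returns both keyed by dataset inside nested `dict_accumulator`s.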
def get_pileup(item):
    dataset, filename = item
    file = uproot.open(filename)
    puhist = file["Pu"]
    pileup = processor.value_accumulator(partial(np.zeros, puhist.values.size))
    pileup += puhist.values
    sumwhist = file["SumWeights"]
    sumw = processor.value_accumulator(int)
    sumw += sumwhist.values[0]
    return processor.dict_accumulator({
        'pileup': processor.dict_accumulator({dataset: pileup}),
        'sumw': processor.dict_accumulator({dataset: sumw}),
    })
Example #4
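A processor skeleton for an exercise: it validates the requested jet systematic, books the axes, leaves the histogram entries as TODO comments (only `EventCount` is filled in), and loads b-tagging and lepton scale factors; helpers like `extractor`, `util.load`, and `cwd` are presumably defined at module scope.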
    def __init__(self, mcEventYields=None, jetSyst='nominal'):
        ################################
        # INITIALIZE COFFEA PROCESSOR
        ################################

        self.mcEventYields = mcEventYields

        if jetSyst not in ['nominal', 'JERUp', 'JERDown', 'JESUp', 'JESDown']:
            raise Exception(
                f'{jetSyst} is not in acceptable jet systematic types [nominal, JERUp, JERDown, JESUp, JESDown]'
            )

        self.jetSyst = jetSyst

        dataset_axis = hist.Cat("dataset", "Dataset")
        lep_axis = hist.Cat("lepFlavor", "Lepton Flavor")

        systematic_axis = hist.Cat("systematic", "Systematic Uncertainty")

        m3_axis = hist.Bin("M3", r"$M_3$ [GeV]", 200, 0., 1000)
        mass_axis = hist.Bin("mass", r"$m_{\ell\gamma}$ [GeV]", 400, 0., 400)
        pt_axis = hist.Bin("pt", r"$p_{T}$ [GeV]", 200, 0., 1000)
        eta_axis = hist.Bin("eta", r"$\eta_{\gamma}$", 300, -1.5, 1.5)
        chIso_axis = hist.Bin("chIso", r"Charged Hadron Isolation",
                              np.arange(-0.1, 20.001, .05))

        ## Define axis to keep track of photon category
        phoCategory_axis = hist.Bin("category", r"Photon Category",
                                    [1, 2, 3, 4, 5])
        phoCategory_axis.identifiers()[0].label = "Genuine Photon"
        phoCategory_axis.identifiers()[1].label = "Misidentified Electron"
        phoCategory_axis.identifiers()[2].label = "Hadronic Photon"
        phoCategory_axis.identifiers()[3].label = "Hadronic Fake"

        ### Accumulator for holding histograms
        self._accumulator = processor.dict_accumulator({
            # 3. ADD HISTOGRAMS
            ## book histograms for photon pt, eta, and charged hadron isolation
            #'photon_pt':
            #'photon_eta':
            #'photon_chIso':

            ## book histogram for photon/lepton mass in a 3j0t region
            #'photon_lepton_mass_3j0t':

            ## book histogram for M3 variable
            #'M3':
            'EventCount': processor.value_accumulator(int),
        })

        ext = extractor()
        ext.add_weight_sets([
            f"btag2016 * {cwd}/ScaleFactors/Btag/DeepCSV_2016LegacySF_V1.btag.csv"
        ])
        ext.finalize()
        self.evaluator = ext.make_evaluator()

        self.ele_id_sf = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/ele_id_sf.coffea')
        self.ele_id_err = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/ele_id_err.coffea')

        self.ele_reco_sf = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/ele_reco_sf.coffea')
        self.ele_reco_err = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/ele_reco_err.coffea')

        self.mu_id_sf = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_id_sf.coffea')
        self.mu_id_err = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_id_err.coffea')

        self.mu_iso_sf = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_iso_sf.coffea')
        self.mu_iso_err = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_iso_err.coffea')

        self.mu_trig_sf = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_trig_sf.coffea')
        self.mu_trig_err = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_trig_err.coffea')
Example #5
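The completed version of the previous processor: all photon and lepton histograms are booked alongside the `EventCount` accumulator, and electron/muon scale factors are read from ROOT files, with the muon ID/iso/trigger factors combined across run periods (weighted by `scaleFactors['scale']`) into `dense_lookup` tables.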
    def __init__(self, runNum=-1, eventNum=-1, mcEventYields=None):
        self.mcEventYields = mcEventYields
        dataset_axis = hist.Cat("dataset", "Dataset")
        lep_axis = hist.Bin("lepFlavor", r"ElectronOrMuon", 2, -1, 1)
        lep_axis.identifiers()[0].label = 'Electron'
        lep_axis.identifiers()[1].label = 'Muon'

        m3_axis = hist.Bin("M3", r"$M_3$ [GeV]", 200, 0., 1000)
        mass_axis = hist.Bin("mass", r"$m_{\ell\gamma}$ [GeV]", 400, 0., 400)
        pt_axis = hist.Bin("pt", r"$p_{T}$ [GeV]", 200, 0., 1000)
        eta_axis = hist.Bin("eta", r"$\eta_{\gamma}$", 300, -1.5, 1.5)
        chIso_axis = hist.Bin("chIso", r"Charged Hadron Isolation",
                              np.arange(-0.1, 20.001, .05))

        ## Define axis to keep track of photon category
        phoCategory_axis = hist.Bin("category", r"Photon Category",
                                    [1, 2, 3, 4, 5])
        phoCategory_axis.identifiers()[0].label = "Genuine Photon"
        phoCategory_axis.identifiers()[1].label = "Misidentified Electron"
        phoCategory_axis.identifiers()[2].label = "Hadronic Photon"
        phoCategory_axis.identifiers()[3].label = "Hadronic Fake"

        ### Accumulator for holding histograms
        self._accumulator = processor.dict_accumulator({
            ## photon histograms
            'photon_pt': hist.Hist("Counts", dataset_axis, pt_axis,
                                   phoCategory_axis, lep_axis),
            'photon_eta': hist.Hist("Counts", dataset_axis, eta_axis,
                                    phoCategory_axis, lep_axis),
            'photon_chIso': hist.Hist("Counts", dataset_axis, chIso_axis,
                                      phoCategory_axis, lep_axis),
            'photon_chIsoSideband': hist.Hist("Counts", dataset_axis, chIso_axis,
                                              phoCategory_axis, lep_axis),
            'photon_lepton_mass': hist.Hist("Counts", dataset_axis, mass_axis,
                                            phoCategory_axis, lep_axis),
            'photon_lepton_mass_3j0t': hist.Hist("Counts", dataset_axis, mass_axis,
                                                 phoCategory_axis, lep_axis),
            'M3': hist.Hist("Counts", dataset_axis, m3_axis,
                            phoCategory_axis, lep_axis),
            'M3Presel': hist.Hist("Counts", dataset_axis, m3_axis, lep_axis),
            'EventCount': processor.value_accumulator(int),
        })

        self.eventNum = eventNum
        self.runNum = runNum

        ext = extractor()
        ext.add_weight_sets([
            f"btag2016 * {cwd}/ScaleFactors/Btag/DeepCSV_2016LegacySF_V1.btag.csv"
        ])
        ext.finalize()
        self.evaluator = ext.make_evaluator()

        ele_id_file = uproot.open(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/ele2016/2016LegacyReReco_ElectronTight_Fall17V2.root'
        )
        self.ele_id_sf = dense_lookup.dense_lookup(
            ele_id_file["EGamma_SF2D"].values,
            ele_id_file["EGamma_SF2D"].edges)
        self.ele_id_err = dense_lookup.dense_lookup(
            ele_id_file["EGamma_SF2D"].variances**0.5,
            ele_id_file["EGamma_SF2D"].edges)

        ele_reco_file = uproot.open(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/ele2016/egammaEffi.txt_EGM2D_runBCDEF_passingRECO.root'
        )
        self.ele_reco_sf = dense_lookup.dense_lookup(
            ele_reco_file["EGamma_SF2D"].values,
            ele_reco_file["EGamma_SF2D"].edges)
        self.ele_reco_err = dense_lookup.dense_lookup(
            ele_reco_file["EGamma_SF2D"].variances**.5,
            ele_reco_file["EGamma_SF2D"].edges)

        mu_id_vals = 0
        mu_id_err = 0
        mu_iso_vals = 0
        mu_iso_err = 0
        mu_trig_vals = 0
        mu_trig_err = 0

        for scaleFactors in muSFFileList:
            id_file = uproot.open(scaleFactors['id'][0])
            iso_file = uproot.open(scaleFactors['iso'][0])
            trig_file = uproot.open(scaleFactors['trig'][0])

            id_hist = id_file[scaleFactors['id'][1]]
            mu_id_vals += id_hist.values * scaleFactors['scale']
            mu_id_err += id_hist.variances**0.5 * scaleFactors['scale']
            mu_id_edges = id_hist.edges

            iso_hist = iso_file[scaleFactors['iso'][1]]
            mu_iso_vals += iso_hist.values * scaleFactors['scale']
            mu_iso_err += iso_hist.variances**0.5 * scaleFactors['scale']
            mu_iso_edges = iso_hist.edges

            trig_hist = trig_file[scaleFactors['trig'][1]]
            mu_trig_vals += trig_hist.values * scaleFactors['scale']
            mu_trig_err += trig_hist.variances**0.5 * scaleFactors['scale']
            mu_trig_edges = trig_hist.edges

        self.mu_id_sf = dense_lookup.dense_lookup(mu_id_vals, mu_id_edges)
        self.mu_id_err = dense_lookup.dense_lookup(mu_id_err, mu_id_edges)
        self.mu_iso_sf = dense_lookup.dense_lookup(mu_iso_vals, mu_iso_edges)
        self.mu_iso_err = dense_lookup.dense_lookup(mu_iso_err, mu_iso_edges)
        self.mu_trig_sf = dense_lookup.dense_lookup(mu_trig_vals,
                                                    mu_trig_edges)
        self.mu_trig_err = dense_lookup.dense_lookup(mu_trig_err,
                                                     mu_trig_edges)
Example #6
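A fragment of a batch driver: after optionally validating the input files, it builds a `combined_accumulator` holding both job-level statistics (`nentries`, `bytesread`, `sumworktime`, `columns_accessed`) and the identity of the processor's own accumulator; `work_function` then processes one (dataset, file) pair.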
    if args.validate:
        for ds, fn in tqdm(filelist, desc='Validating files'):
            try:
                validate(ds, fn)
            except OSError:
                print("File open error for %s, %s" % (ds, fn))
        exit(0)

    processor_instance = load(args.processor)

    combined_accumulator = processor.dict_accumulator({
        'stats': processor.dict_accumulator({
            'nentries': processor.value_accumulator(int),
            'bytesread': processor.value_accumulator(int),
            'sumworktime': processor.value_accumulator(float),
            'columns_accessed': processor.set_accumulator(),
        }),
        'job': processor_instance.accumulator.identity(),
    })

    def work_function(item):
        dataset, file = item
        out, stats = process_file(dataset, file, processor_instance,
                                  combined_accumulator['stats'], preload_items,