Example #1
0
def test_lumidata():
    """Check LumiData lookups and the pure-Python numba kernel entry points."""
    from numba import types
    from numba.typed import Dict

    lumidata = LumiData("tests/samples/lumi_small.csv")

    # Take the first ten (run, lumi-section) pairs straight from the table.
    runslumis = np.zeros((10, 2), dtype=np.uint32)
    for col in (0, 1):
        runslumis[:, col] = lumidata._lumidata[0:10, col]
    total = lumidata.get_lumi(runslumis)
    diff = abs(total - 1.539941814)
    print("lumi:", total, "diff:", diff)
    assert diff < 1e-4

    # test build_lumi_table_kernel
    key_type = types.Tuple([types.uint32, types.uint32])
    py_index = Dict.empty(key_type=key_type, value_type=types.float64)
    pyruns = lumidata._lumidata[:, 0].astype('u4')
    pylumis = lumidata._lumidata[:, 1].astype('u4')
    LumiData.build_lumi_table_kernel.py_func(pyruns, pylumis,
                                             lumidata._lumidata, py_index)

    assert len(py_index) == len(lumidata.index)

    # test get_lumi_kernel
    py_tot_lumi = np.zeros((1, ), dtype=np.float64)
    LumiData.get_lumi_kernel.py_func(runslumis[:, 0], runslumis[:, 1],
                                     py_index, py_tot_lumi)

    # The kernel run outside numba must reproduce get_lumi's answer.
    assert abs(py_tot_lumi[0] - total) < 1e-4
Example #2
0
def test_lumilist():
    """Check LumiList concatenation (+=) is additive in luminosity, and clear()."""
    lumidata = LumiData("tests/samples/lumi_small.csv")

    # Copy a [start, stop) slice of the lumi table into a fresh uint32 array.
    def pairs(start, stop):
        arr = np.zeros((stop - start, 2), dtype=np.uint32)
        arr[:, 0] = lumidata._lumidata[start:stop, 0]
        arr[:, 1] = lumidata._lumidata[start:stop, 1]
        return arr

    runslumis1 = pairs(0, 10)
    runslumis2 = pairs(10, 20)

    llist1 = LumiList(runs=runslumis1[:, 0], lumis=runslumis1[:, 1])
    llist2 = LumiList(runs=runslumis2[:, 0], lumis=runslumis2[:, 1])
    llist3 = LumiList()
    llist3 += llist1
    llist3 += llist2

    lumi1 = lumidata.get_lumi(llist1)
    lumi2 = lumidata.get_lumi(llist2)
    lumi3 = lumidata.get_lumi(llist3)

    # Luminosity of the concatenation equals the sum over disjoint lists.
    assert abs(lumi3 - (lumi1 + lumi2)) < 1e-4

    llist1.clear()
    assert llist1.array.size == 0
Example #3
0
def test_lumidata():
    """Smoke-test LumiData.get_lumi against a known value for the sample CSV."""
    lumidata = LumiData("tests/samples/lumi_small.csv")

    # First ten rows of the table, copied column by column into uint32.
    first_ten = lumidata._lumidata[0:10]
    runslumis = np.zeros((10, 2), dtype=np.uint32)
    runslumis[:, 0] = first_ten[:, 0]
    runslumis[:, 1] = first_ten[:, 1]
    lumi = lumidata.get_lumi(runslumis)
    diff = abs(lumi - 1.539941814)
    print("lumi:", lumi, "diff:", diff)
    assert diff < 0.1
Example #4
0
def test_lumidata():
    """Check that LumiData survives a cloudpickle round trip.

    Both the original object and the unpickled copy must produce the same
    integrated luminosity and the same lookup index; the pure-Python
    ``py_func`` versions of the numba kernels are exercised for each copy.
    """
    from numba import types
    from numba.typed import Dict

    lumidata = LumiData("tests/samples/lumi_small.csv")

    # pickle & unpickle
    lumidata_pickle = cloudpickle.loads(cloudpickle.dumps(lumidata))

    # check same internal lumidata
    assert np.all(lumidata._lumidata == lumidata_pickle._lumidata)

    runslumis = np.zeros((10, 2), dtype=np.uint32)
    # Keyed by the LumiData instance itself so the two copies can be
    # compared after the loop.
    results = {"lumi": {}, "index": {}}
    for ld in lumidata, lumidata_pickle:
        runslumis[:, 0] = ld._lumidata[0:10, 0]
        runslumis[:, 1] = ld._lumidata[0:10, 1]
        lumi = ld.get_lumi(runslumis)
        diff = abs(lumi - 1.539941814)
        print("lumi:", lumi, "diff:", diff)
        assert diff < 1e-4

        # test build_lumi_table_kernel
        py_index = Dict.empty(
            key_type=types.Tuple([types.uint32, types.uint32]), value_type=types.float64
        )
        pyruns = ld._lumidata[:, 0].astype("u4")
        pylumis = ld._lumidata[:, 1].astype("u4")
        LumiData._build_lumi_table_kernel.py_func(
            pyruns, pylumis, ld._lumidata, py_index
        )

        assert len(py_index) == len(ld.index)

        # test get_lumi_kernel
        py_tot_lumi = np.zeros((1,), dtype=np.float64)
        LumiData._get_lumi_kernel.py_func(
            runslumis[:, 0], runslumis[:, 1], py_index, py_tot_lumi
        )

        assert abs(py_tot_lumi[0] - lumi) < 1e-4

        # store results (the original assigned results["lumi"][ld] twice,
        # once right after get_lumi and again here; a single store suffices)
        results["lumi"][ld] = lumi
        results["index"][ld] = ld.index

    assert np.all(results["lumi"][lumidata] == results["lumi"][lumidata_pickle])
    assert len(results["index"][lumidata]) == len(results["index"][lumidata_pickle])
Example #5
0
    def load(self,
             samples,
             nchunks=1,
             parallelize_outer=1,
             parallelize_inner=1):
        """Load sample metadata and split each sample's file list into chunks.

        Parameters
        ----------
        samples : iterable
            Sample names passed to ``self.load_sample``.
        nchunks : int
            Number of chunks each sample's file list is split into.
        parallelize_outer : int
            Worker processes used to load samples concurrently.
        parallelize_inner : int
            Inner parallelism forwarded to ``load_sample`` in the serial
            branch. NOTE(review): the parallel branch does not forward it,
            matching the original code — confirm whether that is intended.

        Raises
        ------
        ValueError
            If the requested worker count exceeds ``cpu_count() - 1``.
        """
        import multiprocessing as mp
        import time
        import numpy as np
        t0 = time.time()
        nworkers = parallelize_outer * parallelize_inner
        if nworkers > (mp.cpu_count() - 1):
            # The original printed the message and then executed a bare
            # ``raise`` with no active exception, which itself fails with
            # "No active exception to re-raise" — raise explicitly instead.
            raise ValueError(
                f"Trying to create too many workers ({nworkers})! Max allowed: {mp.cpu_count()-1}."
            )

        self.nchunks = nchunks

        if parallelize_outer > 1:
            # ``with`` closes the pool exactly once after all results are in;
            # the original called pool.close() inside the collection loop.
            with mp.Pool(parallelize_outer) as pool:
                tasks = [
                    pool.apply_async(self.load_sample, args=(s, ))
                    for s in samples
                ]
                # .get() blocks until the task finishes, so no separate wait().
                results = [t.get() for t in tasks]
        else:
            results = [self.load_sample(s, parallelize_inner) for s in samples]

        self.filesets_chunked = {}
        for res in results:
            sample = res['sample']
            if res['is_missing']:
                self.missing_samples.append(sample)
                continue

            self.samples.append(sample)
            self.filesets[sample] = {sample: res['files']}
            self.filesets_chunked[sample] = []
            self.full_fileset[sample] = res['files']

            self.metadata[sample] = res['metadata']
            self.data_entries = self.data_entries + res['data_entries']
            self.lumi_list += res['lumi_list']

            # Split this sample's file list into up to ``nchunks`` sub-filesets,
            # skipping any empty chunks produced by array_split.
            all_filenames = np.array(self.filesets[sample][sample]['files'])
            for chunk in np.array_split(all_filenames, nchunks):
                if len(chunk) > 0:
                    files_i = {
                        'files': chunk.tolist(),
                        'treename': 'Events'
                    }
                    self.filesets_chunked[sample].append({sample: files_i})

        if self.data_entries:
            print()
            data_entries_total = self.lumi_data[self.year]['events']
            print(f"Total events in {self.year}: {data_entries_total}")

            print(f"Loaded {self.data_entries} of {self.year} data events")
            prc = round(self.data_entries / data_entries_total * 100, 2)
            print(f"This is ~ {prc}% of {self.year} data.")
            # NOTE(review): lumi_data is constructed but the get_lumi call was
            # commented out in the original, and ``self.lumi`` is printed
            # without being set here — presumably assigned elsewhere; confirm
            # before removing this line.
            lumi_data = LumiData(f"data/lumimasks/lumi{self.year}.csv")
            # self.lumi = lumi_data.get_lumi(self.lumi_list)
            print(f"Integrated luminosity: {self.lumi}/pb")
            print()
        if self.missing_samples:
            print(f"Missing samples: {self.missing_samples}")

        t1 = time.time()
        dt = round(t1 - t0, 2)
        print(f"Loading took {dt} s")

        self.data_samples = [s for s in self.samples if 'data' in s]
        self.mc_samples = [s for s in self.samples if 'data' not in s]
        self.datasets_to_save_unbin += self.data_samples