def test_lumidata():
    from numba import types
    from numba.typed import Dict

    lumidata = LumiData("tests/samples/lumi_small.csv")

    runslumis = np.zeros((10, 2), dtype=np.uint32)
    runslumis[:, 0] = lumidata._lumidata[0:10, 0]
    runslumis[:, 1] = lumidata._lumidata[0:10, 1]
    lumi = lumidata.get_lumi(runslumis)
    diff = abs(lumi - 1.539941814)
    print("lumi:", lumi, "diff:", diff)
    assert diff < 1e-4

    # test build_lumi_table_kernel
    py_index = Dict.empty(
        key_type=types.Tuple([types.uint32, types.uint32]),
        value_type=types.float64,
    )
    pyruns = lumidata._lumidata[:, 0].astype("u4")
    pylumis = lumidata._lumidata[:, 1].astype("u4")
    LumiData.build_lumi_table_kernel.py_func(
        pyruns, pylumis, lumidata._lumidata, py_index
    )
    assert len(py_index) == len(lumidata.index)

    # test get_lumi_kernel
    py_tot_lumi = np.zeros((1,), dtype=np.float64)
    LumiData.get_lumi_kernel.py_func(
        runslumis[:, 0], runslumis[:, 1], py_index, py_tot_lumi
    )
    assert abs(py_tot_lumi[0] - lumi) < 1e-4
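# The ``.py_func`` attribute exercised above is standard numba behavior: a
# ``@numba.njit``-decorated function keeps its original Python implementation
# reachable as ``.py_func``, so tests can cover the logic without compiling it.
# A minimal self-contained sketch (``_add`` is a hypothetical example, not part
# of LumiData):
def test_py_func_sketch():
    import numba

    @numba.njit
    def _add(a, b):
        return a + b

    # The compiled dispatcher and its plain-Python fallback must agree.
    assert _add(1, 2) == _add.py_func(1, 2) == 3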
def test_lumilist():
    lumidata = LumiData("tests/samples/lumi_small.csv")

    runslumis1 = np.zeros((10, 2), dtype=np.uint32)
    runslumis1[:, 0] = lumidata._lumidata[0:10, 0]
    runslumis1[:, 1] = lumidata._lumidata[0:10, 1]
    runslumis2 = np.zeros((10, 2), dtype=np.uint32)
    runslumis2[:, 0] = lumidata._lumidata[10:20, 0]
    runslumis2[:, 1] = lumidata._lumidata[10:20, 1]

    llist1 = LumiList(runs=runslumis1[:, 0], lumis=runslumis1[:, 1])
    llist2 = LumiList(runs=runslumis2[:, 0], lumis=runslumis2[:, 1])
    llist3 = LumiList()
    llist3 += llist1
    llist3 += llist2

    lumi1 = lumidata.get_lumi(llist1)
    lumi2 = lumidata.get_lumi(llist2)
    lumi3 = lumidata.get_lumi(llist3)
    assert abs(lumi3 - (lumi1 + lumi2)) < 1e-4

    llist1.clear()
    assert llist1.array.size == 0
def test_lumidata():
    lumidata = LumiData("tests/samples/lumi_small.csv")

    runslumis = np.zeros((10, 2), dtype=np.uint32)
    runslumis[:, 0] = lumidata._lumidata[0:10, 0]
    runslumis[:, 1] = lumidata._lumidata[0:10, 1]
    lumi = lumidata.get_lumi(runslumis)
    diff = abs(lumi - 1.539941814)
    print("lumi:", lumi, "diff:", diff)
    assert diff < 0.1
def test_lumidata():
    from numba import types
    from numba.typed import Dict

    lumidata = LumiData("tests/samples/lumi_small.csv")

    # pickle & unpickle
    lumidata_pickle = cloudpickle.loads(cloudpickle.dumps(lumidata))

    # check same internal lumidata
    assert np.all(lumidata._lumidata == lumidata_pickle._lumidata)

    runslumis = np.zeros((10, 2), dtype=np.uint32)
    results = {"lumi": {}, "index": {}}
    for ld in lumidata, lumidata_pickle:
        runslumis[:, 0] = ld._lumidata[0:10, 0]
        runslumis[:, 1] = ld._lumidata[0:10, 1]
        lumi = ld.get_lumi(runslumis)
        diff = abs(lumi - 1.539941814)
        print("lumi:", lumi, "diff:", diff)
        assert diff < 1e-4

        # test build_lumi_table_kernel
        py_index = Dict.empty(
            key_type=types.Tuple([types.uint32, types.uint32]),
            value_type=types.float64,
        )
        pyruns = ld._lumidata[:, 0].astype("u4")
        pylumis = ld._lumidata[:, 1].astype("u4")
        LumiData._build_lumi_table_kernel.py_func(
            pyruns, pylumis, ld._lumidata, py_index
        )
        assert len(py_index) == len(ld.index)

        # test get_lumi_kernel
        py_tot_lumi = np.zeros((1,), dtype=np.float64)
        LumiData._get_lumi_kernel.py_func(
            runslumis[:, 0], runslumis[:, 1], py_index, py_tot_lumi
        )
        assert abs(py_tot_lumi[0] - lumi) < 1e-4

        # store results:
        results["lumi"][ld] = lumi
        results["index"][ld] = ld.index

    assert np.all(results["lumi"][lumidata] == results["lumi"][lumidata_pickle])
    assert len(results["index"][lumidata]) == len(results["index"][lumidata_pickle])
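# The pickle round-trip above relies on cloudpickle rather than the stdlib
# pickle, presumably because distributed executors ship payloads that way;
# cloudpickle also accepts objects the stdlib pickler rejects, such as lambdas
# and locally defined functions. A minimal illustration, independent of
# LumiData:
def test_cloudpickle_sketch():
    import pickle

    square = lambda x: x * x  # noqa: E731
    restored = cloudpickle.loads(cloudpickle.dumps(square))
    assert restored(3) == 9

    # The stdlib pickler refuses the same object.
    try:
        pickle.dumps(square)
        assert False, "expected stdlib pickle to fail on a lambda"
    except (pickle.PicklingError, AttributeError):
        pass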
def load(self, samples, nchunks=1, parallelize_outer=1, parallelize_inner=1):
    import multiprocessing as mp
    import time

    import numpy as np

    t0 = time.time()

    if (parallelize_outer * parallelize_inner) > (mp.cpu_count() - 1):
        raise ValueError(
            f"Trying to create too many workers "
            f"({parallelize_outer * parallelize_inner})! "
            f"Max allowed: {mp.cpu_count() - 1}."
        )

    self.nchunks = nchunks

    if parallelize_outer > 1:
        # Outer parallelism: one worker per sample. Note that this branch
        # does not forward parallelize_inner to load_sample.
        pool = mp.Pool(parallelize_outer)
        async_results = [
            pool.apply_async(self.load_sample, args=(s, )) for s in samples
        ]
        results = []
        for process in async_results:
            process.wait()
            results.append(process.get())
        pool.close()
    else:
        results = []
        for s in samples:
            results.append(self.load_sample(s, parallelize_inner))

    self.filesets_chunked = {}
    for res in results:
        sample = res['sample']
        if res['is_missing']:
            self.missing_samples.append(sample)
        else:
            self.samples.append(sample)
            self.filesets[sample] = {}
            self.filesets_chunked[sample] = []
            self.filesets[sample][sample] = res['files']
            self.full_fileset[sample] = res['files']
            self.metadata[sample] = res['metadata']
            self.data_entries += res['data_entries']
            self.lumi_list += res['lumi_list']

            all_filenames = np.array(self.filesets[sample][sample]['files'])
            # np.array_split can produce empty chunks when a sample has
            # fewer files than nchunks, hence the length check below.
            all_filenames_chunked = np.array_split(all_filenames, nchunks)
            all_filenames_chunked = [
                a.tolist() for a in all_filenames_chunked
            ]
            for i in range(nchunks):
                if len(all_filenames_chunked[i]) > 0:
                    files_i = {
                        'files': all_filenames_chunked[i],
                        'treename': 'Events'
                    }
                    self.filesets_chunked[sample].append({sample: files_i})

    if self.data_entries:
        print()
        data_entries_total = self.lumi_data[self.year]['events']
        print(f"Total events in {self.year}: {data_entries_total}")
        print(f"Loaded {self.data_entries} of {self.year} data events")
        prc = round(self.data_entries / data_entries_total * 100, 2)
        print(f"This is ~ {prc}% of {self.year} data.")

        lumi_data = LumiData(f"data/lumimasks/lumi{self.year}.csv")
        self.lumi = lumi_data.get_lumi(self.lumi_list)
        print(f"Integrated luminosity: {self.lumi}/pb")
        print()
    if self.missing_samples:
        print(f"Missing samples: {self.missing_samples}")

    t1 = time.time()
    dt = round(t1 - t0, 2)
    print(f"Loading took {dt} s")

    self.data_samples = [s for s in self.samples if 'data' in s]
    self.mc_samples = [s for s in self.samples if 'data' not in s]
    self.datasets_to_save_unbin += self.data_samples
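# A minimal usage sketch for ``load`` under the signature above. The class name
# ``DatasetLoader`` and the sample labels are hypothetical placeholders; only
# the ``load`` parameters come from the method itself.
#
#     loader = DatasetLoader(year='2018')
#     # Serial over samples, 2-way inner parallelism, 4 chunks per sample:
#     loader.load(['data_A', 'dy_m50'], nchunks=4, parallelize_inner=2)
#     # Or parallelize over samples instead; this branch does not forward
#     # parallelize_inner to load_sample:
#     loader.load(['data_A', 'dy_m50'], nchunks=4, parallelize_outer=2)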