def _work_queue_preprocessing(
    items, accumulator, fn_wrapper, infile_function, tmpdir, exec_defaults
):
    preprocessing_bar = tqdm(
        desc="Preprocessing",
        total=len(items),
        disable=not exec_defaults["status"],
        unit=exec_defaults["unit"],
        bar_format=exec_defaults["bar_format"],
    )

    # submit one preprocessing task per work item
    for item in items:
        task = PreProcCoffeaWQTask(
            fn_wrapper, infile_function, item, tmpdir, exec_defaults
        )
        _wq_queue.submit(task)
        _vprint("submitted preprocessing task {}", task.id)

    # drain the queue, accumulating successful results and resubmitting failures
    while not _wq_queue.empty():
        task = _wq_queue.wait(5)
        if task:
            success = task.report(
                exec_defaults["print_stdout"], exec_defaults["resource_monitor"]
            )
            if success:
                accumulator = accumulate([task.output], accumulator)
                preprocessing_bar.update(1)
                task.cleanup_inputs()
                task.cleanup_outputs()
            else:
                task.resubmit(tmpdir, exec_defaults)

    preprocessing_bar.close()

    return accumulator
def accumulate_result_files(
    chunks_accum_in_mem, clevel, files_to_accumulate, accumulator=None
):
    from coffea.processor import accumulate

    in_memory = []

    # work on a local copy of the list
    files_to_accumulate = list(files_to_accumulate)
    while files_to_accumulate:
        f = files_to_accumulate.pop()

        # ensure that no files are left unprocessed because the length of the
        # list is smaller than the desired number of files in memory.
        chunks_accum_in_mem = min(chunks_accum_in_mem, len(files_to_accumulate))

        with open(f, "rb") as rf:
            result_f = dill.load(rf)
            if clevel is not None:
                result_f = _decompress(result_f)

        if not accumulator:
            accumulator = result_f
            continue

        in_memory.append(result_f)
        # merge once enough partial results are held in memory, then release them
        if len(in_memory) > chunks_accum_in_mem - 1:
            accumulator = accumulate(in_memory, accumulator)
            while in_memory:
                result = in_memory.pop()  # noqa
                del result

    return accumulator
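# A minimal, self-contained usage sketch (an assumption, not taken from the
# original source): dump two partial results with dill, then merge them back
# with accumulate_result_files. clevel=None means the payloads are stored
# uncompressed, so the _decompress branch is never taken.
def _demo_accumulate_result_files():
    import os
    import tempfile

    tmp = tempfile.mkdtemp()
    paths = []
    for i, partial in enumerate([{"sumw": 1.0}, {"sumw": 2.5}]):
        path = os.path.join(tmp, f"partial_{i}.p")
        with open(path, "wb") as wf:
            dill.dump(partial, wf)
        paths.append(path)

    merged = accumulate_result_files(2, None, paths)
    assert merged == {"sumw": 3.5}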
def test_accumulator_types():
    class MyDict(dict):
        pass

    out = processor.accumulate((
        {"x": 2},
        MyDict({"x": 3}),
    ))
    assert type(out) is dict
    with pytest.raises(ValueError):
        processor.accumulate((
            defaultdict(lambda: 2),
            MyDict({"x": 3}),
        ))
    out = processor.accumulate((
        MyDict({"x": 3}),
        {"x": 2},
    ))
    assert type(out) is dict
    with pytest.raises(ValueError):
        processor.accumulate((
            MyDict({"x": 3}),
            defaultdict(lambda: 2),
        ))
def process(self, events):
    isRealData = not hasattr(events, "genWeight")
    if isRealData:
        # Nominal JEC are already applied in data
        return self.process_shift(events, None)

    import cachetools
    jec_cache = cachetools.Cache(np.inf)

    nojer = "NOJER" if self._skipJER else ""
    fatjets = fatjet_factory[f"{self._year}mc{nojer}"].build(
        add_jec_variables(events.FatJet, events.fixedGridRhoFastjetAll), jec_cache
    )
    jets = jet_factory[f"{self._year}mc{nojer}"].build(
        add_jec_variables(events.Jet, events.fixedGridRhoFastjetAll), jec_cache
    )
    met = met_factory.build(events.MET, jets, {})

    # each shift is a (collection overrides, systematic name) pair
    shifts = [
        ({"Jet": jets, "FatJet": fatjets, "MET": met}, None),
    ]
    if self.systematics:
        shifts += [
            ({"Jet": jets.JES_jes.up, "FatJet": fatjets.JES_jes.up, "MET": met.JES_jes.up}, "JESUp"),
            ({"Jet": jets.JES_jes.down, "FatJet": fatjets.JES_jes.down, "MET": met.JES_jes.down}, "JESDown"),
            ({"Jet": jets, "FatJet": fatjets, "MET": met.MET_UnclusteredEnergy.up}, "UESUp"),
            ({"Jet": jets, "FatJet": fatjets, "MET": met.MET_UnclusteredEnergy.down}, "UESDown"),
        ]
        if not self._skipJER:
            shifts.extend([
                ({"Jet": jets.JER.up, "FatJet": fatjets.JER.up, "MET": met.JER.up}, "JERUp"),
                ({"Jet": jets.JER.down, "FatJet": fatjets.JER.down, "MET": met.JER.down}, "JERDown"),
            ])

    # HEM15/16 issue
    # if self._year == "2018":
    #     _runid = (events.run >= 319077)
    #     j_mask = ak.where((jets.phi > -1.57) & (jets.phi < -0.87) &
    #                       (jets.eta > -2.5) & (jets.eta < 1.3), 0.8, 1)
    #     fj_mask = ak.where((fatjets.phi > -1.57) & (fatjets.phi < -0.87) &
    #                        (fatjets.eta > -2.5) & (fatjets.eta < 1.3),
    #                        0.8, 1)
    #     shift_jets = copy.deepcopy(jets)
    #     shift_fatjets = copy.deepcopy(fatjets)
    #     for collection, mask in zip([shift_jets, shift_fatjets], [j_mask, fj_mask]):
    #         collection["pt"] = mask * collection.pt
    #         collection["mass"] = mask * collection.mass
    #     shifts.extend([
    #         ({"Jet": shift_jets, "FatJet": shift_fatjets, "MET": met}, "HEM18"),
    #     ])

    return processor.accumulate(
        self.process_shift(update(events, collections), name)
        for collections, name in shifts
    )
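# The update helper called above is not defined in this snippet. A minimal
# sketch consistent with common coffea examples (an assumption, not necessarily
# the exact implementation used here): return a shallow copy of the events
# record with the shifted collections swapped in.
import awkward as ak

def update(events, collections):
    """Return a shallow copy of events with selected collections replaced."""
    out = events
    for name, value in collections.items():
        out = ak.with_field(out, value, name)
    return out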
def test_new_accumulators():
    a = processor.accumulate((0.0, 3.0))
    assert a == 3.0

    a = processor.accumulate((
        np.array([2.0]),
        3.0,
    ))
    assert np.array_equal(a, np.array([5.0]))

    lacc = processor.accumulate((
        list(range(4)),
        [3],
        [1, 2],
    ))
    assert lacc == [0, 1, 2, 3, 3, 1, 2]

    b = processor.accumulate((
        {"apples", "oranges"},
        {"pears"},
        {"grapes"},
    ))
    assert b == {"apples", "oranges", "pears", "grapes"}

    c = processor.accumulate((
        {"num": a, "fruit": b},
        {"num": 2.0},
        {"num2": 0, "fruit": {"apples", "cherries"}},
    ))
    assert c["num2"] == 0
    assert np.array_equal(c["num"], np.array([7.0]))
    assert c["fruit"] == {"apples", "oranges", "pears", "grapes", "cherries"}

    d = processor.accumulate((
        defaultdict(float),
        {"x": 4.0, "y": 5.0},
        {"z": 4.0, "x": 5.0},
    ))
    assert d["x"] == 9.0
    assert d["y"] == 5.0
    assert d["z"] == 4.0
    # this is different than old style!
    with pytest.raises(KeyError):
        d["w"]

    f = processor.accumulate((
        defaultdict(lambda: 2.0),
        defaultdict(lambda: 2, {"x": 4.0}),
    ))
    assert f["x"] == 4.0
    assert f["y"] == 2.0
    # this is different than old style!
    f = processor.accumulate([f], f)
    assert f["x"] == 8.0
    assert f["y"] == 4.0
    assert f["z"] == 2.0

    a = processor.accumulate((
        processor.column_accumulator(np.arange(6).reshape(2, 3)),
        processor.column_accumulator(np.arange(12).reshape(4, 3)),
    ))
    assert a.value.sum() == 81
def collate(accumulator, mergemap):
    out = {}
    for group, names in mergemap.items():
        out[group] = processor.accumulate(
            [v for k, v in accumulator.items() if k in names]
        )
    return out
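# A minimal usage sketch of collate (dataset and group names below are
# hypothetical, not from the original source): per-dataset outputs whose keys
# appear in a group's name list are summed together with processor.accumulate.
def _demo_collate():
    per_dataset = {
        "TTToSemiLeptonic": {"sumw": 1.0},
        "TTTo2L2Nu": {"sumw": 2.0},
        "SingleMuon_2017B": {"sumw": 5.0},
    }
    mergemap = {
        "ttbar": ["TTToSemiLeptonic", "TTTo2L2Nu"],
        "data": ["SingleMuon_2017B"],
    }
    grouped = collate(per_dataset, mergemap)
    assert grouped["ttbar"] == {"sumw": 3.0}
    assert grouped["data"] == {"sumw": 5.0}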