Example #1
0
def _work_queue_preprocessing(
    items, accumulator, fn_wrapper, infile_function, tmpdir, exec_defaults
):
    """Submit one Work Queue preprocessing task per item and drain the queue.

    Each successful task's output is folded into ``accumulator``; failed
    tasks are resubmitted until the queue is empty.  Returns the updated
    accumulator.
    """
    progress = tqdm(
        desc="Preprocessing",
        total=len(items),
        disable=not exec_defaults["status"],
        unit=exec_defaults["unit"],
        bar_format=exec_defaults["bar_format"],
    )

    for entry in items:
        preproc_task = PreProcCoffeaWQTask(
            fn_wrapper, infile_function, entry, tmpdir, exec_defaults
        )
        _wq_queue.submit(preproc_task)
        _vprint("submitted preprocessing task {}", preproc_task.id)

    # Drain the queue; wait() may time out and return nothing, in which
    # case we simply poll again.
    while not _wq_queue.empty():
        done = _wq_queue.wait(5)
        if not done:
            continue
        ok = done.report(
            exec_defaults["print_stdout"], exec_defaults["resource_monitor"]
        )
        if ok:
            accumulator = accumulate([done.output], accumulator)
            progress.update(1)
            done.cleanup_inputs()
            done.cleanup_outputs()
        else:
            # Task failed: try again with the same inputs.
            done.resubmit(tmpdir, exec_defaults)

    progress.close()

    return accumulator
Example #2
0
def accumulate_result_files(
    chunks_accum_in_mem, clevel, files_to_accumulate, accumulator=None
):
    """Accumulate pickled partial results from files into one accumulator.

    At most ``chunks_accum_in_mem`` partial results are held in memory
    before being folded into ``accumulator``.  When ``clevel`` is not
    None the on-disk payloads are decompressed after loading.
    """
    from coffea.processor import accumulate

    loaded = []

    # Work on a local copy so the caller's list is left untouched.
    pending = list(files_to_accumulate)
    while pending:
        path = pending.pop()

        # Shrink the in-memory budget to what remains so that no files
        # are left unprocessed when the list is shorter than the budget.
        chunks_accum_in_mem = min(chunks_accum_in_mem, len(pending))

        with open(path, "rb") as fh:
            partial = dill.load(fh)
            if clevel is not None:
                partial = _decompress(partial)

        # First result seen becomes the accumulator itself.
        if not accumulator:
            accumulator = partial
            continue

        loaded.append(partial)
        if len(loaded) >= chunks_accum_in_mem:
            accumulator = accumulate(loaded, accumulator)
            # Drop references promptly to keep peak memory low.
            loaded.clear()
    return accumulator
Example #3
0
def test_accumulator_types():
    """dict subclasses decay to plain dict under accumulate, in either
    order; mixing a dict subclass with a defaultdict raises ValueError."""
    class MyDict(dict):
        pass

    # plain dict first, subclass second
    merged = processor.accumulate(({"x": 2}, MyDict({"x": 3})))
    assert type(merged) is dict

    with pytest.raises(ValueError):
        processor.accumulate((defaultdict(lambda: 2), MyDict({"x": 3})))

    # subclass first, plain dict second
    merged = processor.accumulate((MyDict({"x": 3}), {"x": 2}))
    assert type(merged) is dict

    with pytest.raises(ValueError):
        processor.accumulate((MyDict({"x": 3}), defaultdict(lambda: 2)))
    def process(self, events):
        """Run the analysis over *events*, applying JEC/JER corrections.

        Real data (no ``genWeight`` branch) is processed once with the
        nominal corrections.  For MC, corrected Jet/FatJet/MET collections
        are built and ``process_shift`` is run for the nominal collections
        plus, when ``self.systematics`` is truthy, JES and unclustered-energy
        up/down variations (and JER variations unless ``self._skipJER``).
        The per-shift outputs are merged with ``processor.accumulate``.
        """
        # Data carries no generator weight branch; MC does.
        isRealData = not hasattr(events, "genWeight")

        if isRealData:
            # Nominal JEC are already applied in data
            return self.process_shift(events, None)

        import cachetools
        # Unbounded cache shared by the jet and fat-jet factories.
        jec_cache = cachetools.Cache(np.inf)
        # Factory keys are e.g. "2018mc" or "2018mcNOJER".
        nojer = "NOJER" if self._skipJER else ""
        fatjets = fatjet_factory[f"{self._year}mc{nojer}"].build(
            add_jec_variables(events.FatJet, events.fixedGridRhoFastjetAll),
            jec_cache)
        jets = jet_factory[f"{self._year}mc{nojer}"].build(
            add_jec_variables(events.Jet, events.fixedGridRhoFastjetAll),
            jec_cache)
        met = met_factory.build(events.MET, jets, {})

        # Each entry: (replacement collections, shift name); None = nominal.
        shifts = [
            ({
                "Jet": jets,
                "FatJet": fatjets,
                "MET": met
            }, None),
        ]
        if self.systematics:
            # JES variations shift all three collections; UES (unclustered
            # energy) affects only the MET.
            shifts += [
                ({
                    "Jet": jets.JES_jes.up,
                    "FatJet": fatjets.JES_jes.up,
                    "MET": met.JES_jes.up
                }, "JESUp"),
                ({
                    "Jet": jets.JES_jes.down,
                    "FatJet": fatjets.JES_jes.down,
                    "MET": met.JES_jes.down
                }, "JESDown"),
                ({
                    "Jet": jets,
                    "FatJet": fatjets,
                    "MET": met.MET_UnclusteredEnergy.up
                }, "UESUp"),
                ({
                    "Jet": jets,
                    "FatJet": fatjets,
                    "MET": met.MET_UnclusteredEnergy.down
                }, "UESDown"),
            ]
            if not self._skipJER:
                shifts.extend([
                    ({
                        "Jet": jets.JER.up,
                        "FatJet": fatjets.JER.up,
                        "MET": met.JER.up
                    }, "JERUp"),
                    ({
                        "Jet": jets.JER.down,
                        "FatJet": fatjets.JER.down,
                        "MET": met.JER.down
                    }, "JERDown"),
                ])
        # HEM15/16 issue
        # if self._year == "2018":
        #     _runid = (events.run >= 319077)
        #     j_mask = ak.where((jets.phi > -1.57) & (jets.phi < -0.87) &
        #                       (jets.eta > -2.5) & (jets.eta < 1.3), 0.8, 1)
        #     fj_mask = ak.where((fatjets.phi > -1.57) & (fatjets.phi < -0.87) &
        #                        (fatjets.eta > -2.5) & (fatjets.eta < 1.3),
        #                        0.8, 1)
        #     shift_jets = copy.deepcopy(jets)
        #     shift_fatjets = copy.deepcopy(fatjets)
        #     for collection, mask in zip([shift_jets, shift_fatjets], [j_mask, fj_mask]):
        #         collection["pt"] = mask * collection.pt
        #         collection["mass"] = mask * collection.mass
        #     shifts.extend([
        #         ({"Jet": shift_jets, "FatJet": shift_fatjets, "MET": met}, "HEM18"),
        #     ])

        # Run process_shift once per shift and merge all outputs.
        return processor.accumulate(
            self.process_shift(update(events, collections), name)
            for collections, name in shifts)
Example #5
0
def test_new_accumulators():
    """Exercise processor.accumulate over scalars, numpy arrays, lists,
    sets, nested dicts, defaultdicts and column accumulators."""
    total = processor.accumulate((0.0, 3.0))
    assert total == 3.0

    total = processor.accumulate((np.array([2.0]), 3.0))
    assert np.array_equal(total, np.array([5.0]))

    merged_list = processor.accumulate((list(range(4)), [3], [1, 2]))
    assert merged_list == [0, 1, 2, 3, 3, 1, 2]

    fruit = processor.accumulate((
        {"apples", "oranges"},
        {"pears"},
        {"grapes"},
    ))
    assert fruit == {"apples", "oranges", "pears", "grapes"}

    # nested dicts merge per-key
    nested = processor.accumulate((
        {"num": total, "fruit": fruit},
        {"num": 2.0},
        {"num2": 0, "fruit": {"apples", "cherries"}},
    ))
    assert nested["num2"] == 0
    assert np.array_equal(nested["num"], np.array([7.0]))
    assert nested["fruit"] == {"apples", "oranges", "pears", "grapes", "cherries"}

    dd = processor.accumulate((
        defaultdict(float),
        {"x": 4.0, "y": 5.0},
        {"z": 4.0, "x": 5.0},
    ))
    assert dd["x"] == 9.0
    assert dd["y"] == 5.0
    assert dd["z"] == 4.0
    # this is different than old style!
    with pytest.raises(KeyError):
        dd["w"]

    withdefault = processor.accumulate((
        defaultdict(lambda: 2.0),
        defaultdict(lambda: 2, {"x": 4.0}),
    ))
    assert withdefault["x"] == 4.0
    assert withdefault["y"] == 2.0

    # this is different than old style!
    withdefault = processor.accumulate([withdefault], withdefault)
    assert withdefault["x"] == 8.0
    assert withdefault["y"] == 4.0
    assert withdefault["z"] == 2.0

    cols = processor.accumulate((
        processor.column_accumulator(np.arange(6).reshape(2, 3)),
        processor.column_accumulator(np.arange(12).reshape(4, 3)),
    ))
    assert cols.value.sum() == 81
Example #6
0
def collate(accumulator, mergemap):
    """Merge accumulator entries into groups defined by *mergemap*.

    For each ``group -> names`` pair, accumulate every value of
    ``accumulator`` whose key appears in ``names``.
    """
    return {
        group: processor.accumulate(
            [value for key, value in accumulator.items() if key in names]
        )
        for group, names in mergemap.items()
    }