def test_cutflow_2_collect(select_2, tmpdir, infile, full_event_range,
                           multi_chunk_func):
    """Collect select_2's cutflow over a data and an mc chunk and check the table."""
    # One chunk per event kind, both spanning the full event range.
    chunks = {
        kind: FakeBEEvent(
            MaskedUprootTree(infile, event_ranger=full_event_range), kind)
        for kind in ("data", "mc")
    }

    collector, dataset_readers_list = multi_chunk_func(
        select_2, tmpdir, chunks["data"], chunks["mc"])
    output = collector._prepare_output(dataset_readers_list)

    assert len(output) == 12

    # Data has unit weights, so weighted and unweighted columns must agree.
    data = output.xs("test_data", level="dataset", axis="rows")
    data_weighted = data.xs("EventWeight", level=1, axis="columns")
    data_unweighted = data.xs("unweighted", level=1, axis="columns")
    assert all(data_weighted == data_unweighted)

    # MC sees the same events, so its raw counts match the data's.
    mc = output.xs("test_mc", level="dataset", axis="rows")
    mc_unweighted = mc.xs("unweighted", level=1, axis="columns")
    assert all(mc_unweighted == data_unweighted)

    # Spot-check individual (dataset, depth, cut) x (column, weight) cells;
    # counts are doubled because each dataset was processed as two chunks.
    expected_cells = [
        (("test_data", 0, "All"), ("totals_incl", "unweighted"), 4580 * 2),
        (("test_data", 0, "All"), ("totals_incl", "EventWeight"), 4580 * 2),
        (("test_mc", 0, "All"), ("totals_incl", "unweighted"), 4580 * 2),
        (("test_data", 1, "NMuon > 1"), ("passed_only_cut", "unweighted"), 289 * 2),
        (("test_mc", 1, "NMuon > 1"), ("passed_only_cut", "unweighted"), 289 * 2),
    ]
    for row, column, expected in expected_cells:
        assert output.loc[row, column] == expected
# ---- Example 2 ----
def test_cutflow_1_executes_data(cutflow_1, infile, full_event_range, tmpdir):
    """Running cutflow_1 on a data chunk masks the tree and names its csv."""
    tree = MaskedUprootTree(infile, event_ranger=full_event_range)
    chunk = FakeBEEvent(tree, "data")
    cutflow_1.event(chunk)

    # Only the events passing cutflow_1's selection remain visible.
    assert len(chunk.tree) == 289

    expected_filename = str(tmpdir / "cuts_cutflow_1-NElectron.csv")
    assert cutflow_1.collector().filename == expected_filename
# ---- Example 3 ----
def test_sequential_stages(cutflow_1, select_2, infile, full_event_range,
                           tmpdir):
    """Two cutflow stages applied in sequence compound their selections."""
    cutflow_2 = stage.CutFlow("cutflow_2",
                              str(tmpdir),
                              selection=select_2,
                              weights="EventWeight")
    chunk = FakeBEEvent(
        MaskedUprootTree(infile, event_ranger=full_event_range), "data")

    # Apply both stages to the same chunk, one after the other.
    for cutflow in (cutflow_1, cutflow_2):
        cutflow.event(chunk)

    # Only two events survive both selections; check their jet momenta.
    assert len(chunk.tree) == 2
    jet_py = chunk.tree.array("Jet_Py")
    expected_py = [49.641838, 45.008915, -78.01798, 60.730812]
    assert pytest.approx(jet_py.flatten()) == expected_py
# ---- Example 4 ----
def test_cutflow_1_executes_mc(cutflow_1, infile, full_event_range, tmpdir):
    """Running cutflow_1 on an mc chunk masks the tree and fills the table."""
    tree = MaskedUprootTree(infile, event_ranger=full_event_range)
    chunk = FakeBEEvent(tree, "mc")
    cutflow_1.event(chunk)

    # Only the events passing cutflow_1's selection remain visible.
    assert len(chunk.tree) == 289

    collector = cutflow_1.collector()
    assert collector.filename == str(tmpdir / "cuts_cutflow_1-NElectron.csv")

    # Collect a single reader into the output table and spot-check the counts.
    readers = (("test_mc", (cutflow_1, )), )
    output = collector._prepare_output(readers)
    assert len(output) == 1
    for column in ("passed_only_cut", "passed_incl"):
        assert all(output[(column, "unweighted")] == [289])
    assert all(output[("totals_incl", "unweighted")] == [4580])
# ---- Example 5 ----
    def process(self, df):
        """Run the configured stage sequence over one dataframe chunk.

        Builds a masked tree for the chunk's entry range, wraps it in a
        SingleChunk, and feeds it to a fresh accumulator of stages.
        """
        output = self.accumulator.identity()

        # Recover the entry range this chunk covers from the dataframe's
        # branch arguments and mask the tree to it.
        entry_start = df._branchargs['entrystart']
        entry_stop = df._branchargs['entrystop']
        ranger = EventRanger(entry_start, entry_stop, entry_stop - entry_start)
        masked_tree = MaskedUprootTree(df._tree, ranger)

        # The dataset named 'data' is treated as data; everything else as mc.
        dsname = df['dataset']
        data_or_mc = 'data' if dsname == 'data' else 'mc'
        chunk = SingleChunk(masked_tree, ChunkConfig(ConfigProxy(dsname, data_or_mc)))

        # Fresh per-dataset stage accumulator; run every stage on the chunk.
        output['stages'][dsname] = stages_accumulator(self._sequence)
        for work in output['stages'][dsname]._value:
            work.event(chunk)

        return output