# Excerpt of a script body: the enclosing header and the definitions of
# `tick` and `parameters` lie outside the visible portion.
# Dask client configured for one worker process with a single thread and a
# 2.9GB memory limit.
client = dask.distributed.Client(processes=True,
                                     n_workers=1,
                                     threads_per_worker=1,
                                     memory_limit="2.9GB")
    print("Client created")

    # The input sample is looked up under tests/samples relative to the
    # current working directory, so the result depends on where this is run.
    file_name = "ggh_delphes.root"
    file_path = f"{os.getcwd()}/tests/samples/{file_name}"
    datasets = {"ggh_powheg": file_path}

    # get_fileset is defined elsewhere; presumably it turns the
    # dataset-name -> path mapping into the fileset the runner expects
    # (TODO confirm).
    fileset = get_fileset(client, datasets, parameters)

    # Run DimuonProcessorDelphes over the "Delphes" tree through a Dask
    # executor, with retries disabled and dataframe output requested.
    executor_args = {"client": client, "use_dataframes": True, "retries": 0}
    executor = DaskExecutor(**executor_args)
    run = Runner(executor=executor, schema=DelphesSchema, chunksize=10000)
    output = run(fileset,
                 "Delphes",
                 processor_instance=DimuonProcessorDelphes())

    # compute() materializes the runner output; the result is used below as
    # a dataframe (.loc / .shape). Presumably `output` is a lazy Dask
    # dataframe (TODO confirm).
    df = output.compute()
    print(df)

    # Wall-clock time since `tick`, rounded to milliseconds.
    elapsed = round(time.time() - tick, 3)
    print(f"Finished everything in {elapsed} s.")

    # Regression checks: pin the output shape and one value per quantity,
    # each taken from the first row matching a specific event number.
    dimuon_mass = df.loc[df.event == 20002, "dimuon_mass"].values[0]
    jj_mass = df.loc[df.event == 20011, "jj_mass"].values[0]
    assert df.shape == (86, 78)
    assert almost_equal(dimuon_mass, 124.3369651)
    assert almost_equal(jj_mass, 78.593476)
예제 #2
0
        processor_instance=DimuonProcessor(**processor_args),
    )

    # Excerpt: `client`, `parameters`, `out_df` and `tick` are defined before
    # the visible portion. The three stages below (load_dataframe,
    # to_histograms, plotter) are defined elsewhere and are chained, each
    # consuming the previous stage's result.
    df = load_dataframe(client, parameters, inputs=out_df)
    out_hist = to_histograms(client, parameters, df=df)
    out_plot = plotter(client, parameters, hist_df=out_hist)

    # NOTE: the elapsed time is taken before out_df.compute() below, so that
    # call is not included in the reported duration.
    elapsed = round(time.time() - tick, 3)
    print(f"Finished everything in {elapsed} s.")

    # Materialize the stage-1 output so individual rows can be inspected.
    out_df = out_df.compute()
    dimuon_mass = out_df.loc[out_df.event == 2, "dimuon_mass"].values[0]
    jj_mass = out_df.loc[out_df.event == 2, "jj_mass nominal"].values[0]

    # Regression checks on the stage-1 dataframe: shape plus two values
    # taken from the first row with event == 2.
    assert out_df.shape == (21806, 116)
    assert almost_equal(dimuon_mass, 124.16069531)
    assert almost_equal(jj_mass, 1478.3898375)

    # Selection used to index the histogram object: fixed region, channel,
    # variation and "value" entry, with the full dimuon_mass axis kept.
    slicer = {
        "region": "h-peak",
        "channel": "vbf",
        "variation": "nominal",
        "val_sumw2": "value",
        "dimuon_mass": slice(None),
    }

    # The summed histogram selection and the summed plotter output are both
    # compared with the same reference yield, with precision=0.01 passed to
    # almost_equal (its exact semantics are defined elsewhere).
    assert almost_equal(out_hist["hist"][0][slicer].sum(),
                        31778.21631,
                        precision=0.01)
    assert almost_equal(sum(out_plot), 31778.21631, precision=0.01)
예제 #3
0
                 processor_instance=DimuonProcessorDelphes())

    # Excerpt: `client`, `parameters`, `out_df` and `tick` are defined before
    # the visible portion. The stages below are defined elsewhere; the
    # histogram result feeds both the plotter and the template step.
    df = load_dataframe(client, parameters, inputs=out_df)
    # NOTE(review): the run_mva step is disabled in this script.
    # run_mva(client, parameters, df=df)
    out_hist = to_histograms(client, parameters, df=df)
    out_plot = plotter(client, parameters, hist_df=out_hist)
    out_tmp = to_templates(client, parameters, hist_df=out_hist)

    # NOTE: the elapsed time is taken before out_df.compute() below, so that
    # call is not included in the reported duration.
    elapsed = round(time.time() - tick, 3)
    print(f"Finished everything in {elapsed} s.")

    # Materialize the stage-1 output and pin its shape plus one value per
    # quantity, each taken from the first row matching a given event number.
    out_df = out_df.compute()
    dimuon_mass = out_df.loc[out_df.event == 20002, "dimuon_mass"].values[0]
    jj_mass = out_df.loc[out_df.event == 20011, "jj_mass"].values[0]
    assert out_df.shape == (86, 78)
    assert almost_equal(dimuon_mass, 124.3369651)
    assert almost_equal(jj_mass, 78.593476)
    # Selection used to index the histogram object: fixed region, channel
    # and "value" entry, with the full dimuon_mass axis kept.
    slicer = {
        "region": "h-peak",
        "channel": "ggh_0jets",
        "val_sumw2": "value",
        "dimuon_mass": slice(None),
    }

    # The histogram is picked from the row whose var_name is "dimuon_mass";
    # its summed selection, the summed plotter output and the summed template
    # output are all compared with the same reference yield.
    assert almost_equal(
        out_hist.loc[out_hist.var_name == "dimuon_mass",
                     "hist"].values[0][slicer].sum(),
        12426.530232,
    )
    assert almost_equal(sum(out_plot), 12426.530232)
    assert almost_equal(sum(out_tmp), 12426.530232)
예제 #4
0
    # Excerpt: `samp_info`, `client` and `tick` are defined before the
    # visible portion.
    # Load the "test" sample with use_dask=False, then set its lumi_weights
    # entry to 1.0 (presumably so the yields are not rescaled; TODO confirm).
    samp_info.load("test", use_dask=False)
    samp_info.lumi_weights["test"] = 1.0
    print(samp_info.fileset)

    # Executor: retries disabled, dataframe output requested.
    # Processor: timer and b-tag systematics switched off.
    executor_args = {"client": client, "use_dataframes": True, "retries": 0}
    processor_args = {
        "samp_info": samp_info,
        "do_timer": False,
        "do_btag_syst": False
    }

    # Run DimuonProcessor over the "Events" tree of the loaded fileset.
    executor = DaskExecutor(**executor_args)
    run = Runner(executor=executor, schema=NanoAODSchema, chunksize=10000)
    output = run(
        samp_info.fileset,
        "Events",
        processor_instance=DimuonProcessor(**processor_args),
    )

    # compute() materializes the runner output; the result is used below as
    # a dataframe (.loc / .shape). Presumably `output` is a lazy Dask
    # dataframe (TODO confirm).
    df = output.compute()
    print(df)

    # Wall-clock time since `tick`, rounded to milliseconds.
    elapsed = round(time.time() - tick, 3)
    print(f"Finished everything in {elapsed} s.")

    # Regression checks: pin the output shape and two values taken from the
    # first row with event == 2.
    dimuon_mass = df.loc[df.event == 2, "dimuon_mass"].values[0]
    jj_mass = df.loc[df.event == 2, "jj_mass nominal"].values[0]
    assert df.shape == (21806, 116)
    assert almost_equal(dimuon_mass, 124.16069531)
    assert almost_equal(jj_mass, 1478.3898375)
예제 #5
0
}


if __name__ == "__main__":
    # Regression check of the load -> histogram -> plot chain on a stored
    # stage-1 output sample. `parameters`, `load_dataframe`, `to_histograms`,
    # `plotter` and `almost_equal` come from outside this block.
    tick = time.time()

    # Manage the client as a context manager so it is closed even when a
    # step or assertion below raises; previously it was never closed.
    with Client(
        processes=True, n_workers=1, threads_per_worker=1, memory_limit="4GB"
    ) as client:
        # The sample is resolved relative to the current working directory.
        file_name = "dy_nanoaod_stage1_output.parquet"
        path = f"{os.getcwd()}/tests/samples/{file_name}"

        # Each stage consumes the previous stage's result.
        df = load_dataframe(client, parameters, inputs=[path])
        out_hist = to_histograms(client, parameters, df=df)
        out_plot = plotter(client, parameters, hist_df=out_hist)

        # Wall-clock time since `tick`, rounded to milliseconds.
        elapsed = round(time.time() - tick, 3)
        print(f"Finished everything in {elapsed} s.")

        # Selection used to index the histogram object: fixed region,
        # channel, variation and "value" entry, full dimuon_mass axis kept.
        slicer = {
            "region": "h-peak",
            "channel": "vbf",
            "variation": "nominal",
            "val_sumw2": "value",
            "dimuon_mass": slice(None),
        }

        # The summed histogram selection and the summed plotter output must
        # both match the same reference yield. The checks stay inside the
        # `with` block so the results are inspected while the client is
        # still open.
        assert almost_equal(
            out_hist["hist"][0][slicer].sum(), 0.14842246076249055
        )
        assert almost_equal(sum(out_plot), 0.14842246076249055)
}

if __name__ == "__main__":
    # Regression check of the load -> histogram -> plot/template chain on a
    # stored stage-1 output sample. `parameters`, `load_dataframe`,
    # `to_histograms`, `plotter`, `to_templates` and `almost_equal` come from
    # outside this block.
    tick = time.time()

    # Manage the client as a context manager so it is closed even when a
    # step or assertion below raises; previously it was never closed.
    with Client(processes=True,
                n_workers=1,
                threads_per_worker=1,
                memory_limit="4GB") as client:
        # The sample is resolved relative to the current working directory.
        file_name = "dy_delphes_stage1_output.parquet"
        path = f"{os.getcwd()}/tests/samples/{file_name}"

        # The histogram result feeds both the plotter and the template step.
        out_df = load_dataframe(client, parameters, inputs=[path])
        out_hist = to_histograms(client, parameters, df=out_df)
        out_plot = plotter(client, parameters, hist_df=out_hist)
        out_tmp = to_templates(client, parameters, hist_df=out_hist)

        # Wall-clock time since `tick`, rounded to milliseconds.
        elapsed = round(time.time() - tick, 3)
        print(f"Finished everything in {elapsed} s.")

        # Selection used to index the histogram object: fixed region,
        # channel and "value" entry, with the full dimuon_mass axis kept.
        slicer = {
            "region": "h-peak",
            "channel": "ggh_0jets",
            "val_sumw2": "value",
            "dimuon_mass": slice(None),
        }

        # All three outputs must sum to the same reference yield. The checks
        # stay inside the `with` block so the results are inspected while
        # the client is still open.
        assert almost_equal(out_hist["hist"][0][slicer].sum(),
                            3349.189725131393)
        assert almost_equal(sum(out_plot), 3349.189725131393)
        assert almost_equal(sum(out_tmp), 3349.189725131393)