# End-to-end Delphes test: process one ROOT sample through the coffea
# DaskExecutor pipeline and pin the resulting dataframe against known values.
# NOTE(review): `tick`, `parameters`, and the imports (dask, os, time,
# get_fileset, DaskExecutor, Runner, DelphesSchema, DimuonProcessorDelphes,
# almost_equal) come from earlier in the file — not visible here.
client = dask.distributed.Client(processes=True, n_workers=1, threads_per_worker=1, memory_limit="2.9GB")
print("Client created")
file_name = "ggh_delphes.root"
# Test sample is resolved relative to the current working directory,
# so this must be run from the repository root.
file_path = f"{os.getcwd()}/tests/samples/{file_name}"
datasets = {"ggh_powheg": file_path}
fileset = get_fileset(client, datasets, parameters)
# use_dataframes=True makes the executor return a dask dataframe
# (hence the .compute() below).
executor_args = {"client": client, "use_dataframes": True, "retries": 0}
executor = DaskExecutor(**executor_args)
run = Runner(executor=executor, schema=DelphesSchema, chunksize=10000)
output = run(fileset, "Delphes", processor_instance=DimuonProcessorDelphes())
# Materialize the lazy dask dataframe into pandas.
df = output.compute()
print(df)
elapsed = round(time.time() - tick, 3)
print(f"Finished everything in {elapsed} s.")
# Spot-check two specific events by event number, then the overall shape.
dimuon_mass = df.loc[df.event == 20002, "dimuon_mass"].values[0]
jj_mass = df.loc[df.event == 20011, "jj_mass"].values[0]
assert df.shape == (86, 78)
assert almost_equal(dimuon_mass, 124.3369651)
assert almost_equal(jj_mass, 78.593476)
# NOTE(review): the first two lines below are the tail of a `run(...)` call
# whose opening is above this chunk — confirm against the full file.
processor_instance=DimuonProcessor(**processor_args),
)
# Stage 2: feed the stage-1 output dataframe through histogramming and
# plotting, then validate both the raw dataframe and the binned yields.
df = load_dataframe(client, parameters, inputs=out_df)
out_hist = to_histograms(client, parameters, df=df)
out_plot = plotter(client, parameters, hist_df=out_hist)
elapsed = round(time.time() - tick, 3)
print(f"Finished everything in {elapsed} s.")
# Materialize the lazy stage-1 dataframe for the per-event checks.
out_df = out_df.compute()
dimuon_mass = out_df.loc[out_df.event == 2, "dimuon_mass"].values[0]
# "jj_mass nominal" is the nominal-variation dijet-mass column name.
jj_mass = out_df.loc[out_df.event == 2, "jj_mass nominal"].values[0]
assert out_df.shape == (21806, 116)
assert almost_equal(dimuon_mass, 124.16069531)
assert almost_equal(jj_mass, 1478.3898375)
# Histogram slice: VBF channel, Higgs-peak region, nominal variation,
# integrated over the full dimuon_mass axis.
slicer = {
    "region": "h-peak",
    "channel": "vbf",
    "variation": "nominal",
    "val_sumw2": "value",
    "dimuon_mass": slice(None),
}
# Plot yield must agree with the histogram yield it was drawn from.
assert almost_equal(out_hist["hist"][0][slicer].sum(), 31778.21631, precision=0.01)
assert almost_equal(sum(out_plot), 31778.21631, precision=0.01)
# NOTE(review): the first line below closes a `run(...)` call opened above
# this chunk — confirm against the full file.
processor_instance=DimuonProcessorDelphes())
# Stage 2 (Delphes): dataframe -> histograms -> plots -> templates,
# then validate per-event values and the binned yields.
df = load_dataframe(client, parameters, inputs=out_df)
# run_mva(client, parameters, df=df)
out_hist = to_histograms(client, parameters, df=df)
out_plot = plotter(client, parameters, hist_df=out_hist)
out_tmp = to_templates(client, parameters, hist_df=out_hist)
elapsed = round(time.time() - tick, 3)
print(f"Finished everything in {elapsed} s.")
# Materialize the lazy stage-1 dataframe for the per-event checks.
out_df = out_df.compute()
dimuon_mass = out_df.loc[out_df.event == 20002, "dimuon_mass"].values[0]
jj_mass = out_df.loc[out_df.event == 20011, "jj_mass"].values[0]
assert out_df.shape == (86, 78)
assert almost_equal(dimuon_mass, 124.3369651)
assert almost_equal(jj_mass, 78.593476)
# Histogram slice: ggH 0-jet channel, Higgs-peak region, summed over the
# full dimuon_mass axis. No "variation" key here — presumably the Delphes
# histograms carry no systematic-variation axis; verify against to_histograms.
slicer = {
    "region": "h-peak",
    "channel": "ggh_0jets",
    "val_sumw2": "value",
    "dimuon_mass": slice(None),
}
# Histograms, plots, and templates must all report the same yield.
assert almost_equal(
    out_hist.loc[out_hist.var_name == "dimuon_mass", "hist"].values[0][slicer].sum(),
    12426.530232,
)
assert almost_equal(sum(out_plot), 12426.530232)
assert almost_equal(sum(out_tmp), 12426.530232)
# End-to-end NanoAOD test: run the "test" sample through the coffea
# DaskExecutor pipeline and pin the output dataframe against known values.
# NOTE(review): `samp_info`, `client`, `tick`, and the processor/Runner
# imports are defined above this chunk — not visible here.
samp_info.load("test", use_dask=False)
# Unit luminosity weight so yields are directly comparable to raw counts.
samp_info.lumi_weights["test"] = 1.0
print(samp_info.fileset)
# use_dataframes=True makes the executor return a dask dataframe
# (hence the .compute() below).
executor_args = {"client": client, "use_dataframes": True, "retries": 0}
processor_args = {
    "samp_info": samp_info,
    "do_timer": False,
    "do_btag_syst": False
}
executor = DaskExecutor(**executor_args)
run = Runner(executor=executor, schema=NanoAODSchema, chunksize=10000)
output = run(
    samp_info.fileset,
    "Events",
    processor_instance=DimuonProcessor(**processor_args),
)
# Materialize the lazy dask dataframe into pandas.
df = output.compute()
print(df)
elapsed = round(time.time() - tick, 3)
print(f"Finished everything in {elapsed} s.")
# Spot-check event 2, then the overall shape.
# "jj_mass nominal" is the nominal-variation dijet-mass column name.
dimuon_mass = df.loc[df.event == 2, "dimuon_mass"].values[0]
jj_mass = df.loc[df.event == 2, "jj_mass nominal"].values[0]
assert df.shape == (21806, 116)
assert almost_equal(dimuon_mass, 124.16069531)
assert almost_equal(jj_mass, 1478.3898375)
# NOTE(review): this brace closes a dict literal (presumably `parameters`)
# that begins above this chunk — confirm against the full file.
}
if __name__ == "__main__":
    # Standalone driver: load a pre-computed NanoAOD stage-1 parquet file,
    # build histograms and plots, and pin the binned yield.
    tick = time.time()
    client = Client(
        processes=True, n_workers=1, threads_per_worker=1, memory_limit="4GB"
    )
    file_name = "dy_nanoaod_stage1_output.parquet"
    # Sample path is resolved relative to the current working directory,
    # so this must be run from the repository root.
    path = f"{os.getcwd()}/tests/samples/{file_name}"
    df = load_dataframe(client, parameters, inputs=[path])
    out_hist = to_histograms(client, parameters, df=df)
    out_plot = plotter(client, parameters, hist_df=out_hist)
    elapsed = round(time.time() - tick, 3)
    print(f"Finished everything in {elapsed} s.")
    # Histogram slice: VBF channel, Higgs-peak region, nominal variation,
    # integrated over the full dimuon_mass axis.
    slicer = {
        "region": "h-peak",
        "channel": "vbf",
        "variation": "nominal",
        "val_sumw2": "value",
        "dimuon_mass": slice(None),
    }
    # Plot yield must agree with the histogram yield it was drawn from.
    assert almost_equal(out_hist["hist"][0][slicer].sum(), 0.14842246076249055)
    assert almost_equal(sum(out_plot), 0.14842246076249055)
# NOTE(review): this brace closes a dict literal (presumably `parameters`)
# that begins above this chunk — confirm against the full file.
}
if __name__ == "__main__":
    # Standalone driver: load a pre-computed Delphes stage-1 parquet file,
    # build histograms, plots, and datacard templates, and pin the yields.
    tick = time.time()
    client = Client(processes=True, n_workers=1, threads_per_worker=1, memory_limit="4GB")
    file_name = "dy_delphes_stage1_output.parquet"
    # Sample path is resolved relative to the current working directory,
    # so this must be run from the repository root.
    path = f"{os.getcwd()}/tests/samples/{file_name}"
    out_df = load_dataframe(client, parameters, inputs=[path])
    out_hist = to_histograms(client, parameters, df=out_df)
    out_plot = plotter(client, parameters, hist_df=out_hist)
    out_tmp = to_templates(client, parameters, hist_df=out_hist)
    elapsed = round(time.time() - tick, 3)
    print(f"Finished everything in {elapsed} s.")
    # Histogram slice: ggH 0-jet channel, Higgs-peak region, summed over
    # the full dimuon_mass axis. No "variation" key here — presumably the
    # Delphes histograms carry no systematic-variation axis; verify.
    slicer = {
        "region": "h-peak",
        "channel": "ggh_0jets",
        "val_sumw2": "value",
        "dimuon_mass": slice(None),
    }
    # Histograms, plots, and templates must all report the same yield.
    assert almost_equal(out_hist["hist"][0][slicer].sum(), 3349.189725131393)
    assert almost_equal(sum(out_plot), 3349.189725131393)
    assert almost_equal(sum(out_tmp), 3349.189725131393)