def template_analysis(environment_file, filelist, executor, compression):
    from coffea.processor import Runner
    from coffea.processor.test_items import NanoTestProcessor

    executor = executor(
        compression=compression,
        environment_file=environment_file,
        resources_mode="fixed",
        cores=2,
        memory=500,  # MB
        disk=1000,  # MB
        master_name="coffea_test",
        port=work_queue_port,
        print_stdout=True,
    )

    run = Runner(executor)

    hists = run(filelist, "Events", NanoTestProcessor())

    print(hists)
    assert hists["cutflow"]["ZJets_pt"] == 18
    assert hists["cutflow"]["ZJets_mass"] == 6
    assert hists["cutflow"]["Data_pt"] == 84
    assert hists["cutflow"]["Data_mass"] == 66
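# A minimal, hypothetical invocation sketch for template_analysis. The sample
# paths, port value, and compression level are illustrative placeholders, not
# taken from the original test; only the keyword arguments used above are
# assumed to be valid for coffea's WorkQueueExecutor.
from coffea.processor import WorkQueueExecutor

work_queue_port = 9123  # placeholder port for the Work Queue manager
filelist = {
    "ZJets": ["tests/samples/nano_dy.root"],      # placeholder input files
    "Data": ["tests/samples/nano_dimuon.root"],   # placeholder input files
}
template_analysis(
    environment_file=None,  # or a packed conda environment tarball
    filelist=filelist,
    executor=WorkQueueExecutor,
    compression=0,
)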
def submit_job(client, parameters):
    mkdir(parameters["out_dir"])
    out_dir = f"{parameters['out_dir']}/"
    mkdir(out_dir)

    executor_args = {"client": client, "retries": 0}
    executor = DaskExecutor(**executor_args)
    processor_args = {
        "apply_to_output": partial(save_dask_pandas_to_parquet, out_dir=out_dir)
    }
    run = Runner(
        executor=executor,
        schema=DelphesSchema,
        chunksize=parameters["chunksize"],
        maxchunks=parameters["maxchunks"],
    )

    try:
        run(
            parameters["fileset"],
            "Delphes",
            processor_instance=DimuonProcessorDelphes(**processor_args),
        )
    except Exception as e:
        tb = traceback.format_exc()
        return "Failed: " + str(e) + " " + tb

    return "Success!"
def submit_job(arg_set, parameters):
    mkdir(parameters["out_dir"])
    if parameters["pt_variations"] == ["nominal"]:
        out_dir = f"{parameters['out_dir']}/"
    else:
        out_dir = f"{parameters['out_dir']}_jec/"
    mkdir(out_dir)

    executor_args = {"client": parameters["client"], "retries": 0}
    processor_args = {
        "samp_info": parameters["samp_infos"],
        "do_timer": False,
        "do_btag_syst": False,
        "pt_variations": parameters["pt_variations"],
        "apply_to_output": partial(save_dask_pandas_to_parquet, out_dir=out_dir),
    }
    executor = DaskExecutor(**executor_args)
    run = Runner(
        executor=executor,
        schema=NanoAODSchema,
        chunksize=parameters["chunksize"],
        maxchunks=parameters["maxchunks"],
    )

    try:
        run(
            parameters["samp_infos"].fileset,
            "Events",
            processor_instance=DimuonProcessor(**processor_args),
        )
    except Exception as e:
        tb = traceback.format_exc()
        return "Failed: " + str(e) + " " + tb

    return "Success!"
parameters = {"lumi": 3000000} client = dask.distributed.Client(processes=True, n_workers=1, threads_per_worker=1, memory_limit="2.9GB") print("Client created") file_name = "ggh_delphes.root" file_path = f"{os.getcwd()}/tests/samples/{file_name}" datasets = {"ggh_powheg": file_path} fileset = get_fileset(client, datasets, parameters) executor_args = {"client": client, "use_dataframes": True, "retries": 0} executor = DaskExecutor(**executor_args) run = Runner(executor=executor, schema=DelphesSchema, chunksize=10000) output = run(fileset, "Delphes", processor_instance=DimuonProcessorDelphes()) df = output.compute() print(df) elapsed = round(time.time() - tick, 3) print(f"Finished everything in {elapsed} s.") dimuon_mass = df.loc[df.event == 20002, "dimuon_mass"].values[0] jj_mass = df.loc[df.event == 20011, "jj_mass"].values[0] assert df.shape == (86, 78) assert almost_equal(dimuon_mass, 124.3369651) assert almost_equal(jj_mass, 78.593476)
manager_host_port="localhost:{}".format(wq_port) # with a batch system, e.g., condor. # (If coffea not at the installation site, then a conda # environment_file should be defined in the work_queue_executor_args.) # batch_type="condor", manager_name=wq_manager_name ) workers.max_workers = 2 workers.min_workers = 1 workers.cores = 2 workers.memory = 1000 # MB. workers.disk = 2000 # MB with workers: # define the Runner instance run_fn = Runner( executor=executor, chunksize=100000, maxchunks=4, # change this to None for a large run ) # execute the analysis on the given dataset hists = run_fn(fileset, "Events", MyProcessor()) elapsed = time.time() - tstart print(hists) print(hists["mass"]) # (assert only valid when using maxchunks=4) assert hists["sumw"]["DoubleMuon"] == 400224
samp_info = SamplesInfo(xrootd=False)
samp_info.paths = dataset
samp_info.year = "2018"
samp_info.load("vbf_powheg", use_dask=False)
samp_info.lumi_weights["vbf_powheg"] = 1.0
print(samp_info.fileset)

executor_args = {"client": client, "use_dataframes": True, "retries": 0}
processor_args = {
    "samp_info": samp_info,
    "do_timer": False,
    "do_btag_syst": False,
}
executor = DaskExecutor(**executor_args)
run = Runner(executor=executor, schema=NanoAODSchema, chunksize=10000)
out_df = run(
    samp_info.fileset,
    "Events",
    processor_instance=DimuonProcessor(**processor_args),
)

df = load_dataframe(client, parameters, inputs=out_df)
out_hist = to_histograms(client, parameters, df=df)
out_plot = plotter(client, parameters, hist_df=out_hist)

elapsed = round(time.time() - tick, 3)
print(f"Finished everything in {elapsed} s.")

out_df = out_df.compute()
dimuon_mass = out_df.loc[out_df.event == 2, "dimuon_mass"].values[0]