def main(mode, input_file, out_path,
         out_tag="",
         batch_size=4,
         n_max_files=100,
         dpl_configuration_file=None,
         njobs=1,
         merge_output=True,
         merge_only=False,
         shm_mem_size=16000000000,
         rate_lim=1000000000,
         readers=1,
         avoid_overwriting_merge=False,
         clean_localhost_after_running=True,
         extra_arguments="",
         resume_previous_analysis=False,
         check_input_file_integrity=True,
         analysis_timeout=None,
         linearize_single_core=True):
    if do_bash_script:
        njobs = 1
        linearize_single_core = True
    if len(input_file) == 1:
        input_file = input_file[0]
    else:
        input_file = input_file[0:n_max_files]
    if not merge_only:
        msg("Running", f"'{mode}'", "analysis on", f"'{input_file}'",
            color=bcolors.BOKBLUE)
        msg("Maximum", n_max_files, "files with batch size", batch_size,
            "and", njobs, "jobs" if njobs > 1 else "job",
            color=bcolors.BOKBLUE)
    else:
        msg("Merging output of", f"'{mode}'", "analysis",
            color=bcolors.BOKBLUE)
    if analysis_timeout is not None:
        msg("Using analysis timeout of", analysis_timeout, "seconds",
            color=bcolors.BOKBLUE)
        analysis_timeout = f"--time-limit {analysis_timeout}"
    else:
        analysis_timeout = ""
    o2_arguments = f"-b --shm-segment-size {shm_mem_size} --aod-memory-rate-limit {rate_lim} --readers {readers} {analysis_timeout}"
    if extra_arguments:
        # Join with a space so the extra arguments do not fuse with the last token
        o2_arguments += f" {extra_arguments}"
    if mode not in analyses:
        raise ValueError("Did not find analyses matching mode", mode,
                         ", please choose in", ", ".join(analyses.keys()))
    an = analyses[mode]
    tag = mode + out_tag
    # Build input file list
    input_file_list = []

    def is_root_file_sane(file_name_to_check):
        file_name_to_check = file_name_to_check.strip()
        if not os.path.isfile(file_name_to_check):
            warning_msg("File", file_name_to_check, "does not exist")
            return "Does not exist"
        file_to_check = TFile(file_name_to_check, "READ")
        if not file_to_check.IsOpen():
            warning_msg("Cannot open AOD file:", file_name_to_check)
            return "Cannot be open"
        if file_to_check.TestBit(TFile.kRecovered):
            verbose_msg(file_name_to_check, "was a recovered file")
            return "Was recovered"
        verbose_msg(file_name_to_check, "is OK")
        return "Is Ok"

    def build_list_of_files(file_list):
        verbose_msg("Building list of files from", file_list)
        # Check that the run list does not have duplicates
        unique_file_list = set(file_list)
        if len(file_list) != len(unique_file_list):
            fatal_msg("Runlist has duplicated entries, fix runlist!",
                      len(unique_file_list), "unique files, while got",
                      len(file_list), "files")
        file_status = {"Does not exist": [],
                       "Cannot be open": [],
                       "Was recovered": [],
                       "Is Ok": []}
        if check_input_file_integrity:
            # Check that the input files can be opened
            for i in file_list:
                verbose_msg("Checking that TFile", i.strip(),
                            "can be processed")
                # Append to the status list: plain assignment would
                # overwrite the list with a single file name
                file_status[is_root_file_sane(i)].append(i)
        recovered_files = file_status["Was recovered"]
        not_readable = []
        for i in file_status:
            if i == "Is Ok":
                continue
            not_readable += file_status[i]
        if len(recovered_files) > 0:
            msg("Recovered", len(recovered_files), "files:\n",
                "\n".join(recovered_files))
        if len(not_readable) > 0:
            warning_msg(len(not_readable), "over", len(file_list),
                        "files cannot be read and will be skipped")
            for i in not_readable:
                if i not in file_list:
                    warning_msg("did not find file to remove", f"'{i}'")
                    continue
                file_list.remove(i)
        files_per_batch = []
        iter_file_list = iter(file_list)
        for i in range(0, len(file_list)):
            sub_set = list(islice(iter_file_list, batch_size))
            if len(sub_set) <= 0:
                continue
            files_per_batch.append(sub_set)
        run_list = []
        if len(files_per_batch) > 0:
            for i, lines in enumerate(files_per_batch):
                p = os.path.join(out_path, f"{i}")
                if not os.path.isdir(p):
                    os.makedirs(p)
                run_list.append(os.path.join(p, f"ListForRun5Analysis.{i}.txt"))
                with open(run_list[-1], "w") as f:
                    for j in lines:
                        f.write(j.strip() + "\n")
        msg("Number of runs:", len(run_list))
        return run_list

    if type(input_file) is list:
        input_file = [os.path.join(os.getcwd(), i) for i in input_file]
        input_file_list = build_list_of_files(input_file)
    elif not input_file.endswith(".root"):
        with open(input_file, "r") as f:
            lines = f.readlines()
            msg("Building input list from", len(lines),
                "inputs, limiting to", n_max_files)
            if len(lines) > n_max_files:
                lines = lines[0:n_max_files]
            lines = [os.path.join(os.path.dirname(os.path.abspath(input_file)), i)
                     for i in lines]
            input_file_list = build_list_of_files(lines)
    else:
        input_file_list = [os.path.join(os.getcwd(), input_file)]
    if dpl_configuration_file is not None:
        dpl_configuration_file = os.path.join(os.getcwd(),
                                              dpl_configuration_file)
    run_list = []
    for i, j in enumerate(input_file_list):
        run_list.append(set_o2_analysis(an,
                                        o2_arguments=o2_arguments,
                                        input_file=j,
                                        tag=tag,
                                        dpl_configuration_file=dpl_configuration_file,
                                        resume_previous_analysis=resume_previous_analysis,
                                        write_runner_script=not merge_only))
    if not merge_only:
        if do_bash_script:
            with open("parallelbash.sh", "w") as f:
                f.write("#!/bin/bash\n\n")
                f.write("echo \"Start running\"\n\n")
                f.write("date\n\n")
                f.write("""function trap_ctrlc (){
    # perform cleanup here
    echo "Ctrl-C caught...performing clean up"
    exit 2
}\n\n""")
                f.write("""trap "trap_ctrlc" 2\n""")
        run_in_parallel(processes=njobs,
                        job_runner=run_o2_analysis,
                        job_arguments=run_list,
                        job_message=f"Running analysis, it's {datetime.datetime.now()}",
                        linearize_single_core=linearize_single_core)
        if do_bash_script:
            with open("parallelbash.sh", "a") as f:
                f.write("wait\n\n")
                f.write("date\n\n")
            msg("Now run bash script `bash parallelbash.sh`")
            return
        if clean_localhost_after_running:
            run_cmd("find /tmp/ -maxdepth 1 -name localhost* -user $(whoami) | xargs rm -v 2>&1",
                    check_status=False)
    if (merge_output or merge_only) and len(run_list) > 1:
        files_to_merge = []
        for i in input_file_list:
            p = os.path.dirname(os.path.abspath(i))
            for j in os.listdir(p):
                if j.endswith(f"_{tag}.root"):
                    files_to_merge.append(os.path.join(p, j))
        if len(files_to_merge) == 0:
            warning_msg("Did not find any file to merge for tag", tag)
            return
        files_per_type = {}  # Files to be merged, per type
        # Files that are not declared sane, per type
        non_sane_files_per_type = {}
        for i in files_to_merge:
            # Compute the type before the sanity check, so that non sane
            # files can be booked under their own type as well
            fn = os.path.basename(i)
            if is_root_file_sane(i) != "Is Ok":
                non_sane_files_per_type.setdefault(fn, []).append(i)
                warning_msg("Result file", i, "is not sane")
                continue
            files_per_type.setdefault(fn, [])
            files_per_type[fn].append(i)
        for i in non_sane_files_per_type:
            warning_msg("Non sane files for type", i)
            for j in non_sane_files_per_type[i]:
                msg(j)
        merged_files = []
        for i in files_per_type:
            merged_file = os.path.join(out_path, i)
            if avoid_overwriting_merge and os.path.isfile(merged_file):
                warning_msg("file", merged_file,
                            "is already found, remove it before merging,"
                            " you can use the --mergeonly flag to avoid running the analysis again")
                continue
            merged_files.append(merged_file)
            merge_file_list = os.path.join(os.path.dirname(os.path.abspath(merged_file)),
                                           "tomerge_" + "".join(i.split(".")[:-1]) + ".txt")
            verbose_msg("List of files to be merged:", merge_file_list)
            with open(merge_file_list, "w") as fmerge:
                for j in files_per_type[i]:
                    fmerge.write(j + "\n")
            if len(files_per_type[i]) > len(run_list):
                fatal_msg("Trying to merge too many files of type", i,
                          "for tag", tag, ":", len(files_per_type[i]),
                          "vs", len(run_list), "runs")
            msg("Merging", len(files_per_type[i]), "files to", merged_file)
            run_cmd(f"hadd -j {njobs} -f {merged_file} `cat {merge_file_list}`",
                    log_file=merge_file_list.replace(".txt", ".log"),
                    time_it=True,
                    comment=f"Merging to {merged_file}")
        if len(merged_files) == 0:
            warning_msg("Merged no files")
        else:
            msg("Merging completed, merged:", *merged_files,
                color=bcolors.BOKGREEN)
def set_o2_analysis(o2_analyses=["o2-analysis-hf-task-d0 --pipeline qa-tracking-kine:4,qa-tracking-resolution:4"],
                    o2_arguments="--shm-segment-size 16000000000 --readers 4 --configuration json://$PWD/dpl-config_std.json",
                    input_file="listfiles.txt",
                    tag="QA",
                    output_files=["AnalysisResults.root",
                                  "AnalysisResults_trees.root",
                                  "QAResults.root"],
                    dpl_configuration_file=None,
                    resume_previous_analysis=False,
                    write_runner_script=True):
    """
    Function to prepare everything you need for your O2 analysis:
    from the output folder to the script containing the O2 workflow.
    The output can be found in the same directory as the input data.
    """
    # Creating output directory
    output_path = os.path.dirname(os.path.abspath(input_file))
    # Creating the script to run O2
    tmp_script_name = os.path.join(output_path, f"tmpscript_{tag.lower()}.sh")
    if not write_runner_script:
        # Nothing to write, only report the script name
        return tmp_script_name
    # Defining log file
    log_file = f"log_{tag.lower()}.log"
    verbose_msg("Configuring the tasks with O2", color=bcolors.BOKBLUE)
    # Checking input file
    verbose_msg("Using", input_file, "as input file")
    if not input_file.endswith(".root"):
        input_file = f"@{os.path.join(os.getcwd(), input_file)}"
    # Writing instructions to runner script
    with open(tmp_script_name, "w") as tmp_script:
        verbose_msg("Writing o2 instructions to", f"'{tmp_script_name}'")

        def write_instructions(instructions, n=1, check_status=False):
            verbose_msg("--\t", instructions.strip())
            tmp_script.write(f"{instructions}" + "".join(["\n"] * n))
            if check_status:
                tmp_script.write("\nReturnValue=$?\n")
                tmp_script.write("if [[ $ReturnValue != 0 ]]; then\n")
                tmp_script.write("    echo \"Encountered error with command: '")
                tmp_script.write(instructions.replace("\"", "\\\"").strip())
                tmp_script.write("'\"\n")
                tmp_script.write("    exit $ReturnValue\n")
                tmp_script.write("fi\n\n")

        write_instructions("#!/bin/bash", n=2)
        # Move to run dir
        write_instructions(f"cd {output_path} || exit 1", n=2)
        # Print run dir
        write_instructions("pwd", n=2)
        write_instructions("echo Running \"$0\"", n=2)

        def get_tagged_output_file(output_file_name):
            return output_file_name.replace(".root", f"_{tag}.root")

        for i in output_files:
            # Removing old untagged output
            write_instructions(f"[ -f {i} ] && rm -v {i} 2>&1")
            i = get_tagged_output_file(i)
            if resume_previous_analysis:
                write_instructions(f"[ -f {i} ] && echo 'file {i} already present, continuing' && exit 0")
            else:
                write_instructions(f"[ -f {i} ] && rm -v {i} 2>&1")
        write_instructions("")
        o2_workflow = ""
        for i in o2_analyses:
            line = f"{i} {o2_arguments}"
            if i == o2_analyses[0]:
                line += f" --aod-file {input_file}"
            if dpl_configuration_file is not None:
                line += f" --configuration json://{os.path.normpath(dpl_configuration_file)}"
            if len(o2_analyses) > 1 and i != o2_analyses[-1]:
                line = f"{line} | \\\n \t"
            if line.count("configuration") > 1:
                fatal_msg("Cannot have more than one configuration")
            o2_workflow += line
        write_instructions(f"O2Workflow=\"{o2_workflow}\"", n=2)
        write_instructions("if [[ -z \"${1}\" ]]; then", n=2)
        write_instructions("    echo \"Running: \n \t ${O2Workflow}\"" f" > {log_file}")
        write_instructions("    eval \"${O2Workflow}\"" f" >> {log_file}",
                           check_status=True)
        write_instructions("else")
        write_instructions("    eval \"${O2Workflow}\"")
        write_instructions("fi")
        # Print run dir
        write_instructions("pwd")
        for i in ["ERROR", "FATAL", "crash"]:
            write_instructions(f"if grep -q \"\\[{i}\\]\" {log_file}; then echo \": got some {i}s in '{log_file}'\" && exit 1; fi")
        write_instructions("")
        for i in output_files:
            # Renaming output with tag
            write_instructions(f"[ -f {i} ] && mv {i} {get_tagged_output_file(i)} 2>&1")
        write_instructions("date", n=2)
        write_instructions("echo Completed \"$0\"", n=2)
        write_instructions("\nexit 0")
    return tmp_script_name
action="store_true", help= "Flag to avoid checking the input file integrity so as to gain time") args = parser.parse_args() set_verbose_mode(args) # Set bash script mode do_bash_script = args.do_bash_script bash_parallel_jobs = args.njobs # Load analysis workflows workflows = configparser.RawConfigParser() msg("Analysis configuration from", args.workflows) for i in args.workflows: if not os.path.isfile(i): fatal_msg(f"Did not fid configuration file '{i}'") workflows.read(i) for i in workflows.sections(): full_workflow = workflows.get(i, "w").split("\n") analyses[i] = full_workflow if "|" in full_workflow: fatal_msg("`|` present in workflow", i) if len(analyses[i]) == 0: fatal_msg("Empty workflow for analysis", i) for i in args.modes: if i not in analyses.keys(): fatal_msg("Analysis", i, "not in", " ".join(workflows.sections()), "from configuration files:", args.workflows) if args.show: msg(i, "workflow:")
def main(input_files,
         do_merge=True,
         sanity_file=None,
         max_bunch_size=200,
         out_path="./",
         over_write_lists=False,
         jobs=1):
    msg("Merging to", out_path, "with maximum input size", max_bunch_size)
    out_path = os.path.normpath(out_path)
    if not os.path.exists(out_path):
        warning_msg("Output path", out_path, "does not exist")
        ans = input("Create it? (Y/[N])")
        if ans == "Y":
            os.makedirs(out_path)
        else:
            msg("Exit")
            return
    sane_files = None
    if sanity_file is not None:
        msg("Using sanity file", sanity_file)
        sane_files = []
        with open(sanity_file, "r") as f:
            for i in f:
                sane_files.append(os.path.abspath(os.path.normpath(i.strip())))
    size_of_files = {}
    for i in input_files:
        i = os.path.normpath(i.strip())
        if sane_files is not None and os.path.abspath(i) not in sane_files:
            msg("Skipping", i, "because not in sanity file")
            continue
        size_of_files[i] = os.path.getsize(i) * 1e-6  # size in MB
    bunched_files = [[]]
    bunched_sizes = []
    bunch_size = []
    for i in size_of_files:
        verbose_msg("Checking file", i, "of size", size_of_files[i], "MB")
        if sum(bunch_size) > max_bunch_size:
            verbose_msg("Bunch size", sum(bunch_size), "reached limit with",
                        len(bunch_size), "files", max_bunch_size, "MB",
                        "preparing next bunch!")
            bunched_files.append([])
            bunched_sizes.append(sum(bunch_size))
            bunch_size = []
        bunch_size.append(size_of_files[i])
        bunched_files[-1].append(i)
    bunched_sizes.append(sum(bunch_size))
    verbose_msg("Got", len(bunched_files), "bunches")
    for i, j in enumerate(bunched_files):
        verbose_msg(f"{i})", bunched_sizes[i], "MB, with", len(j), j)
    msg("Preparing", len(bunched_files), "bunched lists")
    bunched_aod_names.clear()
    for i, j in enumerate(bunched_files):
        fn = f"aod_merge_list_bunch{i}.txt"
        verbose_msg("Writing bunch", i, "to", fn)
        if not over_write_lists:
            if os.path.isfile(fn):
                fatal_msg(fn, "already present, remove it first")
        with open(fn, "w") as f:
            for k in j:
                f.write(k + "\n")
        if do_merge:
            out_aod = os.path.join(out_path, f"AO2D_Merge_{i}.root")
            if os.path.isfile(out_aod):
                fatal_msg(out_aod, "already present")
            bunched_aod_names[fn] = {"out_aod": out_aod,
                                     "file_index": i,
                                     "total_files": len(bunched_files),
                                     "input_size": bunched_sizes[i]}
    run_in_parallel(jobs, run_merge,
                    list(bunched_aod_names.keys()),
                    job_message="Running AOD merging",
                    linearize_single_core=True)
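# Minimal sketch of the greedy size-based bunching used in main above
# (hypothetical helper with toy sizes, not part of the script): a new bunch
# is opened once the running sum exceeds the limit, so every bunch except
# possibly the last ends just above max_bunch_size.
def _sketch_bunching(sizes_mb, max_bunch_size=200):
    """Group file names into bunches whose sizes sum to ~max_bunch_size MB."""
    bunches = [[]]
    current = []
    for name, size in sizes_mb.items():
        if sum(current) > max_bunch_size:
            bunches.append([])
            current = []
        current.append(size)
        bunches[-1].append(name)
    return bunches
# _sketch_bunching({"a.root": 150., "b.root": 100., "c.root": 30.})
# -> [['a.root', 'b.root'], ['c.root']]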
def main(mode, input_file, out_path,
         out_tag="",
         batch_size=4,
         n_max_files=100,
         dpl_configuration_file=None,
         njobs=1,
         merge_output=True,
         merge_only=False,
         shm_mem_size=16000000000,
         rate_lim=1000000000,
         readers=1,
         avoid_overwriting_merge=False,
         clean_localhost_after_running=True,
         extra_arguments=""):
    if len(input_file) == 1:
        input_file = input_file[0]
    else:
        input_file = input_file[0:n_max_files]
    if not merge_only:
        msg("Running", f"'{mode}'", "analysis on", f"'{input_file}'",
            color=bcolors.BOKBLUE)
        msg("Maximum", n_max_files, "files with batch size", batch_size,
            "and", njobs, "jobs" if njobs > 1 else "job",
            color=bcolors.BOKBLUE)
    else:
        msg("Merging output of", f"'{mode}'", "analysis",
            color=bcolors.BOKBLUE)
    o2_arguments = f"-b --shm-segment-size {shm_mem_size} --aod-memory-rate-limit {rate_lim} --readers {readers}"
    if extra_arguments:
        # Join with a space so the extra arguments do not fuse with the last token
        o2_arguments += f" {extra_arguments}"
    if mode not in analyses:
        raise ValueError("Did not find analyses matching mode", mode,
                         ", please choose in", ", ".join(analyses.keys()))
    an = analyses[mode]
    tag = mode + out_tag
    # Build input file list
    input_file_list = []

    def build_list_of_files(file_list):
        # Check that the run list does not have duplicates
        if len(file_list) != len(set(file_list)):
            fatal_msg("Runlist has duplicated entries, fix runlist!")
        not_readable = []
        # Check that the input files can be opened
        for i in file_list:
            f = TFile(i.strip(), "READ")
            if not f.IsOpen():
                verbose_msg("Cannot open AOD file:", i, color=bcolors.WARNING)
                not_readable.append(i)
        if len(not_readable) > 0:
            warning_msg(len(not_readable),
                        "files cannot be read and will be skipped")
            for i in not_readable:
                file_list.remove(i)
        files_per_batch = []
        iter_file_list = iter(file_list)
        for i in range(0, len(file_list)):
            sub_set = list(islice(iter_file_list, batch_size))
            if len(sub_set) <= 0:
                continue
            files_per_batch.append(sub_set)
        run_list = []
        if len(files_per_batch) > 0:
            for i, lines in enumerate(files_per_batch):
                p = os.path.join(out_path, f"{i}")
                if not os.path.isdir(p):
                    os.makedirs(p)
                run_list.append(os.path.join(p, f"ListForRun5Analysis.{i}.txt"))
                with open(run_list[-1], "w") as f:
                    for j in lines:
                        f.write(j.strip() + "\n")
        msg("Number of runs:", len(run_list))
        return run_list

    if type(input_file) is list:
        input_file = [os.path.join(os.getcwd(), i) for i in input_file]
        input_file_list = build_list_of_files(input_file)
    elif not input_file.endswith(".root"):
        with open(input_file, "r") as f:
            lines = f.readlines()
            msg("Building input list from", len(lines),
                "inputs, limiting to", n_max_files)
            if len(lines) > n_max_files:
                lines = lines[0:n_max_files]
            input_file_list = build_list_of_files(lines)
    else:
        input_file_list = [os.path.join(os.getcwd(), input_file)]
    if dpl_configuration_file is not None:
        dpl_configuration_file = os.path.join(os.getcwd(),
                                              dpl_configuration_file)
    run_list = []
    for i, j in enumerate(input_file_list):
        run_list.append(set_o2_analysis(an,
                                        o2_arguments=o2_arguments,
                                        input_file=j,
                                        tag=tag,
                                        dpl_configuration_file=dpl_configuration_file))
    if not merge_only:
        run_in_parallel(processes=njobs,
                        job_runner=run_o2_analysis,
                        job_arguments=run_list,
                        job_message="Running analysis")
        if clean_localhost_after_running:
            run_cmd("find /tmp/ -maxdepth 1 -name localhost* -user $(whoami) | xargs rm -v")
    if (merge_output or merge_only) and len(run_list) > 1:
        files_to_merge = []
        for i in input_file_list:
            p = os.path.dirname(os.path.abspath(i))
            for j in os.listdir(p):
                if j.endswith(f"_{tag}.root"):
                    files_to_merge.append(os.path.join(p, j))
        if len(files_to_merge) == 0:
            warning_msg("Did not find any file to merge for tag", tag)
            return
        if len(files_to_merge) > len(run_list):
            fatal_msg("Trying to merge too many files!", tag)
        msg("Merging", len(files_to_merge), "results", color=bcolors.BOKBLUE)
        files_per_type = {}  # Files to be merged, per type
        for i in files_to_merge:
            fn = os.path.basename(i)
            files_per_type.setdefault(fn, [])
            files_per_type[fn].append(i)
        merged_files = []
        for i in files_per_type:
            merged_file = os.path.join(out_path, i)
            if avoid_overwriting_merge and os.path.isfile(merged_file):
                warning_msg("file", merged_file,
                            "is already found, remove it before merging,"
                            " you can use the --mergeonly flag to avoid running the analysis again")
                continue
            merged_files.append(merged_file)
            merge_file_list = os.path.join(os.path.dirname(os.path.abspath(merged_file)),
                                           "tomerge_" + "".join(i.split(".")[:-1]) + ".txt")
            verbose_msg("List of files to be merged:", merge_file_list)
            with open(merge_file_list, "w") as fmerge:
                for j in files_per_type[i]:
                    fmerge.write(j + "\n")
            run_cmd(f"hadd -j {njobs} -f {merged_file} `cat {merge_file_list}`",
                    log_file=merge_file_list.replace(".txt", ".log"))
        if len(merged_files) == 0:
            warning_msg("Merged no files")
        else:
            msg("Merging completed, merged:", *merged_files,
                color=bcolors.BOKGREEN)
action="store_true", help="Flag to show the workflow of the current tag") parser.add_argument( "--no_clean", "-nc", action="store_true", help="Flag to avoid cleaning the localhost files after running") args = parser.parse_args() set_verbose_mode(args) # Load analysis workflows workflows = configparser.RawConfigParser() msg("Analysis configuration from", args.workflows) for i in args.workflows: if not os.path.isfile(i): fatal_msg(f"Did not fid configuration file '{i}'") workflows.read(i) for i in workflows.sections(): analyses[i] = workflows.get(i, "w").split("\n") for i in args.modes: if i not in analyses.keys(): fatal_msg("Analysis", i, "not in", " ".join(workflows.sections()), "from configuration files:", args.workflows) if args.show: msg(i, "workflow:") for j in enumerate(analyses[i]): msg(" - ", *j) main(mode=i, input_file=args.input, dpl_configuration_file=args.configuration,
def main(configuration_file, config_entry, njobs, nruns, nevents,
         qa, output_path, clean_delphes_files, create_luts,
         turn_off_vertexing, append_production, use_nuclei,
         avoid_file_copy, debug_aod, tof_mismatch):
    arguments = locals()  # List of arguments to put into the log
    parser = configparser.RawConfigParser()
    parser.read(configuration_file)
    if config_entry not in parser.keys():
        k = list(parser.keys())
        k.sort()
        fatal_msg(f"Did not find configuration entry '{config_entry}' in config file",
                  configuration_file + "\n\t Available entries:\n\t\t" + "\n\t\t".join(k))
    run_cmd("./clean.sh > /dev/null 2>&1", check_status=False)
    # Dictionary of fetched options
    running_options = {}
    for i in arguments:
        running_options["ARG " + i] = arguments[i]

    def opt(entry, require=True):
        try:
            o = parser.get(config_entry, entry)
            b = ['yes', 'no', 'on', 'off', 'true', 'false']
            for i in b:
                if o.lower() == i:
                    o = parser.getboolean(config_entry, entry)
                    break
            verbose_msg("Got option", entry, "=", f"'{o}'")
            running_options[entry] = o
            return o
        except (configparser.NoOptionError, configparser.NoSectionError):
            if require:
                fatal_msg("Missing entry", f"'{entry}'",
                          "in configuration file", f"'{configuration_file}'")
            return None

    # Config from the config file
    # simulation configuration
    if output_path is None:
        output_path = ""
    output_path = os.path.join(os.getcwd(), output_path)
    msg("Output will be found in", f"'{output_path}'")
    if not os.path.isdir(output_path):
        msg("Creating output path")
        os.makedirs(output_path)
    if not os.path.isdir(output_path):
        raise RuntimeError("Cannot find output path", output_path)
    # detector configuration
    bField = opt("bField")
    sigmaT = opt("sigmaT")
    sigmaT0 = opt("sigmaT0")
    tof_radius = opt("tof_radius")
    rich_radius = opt("rich_radius")
    rich_index = opt("rich_index")
    forward_rich_index = opt("forward_rich_index")
    minimum_track_radius = opt("minimum_track_radius")
    etaMax = opt("etamax")
    barrel_half_length = opt("barrel_half_length")

    # copy relevant files in the working directory
    def do_copy(in_file, out_file=None, in_path=None):
        """Function to copy files"""
        in_file = os.path.normpath(in_file)  # Normalize path
        if out_file is None:
            # If left unconfigured use the same name, but in the current path
            out_file = os.path.basename(in_file)
        out_file = os.path.normpath(out_file)  # Normalize path
        if in_path is not None:
            in_file = os.path.join(in_path, in_file)
        in_file = os.path.expanduser(os.path.expandvars(in_file))
        if avoid_file_copy:
            if os.path.isfile(out_file) or (in_file == out_file):
                verbose_msg("Skipping copy of", in_file, "to", out_file,
                            "because of --avoid-config-copy")
            else:
                verbose_msg("Copying", in_file, "to", out_file,
                            "because of --avoid-config-copy")
                run_cmd(f"cp {in_file} {out_file}",
                        comment="Copying files without python")
            return
        verbose_msg("Copying", in_file, "to", out_file)
        shutil.copy2(in_file, out_file)

    # Fetching the propagation card
    do_copy(opt("propagate_card"), "propagate.tcl", in_path=opt("card_path"))
    lut_path = opt("lut_path")
    lut_tag = opt("lut_tag")
    lut_tag = f"rmin{int(float(minimum_track_radius))}.{lut_tag}"
    lut_particles = ["el", "mu", "pi", "ka", "pr"]
    if use_nuclei:
        lut_particles += ["de", "tr", "he3"]
    if create_luts:
        # Creating LUTs
        verbose_msg("Creating LUTs")
        lut_path = os.path.join(lut_path, "create_luts.sh")
        run_cmd(f"{lut_path} -p {lut_path} -t {lut_tag} -B {float(bField)*0.1} -R {minimum_track_radius} -P \"0 1 2 3 4 5 6\" -j 1 -F 2>&1",
                f"Creating the lookup tables with tag {lut_tag} from {lut_path} script")
    else:
        # Fetching LUTs
        verbose_msg(f"Fetching LUTs with tag {lut_tag} from path {lut_path}")
        for i in lut_particles:
            lut_bg = "{}kG".format(bField).replace(".", "")
            do_copy(f"lutCovm.{i}.{lut_bg}.{lut_tag}.dat",
                    f"lutCovm.{i}.dat",
                    in_path=lut_path)
    # Checking that we actually have LUTs
    for i in lut_particles:
        i = f"lutCovm.{i}.dat"
        if not os.path.isfile(i):
            fatal_msg("Did not find LUT file", i)
    custom_gen = opt("custom_gen", require=False)
    if custom_gen is None:
        # Checking that the generators are defined
        generators = opt("generators", require=False)
        if generators is None:
            fatal_msg("Did not find any generator configuration corresponding to the entry",
                      config_entry, "in your configuration file",
                      configuration_file)
        generators = generators.split(" ")
        for i in generators:
            do_copy(i)
        msg("Using pythia with configuration", generators)
    else:
        def check_duplicate(option_name):
            if f" {option_name}" in custom_gen:
                fatal_msg(f"Remove '{option_name}' from", custom_gen,
                          "as it will be automatically set")
        for i in ["--output", "-o", "--nevents", "-n"]:
            check_duplicate(i)
        if "INPUT_FILES" in custom_gen:
            input_hepmc_files = custom_gen.replace("INPUT_FILES", "").strip().split(" ")
            input_hepmc_file_list = []
            for i in input_hepmc_files:
                input_hepmc_file_list += glob.glob(os.path.normpath(i))
            if len(input_hepmc_file_list) >= nruns:
                input_hepmc_file_list = input_hepmc_file_list[0:nruns]
            else:
                nruns = len(input_hepmc_file_list)
            if len(input_hepmc_file_list) <= 0:
                fatal_msg("Did not find any input file matching to the request:",
                          custom_gen)
            custom_gen = "INPUT_FILES " + " ".join(input_hepmc_file_list)
            msg("Using", len(input_hepmc_file_list),
                "input HepMC file" + ("" if len(input_hepmc_file_list) == 1 else "s"),
                input_hepmc_file_list)
        else:
            msg("Using custom generator", custom_gen)

    # Printing configuration
    msg(" --- running createO2tables.py", color=bcolors.HEADER)
    msg("  n. jobs        =", njobs)
    msg("  n. runs        =", nruns)
    msg("  events per run =", nevents)
    msg("  tot. events    =", "{:.0e}".format(nevents * nruns))
    msg("  LUT path       =", f"'{lut_path}'")
    msg(" --- with detector configuration", color=bcolors.HEADER)
    msg("  B field            =", bField, "[kG]")
    msg("  Barrel radius      =", minimum_track_radius, "[cm]")
    msg("  Barrel half length =", barrel_half_length, "[cm]")
    if create_luts:
        msg("  Minimum track radius =", minimum_track_radius, "[cm]")
        msg("  LUT                  =", lut_tag)
    msg("  etaMax =", etaMax)
    msg(" --- with TOF configuration", color=bcolors.HEADER)
    msg("  sigmaT     =", sigmaT, "[ns]")
    msg("  sigmaT0    =", sigmaT0, "[ns]")
    msg("  tof_radius =", tof_radius, "[cm]")
    msg(" --- with RICH configuration", color=bcolors.HEADER)
    msg("  rich_radius =", rich_radius, "[cm]")
    msg("  rich_index  =", rich_index)
    msg(" --- with Forward RICH configuration", color=bcolors.HEADER)
    msg("  forward_rich_index =", forward_rich_index)
    aod_path = opt("aod_path")
    do_copy("createO2tables.h", in_path=aod_path)
    do_copy("createO2tables.C", in_path=aod_path)
    do_copy("muonAccEffPID.root", in_path=aod_path)
    if qa:
        do_copy("diagnostic_tools/dpl-config_std.json")

    def set_config(config_file, config, value):
        config = config.strip()
        value = value.strip()
        config_string = f"{config} {value}"
        # The closing `/` of the sed s/// expression is carried inside `value`
        run_cmd("sed -i -e \"" f"s/{config} .*$/{config_string}" "\" " + config_file)
        # Checking that the file has the correct configuration
        with open(config_file) as f:
            has_it = False
            config_string = config_string.replace("\\", "").strip("/")
            for lineno, line in enumerate(f):
                if line.strip() == config_string:
                    verbose_msg(f"Found config string '{config_string}'",
                                f"at line #{lineno} '{line.strip()}'")
                    has_it = True
                    break
            if not has_it:
                fatal_msg("Configuration file", config_file,
                          f"does not have config string '{config_string}'")

    # set magnetic field
    set_config("propagate.tcl", "set barrel_Bz", f"{bField}" r"e\-1/")
    set_config("createO2tables.C", "const double Bz = ", f"{bField}" r"e\-1\;/")
    if turn_off_vertexing:
        set_config("createO2tables.C", "constexpr bool do_vertexing = ", r"false\;/")
    else:
        # Check that the geometry file for the vertexing is there
        if not os.path.isfile("o2sim_grp.root") or not os.path.isfile("o2sim_geometry.root"):
            run_cmd("mkdir tmpo2sim && cd tmpo2sim && "
                    "o2-sim -m PIPE ITS MFT -g boxgen -n 1 -j 1 --configKeyValues 'BoxGun.number=1' && "
                    "cp o2sim_grp.root .. && cp o2sim_geometry.root .. && "
                    "cd .. && rm -r tmpo2sim")
    if use_nuclei:
        set_config("createO2tables.C", "constexpr bool enable_nuclei = ", r"true\;/")
    if debug_aod:
        set_config("createO2tables.C", "constexpr bool debug_qa = ", r"true\;/")
    if tof_mismatch:
        if tof_mismatch not in [1, 2]:
            fatal_msg("tof_mismatch", tof_mismatch, "is not 1 or 2")
        set_config("createO2tables.C", "constexpr int tof_mismatch = ",
                   f"{tof_mismatch}" r"\;/")
    if qa:
        set_config("dpl-config_std.json", "\\\"d_bz\\\":",
                   "\\\"" f"{bField}" "\\\"\\,/")
    # set barrel_radius
    set_config("propagate.tcl", "set barrel_Radius",
               f"{minimum_track_radius}" r"e\-2/")
    # set barrel_half_length
    set_config("propagate.tcl", "set barrel_HalfLength",
               f"{barrel_half_length}" r"e\-2/")
    # set tof_radius
    set_config("createO2tables.C", "constexpr double tof_radius =",
               f"{tof_radius}" r"\;/")
    # set tof_length
    set_config("createO2tables.C", "const double tof_length =",
               f"{barrel_half_length}" r"\;/")
    # set rich_radius
    set_config("createO2tables.C", "constexpr double rich_radius =",
               f"{rich_radius}" r"\;/")
    # set rich_index
    set_config("createO2tables.C", "const double rich_index =",
               f"{rich_index}" r"\;/")
    # set forward_rich_index
    set_config("createO2tables.C", "const double forward_rich_index =",
               f"{forward_rich_index}" r"\;/")
    # set acceptance
    set_config("propagate.tcl", "set barrel_Acceptance",
               r"\{ 0.0 + 1.0 * fabs(eta) < " f"{etaMax}" r" \}/")
    # set time resolution
    set_config("propagate.tcl", "set barrel_TimeResolution", f"{sigmaT}" r"e\-9/")
    set_config("createO2tables.C", "const double tof_sigmat =", f"{sigmaT}" r"\;/")
    set_config("createO2tables.C", "const double tof_sigmat0 =", f"{sigmaT0}" r"\;/")

    run_list = range(nruns)
    if append_production:
        if output_path is None:
            fatal_msg("Output path is not defined, cannot append")
        last_preexisting_aod = [each for each in os.listdir(output_path)
                                if each.endswith('.root') and "AODRun5" in each]
        if len(last_preexisting_aod) == 0:
            fatal_msg("Appending to a non existing production")
        last_preexisting_aod = sorted([int(each.replace("AODRun5.", "").replace(".root", ""))
                                       for each in last_preexisting_aod])[-1] + 1
        msg(f" Appending to production with {last_preexisting_aod} AODs",
            color=bcolors.BWARNING)
        run_list = range(last_preexisting_aod, last_preexisting_aod + nruns)

    def configure_run(run_number):
        # Create executable that runs Generation, Delphes and analysis
        runner_file = f"runner{run_number}.sh"
        with open(runner_file, "w") as f_run:
            def write_to_runner(line, log_file=None, check_status=False):
                """ Writes commands to runner """
                log_line = ""
                if log_file is not None:
                    log_line = f" &> {log_file} 2>&1"
                line += log_line
                line += "\n"
                f_run.write(line)
                if check_status:
                    f_run.write("\nReturnValue=$?\n")
                    f_run.write("if [[ $ReturnValue != 0 ]]; then\n")
                    f_run.write("    echo \"Encountered error with command: '")
                    line = line.replace(log_line, "")
                    f_run.write(line.replace("\"", "\\\"").strip())
                    f_run.write("'\"\n")
                    if log_file is not None:
                        f_run.write("    echo \"Check log: '")
                        f_run.write(log_file.strip() + "'\"\n")
                    f_run.write("    exit $ReturnValue\n")
                    f_run.write("fi\n")

            def copy_and_link(file_name):
                """ In runner, copies file to output path (if different from
                    current) and links it to current """
                if os.path.normpath(output_path) != os.getcwd():
                    write_to_runner(f"mv {file_name} {output_path} \n")
                    write_to_runner(f"ln -s {os.path.join(output_path, file_name)} . \n")

            write_to_runner("#! /usr/bin/env bash\n")
            delphes_file = f"delphes.{run_number}.root"
            delphes_log_file = delphes_file.replace(".root", ".log")
            hepmc_file = None
            # generator_cfg exists only in the DelphesPythia branch;
            # initialize it so the cleanup step below cannot hit a NameError
            generator_cfg = None
            mc_seed = random.randint(1, 800000000)
            if custom_gen:  # Using HEPMC
                hepmc_file = f"hepmcfile.{run_number}.hepmc"
                if "INPUT_FILES" in custom_gen:
                    input_hepmc_file = custom_gen.replace("INPUT_FILES", "").strip().split(" ")
                    input_hepmc_file = input_hepmc_file[run_number]
                    write_to_runner(f"ln -s {input_hepmc_file} {hepmc_file} \n")
                else:
                    gen_log_file = f"gen.{run_number}.log"
                    custom_gen_option = f" --output {hepmc_file} --nevents {nevents} --seed {mc_seed}"
                    write_to_runner(custom_gen + custom_gen_option,
                                    log_file=gen_log_file,
                                    check_status=True)
                write_to_runner(f"DelphesHepMC propagate.tcl {delphes_file} {hepmc_file}",
                                log_file=delphes_log_file,
                                check_status=True)
            else:  # Using DelphesPythia
                # copy generator configuration
                generator_cfg = f"generator.{run_number}.cfg"
                generator_orig = generators[0].split("/")[-1]
                do_copy(generator_orig, generator_cfg)
                # Adjust configuration file
                with open(generator_cfg, "a") as f_cfg:
                    # number of events and random seed
                    f_cfg.write("\n\n\n#### Additional part ###\n\n\n\n")
                    f_cfg.write(f"Main:numberOfEvents {nevents}\n")
                    f_cfg.write("Random:setSeed = on\n")
                    f_cfg.write(f"Random:seed = {mc_seed}\n")
                    # collision time spread [mm/c]
                    f_cfg.write("Beams:allowVertexSpread on \n")
                    f_cfg.write("Beams:sigmaTime 60.\n")
                    for i in generators[1:]:
                        with open(i.split("/")[-1], "r") as f_append:
                            f_cfg.write(f_append.read())
                write_to_runner(f"DelphesPythia8 propagate.tcl {generator_cfg} {delphes_file}",
                                log_file=delphes_log_file,
                                check_status=True)
            aod_file = f"AODRun5.{run_number}.root"
            aod_log_file = aod_file.replace(".root", ".log")
            write_to_runner(f"root -l -b -q 'createO2tables.C+(\"{delphes_file}\", \"tmp_{aod_file}\", 0)'",
                            log_file=aod_log_file,
                            check_status=True)
            # Check that there were no O2 errors
            write_to_runner(f"if grep -q \"\\[ERROR\\]\" {aod_log_file}; then echo \": got some errors in '{aod_log_file}'\" && echo \"Found some ERROR in this log\" >> {aod_log_file}; fi")
            write_to_runner(f"if grep -q \"\\[FATAL\\]\" {aod_log_file}; then echo \": got some fatals in '{aod_log_file}'\" && echo \"Found some FATAL in this log\" >> {aod_log_file} && exit 1; fi")
            # Rename the temporary AODs to standard AODs
            write_to_runner(f"mv tmp_{aod_file} {aod_file}", check_status=True)
            if not clean_delphes_files:
                copy_and_link(delphes_file)
                if hepmc_file is not None:
                    copy_and_link(hepmc_file)
            copy_and_link(aod_file)
            if clean_delphes_files:
                write_to_runner(f"rm {delphes_file}")
                if generator_cfg is not None:
                    write_to_runner(f"rm {generator_cfg}")
                if hepmc_file is not None:
                    write_to_runner(f"rm {hepmc_file}")
            write_to_runner("exit 0\n")

    # Configuring all the runs
    for i in run_list:
        configure_run(i)
    # Compiling the table creator macro once for all
    run_cmd("root -l -b -q 'createO2tables.C+(\"\")' > /dev/null 2>&1",
            comment="to compile the table creator only once, before running")
    if not os.path.isfile("createO2tables_C.so"):
        run_cmd("root -l -b -q 'createO2tables.C+(\"\")'",
                comment="to compile with full log")
        fatal_msg("'createO2tables.C' did not compile!")

    total_processing_time = time.time()
    msg(" --- start processing the runs ", color=bcolors.HEADER)
    run_in_parallel(processes=njobs,
                    job_runner=process_run,
                    job_arguments=run_list,
                    job_message="Running production")
    # merge runs when all done
    msg(" --- all runs are processed, so long", color=bcolors.HEADER)
    total_processing_time = time.time() - total_processing_time
    msg(f"-- took {total_processing_time} seconds in total --",
        color=bcolors.BOKGREEN)
    # Writing the list of produced AODs
    output_list_file = "listfiles.txt"
    with open(output_list_file, "w") as listfiles:
        for i in os.listdir("."):
            if "AODRun5." in i and i.endswith(".root"):
                listfiles.write(f"{os.getcwd()}/{i}\n")
    # Writing summary of production
    summaryfile = "summary.txt"
    with open(summaryfile, "w") as f:
        f.write("\n## Summary of last run ##\n")
        now = datetime.now()
        dt_string = now.strftime("%d/%m/%Y %H:%M:%S")
        f.write(f"Finished at {dt_string}\n")
        f.write(f"Took {total_processing_time} seconds\n")

        def write_config(entry, prefix=""):
            # str.strip("ARG ") would strip any of the characters 'A', 'R',
            # 'G', ' ' from both ends; remove only the literal prefix instead
            key = entry[len("ARG "):] if entry.startswith("ARG ") else entry
            f.write(prefix + key + f" = {running_options[entry]}\n")

        f.write("\n## Configuration ##\n")
        for i in running_options:
            if "ARG" in i:
                write_config(i, prefix=" - ")
        f.write("\n## Options ##\n")
        for i in running_options:
            if "ARG" not in i:
                write_config(i, prefix=" * ")
        output_size = sum(os.path.getsize(os.path.join(output_path, fname))
                          for fname in os.listdir(output_path)
                          if os.path.isfile(os.path.join(output_path, fname)))
        f.write("\n## Size of the output ##\n")
        f.write(f" - {output_size} bytes\n")
        f.write(f" - {output_size/1e6} MB\n")
        f.write(f" - {output_size/1e9} GB\n")
    run_cmd("echo >> " + summaryfile)
    run_cmd("echo + DelphesO2 Version + >> " + summaryfile)
    run_cmd("git rev-parse HEAD >> " + summaryfile, check_status=False)
    if os.path.normpath(output_path) != os.getcwd():
        if append_production:
            s = os.path.join(output_path, summaryfile)
            run_cmd(f"echo '' >> {s}")
            run_cmd(f"echo ' **' >> {s}")
            run_cmd(f"echo 'Appended production' >> {s}")
            run_cmd(f"echo ' **' >> {s}")
            run_cmd(f"echo '' >> {s}")
            run_cmd(f"cat {summaryfile} >> {s}")
        else:
            run_cmd(f"mv {summaryfile} {output_path}")
            run_cmd(f"ln -s {os.path.join(output_path, summaryfile)} ./")
    if qa:
        msg(" --- running test analysis", color=bcolors.HEADER)
        run_cmd(f"./diagnostic_tools/doanalysis.py TrackQA RICH TOF -i {output_list_file} -M 25 -B 25")
    if tof_mismatch == 1:
        # TOF mismatch in create mode
        run_cmd(f"hadd -j {njobs} -f tofMM.root tof_mismatch_template_DF_*.root && rm tof_mismatch_template_DF_*.root")
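# Illustration (assumed values) of what one set_config call above expands to:
# set_config("propagate.tcl", "set barrel_Bz", "2.0" r"e\-1/") runs roughly
#
#   sed -i -e "s/set barrel_Bz .*$/set barrel_Bz 2.0e\-1/" propagate.tcl
#
# i.e. the whole `set barrel_Bz ...` line is rewritten in place; the `/`
# closing the s/// expression travels inside `value`. The verification pass
# then strips backslashes and the trailing `/` before comparing, so the file
# must end up containing the literal line `set barrel_Bz 2.0e-1`.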
"Option to avoid copying the configuration files and to use the ones directly in the current path e.g. for grid use" ) parser.add_argument( "--use-preexisting-luts", "-l", action="store_true", help= "Option to use preexisting LUTs instead of creating new ones, in this case LUTs with the requested tag are fetched from the LUT path. By default new LUTs are created at each run." ) args = parser.parse_args() set_verbose_mode(args) # Check arguments if args.append and args.output_path is None: fatal_msg( "Asked to append production but did not specify output path (option '-o')" ) main(configuration_file=args.configuration_file, config_entry=args.entry, njobs=args.njobs, nevents=args.nevents, nruns=args.nruns, output_path=args.output_path, clean_delphes_files=args.clean_delphes, qa=args.qa, create_luts=not args.use_preexisting_luts, turn_off_vertexing=args.no_vertexing, append_production=args.append, use_nuclei=not args.no_nuclei, avoid_file_copy=args.avoid_config_copy, debug_aod=args.debug,