def copylist(fname="",
             jobs=InputArgument(1, "Number of parallel jobs to use",
                                ["--njobs", "-j"], int)):
    """Takes a text file and downloads the files from grid"""
    if jobs is None:
        jobs = 1
    verbose_msg("Copying files from list", fname, "with", jobs, "jobs")
    fname = path.normpath(fname)
    if not path.isfile(fname):
        warning_msg("Input file not provided! Aborting")
        return
    sofar = copied(fname, "So far")
    f = open(fname, "r")
    Group = []
    for line in f:
        if "%" in line:
            msg("Character % encountered! Aborting")
            break
        if "#" in line:
            msg("Character # encountered! Skipping")
            continue
        line = "./" + line
        if jobs == 1:
            copyfile(line)
        else:
            Group.append(line)
    if jobs > 1:
        msg("Copying list in parallel with", jobs, "jobs")
        run_in_parallel(processes=jobs,
                        job_runner=copyfile,
                        job_arguments=Group,
                        job_message="Downloading files",
                        linearize_single_core=True)
    copied(fname, extra_msg="In recent run", last_time=sofar)
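Several of these examples hand work off to a run_in_parallel helper that is not shown in the listing. Below is a minimal, hypothetical sketch of such a wrapper, assuming it fans the arguments out over a multiprocessing.Pool and runs sequentially when a single process is requested with linearize_single_core; the real helper in the source repository may behave differently.

import multiprocessing


def run_in_parallel(processes, job_runner, job_arguments,
                    job_message="", linearize_single_core=False):
    # Illustrative re-implementation only; not the original helper.
    if job_message:
        print(job_message)
    if processes <= 1 and linearize_single_core:
        # Run in the current process, which keeps tracebacks readable.
        return [job_runner(arg) for arg in job_arguments]
    with multiprocessing.Pool(processes=processes) as pool:
        # Each entry of job_arguments is passed to job_runner in a worker.
        return pool.map(job_runner, job_arguments)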
Example #2
def main():
    parser = DownloaderArgumentParser()
    parser.add_argument(
        '--name',
        metavar='PAT[,PAT...]',
        help=
        'download only models whose names match at least one of the specified patterns'
    )
    parser.add_argument(
        '--list',
        type=Path,
        metavar='FILE.LST',
        help=
        'download only models whose names match at least one of the patterns in the specified file'
    )
    parser.add_argument('--all',
                        action='store_true',
                        help='download all available models')
    parser.add_argument('--print_all',
                        action='store_true',
                        help='print all available models')
    parser.add_argument(
        '--precisions',
        metavar='PREC[,PREC...]',
        help=
        'download only models with the specified precisions (only relevant for DLDT networks)'
    )
    parser.add_argument('-o',
                        '--output_dir',
                        type=Path,
                        metavar='DIR',
                        default=Path.cwd(),
                        help='path where to save models')
    parser.add_argument(
        '--cache_dir',
        type=Path,
        metavar='DIR',
        help='directory to use as a cache for downloaded files')
    parser.add_argument('--num_attempts',
                        type=positive_int_arg,
                        metavar='N',
                        default=1,
                        help='attempt each download up to N times')
    parser.add_argument('--progress_format',
                        choices=('text', 'json'),
                        default='text',
                        help='which format to use for progress reporting')
    # unlike Model Converter, -jauto is not supported here, because CPU count has no
    # relation to the optimal number of concurrent downloads
    parser.add_argument('-j',
                        '--jobs',
                        type=positive_int_arg,
                        metavar='N',
                        default=1,
                        help='how many downloads to perform concurrently')

    args = parser.parse_args()

    def make_reporter(context):
        return common.Reporter(
            context,
            enable_human_output=args.progress_format == 'text',
            enable_json_output=args.progress_format == 'json')

    reporter = make_reporter(common.DirectOutputContext())

    cache = NullCache() if args.cache_dir is None else DirCache(args.cache_dir)
    models = common.load_models_from_args(parser, args)

    failed_models = set()

    if args.precisions is None:
        requested_precisions = common.KNOWN_PRECISIONS
    else:
        requested_precisions = set(args.precisions.split(','))
        unknown_precisions = requested_precisions - common.KNOWN_PRECISIONS
        if unknown_precisions:
            sys.exit('Unknown precisions specified: {}.'.format(', '.join(
                sorted(unknown_precisions))))

    reporter.print_group_heading('Downloading models')
    with contextlib.ExitStack() as exit_stack:
        session_factory = ThreadSessionFactory(exit_stack)
        if args.jobs == 1:
            results = [
                download_model(reporter, args, cache, session_factory,
                               requested_precisions, model) for model in models
            ]
        else:
            results = common.run_in_parallel(
                args.jobs, lambda context, model: download_model(
                    make_reporter(context), args, cache, session_factory,
                    requested_precisions, model), models)

    failed_models = {
        model.name
        for model, successful in zip(models, results) if not successful
    }

    reporter.print_group_heading('Post-processing')
    for model in models:
        if model.name in failed_models or not model.postprocessing:
            continue

        reporter.emit_event('model_postprocessing_begin', model=model.name)

        output = args.output_dir / model.subdirectory

        for postproc in model.postprocessing:
            postproc.apply(reporter, output)

        reporter.emit_event('model_postprocessing_end', model=model.name)

    if failed_models:
        reporter.print('FAILED:')
        for failed_model_name in failed_models:
            reporter.print(failed_model_name)
        sys.exit(1)
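The --num_attempts and --jobs options above use positive_int_arg as their argparse type. Here is a minimal sketch of what such a validator could look like, assuming it only needs to reject non-positive values; the actual function shipped with the downloader may differ.

import argparse


def positive_int_arg(value_str):
    # Convert the command-line string and reject anything below 1.
    try:
        value = int(value_str)
    except ValueError:
        raise argparse.ArgumentTypeError(
            "must be an integer, got '{}'".format(value_str))
    if value <= 0:
        raise argparse.ArgumentTypeError(
            "must be a positive integer, got {}".format(value))
    return value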
Example #3
def main(mode,
         input_file,
         out_path,
         out_tag="",
         batch_size=4,
         n_max_files=100,
         dpl_configuration_file=None,
         njobs=1,
         merge_output=True,
         merge_only=False,
         shm_mem_size=16000000000,
         rate_lim=1000000000,
         readers=1,
         avoid_overwriting_merge=False,
         clean_localhost_after_running=True,
         extra_arguments="",
         resume_previous_analysis=False,
         check_input_file_integrity=True,
         analysis_timeout=None,
         linearize_single_core=True):
    if do_bash_script:
        njobs = 1
        linearize_single_core = True

    if len(input_file) == 1:
        input_file = input_file[0]
    else:
        input_file = input_file[0:n_max_files]
    if not merge_only:
        msg("Running",
            f"'{mode}'",
            "analysis on",
            f"'{input_file}'",
            color=bcolors.BOKBLUE)
        msg("Maximum",
            n_max_files,
            "files with batch size",
            batch_size,
            "and",
            njobs,
            "jobs" if njobs > 1 else "job",
            color=bcolors.BOKBLUE)
    else:
        msg("Merging output of",
            f"'{mode}'",
            "analysis",
            color=bcolors.BOKBLUE)
    if analysis_timeout is not None:
        msg("Using analysis timeout of",
            analysis_timeout,
            "seconds",
            color=bcolors.BOKBLUE)
        analysis_timeout = f"--time-limit {analysis_timeout}"
    else:
        analysis_timeout = ""

    o2_arguments = f"-b --shm-segment-size {shm_mem_size} --aod-memory-rate-limit {rate_lim} --readers {readers} {analysis_timeout}"
    o2_arguments += " " + extra_arguments
    if mode not in analyses:
        raise ValueError(f"Did not find analyses matching mode '{mode}', "
                         "please choose among: " + ", ".join(analyses.keys()))
    an = analyses[mode]
    tag = mode + out_tag
    # Build input file list
    input_file_list = []

    def is_root_file_sane(file_name_to_check):
        file_name_to_check = file_name_to_check.strip()
        if not os.path.isfile(file_name_to_check):
            warning_msg("File", file_name_to_check, "does not exist")
            return "Does not exist"
        file_to_check = TFile(file_name_to_check, "READ")
        if not file_to_check.IsOpen():
            warning_msg("Cannot open AOD file:", file_name_to_check)
            return "Cannot be open"
        elif file_to_check.TestBit(TFile.kRecovered):
            verbose_msg(file_name_to_check, "was a recovered file")
            return "Was recovered"
        else:
            verbose_msg(file_name_to_check, "is OK")
            return "Is Ok"

    def build_list_of_files(file_list):
        verbose_msg("Building list of files from", file_list)
        # Check that runlist does not have duplicates
        unique_file_list = set(file_list)
        if len(file_list) != len(unique_file_list):
            fatal_msg("Runlist has duplicated entries, fix runlist!",
                      len(unique_file_list), "unique files, while got",
                      len(file_list), "files")
        file_status = {
            "Does not exist": [],
            "Cannot be open": [],
            "Was recovered": [],
            "Is Ok": []
        }
        if check_input_file_integrity:  # Check that input files can be open
            for i in file_list:
                verbose_msg("Checking that TFile", i.strip(),
                            "can be processed")
                file_status[is_root_file_sane(i)].append(i)
        recovered_files = file_status["Was recovered"]
        not_readable = []
        for i in file_status:
            if i == "Is Ok":
                continue
            not_readable += file_status[i]
        if len(recovered_files) > 0:
            msg("Recovered", len(recovered_files), "files:",
                "\n".join(recovered_files))
        if len(not_readable) > 0:
            warning_msg(len(not_readable), "over", len(file_list),
                        "files cannot be read and will be skipped")
            for i in not_readable:
                if i not in file_list:
                    warning_msg("did not find file to remove", f"'{i}'")
                file_list.remove(i)

        files_per_batch = []
        iter_file_list = iter(file_list)
        for i in range(0, len(file_list)):
            sub_set = list(islice(iter_file_list, batch_size))
            if len(sub_set) <= 0:
                continue
            files_per_batch.append(sub_set)
        run_list = []
        if len(files_per_batch) > 0:
            for i, lines in enumerate(files_per_batch):
                p = os.path.join(out_path, f"{i}")
                if not os.path.isdir(p):
                    os.makedirs(p)
                run_list.append(os.path.join(p,
                                             f"ListForRun5Analysis.{i}.txt"))
                with open(run_list[-1], "w") as f:
                    for j in lines:
                        f.write(j.strip() + "\n")
        msg("Number of runs:", len(run_list))
        return run_list

    if isinstance(input_file, list):
        input_file = [os.path.join(os.getcwd(), i) for i in input_file]
        input_file_list = build_list_of_files(input_file)
    elif not input_file.endswith(".root"):
        with open(input_file, "r") as f:
            lines = f.readlines()
            msg("Building input list from", len(lines), "inputs, limiting to",
                n_max_files)
            if len(lines) > n_max_files:
                lines = lines[0:n_max_files]
            lines = [
                os.path.join(os.path.dirname(os.path.abspath(input_file)), i)
                for i in lines
            ]
            input_file_list = build_list_of_files(lines)
    else:
        input_file_list = [os.path.join(os.getcwd(), input_file)]

    if dpl_configuration_file is not None:
        dpl_configuration_file = os.path.join(os.getcwd(),
                                              dpl_configuration_file)

    run_list = []
    for i, j in enumerate(input_file_list):
        run_list.append(
            set_o2_analysis(an,
                            o2_arguments=o2_arguments,
                            input_file=j,
                            tag=tag,
                            dpl_configuration_file=dpl_configuration_file,
                            resume_previous_analysis=resume_previous_analysis,
                            write_runner_script=not merge_only))
    if not merge_only:
        if do_bash_script:
            with open("parallelbash.sh", "w") as f:
                f.write(f"#!/bin/bash\n\n")
                f.write(f"echo \"Start running\"\n\n")
                f.write(f"date\n\n")
                f.write("""function trap_ctrlc (){
                            # perform cleanup here
                            echo "Ctrl-C caught...performing clean up"
                            exit 2
                        }\n\n""")
                f.write("""trap "trap_ctrlc" 2\n""")

        run_in_parallel(
            processes=njobs,
            job_runner=run_o2_analysis,
            job_arguments=run_list,
            job_message=f"Running analysis, it's {datetime.datetime.now()}",
            linearize_single_core=linearize_single_core)
        if do_bash_script:
            with open("parallelbash.sh", "a") as f:
                f.write(f"wait\n\n")
                f.write(f"date\n\n")
            msg("Now run bash script `bash parallelbash.sh`")
            return
        if clean_localhost_after_running:
            run_cmd(
                "find /tmp/ -maxdepth 1 -name localhost* -user $(whoami) | xargs rm -v 2>&1",
                check_status=False)

    if (merge_output or merge_only) and len(run_list) > 1:
        files_to_merge = []
        for i in input_file_list:
            p = os.path.dirname(os.path.abspath(i))
            for j in os.listdir(p):
                if j.endswith(f"_{tag}.root"):
                    files_to_merge.append(os.path.join(p, j))
        if len(files_to_merge) == 0:
            warning_msg("Did not find any file to merge for tag", tag)
            return
        files_per_type = {}  # List of files to be merged per type
        # List of files to be merged per type that are not declared sane
        non_sane_files_per_type = {}
        for i in files_to_merge:
            fn = os.path.basename(i)
            if is_root_file_sane(i) != "Is Ok":
                non_sane_files_per_type.setdefault(fn, []).append(i)
                warning_msg("Result file", i, "is not sane")
                continue
            files_per_type.setdefault(fn, [])
            files_per_type[fn].append(i)
        for i in non_sane_files_per_type:
            warning_msg("Non sane files for type", i)
            for j in non_sane_files_per_type[i]:
                msg(j)
        merged_files = []
        for i in files_per_type:
            merged_file = os.path.join(out_path, i)
            if avoid_overwriting_merge and os.path.isfile(merged_file):
                warning_msg(
                    "file", merged_file,
                    "is already found, remove it before merging, you can use the --mergeonly flag to avoid running the analysis again"
                )
                continue
            merged_files.append(merged_file)
            merge_file_list = os.path.join(
                os.path.dirname(os.path.abspath(merged_file)),
                "tomerge_" + "".join(i.split(".")[:-1]) + ".txt")
            verbose_msg("List of files to be merged:", merge_file_list)
            with open(merge_file_list, "w") as fmerge:
                for j in files_per_type[i]:
                    fmerge.write(j + "\n")
            if len(files_per_type[i]) > len(run_list):
                fatal_msg("Trying to merge too many files of type", i,
                          "for tag", tag, ":", len(files_per_type[i]), "vs",
                          len(run_list), "runs")
            msg("Merging", len(files_per_type[i]), "files to", merged_file)
            run_cmd(
                f"hadd -j {njobs} -f {merged_file} `cat {merge_file_list}`",
                log_file=merge_file_list.replace(".txt", ".log"),
                time_it=True,
                comment=f"Merging to {merged_file}")
        if len(merged_files) == 0:
            warning_msg("Merged no files")
        else:
            msg("Merging completed, merged:",
                *merged_files,
                color=bcolors.BOKGREEN)
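The build_list_of_files helper above splits the run list into batches of batch_size entries using itertools.islice. The same idiom in isolation, with hypothetical names, for reference:

from itertools import islice


def batch_list(entries, batch_size):
    # Consume the iterator batch_size items at a time until it is exhausted.
    entries_iter = iter(entries)
    batches = []
    while True:
        chunk = list(islice(entries_iter, batch_size))
        if not chunk:
            break
        batches.append(chunk)
    return batches


# batch_list(["a", "b", "c", "d", "e"], 2) -> [["a", "b"], ["c", "d"], ["e"]]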
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-d',
        '--download_dir',
        type=Path,
        metavar='DIR',
        default=Path.cwd(),
        help='root of the directory tree with downloaded model files')
    parser.add_argument(
        '-o',
        '--output_dir',
        type=Path,
        metavar='DIR',
        help='root of the directory tree to place converted files into')
    parser.add_argument(
        '--name',
        metavar='PAT[,PAT...]',
        help=
        'convert only models whose names match at least one of the specified patterns'
    )
    parser.add_argument(
        '--list',
        type=Path,
        metavar='FILE.LST',
        help=
        'convert only models whose names match at least one of the patterns in the specified file'
    )
    parser.add_argument('--all',
                        action='store_true',
                        help='convert all available models')
    parser.add_argument('--print_all',
                        action='store_true',
                        help='print all available models')
    parser.add_argument(
        '--precisions',
        metavar='PREC[,PREC...]',
        help=
        'run only conversions that produce models with the specified precisions'
    )
    parser.add_argument('-p',
                        '--python',
                        type=Path,
                        metavar='PYTHON',
                        default=sys.executable,
                        help='Python executable to run Model Optimizer with')
    parser.add_argument('--mo',
                        type=Path,
                        metavar='MO.PY',
                        help='Model Optimizer entry point script')
    parser.add_argument('--add_mo_arg',
                        dest='extra_mo_args',
                        metavar='ARG',
                        action='append',
                        help='Extra argument to pass to Model Optimizer')
    parser.add_argument(
        '--dry_run',
        action='store_true',
        help='Print the conversion commands without running them')
    parser.add_argument('-j',
                        '--jobs',
                        type=num_jobs_arg,
                        default=1,
                        help='number of conversions to run concurrently')

    # aliases for backwards compatibility
    parser.add_argument('--add-mo-arg',
                        dest='extra_mo_args',
                        action='append',
                        help=argparse.SUPPRESS)
    parser.add_argument('--dry-run',
                        action='store_true',
                        help=argparse.SUPPRESS)

    args = parser.parse_args()

    mo_path = args.mo
    if mo_path is None:
        try:
            mo_path = Path(os.environ['INTEL_OPENVINO_DIR']
                           ) / 'deployment_tools/model_optimizer/mo.py'
        except KeyError:
            sys.exit(
                'Unable to locate Model Optimizer. ' +
                'Use --mo or run setupvars.sh/setupvars.bat from the OpenVINO toolkit.'
            )

    extra_mo_args = args.extra_mo_args or []

    if args.precisions is None:
        requested_precisions = common.KNOWN_PRECISIONS
    else:
        requested_precisions = set(args.precisions.split(','))
        unknown_precisions = requested_precisions - common.KNOWN_PRECISIONS
        if unknown_precisions:
            sys.exit('Unknown precisions specified: {}.'.format(', '.join(
                sorted(unknown_precisions))))

    models = common.load_models_from_args(parser, args)

    output_dir = args.download_dir if args.output_dir is None else args.output_dir

    def convert(reporter, model):
        if model.mo_args is None:
            reporter.print_section_heading(
                'Skipping {} (no conversions defined)', model.name)
            reporter.print()
            return True

        model_precisions = requested_precisions & model.precisions
        if not model_precisions:
            reporter.print_section_heading(
                'Skipping {} (all conversions skipped)', model.name)
            reporter.print()
            return True

        model_format = model.framework

        if model.conversion_to_onnx_args:
            if not convert_to_onnx(reporter, model, output_dir, args):
                return False
            model_format = 'onnx'

        expanded_mo_args = [
            string.Template(arg).substitute(
                dl_dir=args.download_dir / model.subdirectory,
                mo_dir=mo_path.parent,
                conv_dir=output_dir / model.subdirectory,
                config_dir=common.MODEL_ROOT / model.subdirectory)
            for arg in model.mo_args
        ]

        for model_precision in sorted(model_precisions):
            mo_cmd = [
                str(args.python), '--',
                str(mo_path), '--framework={}'.format(model_format),
                '--data_type={}'.format(model_precision),
                '--output_dir={}'.format(output_dir / model.subdirectory /
                                         model_precision),
                '--model_name={}'.format(model.name), *expanded_mo_args,
                *extra_mo_args
            ]

            reporter.print_section_heading(
                '{}Converting {} to IR ({})',
                '(DRY RUN) ' if args.dry_run else '', model.name,
                model_precision)

            reporter.print('Conversion command: {}',
                           common.command_string(mo_cmd))

            if not args.dry_run:
                reporter.print(flush=True)

                if not reporter.job_context.subprocess(mo_cmd):
                    return False

            reporter.print()

        return True

    reporter = common.Reporter(common.DirectOutputContext())

    if args.jobs == 1 or args.dry_run:
        results = [convert(reporter, model) for model in models]
    else:
        results = common.run_in_parallel(
            args.jobs,
            lambda context, model: convert(common.Reporter(context), model),
            models)

    failed_models = [
        model.name for model, successful in zip(models, results)
        if not successful
    ]

    if failed_models:
        reporter.print('FAILED:')
        for failed_model_name in failed_models:
            reporter.print(failed_model_name)
        sys.exit(1)
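The expanded_mo_args list above relies on string.Template to substitute the per-model directories (dl_dir, mo_dir, conv_dir, config_dir) into each Model Optimizer argument. A small self-contained illustration of that mechanism, using made-up paths:

import string
from pathlib import Path

arg = "--input_model=$dl_dir/model.caffemodel --output_dir=$conv_dir"
expanded = string.Template(arg).substitute(
    dl_dir=Path("downloads/public/example-model"),
    conv_dir=Path("converted/public/example-model"))
print(expanded)
# --input_model=downloads/public/example-model/model.caffemodel ...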
Example #5
def main(input_files,
         do_merge=True,
         sanity_file=None,
         max_bunch_size=200,
         out_path="./",
         over_write_lists=False,
         jobs=1):
    msg("Merging to", out_path, "with maximum input size", max_bunch_size)
    out_path = os.path.normpath(out_path)
    if not os.path.exists(out_path):
        warning_msg("Output path", out_path, "does not exist")
        ans = input("Create it? (Y/[N])")
        if ans == "Y":
            os.makedirs(out_path)
        else:
            msg("Exit")
            return
    sane_files = None
    if sanity_file is not None:
        msg("Using sanity file", sanity_file)
        sane_files = []
        with open(sanity_file, "r") as f:
            for i in f:
                sane_files.append(os.path.abspath(os.path.normpath(i.strip())))
    size_of_files = {}
    for i in input_files:
        i = os.path.normpath(i.strip())
        if sane_files is not None and os.path.abspath(i) not in sane_files:
            msg("Skipping", i, "because not in sanity file")
            continue
        size_of_files[i] = os.path.getsize(i) * 1e-6
    bunched_files = [[]]
    bunched_sizes = []
    bunch_size = []
    for i in size_of_files:
        verbose_msg("Checking file", i, "of size", size_of_files[i], "MB")
        if sum(bunch_size) > max_bunch_size:
            verbose_msg("Bunch size", sum(bunch_size), "reached limit with",
                        len(bunch_size), "files", max_bunch_size, "MB",
                        "preparing next bunch!")
            bunched_files.append([])
            bunched_sizes.append(sum(bunch_size))
            bunch_size = []
        bunch_size.append(size_of_files[i])
        bunched_files[-1].append(i)
    bunched_sizes.append(sum(bunch_size))
    verbose_msg("Got", len(bunched_files), "bunches")
    for i, j in enumerate(bunched_files):
        verbose_msg(f"{i})", bunched_sizes[i], "MB, with", len(j), j)

    msg("Preparing", len(bunched_files), "bunched lists")
    bunched_aod_names.clear()
    for i, j in enumerate(bunched_files):
        fn = f"aod_merge_list_bunch{i}.txt"
        verbose_msg("Writing bunch", i, "to", fn)
        if not over_write_lists:
            if os.path.isfile(fn):
                fatal_msg(fn, "already present, remove it first")
        with open(fn, "w") as f:
            for k in j:
                f.write(k + "\n")
        if do_merge:
            out_aod = os.path.join(out_path, f"AO2D_Merge_{i}.root")
            if os.path.isfile(out_aod):
                fatal_msg(out_aod, "already present")
            bunched_aod_names[fn] = {
                "out_aod": out_aod,
                "file_index": i,
                "total_files": len(bunched_files),
                "input_size": bunched_sizes[i]
            }

    run_in_parallel(jobs,
                    run_merge,
                    list(bunched_aod_names.keys()),
                    job_message="Running AOD merging",
                    linearize_single_core=True)
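The loop above bunches files greedily by size until max_bunch_size (in MB) is exceeded, then starts a new bunch. A compact standalone variant of the same idea, shown only as a sketch (it closes a bunch just before the limit would be crossed, rather than just after, unlike the example above):

import os


def bunch_by_size(file_names, max_bunch_mb):
    # Greedily fill bunches; open a new one once adding a file would pass the limit.
    bunches, current, current_mb = [], [], 0.0
    for name in file_names:
        size_mb = os.path.getsize(name) * 1e-6
        if current and current_mb + size_mb > max_bunch_mb:
            bunches.append(current)
            current, current_mb = [], 0.0
        current.append(name)
        current_mb += size_mb
    if current:
        bunches.append(current)
    return bunches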
Example #6
def main(mode,
         input_file,
         out_path,
         out_tag="",
         batch_size=4,
         n_max_files=100,
         dpl_configuration_file=None,
         njobs=1,
         merge_output=True,
         merge_only=False,
         shm_mem_size=16000000000,
         rate_lim=1000000000,
         readers=1,
         avoid_overwriting_merge=False,
         clean_localhost_after_running=True,
         extra_arguments=""):
    if len(input_file) == 1:
        input_file = input_file[0]
    else:
        input_file = input_file[0:n_max_files]
    if not merge_only:
        msg("Running",
            f"'{mode}'",
            "analysis on",
            f"'{input_file}'",
            color=bcolors.BOKBLUE)
        msg("Maximum",
            n_max_files,
            "files with batch size",
            batch_size,
            "and",
            njobs,
            "jobs" if njobs > 1 else "job",
            color=bcolors.BOKBLUE)
    else:
        msg("Merging output of",
            f"'{mode}'",
            "analysis",
            color=bcolors.BOKBLUE)
    o2_arguments = f"-b --shm-segment-size {shm_mem_size} --aod-memory-rate-limit {rate_lim} --readers {readers}"
    o2_arguments += " " + extra_arguments
    if mode not in analyses:
        raise ValueError(f"Did not find analyses matching mode '{mode}', "
                         "please choose among: " + ", ".join(analyses.keys()))
    an = analyses[mode]
    tag = mode + out_tag
    # Build input file list
    input_file_list = []

    def build_list_of_files(file_list):
        if len(file_list) != len(
                set(file_list)):  # Check that runlist does not have duplicates
            fatal_msg("Runlist has duplicated entries, fix runlist!")
        not_readable = []
        for i in file_list:  # Check that input files can be open
            f = TFile(i.strip(), "READ")
            if not f.IsOpen():
                verbose_msg("Cannot open AOD file:", i, color=bcolors.WARNING)
                not_readable.append(i)
        if len(not_readable) > 0:
            warning_msg(len(not_readable),
                        "files cannot be read and will be skipped")
            for i in not_readable:
                file_list.remove(i)

        files_per_batch = []
        iter_file_list = iter(file_list)
        for i in range(0, len(file_list)):
            sub_set = list(islice(iter_file_list, batch_size))
            if len(sub_set) <= 0:
                continue
            files_per_batch.append(sub_set)
        run_list = []
        if len(files_per_batch) > 0:
            for i, lines in enumerate(files_per_batch):
                p = os.path.join(out_path, f"{i}")
                if not os.path.isdir(p):
                    os.makedirs(p)
                run_list.append(os.path.join(p,
                                             f"ListForRun5Analysis.{i}.txt"))
                with open(run_list[-1], "w") as f:
                    for j in lines:
                        f.write(j.strip() + "\n")
        msg("Number of runs:", len(run_list))
        return run_list

    if isinstance(input_file, list):
        input_file = [os.path.join(os.getcwd(), i) for i in input_file]
        input_file_list = build_list_of_files(input_file)
    elif not input_file.endswith(".root"):
        with open(input_file, "r") as f:
            lines = f.readlines()
            msg("Building input list from", len(lines), "inputs, limiting to",
                n_max_files)
            if len(lines) > n_max_files:
                lines = lines[0:n_max_files]
            input_file_list = build_list_of_files(lines)
    else:
        input_file_list = [os.path.join(os.getcwd(), input_file)]

    if dpl_configuration_file is not None:
        dpl_configuration_file = os.path.join(os.getcwd(),
                                              dpl_configuration_file)

    run_list = []
    for i, j in enumerate(input_file_list):
        run_list.append(
            set_o2_analysis(an,
                            o2_arguments=o2_arguments,
                            input_file=j,
                            tag=tag,
                            dpl_configuration_file=dpl_configuration_file))
    if not merge_only:
        run_in_parallel(processes=njobs,
                        job_runner=run_o2_analysis,
                        job_arguments=run_list,
                        job_message="Running analysis")
        if clean_localhost_after_running:
            run_cmd(
                "find /tmp/ -maxdepth 1 -name localhost* -user $(whoami) | xargs rm -v"
            )

    if (merge_output or merge_only) and len(run_list) > 1:
        files_to_merge = []
        for i in input_file_list:
            p = os.path.dirname(os.path.abspath(i))
            for j in os.listdir(p):
                if j.endswith(f"_{tag}.root"):
                    files_to_merge.append(os.path.join(p, j))
        if len(files_to_merge) == 0:
            warning_msg("Did not find any file to merge for tag", tag)
            return
        if len(files_to_merge) > len(run_list):
            fatal_msg("Trying to merge too many files!", tag)
        msg("Merging", len(files_to_merge), "results", color=bcolors.BOKBLUE)
        files_per_type = {}  # List of files to be merged per type
        for i in files_to_merge:
            fn = os.path.basename(i)
            files_per_type.setdefault(fn, [])
            files_per_type[fn].append(i)
        merged_files = []
        for i in files_per_type:
            merged_file = os.path.join(out_path, i)
            if avoid_overwriting_merge and os.path.isfile(merged_file):
                warning_msg(
                    "file", merged_file,
                    "is already found, remove it before merging, you can use the --mergeonly flag to avoid running the analysis again"
                )
                continue
            merged_files.append(merged_file)
            merge_file_list = os.path.join(
                os.path.dirname(os.path.abspath(merged_file)),
                "tomerge_" + "".join(i.split(".")[:-1]) + ".txt")
            verbose_msg("List of files to be merged:", merge_file_list)
            with open(merge_file_list, "w") as fmerge:
                for j in files_per_type[i]:
                    fmerge.write(j + "\n")
            run_cmd(
                f"hadd -j {njobs} -f {merged_file} `cat {merge_file_list}`",
                log_file=merge_file_list.replace(".txt", ".log"))
        if len(merged_files) == 0:
            warning_msg("Merged no files")
        else:
            msg("Merging completed, merged:",
                *merged_files,
                color=bcolors.BOKGREEN)
Example #7
def main(configuration_file, config_entry, njobs, nruns, nevents, qa,
         output_path, clean_delphes_files, create_luts, turn_off_vertexing,
         append_production, use_nuclei, avoid_file_copy, debug_aod,
         tof_mismatch):
    arguments = locals()  # List of arguments to put into the log
    parser = configparser.RawConfigParser()
    parser.read(configuration_file)
    if config_entry not in parser.keys():
        k = list(parser.keys())
        k.sort()
        fatal_msg(
            f"Did not find configuration entry '{config_entry}' in config file",
            configuration_file + "\n\t Available entries:\n\t\t" +
            "\n\t\t".join(k))

    run_cmd("./clean.sh > /dev/null 2>&1", check_status=False)
    # Dictionary of fetched options
    running_options = {}
    for i in arguments:
        running_options["ARG " + i] = arguments[i]

    def opt(entry, require=True):
        try:
            o = parser.get(config_entry, entry)
            b = ['yes', 'no', 'on', 'off', 'true', 'false']
            for i in b:
                if o.lower() == i:
                    o = parser.getboolean(config_entry, entry)
                    break
            verbose_msg("Got option", entry, "=", f"'{o}'")
            running_options[entry] = o
            return o
        except (configparser.NoSectionError, configparser.NoOptionError):
            if require:
                fatal_msg("Missing entry", f"'{entry}'",
                          "in configuration file", f"'{configuration_file}'")
            return None

    # Config from the config file
    # simulation configuration
    if output_path is None:
        output_path = ""
    output_path = os.path.join(os.getcwd(), output_path)
    msg("Output will be found in", f"'{output_path}'")
    if not os.path.isdir(output_path):
        msg("Creating output path")
        os.makedirs(output_path)
        if not os.path.isdir(output_path):
            raise RuntimeError("Cannot find output path", output_path)

    # detector configuration
    bField = opt("bField")
    sigmaT = opt("sigmaT")
    sigmaT0 = opt("sigmaT0")
    tof_radius = opt("tof_radius")
    rich_radius = opt("rich_radius")
    rich_index = opt("rich_index")
    forward_rich_index = opt("forward_rich_index")
    minimum_track_radius = opt("minimum_track_radius")
    etaMax = opt("etamax")
    barrel_half_length = opt("barrel_half_length")

    # copy relevant files in the working directory
    def do_copy(in_file, out_file=None, in_path=None):
        """Function to copy files"""
        in_file = os.path.normpath(in_file)  # Normalize path
        if out_file is None:
            # If left unconfigured use the same name but put in the current path
            out_file = os.path.basename(in_file)
        out_file = os.path.normpath(out_file)  # Normalize path
        if in_path is not None:
            in_file = os.path.join(in_path, in_file)
        in_file = os.path.expanduser(os.path.expandvars(in_file))
        if avoid_file_copy:
            if os.path.isfile(out_file) or (in_file == out_file):
                verbose_msg("Skipping copy of", in_file, "to", out_file,
                            "because of --avoid-config-copy")
            else:
                verbose_msg("Copying", in_file, "to", out_file,
                            "because of --avoid-config-copy")
                run_cmd(f"cp {in_file} {out_file}",
                        comment="Copying files without python")
            return
        verbose_msg("Copying", in_file, "to", out_file)
        shutil.copy2(in_file, out_file)

    # Fetching the propagation card
    do_copy(opt("propagate_card"), "propagate.tcl", in_path=opt("card_path"))

    lut_path = opt("lut_path")
    lut_tag = opt("lut_tag")
    lut_tag = f"rmin{int(float(minimum_track_radius))}.{lut_tag}"
    lut_particles = ["el", "mu", "pi", "ka", "pr"]
    if use_nuclei:
        lut_particles += ["de", "tr", "he3"]
    if create_luts:
        # Creating LUTs
        verbose_msg("Creating LUTs")
        lut_script = os.path.join(lut_path, "create_luts.sh")
        run_cmd(
            f"{lut_script} -p {lut_path} -t {lut_tag} -B {float(bField)*0.1} -R {minimum_track_radius} -P \"0 1 2 3 4 5 6\" -j 1 -F 2>&1",
            f"Creating the lookup tables with tag {lut_tag} from the {lut_script} script"
        )
    else:
        # Fetching LUTs
        verbose_msg(f"Fetching LUTs with tag {lut_tag} from path {lut_path}")
        for i in lut_particles:
            lut_bg = "{}kG".format(bField).replace(".", "")
            do_copy(f"lutCovm.{i}.{lut_bg}.{lut_tag}.dat",
                    f"lutCovm.{i}.dat",
                    in_path=lut_path)

    # Checking that we actually have LUTs
    for i in lut_particles:
        i = f"lutCovm.{i}.dat"
        if not os.path.isfile(i):
            fatal_msg("Did not find LUT file", i)

    custom_gen = opt("custom_gen", require=False)
    if custom_gen is None:
        # Checking that the generators are defined
        generators = opt("generators", require=False)
        if generators is None:
            fatal_msg(
                "Did not find any generator configuration corresponding to the entry",
                config_entry, "in your configuration file", configuration_file)
        generators = generators.split(" ")
        for i in generators:
            do_copy(i)
        msg("Using pythia with configuration", generators)
    else:

        def check_duplicate(option_name):
            if f" {option_name}" in custom_gen:
                fatal_msg(f"Remove '{option_name}' from", custom_gen,
                          "as it will be automatically set")

        for i in ["--output", "-o", "--nevents", "-n"]:
            check_duplicate(i)
        if "INPUT_FILES" in custom_gen:
            input_hepmc_files = custom_gen.replace("INPUT_FILES",
                                                   "").strip().split(" ")
            input_hepmc_file_list = []
            for i in input_hepmc_files:
                input_hepmc_file_list += glob.glob(os.path.normpath(i))

            if len(input_hepmc_file_list) >= nruns:
                input_hepmc_file_list = input_hepmc_file_list[0:nruns]
            else:
                nruns = len(input_hepmc_file_list)

            if len(input_hepmc_file_list) <= 0:
                fatal_msg(
                    "Did not find any input file matching to the request:",
                    custom_gen)
            custom_gen = f"INPUT_FILES " + " ".join(input_hepmc_file_list)
            msg(
                "Using", len(input_hepmc_file_list), "input HepMC file" +
                ("" if len(input_hepmc_file_list) == 1 else "s"),
                input_hepmc_file_list)
        else:
            msg("Using custom generator", custom_gen)

    # Printing configuration
    msg(" --- running createO2tables.py", color=bcolors.HEADER)
    msg("  n. jobs        =", njobs)
    msg("  n. runs        =", nruns)
    msg("  events per run =", nevents)
    msg("  tot. events    =", "{:.0e}".format(nevents * nruns))
    msg("  LUT path       =", f"'{lut_path}'")
    msg(" --- with detector configuration", color=bcolors.HEADER)
    msg("  B field              =", bField, "[kG]")
    msg("  Barrel radius        =", minimum_track_radius, "[cm]")
    msg("  Barrel half length   =", barrel_half_length, "[cm]")
    if create_luts:
        msg("  Minimum track radius =", minimum_track_radius, "[cm]")
    msg("  LUT                  =", lut_tag)
    msg("  etaMax               =", etaMax)
    msg(" --- with TOF configuration", color=bcolors.HEADER)
    msg("  sigmaT               =", sigmaT, "[ns]")
    msg("  sigmaT0              =", sigmaT0, "[ns]")
    msg("  tof_radius           =", tof_radius, "[cm]")
    msg(" --- with RICH configuration", color=bcolors.HEADER)
    msg("  rich_radius          =", rich_radius, "[cm]")
    msg("  rich_index           =", rich_index)
    msg(" --- with Forward RICH configuration", color=bcolors.HEADER)
    msg("  forward_rich_index   =", forward_rich_index)

    aod_path = opt("aod_path")
    do_copy("createO2tables.h", in_path=aod_path)
    do_copy("createO2tables.C", in_path=aod_path)
    do_copy("muonAccEffPID.root", in_path=aod_path)
    if qa:
        do_copy("diagnostic_tools/dpl-config_std.json")

    def set_config(config_file, config, value):
        config = config.strip()
        value = value.strip()
        config_string = f"{config} {value}"
        run_cmd("sed -i -e \""
                f"s/{config} .*$/{config_string}"
                "\" " + config_file)
        # Checking that the file has the correct configuration
        with open(config_file) as f:
            has_it = False
            config_string = config_string.replace("\\", "").strip("/")
            for lineno, line in enumerate(f):
                if line.strip() == config_string:
                    verbose_msg(f"Found config string '{config_string}'",
                                f"at line #{lineno} '{line.strip()}'")
                    has_it = True
                    break
            if not has_it:
                fatal_msg("Configuration file", config_file,
                          f"does not have config string '{config_string}'")

    # set magnetic field
    set_config("propagate.tcl", "set barrel_Bz", f"{bField}" "e\-1/")
    set_config("createO2tables.C", "const double Bz = ", f"{bField}" "e\-1\;/")
    if turn_off_vertexing:
        set_config("createO2tables.C", "constexpr bool do_vertexing = ",
                   "false\;/")
    else:  # Check that the geometry file for the vertexing is there
        if not os.path.isfile("o2sim_grp.root") or not os.path.isfile(
                "o2sim_geometry.root"):
            run_cmd(
                "mkdir tmpo2sim && cd tmpo2sim && o2-sim -m PIPE ITS MFT -g boxgen -n 1 -j 1 --configKeyValues 'BoxGun.number=1' && cp o2sim_grp.root .. && cp o2sim_geometry.root .. && cd .. && rm -r tmpo2sim"
            )
    if use_nuclei:
        set_config("createO2tables.C", "constexpr bool enable_nuclei = ",
                   "true\;/")
    if debug_aod:
        set_config("createO2tables.C", "constexpr bool debug_qa = ", "true\;/")
    if tof_mismatch:
        if tof_mismatch not in (1, 2):
            fatal_msg("tof_mismatch", tof_mismatch, "is not 1 or 2")
        set_config("createO2tables.C", "constexpr int tof_mismatch = ",
                   f"{tof_mismatch}\;/")
    if qa:
        set_config("dpl-config_std.json", "\\\"d_bz\\\":", "\\\""
                   f"{bField}"
                   "\\\"\,/")
    # set barrel_radius
    set_config("propagate.tcl", "set barrel_Radius", f"{minimum_track_radius}"
               "e\-2/")
    # set barrel_half_length
    set_config("propagate.tcl", "set barrel_HalfLength",
               f"{barrel_half_length}"
               "e\-2/")
    # set tof_radius
    set_config("createO2tables.C", "constexpr double tof_radius =",
               f"{tof_radius}"
               "\;/")
    # set tof_length
    set_config("createO2tables.C", "const double tof_length =",
               f"{barrel_half_length}"
               "\;/")
    # set rich_radius
    set_config("createO2tables.C", "constexpr double rich_radius =",
               f"{rich_radius}"
               "\;/")
    # set rich_index
    set_config("createO2tables.C", "const double rich_index =", f"{rich_index}"
               "\;/")
    # set forward_rich_index
    set_config("createO2tables.C", "const double forward_rich_index =",
               f"{forward_rich_index}"
               "\;/")
    # set acceptance
    set_config("propagate.tcl", "set barrel_Acceptance",
               "\{ 0.0 + 1.0 * fabs(eta) < "
               f"{etaMax}"
               " \}/")
    # set time resolution
    set_config("propagate.tcl", "set barrel_TimeResolution", f"{sigmaT}"
               "e\-9/")
    set_config("createO2tables.C", "const double tof_sigmat =", f"{sigmaT}"
               "\;/")
    set_config("createO2tables.C", "const double tof_sigmat0 =", f"{sigmaT0}"
               "\;/")
    run_list = range(nruns)
    if append_production:
        if output_path is None:
            fatal_msg("Output path is not defined, cannot append")
        last_preexisting_aod = [
            each for each in os.listdir(output_path)
            if each.endswith('.root') and "AODRun5" in each
        ]
        if len(last_preexisting_aod) == 0:
            fatal_msg("Appending to a non existing production")
        last_preexisting_aod = sorted([
            int(each.replace("AODRun5.", "").replace(".root", ""))
            for each in last_preexisting_aod
        ])[-1] + 1
        msg(f" Appending to production with {last_preexisting_aod} AODs",
            color=bcolors.BWARNING)
        run_list = range(last_preexisting_aod, last_preexisting_aod + nruns)

    def configure_run(run_number):
        # Create executable that runs Generation, Delphes and analysis
        runner_file = f"runner{run_number}.sh"
        with open(runner_file, "w") as f_run:

            def write_to_runner(line, log_file=None, check_status=False):
                """
                Writes commands to runner
                """
                log_line = ""
                if log_file is not None:
                    log_line = f" &> {log_file} 2>&1"
                    line += log_line
                line += "\n"
                f_run.write(line)
                if check_status:
                    f_run.write("\nReturnValue=$?\n")
                    f_run.write("if [[ $ReturnValue != 0 ]]; then\n")
                    f_run.write("  echo \"Encountered error with command: '")
                    line = line.replace(log_line, "")
                    f_run.write(line.replace("\"", "\\\"").strip())
                    f_run.write("'\"\n")
                    if log_file is not None:
                        f_run.write("  echo \"Check log: '")
                        f_run.write(log_file.strip() + "'\"\n")
                    f_run.write("  exit $ReturnValue\n")
                    f_run.write("fi\n")

            def copy_and_link(file_name):
                """
                In runner, copies file to output path (if different from current) and links it to current
                """
                if os.path.normpath(output_path) != os.getcwd():
                    write_to_runner(f"mv {file_name} {output_path} \n")
                    write_to_runner(
                        f"ln -s {os.path.join(output_path, file_name)} . \n")

            write_to_runner("#! /usr/bin/env bash\n")
            delphes_file = f"delphes.{run_number}.root"
            delphes_log_file = delphes_file.replace(".root", ".log")
            hepmc_file = None
            mc_seed = random.randint(1, 800000000)
            if custom_gen:  # Using HEPMC
                hepmc_file = f"hepmcfile.{run_number}.hepmc"
                if "INPUT_FILES" in custom_gen:
                    input_hepmc_file = custom_gen.replace(
                        "INPUT_FILES", "").strip().split(" ")
                    input_hepmc_file = input_hepmc_file[run_number]
                    write_to_runner(f"ln -s {input_hepmc_file}"
                                    f" {hepmc_file} \n")
                else:
                    gen_log_file = f"gen.{run_number}.log"
                    custom_gen_option = f" --output {hepmc_file} --nevents {nevents} --seed {mc_seed}"
                    write_to_runner(custom_gen + custom_gen_option,
                                    log_file=gen_log_file,
                                    check_status=True)
                write_to_runner(
                    f"DelphesHepMC propagate.tcl {delphes_file} {hepmc_file}",
                    log_file=delphes_log_file,
                    check_status=True)
            else:  # Using DelphesPythia
                # copy generator configuration
                generator_cfg = f"generator.{run_number}.cfg"
                generator_orig = generators[0].split("/")[-1]
                do_copy(generator_orig, generator_cfg)
                # Adjust configuration file
                with open(generator_cfg, "a") as f_cfg:
                    # number of events and random seed
                    f_cfg.write(f"\n\n\n#### Additional part ###\n\n\n\n")
                    f_cfg.write(f"Main:numberOfEvents {nevents}\n")
                    f_cfg.write(f"Random:setSeed = on\n")
                    f_cfg.write(f"Random:seed = {mc_seed}\n")
                    # collision time spread [mm/c]
                    f_cfg.write("Beams:allowVertexSpread on \n")
                    f_cfg.write("Beams:sigmaTime 60.\n")
                    for i in generators[1:]:
                        with open(i.split("/")[-1], "r") as f_append:
                            f_cfg.write(f_append.read())
                write_to_runner(
                    f"DelphesPythia8 propagate.tcl {generator_cfg} {delphes_file}",
                    log_file=delphes_log_file,
                    check_status=True)
            aod_file = f"AODRun5.{run_number}.root"
            aod_log_file = aod_file.replace(".root", ".log")
            write_to_runner(
                f"root -l -b -q 'createO2tables.C+(\"{delphes_file}\", \"tmp_{aod_file}\", 0)'",
                log_file=aod_log_file,
                check_status=True)
            # Check that there were no O2 errors
            write_to_runner(
                f"if grep -q \"\[ERROR\]\" {aod_log_file}; then echo \": got some errors in '{aod_log_file}'\" && echo \"Found some ERROR in this log\" >> {aod_log_file}; fi"
            )
            write_to_runner(
                f"if grep -q \"\[FATAL\]\" {aod_log_file}; then echo \": got some fatals in '{aod_log_file}'\" && echo \"Found some FATAL in this log\" >> {aod_log_file} && exit 1; fi"
            )
            # Rename the temporary AODs to standard AODs
            write_to_runner(f"mv tmp_{aod_file} {aod_file}", check_status=True)
            if not clean_delphes_files:
                copy_and_link(delphes_file)
                if hepmc_file is not None:
                    copy_and_link(hepmc_file)
            copy_and_link(aod_file)
            if clean_delphes_files:
                write_to_runner(f"rm {delphes_file}")
                if not custom_gen:
                    # generator_cfg is only defined for DelphesPythia runs
                    write_to_runner(f"rm {generator_cfg}")
                if hepmc_file is not None:
                    write_to_runner(f"rm {hepmc_file}")
            write_to_runner("exit 0\n")

    # Configuring all the runs
    for i in run_list:
        configure_run(i)

    # Compiling the table creator macro once for all
    run_cmd("root -l -b -q 'createO2tables.C+(\"\")' > /dev/null 2>&1",
            comment="to compile the table creator only once, before running")
    if not os.path.isfile("createO2tables_C.so"):
        run_cmd("root -l -b -q 'createO2tables.C+(\"\")'",
                comment="to compile with full log")
        fatal_msg("'createO2tables.C' did not compile!")
    total_processing_time = time.time()
    msg(" --- start processing the runs ", color=bcolors.HEADER)
    run_in_parallel(processes=njobs,
                    job_runner=process_run,
                    job_arguments=run_list,
                    job_message="Running production")

    # merge runs when all done
    msg(" --- all runs are processed, so long", color=bcolors.HEADER)
    total_processing_time = time.time() - total_processing_time
    msg(f"-- took {total_processing_time} seconds in total --",
        color=bcolors.BOKGREEN)

    # Writing the list of produced AODs
    output_list_file = "listfiles.txt"
    with open(output_list_file, "w") as listfiles:
        for i in os.listdir("."):
            if "AODRun5." in i and i.endswith(".root"):
                listfiles.write(f"{os.getcwd()}/{i}\n")

    # Writing summary of production
    summaryfile = "summary.txt"
    with open(summaryfile, "w") as f:
        f.write("\n## Summary of last run ##\n")
        now = datetime.now()
        dt_string = now.strftime("%d/%m/%Y %H:%M:%S")
        f.write(f"Finished at {dt_string}\n")
        f.write(f"Took {total_processing_time} seconds\n")

        def write_config(entry, prefix=""):
            # str.strip("ARG ") would drop any of the characters 'A', 'R', 'G',
            # ' ' from both ends, so remove the "ARG " prefix explicitly
            f.write(prefix + entry.replace("ARG ", "", 1) +
                    f" = {running_options[entry]}\n")

        f.write("\n## Configuration ##\n")
        for i in running_options:
            if "ARG" in i:
                write_config(i, prefix=" - ")

        f.write("\n## Options ##\n")
        for i in running_options:
            if "ARG" not in i:
                write_config(i, prefix=" * ")

        output_size = sum(
            os.path.getsize(os.path.join(output_path, f))
            for f in os.listdir(output_path)
            if os.path.isfile(os.path.join(output_path, f)))
        f.write("\n##  Size of the ouput ##\n")
        f.write(f" - {output_size} bytes\n")
        f.write(f" - {output_size/1e6} MB\n")
        f.write(f" - {output_size/1e9} GB\n")
    run_cmd("echo  >> " + summaryfile)
    run_cmd("echo + DelphesO2 Version + >> " + summaryfile)
    run_cmd("git rev-parse HEAD >> " + summaryfile, check_status=False)

    if os.path.normpath(output_path) != os.getcwd():
        if append_production:
            s = os.path.join(output_path, summaryfile)
            run_cmd(f"echo '' >> {s}")
            run_cmd(f"echo '  **' >> {s}")
            run_cmd(f"echo 'Appended production' >> {s}")
            run_cmd(f"echo '  **' >> {s}")
            run_cmd(f"echo '' >> {s}")
            run_cmd(f"cat {summaryfile} >> {s}")
        else:
            run_cmd(f"mv {summaryfile} {output_path}")
            run_cmd(f"ln -s {os.path.join(output_path, summaryfile)} ./")

    if qa:
        msg(" --- running test analysis", color=bcolors.HEADER)
        run_cmd(
            f"./diagnostic_tools/doanalysis.py TrackQA RICH TOF -i {output_list_file} -M 25 -B 25"
        )
    if tof_mismatch == 1:  # TOF mismatch in create mode
        run_cmd(
            f"hadd -j {njobs} -f tofMM.root tof_mismatch_template_DF_*.root && rm tof_mismatch_template_DF_*.root"
        )
Example #8
    input_files = []
    for i in args.input_files:
        i = os.path.normpath(i)
        if i.endswith(".root"):
            input_files.append(i)
        elif i.endswith(".txt"):
            with open(i, "r") as f:
                for j in f:
                    j = j.strip()
                    input_files.append(
                        os.path.join(os.path.abspath(os.path.dirname(i)),
                                     os.path.normpath(j)))

    run_in_parallel(args.njobs,
                    main,
                    input_files,
                    "Checking file",
                    linearize_single_core=True)
    if len(bad_files) > 0:
        warning_msg("There were", len(bad_files), "bad files")
        for i in bad_files:
            msg(i)

    if args.output is not None:
        msg("Writing good files to", args.output)
        with open(args.output, "w") as f:
            for i in input_files:
                if i not in bad_files:
                    f.write(i + "\n")