Exemple #1
0
def test_retrieve_host_template(monkeypatch):
    """Test `retrieve_host_template` utility function."""

    # Check template name that we supply with the package
    assert utils.retrieve_host_template("draco") is not None

    # Check that the user can define some custom template
    monkeypatch.setattr("mdbenchmark.utils.ENV.get_template", lambda x: "minerva")
    assert utils.retrieve_host_template("minerva") == "minerva"
Exemple #2
0
def test_write_benchmark(engine, gpu, job_name, module, input_name, extensions, tmpdir):
    """Test that the write_benchmark works as expected."""
    host = "draco"
    base_dirname = "{}_{}".format(host, engine)
    nodes = 5
    with tmpdir.as_cwd():
        base_directory = dtr.Tree(base_dirname)

        for ext in extensions:
            open("md.{}".format(ext), "a").close()

        template = retrieve_host_template("draco")
        utils.write_benchmark(
            engine=engine,
            base_directory=base_directory,
            template=template,
            nodes=nodes,
            gpu=gpu,
            module=module,
            name=input_name,
            job_name=job_name,
            relative_path=".",
            host=host,
            time=15,
        )

        expected_job_name = "md" if job_name is None else job_name

        assert os.path.exists(base_dirname)
        assert os.path.exists(
            os.path.join(base_dirname, "{}".format(nodes), input_name)
        )

        with open(
            os.path.join(base_dirname, "{}".format(nodes), "bench.job"), "r"
        ) as f:
            for line in f:
                if "#SBATCH -J" in line:
                    assert line == "#SBATCH -J {}\n".format(expected_job_name)
                if "--partition=" in line:
                    if gpu:
                        assert line == "#SBATCH --partition=gpu\n"
                    else:
                        assert line == "#SBATCH --partition=express\n"
                if "--nodes=" in line:
                    assert line == "#SBATCH --nodes={}\n".format(nodes)
                if "--time=" in line:
                    assert line == "#SBATCH --time={}\n".format("00:20:00")
                if "module load {}/".format(engine) in line:
                    assert line == "module load {}\n".format(module)
                if "srun" in line:
                    if engine == "gromacs":
                        assert line == "srun gmx_mpi mdrun -v -maxh 0.25 -deffnm md"
                    elif engine == "namd":
                        assert line == "srun namd2 md.namd"
Exemple #3
0
def do_generate(
    name,
    cpu,
    gpu,
    module,
    host,
    min_nodes,
    max_nodes,
    time,
    skip_validation,
    job_name,
    yes,
    physical_cores,
    logical_cores,
    number_of_ranks,
    enable_hyperthreading,
    multidir,
):
    """Generate a bunch of benchmarks."""

    # Instantiate the version we are going to use
    benchmark_version = Version3Categories()

    # Validate the CPU and GPU flags
    validate_cpu_gpu_flags(cpu, gpu)

    # Validate the number of nodes
    validate_number_of_nodes(min_nodes=min_nodes, max_nodes=max_nodes)

    if logical_cores < physical_cores:
        console.error(
            "The number of logical cores cannot be smaller than the number of physical cores."
        )

    if physical_cores and not logical_cores:
        console.warn("Assuming logical_cores = 2 * physical_cores")
        logical_cores = 2 * physical_cores

    if physical_cores and logical_cores:
        processor = Processor(physical_cores=physical_cores,
                              logical_cores=logical_cores)
    else:
        processor = Processor()

    # Hyperthreading check
    if enable_hyperthreading and not processor.supports_hyperthreading:
        console.error(
            "The processor of this machine does not support hyperthreading.")

    if not number_of_ranks:
        number_of_ranks = (processor.physical_cores, )

    # Validate number of simulations
    validate_number_of_simulations(multidir, min_nodes, max_nodes,
                                   number_of_ranks)

    # Grab the template name for the host. This should always work because
    # click does the validation for us
    template = utils.retrieve_host_template(host)

    # Warn the user that NAMD support is still experimental.
    if any(["namd" in m for m in module]):
        console.warn(NAMD_WARNING, "--gpu")

    # Stop if we cannot find any modules. If the user specified multiple
    # modules, we will continue with only the valid ones.
    modules = mdengines.normalize_modules(module, skip_validation)
    if not modules:
        console.error("No requested modules available!")

    # Check if all needed files exist. Throw an error if they do not.
    validate_required_files(name=name, modules=modules)

    # Validate that we can use the number of ranks and threads.
    # We can continue, if no ValueError is thrown
    for ranks in number_of_ranks:
        try:
            processor.get_ranks_and_threads(
                ranks, with_hyperthreading=enable_hyperthreading)
        except ValueError as e:
            console.error(e)

    # Create all benchmark combinations and put them into a DataFrame
    data = construct_generate_data(
        name,
        job_name,
        modules,
        host,
        template,
        cpu,
        gpu,
        time,
        min_nodes,
        max_nodes,
        processor,
        number_of_ranks,
        enable_hyperthreading,
        multidir,
    )
    df = pd.DataFrame(data, columns=benchmark_version.generate_categories)

    # Consolidate the data by grouping on the number of nodes and print to the
    # user as an overview.
    consolidated_df = consolidate_dataframe(
        df, columns=benchmark_version.consolidate_categories)
    print_dataframe(
        consolidated_df[benchmark_version.generate_printing],
        columns=map_columns(
            map_dict=benchmark_version.category_mapping,
            columns=benchmark_version.generate_printing,
        ),
    )

    # Save the number of benchmarks for later printing
    number_of_benchmarks = df.shape[0]
    # Ask the user for confirmation to generate files.
    # If the user defined `--yes`, we will skip the confirmation immediately.
    if yes:
        console.info(
            "We will generate {} " +
            "{benchmark}.".format(benchmark="benchmark" if number_of_benchmarks
                                  == 1 else "benchmarks"),
            number_of_benchmarks,
        )
    elif not click.confirm("We will generate {} benchmarks. Continue?".format(
            number_of_benchmarks)):
        console.error("Exiting. No benchmarks were generated.")

    # Generate the benchmarks
    with click.progressbar(
            df.iterrows(),
            length=number_of_benchmarks,
            show_pos=True,
            label="Generating benchmarks",
    ) as bar:
        for _, row in bar:
            relative_path, file_basename = os.path.split(row["name"])
            mappings = benchmark_version.generate_mapping
            kwargs = {"name": file_basename, "relative_path": relative_path}
            for key, value in mappings.items():
                kwargs[value] = row[key]

            write_benchmark(**kwargs)

    # Finish up by telling the user how to submit the benchmarks
    console.info(
        "Finished! You can submit the jobs with {}.",
        "mdbenchmark submit",
    )
Exemple #4
0
def do_generate(
    name,
    cpu,
    gpu,
    module,
    host,
    min_nodes,
    max_nodes,
    time,
    skip_validation,
    job_name,
    yes,
):
    """Generate a bunch of benchmarks."""
    # Validate the CPU and GPU flags
    validate_cpu_gpu_flags(cpu, gpu)

    # Validate the number of nodes
    validate_number_of_nodes(min_nodes=min_nodes, max_nodes=max_nodes)

    # Grab the template name for the host. This should always work because
    # click does the validation for us
    template = utils.retrieve_host_template(host)

    # Warn the user that NAMD support is still experimental.
    if any(["namd" in m for m in module]):
        console.warn(NAMD_WARNING, "--gpu")

    module = mdengines.normalize_modules(module, skip_validation)

    # If several modules were given and we only cannot find one of them, we
    # continue.
    if not module:
        console.error("No requested modules available!")

    df_overview = pd.DataFrame(columns=[
        "name",
        "job_name",
        "base_directory",
        "template",
        "engine",
        "module",
        "nodes",
        "run time [min]",
        "gpu",
        "host",
    ])

    i = 1
    for m in module:
        # Here we detect the MD engine (supported: GROMACS and NAMD).
        engine = mdengines.detect_md_engine(m)

        # Check if all needed files exist. Throw an error if they do not.
        engine.check_input_file_exists(name)

        gpu_cpu = {"cpu": cpu, "gpu": gpu}
        for pu, state in sorted(gpu_cpu.items()):
            if not state:
                continue

            directory = "{}_{}".format(host, m)
            gpu = False
            gpu_string = ""
            if pu == "gpu":
                gpu = True
                directory += "_gpu"
                gpu_string = " with GPUs"

            console.info("Creating benchmark system for {}.", m + gpu_string)

            base_directory = dtr.Tree(directory)

            for nodes in range(min_nodes, max_nodes + 1):
                df_overview.loc[i] = [
                    name,
                    job_name,
                    base_directory,
                    template,
                    engine,
                    m,
                    nodes,
                    time,
                    gpu,
                    host,
                ]
                i += 1

    console.info("{}", "Benchmark Summary:")

    df_short = ConsolidateDataFrame(df_overview)
    PrintDataFrame(df_short)

    if yes:
        console.info("Generating the above benchmarks.")
    elif not click.confirm(
            "The above benchmarks will be generated. Continue?"):
        console.error("Exiting. No benchmarks generated.")

    for _, row in df_overview.iterrows():
        relative_path, file_basename = os.path.split(row["name"])
        write_benchmark(
            engine=row["engine"],
            base_directory=row["base_directory"],
            template=row["template"],
            nodes=row["nodes"],
            gpu=row["gpu"],
            module=row["module"],
            name=file_basename,
            relative_path=relative_path,
            job_name=row["job_name"],
            host=row["host"],
            time=row["run time [min]"],
        )

    # Provide some output for the user
    console.info(
        "Finished generating all benchmarks.\n"
        "You can now submit the jobs with {}.",
        "mdbenchmark submit",
    )