Example No. 1
def test_includes(tmpdir):
    with tmpdir.as_cwd():
        sequoia = _create_tree(
            'sequoia',
            ['hello.txt', 'data/hello.txt', 'data/world.dat', 'world.dat'])
        sequoia2 = dtr.Tree("sequoia2").makedirs()
        sequoia3 = dtr.Tree("sequoia3").makedirs()
        sequoia4 = dtr.Tree("sequoia4").makedirs()

        # Only txt
        sequoia.sync(sequoia2, include='*.txt')

        assert os.path.exists('sequoia2/data/hello.txt')
        assert os.path.exists('sequoia2/hello.txt')

        assert not os.path.exists('sequoia2/world.dat')
        assert not os.path.exists('sequoia2/data/world.dat')

        # Only txt and dat
        sequoia.sync(sequoia3, include=['*.txt', '*.dat'])

        assert os.path.exists('sequoia3/data/hello.txt')
        assert os.path.exists('sequoia3/hello.txt')

        assert os.path.exists('sequoia3/world.dat')
        assert os.path.exists('sequoia3/data/world.dat')

        # We can also test include and exclude at the same time
        sequoia.sync(sequoia4, exclude='*.txt', include=['data/*'])

        assert os.path.exists('sequoia4/data/world.dat')
        assert os.path.exists('sequoia4/data/hello.txt')
        assert not os.path.exists('sequoia4/hello.txt')
        assert not os.path.exists('sequoia4/world.dat')
Example No. 2
def test_sync_local(tmpdir):
    '''Test that synchronization works locally'''
    with tmpdir.as_cwd():
        sequoia = dtr.Tree("sequoia").makedirs()
        sequoia2 = dtr.Tree("sequoia2").makedirs()
        py.path.local('sequoia/hello.txt').write('hello')

        assert os.path.exists('sequoia/hello.txt')

        sequoia.sync(sequoia2)
        assert os.path.exists('sequoia2/hello.txt')
Example No. 3
def _create_tree(name, files=[]):
    tree = dtr.Tree(name).makedirs()
    for file_ in files:
        py.path.local(os.path.join(tree.abspath, file_)).write('hello',
                                                               ensure=True)

    return tree
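
For readers unfamiliar with py.path.local: write('hello', ensure=True) creates any missing parent directories before writing the file. Below is a minimal standard-library sketch of the same behaviour; the function name _create_tree_stdlib is ours and is not part of the test suite.

import os


def _create_tree_stdlib(name, files=None):
    # Standard-library sketch of the helper above: create the root directory,
    # then write the sentinel string 'hello' into each file, creating any
    # missing parent directories along the way.
    if files is None:
        files = []
    os.makedirs(name, exist_ok=True)
    for file_ in files:
        path = os.path.join(name, file_)
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'w') as f:
            f.write('hello')
    return name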
Example No. 4
    def filled_collection(self, tmpdir):
        # returns (a bundle of [t1, t2, t3], then individual references to each)
        with tmpdir.as_cwd():
            t1 = dtr.Tree('larry')
            t2 = dtr.Leaf('curly')
            t3 = dtr.Treant('moe')
            b = dtr.View(t1, t2, t3)
            return b, (t1, t2, t3)
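
Since this method takes self and returns both the View and its members, it is presumably a pytest fixture defined inside a test class. Here is a minimal sketch of that wiring, assuming the @pytest.fixture decorator and a hypothetical test class and test, none of which appear above.

import datreant as dtr  # older releases may need: import datreant.core as dtr
import pytest


class TestFilledView:
    @pytest.fixture
    def filled_collection(self, tmpdir):
        # Build a Tree, a Leaf and a Treant inside the tmpdir and bundle
        # them into a View; hand back both the View and its members.
        with tmpdir.as_cwd():
            t1 = dtr.Tree('larry')
            t2 = dtr.Leaf('curly')
            t3 = dtr.Treant('moe')
            b = dtr.View(t1, t2, t3)
            return b, (t1, t2, t3)

    def test_unpacking(self, filled_collection):
        # Hypothetical test: the fixture returns the View plus the tuple of
        # objects it was built from.
        view, members = filled_collection
        assert len(members) == 3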
Example No. 5
def test_write_benchmark(engine, gpu, job_name, module, input_name, extensions, tmpdir):
    """Test that the write_benchmark works as expected."""
    host = "draco"
    base_dirname = "{}_{}".format(host, engine)
    nodes = 5
    with tmpdir.as_cwd():
        base_directory = dtr.Tree(base_dirname)

        for ext in extensions:
            open("md.{}".format(ext), "a").close()

        template = retrieve_host_template("draco")
        utils.write_benchmark(
            engine=engine,
            base_directory=base_directory,
            template=template,
            nodes=nodes,
            gpu=gpu,
            module=module,
            name=input_name,
            job_name=job_name,
            relative_path=".",
            host=host,
            time=15,
        )

        expected_job_name = "md" if job_name is None else job_name

        assert os.path.exists(base_dirname)
        assert os.path.exists(
            os.path.join(base_dirname, "{}".format(nodes), input_name)
        )

        with open(
            os.path.join(base_dirname, "{}".format(nodes), "bench.job"), "r"
        ) as f:
            for line in f:
                if "#SBATCH -J" in line:
                    assert line == "#SBATCH -J {}\n".format(expected_job_name)
                if "--partition=" in line:
                    if gpu:
                        assert line == "#SBATCH --partition=gpu\n"
                    else:
                        assert line == "#SBATCH --partition=express\n"
                if "--nodes=" in line:
                    assert line == "#SBATCH --nodes={}\n".format(nodes)
                if "--time=" in line:
                    assert line == "#SBATCH --time={}\n".format("00:20:00")
                if "module load {}/".format(engine) in line:
                    assert line == "module load {}\n".format(module)
                if "srun" in line:
                    if engine == "gromacs":
                        assert line == "srun gmx_mpi mdrun -v -maxh 0.25 -deffnm md"
                    elif engine == "namd":
                        assert line == "srun namd2 md.namd"
Example No. 6
def test_excludes(tmpdir):
    with tmpdir.as_cwd():
        sequoia = _create_tree(
            'sequoia',
            ['hello.txt', 'data/hello.txt', 'data/world.dat', 'world.dat'])
        sequoia2 = dtr.Tree("sequoia2").makedirs()
        sequoia3 = dtr.Tree("sequoia3").makedirs()

        sequoia.sync(sequoia2, exclude='*.txt')

        assert os.path.exists('sequoia2/world.dat')
        assert os.path.exists('sequoia2/data/world.dat')

        assert not os.path.exists('sequoia2/hello.txt')
        assert not os.path.exists('sequoia2/data/hello.txt')

        sequoia.sync(sequoia3, exclude=['*.txt', '*.dat'])
        assert not os.path.exists('sequoia3/hello.txt')
        assert not os.path.exists('sequoia3/world.dat')

        assert os.path.exists('sequoia3/data/')
Example No. 7
def test_overwrite(tmpdir):
    with tmpdir.as_cwd():
        sequoia = _create_tree(
            'sequoia',
            ['hello.txt', 'data/hello.txt', 'data/world.dat', 'world.dat'])
        sequoia2 = dtr.Tree("sequoia2").makedirs()

        # Upload contents
        sequoia.sync(sequoia2, mode='upload')

        # Change contents
        with open(sequoia2['hello.txt'].abspath, 'w') as f:
            f.write('newcontent')

        # Upload contents again
        sequoia.sync(sequoia2, mode='upload')

        # Verify that hello.txt was not overwritten
        assert sequoia2['hello.txt'].read() == 'newcontent'
Example No. 8
def test_cleanup_before_restart(engine, files_to_delete, files_to_keep, tmpdir):
    """Test that the cleanup of each directory works as intended for all MD engines."""
    # Create temporary directory
    tmp = tmpdir.mkdir("mdbenchmark")

    # Create empty files
    for f in files_to_delete + files_to_keep:
        open("{}/{}".format(tmp, f), "a").close()

    # Run the cleanup script
    utils.cleanup_before_restart(engine=engine, sim=dtr.Tree(tmp.strpath))

    # Look for files that were left
    files_found = []
    for f in files_to_keep:
        files_found.extend(glob(os.path.join(tmp.strpath, f)))

    # Get rid of the `tmp` path and only compare the actual filenames
    assert files_to_keep == [x[len(str(tmp)) + 1 :] for x in files_found]
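
The engine, files_to_delete and files_to_keep arguments are not fixtures defined here, so they are presumably injected via pytest.mark.parametrize. Below is a sketch of one such parametrization; the file names are purely illustrative, and whether engine is passed as a module-name string or an engine object is not visible in this example.

import pytest


# Illustrative parametrization only; the real test suite defines its own
# engine values and file lists.
@pytest.mark.parametrize(
    "engine, files_to_delete, files_to_keep",
    [
        ("gromacs", ["md.log", "md.xtc"], ["md.tpr", "bench.job"]),
    ],
)
def test_cleanup_before_restart(engine, files_to_delete, files_to_keep, tmpdir):
    ...  # body as in Example No. 8 above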
Example No. 9
def construct_generate_data(
    name,
    job_name,
    modules,
    host,
    template,
    cpu,
    gpu,
    time,
    min_nodes,
    max_nodes,
    processor,
    number_of_ranks,
    enable_hyperthreading,
    multidir,
):
    data = []
    for module in modules:
        # Here we detect the MD engine (supported: GROMACS and NAMD).
        engine = mdengines.detect_md_engine(module)

        # Iterate over CPUs or GPUs
        gpu_cpu = {"cpu": cpu, "gpu": gpu}
        for key, value in sorted(gpu_cpu.items()):
            # Skip processing units that were not requested
            if not value:
                continue

            # Generate the directory name and set the `gpu` flag for later use.
            gpu = True if key == "gpu" else False
            directory = construct_directory_name(template.name, module, gpu)

            # Set up the path to the new directory as `datreant.Tree`
            base_directory = dtr.Tree(directory)

            # Do the main iteration over nodes, ranks and number of simulations
            for nodes in range(min_nodes, max_nodes + 1):
                for _ranks in number_of_ranks:
                    ranks, threads = processor.get_ranks_and_threads(
                        _ranks, with_hyperthreading=enable_hyperthreading
                    )
                    for nsim in multidir:

                        # Append the data to our list
                        data.append(
                            [
                                name,
                                job_name,
                                base_directory,
                                host,
                                engine,
                                module,
                                nodes,
                                time,
                                gpu,
                                template,
                                ranks,
                                threads,
                                enable_hyperthreading,
                                nsim,
                            ]
                        )

    return data
Example No. 10
def generate(
    name,
    cpu,
    gpu,
    module,
    host,
    min_nodes,
    max_nodes,
    time,
    skip_validation,
    job_name,
    yes,
):
    """Generate benchmarks for molecular dynamics simulations.

    Requires the ``--name`` option to point to an existing file, e.g.,
    ``protein.tpr`` for GROMACS and ``protein.namd``, ``protein.pdb`` and
    ``protein.psf`` for NAMD. The filename ``protein`` will then be used as the
    job name, unless it is overridden with the ``--job-name`` option.

    The specified module name will be validated and searched for on the current
    system. To skip this check, use the ``--skip-validation`` option.

    Benchmarks will be generated for CPUs per default (``--cpu``), but can also
    be generated for GPUs (``--gpu``) at the same time or without CPUs
    (``--no-cpu``).

    The hostname of the current system will be used to look for benchmark
    templates, but can be overridden with the ``--template`` option. Templates
    for the MPCDF clusters ``cobra``, ``draco`` and ``hydra`` are provided with the
    package. All available templates can be listed with the ``--list-hosts``
    option.
    """
    # Validate the CPU and GPU flags
    validate_cpu_gpu_flags(cpu, gpu)

    # Validate the number of nodes
    validate_number_of_nodes(min_nodes=min_nodes, max_nodes=max_nodes)

    # Grab the template name for the host. This should always work because
    # click does the validation for us
    template = utils.retrieve_host_template(host)

    # Warn the user that NAMD support is still experimental.
    if any(["namd" in m for m in module]):
        console.warn(NAMD_WARNING, "--gpu")

    module = mdengines.normalize_modules(module, skip_validation)

    # If several modules were given and only some of them cannot be found, we
    # continue with the ones that are available; error out if none are left.
    if not module:
        console.error("No requested modules available!")

    df_overview = pd.DataFrame(columns=[
        "name",
        "job_name",
        "base_directory",
        "template",
        "engine",
        "module",
        "nodes",
        "run time [min]",
        "gpu",
        "host",
    ])

    i = 1
    for m in module:
        # Here we detect the MD engine (supported: GROMACS and NAMD).
        engine = mdengines.detect_md_engine(m)

        # Check if all needed files exist. Throw an error if they do not.
        engine.check_input_file_exists(name)

        gpu_cpu = {"cpu": cpu, "gpu": gpu}
        for pu, state in sorted(gpu_cpu.items()):
            if not state:
                continue

            directory = "{}_{}".format(host, m)
            gpu = False
            gpu_string = ""
            if pu == "gpu":
                gpu = True
                directory += "_gpu"
                gpu_string = " with GPUs"

            console.info("Creating benchmark system for {}.", m + gpu_string)

            base_directory = dtr.Tree(directory)

            for nodes in range(min_nodes, max_nodes + 1):
                df_overview.loc[i] = [
                    name,
                    job_name,
                    base_directory,
                    template,
                    engine,
                    m,
                    nodes,
                    time,
                    gpu,
                    host,
                ]
                i += 1

    console.info("{}", "Benchmark Summary:")

    df_short = ConsolidateDataFrame(df_overview)
    PrintDataFrame(df_short)

    if yes:
        console.info("Generating the above benchmarks.")
    elif not click.confirm(
            "The above benchmarks will be generated. Continue?"):
        console.error("Exiting. No benchmarks generated.")

    for index, row in df_overview.iterrows():
        relative_path, file_basename = os.path.split(row["name"])
        write_benchmark(
            engine=row["engine"],
            base_directory=row["base_directory"],
            template=row["template"],
            nodes=row["nodes"],
            gpu=row["gpu"],
            module=row["module"],
            name=file_basename,
            relative_path=relative_path,
            job_name=row["job_name"],
            host=row["host"],
            time=row["run time [min]"],
        )

    # Provide some output for the user
    console.info(
        "Finished generating all benchmarks.\n"
        "You can now submit the jobs with {}.",
        "mdbenchmark submit",
    )
Example No. 11
    def test_init_from_Tree(self, tmpdir):
        with tmpdir.as_cwd():
            tree = dtr.Tree('this')
            t = Treant(tree)

            assert t.path == tree.path
Example No. 12
def do_generate(
    name,
    cpu,
    gpu,
    module,
    host,
    min_nodes,
    max_nodes,
    time,
    skip_validation,
    job_name,
    yes,
):
    """Generate a bunch of benchmarks."""
    # Validate the CPU and GPU flags
    validate_cpu_gpu_flags(cpu, gpu)

    # Validate the number of nodes
    validate_number_of_nodes(min_nodes=min_nodes, max_nodes=max_nodes)

    # Grab the template name for the host. This should always work because
    # click does the validation for us
    template = utils.retrieve_host_template(host)

    # Warn the user that NAMD support is still experimental.
    if any(["namd" in m for m in module]):
        console.warn(NAMD_WARNING, "--gpu")

    module = mdengines.normalize_modules(module, skip_validation)

    # If several modules were given and only some of them cannot be found, we
    # continue with the ones that are available; error out if none are left.
    if not module:
        console.error("No requested modules available!")

    df_overview = pd.DataFrame(columns=[
        "name",
        "job_name",
        "base_directory",
        "template",
        "engine",
        "module",
        "nodes",
        "run time [min]",
        "gpu",
        "host",
    ])

    i = 1
    for m in module:
        # Here we detect the MD engine (supported: GROMACS and NAMD).
        engine = mdengines.detect_md_engine(m)

        # Check if all needed files exist. Throw an error if they do not.
        engine.check_input_file_exists(name)

        gpu_cpu = {"cpu": cpu, "gpu": gpu}
        for pu, state in sorted(gpu_cpu.items()):
            if not state:
                continue

            directory = "{}_{}".format(host, m)
            gpu = False
            gpu_string = ""
            if pu == "gpu":
                gpu = True
                directory += "_gpu"
                gpu_string = " with GPUs"

            console.info("Creating benchmark system for {}.", m + gpu_string)

            base_directory = dtr.Tree(directory)

            for nodes in range(min_nodes, max_nodes + 1):
                df_overview.loc[i] = [
                    name,
                    job_name,
                    base_directory,
                    template,
                    engine,
                    m,
                    nodes,
                    time,
                    gpu,
                    host,
                ]
                i += 1

    console.info("{}", "Benchmark Summary:")

    df_short = ConsolidateDataFrame(df_overview)
    PrintDataFrame(df_short)

    if yes:
        console.info("Generating the above benchmarks.")
    elif not click.confirm(
            "The above benchmarks will be generated. Continue?"):
        console.error("Exiting. No benchmarks generated.")

    for _, row in df_overview.iterrows():
        relative_path, file_basename = os.path.split(row["name"])
        write_benchmark(
            engine=row["engine"],
            base_directory=row["base_directory"],
            template=row["template"],
            nodes=row["nodes"],
            gpu=row["gpu"],
            module=row["module"],
            name=file_basename,
            relative_path=relative_path,
            job_name=row["job_name"],
            host=row["host"],
            time=row["run time [min]"],
        )

    # Provide some output for the user
    console.info(
        "Finished generating all benchmarks.\n"
        "You can now submit the jobs with {}.",
        "mdbenchmark submit",
    )