def test_includes(tmpdir):
    with tmpdir.as_cwd():
        sequoia = _create_tree(
            'sequoia',
            ['hello.txt', 'data/hello.txt', 'data/world.dat', 'world.dat'])
        sequoia2 = dtr.Tree("sequoia2").makedirs()
        sequoia3 = dtr.Tree("sequoia3").makedirs()
        sequoia4 = dtr.Tree("sequoia4").makedirs()

        # Only txt
        sequoia.sync(sequoia2, include='*.txt')
        assert os.path.exists('sequoia2/data/hello.txt')
        assert os.path.exists('sequoia2/hello.txt')
        assert not os.path.exists('sequoia2/world.dat')
        assert not os.path.exists('sequoia2/data/world.dat')

        # Only txt and dat
        sequoia.sync(sequoia3, include=['*.txt', '*.dat'])
        assert os.path.exists('sequoia3/data/hello.txt')
        assert os.path.exists('sequoia3/hello.txt')
        assert os.path.exists('sequoia3/world.dat')
        assert os.path.exists('sequoia3/data/world.dat')

        # We can also test include/excludes at the same time
        sequoia.sync(sequoia4, exclude='*.txt', include=['data/*'])
        assert os.path.exists('sequoia4/data/world.dat')
        assert os.path.exists('sequoia4/data/hello.txt')
        assert not os.path.exists('sequoia4/hello.txt')
        assert not os.path.exists('sequoia4/world.dat')
def test_sync_local(tmpdir):
    '''Test that synchronization works locally.'''
    with tmpdir.as_cwd():
        sequoia = dtr.Tree("sequoia").makedirs()
        sequoia2 = dtr.Tree("sequoia2").makedirs()
        py.path.local('sequoia/hello.txt').write('hello')
        assert os.path.exists('sequoia/hello.txt')

        sequoia.sync(sequoia2)
        assert os.path.exists('sequoia2/hello.txt')
def _create_tree(name, files=None):
    # Avoid a mutable default argument; fall back to an empty list.
    if files is None:
        files = []
    tree = dtr.Tree(name).makedirs()
    for file_ in files:
        py.path.local(os.path.join(tree.abspath, file_)).write('hello',
                                                               ensure=True)
    return tree
def filled_collection(self, tmpdir):
    # Returns (a bundle of [t1, t2, t3], then individual references to each)
    with tmpdir.as_cwd():
        t1 = dtr.Tree('larry')
        t2 = dtr.Leaf('curly')
        t3 = dtr.Treant('moe')
        b = dtr.View(t1, t2, t3)
        return b, (t1, t2, t3)
def test_write_benchmark(engine, gpu, job_name, module, input_name, extensions,
                         tmpdir):
    """Test that the write_benchmark works as expected."""
    host = "draco"
    base_dirname = "{}_{}".format(host, engine)
    nodes = 5
    with tmpdir.as_cwd():
        base_directory = dtr.Tree(base_dirname)

        for ext in extensions:
            open("md.{}".format(ext), "a").close()

        template = retrieve_host_template("draco")
        utils.write_benchmark(
            engine=engine,
            base_directory=base_directory,
            template=template,
            nodes=nodes,
            gpu=gpu,
            module=module,
            name=input_name,
            job_name=job_name,
            relative_path=".",
            host=host,
            time=15,
        )

        expected_job_name = "md" if job_name is None else job_name

        assert os.path.exists(base_dirname)
        assert os.path.exists(
            os.path.join(base_dirname, "{}".format(nodes), input_name)
        )

        with open(
            os.path.join(base_dirname, "{}".format(nodes), "bench.job"), "r"
        ) as f:
            for line in f:
                if "#SBATCH -J" in line:
                    assert line == "#SBATCH -J {}\n".format(expected_job_name)
                if "--partition=" in line:
                    if gpu:
                        assert line == "#SBATCH --partition=gpu\n"
                    else:
                        assert line == "#SBATCH --partition=express\n"
                if "--nodes=" in line:
                    assert line == "#SBATCH --nodes={}\n".format(nodes)
                if "--time=" in line:
                    assert line == "#SBATCH --time={}\n".format("00:20:00")
                if "module load {}/".format(engine) in line:
                    assert line == "module load {}\n".format(module)
                if "srun" in line:
                    if engine == "gromacs":
                        assert line == "srun gmx_mpi mdrun -v -maxh 0.25 -deffnm md"
                    elif engine == "namd":
                        assert line == "srun namd2 md.namd"
def test_excludes(tmpdir):
    with tmpdir.as_cwd():
        sequoia = _create_tree(
            'sequoia',
            ['hello.txt', 'data/hello.txt', 'data/world.dat', 'world.dat'])
        sequoia2 = dtr.Tree("sequoia2").makedirs()
        sequoia3 = dtr.Tree("sequoia3").makedirs()

        sequoia.sync(sequoia2, exclude='*.txt')
        assert os.path.exists('sequoia2/world.dat')
        assert os.path.exists('sequoia2/data/world.dat')
        assert not os.path.exists('sequoia2/hello.txt')
        assert not os.path.exists('sequoia2/data/hello.txt')

        sequoia.sync(sequoia3, exclude=['*.txt', '*.dat'])
        assert not os.path.exists('sequoia3/hello.txt')
        assert not os.path.exists('sequoia3/world.dat')
        assert os.path.exists('sequoia3/data/')
def test_overwrite(tmpdir):
    with tmpdir.as_cwd():
        sequoia = _create_tree(
            'sequoia',
            ['hello.txt', 'data/hello.txt', 'data/world.dat', 'world.dat'])
        sequoia2 = dtr.Tree("sequoia2").makedirs()

        # Upload contents
        sequoia.sync(sequoia2, mode='upload')

        # Change contents of the destination copy
        with open(sequoia2['hello.txt'].abspath, 'w') as f:
            f.write('newcontent')

        # Upload contents again
        sequoia.sync(sequoia2, mode='upload')

        # Verify that hello.txt is not overwritten
        assert sequoia2['hello.txt'].read() == 'newcontent'
def test_cleanup_before_restart(engine, files_to_delete, files_to_keep, tmpdir):
    """Test that the cleanup of each directory works as intended for all MD engines."""
    # Create temporary directory
    tmp = tmpdir.mkdir("mdbenchmark")

    # Create empty files
    for f in files_to_delete + files_to_keep:
        open("{}/{}".format(tmp, f), "a").close()

    # Run the cleanup script
    utils.cleanup_before_restart(engine=engine, sim=dtr.Tree(tmp.strpath))

    # Look for files that were left
    files_found = []
    for f in files_to_keep:
        files_found.extend(glob(os.path.join(tmp.strpath, f)))

    # Get rid of the `tmp` path and only compare the actual filenames
    assert files_to_keep == [x[len(str(tmp)) + 1 :] for x in files_found]
def construct_generate_data(
    name,
    job_name,
    modules,
    host,
    template,
    cpu,
    gpu,
    time,
    min_nodes,
    max_nodes,
    processor,
    number_of_ranks,
    enable_hyperthreading,
    multidir,
):
    data = []
    for module in modules:
        # Here we detect the MD engine (supported: GROMACS and NAMD).
        engine = mdengines.detect_md_engine(module)

        # Iterate over CPUs or GPUs
        gpu_cpu = {"cpu": cpu, "gpu": gpu}
        for key, value in sorted(gpu_cpu.items()):
            # Skip the current processing unit
            if not value:
                continue

            # Generate directory name and string representation for the user.
            # Also set the `gpu` variable for later use.
            gpu = True if key == "gpu" else False
            directory = construct_directory_name(template.name, module, gpu)

            # Set up the path to the new directory as `datreant.Tree`
            base_directory = dtr.Tree(directory)

            # Do the main iteration over nodes, ranks and number of simulations
            for nodes in range(min_nodes, max_nodes + 1):
                for _ranks in number_of_ranks:
                    ranks, threads = processor.get_ranks_and_threads(
                        _ranks, with_hyperthreading=enable_hyperthreading
                    )
                    for nsim in multidir:
                        # Append the data to our list
                        data.append(
                            [
                                name,
                                job_name,
                                base_directory,
                                host,
                                engine,
                                module,
                                nodes,
                                time,
                                gpu,
                                template,
                                ranks,
                                threads,
                                enable_hyperthreading,
                                nsim,
                            ]
                        )

    return data
def generate(
    name,
    cpu,
    gpu,
    module,
    host,
    min_nodes,
    max_nodes,
    time,
    skip_validation,
    job_name,
    yes,
):
    """Generate benchmarks for molecular dynamics simulations.

    Requires the ``--name`` option to point to an existing input file, e.g.,
    ``protein.tpr`` for GROMACS and ``protein.namd``, ``protein.pdb`` and
    ``protein.psf`` for NAMD. The filename ``protein`` will then be used as the
    job name, or can be overwritten with the ``--job-name`` option.

    The specified module name will be validated and searched on the current
    system. To skip this check, use the ``--skip-validation`` option.

    Benchmarks are generated for CPUs by default (``--cpu``), but can also be
    generated for GPUs (``--gpu``) at the same time or without CPUs
    (``--no-cpu``).

    The hostname of the current system will be used to look for benchmark
    templates, but can be overwritten with the ``--template`` option. Templates
    for the MPCDF clusters ``cobra``, ``draco`` and ``hydra`` are provided with
    the package. All available templates can be listed with the
    ``--list-hosts`` option.
    """
    # Validate the CPU and GPU flags
    validate_cpu_gpu_flags(cpu, gpu)

    # Validate the number of nodes
    validate_number_of_nodes(min_nodes=min_nodes, max_nodes=max_nodes)

    # Grab the template name for the host. This should always work because
    # click does the validation for us.
    template = utils.retrieve_host_template(host)

    # Warn the user that NAMD support is still experimental.
    if any(["namd" in m for m in module]):
        console.warn(NAMD_WARNING, "--gpu")

    module = mdengines.normalize_modules(module, skip_validation)

    # If several modules were given and only some of them cannot be found, we
    # continue with the remaining ones. Error out only if none are left.
    if not module:
        console.error("No requested modules available!")

    df_overview = pd.DataFrame(
        columns=[
            "name",
            "job_name",
            "base_directory",
            "template",
            "engine",
            "module",
            "nodes",
            "run time [min]",
            "gpu",
            "host",
        ]
    )

    i = 1
    for m in module:
        # Here we detect the MD engine (supported: GROMACS and NAMD).
        engine = mdengines.detect_md_engine(m)

        # Check if all needed files exist. Throw an error if they do not.
        engine.check_input_file_exists(name)

        gpu_cpu = {"cpu": cpu, "gpu": gpu}
        for pu, state in sorted(gpu_cpu.items()):
            if not state:
                continue

            directory = "{}_{}".format(host, m)
            gpu = False
            gpu_string = ""
            if pu == "gpu":
                gpu = True
                directory += "_gpu"
                gpu_string = " with GPUs"

            console.info("Creating benchmark system for {}.", m + gpu_string)

            base_directory = dtr.Tree(directory)

            for nodes in range(min_nodes, max_nodes + 1):
                df_overview.loc[i] = [
                    name,
                    job_name,
                    base_directory,
                    template,
                    engine,
                    m,
                    nodes,
                    time,
                    gpu,
                    host,
                ]
                i += 1

    console.info("{}", "Benchmark Summary:")
    df_short = ConsolidateDataFrame(df_overview)
    PrintDataFrame(df_short)

    if yes:
        console.info("Generating the above benchmarks.")
    elif not click.confirm("The above benchmarks will be generated. Continue?"):
        console.error("Exiting. No benchmarks generated.")

    for _, row in df_overview.iterrows():
        relative_path, file_basename = os.path.split(row["name"])
        write_benchmark(
            engine=row["engine"],
            base_directory=row["base_directory"],
            template=row["template"],
            nodes=row["nodes"],
            gpu=row["gpu"],
            module=row["module"],
            name=file_basename,
            relative_path=relative_path,
            job_name=row["job_name"],
            host=row["host"],
            time=row["run time [min]"],
        )

    # Provide some output for the user
    console.info(
        "Finished generating all benchmarks.\n"
        "You can now submit the jobs with {}.",
        "mdbenchmark submit",
    )
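# A minimal usage sketch for the ``generate`` command above, based on the options
# described in its docstring. The flag spellings for the module, template and node
# range (``--module``, ``--template``, ``--min-nodes``, ``--max-nodes``) are
# assumptions derived from the function signature, and the file and module names
# are placeholders, not values taken from this repository:
#
#     mdbenchmark generate --name protein --module gromacs/2018.3 \
#         --template draco --min-nodes 1 --max-nodes 4 --gpu
#
# With these options, benchmarks would be generated for both CPUs (the default)
# and GPUs for 1 to 4 nodes, using ``protein`` as the job name unless
# ``--job-name`` is given.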
def test_init_from_Tree(self, tmpdir):
    with tmpdir.as_cwd():
        tree = dtr.Tree('this')

        t = Treant(tree)
        assert t.path == tree.path
def do_generate(
    name,
    cpu,
    gpu,
    module,
    host,
    min_nodes,
    max_nodes,
    time,
    skip_validation,
    job_name,
    yes,
):
    """Generate a bunch of benchmarks."""
    # Validate the CPU and GPU flags
    validate_cpu_gpu_flags(cpu, gpu)

    # Validate the number of nodes
    validate_number_of_nodes(min_nodes=min_nodes, max_nodes=max_nodes)

    # Grab the template name for the host. This should always work because
    # click does the validation for us.
    template = utils.retrieve_host_template(host)

    # Warn the user that NAMD support is still experimental.
    if any(["namd" in m for m in module]):
        console.warn(NAMD_WARNING, "--gpu")

    module = mdengines.normalize_modules(module, skip_validation)

    # If several modules were given and only some of them cannot be found, we
    # continue with the remaining ones. Error out only if none are left.
    if not module:
        console.error("No requested modules available!")

    df_overview = pd.DataFrame(
        columns=[
            "name",
            "job_name",
            "base_directory",
            "template",
            "engine",
            "module",
            "nodes",
            "run time [min]",
            "gpu",
            "host",
        ]
    )

    i = 1
    for m in module:
        # Here we detect the MD engine (supported: GROMACS and NAMD).
        engine = mdengines.detect_md_engine(m)

        # Check if all needed files exist. Throw an error if they do not.
        engine.check_input_file_exists(name)

        gpu_cpu = {"cpu": cpu, "gpu": gpu}
        for pu, state in sorted(gpu_cpu.items()):
            if not state:
                continue

            directory = "{}_{}".format(host, m)
            gpu = False
            gpu_string = ""
            if pu == "gpu":
                gpu = True
                directory += "_gpu"
                gpu_string = " with GPUs"

            console.info("Creating benchmark system for {}.", m + gpu_string)

            base_directory = dtr.Tree(directory)

            for nodes in range(min_nodes, max_nodes + 1):
                df_overview.loc[i] = [
                    name,
                    job_name,
                    base_directory,
                    template,
                    engine,
                    m,
                    nodes,
                    time,
                    gpu,
                    host,
                ]
                i += 1

    console.info("{}", "Benchmark Summary:")
    df_short = ConsolidateDataFrame(df_overview)
    PrintDataFrame(df_short)

    if yes:
        console.info("Generating the above benchmarks.")
    elif not click.confirm("The above benchmarks will be generated. Continue?"):
        console.error("Exiting. No benchmarks generated.")

    for _, row in df_overview.iterrows():
        relative_path, file_basename = os.path.split(row["name"])
        write_benchmark(
            engine=row["engine"],
            base_directory=row["base_directory"],
            template=row["template"],
            nodes=row["nodes"],
            gpu=row["gpu"],
            module=row["module"],
            name=file_basename,
            relative_path=relative_path,
            job_name=row["job_name"],
            host=row["host"],
            time=row["run time [min]"],
        )

    # Provide some output for the user
    console.info(
        "Finished generating all benchmarks.\n"
        "You can now submit the jobs with {}.",
        "mdbenchmark submit",
    )