def test_generate_namd_experimental_warning(cli_runner, monkeypatch, tmpdir):
    """Test that we print the NAMD experimental warning."""
    with tmpdir.as_cwd():
        for f in ["md.namd", "md.psf", "md.pdb"]:
            open(f, "a").close()

        # monkeypatch the output of the available modules
        monkeypatch.setattr(
            "mdbenchmark.mdengines.get_available_modules",
            lambda: {"namd": ["123"]},
        )

        result = cli_runner.invoke(
            cli,
            ["generate", "--module=namd/123", "--host=draco", "--name=md", "--yes"],
        )
        output1 = (
            "WARNING NAMD support is experimental. "
            "All input files must be in the current directory. "
            "Parameter paths must be absolute. Only crude file checks are performed! "
            "If you use the --gpu option make sure you use the GPU compatible NAMD module!\n"
            "Creating benchmark system for namd/123.\n"
        )
        bundle = dtr.discover()
        df = DataFrameFromBundle(bundle)
        df = ConsolidateDataFrame(df)
        test_output = "Benchmark Summary:\n" + PrintDataFrame(df, False) + "\n"

        output2 = (
            "Generating the above benchmarks.\n"
            "Finished generating all benchmarks.\nYou can "
            "now submit the jobs with mdbenchmark submit.\n"
        )

        output = output1 + test_output + output2

        assert result.exit_code == 0
        assert result.output == output
def _output(short=False):
    """Return the expected benchmark summary for the benchmarks in the current directory."""
    title = "Benchmark Summary:\n"
    bundle = dtr.discover()
    df = DataFrameFromBundle(bundle)

    if short:
        df = ConsolidateDataFrame(df)

    return title + PrintDataFrame(df, False) + "\n"
def do_submit(directory, force_restart, yes):
    """Submit the benchmarks."""
    # Migrate from MDBenchmark<2 to MDBenchmark>=2
    mds_to_dtr.migrate_to_datreant(directory)

    bundle = dtr.discover(directory)

    # Exit if no bundles were found in the current directory.
    if not bundle:
        console.error("No benchmarks found.")

    grouped_bundles = bundle.categories.groupby("started")
    try:
        bundles_not_yet_started = grouped_bundles[False]
    except KeyError:
        bundles_not_yet_started = None
    if not bundles_not_yet_started and not force_restart:
        console.error(
            "All generated benchmarks were already started once. "
            "You can force a restart with {}.",
            "--force",
        )

    # Start all benchmark simulations if a restart was requested. Otherwise
    # only start the ones that were not run yet.
    bundles_to_start = bundle
    if not force_restart:
        bundles_to_start = bundles_not_yet_started

    df = DataFrameFromBundle(bundles_to_start)

    # Reformat NaN values nicely into question marks.
    df_to_print = df.replace(np.nan, "?")
    df_to_print = df_to_print.drop(columns=["ns/day", "ncores"])
    console.info("{}", "Benchmark Summary:")
    df_short = ConsolidateDataFrame(df_to_print)
    PrintDataFrame(df_short)

    # Ask the user to confirm whether they want to submit the benchmarks
    if yes:
        console.info("The above benchmarks will be submitted.")
    elif not click.confirm("The above benchmarks will be submitted. Continue?"):
        console.error("Exiting. No benchmarks submitted.")

    batch_cmd = get_batch_command()
    console.info("Submitting a total of {} benchmarks.", len(bundles_to_start))
    for sim in bundles_to_start:
        # Remove files generated by a previous mdbenchmark run
        if force_restart:
            engine = detect_md_engine(sim.categories["module"])
            cleanup_before_restart(engine=engine, sim=sim)
        sim.categories["started"] = True
        os.chdir(sim.abspath)
        subprocess.call([batch_cmd, "bench.job"])
    console.info(
        "Submitted all benchmarks. Run {} once they are finished to get the results.",
        "mdbenchmark analyze",
    )
def test_submit_resubmit(cli_runner, monkeypatch, tmpdir, data):
    """Test that we cannot submit a benchmark system that was already submitted,
    unless we force it.
    """
    with tmpdir.as_cwd():
        # Test that we get an error if we try to point the submit function to
        # a non-existent path.
        result = cli_runner.invoke(cli, ["submit", "--directory=look_here/", "--yes"])
        assert result.exit_code == 1
        assert result.output == "ERROR No benchmarks found.\n"

        # Test that we get an error if we try to start benchmarks that were
        # already started once.
        result = cli_runner.invoke(
            cli,
            [
                "submit",
                "--directory={}".format(data["analyze-files-gromacs"]),
                "--yes",
            ],
        )
        df = pd.read_csv(data["analyze-files-gromacs-consolidated.csv"], index_col=0)
        s = PrintDataFrame(df, False)
        output = "ERROR All generated benchmarks were already started once. You can force a restart with --force.\n"
        assert result.exit_code == 1
        assert result.output == output

        # Test that we can force restart already run benchmarks.
        # Monkeypatch a few functions
        monkeypatch.setattr("subprocess.call", lambda x: True)
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.get_batch_command", lambda: "sbatch"
        )
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.detect_md_engine", lambda x: gromacs
        )
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.cleanup_before_restart",
            lambda engine, sim: True,
        )
        output = (
            "Benchmark Summary:\n"
            + s
            + "\nThe above benchmarks will be submitted.\n"
            + "Submitting a total of 5 benchmarks.\n"
            + "Submitted all benchmarks. Run mdbenchmark analyze once they are finished to get the results.\n"
        )
        result = cli_runner.invoke(
            cli,
            [
                "submit",
                "--directory={}".format(data["analyze-files-gromacs"]),
                "--force",
                "--yes",
            ],
        )
        assert result.exit_code == 0
        assert result.output == output
def test_analyze_namd(cli_runner, tmpdir, data):
    """Test that the analyze output is correct for NAMD benchmarks."""
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli, ["analyze", "--directory={}".format(data["analyze-files-namd"])]
        )
        bundle = dtr.discover(data["analyze-files-namd"])
        df = DataFrameFromBundle(bundle)
        test_output = PrintDataFrame(df, False) + "\n"

        assert result.exit_code == 0
        assert result.output == test_output
def test_analyze_gromacs(cli_runner, tmpdir, data):
    """Test that the output is OK when all outputs are fine."""
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli, ["analyze", "--directory={}".format(data["analyze-files-gromacs"])]
        )

        df = pd.read_csv(data["analyze-files-gromacs.csv"])
        test_output = PrintDataFrame(df, False) + "\n"

        assert result.exit_code == 0
        assert result.output == test_output
def test_analyze_with_errors(cli_runner, tmpdir, data):
    """Test that we warn the user of errors in the output files.

    Also test that we show a question mark instead of a float in the
    corresponding cell.
    """
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli, ["analyze", "--directory={}".format(data["analyze-files-w-errors"])]
        )

        bundle = dtr.discover(data["analyze-files-w-errors"])
        df = DataFrameFromBundle(bundle)
        df = df.replace(np.nan, "?")
        test_output = PrintDataFrame(df, False) + "\n"

        assert result.exit_code == 0
        assert result.output == test_output
def test_analyze_plot(cli_runner, tmpdir, data):
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli.cli,
            [
                "analyze",
                "--directory={}".format(data["analyze-files-gromacs"], "--plot"),
            ],
        )
        bundle = dtr.discover(data["analyze-files-gromacs"])
        df = DataFrameFromBundle(bundle)
        test_output = PrintDataFrame(df, False) + "\n"

        assert result.exit_code == 0
        assert result.output == test_output
        os.path.isfile("runtimes.pdf")
def do_analyze(directory, plot, ncores, save_csv):
    """Analyze benchmarks."""
    # Migrate from MDBenchmark<2 to MDBenchmark>=2
    mds_to_dtr.migrate_to_datreant(directory)

    bundle = dtr.discover(directory)

    df = DataFrameFromBundle(bundle)

    if save_csv is not None and not save_csv.endswith(".csv"):
        save_csv = "{}.csv".format(save_csv)
    df.to_csv(save_csv)

    # Warn the user if we could not gather values for all systems.
    if df.isnull().values.any():
        console.warn(
            "We were not able to gather information for all systems. "
            "Systems marked with question marks have either crashed or "
            "were not started yet."
        )

    # Reformat NaN values nicely into question marks.
    # move this to the bundle function!
    df = df.replace(np.nan, "?")
    PrintDataFrame(df)

    if plot:
        console.warn("'--plot' has been deprecated, use '{}'.", "mdbenchmark plot")

        fig = Figure()
        FigureCanvas(fig)
        ax = fig.add_subplot(111)

        df = pd.read_csv(save_csv)

        if ncores:
            console.warn(
                "Ignoring your value from '{}' and parsing number of cores from log files.",
                "--number-cores/-ncores",
            )

        ax = plot_over_group(df, plot_cores=ncores, fit=True, ax=ax)
        lgd = ax.legend(loc="upper center", bbox_to_anchor=(0.5, -0.175))

        fig.tight_layout()
        fig.savefig(
            "runtimes.pdf", format="pdf", bbox_extra_artists=(lgd,), bbox_inches="tight"
        )
def test_submit_test_prompt_no(cli_runner, tmpdir, data):
    """Test whether prompt answer no works."""
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli.cli,
            ["submit", "--directory={}".format(data["analyze-files-gromacs"])],
            input="n\n",
        )
        df = pd.read_csv(data["analyze-files-gromacs-consolidated.csv"], index_col=0)
        s = PrintDataFrame(df, False)
        output = (
            "Benchmark Summary:\n"
            + s
            + "\nThe above benchmarks will be submitted. Continue? [y/N]: n\n"
            + "ERROR Exiting. No benchmarks submitted.\n"
        )

        assert result.exit_code == 1
        assert result.output == output
def test_submit_test_prompt_yes(cli_runner, tmpdir, data, monkeypatch):
    """Test whether prompt answer yes works."""
    with tmpdir.as_cwd():
        # Test that we can force restart already run benchmarks.
        # Monkeypatch a few functions
        monkeypatch.setattr("subprocess.call", lambda x: True)
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.get_batch_command", lambda: "sbatch"
        )
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.detect_md_engine", lambda x: gromacs
        )
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.cleanup_before_restart",
            lambda engine, sim: True,
        )
        result = cli_runner.invoke(
            cli,
            [
                "submit",
                "--directory={}".format(data["analyze-files-gromacs-one-unstarted"]),
            ],
            input="y\n",
        )
        df = pd.read_csv(data["analyze-files-gromacs-prompt.csv"], index_col=0)
        s = PrintDataFrame(df, False)
        output = (
            "Benchmark Summary:\n"
            + s
            + "\nThe above benchmarks will be submitted. Continue? [y/N]: y\n"
            + "Submitting a total of 1 benchmarks.\n"
            + "Submitted all benchmarks. Run mdbenchmark analyze once they are finished to get the results.\n"
        )

        assert result.exit_code == 0
        assert result.output == output

        # Lazy way of resetting the value of `started` to `false`.
        # TODO: We need to clean up all of our unit tests...
        treant = dtr.Bundle(data["analyze-files-gromacs-one-unstarted"] + "/1")
        treant.categories["started"] = False
def test_analyze_many_rows(cli_runner, tmpdir, datafiles):
    """Test that pandas does not limit the number of printed rows."""
    with tmpdir.as_cwd():
        open("protein.tpr", "a").close()
        result = cli_runner.invoke(
            cli.cli,
            [
                "generate",
                "--module=gromacs/2016.3",
                "--host=draco",
                "--max-nodes=64",
                "--name=protein",
                "--yes",
            ],
        )
        result = cli_runner.invoke(cli.cli, ["analyze", "--directory=draco_gromacs"])

        df = pd.read_csv(datafiles["analyze-many-rows.csv"], index_col=0)
        test_output = PrintDataFrame(df, False) + "\n"

        assert result.exit_code == 0
        assert result.output == test_output
def test_generate_test_prompt_yes(cli_runner, tmpdir, generate_output):
    """Test whether prompt answer yes works."""
    with tmpdir.as_cwd():
        open("protein.tpr", "a").close()

        result = cli_runner.invoke(
            cli.cli,
            [
                "generate",
                "--module=gromacs/2016",
                "--host=draco",
                "--max-nodes=4",
                "--gpu",
                "--no-cpu",
                "--name=protein",
            ],
            input="y\n",
        )

        output1 = (
            "WARNING Cannot locate modules available on this host. Not performing module name validation.\n"
            "Creating benchmark system for gromacs/2016 with GPUs.\n"
        )
        bundle = dtr.discover()
        df = DataFrameFromBundle(bundle)
        df = ConsolidateDataFrame(df)
        output2 = (
            "The above benchmarks will be generated. Continue? [y/N]: y\n"
            "Finished generating all benchmarks.\n"
            "You can now submit the jobs with mdbenchmark submit.\n"
        )
        mid = "Benchmark Summary:\n" + PrintDataFrame(df, False) + "\n"
        output = output1 + mid + output2

        # Test that we get a warning if no module name validation is performed.
        assert result.exit_code == 0
        assert result.output == output
def test_generate_odd_number_of_nodes(
    cli_runner,
    engine,
    module,
    extensions,
    generate_output,
    monkeypatch,
    tmpdir,
    version,
):
    """Make sure we generate the correct folder structure."""
    with tmpdir.as_cwd():
        for ext in extensions:
            open("protein.{}".format(ext), "a").close()

        monkeypatch.setattr(
            "mdbenchmark.mdengines.get_available_modules",
            lambda: {"gromacs": ["2016"], "namd": ["11"]},
        )

        result = cli_runner.invoke(
            cli.cli,
            [
                "generate",
                "--module={}".format(module),
                "--host=draco",
                "--min-nodes=6",
                "--max-nodes=8",
                "--gpu",
                "--no-cpu",
                "--name=protein",
                "--yes",
            ],
        )

        output1 = "Creating benchmark system for {} with GPUs.\n".format(module)
        bundle = dtr.discover()
        df = DataFrameFromBundle(bundle)
        df = ConsolidateDataFrame(df)
        test_output = "Benchmark Summary:\n" + PrintDataFrame(df, False) + "\n"
        output2 = (
            "Generating the above benchmarks.\n"
            "Finished generating all benchmarks.\n"
            "You can now submit the jobs with mdbenchmark submit.\n"
        )

        if "namd" in module:
            output = NAMD_WARNING_FORMATTED + output1 + test_output + output2
        else:
            output = output1 + test_output + output2

        assert result.exit_code == 0
        assert result.output == output
        assert os.path.exists("draco_{}".format(engine))

        host_engine_version_path = "draco_{}/{}_gpu/".format(engine, version)
        for i in range(6, 9):
            assert os.path.exists(host_engine_version_path + "{}".format(i))
            for ext in extensions:
                assert os.path.exists(
                    host_engine_version_path + "{}/protein.{}".format(i, ext)
                )
            assert os.path.exists(host_engine_version_path + "{}/bench.job".format(i))
def do_generate(
    name,
    cpu,
    gpu,
    module,
    host,
    min_nodes,
    max_nodes,
    time,
    skip_validation,
    job_name,
    yes,
):
    """Generate a bunch of benchmarks."""
    # Validate the CPU and GPU flags
    validate_cpu_gpu_flags(cpu, gpu)

    # Validate the number of nodes
    validate_number_of_nodes(min_nodes=min_nodes, max_nodes=max_nodes)

    # Grab the template name for the host. This should always work because
    # click does the validation for us
    template = utils.retrieve_host_template(host)

    # Warn the user that NAMD support is still experimental.
    if any(["namd" in m for m in module]):
        console.warn(NAMD_WARNING, "--gpu")

    module = mdengines.normalize_modules(module, skip_validation)

    # If several modules were given and only some of them could not be found,
    # we continue with the modules that were found.
    if not module:
        console.error("No requested modules available!")

    df_overview = pd.DataFrame(
        columns=[
            "name",
            "job_name",
            "base_directory",
            "template",
            "engine",
            "module",
            "nodes",
            "run time [min]",
            "gpu",
            "host",
        ]
    )

    i = 1
    for m in module:
        # Here we detect the MD engine (supported: GROMACS and NAMD).
        engine = mdengines.detect_md_engine(m)

        # Check if all needed files exist. Throw an error if they do not.
        engine.check_input_file_exists(name)

        gpu_cpu = {"cpu": cpu, "gpu": gpu}
        for pu, state in sorted(gpu_cpu.items()):
            if not state:
                continue

            directory = "{}_{}".format(host, m)
            gpu = False
            gpu_string = ""
            if pu == "gpu":
                gpu = True
                directory += "_gpu"
                gpu_string = " with GPUs"

            console.info("Creating benchmark system for {}.", m + gpu_string)

            base_directory = dtr.Tree(directory)

            for nodes in range(min_nodes, max_nodes + 1):
                df_overview.loc[i] = [
                    name,
                    job_name,
                    base_directory,
                    template,
                    engine,
                    m,
                    nodes,
                    time,
                    gpu,
                    host,
                ]
                i += 1

    console.info("{}", "Benchmark Summary:")
    df_short = ConsolidateDataFrame(df_overview)
    PrintDataFrame(df_short)

    if yes:
        console.info("Generating the above benchmarks.")
    elif not click.confirm("The above benchmarks will be generated. Continue?"):
        console.error("Exiting. No benchmarks generated.")

    for _, row in df_overview.iterrows():
        relative_path, file_basename = os.path.split(row["name"])
        write_benchmark(
            engine=row["engine"],
            base_directory=row["base_directory"],
            template=row["template"],
            nodes=row["nodes"],
            gpu=row["gpu"],
            module=row["module"],
            name=file_basename,
            relative_path=relative_path,
            job_name=row["job_name"],
            host=row["host"],
            time=row["run time [min]"],
        )

    # Provide some output for the user
    console.info(
        "Finished generating all benchmarks.\n"
        "You can now submit the jobs with {}.",
        "mdbenchmark submit",
    )