def test_analyze_with_errors(cli_runner, tmpdir, capsys, data):
    """Test that we warn the user of errors in the output files.

    Also test that we show a question mark instead of a float in the
    corresponding cell.
    """
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli,
            ["analyze", "--directory={}".format(data["analyze-files-w-errors"])],
        )
        version = Version2Categories()
        bundle = dtr.discover(data["analyze-files-w-errors"])
        df = parse_bundle(
            bundle,
            columns=version.analyze_categories,
            sort_values_by=version.analyze_sort,
        )
        df = df.iloc[:, :-1]
        df = df.replace(np.nan, "?")
        print_dataframe(
            df,
            columns=map_columns(version.category_mapping, version.analyze_printing),
        )

        out, _ = capsys.readouterr()
        out = "Setting up...\n" + out

        assert result.exit_code == 0
        assert result.output == "\n".join(out.split("\n"))
def test_discover_depth(tmpdir):
    """Check that using the `depth` parameter gives the expected result."""
    with tmpdir.as_cwd():
        ghosts = ('something/inky', 'something/else/blinky', 'pinky',
                  'something/clyde')

        for name in ghosts:
            dtr.Treant(name)

        assert len(discover('.', depth=0)) == 0
        assert len(discover('pinky', depth=0)) == 1
        assert len(discover('.', depth=1)) == 1
        assert len(discover('.', depth=2)) == 3
        assert len(discover('.', depth=3)) == 4
def test_generate_namd_experimental_warning(cli_runner, monkeypatch, tmpdir):
    """Test that we print the NAMD experimental warning."""
    with tmpdir.as_cwd():
        for f in ["md.namd", "md.psf", "md.pdb"]:
            open(f, "a").close()

        # monkeypatch the output of the available modules
        monkeypatch.setattr(
            "mdbenchmark.mdengines.get_available_modules",
            lambda: {"namd": ["123"]},
        )

        result = cli_runner.invoke(
            cli,
            ["generate", "--module=namd/123", "--host=draco", "--name=md", "--yes"],
        )
        output1 = (
            "WARNING NAMD support is experimental. "
            "All input files must be in the current directory. "
            "Parameter paths must be absolute. Only crude file checks are performed! "
            "If you use the --gpu option make sure you use the GPU compatible NAMD module!\n"
            "Creating benchmark system for namd/123.\n"
        )
        bundle = dtr.discover()
        df = DataFrameFromBundle(bundle)
        df = ConsolidateDataFrame(df)
        test_output = "Benchmark Summary:\n" + PrintDataFrame(df, False) + "\n"
        output2 = (
            "Generating the above benchmarks.\n"
            "Finished generating all benchmarks.\nYou can "
            "now submit the jobs with mdbenchmark submit.\n"
        )
        output = output1 + test_output + output2

        assert result.exit_code == 0
        assert result.output == output
def test_consolidate_dataframe(capsys, data):
    bundle = dtr.discover(data["analyze-files-gromacs"])
    version = VersionFactory(categories=bundle.categories).version_class
    df = utils.parse_bundle(
        bundle,
        columns=version.analyze_categories,
        sort_values_by=version.analyze_sort,
    )
    test_output = utils.consolidate_dataframe(
        df, columns=version.consolidate_categories
    )

    print_dataframe(
        test_output[version.generate_printing[1:]],
        columns=map_columns(
            map_dict=version.category_mapping,
            columns=version.generate_printing[1:],
        ),
    )

    expected_output = (
        "Setting up...\n\n"
        "+----------------+---------+--------------+---------+--------+-----------+-------------+-------------------+\n",
        "| Module         | Nodes   |   Time (min) | GPUs?   | Host   | # ranks   | # threads   | Hyperthreading?   |\n",
        "|----------------+---------+--------------+---------+--------+-----------+-------------+-------------------|\n",
        "| gromacs/2016.3 | 1-5     |           15 | False   | draco  | nan       | nan         | nan               |\n",
        "+----------------+---------+--------------+---------+--------+-----------+-------------+-------------------+\n\n",
    )

    out, _ = capsys.readouterr()

    assert "\n".join(out.split("\n")) == "".join(expected_output)
def _output(short=False):
    title = "Benchmark Summary:\n"
    bundle = dtr.discover()
    df = DataFrameFromBundle(bundle)
    if short:
        df = ConsolidateDataFrame(df)

    return title + PrintDataFrame(df, False) + "\n"
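# Hedged usage sketch (assumption, not taken from the source): _output() rebuilds the
# "Benchmark Summary:" block that several generate/submit tests in this collection
# construct inline via dtr.discover() + DataFrameFromBundle + ConsolidateDataFrame +
# PrintDataFrame. A test could therefore shorten its expected output roughly like this,
# provided it runs inside tmpdir.as_cwd() after the benchmark directories exist:
#
#     test_output = _output(short=True)
#     output = output1 + test_output + output2
#     assert result.output == output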
def test_parse_bundle(data):
    bundle = dtr.discover(data["analyze-files-gromacs"])
    version = VersionFactory(categories=bundle.categories).version_class
    test_output = utils.parse_bundle(
        bundle,
        columns=version.analyze_categories,
        sort_values_by=version.analyze_sort,
    )
    expected_output = pd.read_csv(data["analyze-files-gromacs.csv"], index_col=False)

    assert_frame_equal(test_output, expected_output)
def do_submit(directory, force_restart, yes):
    """Submit the benchmarks."""
    # Migrate from MDBenchmark<2 to MDBenchmark=>2
    mds_to_dtr.migrate_to_datreant(directory)

    bundle = dtr.discover(directory)

    # Exit if no bundles were found in the current directory.
    if not bundle:
        console.error("No benchmarks found.")

    grouped_bundles = bundle.categories.groupby("started")
    try:
        bundles_not_yet_started = grouped_bundles[False]
    except KeyError:
        bundles_not_yet_started = None

    if not bundles_not_yet_started and not force_restart:
        console.error(
            "All generated benchmarks were already started once. "
            "You can force a restart with {}.",
            "--force",
        )

    # Start all benchmark simulations if a restart was requested. Otherwise
    # only start the ones that were not run yet.
    bundles_to_start = bundle
    if not force_restart:
        bundles_to_start = bundles_not_yet_started

    df = DataFrameFromBundle(bundles_to_start)

    # Reformat NaN values nicely into question marks.
    df_to_print = df.replace(np.nan, "?")
    df_to_print = df_to_print.drop(columns=["ns/day", "ncores"])
    console.info("{}", "Benchmark Summary:")
    df_short = ConsolidateDataFrame(df_to_print)
    PrintDataFrame(df_short)

    # Ask the user to confirm whether they want to submit the benchmarks
    if yes:
        console.info("The above benchmarks will be submitted.")
    elif not click.confirm("The above benchmarks will be submitted. Continue?"):
        console.error("Exiting. No benchmarks submitted.")

    batch_cmd = get_batch_command()
    console.info("Submitting a total of {} benchmarks.", len(bundles_to_start))
    for sim in bundles_to_start:
        # Remove files generated by previous mdbenchmark run
        if force_restart:
            engine = detect_md_engine(sim.categories["module"])
            cleanup_before_restart(engine=engine, sim=sim)
        sim.categories["started"] = True
        os.chdir(sim.abspath)
        subprocess.call([batch_cmd, "bench.job"])

    console.info(
        "Submitted all benchmarks. Run {} once they are finished to get the results.",
        "mdbenchmark analyze",
    )
def analyze(directory, plot, ncores, save_csv):
    """Analyze benchmarks and print the performance results.

    Benchmarks are searched recursively starting from the directory specified
    in ``--directory``. If the option is not specified, the working directory
    will be used.

    Benchmarks that have not started yet or finished without printing the
    performance result will be marked accordingly.

    The benchmark performance results can be saved in a CSV file with the
    ``--save-csv`` option and a custom filename. To plot the results use
    ``mdbenchmark plot``.
    """
    # Migrate from MDBenchmark<2 to MDBenchmark=>2
    mds_to_dtr.migrate_to_datreant(directory)

    bundle = dtr.discover(directory)

    df = DataFrameFromBundle(bundle)

    if df.isnull().values.any():
        console.warn(
            "We were not able to gather information for all systems. "
            "Systems marked with question marks have either crashed or "
            "were not started yet."
        )

    # Reformat NaN values nicely into question marks.
    # move this to the bundle function!
    df = df.replace(np.nan, "?")
    PrintDataFrame(df)

    if save_csv is not None and not save_csv.endswith(".csv"):
        save_csv = "{}.csv".format(save_csv)
    df.to_csv(save_csv)

    if plot:
        console.warn("'--plot' has been deprecated, use '{}'.", "mdbenchmark plot")

        fig = Figure()
        FigureCanvas(fig)
        ax = fig.add_subplot(111)

        df = pd.read_csv(save_csv)

        if ncores:
            console.warn(
                "Ignoring your value from '{}' and parsing number of cores from log files.",
                "--number-cores/-ncores",
            )

        ax = plot_over_group(df, plot_cores=ncores, fit=True, ax=ax)
        lgd = ax.legend(loc="upper center", bbox_to_anchor=(0.5, -0.175))

        fig.tight_layout()
        fig.savefig(
            "runtimes.pdf", type="pdf", bbox_extra_artists=(lgd,), bbox_inches="tight"
        )
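# Hedged usage sketch (assumption, not part of the source): exercising the --save-csv
# behaviour described in the analyze docstring above through Click's test runner, in the
# same style as the other CLI tests in this collection. The filename "my_results" is a
# hypothetical example; the command is expected to append ".csv" when the extension is
# missing, as the option handling above does.
def test_analyze_save_csv_sketch(cli_runner, tmpdir, data):
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli,
            [
                "analyze",
                "--directory={}".format(data["analyze-files-gromacs"]),
                "--save-csv=my_results",
            ],
        )

        assert result.exit_code == 0
        # ".csv" is appended because the given filename lacks the extension.
        assert os.path.isfile("my_results.csv")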
def test_analyze_namd(cli_runner, tmpdir, data):
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli, ["analyze", "--directory={}".format(data["analyze-files-namd"])]
        )
        bundle = dtr.discover(data["analyze-files-namd"])
        df = DataFrameFromBundle(bundle)
        test_output = PrintDataFrame(df, False) + "\n"

        assert result.exit_code == 0
        assert result.output == test_output
def test_DataFrameFromBundle(data):
    """Test the DataFrameFromBundle function.

    This is used in other tests, therefore everything is hard coded.
    If changes are made to the layout type, this should be changed here.
    """
    bundle = dtr.discover(data["analyze-files-gromacs"])
    test_output = utils.DataFrameFromBundle(bundle)

    expected_output = pd.read_csv(data["analyze-files-gromacs.csv"])

    # TODO: This test fails if we test the dtype. This is weird and I would
    # like to know why this is... the output itself is fine.
    assert_frame_equal(test_output, expected_output, check_dtype=False)
def test_ConsolidateDataFrame(data):
    """Test the ConsolidateDataFrame function.

    This is used in other tests, therefore everything is hard coded.
    If changes are made to the layout type, this should be changed here.
    """
    bundle = dtr.discover(data["analyze-files-gromacs"])
    df = utils.DataFrameFromBundle(bundle)
    test_output = utils.ConsolidateDataFrame(df)

    expected_output = pd.read_csv(
        data["analyze-files-gromacs-consolidated.csv"], index_col=0
    )

    assert_frame_equal(test_output, expected_output, check_dtype=False)
def test_discover_depth_treantdepth(tmpdir):
    """Check that using the `treantdepth` and `depth` parameters together
    gives the expected result.
    """
    with tmpdir.as_cwd():
        ghosts = ('inky', 'inky/blinky', 'pinky', 'inky/blinky/nothing/clyde')

        for name in ghosts:
            dtr.Treant(name)

        assert len(discover('.', treantdepth=0, depth=0)) == 0
        assert len(discover('.', treantdepth=0, depth=1)) == 2
        assert len(discover('pinky', treantdepth=0, depth=0)) == 1
        assert len(discover('inky', treantdepth=0, depth=2)) == 1
        assert len(discover('.', treantdepth=1, depth=1)) == 2
        assert len(discover('inky', treantdepth=1, depth=1)) == 2
        assert len(discover('inky', treantdepth=1, depth=0)) == 1
        assert len(discover('inky', treantdepth=2)) == 3
        assert len(discover('inky', treantdepth=2, depth=2)) == 2
        assert len(discover('inky', treantdepth=2, depth=3)) == 3
def test_discover(tmpdir):
    with tmpdir.as_cwd():
        ghosts = ('inky', 'blinky', 'pinky', 'clyde')

        for name in ghosts:
            dtr.Treant(
                'a/very/deep/directory/structure/that/just/keeps/going/' + name
            )

        b = discover('.')

        assert len(b) == 4

        for name in ghosts:
            assert name in b.names
def do_analyze(directory, save_csv):
    """Analyze benchmarks."""
    bundle = dtr.discover(directory)
    version = VersionFactory(categories=bundle.categories).version_class

    df = parse_bundle(
        bundle,
        columns=version.analyze_categories,
        sort_values_by=version.analyze_sort,
    )

    # Remove the version column from the DataFrame
    columns_to_drop = ["version"]
    df = df.drop(columns=columns_to_drop)

    if save_csv is not None:
        if not save_csv.endswith(".csv"):
            save_csv = "{}.csv".format(save_csv)
        df.to_csv(save_csv, index=False)
        console.success("Successfully wrote benchmark data to {}.", save_csv)

    if df.isnull().values.any():
        console.warn(
            "We were not able to gather information for all systems. "
            "Systems marked with question marks have either crashed or "
            "were not started yet."
        )

    # Reformat NaN values nicely into question marks.
    # move this to the bundle function!
    df = df.replace(np.nan, "?")

    # Ask for confirmation before printing more than 50 benchmark results to the console.
    if df.shape[0] > 50:
        if not click.confirm(
            "We are about to print the results of {} benchmarks to the console. Continue?".format(
                click.style(str(df.shape[0]), bold=True)
            )
        ):
            console.error("Exiting.")

    # Print the data to the console
    print_dataframe(
        df,
        columns=map_columns(version.category_mapping, version.analyze_printing),
    )
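# Hedged illustration (assumption, not taken from the source): map_columns is read here
# as translating the DataFrame's internal column names into the human-readable headers
# stored in version.category_mapping, essentially a per-column dict lookup. The mapping
# entries below are hypothetical examples chosen to match the headers seen in the
# expected table output above, not actual values from the package.
category_mapping = {"module": "Module", "nodes": "Nodes", "time": "Time (min)"}
analyze_printing = ["module", "nodes", "time"]
display_columns = [category_mapping[col] for col in analyze_printing]
assert display_columns == ["Module", "Nodes", "Time (min)"]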
def test_analyze_with_errors(cli_runner, tmpdir, data):
    """Test that we warn the user of errors in the output files.

    Also test that we show a question mark instead of a float in the
    corresponding cell.
    """
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli,
            ["analyze", "--directory={}".format(data["analyze-files-w-errors"])],
        )
        bundle = dtr.discover(data["analyze-files-w-errors"])
        df = DataFrameFromBundle(bundle)
        df = df.replace(np.nan, "?")
        test_output = PrintDataFrame(df, False) + "\n"

        assert result.exit_code == 0
        assert result.output == test_output
def test_analyze_plot(cli_runner, tmpdir, data):
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli.cli,
            [
                "analyze",
                "--directory={}".format(data["analyze-files-gromacs"]),
                "--plot",
            ],
        )
        bundle = dtr.discover(data["analyze-files-gromacs"])
        df = DataFrameFromBundle(bundle)
        test_output = PrintDataFrame(df, False) + "\n"

        assert result.exit_code == 0
        assert result.output == test_output
        assert os.path.isfile("runtimes.pdf")
def do_analyze(directory, plot, ncores, save_csv):
    """Analyze benchmarks."""
    # Migrate from MDBenchmark<2 to MDBenchmark=>2
    mds_to_dtr.migrate_to_datreant(directory)

    bundle = dtr.discover(directory)

    df = DataFrameFromBundle(bundle)

    if save_csv is not None and not save_csv.endswith(".csv"):
        save_csv = "{}.csv".format(save_csv)
    df.to_csv(save_csv)

    if df.isnull().values.any():
        console.warn(
            "We were not able to gather information for all systems. "
            "Systems marked with question marks have either crashed or "
            "were not started yet."
        )

    # Reformat NaN values nicely into question marks.
    # move this to the bundle function!
    df = df.replace(np.nan, "?")

    PrintDataFrame(df)

    if plot:
        console.warn("'--plot' has been deprecated, use '{}'.", "mdbenchmark plot")

        fig = Figure()
        FigureCanvas(fig)
        ax = fig.add_subplot(111)

        df = pd.read_csv(save_csv)

        if ncores:
            console.warn(
                "Ignoring your value from '{}' and parsing number of cores from log files.",
                "--number-cores/-ncores",
            )

        ax = plot_over_group(df, plot_cores=ncores, fit=True, ax=ax)
        lgd = ax.legend(loc="upper center", bbox_to_anchor=(0.5, -0.175))

        fig.tight_layout()
        fig.savefig(
            "runtimes.pdf", type="pdf", bbox_extra_artists=(lgd,), bbox_inches="tight"
        )
def test_generate_test_prompt_no(cli_runner, tmpdir):
    """Test whether prompt answer no works."""
    with tmpdir.as_cwd():
        open("protein.tpr", "a").close()

        result = cli_runner.invoke(
            cli,
            [
                "generate",
                "--module=gromacs/2016",
                "--host=draco",
                "--max-nodes=4",
                "--gpu",
                "--no-cpu",
                "--name=protein",
            ],
            input="n\n",
        )

        bundle = dtr.discover()

        assert result.exit_code == 1
        assert len(bundle) == 0
def test_generate_test_prompt_yes(cli_runner, tmpdir, generate_output):
    """Test whether prompt answer yes works."""
    with tmpdir.as_cwd():
        open("protein.tpr", "a").close()

        result = cli_runner.invoke(
            cli.cli,
            [
                "generate",
                "--module=gromacs/2016",
                "--host=draco",
                "--max-nodes=4",
                "--gpu",
                "--no-cpu",
                "--name=protein",
            ],
            input="y\n",
        )

        output1 = (
            "WARNING Cannot locate modules available on this host. Not performing module name validation.\n"
            "Creating benchmark system for gromacs/2016 with GPUs.\n"
        )
        bundle = dtr.discover()
        df = DataFrameFromBundle(bundle)
        df = ConsolidateDataFrame(df)
        output2 = (
            "The above benchmarks will be generated. Continue? [y/N]: y\n"
            "Finished generating all benchmarks.\n"
            "You can now submit the jobs with mdbenchmark submit.\n"
        )
        mid = "Benchmark Summary:\n" + PrintDataFrame(df, False) + "\n"

        output = output1 + mid + output2

        # Test that we get a warning, if no module name validation is performed.
        assert result.exit_code == 0
        assert result.output == output
def test_analyze_namd(cli_runner, tmpdir, capsys, data):
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli, ["analyze", "--directory={}".format(data["analyze-files-namd"])]
        )
        version = Version2Categories()
        bundle = dtr.discover(data["analyze-files-namd"])
        df = parse_bundle(
            bundle,
            columns=version.analyze_categories,
            sort_values_by=version.analyze_sort,
        )
        df = df.iloc[:, :-1]
        df = df.replace(np.nan, "?")
        print_dataframe(
            df,
            columns=map_columns(version.category_mapping, version.analyze_printing),
        )

        out, _ = capsys.readouterr()
        out = "Setting up...\n" + out

        assert result.exit_code == 0
        assert result.output == "\n".join(out.split("\n"))
def test_generate_odd_number_of_nodes(
    cli_runner,
    engine,
    module,
    extensions,
    generate_output,
    monkeypatch,
    tmpdir,
    version,
):
    """Make sure we generate the correct folder structure."""
    with tmpdir.as_cwd():
        for ext in extensions:
            open("protein.{}".format(ext), "a").close()

        monkeypatch.setattr(
            "mdbenchmark.mdengines.get_available_modules",
            lambda: {"gromacs": ["2016"], "namd": ["11"]},
        )

        result = cli_runner.invoke(
            cli.cli,
            [
                "generate",
                "--module={}".format(module),
                "--host=draco",
                "--min-nodes=6",
                "--max-nodes=8",
                "--gpu",
                "--no-cpu",
                "--name=protein",
                "--yes",
            ],
        )

        output1 = "Creating benchmark system for {} with GPUs.\n".format(module)

        bundle = dtr.discover()
        df = DataFrameFromBundle(bundle)
        df = ConsolidateDataFrame(df)
        test_output = "Benchmark Summary:\n" + PrintDataFrame(df, False) + "\n"

        output2 = (
            "Generating the above benchmarks.\n"
            "Finished generating all benchmarks.\n"
            "You can now submit the jobs with mdbenchmark submit.\n"
        )

        if "namd" in module:
            output = NAMD_WARNING_FORMATTED + output1 + test_output + output2
        else:
            output = output1 + test_output + output2

        assert result.exit_code == 0
        assert result.output == output
        assert os.path.exists("draco_{}".format(engine))

        host_engine_version_path = "draco_{}/{}_gpu/".format(engine, version)
        for i in range(6, 9):
            assert os.path.exists(host_engine_version_path + "{}".format(i))
            for ext in extensions:
                assert os.path.exists(
                    host_engine_version_path + "{}/protein.{}".format(i, ext)
                )
            assert os.path.exists(host_engine_version_path + "{}/bench.job".format(i))
import numpy as np
import matplotlib.pyplot as plt
import seaborn.apionly as sns
import matplotlib.gridspec as gridspec
import datreant as dtr
import pandas as pd

b = dtr.discover()

# tags for Treants
source = ['Lustre', 'SSD']
size = ['9000', '900']
analysis = ['RDF', 'RMS']
scheduler = ['distr', 'multi']
nodes = ['3nodes', '6nodes']
scheduler_full_name = {'distr': 'distributed', 'multi': 'multiprocessing'}

# for Lustre distributed
for ana in analysis:
    fig = plt.figure(figsize=(12, 8), tight_layout=True)
    gs = gridspec.GridSpec(5, 9)

    ax0 = fig.add_subplot(gs[0:2, 0:3])
    ax0.set_xlabel('Number of cores')
    ax0.set_ylabel('Wait (s)')
    sns.despine(offset=10, ax=ax0)
    ax0.set_title('pmda.{} on 900 frames'.format(ana.lower()))
    ax0.set_xscale("log")
    ax0.set_yscale("log")
def do_submit(directory, force_restart, yes):
    """Submit the benchmarks."""
    bundle = dtr.discover(directory)

    # Exit if no bundles were found in the current directory.
    if not bundle:
        console.error("No benchmarks found.")

    grouped_bundles = bundle.categories.groupby("started")
    try:
        bundles_not_yet_started = grouped_bundles[False]
    except KeyError:
        bundles_not_yet_started = None

    if not bundles_not_yet_started and not force_restart:
        console.error(
            "All generated benchmarks were already started once. "
            "You can force a restart with {}.",
            "--force",
        )

    # Start all benchmark simulations if a restart was requested. Otherwise
    # only start the ones that were not run yet.
    bundles_to_start = bundle
    if not force_restart:
        bundles_to_start = bundles_not_yet_started

    benchmark_version = VersionFactory(
        categories=bundles_to_start.categories
    ).version_class

    df = parse_bundle(
        bundles_to_start,
        columns=benchmark_version.submit_categories,
        sort_values_by=benchmark_version.analyze_sort,
        discard_performance=True,
    )

    # Reformat NaN values nicely into question marks.
    df_to_print = df.replace(np.nan, "?")

    columns_to_drop = ["ncores", "version"]
    df_to_print = df_to_print.drop(columns=columns_to_drop)

    # Consolidate the data by grouping on the number of nodes and print it to
    # the user as an overview.
    consolidated_df = consolidate_dataframe(
        df_to_print, columns=benchmark_version.consolidate_categories
    )
    print_dataframe(
        consolidated_df,
        columns=map_columns(
            map_dict=benchmark_version.category_mapping,
            columns=benchmark_version.generate_printing[1:],
        ),
    )

    # Ask the user to confirm whether they want to submit the benchmarks
    if yes:
        console.info("The above benchmarks will be submitted.")
    elif not click.confirm("The above benchmarks will be submitted. Continue?"):
        console.error("Exiting. No benchmarks submitted.")

    batch_cmd = get_batch_command()
    console.info("Submitting a total of {} benchmarks.", len(bundles_to_start))
    for sim in bundles_to_start:
        # Remove files generated by previous mdbenchmark run
        if force_restart:
            engine = detect_md_engine(sim.categories["module"])
            cleanup_before_restart(engine=engine, sim=sim)
        sim.categories["started"] = True
        os.chdir(sim.abspath)
        subprocess.call([batch_cmd, "bench.job"])

    console.info(
        "Submitted all benchmarks. Run {} once they are finished to get the results.",
        "mdbenchmark analyze",
    )
def submit(directory, force_restart, yes):
    """Submit benchmarks to the queuing system.

    Benchmarks are searched recursively starting from the directory specified
    in ``--directory``. If the option is not specified, the working directory
    will be used.

    Requests a user prompt. Using the ``--yes`` flag skips this step.

    Checks whether benchmark folders were already generated and exits
    otherwise. Only runs benchmarks that were not already started. This can be
    overwritten with ``--force``.
    """
    # Migrate from MDBenchmark<2 to MDBenchmark=>2
    mds_to_dtr.migrate_to_datreant(directory)

    bundle = dtr.discover(directory)

    # Exit if no bundles were found in the current directory.
    if not bundle:
        console.error("No benchmarks found.")

    df = DataFrameFromBundle(bundle)

    # Reformat NaN values nicely into question marks.
    df_to_print = df.replace(np.nan, "?")
    df_to_print = df_to_print.drop(columns=["ns/day", "ncores"])
    console.info("{}", "Benchmark Summary:")
    df_short = ConsolidateDataFrame(df_to_print)
    PrintDataFrame(df_short)

    # Ask the user to confirm the submission of the benchmarks.
    if yes:
        console.info("The above benchmarks will be submitted.")
    elif not click.confirm("The above benchmarks will be submitted. Continue?"):
        console.error("Exiting. No benchmarks submitted.")

    grouped_bundles = bundle.categories.groupby("started")
    try:
        bundles_not_yet_started = grouped_bundles[False]
    except KeyError:
        bundles_not_yet_started = None

    if not bundles_not_yet_started and not force_restart:
        console.error(
            "All generated benchmarks were already started once. "
            "You can force a restart with {}.",
            "--force",
        )

    # Start all benchmark simulations if a restart was requested. Otherwise
    # only start the ones that were not run yet.
    bundles_to_start = bundle
    if not force_restart:
        bundles_to_start = bundles_not_yet_started

    batch_cmd = get_batch_command()
    console.info("Submitting a total of {} benchmarks.", len(bundles_to_start))
    for sim in bundles_to_start:
        # Remove files generated by previous mdbenchmark run
        if force_restart:
            engine = detect_md_engine(sim.categories["module"])
            cleanup_before_restart(engine=engine, sim=sim)
        sim.categories["started"] = True
        os.chdir(sim.abspath)
        subprocess.call([batch_cmd, "bench.job"])

    console.info(
        "Submitted all benchmarks. Run {} once they are finished to get the results.",
        "mdbenchmark analyze",
    )
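# Hedged usage sketch (assumption, not part of the source): invoking the submit command
# with --yes to skip the confirmation prompt, as described in the docstring above. The
# directory fixture key "analyze-files-gromacs" is reused from the other tests purely
# for illustration; an actual submission additionally needs bench.job files and a
# working queuing-system command on the host.
def test_submit_yes_sketch(cli_runner, tmpdir, data):
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli,
            [
                "submit",
                "--directory={}".format(data["analyze-files-gromacs"]),
                "--yes",
            ],
        )
        # With --yes, click.confirm is never called, so no confirmation prompt
        # should appear in the captured output; the command either submits the
        # benchmarks or aborts with an explanatory error.
        assert "Continue?" not in result.output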