def test_consolidate_dataframe(capsys, data):
    bundle = dtr.discover(data["analyze-files-gromacs"])
    version = VersionFactory(categories=bundle.categories).version_class
    df = utils.parse_bundle(
        bundle,
        columns=version.analyze_categories,
        sort_values_by=version.analyze_sort,
    )
    test_output = utils.consolidate_dataframe(
        df, columns=version.consolidate_categories
    )

    print_dataframe(
        test_output[version.generate_printing[1:]],
        columns=map_columns(
            map_dict=version.category_mapping,
            columns=version.generate_printing[1:],
        ),
    )

    expected_output = (
        "Setting up...\n\n"
        "+----------------+---------+--------------+---------+--------+-----------+-------------+-------------------+\n",
        "| Module         | Nodes   |   Time (min) | GPUs?   | Host   |   # ranks |   # threads |   Hyperthreading? |\n",
        "|----------------+---------+--------------+---------+--------+-----------+-------------+-------------------|\n",
        "| gromacs/2016.3 | 1-5     |           15 | False   | draco  |       nan |         nan |               nan |\n",
        "+----------------+---------+--------------+---------+--------+-----------+-------------+-------------------+\n\n",
    )

    out, _ = capsys.readouterr()
    assert out == "".join(expected_output)
def test_parse_bundle(data):
    bundle = dtr.discover(data["analyze-files-gromacs"])
    version = VersionFactory(categories=bundle.categories).version_class
    test_output = utils.parse_bundle(
        bundle,
        columns=version.analyze_categories,
        sort_values_by=version.analyze_sort,
    )
    expected_output = pd.read_csv(data["analyze-files-gromacs.csv"], index_col=False)
    assert_frame_equal(test_output, expected_output)
def plot_over_group(df, plot_cores, fit, performance_column, ax=None):
    selection = "ncores" if plot_cores else "nodes"

    # Fall back to the current axes if none was given; otherwise the
    # labelling calls below would fail on `ax=None`.
    if ax is None:
        import matplotlib.pyplot as plt

        ax = plt.gca()

    benchmark_version = VersionFactory(
        version="3" if "use_gpu" in df.columns else "2"
    ).version_class

    for key, group in df.groupby(benchmark_version.consolidate_categories):
        # Do not try to plot groups without performance values
        if group[performance_column].isnull().all():
            continue

        if benchmark_version.version == "3":
            module, template, gpus, ranks, hyperthreading, multidir = key
            threads = group.number_of_threads.iloc[0]
        else:
            gpus, module, template = key

        label = "{template} - {module}, {node_type}".format(
            template=template,
            module=module,
            node_type="mixed CPU-GPU" if gpus else "CPU-only",
        )

        # Add ranks, threads and multidir information to the label
        if benchmark_version.version == "3":
            label += " (ranks: {ranks}, threads: {threads}{ht}, nsims: {nsims})".format(
                ranks=ranks,
                threads=threads,
                ht=" [HT]" if hyperthreading else "",
                nsims=multidir,
            )

        plot_line(
            df=group,
            selection=selection,
            label=label,
            fit=fit,
            performance_column=performance_column,
            ax=ax,
        )

    selection_label = "cores" if plot_cores else "nodes"
    ax.set_xlabel("Number of {selection}".format(selection=selection_label))
    ax.set_ylabel("Performance (ns/day)")

    return ax
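A minimal usage sketch for plot_over_group follows. It is not part of the module: it assumes parse_bundle and VersionFactory are in scope as above, that datreant imports as dtr, and that the "benchmarks/" path and the "performance" column name are placeholders to adapt to the data at hand.

# Hedged usage sketch: discover benchmarks, parse them into a DataFrame and
# plot the scaling curve onto a fresh matplotlib axes. The directory path and
# the "performance" column name are assumptions, not part of the module.
import datreant as dtr
import matplotlib.pyplot as plt

bundle = dtr.discover("benchmarks/")
version = VersionFactory(categories=bundle.categories).version_class
df = parse_bundle(
    bundle,
    columns=version.analyze_categories,
    sort_values_by=version.analyze_sort,
)

fig, ax = plt.subplots()
plot_over_group(
    df,
    plot_cores=False,
    fit=True,
    performance_column="performance",
    ax=ax,
)
ax.legend()
fig.savefig("benchmark-scaling.png")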
def do_analyze(directory, save_csv):
    """Analyze benchmarks."""
    bundle = dtr.discover(directory)
    version = VersionFactory(categories=bundle.categories).version_class

    df = parse_bundle(
        bundle,
        columns=version.analyze_categories,
        sort_values_by=version.analyze_sort,
    )

    # Remove the version column from the DataFrame
    columns_to_drop = ["version"]
    df = df.drop(columns=columns_to_drop)

    if save_csv is not None:
        if not save_csv.endswith(".csv"):
            save_csv = "{}.csv".format(save_csv)
        df.to_csv(save_csv, index=False)
        console.success("Successfully saved benchmark data to {}.", save_csv)

    # Warn about missing values before reformatting them, because afterwards
    # `isnull()` would no longer find any.
    if df.isnull().values.any():
        console.warn(
            "We were not able to gather information for all systems. "
            "Systems marked with question marks have either crashed or "
            "were not started yet."
        )

    # Reformat NaN values nicely into question marks.
    # TODO: Move this to the bundle function!
    df = df.replace(np.nan, "?")

    # Ask before printing more than 50 benchmark results to the console.
    if df.shape[0] > 50:
        if not click.confirm(
            "We are about to print the results of {} benchmarks to the console. Continue?".format(
                click.style(str(df.shape[0]), bold=True)
            )
        ):
            console.error("Exiting.")

    # Print the data to the console
    print_dataframe(
        df,
        columns=map_columns(version.category_mapping, version.analyze_printing),
    )
def do_submit(directory, force_restart, yes):
    """Submit the benchmarks."""
    bundle = dtr.discover(directory)

    # Exit if no bundles were found in the current directory.
    if not bundle:
        console.error("No benchmarks found.")

    grouped_bundles = bundle.categories.groupby("started")
    try:
        bundles_not_yet_started = grouped_bundles[False]
    except KeyError:
        bundles_not_yet_started = None

    if not bundles_not_yet_started and not force_restart:
        console.error(
            "All generated benchmarks were already started once. "
            "You can force a restart with {}.",
            "--force",
        )

    # Start all benchmark simulations if a restart was requested. Otherwise
    # only start the ones that were not run yet.
    bundles_to_start = bundle
    if not force_restart:
        bundles_to_start = bundles_not_yet_started

    benchmark_version = VersionFactory(
        categories=bundles_to_start.categories
    ).version_class

    df = parse_bundle(
        bundles_to_start,
        columns=benchmark_version.submit_categories,
        sort_values_by=benchmark_version.analyze_sort,
        discard_performance=True,
    )

    # Reformat NaN values nicely into question marks.
    df_to_print = df.replace(np.nan, "?")
    columns_to_drop = ["ncores", "version"]
    df_to_print = df_to_print.drop(columns=columns_to_drop)

    # Consolidate the data by grouping on the number of nodes and print it to
    # the user as an overview.
    consolidated_df = consolidate_dataframe(
        df_to_print, columns=benchmark_version.consolidate_categories
    )
    print_dataframe(
        consolidated_df,
        columns=map_columns(
            map_dict=benchmark_version.category_mapping,
            columns=benchmark_version.generate_printing[1:],
        ),
    )

    # Ask the user to confirm whether they want to submit the benchmarks
    if yes:
        console.info("The above benchmarks will be submitted.")
    elif not click.confirm("The above benchmarks will be submitted. Continue?"):
        console.error("Exiting. No benchmarks submitted.")

    batch_cmd = get_batch_command()
    console.info("Submitting a total of {} benchmarks.", len(bundles_to_start))
    for sim in bundles_to_start:
        # Remove files generated by a previous mdbenchmark run
        if force_restart:
            engine = detect_md_engine(sim.categories["module"])
            cleanup_before_restart(engine=engine, sim=sim)
        sim.categories["started"] = True
        os.chdir(sim.abspath)
        subprocess.call([batch_cmd, "bench.job"])

    console.info(
        "Submitted all benchmarks. Run {} once they are finished to get the results.",
        "mdbenchmark analyze",
    )
def test_version_class_zero_does_not_exist():
    obj = VersionFactory(version="0")
    assert obj.version_class is None
# Parametrize over the benchmark versions known to the factory.
@pytest.mark.parametrize("version", ("2", "3"))
def test_guess_version(version):
    obj = VersionFactory(categories=CATEGORIES[f"VERSION_{version}"])
    assert obj.version == version
@pytest.mark.parametrize("version", ("2", "3"))
def test_version(version):
    obj = VersionFactory(version=version)
    assert obj.version == version
def test_init_raises_exception():
    with pytest.raises(ValueError) as err:
        VersionFactory()
    assert "must be set" in str(err.value)
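Taken together, these tests pin down the VersionFactory contract. A short sketch of that contract, using only behavior the tests above assert; the import path is an assumption:

# Sketch of the VersionFactory contract established by the tests above.
from mdbenchmark.versions import VersionFactory  # assumed import path

# An explicit version string is kept as-is.
factory = VersionFactory(version="2")
assert factory.version == "2"

# An unknown version yields no version class.
assert VersionFactory(version="0").version_class is None

# Passing neither `version` nor `categories` is an error.
try:
    VersionFactory()
except ValueError as err:
    assert "must be set" in str(err)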