Example #1
0
def test_consolidate_dataframe(capsys, data):
    """Consolidating the parsed bundle should print the expected table.

    Parses the GROMACS test fixture, consolidates rows that differ only in
    node count, prints the table and compares the captured stdout against a
    hard-coded reference rendering.
    """
    bundle = dtr.discover(data["analyze-files-gromacs"])
    version = VersionFactory(categories=bundle.categories).version_class
    df = utils.parse_bundle(
        bundle, columns=version.analyze_categories, sort_values_by=version.analyze_sort,
    )
    test_output = utils.consolidate_dataframe(
        df, columns=version.consolidate_categories
    )

    # Skip the first printing column, mirroring what the CLI does.
    print_dataframe(
        test_output[version.generate_printing[1:]],
        columns=map_columns(
            map_dict=version.category_mapping, columns=version.generate_printing[1:],
        ),
    )

    # NOTE: the first tuple element is two adjacent string literals
    # (implicit concatenation) — "Setting up..." plus the top border.
    expected_output = (
        "Setting up...\n\n"
        "+----------------+---------+--------------+---------+--------+-----------+-------------+-------------------+\n",
        "| Module         | Nodes   |   Time (min) | GPUs?   | Host   |   # ranks |   # threads |   Hyperthreading? |\n",
        "|----------------+---------+--------------+---------+--------+-----------+-------------+-------------------|\n",
        "| gromacs/2016.3 | 1-5     |           15 | False   | draco  |       nan |         nan |               nan |\n",
        "+----------------+---------+--------------+---------+--------+-----------+-------------+-------------------+\n\n",
    )

    out, _ = capsys.readouterr()
    # The original `"\n".join(out.split("\n"))` was an identity round trip;
    # compare the captured output directly.
    assert out == "".join(expected_output)
Example #2
0
def test_parse_bundle(data):
    """parse_bundle output must match the reference CSV frame exactly."""
    discovered = dtr.discover(data["analyze-files-gromacs"])
    version = VersionFactory(categories=discovered.categories).version_class
    result = utils.parse_bundle(
        discovered,
        columns=version.analyze_categories,
        sort_values_by=version.analyze_sort,
    )
    reference = pd.read_csv(data["analyze-files-gromacs.csv"], index_col=False)
    assert_frame_equal(result, reference)
Example #3
0
def plot_over_group(df, plot_cores, fit, performance_column, ax=None):
    """Plot one performance line per consolidated benchmark group.

    Groups ``df`` by the benchmark version's consolidation categories and
    draws one labeled line per group via ``plot_line``.

    Parameters
    ----------
    df : pandas.DataFrame
        Benchmark results; must contain the version's consolidation
        categories and ``performance_column``.
    plot_cores : bool
        If True, plot against ``ncores``; otherwise against ``nodes``.
    fit : bool
        Forwarded to ``plot_line`` (presumably toggles a fitted line —
        TODO confirm against plot_line's signature).
    performance_column : str
        Column holding the performance values to plot.
    ax : matplotlib axes, optional
        Axes to draw on. NOTE(review): despite the ``None`` default, ``ax``
        is dereferenced unconditionally below — passing ``None`` would raise
        AttributeError; confirm whether callers always supply an axes.

    Returns
    -------
    The same ``ax`` that was passed in, with labels set.
    """
    selection = "ncores" if plot_cores else "nodes"
    # The presence of a "use_gpu" column distinguishes version 3 data
    # from version 2 data.
    benchmark_version = VersionFactory(
        version="3" if "use_gpu" in df.columns else "2"
    ).version_class

    for key, group in df.groupby(benchmark_version.consolidate_categories):
        # Do not try to plot groups without performance values
        if group[performance_column].isnull().all():
            continue

        # The group key tuple's layout depends on the version's
        # consolidate_categories ordering — keep unpacking in sync with it.
        if benchmark_version.version == "3":
            module, template, gpus, ranks, hyperthreading, multidir = key
            threads = group.number_of_threads.iloc[0]
        else:
            gpus, module, template = key

        label = "{template} - {module}, {node_type}".format(
            template=template,
            module=module,
            node_type="mixed CPU-GPU" if gpus else "CPU-only",
        )

        # Add ranks, threads and multdir information to label
        if benchmark_version.version == "3":
            label += " (ranks: {ranks}, threads: {threads}{ht}, nsims: {nsims})".format(
                ranks=ranks,
                threads=threads,
                ht=" [HT]" if hyperthreading else "",
                nsims=multidir,
            )

        plot_line(
            df=group,
            selection=selection,
            label=label,
            fit=fit,
            performance_column=performance_column,
            ax=ax,
        )

    # Human-readable axis label ("cores"/"nodes") vs. the dataframe column
    # name ("ncores"/"nodes") used for selection above.
    selection_label = "cores" if plot_cores else "nodes"
    ax.set_xlabel("Number of {selection}".format(selection=selection_label))
    ax.set_ylabel("Performance (ns/day)")

    return ax
Example #4
0
def do_analyze(directory, save_csv):
    """Analyze benchmarks.

    Discovers benchmark bundles under ``directory``, parses them into a
    DataFrame, optionally writes a CSV, and prints the results table.

    Parameters
    ----------
    directory : str
        Directory to search for benchmark bundles.
    save_csv : str or None
        If given, path to write the results as CSV (".csv" is appended
        when missing).
    """
    bundle = dtr.discover(directory)
    version = VersionFactory(categories=bundle.categories).version_class

    df = parse_bundle(
        bundle,
        columns=version.analyze_categories,
        sort_values_by=version.analyze_sort,
    )

    # Remove the versions column from the DataFrame
    columns_to_drop = ["version"]
    df = df.drop(columns=columns_to_drop)

    if save_csv is not None:
        if not save_csv.endswith(".csv"):
            save_csv = "{}.csv".format(save_csv)
        df.to_csv(save_csv, index=False)

        console.success("Successfully wrote benchmark data to {}.", save_csv)

    # Warn about incomplete systems BEFORE reformatting: once NaN is
    # replaced with "?", isnull() can never be True (the original check
    # after the replace was dead code).
    if df.isnull().values.any():
        console.warn(
            "We were not able to gather informations for all systems. "
            "Systems marked with question marks have either crashed or "
            "were not started yet.")

    # Reformat NaN values nicely into question marks.
    # move this to the bundle function!
    df = df.replace(np.nan, "?")

    # Warn user that we are going to print more than 50 benchmark results to the console
    if df.shape[0] > 50:
        if not click.confirm(
                "We are about to print the results of {} benchmarks to the console. Continue?"
                .format(click.style(str(df.shape[0]), bold=True))):
            console.error("Exiting.")

    # Print the data to the console
    print_dataframe(
        df,
        columns=map_columns(version.category_mapping,
                            version.analyze_printing),
    )
Example #5
0
def do_submit(directory, force_restart, yes):
    """Submit the benchmarks.

    Discovers generated benchmarks under ``directory``, shows the user a
    consolidated overview, asks for confirmation (unless ``yes``), then
    submits each benchmark job via the detected batch system.

    Parameters
    ----------
    directory : str
        Directory to search for benchmark bundles.
    force_restart : bool
        If True, resubmit benchmarks that were already started (cleaning
        up files from the previous run first).
    yes : bool
        If True, skip the interactive confirmation prompt.
    """
    bundle = dtr.discover(directory)

    # Exit if no bundles were found in the current directory.
    if not bundle:
        console.error("No benchmarks found.")

    grouped_bundles = bundle.categories.groupby("started")
    try:
        bundles_not_yet_started = grouped_bundles[False]
    except KeyError:
        bundles_not_yet_started = None
    if not bundles_not_yet_started and not force_restart:
        console.error(
            "All generated benchmarks were already started once. "
            "You can force a restart with {}.",
            "--force",
        )

    # Start all benchmark simulations if a restart was requested. Otherwise
    # only start the ones that were not run yet.
    bundles_to_start = bundle
    if not force_restart:
        bundles_to_start = bundles_not_yet_started

    benchmark_version = VersionFactory(
        categories=bundles_to_start.categories).version_class

    df = parse_bundle(
        bundles_to_start,
        columns=benchmark_version.submit_categories,
        sort_values_by=benchmark_version.analyze_sort,
        discard_performance=True,
    )

    # Reformat NaN values nicely into question marks.
    df_to_print = df.replace(np.nan, "?")

    # Drop from df_to_print, not df — the original dropped from `df`,
    # silently discarding the NaN -> "?" reformatting above.
    columns_to_drop = ["ncores", "version"]
    df_to_print = df_to_print.drop(columns=columns_to_drop)

    # Consolidate the data by grouping on the number of nodes and print to the
    # user as an overview.
    consolidated_df = consolidate_dataframe(
        df_to_print, columns=benchmark_version.consolidate_categories)
    print_dataframe(
        consolidated_df,
        columns=map_columns(
            map_dict=benchmark_version.category_mapping,
            columns=benchmark_version.generate_printing[1:],
        ),
    )

    # Ask the user to confirm whether they want to submit the benchmarks
    if yes:
        console.info("The above benchmarks will be submitted.")
    elif not click.confirm(
            "The above benchmarks will be submitted. Continue?"):
        console.error("Exiting. No benchmarks submitted.")

    batch_cmd = get_batch_command()
    console.info("Submitting a total of {} benchmarks.", len(bundles_to_start))
    for sim in bundles_to_start:
        # Remove files generated by previous mdbenchmark run
        if force_restart:
            engine = detect_md_engine(sim.categories["module"])
            cleanup_before_restart(engine=engine, sim=sim)
        sim.categories["started"] = True
        os.chdir(sim.abspath)
        subprocess.call([batch_cmd, "bench.job"])
    console.info(
        "Submitted all benchmarks. Run {} once they are finished to get the results.",
        "mdbenchmark analyze",
    )
Example #6
0
def test_version_class_zero_does_not_exist():
    """An unknown version string must not resolve to any version class."""
    factory = VersionFactory(version="0")
    assert factory.version_class is None
Example #7
0
def test_guess_version(version):
    """The factory should infer the version from the category set alone."""
    factory = VersionFactory(categories=CATEGORIES[f"VERSION_{version}"])
    assert factory.version == version
Example #8
0
def test_version(version):
    """An explicitly supplied version string is stored as-is."""
    factory = VersionFactory(version=version)
    assert factory.version == version
Example #9
0
def test_init_raises_exception():
    """Constructing the factory without any arguments must raise ValueError."""
    with pytest.raises(ValueError) as excinfo:
        VersionFactory()

    assert "must be set" in str(excinfo.value)