Example #1
def test_consolidate_dataframe(capsys, data):
    bundle = dtr.discover(data["analyze-files-gromacs"])
    version = VersionFactory(categories=bundle.categories).version_class
    df = utils.parse_bundle(
        bundle, columns=version.analyze_categories, sort_values_by=version.analyze_sort,
    )
    test_output = utils.consolidate_dataframe(
        df, columns=version.consolidate_categories
    )

    print_dataframe(
        test_output[version.generate_printing[1:]],
        columns=map_columns(
            map_dict=version.category_mapping, columns=version.generate_printing[1:],
        ),
    )

    expected_output = (
        "Setting up...\n\n"
        "+----------------+---------+--------------+---------+--------+-----------+-------------+-------------------+\n",
        "| Module         | Nodes   |   Time (min) | GPUs?   | Host   |   # ranks |   # threads |   Hyperthreading? |\n",
        "|----------------+---------+--------------+---------+--------+-----------+-------------+-------------------|\n",
        "| gromacs/2016.3 | 1-5     |           15 | False   | draco  |       nan |         nan |               nan |\n",
        "+----------------+---------+--------------+---------+--------+-----------+-------------+-------------------+\n\n",
    )

    out, _ = capsys.readouterr()
    assert "\n".join(out.split("\n")) == "".join(expected_output)
Example #2
def test_analyze_with_errors(cli_runner, tmpdir, capsys, data):
    """Test that we warn the user of errors in the output files. Also test that we
show a question mark instead of a float in the corresponding cell.
    """
    with tmpdir.as_cwd():

        result = cli_runner.invoke(cli, [
            "analyze", "--directory={}".format(data["analyze-files-w-errors"])
        ])
        version = Version2Categories()
        bundle = dtr.discover(data["analyze-files-w-errors"])
        df = parse_bundle(
            bundle,
            columns=version.analyze_categories,
            sort_values_by=version.analyze_sort,
        )
        df = df.iloc[:, :-1]
        df = df.replace(np.nan, "?")
        print_dataframe(
            df,
            columns=map_columns(version.category_mapping,
                                version.analyze_printing),
        )

        out, _ = capsys.readouterr()
        out = "Setting up...\n" + out
        assert result.exit_code == 0
        assert result.output == "\n".join(out.split("\n"))
Example #3
def test_submit_test_prompt_no(cli_runner, tmpdir, data):
    """Test whether prompt answer no works."""
    benchmark_version = Version2Categories()
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli,
            [
                "submit",
                "--directory={}".format(data["analyze-files-gromacs-one-unstarted"]),
            ],
            input="n\n",
        )

        df = pd.read_csv(data["gromacs/test_prompt.csv"], index_col=0)
        print_dataframe(
            df,
            columns=map_columns(
                map_dict=benchmark_version.category_mapping,
                columns=benchmark_version.generate_printing[1:],
            ),
        )

        assert result.exit_code == 1
        assert (
            result.output.split("\n")[-2] == "ERROR Exiting. No benchmarks submitted."
        )
Example #4
def test_print_dataframe(capsys, data):
    df = pd.read_csv(data["analyze-files-gromacs.csv"])
    version = Version2Categories()
    utils.print_dataframe(df, version.analyze_printing + ["version"])

    expected_output = tabulate.tabulate(
        df, headers="keys", tablefmt="psql", showindex=False
    )
    expected_output = "\n" + expected_output + "\n\n"
    out, _ = capsys.readouterr()

    assert "\n".join(out.split("\n")) == expected_output
Example #5
def do_analyze(directory, save_csv):
    """Analyze benchmarks."""
    bundle = dtr.discover(directory)
    version = VersionFactory(categories=bundle.categories).version_class

    df = parse_bundle(
        bundle,
        columns=version.analyze_categories,
        sort_values_by=version.analyze_sort,
    )

    # Remove the versions column from the DataFrame
    columns_to_drop = ["version"]
    df = df.drop(columns=columns_to_drop)

    if save_csv is not None:
        if not save_csv.endswith(".csv"):
            save_csv = "{}.csv".format(save_csv)
        df.to_csv(save_csv, index=False)

        console.success("Successfully wrote benchmark data to {}.", save_csv)

    # Warn the user if we could not gather data for every system. This check
    # has to run before the NaN values are replaced below, otherwise
    # isnull() never finds anything.
    if df.isnull().values.any():
        console.warn(
            "We were not able to gather information for all systems. "
            "Systems marked with question marks have either crashed or "
            "were not started yet.")

    # Reformat NaN values nicely into question marks.
    # move this to the bundle function!
    df = df.replace(np.nan, "?")

    # Ask for confirmation before printing more than 50 benchmark results
    # to the console.
    if df.shape[0] > 50 and not click.confirm(
            "We are about to print the results of {} benchmarks to the console. Continue?"
            .format(click.style(str(df.shape[0]), bold=True))):
        console.error("Exiting.")

    # Print the data to the console
    print_dataframe(
        df,
        columns=map_columns(version.category_mapping,
                            version.analyze_printing),
    )
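
One detail worth noting in do_analyze above: the missing-data warning only works if the isnull() check runs before the NaN values are replaced with question marks, since afterwards the DataFrame no longer contains NaN. A short, self-contained illustration:

import numpy as np
import pandas as pd

df = pd.DataFrame({"ns/day": [25.0, np.nan]})

# Check for missing values first ...
has_missing = df.isnull().values.any()   # True

# ... then make them display-friendly.
df = df.replace(np.nan, "?")
print(has_missing, df["ns/day"].tolist())  # True [25.0, '?']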
Example #6
def test_submit_resubmit(cli_runner, monkeypatch, tmpdir, data):
    """Test that we cannot submit a benchmark system that was already submitted,
    unless we force it.
    """
    with tmpdir.as_cwd():
        # Test that we get an error if we try to point the submit function to
        # a non-existent path.
        result = cli_runner.invoke(cli, ["submit", "--directory=look_here/"], "--yes")
        assert result.exit_code == 1
        assert result.output == "ERROR No benchmarks found.\n"

        # Test that we get an error if we try to start benchmarks that were
        # already started once.
        result = cli_runner.invoke(
            cli,
            ["submit", "--directory={}".format(data["analyze-files-gromacs"]), "--yes"],
        )
        df = pd.read_csv(data["analyze-files-gromacs-consolidated.csv"], index_col=0)
        s = print_dataframe(df, False)

        output = "ERROR All generated benchmarks were already started once. You can force a restart with --force.\n"

        assert result.exit_code == 1
        assert result.output == output

        # Test that we can force restart already run benchmarks.
        # Monkeypatch a few functions
        monkeypatch.setattr("subprocess.call", lambda x: True)
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.get_batch_command", lambda: "sbatch"
        )
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.detect_md_engine", lambda x: gromacs
        )
        monkeypatch.setattr(
            "mdbenchmark.submit.cleanup_before_restart", lambda engine, sim: True
        )

        output = (
            "Benchmark Summary:\n"
            + s
            + "\nThe above benchmarks will be submitted.\n"
            + "Submitting a total of 5 benchmarks.\n"
            + "Submitted all benchmarks. Run mdbenchmark analyze once they are finished to get the results.\n"
        )

        result = cli_runner.invoke(
            cli,
            [
                "submit",
                "--directory={}".format(data["analyze-files-gromacs"]),
                "--force",
                "--yes",
            ],
        )
        assert result.exit_code == 0
        assert result.output == output
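
The monkeypatching in this example keeps the test from actually shelling out to a batch scheduler. The same pattern in isolation, with a hypothetical submit() helper standing in for mdbenchmark's submission code:

import subprocess


def submit(script):
    # Hypothetical helper: hands a job script to the scheduler.
    return subprocess.call(["sbatch", script])


def test_submit_does_not_shell_out(monkeypatch):
    calls = []
    # Replace subprocess.call with a recorder so no process is spawned.
    monkeypatch.setattr("subprocess.call", lambda cmd: calls.append(cmd) or 0)

    assert submit("bench.job") == 0
    assert calls == [["sbatch", "bench.job"]]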
Example #7
def test_analyze_gromacs(cli_runner, tmpdir, capsys, data):
    """Test that the output is OK when all outputs are fine."""
    with tmpdir.as_cwd():

        result = cli_runner.invoke(cli, [
            "analyze", "--directory={}".format(data["analyze-files-gromacs"])
        ])

        df = pd.read_csv(data["analyze-files-gromacs.csv"])
        df = df.iloc[:, :-1]
        df = df.replace(np.nan, "?")
        version = Version2Categories()
        print_dataframe(
            df,
            columns=map_columns(version.category_mapping,
                                version.analyze_printing),
        )

        out, _ = capsys.readouterr()
        out = "Setting up...\n" + out
        assert result.exit_code == 0
        assert result.output == "\n".join(out.split("\n"))
Example #8
def test_analyze_namd(cli_runner, tmpdir, capsys, data):
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli,
            ["analyze", "--directory={}".format(data["analyze-files-namd"])])
        version = Version2Categories()
        bundle = dtr.discover(data["analyze-files-namd"])
        df = parse_bundle(
            bundle,
            columns=version.analyze_categories,
            sort_values_by=version.analyze_sort,
        )
        df = df.iloc[:, :-1]
        df = df.replace(np.nan, "?")
        print_dataframe(
            df,
            columns=map_columns(version.category_mapping,
                                version.analyze_printing),
        )

        out, _ = capsys.readouterr()
        out = "Setting up...\n" + out
        assert result.exit_code == 0
        assert result.output == "\n".join(out.split("\n"))
Example #9
def test_submit_test_prompt_yes(cli_runner, tmpdir, data, monkeypatch):
    """Test whether promt answer no works."""
    with tmpdir.as_cwd():
        # Test that we can force restart already run benchmarks.
        # Monkeypatch a few functions
        monkeypatch.setattr("subprocess.call", lambda x: True)
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.get_batch_command", lambda: "sbatch"
        )
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.detect_md_engine", lambda x: gromacs
        )
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.cleanup_before_restart", lambda engine, sim: True
        )

        result = cli_runner.invoke(
            cli,
            [
                "submit",
                "--directory={}".format(data["analyze-files-gromacs-one-unstarted"]),
            ],
            input="y\n",
        )

        df = pd.read_csv(data["gromacs/test_prompt.csv"], index_col=0)
        s = print_dataframe(df, False)

        output = (
            "Benchmark Summary:\n"
            + s
            + "\nThe above benchmarks will be submitted. Continue? [y/N]: y\n"
            + "Submitting a total of 1 benchmarks.\n"
            + "Submitted all benchmarks. Run mdbenchmark analyze once they are finished to get the results.\n"
        )

        assert result.exit_code == 0
        assert result.output == output

        # Lazy way of resetting the value of `started` to `false`.
        # TODO: We need to clean up all of our unit tests...
        treant = dtr.Bundle(data["analyze-files-gromacs-one-unstarted"] + "/1")
        treant.categories["started"] = False
Example #10
def do_submit(directory, force_restart, yes):
    """Submit the benchmarks."""
    bundle = dtr.discover(directory)

    # Exit if no bundles were found in the current directory.
    if not bundle:
        console.error("No benchmarks found.")

    grouped_bundles = bundle.categories.groupby("started")
    try:
        bundles_not_yet_started = grouped_bundles[False]
    except KeyError:
        bundles_not_yet_started = None
    if not bundles_not_yet_started and not force_restart:
        console.error(
            "All generated benchmarks were already started once. "
            "You can force a restart with {}.",
            "--force",
        )

    # Start all benchmark simulations if a restart was requested. Otherwise
    # only start the ones that were not run yet.
    bundles_to_start = bundle
    if not force_restart:
        bundles_to_start = bundles_not_yet_started

    benchmark_version = VersionFactory(
        categories=bundles_to_start.categories).version_class

    df = parse_bundle(
        bundles_to_start,
        columns=benchmark_version.submit_categories,
        sort_values_by=benchmark_version.analyze_sort,
        discard_performance=True,
    )

    # Reformat NaN values nicely into question marks.
    df_to_print = df.replace(np.nan, "?")

    columns_to_drop = ["ncores", "version"]
    df_to_print = df_to_print.drop(columns=columns_to_drop)

    # Consolidate the data by grouping on the number of nodes and print to the
    # user as an overview.
    consolidated_df = consolidate_dataframe(
        df_to_print, columns=benchmark_version.consolidate_categories)
    print_dataframe(
        consolidated_df,
        columns=map_columns(
            map_dict=benchmark_version.category_mapping,
            columns=benchmark_version.generate_printing[1:],
        ),
    )

    # Ask the user to confirm whether they want to submit the benchmarks
    if yes:
        console.info("The above benchmarks will be submitted.")
    elif not click.confirm(
            "The above benchmarks will be submitted. Continue?"):
        console.error("Exiting. No benchmarks submitted.")

    batch_cmd = get_batch_command()
    console.info("Submitting a total of {} benchmarks.", len(bundles_to_start))
    for sim in bundles_to_start:
        # Remove files generated by previous mdbenchmark run
        if force_restart:
            engine = detect_md_engine(sim.categories["module"])
            cleanup_before_restart(engine=engine, sim=sim)
        sim.categories["started"] = True
        os.chdir(sim.abspath)
        subprocess.call([batch_cmd, "bench.job"])
    console.info(
        "Submitted all benchmarks. Run {} once they are finished to get the results.",
        "mdbenchmark analyze",
    )
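
Judging from the try/except KeyError, Bundle.categories.groupby("started") returns a mapping from category value to sub-bundle. A plain-Python sketch of the same selection logic, with a dict standing in for the datreant bundle:

benchmarks = [
    {"name": "1", "started": True},
    {"name": "2", "started": False},
]

# Group benchmarks by their "started" flag.
grouped = {}
for bench in benchmarks:
    grouped.setdefault(bench["started"], []).append(bench)

# Without --force, only benchmarks that were never started are submitted.
force_restart = False
to_start = benchmarks if force_restart else grouped.get(False, [])
print([b["name"] for b in to_start])  # ['2']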
Example #11
def do_generate(
    name,
    cpu,
    gpu,
    module,
    host,
    min_nodes,
    max_nodes,
    time,
    skip_validation,
    job_name,
    yes,
    physical_cores,
    logical_cores,
    number_of_ranks,
    enable_hyperthreading,
    multidir,
):
    """Generate a bunch of benchmarks."""

    # Instantiate the version we are going to use
    benchmark_version = Version3Categories()

    # Validate the CPU and GPU flags
    validate_cpu_gpu_flags(cpu, gpu)

    # Validate the number of nodes
    validate_number_of_nodes(min_nodes=min_nodes, max_nodes=max_nodes)

    if logical_cores < physical_cores:
        console.error(
            "The number of logical cores cannot be smaller than the number of physical cores."
        )

    if physical_cores and not logical_cores:
        console.warn("Assuming logical_cores = 2 * physical_cores")
        logical_cores = 2 * physical_cores

    if physical_cores and logical_cores:
        processor = Processor(physical_cores=physical_cores,
                              logical_cores=logical_cores)
    else:
        processor = Processor()

    # Hyperthreading check
    if enable_hyperthreading and not processor.supports_hyperthreading:
        console.error(
            "The processor of this machine does not support hyperthreading.")

    if not number_of_ranks:
        number_of_ranks = (processor.physical_cores, )

    # Validate number of simulations
    validate_number_of_simulations(multidir, min_nodes, max_nodes,
                                   number_of_ranks)

    # Grab the template name for the host. This should always work because
    # click does the validation for us
    template = utils.retrieve_host_template(host)

    # Warn the user that NAMD support is still experimental.
    if any(["namd" in m for m in module]):
        console.warn(NAMD_WARNING, "--gpu")

    # Stop if we cannot find any modules. If the user specified multiple
    # modules, we will continue with only the valid ones.
    modules = mdengines.normalize_modules(module, skip_validation)
    if not modules:
        console.error("No requested modules available!")

    # Check if all needed files exist. Throw an error if they do not.
    validate_required_files(name=name, modules=modules)

    # Validate that we can use the number of ranks and threads.
    # We can continue, if no ValueError is thrown
    for ranks in number_of_ranks:
        try:
            processor.get_ranks_and_threads(
                ranks, with_hyperthreading=enable_hyperthreading)
        except ValueError as e:
            console.error(e)

    # Create all benchmark combinations and put them into a DataFrame
    data = construct_generate_data(
        name,
        job_name,
        modules,
        host,
        template,
        cpu,
        gpu,
        time,
        min_nodes,
        max_nodes,
        processor,
        number_of_ranks,
        enable_hyperthreading,
        multidir,
    )
    df = pd.DataFrame(data, columns=benchmark_version.generate_categories)

    # Consolidate the data by grouping on the number of nodes and print to the
    # user as an overview.
    consolidated_df = consolidate_dataframe(
        df, columns=benchmark_version.consolidate_categories)
    print_dataframe(
        consolidated_df[benchmark_version.generate_printing],
        columns=map_columns(
            map_dict=benchmark_version.category_mapping,
            columns=benchmark_version.generate_printing,
        ),
    )

    # Save the number of benchmarks for later printing
    number_of_benchmarks = df.shape[0]
    # Ask the user for confirmation to generate files.
    # If the user defined `--yes`, we will skip the confirmation immediately.
    if yes:
        console.info(
            "We will generate {} " +
            "{benchmark}.".format(benchmark="benchmark" if number_of_benchmarks
                                  == 1 else "benchmarks"),
            number_of_benchmarks,
        )
    elif not click.confirm("We will generate {} benchmarks. Continue?".format(
            number_of_benchmarks)):
        console.error("Exiting. No benchmarks were generated.")

    # Generate the benchmarks
    with click.progressbar(
            df.iterrows(),
            length=number_of_benchmarks,
            show_pos=True,
            label="Generating benchmarks",
    ) as bar:
        for _, row in bar:
            relative_path, file_basename = os.path.split(row["name"])
            mappings = benchmark_version.generate_mapping
            kwargs = {"name": file_basename, "relative_path": relative_path}
            for key, value in mappings.items():
                kwargs[value] = row[key]

            write_benchmark(**kwargs)

    # Finish up by telling the user how to submit the benchmarks
    console.info(
        "Finished! You can submit the jobs with {}.",
        "mdbenchmark submit",
    )
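
The generation loop above forwards each DataFrame column named in benchmark_version.generate_mapping to write_benchmark under the parameter name the template expects. A self-contained sketch of that mapping-driven call, with hypothetical mapping keys and a stand-in writer:

import pandas as pd

# Hypothetical mapping: DataFrame column -> keyword expected by the writer.
generate_mapping = {"module": "module", "nodes": "nodes", "time": "time"}

row = pd.Series({
    "name": "protein/bench",
    "module": "gromacs/2016.3",
    "nodes": 4,
    "time": 15,
})

relative_path, file_basename = "protein", "bench"
kwargs = {"name": file_basename, "relative_path": relative_path}
for column, keyword in generate_mapping.items():
    kwargs[keyword] = row[column]


def write_benchmark(**kw):
    # Stand-in writer; just show what it would receive.
    print(kw)


write_benchmark(**kwargs)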