def test_generate_namd_experimental_warning(cli_runner, monkeypatch, tmpdir):
    """Test that we print the NAMD experimental warning."""
    with tmpdir.as_cwd():
        for f in ["md.namd", "md.psf", "md.pdb"]:
            open(f, "a").close()

        # monkeypatch the output of the available modules
        monkeypatch.setattr(
            "mdbenchmark.mdengines.get_available_modules",
            lambda: {"namd": ["123"]},
        )

        result = cli_runner.invoke(
            cli,
            ["generate", "--module=namd/123", "--host=draco", "--name=md", "--yes"],
        )
        output1 = (
            "WARNING NAMD support is experimental. "
            "All input files must be in the current directory. "
            "Parameter paths must be absolute. Only crude file checks are performed! "
            "If you use the --gpu option make sure you use the GPU compatible NAMD module!\n"
            "Creating benchmark system for namd/123.\n"
        )
        bundle = dtr.discover()
        df = DataFrameFromBundle(bundle)
        df = ConsolidateDataFrame(df)
        test_output = "Benchmark Summary:\n" + PrintDataFrame(df, False) + "\n"

        output2 = (
            "Generating the above benchmarks.\n"
            "Finished generating all benchmarks.\nYou can "
            "now submit the jobs with mdbenchmark submit.\n"
        )

        output = output1 + test_output + output2

        assert result.exit_code == 0
        assert result.output == output
def _output(short=False):
    """Return the expected benchmark summary for the benchmarks in the current directory."""
    title = "Benchmark Summary:\n"
    bundle = dtr.discover()
    df = DataFrameFromBundle(bundle)

    if short:
        df = ConsolidateDataFrame(df)

    return title + PrintDataFrame(df, False) + "\n"
def do_submit(directory, force_restart, yes):
    """Submit the benchmarks."""
    # Migrate from MDBenchmark<2 to MDBenchmark>=2
    mds_to_dtr.migrate_to_datreant(directory)

    bundle = dtr.discover(directory)

    # Exit if no bundles were found in the current directory.
    if not bundle:
        console.error("No benchmarks found.")

    grouped_bundles = bundle.categories.groupby("started")
    try:
        bundles_not_yet_started = grouped_bundles[False]
    except KeyError:
        bundles_not_yet_started = None
    if not bundles_not_yet_started and not force_restart:
        console.error(
            "All generated benchmarks were already started once. "
            "You can force a restart with {}.",
            "--force",
        )

    # Start all benchmark simulations if a restart was requested. Otherwise
    # only start the ones that were not run yet.
    bundles_to_start = bundle
    if not force_restart:
        bundles_to_start = bundles_not_yet_started

    df = DataFrameFromBundle(bundles_to_start)

    # Reformat NaN values nicely into question marks.
    df_to_print = df.replace(np.nan, "?")
    df_to_print = df_to_print.drop(columns=["ns/day", "ncores"])
    console.info("{}", "Benchmark Summary:")
    df_short = ConsolidateDataFrame(df_to_print)
    PrintDataFrame(df_short)

    # Ask the user to confirm whether they want to submit the benchmarks
    if yes:
        console.info("The above benchmarks will be submitted.")
    elif not click.confirm("The above benchmarks will be submitted. Continue?"):
        console.error("Exiting. No benchmarks submitted.")

    batch_cmd = get_batch_command()
    console.info("Submitting a total of {} benchmarks.", len(bundles_to_start))
    for sim in bundles_to_start:
        # Remove files generated by a previous mdbenchmark run
        if force_restart:
            engine = detect_md_engine(sim.categories["module"])
            cleanup_before_restart(engine=engine, sim=sim)
        sim.categories["started"] = True
        os.chdir(sim.abspath)
        subprocess.call([batch_cmd, "bench.job"])
    console.info(
        "Submitted all benchmarks. Run {} once they are finished to get the results.",
        "mdbenchmark analyze",
    )
def test_submit_resubmit(cli_runner, monkeypatch, tmpdir, data):
    """Test that we cannot submit a benchmark system that was already submitted,
    unless we force it.
    """
    with tmpdir.as_cwd():
        # Test that we get an error if we try to point the submit function to
        # a non-existent path.
        result = cli_runner.invoke(cli, ["submit", "--directory=look_here/", "--yes"])
        assert result.exit_code == 1
        assert result.output == "ERROR No benchmarks found.\n"

        # Test that we get an error if we try to start benchmarks that were
        # already started once.
        result = cli_runner.invoke(
            cli,
            [
                "submit",
                "--directory={}".format(data["analyze-files-gromacs"]),
                "--yes",
            ],
        )
        df = pd.read_csv(data["analyze-files-gromacs-consolidated.csv"], index_col=0)
        s = PrintDataFrame(df, False)
        output = "ERROR All generated benchmarks were already started once. You can force a restart with --force.\n"
        assert result.exit_code == 1
        assert result.output == output

        # Test that we can force restart already run benchmarks.
        # Monkeypatch a few functions
        monkeypatch.setattr("subprocess.call", lambda x: True)
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.get_batch_command", lambda: "sbatch"
        )
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.detect_md_engine", lambda x: gromacs
        )
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.cleanup_before_restart",
            lambda engine, sim: True,
        )
        output = (
            "Benchmark Summary:\n"
            + s
            + "\nThe above benchmarks will be submitted.\n"
            + "Submitting a total of 5 benchmarks.\n"
            + "Submitted all benchmarks. Run mdbenchmark analyze once they are finished to get the results.\n"
        )
        result = cli_runner.invoke(
            cli,
            [
                "submit",
                "--directory={}".format(data["analyze-files-gromacs"]),
                "--force",
                "--yes",
            ],
        )
        assert result.exit_code == 0
        assert result.output == output
def test_analyze_namd(cli_runner, tmpdir, data):
    """Test that the analyze output is correct for NAMD benchmarks."""
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli, ["analyze", "--directory={}".format(data["analyze-files-namd"])]
        )
        bundle = dtr.discover(data["analyze-files-namd"])
        df = DataFrameFromBundle(bundle)
        test_output = PrintDataFrame(df, False) + "\n"

        assert result.exit_code == 0
        assert result.output == test_output
def test_analyze_gromacs(cli_runner, tmpdir, data):
    """Test that the output is OK when all outputs are fine."""
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli, ["analyze", "--directory={}".format(data["analyze-files-gromacs"])]
        )

        df = pd.read_csv(data["analyze-files-gromacs.csv"])
        test_output = PrintDataFrame(df, False) + "\n"

        assert result.exit_code == 0
        assert result.output == test_output
def test_analyze_with_errors(cli_runner, tmpdir, data):
    """Test that we warn the user of errors in the output files.

    Also test that we show a question mark instead of a float in the
    corresponding cell.
    """
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli, ["analyze", "--directory={}".format(data["analyze-files-w-errors"])]
        )

        bundle = dtr.discover(data["analyze-files-w-errors"])
        df = DataFrameFromBundle(bundle)
        df = df.replace(np.nan, "?")
        test_output = PrintDataFrame(df, False) + "\n"

        assert result.exit_code == 0
        assert result.output == test_output
def test_analyze_plot(cli_runner, tmpdir, data):
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli.cli,
            [
                "analyze",
                "--directory={}".format(data["analyze-files-gromacs"], "--plot"),
            ],
        )
        bundle = dtr.discover(data["analyze-files-gromacs"])
        df = DataFrameFromBundle(bundle)
        test_output = PrintDataFrame(df, False) + "\n"

        assert result.exit_code == 0
        assert result.output == test_output
        os.path.isfile("runtimes.pdf")
def do_analyze(directory, plot, ncores, save_csv):
    """Analyze benchmarks."""
    # Migrate from MDBenchmark<2 to MDBenchmark>=2
    mds_to_dtr.migrate_to_datreant(directory)

    bundle = dtr.discover(directory)

    df = DataFrameFromBundle(bundle)

    if save_csv is not None and not save_csv.endswith(".csv"):
        save_csv = "{}.csv".format(save_csv)
    df.to_csv(save_csv)

    # Warn the user if we could not gather values for all systems.
    if df.isnull().values.any():
        console.warn(
            "We were not able to gather information for all systems. "
            "Systems marked with question marks have either crashed or "
            "were not started yet."
        )

    # Reformat NaN values nicely into question marks.
    # move this to the bundle function!
    df = df.replace(np.nan, "?")
    PrintDataFrame(df)

    if plot:
        console.warn("'--plot' has been deprecated, use '{}'.", "mdbenchmark plot")

        fig = Figure()
        FigureCanvas(fig)
        ax = fig.add_subplot(111)

        df = pd.read_csv(save_csv)

        if ncores:
            console.warn(
                "Ignoring your value from '{}' and parsing number of cores from log files.",
                "--number-cores/-ncores",
            )

        ax = plot_over_group(df, plot_cores=ncores, fit=True, ax=ax)
        lgd = ax.legend(loc="upper center", bbox_to_anchor=(0.5, -0.175))

        fig.tight_layout()
        fig.savefig(
            "runtimes.pdf", format="pdf", bbox_extra_artists=(lgd,), bbox_inches="tight"
        )
def test_submit_test_prompt_no(cli_runner, tmpdir, data):
    """Test whether prompt answer no works."""
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli.cli,
            ["submit", "--directory={}".format(data["analyze-files-gromacs"])],
            input="n\n",
        )
        df = pd.read_csv(data["analyze-files-gromacs-consolidated.csv"], index_col=0)
        s = PrintDataFrame(df, False)
        output = (
            "Benchmark Summary:\n"
            + s
            + "\nThe above benchmarks will be submitted. Continue? [y/N]: n\n"
            + "ERROR Exiting. No benchmarks submitted.\n"
        )

        assert result.exit_code == 1
        assert result.output == output
def test_submit_test_prompt_yes(cli_runner, tmpdir, data, monkeypatch):
    """Test whether prompt answer yes works."""
    with tmpdir.as_cwd():
        # Test that we can force restart already run benchmarks.
        # Monkeypatch a few functions
        monkeypatch.setattr("subprocess.call", lambda x: True)
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.get_batch_command", lambda: "sbatch"
        )
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.detect_md_engine", lambda x: gromacs
        )
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.cleanup_before_restart",
            lambda engine, sim: True,
        )
        result = cli_runner.invoke(
            cli,
            [
                "submit",
                "--directory={}".format(data["analyze-files-gromacs-one-unstarted"]),
            ],
            input="y\n",
        )
        df = pd.read_csv(data["analyze-files-gromacs-prompt.csv"], index_col=0)
        s = PrintDataFrame(df, False)
        output = (
            "Benchmark Summary:\n"
            + s
            + "\nThe above benchmarks will be submitted. Continue? [y/N]: y\n"
            + "Submitting a total of 1 benchmarks.\n"
            + "Submitted all benchmarks. Run mdbenchmark analyze once they are finished to get the results.\n"
        )

        assert result.exit_code == 0
        assert result.output == output

        # Lazy way of resetting the value of `started` to `false`.
        # TODO: We need to clean up all of our unit tests...
        treant = dtr.Bundle(data["analyze-files-gromacs-one-unstarted"] + "/1")
        treant.categories["started"] = False
def test_analyze_many_rows(cli_runner, tmpdir, datafiles):
    """Test that pandas does not limit the number of printed rows."""
    with tmpdir.as_cwd():
        open("protein.tpr", "a").close()
        result = cli_runner.invoke(
            cli.cli,
            [
                "generate",
                "--module=gromacs/2016.3",
                "--host=draco",
                "--max-nodes=64",
                "--name=protein",
                "--yes",
            ],
        )
        result = cli_runner.invoke(cli.cli, ["analyze", "--directory=draco_gromacs"])

        df = pd.read_csv(datafiles["analyze-many-rows.csv"], index_col=0)
        test_output = PrintDataFrame(df, False) + "\n"

        assert result.exit_code == 0
        assert result.output == test_output
def test_generate_test_prompt_yes(cli_runner, tmpdir, generate_output):
    """Test whether prompt answer yes works."""
    with tmpdir.as_cwd():
        open("protein.tpr", "a").close()

        result = cli_runner.invoke(
            cli.cli,
            [
                "generate",
                "--module=gromacs/2016",
                "--host=draco",
                "--max-nodes=4",
                "--gpu",
                "--no-cpu",
                "--name=protein",
            ],
            input="y\n",
        )

        output1 = (
            "WARNING Cannot locate modules available on this host. Not performing module name validation.\n"
            "Creating benchmark system for gromacs/2016 with GPUs.\n"
        )
        bundle = dtr.discover()
        df = DataFrameFromBundle(bundle)
        df = ConsolidateDataFrame(df)
        output2 = (
            "The above benchmarks will be generated. Continue? [y/N]: y\n"
            "Finished generating all benchmarks.\n"
            "You can now submit the jobs with mdbenchmark submit.\n"
        )
        mid = "Benchmark Summary:\n" + PrintDataFrame(df, False) + "\n"
        output = output1 + mid + output2

        # Test that we get a warning if no module name validation is performed.
        assert result.exit_code == 0
        assert result.output == output
def test_generate_odd_number_of_nodes(
    cli_runner,
    engine,
    module,
    extensions,
    generate_output,
    monkeypatch,
    tmpdir,
    version,
):
    """Make sure we generate the correct folder structure."""
    with tmpdir.as_cwd():
        for ext in extensions:
            open("protein.{}".format(ext), "a").close()

        monkeypatch.setattr(
            "mdbenchmark.mdengines.get_available_modules",
            lambda: {"gromacs": ["2016"], "namd": ["11"]},
        )

        result = cli_runner.invoke(
            cli.cli,
            [
                "generate",
                "--module={}".format(module),
                "--host=draco",
                "--min-nodes=6",
                "--max-nodes=8",
                "--gpu",
                "--no-cpu",
                "--name=protein",
                "--yes",
            ],
        )

        output1 = "Creating benchmark system for {} with GPUs.\n".format(module)
        bundle = dtr.discover()
        df = DataFrameFromBundle(bundle)
        df = ConsolidateDataFrame(df)
        test_output = "Benchmark Summary:\n" + PrintDataFrame(df, False) + "\n"
        output2 = (
            "Generating the above benchmarks.\n"
            "Finished generating all benchmarks.\n"
            "You can now submit the jobs with mdbenchmark submit.\n"
        )

        if "namd" in module:
            output = NAMD_WARNING_FORMATTED + output1 + test_output + output2
        else:
            output = output1 + test_output + output2

        assert result.exit_code == 0
        assert result.output == output
        assert os.path.exists("draco_{}".format(engine))

        host_engine_version_path = "draco_{}/{}_gpu/".format(engine, version)
        for i in range(6, 9):
            assert os.path.exists(host_engine_version_path + "{}".format(i))
            for ext in extensions:
                assert os.path.exists(
                    host_engine_version_path + "{}/protein.{}".format(i, ext)
                )
            assert os.path.exists(host_engine_version_path + "{}/bench.job".format(i))
def do_generate(
    name,
    cpu,
    gpu,
    module,
    host,
    min_nodes,
    max_nodes,
    time,
    skip_validation,
    job_name,
    yes,
):
    """Generate a bunch of benchmarks."""
    # Validate the CPU and GPU flags
    validate_cpu_gpu_flags(cpu, gpu)

    # Validate the number of nodes
    validate_number_of_nodes(min_nodes=min_nodes, max_nodes=max_nodes)

    # Grab the template name for the host. This should always work because
    # click does the validation for us
    template = utils.retrieve_host_template(host)

    # Warn the user that NAMD support is still experimental.
    if any(["namd" in m for m in module]):
        console.warn(NAMD_WARNING, "--gpu")

    module = mdengines.normalize_modules(module, skip_validation)

    # If several modules were given and only some of them could not be found,
    # we continue with the modules that were found.
    if not module:
        console.error("No requested modules available!")

    df_overview = pd.DataFrame(
        columns=[
            "name",
            "job_name",
            "base_directory",
            "template",
            "engine",
            "module",
            "nodes",
            "run time [min]",
            "gpu",
            "host",
        ]
    )

    i = 1
    for m in module:
        # Here we detect the MD engine (supported: GROMACS and NAMD).
        engine = mdengines.detect_md_engine(m)

        # Check if all needed files exist. Throw an error if they do not.
        engine.check_input_file_exists(name)

        gpu_cpu = {"cpu": cpu, "gpu": gpu}
        for pu, state in sorted(gpu_cpu.items()):
            if not state:
                continue

            directory = "{}_{}".format(host, m)
            gpu = False
            gpu_string = ""
            if pu == "gpu":
                gpu = True
                directory += "_gpu"
                gpu_string = " with GPUs"

            console.info("Creating benchmark system for {}.", m + gpu_string)

            base_directory = dtr.Tree(directory)

            for nodes in range(min_nodes, max_nodes + 1):
                df_overview.loc[i] = [
                    name,
                    job_name,
                    base_directory,
                    template,
                    engine,
                    m,
                    nodes,
                    time,
                    gpu,
                    host,
                ]
                i += 1

    console.info("{}", "Benchmark Summary:")
    df_short = ConsolidateDataFrame(df_overview)
    PrintDataFrame(df_short)

    if yes:
        console.info("Generating the above benchmarks.")
    elif not click.confirm("The above benchmarks will be generated. Continue?"):
        console.error("Exiting. No benchmarks generated.")

    for _, row in df_overview.iterrows():
        relative_path, file_basename = os.path.split(row["name"])
        write_benchmark(
            engine=row["engine"],
            base_directory=row["base_directory"],
            template=row["template"],
            nodes=row["nodes"],
            gpu=row["gpu"],
            module=row["module"],
            name=file_basename,
            relative_path=relative_path,
            job_name=row["job_name"],
            host=row["host"],
            time=row["run time [min]"],
        )

    # Provide some output for the user
    console.info(
        "Finished generating all benchmarks.\n"
        "You can now submit the jobs with {}.",
        "mdbenchmark submit",
    )