Example #1
def do_analyze(directory, save_csv):
    """Analyze benchmarks."""
    bundle = dtr.discover(directory)
    version = VersionFactory(categories=bundle.categories).version_class

    df = parse_bundle(
        bundle,
        columns=version.analyze_categories,
        sort_values_by=version.analyze_sort,
    )

    # Remove the versions column from the DataFrame
    columns_to_drop = ["version"]
    df = df.drop(columns=columns_to_drop)

    if save_csv is not None:
        if not save_csv.endswith(".csv"):
            save_csv = "{}.csv".format(save_csv)
        df.to_csv(save_csv, index=False)

        console.success("Successfully benchmark data to {}.", save_csv)

    # Warn the user about missing values before they are reformatted,
    # because `isnull()` cannot detect them once they become strings.
    if df.isnull().values.any():
        console.warn(
            "We were not able to gather information for all systems. "
            "Systems marked with question marks have either crashed or "
            "were not started yet.")

    # Reformat NaN values nicely into question marks.
    # TODO: move this to the bundle function!
    df = df.replace(np.nan, "?")

    # Ask for confirmation before printing more than 50 benchmark results
    if df.shape[0] > 50 and not click.confirm(
            "We are about to print the results of {} benchmarks to the console. Continue?"
            .format(click.style(str(df.shape[0]), bold=True))):
        console.error("Exiting.")

    # Print the data to the console
    print_dataframe(
        df,
        columns=map_columns(version.category_mapping,
                            version.analyze_printing),
    )
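
Note the ordering inside do_analyze: the missing-value check has to run before the NaN values are replaced, because once `np.nan` has been turned into the string "?", `isnull()` no longer reports anything. A minimal standalone demonstration:

import numpy as np
import pandas as pd

df = pd.DataFrame({"ns/day": [25.4, np.nan]})
assert df.isnull().values.any()       # the NaN is still detectable here
df = df.replace(np.nan, "?")
assert not df.isnull().values.any()   # after the replace, nothing is null
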
Example #2
def guess_ncores():
    """Guess the number of physical CPU cores.

    We inspect `/proc/cpuinfo` to grab the actual number."""
    total_cores = None
    if sys.platform.startswith("linux"):
        nsocket = len(_cat_proc_cpuinfo_grep_query_sort_uniq("physical id"))
        ncores = len(_cat_proc_cpuinfo_grep_query_sort_uniq("core id"))
        total_cores = ncores * nsocket
    elif sys.platform == "darwin":
        # Assumes an Intel CPU with hyper-threading. As of 2017 this is
        # true for all officially supported Apple models.
        total_cores = mp.cpu_count() // 2
    if total_cores is None:
        console.warn(
            "Could not guess number of physical cores. "
            "Assuming there is only 1 core per node."
        )
        total_cores = 1
    return total_cores
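
guess_ncores() leans on the helper `_cat_proc_cpuinfo_grep_query_sort_uniq`, which is not shown in this example. A minimal sketch of what such a helper could look like, assuming it returns the unique `/proc/cpuinfo` lines starting with the query (mirroring `cat /proc/cpuinfo | grep <query> | sort | uniq`):

def _cat_proc_cpuinfo_grep_query_sort_uniq(query):
    """Return the unique lines of /proc/cpuinfo starting with `query`.

    Minimal sketch under the assumptions above, not the actual implementation.
    """
    lines = set()
    with open("/proc/cpuinfo") as fh:
        for line in fh:
            if line.startswith(query):
                lines.add(line.strip())
    return lines
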
Example #3
def do_analyze(directory, plot, ncores, save_csv):
    """Analyze benchmarks."""
    # Migrate from MDBenchmark < 2 to MDBenchmark >= 2
    mds_to_dtr.migrate_to_datreant(directory)

    bundle = dtr.discover(directory)

    df = DataFrameFromBundle(bundle)

    if save_csv is not None:
        if not save_csv.endswith(".csv"):
            save_csv = "{}.csv".format(save_csv)
        df.to_csv(save_csv)

    # Keep a numeric copy of the data for the deprecated plotting path below.
    df_numeric = df.copy()

    # Warn the user about missing values before they are reformatted,
    # because `isnull()` cannot detect them once they become strings.
    if df.isnull().values.any():
        console.warn(
            "We were not able to gather information for all systems. "
            "Systems marked with question marks have either crashed or "
            "were not started yet."
        )

    # Reformat NaN values nicely into question marks.
    # TODO: move this to the bundle function!
    df = df.replace(np.nan, "?")
    PrintDataFrame(df)

    if plot:
        console.warn("'--plot' has been deprecated, use '{}'.", "mdbenchmark plot")

        fig = Figure()
        FigureCanvas(fig)
        ax = fig.add_subplot(111)

        # Plot from the numeric copy instead of re-reading the CSV file,
        # which would fail whenever '--save-csv' was not given.
        df = df_numeric
        if ncores:
            console.warn(
                "Ignoring your value from '{}' and parsing number of cores from log files.",
                "--number-cores/-ncores",
            )
        ax = plot_over_group(df, plot_cores=ncores, fit=True, ax=ax)
        lgd = ax.legend(loc="upper center", bbox_to_anchor=(0.5, -0.175))

        fig.tight_layout()
        fig.savefig(
            "runtimes.pdf", type="pdf", bbox_extra_artists=(lgd,), bbox_inches="tight"
        )
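
Example #3 uses matplotlib's object-oriented API without pyplot; the imports it assumes are not shown. A self-contained sketch of the same Figure/FigureCanvas pattern (the plotted data is made up):

from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure

fig = Figure()
FigureCanvas(fig)  # attach a non-interactive Agg canvas to the figure
ax = fig.add_subplot(111)
ax.plot([1, 2, 4], [120, 95, 60], marker="o", label="runtime")
ax.legend()
fig.savefig("example.pdf", format="pdf")
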
Example #4
def test_console_warn():
    """Test the output of console.warn()."""
    fh = StringIO()
    console.warn("I am not feeling good today.", filehandler=fh)
    assert fh.getvalue() == "WARNING I am not feeling good today.\n"
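
The test pins down the output format of console.warn(). A minimal sketch of a warn() that would satisfy this test, assuming `filehandler` defaults to standard output (not the actual implementation):

import sys

def warn(message, *args, filehandler=None):
    """Write `message` with a WARNING prefix to `filehandler`."""
    if filehandler is None:
        filehandler = sys.stdout
    filehandler.write("WARNING {}\n".format(message.format(*args)))
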
Example #5
def normalize_modules(modules, skip_validation):
    """Validate that the provided module names are available.

    We first check whether the requested MD engine is supported by the package.
    Next we try to discover all available modules on the host. If this is not
    possible, or if the user has used the `--skip-validation` option, we skip
    the check and notify the user.

    If the user requested modules that were not found on the system, we inform
    the user and show all modules for that corresponding MD engine that were
    found.
    """
    # Check whether the modules belong to supported MD engines
    d = defaultdict(list)
    for m in modules:
        engine_name, version = prepare_module_name(m, skip_validation)
        d[engine_name].append(version)
    for engine_name in d:
        if detect_md_engine(engine_name) is None:
            console.error(
                "There is currently no support for '{}'. "
                "Supported MD engines are: {}.",
                engine_name,
                ", ".join(sorted(SUPPORTED_ENGINES.keys())),
            )

    if skip_validation:
        console.warn("Not performing module name validation.")
        return modules

    available_modules = get_available_modules()
    if available_modules is None:
        console.warn(
            "Cannot locate modules available on this host. Not performing module name validation."
        )
        return modules

    good_modules = [
        m for m in modules if validate_module_name(m, available_modules)
    ]

    # Prepare to warn the user about missing modules
    missing_modules = set(modules).difference(good_modules)
    if missing_modules:
        d = defaultdict(list)
        for mm in sorted(missing_modules):
            engine_name, version = mm.split("/")
            d[engine_name].append(version)

        err = "We have problems finding all of your requested modules on this host.\n"
        args = []
        for engine_name in sorted(d.keys()):
            err += (
                "We were not able to find the following modules for MD engine {}: {}.\n"
            )
            args.append(engine_name)
            args.extend(d[engine_name])
            # Show all available modules that we found for the requested MD engine
            err += "Available modules are:\n{}\n"
            args.extend([
                "\n".join([
                    "{}/{}".format(engine_name, mde)
                    for mde in sorted(available_modules[engine_name])
                ])
            ])
        console.warn(err, bold=True, *args)

    return good_modules
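
A hypothetical invocation illustrating the flow the docstring describes (the module names below are made up for the example):

# Hypothetical call; module names are illustrative only.
modules = normalize_modules(["gromacs/2018.3", "namd/2.12"], skip_validation=False)
# With validation enabled, only the modules actually found on the host are
# returned; missing ones trigger the warning constructed above.
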
Example #6
def do_generate(
    name,
    cpu,
    gpu,
    module,
    host,
    min_nodes,
    max_nodes,
    time,
    skip_validation,
    job_name,
    yes,
    physical_cores,
    logical_cores,
    number_of_ranks,
    enable_hyperthreading,
    multidir,
):
    """Generate a bunch of benchmarks."""

    # Instantiate the version we are going to use
    benchmark_version = Version3Categories()

    # Validate the CPU and GPU flags
    validate_cpu_gpu_flags(cpu, gpu)

    # Validate the number of nodes
    validate_number_of_nodes(min_nodes=min_nodes, max_nodes=max_nodes)

    # Guard against a missing value before comparing the two core counts
    if physical_cores and logical_cores and logical_cores < physical_cores:
        console.error(
            "The number of logical cores cannot be smaller than the number of physical cores."
        )

    if physical_cores and not logical_cores:
        console.warn("Assuming logical_cores = 2 * physical_cores.")
        logical_cores = 2 * physical_cores

    if physical_cores and logical_cores:
        processor = Processor(physical_cores=physical_cores,
                              logical_cores=logical_cores)
    else:
        processor = Processor()

    # Hyperthreading check
    if enable_hyperthreading and not processor.supports_hyperthreading:
        console.error(
            "The processor of this machine does not support hyperthreading.")

    if not number_of_ranks:
        number_of_ranks = (processor.physical_cores, )

    # Validate number of simulations
    validate_number_of_simulations(multidir, min_nodes, max_nodes,
                                   number_of_ranks)

    # Grab the template name for the host. This should always work because
    # click does the validation for us
    template = utils.retrieve_host_template(host)

    # Warn the user that NAMD support is still experimental.
    if any(["namd" in m for m in module]):
        console.warn(NAMD_WARNING, "--gpu")

    # Stop if we cannot find any modules. If the user specified multiple
    # modules, we will continue with only the valid ones.
    modules = mdengines.normalize_modules(module, skip_validation)
    if not modules:
        console.error("No requested modules available!")

    # Check if all needed files exist. Throw an error if they do not.
    validate_required_files(name=name, modules=modules)

    # Validate that we can use the number of ranks and threads.
    # We can continue, if no ValueError is thrown
    for ranks in number_of_ranks:
        try:
            processor.get_ranks_and_threads(
                ranks, with_hyperthreading=enable_hyperthreading)
        except ValueError as e:
            console.error(e)

    # Create all benchmark combinations and put them into a DataFrame
    data = construct_generate_data(
        name,
        job_name,
        modules,
        host,
        template,
        cpu,
        gpu,
        time,
        min_nodes,
        max_nodes,
        processor,
        number_of_ranks,
        enable_hyperthreading,
        multidir,
    )
    df = pd.DataFrame(data, columns=benchmark_version.generate_categories)

    # Consolidate the data by grouping on the number of nodes and print to the
    # user as an overview.
    consolidated_df = consolidate_dataframe(
        df, columns=benchmark_version.consolidate_categories)
    print_dataframe(
        consolidated_df[benchmark_version.generate_printing],
        columns=map_columns(
            map_dict=benchmark_version.category_mapping,
            columns=benchmark_version.generate_printing,
        ),
    )

    # Save the number of benchmarks for later printing
    number_of_benchmarks = df.shape[0]
    # Ask the user for confirmation to generate files.
    # If the user defined `--yes`, we will skip the confirmation immediately.
    if yes:
        suffix = "benchmark" if number_of_benchmarks == 1 else "benchmarks"
        console.info("We will generate {} " + suffix + ".", number_of_benchmarks)
    elif not click.confirm("We will generate {} benchmarks. Continue?".format(
            number_of_benchmarks)):
        console.error("Exiting. No benchmarks were generated.")

    # Generate the benchmarks
    with click.progressbar(
            df.iterrows(),
            length=number_of_benchmarks,
            show_pos=True,
            label="Generating benchmarks",
    ) as bar:
        for _, row in bar:
            relative_path, file_basename = os.path.split(row["name"])
            mappings = benchmark_version.generate_mapping
            kwargs = {"name": file_basename, "relative_path": relative_path}
            for key, value in mappings.items():
                kwargs[value] = row[key]

            write_benchmark(**kwargs)

    # Finish up by telling the user how to submit the benchmarks
    console.info(
        "Finished! You can submit the jobs with {}.",
        "mdbenchmark submit",
    )
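
Example #6 delegates the rank and thread arithmetic to a Processor object that is not shown. A hypothetical sketch, assuming the ranks must divide the available cores evenly (the class below is illustrative, not the actual implementation):

class Processor:
    """Hypothetical sketch of the processor abstraction used above."""

    def __init__(self, physical_cores=4, logical_cores=8):
        self.physical_cores = physical_cores
        self.logical_cores = logical_cores
        # Assume hyperthreading whenever there are more logical than physical cores.
        self.supports_hyperthreading = logical_cores > physical_cores

    def get_ranks_and_threads(self, ranks, with_hyperthreading=False):
        """Return (ranks, threads per rank), or raise ValueError if impossible."""
        cores = self.logical_cores if with_hyperthreading else self.physical_cores
        if ranks < 1 or cores % ranks != 0:
            raise ValueError(
                "Cannot distribute {} cores evenly over {} ranks.".format(cores, ranks)
            )
        return ranks, cores // ranks
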
Example #7
def do_generate(
    name,
    cpu,
    gpu,
    module,
    host,
    min_nodes,
    max_nodes,
    time,
    skip_validation,
    job_name,
    yes,
):
    """Generate a bunch of benchmarks."""
    # Validate the CPU and GPU flags
    validate_cpu_gpu_flags(cpu, gpu)

    # Validate the number of nodes
    validate_number_of_nodes(min_nodes=min_nodes, max_nodes=max_nodes)

    # Grab the template name for the host. This should always work because
    # click does the validation for us
    template = utils.retrieve_host_template(host)

    # Warn the user that NAMD support is still experimental.
    if any(["namd" in m for m in module]):
        console.warn(NAMD_WARNING, "--gpu")

    module = mdengines.normalize_modules(module, skip_validation)

    # If several modules were given and only some of them cannot be found,
    # we continue with the valid ones.
    if not module:
        console.error("No requested modules available!")

    df_overview = pd.DataFrame(columns=[
        "name",
        "job_name",
        "base_directory",
        "template",
        "engine",
        "module",
        "nodes",
        "run time [min]",
        "gpu",
        "host",
    ])

    i = 1
    for m in module:
        # Here we detect the MD engine (supported: GROMACS and NAMD).
        engine = mdengines.detect_md_engine(m)

        # Check if all needed files exist. Throw an error if they do not.
        engine.check_input_file_exists(name)

        gpu_cpu = {"cpu": cpu, "gpu": gpu}
        for pu, state in sorted(gpu_cpu.items()):
            if not state:
                continue

            directory = "{}_{}".format(host, m)
            gpu = False
            gpu_string = ""
            if pu == "gpu":
                gpu = True
                directory += "_gpu"
                gpu_string = " with GPUs"

            console.info("Creating benchmark system for {}.", m + gpu_string)

            base_directory = dtr.Tree(directory)

            for nodes in range(min_nodes, max_nodes + 1):
                df_overview.loc[i] = [
                    name,
                    job_name,
                    base_directory,
                    template,
                    engine,
                    m,
                    nodes,
                    time,
                    use_gpu,
                    host,
                ]
                i += 1

    console.info("{}", "Benchmark Summary:")

    df_short = ConsolidateDataFrame(df_overview)
    PrintDataFrame(df_short)

    if yes:
        console.info("Generating the above benchmarks.")
    elif not click.confirm(
            "The above benchmarks will be generated. Continue?"):
        console.error("Exiting. No benchmarks generated.")

    for _, row in df_overview.iterrows():
        relative_path, file_basename = os.path.split(row["name"])
        write_benchmark(
            engine=row["engine"],
            base_directory=row["base_directory"],
            template=row["template"],
            nodes=row["nodes"],
            gpu=row["gpu"],
            module=row["module"],
            name=file_basename,
            relative_path=relative_path,
            job_name=row["job_name"],
            host=row["host"],
            time=row["run time [min]"],
        )

    # Provide some output for the user
    console.info(
        "Finished generating all benchmarks.\n"
        "You can now submit the jobs with {}.",
        "mdbenchmark submit",
    )
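
Example #7 relies on `mdengines.detect_md_engine` to map a module name to an engine. A hypothetical sketch of such a lookup, assuming a simple prefix match against the supported engines (the mapping below uses plain strings where the real code would use engine modules):

SUPPORTED_ENGINES = {"gromacs": "gromacs-engine", "namd": "namd-engine"}

def detect_md_engine(modulename):
    """Return the engine whose name prefixes `modulename`, or None."""
    for name, engine in SUPPORTED_ENGINES.items():
        if modulename.startswith(name):
            return engine
    return None

assert detect_md_engine("gromacs/2018.3") == "gromacs-engine"
assert detect_md_engine("unknown/1.0") is None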