def do_analyze(directory, save_csv):
    """Analyze benchmarks.

    Discovers all benchmark bundles below ``directory``, assembles them into
    a DataFrame, optionally exports the data to CSV and prints a summary
    table to the console.

    Parameters
    ----------
    directory : str
        Path below which benchmark bundles are discovered.
    save_csv : str or None
        Filename for the CSV export; a ``.csv`` suffix is appended when
        missing. ``None`` disables the export.
    """
    bundle = dtr.discover(directory)
    version = VersionFactory(categories=bundle.categories).version_class
    df = parse_bundle(
        bundle,
        columns=version.analyze_categories,
        sort_values_by=version.analyze_sort,
    )

    # Remove the versions column from the DataFrame
    df = df.drop(columns=["version"])

    if save_csv is not None:
        if not save_csv.endswith(".csv"):
            save_csv = "{}.csv".format(save_csv)
        df.to_csv(save_csv, index=False)
        # Fixed message: previously read "Successfully benchmark data to {}."
        console.success("Successfully exported benchmark data to {}.", save_csv)

    # Warn about missing values BEFORE masking them: after the NaN -> "?"
    # replacement below, `isnull()` is always all-False, so the warning
    # could never be shown.
    if df.isnull().values.any():
        console.warn(
            "We were not able to gather informations for all systems. "
            "Systems marked with question marks have either crashed or "
            "were not started yet."
        )

    # Reformat NaN values nicely into question marks.
    # move this to the bundle function!
    df = df.replace(np.nan, "?")

    # Warn user that we are going to print more than 50 benchmark results to
    # the console; console.error() aborts if they decline.
    if df.shape[0] > 50 and not click.confirm(
        "We are about to print the results of {} benchmarks to the console. Continue?".format(
            click.style(str(df.shape[0]), bold=True)
        )
    ):
        console.error("Exiting.")

    # Print the data to the console
    print_dataframe(
        df,
        columns=map_columns(version.category_mapping, version.analyze_printing),
    )
def guess_ncores():
    """Guess the number of physical CPU cores.

    On Linux the count is derived from `/proc/cpuinfo`; on macOS the logical
    core count is halved. Any other platform falls back to a single core and
    emits a warning.
    """
    platform = sys.platform

    if platform.startswith("linux"):
        # physical cores = number of sockets * cores per socket
        sockets = len(_cat_proc_cpuinfo_grep_query_sort_uniq("physical id"))
        cores_per_socket = len(_cat_proc_cpuinfo_grep_query_sort_uniq("core id"))
        return cores_per_socket * sockets

    if platform == "darwin":
        # assumes we have an INTEL CPU with hyper-threading. As of 2017 this is
        # true for all officially supported Apple models.
        return mp.cpu_count() // 2

    console.warn(
        "Could not guess number of physical cores. "
        "Assuming there is only 1 core per node."
    )
    return 1
def do_analyze(directory, plot, ncores, save_csv):
    """Analyze benchmarks.

    Parameters
    ----------
    directory : str
        Path below which benchmark bundles are discovered.
    plot : bool
        Deprecated; when truthy, additionally writes a ``runtimes.pdf`` plot.
    ncores : int
        Deprecated; the number of cores is parsed from the log files instead.
    save_csv : str or None
        Filename for the CSV export (``.csv`` appended when missing).
    """
    # Migrate from MDBenchmark<2 to MDBenchmark=>2
    mds_to_dtr.migrate_to_datreant(directory)

    bundle = dtr.discover(directory)

    df = DataFrameFromBundle(bundle)

    if save_csv is not None and not save_csv.endswith(".csv"):
        save_csv = "{}.csv".format(save_csv)
    df.to_csv(save_csv)

    # Warn about missing values BEFORE masking them: after the NaN -> "?"
    # replacement below, `isnull()` is always all-False, so the warning
    # could never be shown.
    if df.isnull().values.any():
        console.warn(
            "We were not able to gather informations for all systems. "
            "Systems marked with question marks have either crashed or "
            "were not started yet."
        )

    # Reformat NaN values nicely into question marks.
    # move this to the bundle function!
    df = df.replace(np.nan, "?")

    PrintDataFrame(df)

    if plot:
        console.warn("'--plot' has been deprecated, use '{}'.", "mdbenchmark plot")

        fig = Figure()
        FigureCanvas(fig)
        ax = fig.add_subplot(111)

        # NOTE(review): this raises when '--save-csv' was not given
        # (pd.read_csv(None)); the plot path re-reads the exported CSV
        # instead of reusing `df` directly — confirm intent before changing.
        df = pd.read_csv(save_csv)

        if ncores:
            console.warn(
                "Ignoring your value from '{}' and parsing number of cores from log files.",
                "--number-cores/-ncores",
            )

        ax = plot_over_group(df, plot_cores=ncores, fit=True, ax=ax)
        lgd = ax.legend(loc="upper center", bbox_to_anchor=(0.5, -0.175))
        fig.tight_layout()
        # `Figure.savefig` takes `format=`, not `type=`; the old keyword was
        # not a valid argument and the format was only inferred from the
        # ".pdf" extension.
        fig.savefig(
            "runtimes.pdf", format="pdf", bbox_extra_artists=(lgd,), bbox_inches="tight"
        )
def test_console_warn():
    """Test the output of console.warn()."""
    buffer = StringIO()
    console.warn("I am not feeling good today.", filehandler=buffer)
    expected = "WARNING I am not feeling good today.\n"
    assert buffer.getvalue() == expected
def normalize_modules(modules, skip_validation):
    """Validate that the provided module names are available.

    We first check whether the requested MD engine is supported by the
    package. Next we try to discover all available modules on the host. If
    this is not possible, or if the user has used the `--skip-validation`
    option, we skip the check and notify the user.

    If the user requested modules that were not found on the system, we
    inform the user and show all modules for that corresponding MD engine
    that were found.
    """
    # Check if modules are from supported md engines
    d = defaultdict(list)
    for m in modules:
        engine_name, version = prepare_module_name(m, skip_validation)
        # Collect every requested version per engine. The previous
        # `d[engine_name] = version` assignment silently dropped all but the
        # last version and was inconsistent with the grouping loop below.
        d[engine_name].append(version)
    for engine_name in d.keys():
        if detect_md_engine(engine_name) is None:
            console.error(
                "There is currently no support for '{}'. "
                + "Supported MD engines are: {}.".format(
                    ", ".join(sorted(SUPPORTED_ENGINES.keys()))
                ),
                engine_name,
            )

    if skip_validation:
        console.warn("Not performing module name validation.")
        return modules

    available_modules = get_available_modules()
    if available_modules is None:
        console.warn(
            "Cannot locate modules available on this host. Not performing module name validation."
        )
        return modules

    good_modules = [m for m in modules if validate_module_name(m, available_modules)]

    # Prepare to warn the user about missing modules
    missing_modules = set(modules).difference(good_modules)
    if missing_modules:
        # Group the missing versions by MD engine for a readable warning.
        d = defaultdict(list)
        for mm in sorted(missing_modules):
            engine_name, version = mm.split("/")
            d[engine_name].append(version)

        err = "We have problems finding all of your requested modules on this host.\n"
        args = []
        for engine_name in sorted(d.keys()):
            err += (
                "We were not able to find the following modules for MD engine {}: {}.\n"
            )
            args.append(engine_name)
            args.extend(d[engine_name])
            # Show all available modules that we found for the requested MD engine
            err += "Available modules are:\n{}\n"
            args.append(
                "\n".join(
                    "{}/{}".format(engine_name, mde)
                    for mde in sorted(available_modules[engine_name])
                )
            )

        console.warn(err, *args, bold=True)

    return good_modules
def do_generate( name, cpu, gpu, module, host, min_nodes, max_nodes, time, skip_validation, job_name, yes, physical_cores, logical_cores, number_of_ranks, enable_hyperthreading, multidir, ): """Generate a bunch of benchmarks.""" # Instantiate the version we are going to use benchmark_version = Version3Categories() # Validate the CPU and GPU flags validate_cpu_gpu_flags(cpu, gpu) # Validate the number of nodes validate_number_of_nodes(min_nodes=min_nodes, max_nodes=max_nodes) if logical_cores < physical_cores: console.error( "The number of logical cores cannot be smaller than the number of physical cores." ) if physical_cores and not logical_cores: console.warn("Assuming logical_cores = 2 * physical_cores") logical_cores = 2 * physical_cores if physical_cores and logical_cores: processor = Processor(physical_cores=physical_cores, logical_cores=logical_cores) else: processor = Processor() # Hyperthreading check if enable_hyperthreading and not processor.supports_hyperthreading: console.error( "The processor of this machine does not support hyperthreading.") if not number_of_ranks: number_of_ranks = (processor.physical_cores, ) # Validate number of simulations validate_number_of_simulations(multidir, min_nodes, max_nodes, number_of_ranks) # Grab the template name for the host. This should always work because # click does the validation for us template = utils.retrieve_host_template(host) # Warn the user that NAMD support is still experimental. if any(["namd" in m for m in module]): console.warn(NAMD_WARNING, "--gpu") # Stop if we cannot find any modules. If the user specified multiple # modules, we will continue with only the valid ones. modules = mdengines.normalize_modules(module, skip_validation) if not modules: console.error("No requested modules available!") # Check if all needed files exist. Throw an error if they do not. validate_required_files(name=name, modules=modules) # Validate that we can use the number of ranks and threads. 
# We can continue, if no ValueError is thrown for ranks in number_of_ranks: try: processor.get_ranks_and_threads( ranks, with_hyperthreading=enable_hyperthreading) except ValueError as e: console.error(e) # Create all benchmark combinations and put them into a DataFrame data = construct_generate_data( name, job_name, modules, host, template, cpu, gpu, time, min_nodes, max_nodes, processor, number_of_ranks, enable_hyperthreading, multidir, ) df = pd.DataFrame(data, columns=benchmark_version.generate_categories) # Consolidate the data by grouping on the number of nodes and print to the # user as an overview. consolidated_df = consolidate_dataframe( df, columns=benchmark_version.consolidate_categories) print_dataframe( consolidated_df[benchmark_version.generate_printing], columns=map_columns( map_dict=benchmark_version.category_mapping, columns=benchmark_version.generate_printing, ), ) # Save the number of benchmarks for later printing number_of_benchmarks = df.shape[0] # Ask the user for confirmation to generate files. # If the user defined `--yes`, we will skip the confirmation immediately. if yes: console.info( "We will generate {} " + "{benchmark}.".format(benchmark="benchmark" if number_of_benchmarks == 1 else "benchmarks"), number_of_benchmarks, ) elif not click.confirm("We will generate {} benchmarks. Continue?".format( number_of_benchmarks)): console.error("Exiting. No benchmarks were generated.") # Generate the benchmarks with click.progressbar( df.iterrows(), length=number_of_benchmarks, show_pos=True, label="Generating benchmarks", ) as bar: for _, row in bar: relative_path, file_basename = os.path.split(row["name"]) mappings = benchmark_version.generate_mapping kwargs = {"name": file_basename, "relative_path": relative_path} for key, value in mappings.items(): kwargs[value] = row[key] write_benchmark(**kwargs) # Finish up by telling the user how to submit the benchmarks console.info( "Finished! You can submit the jobs with {}.", "mdbenchmark submit", )
def do_generate(
    name,
    cpu,
    gpu,
    module,
    host,
    min_nodes,
    max_nodes,
    time,
    skip_validation,
    job_name,
    yes,
):
    """Generate a bunch of benchmarks.

    Builds an overview DataFrame with one row per (module, CPU/GPU, node
    count) combination, prints a consolidated summary, asks for confirmation
    and writes the benchmark files.

    Parameters
    ----------
    name : str
        Base name of the benchmark input file(s).
    cpu, gpu : bool
        Whether to generate CPU and/or GPU benchmarks.
    module : iterable of str
        Requested MD engine modules.
    host : str
        Host/template name; validated upstream by click.
    min_nodes, max_nodes : int
        Inclusive node range to benchmark.
    time : int
        Run time per benchmark in minutes.
    skip_validation : bool
        Skip module-name validation against the host's module system.
    job_name : str
        Name used for the queuing-system job.
    yes : bool
        Skip the interactive confirmation prompt.
    """
    # Validate the CPU and GPU flags
    validate_cpu_gpu_flags(cpu, gpu)

    # Validate the number of nodes
    validate_number_of_nodes(min_nodes=min_nodes, max_nodes=max_nodes)

    # Grab the template name for the host. This should always work because
    # click does the validation for us
    template = utils.retrieve_host_template(host)

    # Warn the user that NAMD support is still experimental.
    if any(["namd" in m for m in module]):
        console.warn(NAMD_WARNING, "--gpu")

    module = mdengines.normalize_modules(module, skip_validation)

    # If several modules were given and we only cannot find one of them, we
    # continue.
    if not module:
        console.error("No requested modules available!")

    df_overview = pd.DataFrame(
        columns=[
            "name",
            "job_name",
            "base_directory",
            "template",
            "engine",
            "module",
            "nodes",
            "run time [min]",
            "gpu",
            "host",
        ]
    )

    i = 1
    for m in module:
        # Here we detect the MD engine (supported: GROMACS and NAMD).
        engine = mdengines.detect_md_engine(m)

        # Check if all needed files exist. Throw an error if they do not.
        engine.check_input_file_exists(name)

        gpu_cpu = {"cpu": cpu, "gpu": gpu}
        for pu, state in sorted(gpu_cpu.items()):
            if not state:
                continue

            directory = "{}_{}".format(host, m)
            # Use a dedicated local flag instead of clobbering the `gpu`
            # parameter. The old code reassigned `gpu` here and only worked
            # because sorted() iterates "cpu" before "gpu", restoring the
            # original value before the next module was processed.
            use_gpu = False
            gpu_string = ""
            if pu == "gpu":
                use_gpu = True
                directory += "_gpu"
                gpu_string = " with GPUs"

            console.info("Creating benchmark system for {}.", m + gpu_string)

            base_directory = dtr.Tree(directory)
            for nodes in range(min_nodes, max_nodes + 1):
                df_overview.loc[i] = [
                    name,
                    job_name,
                    base_directory,
                    template,
                    engine,
                    m,
                    nodes,
                    time,
                    use_gpu,
                    host,
                ]
                i += 1

    console.info("{}", "Benchmark Summary:")

    df_short = ConsolidateDataFrame(df_overview)
    PrintDataFrame(df_short)

    if yes:
        console.info("Generating the above benchmarks.")
    elif not click.confirm("The above benchmarks will be generated. Continue?"):
        # console.error aborts; nothing is written when the user declines.
        console.error("Exiting. No benchmarks generated.")

    for _, row in df_overview.iterrows():
        relative_path, file_basename = os.path.split(row["name"])
        write_benchmark(
            engine=row["engine"],
            base_directory=row["base_directory"],
            template=row["template"],
            nodes=row["nodes"],
            gpu=row["gpu"],
            module=row["module"],
            name=file_basename,
            relative_path=relative_path,
            job_name=row["job_name"],
            host=row["host"],
            time=row["run time [min]"],
        )

    # Provide some output for the user
    console.info(
        "Finished generating all benchmarks.\n"
        "You can now submit the jobs with {}.",
        "mdbenchmark submit",
    )