def correlation(raw_data_df):
    """Get the correlations.

    Args:
        raw_data_df (DataFrame): raw data

    Returns:
        DataFrame: correlations
    """
    data_corr_df = pd.DataFrame()
    if not isinstance(raw_data_df, pd.DataFrame):
        logger.error('DataAnalyzer: the type of raw data is not pd.DataFrame')
        return data_corr_df
    if len(raw_data_df) == 0:
        logger.warning('DataAnalyzer: empty data.')
        return data_corr_df
    try:
        data_corr_df = raw_data_df.corr()
        statistics_error = []
        for column in list(raw_data_df.columns):
            if column not in list(data_corr_df.columns) and not raw_data_df[column].isnull().all():
                statistics_error.append(column)
        if statistics_error:
            logger.warning(
                'DataAnalyzer: [{}] is missing in correlation results.'.format(
                    ','.join(str(x) for x in statistics_error)
                )
            )
    except Exception as e:
        logger.error('DataAnalyzer: correlation failed, msg: {}'.format(str(e)))
    return data_corr_df
def _get_baseline_of_metric(self, baseline, metric):
    """Get the baseline value of the metric.

    Args:
        baseline (dict): baseline defined in baseline file
        metric (str): the full name of the metric

    Returns:
        numeric: the baseline value of the metric
    """
    if metric in baseline:
        return baseline[metric]
    elif 'return_code' in metric:
        return 0
    else:
        short = metric
        # exclude rank info, for example, '.*:\d+'->'.*'
        if ':' in metric:
            short = metric.strip(metric.split(':')[-1]).strip(':')
        else:
            short = metric.split('/')[0]
        if short in baseline:
            return baseline[short]
        else:
            # baseline not defined
            logger.warning('DataDiagnosis: get baseline - {} baseline not found'.format(metric))
            return -1
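# Illustrative lookup sketch (hypothetical baseline dict and instance, not from the source):
# a rank-suffixed metric such as 'kernel-launch/event_time:0' falls back to the rank-stripped
# name, and anything still unmatched returns -1 with a warning.
# baseline = {'kernel-launch/event_time': 0.006}
# diag._get_baseline_of_metric(baseline, 'kernel-launch/event_time:0')    # -> 0.006
# diag._get_baseline_of_metric(baseline, 'unknown-bench/metric')          # -> -1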
def _get_metrics(self, rule, benchmark_rules):
    """Get metrics in the rule.

    Parse metric regex in the rule, and store the (metric, -1) pair in _sb_rules[rule]['metrics'].

    Args:
        rule (str): the name of the rule
        benchmark_rules (dict): the dict of rules
    """
    metrics_in_rule = benchmark_rules[rule]['metrics']
    benchmark_metrics_dict_in_rule = self._get_metrics_by_benchmarks(metrics_in_rule)
    for benchmark_name in benchmark_metrics_dict_in_rule:
        if benchmark_name not in self._benchmark_metrics_dict:
            logger.warning('RuleBase: get metrics failed - {}'.format(benchmark_name))
            continue
        # get rules and criteria for each metric
        for metric in self._benchmark_metrics_dict[benchmark_name]:
            # metric full name in baseline
            if metric in metrics_in_rule:
                self._sb_rules[rule]['metrics'][metric] = -1
                self._enable_metrics.add(metric)
                continue
            # metric full name not in baseline, use regex to match
            for metric_regex in benchmark_metrics_dict_in_rule[benchmark_name]:
                if re.search(metric_regex, metric):
                    self._sb_rules[rule]['metrics'][metric] = -1
                    self._enable_metrics.add(metric)
def exec(self):
    """Run the SuperBench benchmarks locally."""
    for benchmark_name in self._sb_benchmarks:
        if benchmark_name not in self._sb_enabled:
            continue
        benchmark_config = self._sb_benchmarks[benchmark_name]
        benchmark_results = list()
        self.__create_benchmark_dir(benchmark_name)
        cwd = os.getcwd()
        os.chdir(self.__get_benchmark_dir(benchmark_name))

        monitor = None
        if self.__get_rank_id() == 0 and self._sb_monitor_config and self._sb_monitor_config.enable:
            if self.__get_platform() == Platform.CUDA:
                monitor = Monitor(
                    None, int(self._sb_monitor_config.sample_duration or 10),
                    int(self._sb_monitor_config.sample_interval or 1), self.__get_monitor_path(benchmark_name)
                )
                monitor.start()
            else:
                logger.warning('Monitor can not support ROCM/CPU platform.')

        benchmark_real_name = benchmark_name.split(':')[0]
        for framework in benchmark_config.frameworks or [Framework.NONE.value]:
            if benchmark_real_name == 'model-benchmarks' or (
                ':' not in benchmark_name and benchmark_name.endswith('_models')
            ):
                for model in benchmark_config.models:
                    full_name = f'{benchmark_name}/{framework}-{model}'
                    logger.info('Executor is going to execute %s.', full_name)
                    context = BenchmarkRegistry.create_benchmark_context(
                        model,
                        platform=self.__get_platform(),
                        framework=Framework(framework.lower()),
                        parameters=self.__get_arguments(benchmark_config.parameters)
                    )
                    result = self.__exec_benchmark(full_name, context)
                    benchmark_results.append(result)
            else:
                full_name = benchmark_name
                logger.info('Executor is going to execute %s.', full_name)
                context = BenchmarkRegistry.create_benchmark_context(
                    benchmark_real_name,
                    platform=self.__get_platform(),
                    framework=Framework(framework.lower()),
                    parameters=self.__get_arguments(benchmark_config.parameters)
                )
                result = self.__exec_benchmark(full_name, context)
                benchmark_results.append(result)

        if monitor:
            monitor.stop()
        self.__write_benchmark_results(benchmark_name, benchmark_results)
        os.chdir(cwd)
def output_diagnosis_in_jsonl(self, data_not_accept_df, output_path):
    """Output data_not_accept_df into jsonl file.

    Args:
        data_not_accept_df (DataFrame): the DataFrame to output
        output_path (str): the path of output jsonl file
    """
    p = Path(output_path)
    try:
        # Validate the input before converting it, so invalid or empty data is rejected early.
        if not isinstance(data_not_accept_df, pd.DataFrame):
            logger.warning('DataDiagnosis: output json data - data_not_accept_df is not DataFrame.')
            return
        if data_not_accept_df.empty:
            logger.warning('DataDiagnosis: output json data - data_not_accept_df is empty.')
            return
        data_not_accept_json = data_not_accept_df.to_json(orient='index')
        data_not_accept = json.loads(data_not_accept_json)
        with p.open('w') as f:
            for node in data_not_accept:
                line = data_not_accept[node]
                line['Index'] = node
                json_str = json.dumps(line)
                f.write(json_str + '\n')
    except Exception as e:
        logger.error('DataDiagnosis: output json data failed, msg: {}'.format(str(e)))
def _preprocess(self):
    """Preprocess/preparation operations before the benchmarking.

    Return:
        True if _preprocess() succeeds.
    """
    if not super()._preprocess():
        return False

    if len(self._args.precision) == 0:
        self._precision_need_to_run = self._support_precisions
    else:
        self._args.precision = [p.lower() for p in self._args.precision]
        for p in self._args.precision:
            if p not in self._support_precisions:
                logger.warning(
                    'Unsupported precision - benchmark: {}, precision: {}, expected: {}.'.format(
                        self._name, p, self._support_precisions
                    )
                )
            else:
                self._precision_need_to_run.append(p)

    if len(self._precision_need_to_run) == 0:
        self._result.set_return_code(ReturnCode.NO_SUPPORTED_PRECISION)
        return False
    return True
def interquartile_range(raw_data_df):
    """Get outlier detection bounds using the IQR method.

    The reference of IQR is https://en.wikipedia.org/wiki/Interquartile_range.
    Get the mild and extreme outlier upper and lower values and bounds.
    values:
        Mild Outlier: a point beyond the inner whiskers on either side
            lower whisker: Q1 - 1.5*IQR
            upper whisker: Q3 + 1.5*IQR
        Extreme Outlier: a point beyond the outer whiskers on either side
            lower whisker: Q1 - 3*IQR
            upper whisker: Q3 + 3*IQR
    bounds: (values - mean) / mean

    Args:
        raw_data_df (DataFrame): raw data

    Returns:
        DataFrame: data statistics and IQR bound
    """
    # Initialize the result so the final return is safe even if statistic() raises.
    data_statistics_df = pd.DataFrame()
    if not isinstance(raw_data_df, pd.DataFrame):
        logger.error('DataAnalyzer: the type of raw data is not pd.DataFrame')
        return data_statistics_df
    if len(raw_data_df) == 0:
        logger.warning('DataAnalyzer: empty data.')
        return data_statistics_df
    try:
        data_statistics_df = statistic(raw_data_df)
        iqr = data_statistics_df.loc['75%'] - data_statistics_df.loc['25%']
        mean = data_statistics_df.loc['mean']
        data_statistics_df.loc['mild_outlier_upper'] = data_statistics_df.loc['75%'] + 1.5 * iqr
        data_statistics_df.loc['extreme_outlier_upper'] = data_statistics_df.loc['75%'] + 3 * iqr
        data_statistics_df.loc['mild_outlier_lower'] = data_statistics_df.loc['25%'] - 1.5 * iqr
        data_statistics_df.loc['extreme_outlier_lower'] = data_statistics_df.loc['25%'] - 3 * iqr
        data_statistics_df.loc['mild_outlier_upper_bound'] = (
            data_statistics_df.loc['mild_outlier_upper'] - mean
        ) / mean
        data_statistics_df.loc['extreme_outlier_upper_bound'] = (
            data_statistics_df.loc['extreme_outlier_upper'] - mean
        ) / mean
        data_statistics_df.loc['mild_outlier_lower_bound'] = (
            data_statistics_df.loc['mild_outlier_lower'] - mean
        ) / mean
        data_statistics_df.loc['extreme_outlier_lower_bound'] = (
            data_statistics_df.loc['extreme_outlier_lower'] - mean
        ) / mean
    except Exception as e:
        logger.error('DataAnalyzer: interquartile_range failed, msg: {}'.format(str(e)))
    return data_statistics_df
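# Worked example of the bounds above (illustrative values, not from the source):
# with Q1 = 10, Q3 = 14 and mean = 12, IQR = Q3 - Q1 = 4, so
#   mild_outlier_upper    = 14 + 1.5 * 4 = 20,  mild_outlier_upper_bound    = (20 - 12) / 12 ≈ 0.67
#   extreme_outlier_lower = 10 - 3 * 4   = -2,  extreme_outlier_lower_bound = (-2 - 12) / 12 ≈ -1.17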
def output_excel_raw_data(writer, raw_data_df, sheet_name):
    """Output raw data into 'sheet_name' excel page.

    Args:
        writer (xlsxwriter): xlsxwriter handle
        raw_data_df (DataFrame): the DataFrame to output
        sheet_name (str): sheet name of the excel
    """
    # Output the raw data
    if isinstance(raw_data_df, pd.DataFrame) and not raw_data_df.empty:
        raw_data_df.to_excel(writer, sheet_name, index=True)
    else:
        logger.warning('FileHandler: excel_data_output - {} data_df is empty.'.format(sheet_name))
def get_vendor(self):
    """Get GPU vendor.

    Returns:
        str: GPU vendor, nvidia or amd.
    """
    if Path('/dev/nvidiactl').is_char_device() and Path('/dev/nvidia-uvm').is_char_device():
        if not list(Path('/dev').glob('nvidia[0-9]*')):
            logger.warning('Cannot find NVIDIA GPU device.')
        return 'nvidia'
    if Path('/dev/kfd').is_char_device() and Path('/dev/dri').is_dir():
        if not list(Path('/dev/dri').glob('card*')):
            logger.warning('Cannot find AMD GPU device.')
        return 'amd'
    return None
def rotate_dir(target_dir):
    """Rotate directory if it is not empty.

    Args:
        target_dir (Path): Target directory path.
    """
    try:
        if target_dir.is_dir() and any(target_dir.iterdir()):
            logger.warning('Directory %s is not empty.', str(target_dir))
            for i in itertools.count(start=1):
                backup_dir = target_dir.with_name(f'{target_dir.name}.bak{i}')
                if not backup_dir.is_dir():
                    target_dir.rename(backup_dir)
                    break
    except Exception:
        logger.exception('Failed to rotate directory %s.', str(target_dir))
        raise
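# Illustrative usage (hypothetical path, not from the source): if 'outputs/run1' already
# contains files, it is renamed to 'outputs/run1.bak1' (or .bak2, ... if that backup exists),
# leaving the original path free for a fresh run.
# rotate_dir(Path('outputs/run1'))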
def register_benchmark(cls, name, class_def, parameters='', platform=None):
    """Register new benchmark, key is the benchmark name.

    Args:
        name (str): internal name of benchmark.
        class_def (Benchmark): class object of benchmark.
        parameters (str): predefined parameters of benchmark.
        platform (Platform): Platform types like CUDA, ROCM.
    """
    if not name or not isinstance(name, str):
        logger.log_and_raise(
            TypeError,
            'Name of registered benchmark is not string - benchmark: {}, type: {}'.format(name, type(name))
        )
    if not issubclass(class_def, Benchmark):
        logger.log_and_raise(
            TypeError,
            'Registered class is not subclass of Benchmark - benchmark: {}, type: {}'.format(name, type(class_def))
        )

    if name not in cls.benchmarks:
        cls.benchmarks[name] = dict()

    if platform:
        if platform not in Platform:
            platform_list = list(map(str, Platform))
            logger.log_and_raise(
                TypeError,
                'Unknown platform - benchmark: {}, supported platforms: {}, but got: {}'.format(
                    name, platform_list, platform
                )
            )
        if platform in cls.benchmarks[name]:
            logger.warning('Duplicate registration - benchmark: {}, platform: {}'.format(name, platform))
        cls.benchmarks[name][platform] = (class_def, parameters)
    else:
        # If no platform is specified, the benchmark is registered for all platforms.
        for p in Platform:
            if p in cls.benchmarks[name]:
                logger.warning('Duplicate registration - benchmark: {}, platform: {}'.format(name, p))
            cls.benchmarks[name][p] = (class_def, parameters)

    cls.__parse_and_check_args(name, class_def, parameters)
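# Illustrative registration sketch (hypothetical benchmark class and parameters, not from the
# source): register one class for a single platform, or omit platform to register it for all.
# BenchmarkRegistry.register_benchmark(
#     'pytorch-mymodel', PytorchMyModel, parameters='--batch_size 32', platform=Platform.CUDA
# )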
def __select_benchmark(cls, name, platform):
    """Select benchmark by name and platform.

    Args:
        name (str): internal name of benchmark.
        platform (Platform): Platform type of benchmark.

    Return:
        benchmark_class (Benchmark): class object of benchmark.
        predefine_params (str): predefined parameters which are set when registering the benchmark.
    """
    if name not in cls.benchmarks or platform not in cls.benchmarks[name]:
        logger.warning('Benchmark has no implementation, name: {}, platform: {}'.format(name, platform))
        return (None, None)

    (benchmark_class, predefine_params) = cls.benchmarks[name][platform]
    return (benchmark_class, predefine_params)
def run(self):
    """Run the SuperBench benchmarks in distributed mode."""
    self.check_env()
    for benchmark_name in self._sb_benchmarks:
        if benchmark_name not in self._sb_enabled_benchmarks:
            continue
        benchmark_config = self._sb_benchmarks[benchmark_name]
        for mode in benchmark_config.modes:
            if mode.name == 'local':
                Parallel(n_jobs=mode.proc_num if mode.parallel else 1)(
                    delayed(self._run_proc)(benchmark_name, mode, {'proc_rank': proc_rank})
                    for proc_rank in range(mode.proc_num)
                )
            elif mode.name == 'torch.distributed' or mode.name == 'mpi':
                self._run_proc(benchmark_name, mode, {'proc_rank': 0})
            else:
                logger.warning('Unknown mode %s.', mode.name)
        self.fetch_results()
    self.__create_results_summary()
def output_all_nodes_results(self, raw_data_df, data_not_accept_df):
    """Output diagnosis results of all nodes.

    Args:
        raw_data_df (DataFrame): raw data
        data_not_accept_df (DataFrame): defective nodes' detailed information

    Returns:
        DataFrame: all nodes' detailed information including ['Accept', '#Issues', 'Category', 'Issue_Details']
    """
    append_columns = ['Accept', '#Issues', 'Category', 'Issue_Details']
    all_data_df = raw_data_df.astype('float64')

    if data_not_accept_df.shape[0] == 0:
        all_data_df['Accept'] = [True for i in range(len(all_data_df))]
        all_data_df['#Issues'] = [0 for i in range(len(all_data_df))]
        all_data_df['Category'] = [None for i in range(len(all_data_df))]
        all_data_df['Issue_Details'] = [None for i in range(len(all_data_df))]
    elif data_not_accept_df.shape[0] > 0:
        data_not_accept_df['Accept'] = [False for i in range(len(data_not_accept_df))]
        data_not_accept_df['#Issues'] = data_not_accept_df['Defective Details'].map(lambda x: len(x.split(',')))
        data_not_accept_df = data_not_accept_df.rename(columns={'Defective Details': 'Issue_Details'})
        for index in range(len(append_columns)):
            if append_columns[index] not in data_not_accept_df:
                logger.warning(
                    'DataDiagnosis: output_all_nodes_results - column {} not found in data_not_accept_df.'.format(
                        append_columns[index]
                    )
                )
                all_data_df[append_columns[index]] = None
            else:
                all_data_df = all_data_df.merge(
                    data_not_accept_df[[append_columns[index]]], left_index=True, right_index=True, how='left'
                )
        all_data_df['Accept'] = all_data_df['Accept'].replace(np.nan, True)
        all_data_df['#Issues'] = all_data_df['#Issues'].replace(np.nan, 0)
        all_data_df = all_data_df.replace(np.nan, '')
    return all_data_df
def run(self, ansible_config, sudo=False):    # pragma: no cover
    """Run Ansible runner.

    Args:
        ansible_config (dict): Ansible config dict.
        sudo (bool): Run as sudo or not. Defaults to False.

    Returns:
        int: Ansible return code.
    """
    if sudo:
        logger.info('Run as sudo ...')
        ansible_config['cmdline'] += ' --become'
    with tempfile.TemporaryDirectory(prefix='ansible') as tmpdir:
        r = ansible_runner.run(private_data_dir=tmpdir, **ansible_config)
        logger.debug(r.stats)
    if r.rc == 0:
        logger.info('Run succeeded, return code {}.'.format(r.rc))
    else:
        logger.warning('Run failed, return code {}.'.format(r.rc))
    return r.rc
def _get_metrics_by_benchmarks(self, metrics_list):
    """Get mappings of benchmarks:metrics from metrics_list.

    Args:
        metrics_list (list): list of metrics

    Returns:
        dict: metrics organized by benchmarks
    """
    benchmarks_metrics = {}
    for metric in metrics_list:
        if '/' not in metric:
            logger.warning('RuleBase: get_metrics_by_benchmarks - {} does not have benchmark_name'.format(metric))
        else:
            benchmark = metric.split('/')[0]
            # support annotations in benchmark naming
            if ':' in benchmark:
                benchmark = metric.split(':')[0]
            if benchmark not in benchmarks_metrics:
                benchmarks_metrics[benchmark] = set()
            benchmarks_metrics[benchmark].add(metric)
    return benchmarks_metrics
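# Illustrative mapping (hypothetical metric names, not from the source): the plain and the
# annotated form of a metric are grouped under the same benchmark key, e.g.
# ['nccl-bw/allreduce_8_bw', 'nccl-bw:8gpu/allreduce_8_bw'] ->
# {'nccl-bw': {'nccl-bw/allreduce_8_bw', 'nccl-bw:8gpu/allreduce_8_bw'}}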
def _benchmark(self):
    """Implementation for benchmarking.

    Return:
        True if the benchmark runs successfully.
    """
    precision_need_to_run = list()
    for precision in self._args.precision:
        # Check if the precision is supported or not.
        if precision not in self._supported_precision:
            logger.warning(
                'Can not run with specified precision - model: {}, supported precision: {}, specified precision: {}'.
                format(self._name, ' '.join([p.value for p in self._supported_precision]), precision)
            )
        else:
            precision_need_to_run.append(precision)

    if len(precision_need_to_run) == 0:
        self._result.set_return_code(ReturnCode.NO_SUPPORTED_PRECISION)
        return False

    for precision in precision_need_to_run:
        for model_action in self._args.model_action:
            self._sub_benchmark_start_time = time.time()
            if model_action == ModelAction.TRAIN:
                if not self.__train(precision):
                    return False
            elif model_action == ModelAction.INFERENCE:
                if not self.__inference(precision):
                    return False
            else:
                logger.warning(
                    'Model action has no implementation yet - model: {}, model_action: {}'.format(
                        self._name, model_action
                    )
                )
    return True
def creat_boxplot(raw_data_df, columns, output_dir):
    """Plot the boxplot for selected columns.

    Args:
        raw_data_df (DataFrame): raw data
        columns (list): selected metrics to plot the boxplot
        output_dir (str): the directory of output file
    """
    if not isinstance(raw_data_df, pd.DataFrame):
        logger.error('DataAnalyzer: the type of raw data is not pd.DataFrame')
        return
    if len(raw_data_df) == 0:
        logger.error('DataAnalyzer: empty data for boxplot.')
        return
    if not isinstance(columns, list):
        logger.error('DataAnalyzer: the type of columns should be list.')
        return
    try:
        data_columns = raw_data_df.columns
        # Collect valid columns instead of removing items from the list while iterating over it,
        # which would skip elements.
        valid_columns = []
        for column in columns:
            if column not in data_columns or raw_data_df[column].dtype != np.dtype('float'):
                logger.warning('DataAnalyzer: invalid column {} for boxplot.'.format(column))
            else:
                valid_columns.append(column)
        n = len(valid_columns)
        for i in range(n):
            sns.set(style='whitegrid')
            plt.subplot(n, 1, i + 1)
            sns.boxplot(x=valid_columns[i], data=raw_data_df, orient='h')
        plt.subplots_adjust(hspace=1)
        plt.savefig(output_dir + '/boxplot.png')
        plt.show()
    except Exception as e:
        logger.error('DataAnalyzer: creat_boxplot failed, msg: {}'.format(str(e)))
def statistic(raw_data_df):
    """Get the statistics of the raw data.

    The statistics include count, mean, std, min, max, 1%, 5%, 25%, 50%, 75%, 95%, 99%.

    Args:
        raw_data_df (DataFrame): raw data

    Returns:
        DataFrame: data statistics
    """
    data_statistics_df = pd.DataFrame()
    if not isinstance(raw_data_df, pd.DataFrame):
        logger.error('DataAnalyzer: the type of raw data is not pd.DataFrame')
        return data_statistics_df
    if len(raw_data_df) == 0:
        logger.warning('DataAnalyzer: empty data.')
        return data_statistics_df
    try:
        data_statistics_df = raw_data_df.describe()
        data_statistics_df.loc['1%'] = raw_data_df.quantile(0.01)
        data_statistics_df.loc['5%'] = raw_data_df.quantile(0.05)
        data_statistics_df.loc['95%'] = raw_data_df.quantile(0.95)
        data_statistics_df.loc['99%'] = raw_data_df.quantile(0.99)
        statistics_error = []
        for column in list(raw_data_df.columns):
            if column not in list(data_statistics_df.columns) and not raw_data_df[column].isnull().all():
                statistics_error.append(column)
        if statistics_error:
            logger.warning(
                'DataAnalyzer: [{}] is missing in statistics results.'.format(
                    ','.join(str(x) for x in statistics_error)
                )
            )
    except Exception as e:
        logger.error('DataAnalyzer: statistic failed, msg: {}'.format(str(e)))
    return data_statistics_df
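# Illustrative usage sketch (hypothetical metric values, not from the source): the returned
# frame is indexed by the describe() rows plus the extra quantile rows added above.
# df = pd.DataFrame({'kernel-launch/event_time': [0.0055, 0.0057, 0.0060, 0.0100]})
# stats_df = statistic(df)
# stats_df.loc['99%', 'kernel-launch/event_time']    # 99th-percentile value of that metric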
            if curr_step > self._args.num_warmup:
                # Save the step time of every training/inference step, unit is millisecond.
                duration.append((end - start) * 1000)
                if self._is_finished(curr_step, end):
                    return duration


# Register CNN benchmarks.
# Reference: https://pytorch.org/vision/0.8/models.html
#            https://github.com/pytorch/vision/tree/v0.8.0/torchvision/models
MODELS = [
    'alexnet', 'densenet121', 'densenet169', 'densenet201', 'densenet161', 'googlenet', 'inception_v3',
    'mnasnet0_5', 'mnasnet0_75', 'mnasnet1_0', 'mnasnet1_3', 'mobilenet_v2', 'resnet18', 'resnet34', 'resnet50',
    'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2', 'wide_resnet101_2',
    'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0', 'squeezenet1_0',
    'squeezenet1_1', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19'
]

for model in MODELS:
    if hasattr(models, model):
        BenchmarkRegistry.register_benchmark('pytorch-' + model, PytorchCNN, parameters='--model_type ' + model)
    else:
        logger.warning('model missing in torchvision.models - model: {}'.format(model))
def output_excel_data_not_accept(writer, data_not_accept_df, rules):
    """Output data_not_accept_df into 'Not Accept' excel page.

    Args:
        writer (xlsxwriter): xlsxwriter handle
        data_not_accept_df (DataFrame): the DataFrame to output
        rules (dict): the rules of DataDiagnosis
    """
    # Get the xlsxwriter workbook objects and init the format
    workbook = writer.book
    color_format_red = workbook.add_format({'bg_color': '#FFC7CE', 'font_color': '#9C0006'})
    percent_format = workbook.add_format({'num_format': '0.00%'})
    # Output the not accept
    if isinstance(data_not_accept_df, pd.DataFrame):
        data_not_accept_df.to_excel(writer, 'Not Accept', index=True)
        if not data_not_accept_df.empty:
            row_start = 1
            row_end = max(row_start, len(data_not_accept_df))
            columns = list(data_not_accept_df.columns)
            worksheet = writer.sheets['Not Accept']
            for rule in rules:
                for metric in rules[rule]['metrics']:
                    # The column index of the metrics should start from 1
                    col_index = columns.index(metric) + 1
                    # Apply percent format for the columns whose rules are variance type.
                    if rules[rule]['function'] == 'variance':
                        worksheet.conditional_format(
                            row_start, col_index, row_end, col_index,    # start_row, start_col, end_row, end_col
                            {
                                'type': 'no_blanks',
                                'format': percent_format
                            }
                        )
                    # Apply red format if the value violates the rule.
                    if rules[rule]['function'] == 'value' or rules[rule]['function'] == 'variance':
                        # Try the two-character operators first so '>=' is not matched as '>'.
                        match = re.search(r'(>=|<=|==|!=|>|<)(.+)', rules[rule]['criteria'])
                        if not match:
                            continue
                        symbol = match.group(1)
                        condition = float(match.group(2))
                        worksheet.conditional_format(
                            row_start, col_index, row_end, col_index,    # start_row, start_col, end_row, end_col
                            {
                                'type': 'cell',
                                'criteria': symbol,
                                'value': condition,
                                'format': color_format_red
                            }
                        )
        else:
            logger.warning('FileHandler: excel_data_output - data_not_accept_df is empty.')
    else:
        logger.warning('FileHandler: excel_data_output - data_not_accept_df is not DataFrame.')
def __get_mode_command(self, benchmark_name, mode, timeout=None):
    """Get runner command for given mode.

    Args:
        benchmark_name (str): Benchmark name.
        mode (DictConfig): Runner mode.
        timeout (int): The timeout value in seconds.

    Return:
        str: Runner command.
    """
    exec_command = ('sb exec --output-dir {output_dir} -c sb.config.yaml -C superbench.enable={name}').format(
        name=benchmark_name,
        output_dir=self._sb_output_dir,
    )
    if timeout is not None:
        exec_command = 'timeout {timeout} {command}'.format(timeout=timeout, command=exec_command)

    mode_command = exec_command
    if mode.name == 'local':
        mode_command = '{prefix} {command}'.format(
            prefix=mode.prefix.format(proc_rank=mode.proc_rank, proc_num=mode.proc_num),
            command=exec_command,
        )
        mode_command = f'PROC_RANK={mode.proc_rank} {mode_command.strip()}'
    elif mode.name == 'torch.distributed':
        # TODO: replace with torch.distributed.run in v1.9
        # TODO: only supports node_num=1 and node_num=all currently
        torch_dist_params = '' if mode.node_num == 1 else \
            '--nnodes=$NNODES --node_rank=$NODE_RANK --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT '
        mode_command = (
            f'python3 -m torch.distributed.launch'
            f' --use_env --no_python --nproc_per_node={mode.proc_num} {torch_dist_params}{exec_command}'
            f' superbench.benchmarks.{benchmark_name}.parameters.distributed_impl=ddp'
            f' superbench.benchmarks.{benchmark_name}.parameters.distributed_backend=nccl'
        )
    elif mode.name == 'mpi':
        mode_command = (
            'mpirun '    # use default OpenMPI in image
            '-tag-output '    # tag mpi output with [jobid,rank]<stdout/stderr> prefix
            '-allow-run-as-root '    # allow mpirun to run when executed by root user
            '{host_list} '    # use prepared hostfile and launch {proc_num} processes on each node
            '-bind-to numa '    # bind processes to numa
            '{mca_list} {env_list} {command}'
        ).format(
            host_list=f'-host localhost:{mode.proc_num}'
            if mode.node_num == 1 else f'-hostfile hostfile -map-by ppr:{mode.proc_num}:node',
            mca_list=' '.join(f'-mca {k} {v}' for k, v in mode.mca.items()),
            env_list=' '.join(
                f'-x {k}={str(v).format(proc_rank=mode.proc_rank, proc_num=mode.proc_num)}'
                if isinstance(v, str) else f'-x {k}' for k, v in mode.env.items()
            ),
            command=exec_command,
        )
    else:
        logger.warning('Unknown mode %s.', mode.name)
    return mode_command.strip()
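# Illustrative expansion (hypothetical mode settings, not from the source): for mode.name='local',
# prefix='CUDA_VISIBLE_DEVICES={proc_rank}', proc_rank=0 and benchmark 'kernel-launch', the
# generated command looks roughly like:
#   PROC_RANK=0 CUDA_VISIBLE_DEVICES=0 sb exec --output-dir <output_dir> -c sb.config.yaml -C superbench.enable=kernel-launch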