def _preprocess(self):
    """Preprocess/preparation operations before the benchmarking.

    Return:
        True if _preprocess() succeeds.
    """
    if not super()._preprocess():
        return False

    # Format the arguments.
    self._args.operation = self._args.operation.lower()

    # Check the arguments and generate the commands.
    op = self._args.operation
    if op not in self.__operations:
        self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
        logger.error(
            'Unsupported operation of NCCL test - benchmark: {}, operation: {}, expected: {}.'.format(
                self._name, op, ' '.join(list(self.__operations.keys()))
            )
        )
        return False
    else:
        self._bin_name = self.__operations[op]
        if not self._set_binary_path():
            return False

        command = os.path.join(self._args.bin_dir, self._bin_name)
        command += ' -b {} -e {} -f {} -g {} -c {} -n {} -w {}'.format(
            self._args.minbytes, self._args.maxbytes, str(self._args.stepfactor), str(self._args.ngpus),
            str(self._args.check), str(self._args.iters), str(self._args.warmup_iters)
        )
        self._commands.append(command)

    return True
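# A minimal, self-contained sketch of the command string that the _preprocess() above builds.
# The binary directory, operation binary, and argument values below are hypothetical examples,
# not the benchmark's actual defaults.
import os

bin_dir = '/usr/local/bin'          # hypothetical bin_dir
bin_name = 'all_reduce_perf'        # hypothetical NCCL test binary
command = os.path.join(bin_dir, bin_name)
command += ' -b {} -e {} -f {} -g {} -c {} -n {} -w {}'.format('8', '8G', 2, 8, 1, 20, 5)
print(command)
# /usr/local/bin/all_reduce_perf -b 8 -e 8G -f 2 -g 8 -c 1 -n 20 -w 5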
def _process_raw_result(self, idx, raw_output):
    """Function to process raw results and save the summarized results.

    self._result.add_raw_data() and self._result.add_result() need to be called to save the results.

    Args:
        idx (int): the index corresponding with the raw_output.
        raw_output (str): raw output string of the micro-benchmark.

    Return:
        True if the raw output string is valid and result can be extracted.
    """
    host = self.__hosts[idx]
    self._result.add_raw_data('raw_output_' + host, raw_output, self._args.log_raw_data)
    try:
        # If socket error or exception happens on TCPing, add result values as failed.
        suc = 0
        fail = self._args.count
        minimum = 0.00
        maximum = 0.00
        average = 0.00
        rate = 0
        # Parse and add result from table-like output of TCPing.
        if 'failure' not in raw_output:
            raw_output = raw_output.splitlines()
            labels = None
            for line in raw_output:
                # Get the line of the table labels.
                if 'Host' in line:
                    labels = [label.strip() for label in line.split('|')]
                if host in line:
                    res = [result.strip() for result in line.split('|')]
                    suc = int(res[labels.index('Successed')])
                    fail = int(res[labels.index('Failed')])
                    rate = float(res[labels.index('Success Rate')].strip('%'))
                    minimum = float(res[labels.index('Minimum')].strip('ms'))
                    maximum = float(res[labels.index('Maximum')].strip('ms'))
                    average = float(res[labels.index('Average')].strip('ms'))
        self._result.add_result(host + '_successed_count', suc)
        self._result.add_result(host + '_failed_count', fail)
        self._result.add_result(host + '_success_rate', rate)
        self._result.add_result(host + '_time_min', minimum)
        self._result.add_result(host + '_time_max', maximum)
        self._result.add_result(host + '_time_avg', average)
    except Exception as e:
        logger.error(
            'The result format is invalid - round: {}, benchmark: {}, address: {}, raw output: {}, message: {}.'.format(
                self._curr_run_index, self._name, host, raw_output, str(e)
            )
        )
        return False

    return True
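# A minimal sketch of how one row of the tcping-style table above is parsed.
# The table text is fabricated for illustration; only the column labels matter.
lines = [
    '| Host     | Port | Successed | Failed | Success Rate | Minimum | Maximum | Average |',
    '| 10.0.0.1 | 443  | 10        | 0      | 100.0%       | 0.32ms  | 0.89ms  | 0.45ms  |',
]
labels = [label.strip() for label in lines[0].split('|')]
res = [field.strip() for field in lines[1].split('|')]
print(
    int(res[labels.index('Successed')]),                  # 10
    float(res[labels.index('Success Rate')].strip('%')),  # 100.0
    float(res[labels.index('Average')].strip('ms')),      # 0.45
)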
def _benchmark(self):
    """Implementation for benchmarking.

    Return:
        True if the benchmark runs successfully.
    """
    for cmd_idx in range(len(self._commands)):
        logger.info(
            'Execute command - round: {}, benchmark: {}, command: {}.'.format(
                self._curr_run_index, self._name, self._commands[cmd_idx]
            )
        )
        output = run_command(self._commands[cmd_idx])
        if output.returncode != 0:
            self._result.set_return_code(ReturnCode.DOCKERBENCHMARK_EXECUTION_FAILURE)
            logger.error(
                'DockerBenchmark execution failed - round: {}, benchmark: {}, error message: {}.'.format(
                    self._curr_run_index, self._name, output.stdout
                )
            )
            return False
        else:
            if not self._process_raw_result(cmd_idx, output.stdout):
                self._result.set_return_code(ReturnCode.DOCKERBENCHMARK_RESULT_PARSING_FAILURE)
                return False

    return True
def _parse_rules(self, rules):
    """Parse the rules for result summary.

    Args:
        rules (dict): rules from rule yaml file.

    Returns:
        bool: True if all rules are parsed successfully, otherwise False.
    """
    try:
        if not rules:
            logger.error('ResultSummary: get rules failed')
            return False
        self._sb_rules = {}
        self._enable_metrics = set()
        benchmark_rules = rules['superbench']['rules']
        for rule in benchmark_rules:
            benchmark_rules[rule] = self._check_rules(benchmark_rules[rule], rule)
            self._sb_rules[rule] = {}
            self._sb_rules[rule]['name'] = rule
            self._sb_rules[rule]['categories'] = benchmark_rules[rule]['categories']
            self._sb_rules[rule]['metrics'] = {}
            self._sb_rules[rule]['statistics'] = benchmark_rules[rule]['statistics']
            self._sb_rules[rule]['aggregate'] = benchmark_rules[rule]['aggregate'] \
                if 'aggregate' in benchmark_rules[rule] else False
            super()._get_metrics(rule, benchmark_rules)
        return True
    except Exception as e:
        logger.error('ResultSummary: parse rules failed - {}'.format(str(e)))
        return False
def _postprocess(self):
    """Postprocess/cleanup operations after the benchmarking.

    Return:
        True if _postprocess() succeeds.
    """
    if not super()._postprocess():
        return False

    try:
        if self._args.distributed_impl == DistributedImpl.DDP:
            torch.distributed.barrier()
            torch.distributed.destroy_process_group()
    except BaseException as e:
        self._result.set_return_code(ReturnCode.DISTRIBUTED_SETTING_DESTROY_FAILURE)
        logger.error(
            'Post process failed - model: {}, distributed implementation: {}, message: {}.'.format(
                self._name, self._args.distributed_impl, str(e)
            )
        )
        return False

    if self._gpu_available:
        torch.cuda.synchronize()
    del self._target
    del self._optimizer
    del self._model
    if self._gpu_available:
        torch.cuda.empty_cache()

    return True
def _preprocess(self):
    """Preprocess/preparation operations before the benchmarking.

    Return:
        True if _preprocess() succeeds.
    """
    if not super()._preprocess():
        return False

    # Format the arguments.
    self._args.mem_type = [p.lower() for p in self._args.mem_type]

    # Check the arguments and generate the commands.
    if self._args.memory not in self._memory:
        self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
        logger.error(
            'Unsupported memory of bandwidth test - benchmark: {}, memory: {}, expected: {}.'.format(
                self._name, self._args.memory, ' '.join(self._memory)
            )
        )
        return False

    for mem_type in self._args.mem_type:
        if mem_type not in self._mem_types:
            self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
            logger.error(
                'Unsupported mem_type of bandwidth test - benchmark: {}, mem_type: {}, expected: {}.'.format(
                    self._name, mem_type, ' '.join(self._mem_types)
                )
            )
            return False

    return True
def _preprocess(self):
    """Preprocess/preparation operations before the benchmarking.

    Return:
        True if _preprocess() succeeds.
    """
    if not super()._preprocess():
        return False

    mlc_path = os.path.join(self._args.bin_dir, self._bin_name)
    if not os.access(mlc_path, os.X_OK | os.F_OK):
        logger.error(
            'Executable {} not found in {} or it is not executable'.format(self._bin_name, self._args.bin_dir)
        )
        return False

    # The mlc command requires hugepages to be enabled.
    mlc_wrapper = ' '.join(
        [
            'nr_hugepages=`cat /proc/sys/vm/nr_hugepages`;',
            'echo 4000 > /proc/sys/vm/nr_hugepages;',
            '%s;',
            'err=$?;',
            'echo ${nr_hugepages} > /proc/sys/vm/nr_hugepages;',
            '(exit $err)',
        ]
    )
    for test in self._args.tests:
        command = mlc_path + ' --%s' % test
        self._commands.append(mlc_wrapper % command)
    return True
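# A minimal sketch of the wrapped shell command produced above: the current hugepage count is
# saved, raised to 4000 for the mlc run, then restored while preserving mlc's exit code.
# The mlc path and test name below are hypothetical.
mlc_wrapper = ' '.join([
    'nr_hugepages=`cat /proc/sys/vm/nr_hugepages`;',
    'echo 4000 > /proc/sys/vm/nr_hugepages;',
    '%s;',
    'err=$?;',
    'echo ${nr_hugepages} > /proc/sys/vm/nr_hugepages;',
    '(exit $err)',
])
command = '/opt/mlc/mlc --bandwidth_matrix'    # hypothetical mlc_path and test
print(mlc_wrapper % command)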
def _process_numeric_result(self, metric, result, reduce_type=None, cal_percentile=False):
    """Function to save the numerical results.

    Args:
        metric (str): metric name which is the key.
        result (List[numbers.Number]): numerical result.
        reduce_type (ReduceType): the type of reduce function.
        cal_percentile (bool): whether to calculate the percentile results.

    Return:
        True if result list is not empty.
    """
    if len(result) == 0:
        logger.error(
            'Numerical result of benchmark is empty - round: {}, name: {}.'.format(self._curr_run_index, self._name)
        )
        return False

    self._result.add_raw_data(metric, result, self._args.log_raw_data)
    self._result.add_result(metric, statistics.mean(result), reduce_type)
    if cal_percentile:
        self._process_percentile_result(metric, result, reduce_type)
    return True
def __prepare_general_ib_command_params(self):
    """Prepare general params for ib commands.

    Returns:
        Str of ib command params if arguments are valid, otherwise False.
    """
    # Format the ib command type.
    self._args.command = self._args.command.lower()
    # Add message size for ib command.
    msg_size = f'-s {self._args.msg_size}' if self._args.msg_size > 0 else '-a'
    # Add GPUDirect for ib command.
    gpu_dev = ''
    if self._args.gpu_dev is not None:
        gpu = GPU()
        if gpu.vendor == 'nvidia':
            gpu_dev = f'--use_cuda={self._args.gpu_dev}'
        elif gpu.vendor == 'amd':
            gpu_dev = f'--use_rocm={self._args.gpu_dev}'
        else:
            self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
            logger.error('No GPU found - benchmark: {}'.format(self._name))
            return False
    # Generate ib command params.
    command_params = f'-F -n {self._args.iters} -d {self._args.ib_dev} {msg_size} {gpu_dev}'
    command_params = f'{command_params.strip()} --report_gbits'
    return command_params
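# A minimal sketch of the params string returned above, assuming hypothetical argument values
# (iters, ib_dev, msg_size, gpu_dev) and the NVIDIA branch of the GPU check.
iters, ib_dev, msg_size, gpu_dev = 2000, 'mlx5_0', 8388608, 0    # hypothetical values
msg_size_opt = f'-s {msg_size}' if msg_size > 0 else '-a'
gpu_opt = f'--use_cuda={gpu_dev}'                                # nvidia branch above
command_params = f'-F -n {iters} -d {ib_dev} {msg_size_opt} {gpu_opt}'
command_params = f'{command_params.strip()} --report_gbits'
print(command_params)
# -F -n 2000 -d mlx5_0 -s 8388608 --use_cuda=0 --report_gbits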
def parse_args(self, ignore_invalid=False):
    """Parse the arguments.

    Return:
        ret (bool): whether parsing succeeded or not.
        args (argparse.Namespace): parsed arguments.
        unknown (list): unknown arguments.
    """
    try:
        args, unknown = self._parser.parse_known_args(self._argv)
    except BaseException as e:
        if ignore_invalid:
            logger.info('Missing or invalid parameters, will ignore the error and skip the args checking.')
            return True, None, []
        else:
            logger.error('Invalid argument - benchmark: {}, message: {}.'.format(self._name, str(e)))
            return False, None, []

    ret = True
    if len(unknown) > 0:
        logger.error(
            'Unknown arguments - benchmark: {}, unknown arguments: {}'.format(self._name, ' '.join(unknown))
        )
        ret = False

    return ret, args, unknown
def _set_binary_path(self):
    """Search the binary from self._args.bin_dir or from the system environment path and set the binary directory.

    If self._args.bin_dir is specified, the binary is only searched inside it.
    Otherwise, the binary is searched from the system environment path.

    Return:
        True if the binary exists.
    """
    if self._bin_name is None:
        self._result.set_return_code(ReturnCode.MICROBENCHMARK_BINARY_NAME_NOT_SET)
        logger.error('The binary name is not set - benchmark: {}.'.format(self._name))
        return False

    # Keep the original bin_dir for error logging; only overwrite it once the binary is found.
    bin_path = shutil.which(self._bin_name, mode=os.X_OK, path=self._args.bin_dir)
    if bin_path is None:
        self._result.set_return_code(ReturnCode.MICROBENCHMARK_BINARY_NOT_EXIST)
        logger.error(
            'The binary does not exist - benchmark: {}, binary name: {}, binary directory: {}.'.format(
                self._name, self._bin_name, self._args.bin_dir
            )
        )
        return False

    self._args.bin_dir = os.path.dirname(bin_path)
    return True
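# A minimal sketch of the shutil.which() behavior relied on above: passing a directory restricts
# the search to it, while path=None falls back to the system PATH. 'ls' and '/usr/bin' are
# examples only.
import os
import shutil

full_path = shutil.which('ls', mode=os.X_OK, path='/usr/bin')    # search only /usr/bin
if full_path is None:
    full_path = shutil.which('ls', mode=os.X_OK, path=None)      # search the system PATH
print(full_path, os.path.dirname(full_path) if full_path else None)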
def __check_raw_data(self):
    """Check the validity of the raw data.

    Return:
        True if the raw data is:
          an instance of List[List[Number]] for BenchmarkType.MODEL.
          an instance of List[str] for BenchmarkType.DOCKER.
          an instance of List[List[Number]] or List[str] for BenchmarkType.MICRO.
    """
    for metric in self._result.raw_data:
        is_valid = True
        if self._benchmark_type == BenchmarkType.MODEL:
            is_valid = self.__is_list_list_type(self._result.raw_data[metric], numbers.Number)
        elif self._benchmark_type == BenchmarkType.DOCKER:
            is_valid = self.__is_list_type(self._result.raw_data[metric], str)
        elif self._benchmark_type == BenchmarkType.MICRO:
            is_valid = (
                self.__is_list_type(self._result.raw_data[metric], str)
                or self.__is_list_list_type(self._result.raw_data[metric], numbers.Number)
            )
        if not is_valid:
            logger.error(
                'Invalid raw data type - benchmark: {}, metric: {}, raw data: {}.'.format(
                    self._name, metric, self._result.raw_data[metric]
                )
            )
            return False

    return True
def run(self):
    """Function to launch the benchmarking.

    Return:
        True if the benchmark runs successfully.
    """
    ret = True
    try:
        ret &= self._preprocess()
        if ret:
            self._start_time = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
            for self._curr_run_index in range(self._args.run_count):
                ret &= self._benchmark()
            self._end_time = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
            self._result.set_timestamp(self._start_time, self._end_time)
            if ret:
                ret &= self.__check_result_format()
    except BaseException as e:
        self._result.set_return_code(ReturnCode.RUNTIME_EXCEPTION_ERROR)
        logger.error('Run benchmark failed - benchmark: {}, message: {}'.format(self._name, str(e)))
    finally:
        ret &= self._postprocess()

    return ret
def _preprocess(self):
    """Preprocess/preparation operations before the benchmarking.

    Return:
        True if _preprocess() succeeds.
    """
    self.add_parser_arguments()
    ret, self._args, unknown = self.parse_args()

    if not ret:
        self._result = BenchmarkResult(self._name, self._benchmark_type, ReturnCode.INVALID_ARGUMENT)
        return False

    self._result = BenchmarkResult(
        self._name, self._benchmark_type, ReturnCode.SUCCESS, run_count=self._args.run_count
    )

    if not isinstance(self._benchmark_type, BenchmarkType):
        logger.error(
            'Invalid benchmark type - benchmark: {}, type: {}'.format(self._name, type(self._benchmark_type))
        )
        self._result.set_return_code(ReturnCode.INVALID_BENCHMARK_TYPE)
        return False

    return True
def __init__(self, shape, world_size, dtype=torch.float):
    """Constructor.

    Args:
        shape (List[int]): Shape of dataset.
        world_size (int): Number of workers.
        dtype (torch.dtype): Type of the elements.
    """
    self._len = 0
    self._data = None
    try:
        if dtype in [torch.float32, torch.float64]:
            self._data = torch.randn(*shape, dtype=dtype)
        elif dtype in [torch.int8, torch.int16, torch.int32, torch.int64]:
            self._data = torch.randint(0, 128, tuple(shape), dtype=dtype)
        else:
            logger.error('Unsupported precision for RandomDataset - data type: {}.'.format(dtype))
            return
    except BaseException as e:
        logger.error(
            'Generate random dataset failed - data type: {}, shape: {}, message: {}.'.format(dtype, shape, str(e))
        )
        return

    self._len = shape[0] * world_size
    self._world_size = world_size
def __get_network_bytes(self):
    """Method to get the network traffic information, unit: bytes.

    Return:
        The bytes transferred on the network; None means failure to get the data.
    """
    net_info = dict()
    try:
        with open(self._net_file, 'r') as f:
            for line in f:
                items = line.split()
                if len(items) != 17:
                    continue
                else:
                    receive_bytes = int(items[1])
                    transmit_bytes = int(items[9])
                    net_info[items[0].strip()[:-1]] = [receive_bytes, transmit_bytes]
        return net_info
    except BaseException as e:
        logger.error('Failed to read network traffic information - error message: {}'.format(str(e)))
        return None
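# A minimal sketch of the /proc/net/dev parsing above; the sample line and its counters are
# fabricated. Lines that do not have exactly 17 fields (e.g. the header lines) are skipped by
# the len(items) != 17 check.
line = 'eth0: 10200 80 0 0 0 0 0 0 20400 90 0 0 0 0 0 0'    # fabricated counters
items = line.split()
if len(items) == 17:
    receive_bytes = int(items[1])     # total bytes received on the interface
    transmit_bytes = int(items[9])    # total bytes transmitted on the interface
    print(items[0].strip()[:-1], receive_bytes, transmit_bytes)
# eth0 10200 20400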
def _create_model(self, precision):
    """Construct the model for benchmarking.

    Args:
        precision (Precision): precision of model and input data, such as float32, float16.
    """
    self._config = GPT2Config(
        n_embd=self._args.hidden_size,
        n_layer=self._args.num_hidden_layers,
        n_head=self._args.num_attention_heads
    )

    try:
        self._model = GPT2BenchmarkModel(self._config, self._args.num_classes)
        self._model = self._model.to(dtype=getattr(torch, precision.value))
        if self._gpu_available:
            self._model = self._model.cuda()
    except BaseException as e:
        logger.error(
            'Create model with specified precision failed - model: {}, precision: {}, message: {}.'.format(
                self._name, precision, str(e)
            )
        )
        return False

    self._target = torch.LongTensor(self._args.batch_size).random_(self._args.num_classes)
    if self._gpu_available:
        self._target = self._target.cuda()

    return True
def run(self):
    """Method representing the process's activity.

    Return:
        True if launching the process succeeds.
    """
    if self.__running.value == 0:
        if not self.__preprocess():
            return False

        try:
            logger.info('Start monitoring.')
            self.__running.value = 1
            self.__sample()
            self.__scheduler.run()
        except BaseException as e:
            logger.error('Failed to launch the monitor process - error message: {}'.format(str(e)))
            self.stop()
            return False
    else:
        logger.error('Monitor is still running')

    return True
def __kernel_nccl_pipeline(self, kernel, matA, matB, stages, message, times):
    """Computation and NCCL kernel pipeline with single GPU.

    Args:
        kernel (ComputationKernelType): the type of the computation kernel to run.
        matA (list[tensor]): the matrix list used in matmul or mul for every stage.
        matB (tensor): the matrix used in matmul.
        stages (int): the ratio number of computation kernel and communication kernel.
        message (tensor): the data used to be transferred through NCCL.
        times (int): number of times in one step to run.

    Return:
        True if the computation kernel type is valid, otherwise False.
    """
    if kernel == ComputationKernelType.MUL:
        for i in range(times):
            torch.distributed.all_reduce(message, op=torch.distributed.ReduceOp.SUM, async_op=True)
            for stage in range(stages):
                matA[stage].mul(matA[stage])
    elif kernel == ComputationKernelType.MATMUL:
        for i in range(times):
            torch.distributed.all_reduce(message, op=torch.distributed.ReduceOp.SUM, async_op=True)
            for stage in range(stages):
                matA[stage].matmul(matB)
    else:
        logger.error('Unknown computation kernel type - benchmark: {}, type: {}.'.format(self._name, kernel))
        return False
    return True
def output_diagnosis_in_jsonl(self, data_not_accept_df, output_path):
    """Output data_not_accept_df into jsonl file.

    Args:
        data_not_accept_df (DataFrame): the DataFrame to output.
        output_path (str): the path of output jsonl file.
    """
    p = Path(output_path)
    try:
        # Validate the input before converting it, so that an invalid or empty DataFrame
        # is reported instead of raising inside to_json().
        if not isinstance(data_not_accept_df, pd.DataFrame):
            logger.warning('DataDiagnosis: output json data - data_not_accept_df is not DataFrame.')
            return
        if data_not_accept_df.empty:
            logger.warning('DataDiagnosis: output json data - data_not_accept_df is empty.')
            return
        data_not_accept_json = data_not_accept_df.to_json(orient='index')
        data_not_accept = json.loads(data_not_accept_json)
        with p.open('w') as f:
            for node in data_not_accept:
                line = data_not_accept[node]
                line['Index'] = node
                json_str = json.dumps(line)
                f.write(json_str + '\n')
    except Exception as e:
        logger.error('DataDiagnosis: output json data failed, msg: {}'.format(str(e)))
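# A minimal sketch of the DataFrame-to-jsonl conversion above, using a tiny fabricated
# DataFrame; column and index names are examples only.
import json

import pandas as pd

df = pd.DataFrame({'metric_a': [1.0], 'metric_b': [2.5]}, index=['node-0'])
records = json.loads(df.to_json(orient='index'))    # {'node-0': {'metric_a': 1.0, 'metric_b': 2.5}}
for node, line in records.items():
    line['Index'] = node
    print(json.dumps(line))
# {"metric_a": 1.0, "metric_b": 2.5, "Index": "node-0"}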
def _process_raw_result(self, cmd_idx, raw_output):
    """Function to parse raw results and save the summarized results.

    self._result.add_raw_data() and self._result.add_result() need to be called to save the results.

    Args:
        cmd_idx (int): the index of command corresponding with the raw_output.
        raw_output (str): raw output string of the micro-benchmark.

    Return:
        True if the raw output string is valid and result can be extracted.
    """
    self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)

    try:
        output_lines = [x.strip() for x in raw_output.strip().splitlines()]
        for output_line in output_lines:
            tag, bw_str = output_line.split()
            self._result.add_result(tag + '_bw', float(bw_str))
    except BaseException as e:
        self._result.set_return_code(ReturnCode.MICROBENCHMARK_RESULT_PARSING_FAILURE)
        logger.error(
            'The result format is invalid - round: {}, benchmark: {}, raw output: {}, message: {}.'.format(
                self._curr_run_index, self._name, raw_output, str(e)
            )
        )
        return False

    return True
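# A minimal sketch of the two-column parsing above, with fabricated raw output
# (one tag plus one bandwidth value per line); metric names and numbers are examples only.
raw_output = 'H2D_Bandwidth 26.1\nD2H_Bandwidth 24.3\n'
for output_line in (x.strip() for x in raw_output.strip().splitlines()):
    tag, bw_str = output_line.split()
    print(tag + '_bw', float(bw_str))
# H2D_Bandwidth_bw 26.1
# D2H_Bandwidth_bw 24.3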
def _parse_rules_and_baseline(self, rules, baseline):
    """Parse and merge rules and baseline read from file.

    Args:
        rules (dict): rules from rule yaml file.
        baseline (dict): baseline of metrics from baseline json file.

    Returns:
        bool: True if the criteria for all rules are parsed successfully, otherwise False.
    """
    try:
        if not rules:
            logger.error('DataDiagnosis: get criteria failed')
            return False
        self._sb_rules = {}
        self._enable_metrics = set()
        benchmark_rules = rules['superbench']['rules']
        self._raw_rules = benchmark_rules
        for rule in benchmark_rules:
            benchmark_rules[rule] = self._check_and_format_rules(benchmark_rules[rule], rule)
            self._sb_rules[rule] = {}
            self._sb_rules[rule]['name'] = rule
            self._sb_rules[rule]['function'] = benchmark_rules[rule]['function']
            self._sb_rules[rule]['store'] = 'store' in benchmark_rules[rule] and benchmark_rules[rule]['store'] is True
            self._sb_rules[rule]['criteria'] = benchmark_rules[rule]['criteria']
            self._sb_rules[rule]['categories'] = benchmark_rules[rule]['categories']
            self._sb_rules[rule]['metrics'] = {}
            self.__get_metrics_and_baseline(rule, benchmark_rules, baseline)
        self._enable_metrics = sorted(list(self._enable_metrics))
    except Exception as e:
        logger.error('DataDiagnosis: get criteria failed - {}'.format(str(e)))
        return False
    return True
def _sync_result(self, result):
    """Function to reduce the result to rank 0.

    Args:
        result (list): the result data to sync.

    Return:
        The reduced result data if the reduction succeeds, otherwise None.
    """
    result = super()._sync_result(result)
    if not result:
        return None

    try:
        if self._args.distributed_impl == DistributedImpl.DDP:
            if self._args.distributed_backend == DistributedBackend.NCCL:
                tensor = torch.as_tensor(result).cuda()
            else:
                tensor = torch.as_tensor(result)
            torch.distributed.all_reduce(tensor, op=torch.distributed.ReduceOp.MAX)
            result = tensor.tolist()
    except BaseException as e:
        logger.error(
            'Sync train result failed - model: {}, distributed implementation: {}, message: {}.'.format(
                self._name, self._args.distributed_impl, str(e)
            )
        )
        return None

    return result
def _preprocess(self):
    """Preprocess/preparation operations before the benchmarking.

    Return:
        True if _preprocess() succeeds.
    """
    if not super()._preprocess():
        return False

    if self._args.distributed_impl != DistributedImpl.DDP:
        self._result.set_return_code(ReturnCode.DISTRIBUTED_SETTING_INIT_FAILURE)
        logger.error(
            'Unsupported distributed implementation - model: {}, distributed implementation: {}.'.format(
                self._name, self._args.distributed_impl
            )
        )
        return False

    if ShardingMode.ALLGATHER in self._args.mode or ShardingMode.ALLREDUCE in self._args.mode:
        try:
            torch.distributed.init_process_group(backend='nccl')
            self.__world_size = int(os.environ['WORLD_SIZE'])
            self.__local_rank = int(os.environ['LOCAL_RANK'])
        except BaseException as e:
            self._result.set_return_code(ReturnCode.DISTRIBUTED_SETTING_INIT_FAILURE)
            torch.distributed.destroy_process_group()
            logger.error(
                'Initialize distributed env failed - benchmark: {}, message: {}.'.format(self._name, str(e))
            )
            return False

        if torch.cuda.is_available():
            torch.cuda.set_device(self.__local_rank)

    return True
def _benchmark(self):
    """Implementation for benchmarking.

    Return:
        True if the benchmark runs successfully.
    """
    logger.info('TCP validation - round: {0}, name: {1}'.format(self._curr_run_index, self._name))

    # Run TCPing on the hosts in the hostfile in parallel.
    try:
        outputs = Parallel(n_jobs=min(len(self.__hosts), self._args.parallel))(
            delayed(run_tcping)(self.__hosts[i], self._args.port, self._args.count, self._args.timeout)
            for i in range(len(self.__hosts))
        )
    except Exception as e:
        self._result.set_return_code(ReturnCode.MICROBENCHMARK_EXECUTION_FAILURE)
        logger.error(
            'Microbenchmark execution failed - round: {}, benchmark: {}, error message: {}.'.format(
                self._curr_run_index, self._name, str(e)
            )
        )
        return False

    # Parse the output and get the results.
    for host_index, out in enumerate(outputs):
        if not self._process_raw_result(host_index, out):
            self._result.set_return_code(ReturnCode.MICROBENCHMARK_RESULT_PARSING_FAILURE)
            return False

    return True
def _benchmark(self):
    """Implementation for benchmarking."""
    M = self._args.m
    K = self._args.k
    N = self._args.n
    for mode in self._args.mode:
        if mode == ShardingMode.NOSHARDING:
            elapse_times = self.__matmul_nosharding(M, K, N)
        elif mode == ShardingMode.ALLREDUCE:
            elapse_times = self.__matmul_allreduce(M, K, N)
        elif mode == ShardingMode.ALLGATHER:
            elapse_times = self.__matmul_allgather(M, K, N)
        else:
            logger.error('Unknown sharding mode - benchmark: {}, mode: {}.'.format(self._name, mode))
            return False

        metric = '{}_time'.format(mode)
        if not self._process_numeric_result(metric, elapse_times, reduce_type=ReduceType.MAX):
            return False

        logger.info(
            'Matmul sharding - round: {0}, name: {1}, shape: ({2}, {3}) * ({3}, {4}), mode: {5}, cost: {6} ms'.format(
                self._curr_run_index, self._name, M, K, N, mode, statistics.mean(elapse_times)
            )
        )

    return True
def _preprocess(self):
    """Preprocess/preparation operations before the benchmarking.

    Return:
        True if _preprocess() succeeds.
    """
    if not super()._preprocess():
        return False

    # Check if the content of the hostfile is valid and not empty.
    valid = True
    try:
        with open(self._args.hostfile, 'r') as f:
            self.__hosts = f.readlines()
            for i in range(len(self.__hosts)):
                self.__hosts[i] = self.__hosts[i].rstrip('\n')
    except Exception:
        valid = False
    if not valid or len(self.__hosts) == 0:
        self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
        logger.error('Invalid hostfile - benchmark: {}, hostfile: {}.'.format(self._name, self._args.hostfile))
        return False

    return True
def correlation(raw_data_df):
    """Get the correlations.

    Args:
        raw_data_df (DataFrame): raw data.

    Returns:
        DataFrame: correlations.
    """
    data_corr_df = pd.DataFrame()
    if not isinstance(raw_data_df, pd.DataFrame):
        logger.error('DataAnalyzer: the type of raw data is not pd.DataFrame')
        return data_corr_df
    if len(raw_data_df) == 0:
        logger.warning('DataAnalyzer: empty data.')
        return data_corr_df
    try:
        data_corr_df = raw_data_df.corr()
        statistics_error = []
        for column in list(raw_data_df.columns):
            if column not in list(data_corr_df.columns) and not raw_data_df[column].isnull().all():
                statistics_error.append(column)
        if statistics_error:
            logger.warning(
                'DataAnalyzer: [{}] is missing in correlation results.'.format(
                    ','.join(str(x) for x in statistics_error)
                )
            )
    except Exception as e:
        logger.error('DataAnalyzer: correlation failed, msg: {}'.format(str(e)))
    return data_corr_df
def _process_raw_result(self, cmd_idx, raw_output):
    """Function to parse raw results and save the summarized results.

    self._result.add_raw_data() and self._result.add_result() need to be called to save the results.

    Args:
        cmd_idx (int): the index of command corresponding with the raw_output.
        raw_output (str): raw output string of the micro-benchmark.

    Return:
        True if the raw output string is valid and result can be extracted.
    """
    self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)

    pattern = r'\d+\.\d+'
    result = re.findall(pattern, raw_output)
    if len(result) != 2:
        logger.error(
            'Cannot extract kernel launch overhead in event and wall mode - round: {}, benchmark: {}, raw data: {}.'.format(
                self._curr_run_index, self._name, raw_output
            )
        )
        return False

    try:
        result = [float(item) for item in result]
    except BaseException as e:
        logger.error(
            'The result format is invalid - round: {}, benchmark: {}, result: {}, message: {}.'.format(
                self._curr_run_index, self._name, result, str(e)
            )
        )
        return False

    self._result.add_result('event_time', result[0])
    self._result.add_result('wall_time', result[1])

    return True
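# A minimal sketch of the regex extraction above; the raw output line is fabricated and only
# illustrates that exactly two floats (event time, wall time) are expected.
import re

raw_output = 'Kernel launch overhead: event 0.0055 ms, wall 0.0062 ms'    # fabricated output
result = re.findall(r'\d+\.\d+', raw_output)
if len(result) == 2:
    event_time, wall_time = (float(item) for item in result)
    print(event_time, wall_time)    # 0.0055 0.0062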
def add_result(self, metric, value, reduce_type=None):
    """Add summarized data into result.

    Args:
        metric (str): metric name which is the key.
        value (float): summarized data.
          For e2e model benchmarks, the value is step-time or throughput.
          For micro-benchmarks, the value is FLOPS, bandwidth, etc.
        reduce_type (ReduceType): type of reduce function.

    Return:
        True if the result is added successfully.
    """
    if not metric or not isinstance(metric, str):
        logger.error(
            'metric name of benchmark is not string, name: {}, metric type: {}'.format(self.__name, type(metric))
        )
        return False

    if metric not in self.__result:
        self.__result[metric] = list()
        self.__reduce_op[metric] = reduce_type.value if isinstance(reduce_type, Enum) else None
    self.__result[metric].append(value)

    return True