def MakeSamplesFromOutput(metadata, output):
  """Create samples containing metrics.

  Args:
    metadata: dict contains all the metadata that reports.
    output: string, command output
  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/mlperf_benchmark_test.py

  Returns:
    Samples containing training metrics.
  """
  eval_rows = regex_util.ExtractAllMatches(
      r':::MLPv(\S+) resnet (\d+\.\d+) .* eval_accuracy: {(.*)}', output)
  samples = []
  first_timestamp = None
  for version, raw_wall_time, payload in eval_rows:
    timestamp = float(raw_wall_time)
    # Anchor elapsed times to the first eval record seen.
    if not first_timestamp:
      first_timestamp = timestamp
    epoch = regex_util.ExtractExactlyOneMatch(r'"epoch": (\d+)', payload)
    accuracy = regex_util.ExtractExactlyOneMatch(r'"value": (0\.\d+)', payload)
    eval_metadata = metadata.copy()
    eval_metadata['times'] = timestamp - first_timestamp
    eval_metadata['epoch'] = int(epoch)
    eval_metadata['version'] = version
    samples.append(
        sample.Sample('Eval Accuracy', float(accuracy) * 100, '%',
                      eval_metadata))
  total_seconds = regex_util.ExtractExactlyOneMatch(
      r'RESULT,resnet,.*,(\d+),.*,.*', output)
  samples.append(sample.Sample('Times', int(total_seconds), 'seconds',
                               metadata))
  return samples
def MakeSamplesFromOutput(metadata, output, use_tpu=False, model='resnet'):
  """Create samples containing metrics.

  Args:
    metadata: dict contains all the metadata that reports.
    output: string, command output
    use_tpu: bool, whether tpu is in use
    model: string, model name
  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/mlperf_benchmark_test.py

  Returns:
    Samples containing training metrics.
  """
  samples = []
  results = regex_util.ExtractAllMatches(
      r':::MLL (\d+\.\d+) eval_accuracy: {(.*)}', output)
  start = None
  for wall_time, result in results:
    wall_time = float(wall_time)
    # Times are reported relative to the first eval record.
    if not start:
      start = wall_time
    metadata_copy = metadata.copy()
    epoch = regex_util.ExtractExactlyOneMatch(r'"epoch_num": (\d+)', result)
    if 'transformer' in model and not use_tpu:
      # GPU transformer runs log the accuracy value quoted.
      value = regex_util.ExtractExactlyOneMatch(r'"value": "(\d+\.\d+)"',
                                                result)
    elif 'mask' in model:
      mask_value, mask_metadata = regex_util.ExtractExactlyOneMatch(
          r'^"value": (.*?), "metadata": (.*)$', result)
      # Parse the JSON accuracy payload once (it was previously json.loads'd
      # twice for the same string).
      accuracy_dict = json.loads(mask_value)['accuracy']
      value = accuracy_dict['BBOX']
      metadata_copy.update(accuracy_dict)
      metadata_copy.update(json.loads(mask_metadata))
    else:
      value = regex_util.ExtractExactlyOneMatch(r'"value": (\d+\.\d+)', result)
    metadata_copy['times'] = wall_time - start
    metadata_copy['epoch'] = int(epoch)
    samples.append(
        sample.Sample('Eval Accuracy', float(value) * 100, '%', metadata_copy))
  if not use_tpu:
    # The RESULT line's total-seconds column position differs for minigo.
    if 'minigo' in model:
      times = regex_util.ExtractAllMatches(r'RESULT,.*,(\d+),.*,.*', output)
    else:
      times = regex_util.ExtractAllMatches(r'RESULT,.*,.*,(\d+),.*,.*', output)
    samples.append(
        sample.Sample('Time', int(times[0]), 'seconds', metadata))
  return samples
def MakeSamplesFromEvalOutput(metadata, output, elapsed_seconds, use_tpu=True):
  """Create a sample containing evaluation metrics.

  Args:
    metadata: dict contains all the metadata that reports.
    output: string, command output
    elapsed_seconds: float, elapsed seconds from saved checkpoint.
    use_tpu: bool, whether tpu is used
  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/resnet_benchmark_test.py

  Returns:
    a Sample containing evaluation metrics
  """
  # TPU and CPU/GPU runs log the eval dict with different field ordering.
  if use_tpu:
    tpu_pattern = (r'Saving dict for global step \d+: global_step = (\d+), '
                   r'loss = (\d+\.\d+), top_1_accuracy = (\d+\.\d+), '
                   r'top_5_accuracy = (\d+\.\d+)')
    step, loss, top_1_accuracy, top_5_accuracy = (
        regex_util.ExtractExactlyOneMatch(tpu_pattern, output))
  else:
    cpu_gpu_pattern = (
        r'tensorflow:Saving dict for global step \d+: accuracy = (\d+\.\d+), '
        r'accuracy_top_5 = (\d+\.\d+), global_step = (\d+),'
        r' loss = (\d+\.\d+)')
    top_1_accuracy, top_5_accuracy, step, loss = (
        regex_util.ExtractExactlyOneMatch(cpu_gpu_pattern, output))
  step = int(step)
  eval_metadata = metadata.copy()
  eval_metadata['step'] = step
  eval_metadata['epoch'] = step / metadata['num_examples_per_epoch']
  eval_metadata['elapsed_seconds'] = elapsed_seconds
  # Top-1: the single highest-probability prediction must match the target
  # label. Top-5: the target label must appear among the five predictions
  # with the highest probabilities.
  return [
      sample.Sample('Eval Loss', float(loss), '', eval_metadata),
      sample.Sample('Top 1 Accuracy', float(top_1_accuracy) * 100, '%',
                    eval_metadata),
      sample.Sample('Top 5 Accuracy', float(top_5_accuracy) * 100, '%',
                    eval_metadata),
  ]
def _Install(vm):
  """Installs the Mellanox OpenFabrics (MOFED) driver on the VM.

  Note: the original docstring said "OpenMPI package", but the code below
  downloads and installs the Mellanox OFED driver and enables RDMA.

  Args:
    vm: The VM to install the driver on.

  Raises:
    ValueError: If the VM's OS type is not in MOFED_OS_MAPPING.
    errors.Setup.InvalidSetupError: If the driver tarball cannot be
      downloaded and extracted.
    errors.Benchmarks.PrepareException: If the installer does not report
      success.
  """
  if vm.OS_TYPE not in MOFED_OS_MAPPING:
    raise ValueError('OS type {} not in {}'.format(
        vm.OS_TYPE, sorted(MOFED_OS_MAPPING)))
  driver = MOFED_DRIVER.format(version=FLAGS.mofed_version,
                               os=MOFED_OS_MAPPING[vm.OS_TYPE])
  vm.InstallPackages('libdapl2 libmlx4-1')
  try:
    vm.RemoteCommand('curl -fSsL {} | tar -zxpf -'.format(driver))
  except Exception as e:  # Was a bare `except:`; keep the cause chained.
    raise errors.Setup.InvalidSetupError(
        'Failed to download {}'.format(driver)) from e
  stdout, _ = vm.RemoteCommand(
      'cd MLNX_OFED_LINUX-* && sudo ./mlnxofedinstall '
      '--force')
  if not regex_util.ExtractExactlyOneMatch(
      r'Installation passed successfully', stdout):
    raise errors.Benchmarks.PrepareException(
        'Mellanox OpenFabrics driver isn\'t installed successfully.')
  vm.RemoteCommand('sudo /etc/init.d/openibd restart')
  # Enable RDMA in the Azure Linux agent configuration.
  vm.RemoteCommand("sudo sed -i -e 's/# OS.EnableRDMA=y/"
                   "OS.EnableRDMA=y/g' /etc/waagent.conf")
  vm.RemoteCommand("sudo sed -i -e 's/# OS.UpdateRdmaDriver=y/"
                   "OS.UpdateRdmaDriver=y/g' /etc/waagent.conf")
  # https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes-hpc#rdma-capable-instances
  vm.RemoteCommand('cat << EOF | sudo tee -a /etc/security/limits.conf\n'
                   '* hard memlock unlimited\n'
                   '* soft memlock unlimited\n'
                   '* hard nofile 65535\n'
                   '* soft nofile 65535\n'
                   'EOF')
def ParseOpenSSLOutput(raw_result: str, version: str, parallelism: int):
  """Parse output from openssl speed and return as samples."""
  # The `evp` line lists one throughput figure per tested block size.
  evp_fields = regex_util.ExtractExactlyOneMatch(
      r'evp\s+(.*)', raw_result).split()
  samples = []
  for idx, blocksize in enumerate(BLOCKSIZES_IN_BYTES):
    # Each field looks like "<number><unit>", e.g. "123456.78k".
    value, unit = regex_util.ExtractExactlyOneMatch(
        r'([\d\.]+)(\w+)', evp_fields[idx])
    samples.append(
        sample.Sample(
            'Throughput', float(value), unit, {
                'duration': _OPENSSL_SPEED_DURATION.value,
                'algorithm': _OPENSSL_SPEED_ALGORITHM.value,
                'parallelism': parallelism,
                'version': version,
                'blocksize': blocksize,
            }))
  return samples
def MakeSamplesFromOutput(metadata, output):
  """Create samples containing metrics.

  Args:
    metadata: dict contains all the metadata that reports.
    output: string, command output
  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/nccl_benchmark_test.py

  Returns:
    Samples containing training metrics, and the bandwidth
  """
  samples = []
  # Merge the named groups of the header line into the shared metadata.
  # NOTE(review): assumes _SAMPLE_LINE_RE matches at the start of `output`;
  # a non-match would make .groupdict() raise AttributeError — confirm with
  # nccl_benchmark_test.py fixtures.
  metadata.update(_SAMPLE_LINE_RE.match(output).groupdict())
  # Record each rank's device description, e.g. "Rank  0" -> device string.
  results = regex_util.ExtractAllMatches(r'(Rank\s+\d+) (.*)', output)
  for rank, device in results:
    metadata[rank] = device
  # One row per benchmark line; the 12 groups line up with _METADATA_COLUMNS.
  results = regex_util.ExtractAllMatches(
      r'^\s*'
      r'(\d+)\s+'
      r'(\d+)\s+'
      r'(\w+)\s+'
      r'(\w+)\s+'
      r'(\d+(?:\.\d+)?)\s+'
      r'(\d+(?:\.\d+)?)\s+'
      r'(\d+(?:\.\d+)?)\s+'
      r'(\S+)\s+'
      r'(\d+(?:\.\d+)?)\s+'
      r'(\d+(?:\.\d+)?)\s+'
      r'(\d+(?:\.\d+)?)\s+'
      r'(\S+)', output, re.MULTILINE)
  max_out_of_place_algbw = 0
  for row in results:
    metadata_copy = metadata.copy()
    metadata_copy.update(zip(_METADATA_COLUMNS, row))
    # Emit one sample per configured metric name, sorted for stable order.
    for metric, metadata_key in sorted(_SAMPLE_NAMES.items()):
      samples.append(
          sample.Sample(metric, float(metadata_copy[metadata_key]), 'GB/s',
                        metadata_copy))
    # Gbps is gigaBIT per second and GB/s is gigaBYTE per second
    max_out_of_place_algbw = max(
        max_out_of_place_algbw, float(metadata_copy['out_of_place_algbw']))
  avg_bus_bandwidth = regex_util.ExtractExactlyOneMatch(
      r'Avg bus bandwidth\s+: ([0-9\.]+)', output)
  samples.append(
      sample.Sample('avg_busbw', float(avg_bus_bandwidth), 'GB/s', metadata))
  # Convert GB/s to Gbps (x8) for the max algorithm bandwidth sample.
  samples.append(
      sample.Sample('max_out_of_place_algbw', max_out_of_place_algbw * 8,
                    'Gbps', metadata))
  return samples, max_out_of_place_algbw
def PrepareService(self, location):
  # Storage account names must be globally unique; derive from the run URI.
  self.storage_account = 'pkb%s' % FLAGS.run_uri
  create_cmd = [
      'azure', 'storage', 'account', 'create', '--type', 'ZRS', '-l',
      location or DEFAULT_AZURE_REGION, self.storage_account
  ]
  vm_util.IssueCommand(create_cmd)
  list_keys_cmd = [
      'azure', 'storage', 'account', 'keys', 'list', self.storage_account
  ]
  output, _, _ = vm_util.IssueCommand(list_keys_cmd)
  # Grab the primary access key from the CLI output.
  self.azure_key = regex_util.ExtractExactlyOneMatch(
      r'Primary:* (.+)', output)
def _ExtractTfParameterServerPid(output):
  """Extract the process identification number from TensorFlow parameter server.

  Args:
    output: string, Remote command output

  Returns:
    string, process identification number from TensorFlow parameter server

  Raises:
    TFParsePsPidException
  """
  regex = r'{pid} (\S+)'.format(pid=PID_PREFIX)
  try:
    return regex_util.ExtractExactlyOneMatch(regex, output)
  # Was a bare `except:` — catch only the parse failures that
  # ExtractExactlyOneMatch raises, and chain the cause for debugging.
  except (regex_util.NoMatchError, regex_util.TooManyMatchesError) as e:
    raise TFParsePsPidException('Unable to parse process identification number '
                                'of TensorFlow parameter server from remote '
                                'command output.') from e
def MakeSamplesFromOutput(metadata: Dict[str, Any],
                          output: str) -> List[sample.Sample]:
  """Create samples containing metrics.

  Args:
    metadata: dict contains all the metadata that reports.
    output: string, command output
  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/mlperf_inference_benchmark_test.py

  Returns:
    Samples containing training metrics.
  """
  # Record each "<column> : <value>" pair from the report in place.
  for column_name in _METADATA_COLUMNS:
    extracted = regex_util.ExtractExactlyOneMatch(
        fr'{re.escape(column_name)} *: *(.*)', output)
    metadata[f'mlperf {column_name}'] = extracted
  throughput = regex_util.ExtractFloat(
      r': result_scheduled_samples_per_sec *: *(.*), Result is VALID', output)
  return [sample.Sample('throughput', float(throughput), 'samples/s', metadata)]
def _LastRunResults(bm_spec: benchmark_spec.BenchmarkSpec) -> str:
  """Finds the results of the last run.

  Args:
    bm_spec: The benchmark specification. Contains all data that is required
      to run the benchmark.

  Returns:
    The detail log.
  """
  vm = bm_spec.vms[0]
  # Locate the most recently modified detail log inside the container.
  locate_cmd = f'{bm_spec.env_cmd} && make launch_docker DOCKER_COMMAND="find build/logs -name mlperf_log_detail.txt | xargs ls -t | head -n 1"'
  stdout, _ = vm.RobustRemoteCommand(locate_cmd, should_log=True)
  mlperf_log_detail_txt = regex_util.ExtractExactlyOneMatch(
      r'(build/logs/.*/mlperf_log_detail.txt)', stdout)
  # Dump that log's contents and hand them back to the caller.
  cat_cmd = f'{bm_spec.env_cmd} && make launch_docker DOCKER_COMMAND="cat {mlperf_log_detail_txt}"'
  detail_log, _ = vm.RobustRemoteCommand(cat_cmd, should_log=True)
  return detail_log
def MakePerformanceSamplesFromOutput(base_metadata: Dict[str, Any],
                                     output: str) -> List[sample.Sample]:
  """Create performance samples containing metrics.

  Args:
    base_metadata: dict contains all the metadata that reports.
    output: string, command output
  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/mlperf_inference_benchmark_test.py

  Returns:
    Samples containing training metrics.
  """
  # Collect each "<column> : <value>" line, then layer the caller's metadata
  # on top (caller values win on key collision, as before).
  metadata = {
      f'mlperf {column_name}': regex_util.ExtractExactlyOneMatch(
          fr'{re.escape(column_name)} *: *(.*)', output)
      for column_name in _PERFORMANCE_METADATA
  }
  metadata.update(base_metadata)
  throughput = regex_util.ExtractFloat(
      r': result_scheduled_samples_per_sec: (\d+\.\d+)', output)
  return [sample.Sample('throughput', float(throughput), 'samples/s', metadata)]
def MakeAccuracySamplesFromOutput(base_metadata: Dict[str, Any],
                                  output: str) -> List[sample.Sample]:
  """Creates accuracy samples containing metrics.

  Args:
    base_metadata: dict contains all the metadata that reports.
    output: string, command output

  Returns:
    Samples containing training metrics.
  """
  # Pull each "<column> : <value>" line from the report.
  metadata = {
      f'mlperf {column_name}': regex_util.ExtractExactlyOneMatch(
          fr'{re.escape(column_name)} *: *(.*)', output)
      for column_name in _ACCURACY_METADATA
  }
  # The PASSED line carries both the measured accuracy and the threshold.
  accuracy = regex_util.ExtractFloat(
      r': Accuracy = (\d+\.\d+), Threshold = \d+\.\d+\. Accuracy test PASSED',
      output)
  metadata['Threshold'] = regex_util.ExtractFloat(
      r': Accuracy = \d+\.\d+, Threshold = (\d+\.\d+)\. Accuracy test PASSED',
      output)
  metadata.update(base_metadata)
  return [sample.Sample('accuracy', float(accuracy), '%', metadata)]
def _MakeSamplesFromOutput(metadata, output):
  """Create a sample continaing the measured throughput.

  Args:
    metadata: dict contains all the metadata that reports.
    output: output
  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/resnet_benchmark_test.py

  Returns:
    a Sample containing the throughput
  """
  samples = []
  # Seed the checkpoint list with the step-0 loss; synthetic zeros fill the
  # fields the step-0 log line does not provide.
  pattern = r'loss = (\d+.\d+), step = 0'
  loss = regex_util.ExtractAllMatches(pattern, output)
  checkpoints = [('0', '', loss.pop(), '0', '0')]
  # Subsequent checkpoints: global step rate, optional examples/sec line,
  # loss, step number, and checkpoint duration.
  pattern = (r'global_step/sec: (\d+.\d+)\n(.*examples/sec: \d+.\d+\n)?.*'
             r'loss = (\d+.\d+), step = (\d+) \((\d+.\d+) sec\)')
  checkpoints.extend(regex_util.ExtractAllMatches(pattern, output))
  for global_speed, example_speed, loss, step, duration in checkpoints:
    metadata_copy = copy.deepcopy(metadata)
    metadata_copy['step'] = int(step)
    metadata_copy['duration'] = float(duration)
    samples.append(sample.Sample('Loss', float(loss), '', metadata_copy))
    samples.append(
        sample.Sample('Global Steps Per Second', float(global_speed),
                      'global_steps/sec', metadata_copy))
    if example_speed:
      # This benchmark only reports "Examples Per Second" metric when run
      # using TPU.
      # NOTE(review): this re-extracts a single examples/sec value from the
      # whole output on every loop iteration, rather than using the per-row
      # value already captured in `example_speed` — confirm against
      # resnet_benchmark_test.py whether exactly one such line is expected.
      pattern = r'examples/sec: (\d+.\d+)'
      example_speed = regex_util.ExtractExactlyOneMatch(pattern, output)
      samples.append(
          sample.Sample('Examples Per Second', float(example_speed),
                        'examples/sec', metadata_copy))
  pattern = r'Loss for final step: (\d+.\d+)'
  value = regex_util.ExtractExactlyOneMatch(pattern, output)
  samples.append(sample.Sample('Final Loss', float(value), '', metadata))
  if FLAGS.resnet_mode in ('eval', 'train_and_eval'):
    pattern = r'Eval results: {.*\'loss\': (\d+.\d+)'
    value = regex_util.ExtractExactlyOneMatch(pattern, output)
    samples.append(sample.Sample('Eval Loss', float(value), '', metadata))
    # In the case of top-1 score, the trained model checks if the top class
    # (the one having the highest probability) is the same as the target
    # label. In the case of top-5 score, the trained model checks if the
    # target label is one of your top 5 predictions (the 5 ones with the
    # highest probabilities).
    pattern = r'Eval results: {.*\'top_1_accuracy\': (\d+.\d+)'
    value = regex_util.ExtractExactlyOneMatch(pattern, output)
    samples.append(
        sample.Sample('Top 1 Accuracy', float(value) * 100, '%', metadata))
    pattern = r'Eval results: {.*\'top_5_accuracy\': (\d+.\d+)'
    value = regex_util.ExtractExactlyOneMatch(pattern, output)
    samples.append(
        sample.Sample('Top 5 Accuracy', float(value) * 100, '%', metadata))
  pattern = r'Elapsed seconds (\d+)'
  value = regex_util.ExtractExactlyOneMatch(pattern, output)
  samples.append(
      sample.Sample('Elapsed Seconds', int(value), 'seconds', metadata))
  return samples
def testNonUniqueMatch(self):
  # Multiple matches for the pattern must raise TooManyMatchesError.
  self.assertRaises(regex_util.TooManyMatchesError,
                    regex_util.ExtractExactlyOneMatch, 'spam',
                    'spam spam spam')
def testCapturingGroup(self):
  # With a capturing group, only the group's text is returned.
  extracted = regex_util.ExtractExactlyOneMatch('ba(r+)', 'foo barrr baz')
  self.assertEqual(extracted, 'rrr')
def _MakeSamplesFromOutput(metadata, output):
  """Create a sample continaing the measured throughput.

  Args:
    metadata: dict contains all the metadata that reports.
    output: output
  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/resnet_benchmark_test.py

  Returns:
    a Sample containing the throughput
  """
  samples = []
  # The first timestamp in the log is treated as the run's start time; all
  # per-sample durations are measured relative to it.
  pattern = r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6})'
  start_time = _ParseDateTime(
      regex_util.ExtractAllMatches(pattern, output)[0])
  if FLAGS.resnet_mode in ('train', 'train_and_eval'):
    # If statement training true, it will parse examples_per_second,
    # global_steps_per_second, loss
    pattern = (
        r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6}).*Saving checkpoints for (\d+).*\n'
        r'.*loss = (\d+\.\d+), step = \d+\n')
    for wall_time, step, loss in regex_util.ExtractAllMatches(
        pattern, output):
      metadata_copy = metadata.copy()
      metadata_copy['step'] = int(step)
      metadata_copy['duration'] = (_ParseDateTime(wall_time) -
                                   start_time).seconds
      samples.append(
          sample.Sample('Loss', float(loss), '', metadata_copy))
    # The final step's loss is logged in a different shape; the optional
    # ((.*\n){9})? skips up to nine intervening log lines.
    pattern = (
        r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6}).*Saving checkpoints for (\d+).*\n'
        r'((.*\n){9})?.*Loss for final step: (\d+\.\d+).')
    for wall_time, step, _, _, loss in regex_util.ExtractAllMatches(
        pattern, output):
      metadata_copy = metadata.copy()
      metadata_copy['step'] = int(step)
      metadata_copy['duration'] = (_ParseDateTime(wall_time) -
                                   start_time).seconds
      samples.append(
          sample.Sample('Loss', float(loss), '', metadata_copy))
    pattern = (
        r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6}).*Saving checkpoints for (\d+).*\n'
        r'.*global_step/sec: (\d+\.\d+)\n'
        r'(.*examples/sec: (\d+.\d+))?')
    for wall_time, step, global_step, _, examples_sec in (
        regex_util.ExtractAllMatches(pattern, output)):
      metadata_copy = metadata.copy()
      metadata_copy['step'] = int(step)
      metadata_copy['duration'] = (_ParseDateTime(wall_time) -
                                   start_time).seconds
      samples.append(
          sample.Sample('Global Steps Per Second', float(global_step),
                        'global_steps/sec', metadata_copy))
      if examples_sec:
        # This benchmark only reports "Examples Per Second" metric when run
        # using TPU.
        samples.append(
            sample.Sample('Examples Per Second', float(examples_sec),
                          'examples/sec', metadata_copy))
  if FLAGS.resnet_mode in ('eval', 'train_and_eval'):
    # If statement evaluates true, it will parse top_1_accuracy,
    # top_5_accuracy, and eval_loss.
    pattern = (
        r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6}).*Saving dict for global step \d+: '
        r'global_step = (\d+), loss = (\d+\.\d+), top_1_accuracy = (\d+\.\d+), '
        r'top_5_accuracy = (\d+\.\d+)')
    for wall_time, step, loss, top_1_accuracy, top_5_accuracy in (
        regex_util.ExtractAllMatches(pattern, output)):
      metadata_copy = metadata.copy()
      metadata_copy['step'] = int(step)
      metadata_copy['duration'] = (_ParseDateTime(wall_time) -
                                   start_time).seconds
      samples.append(
          sample.Sample('Eval Loss', float(loss), '', metadata_copy))
      # In the case of top-1 score, the trained model checks if the top class (
      # the one having the highest probability) is the same as the target label.
      # In the case of top-5 score, the trained model checks if the target label
      # is one of your top 5 predictions (the 5 ones with the highest
      # probabilities).
      samples.append(
          sample.Sample('Top 1 Accuracy', float(top_1_accuracy) * 100, '%',
                        metadata_copy))
      samples.append(
          sample.Sample('Top 5 Accuracy', float(top_5_accuracy) * 100, '%',
                        metadata_copy))
  pattern = r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6}).*Elapsed seconds (\d+)'
  wall_time, value = regex_util.ExtractExactlyOneMatch(pattern, output)
  samples.append(
      sample.Sample('Elapsed Seconds', int(value), 'seconds', metadata))
  return samples
def testNoMatch(self):
  # A pattern absent from the string must raise NoMatchError.
  self.assertRaises(regex_util.NoMatchError,
                    regex_util.ExtractExactlyOneMatch, 'foo', 'bar')