def MakeSamplesFromOutput(metadata, output):
    """Create samples containing metrics.

  Args:
    metadata: dict contains all the metadata that reports.
    output: string, command output
  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/mlperf_benchmark_test.py

  Returns:
    Samples containing training metrics.
  """
    accuracy_rows = regex_util.ExtractAllMatches(
        r':::MLPv(\S+) resnet (\d+\.\d+) .* eval_accuracy: {(.*)}', output)
    samples = []
    first_time = None
    for mlperf_version, raw_time, payload in accuracy_rows:
        current_time = float(raw_time)
        if not first_time:
            # The first timestamp seen becomes the origin for 'times' offsets.
            first_time = current_time
        row_metadata = metadata.copy()
        epoch_str = regex_util.ExtractExactlyOneMatch(
            r'"epoch": (\d+)', payload)
        accuracy_str = regex_util.ExtractExactlyOneMatch(
            r'"value": (0\.\d+)', payload)
        row_metadata.update(
            times=current_time - first_time,
            epoch=int(epoch_str),
            version=mlperf_version)
        samples.append(
            sample.Sample('Eval Accuracy',
                          float(accuracy_str) * 100, '%', row_metadata))
    total_seconds = regex_util.ExtractExactlyOneMatch(
        r'RESULT,resnet,.*,(\d+),.*,.*', output)
    samples.append(
        sample.Sample('Times', int(total_seconds), 'seconds', metadata))
    return samples
# Example 2
def MakeSamplesFromOutput(metadata, output, use_tpu=False, model='resnet'):
    """Create samples containing metrics.

  Args:
    metadata: dict contains all the metadata that reports.
    output: string, command output
    use_tpu: bool, whether tpu is in use
    model: string, model name
  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/mlperf_benchmark_test.py

  Returns:
    Samples containing training metrics.
  """
    samples = []

    # Each ':::MLL' log line carries a wall-clock timestamp and a JSON-like
    # eval_accuracy payload; capture both per evaluation.
    results = regex_util.ExtractAllMatches(
        r':::MLL (\d+\.\d+) eval_accuracy: {(.*)}', output)

    start = None
    for wall_time, result in results:
        wall_time = float(wall_time)
        if not start:
            # First timestamp becomes the origin for the 'times' offsets.
            start = wall_time
        metadata_copy = metadata.copy()
        epoch = regex_util.ExtractExactlyOneMatch(r'"epoch_num": (\d+)',
                                                  result)
        if ('transformer' in model and (not use_tpu)):
            # Non-TPU transformer logs quote the accuracy value.
            value = regex_util.ExtractExactlyOneMatch(r'"value": "(\d+\.\d+)"',
                                                      result)
        elif 'mask' in model:
            # Mask R-CNN reports JSON dicts: headline metric is the BBOX
            # accuracy, and both the accuracy and metadata dicts are folded
            # into the per-sample metadata.
            mask_value, mask_metadata = regex_util.ExtractExactlyOneMatch(
                r'^"value": (.*?), "metadata": (.*)$', result)
            value = json.loads(mask_value)['accuracy']['BBOX']
            metadata_copy.update(json.loads(mask_value)['accuracy'])
            metadata_copy.update(json.loads(mask_metadata))
        else:
            value = regex_util.ExtractExactlyOneMatch(r'"value": (\d+\.\d+)',
                                                      result)
        metadata_copy['times'] = wall_time - start
        metadata_copy['epoch'] = int(epoch)
        samples.append(
            sample.Sample('Eval Accuracy',
                          float(value) * 100, '%', metadata_copy))

    # Total run time is parsed only for non-TPU runs; minigo's RESULT line
    # has its seconds column one position earlier than the other models'.
    if not use_tpu:
        if 'minigo' in model:
            times = regex_util.ExtractAllMatches(r'RESULT,.*,(\d+),.*,.*',
                                                 output)
        else:
            times = regex_util.ExtractAllMatches(r'RESULT,.*,.*,(\d+),.*,.*',
                                                 output)
        samples.append(
            sample.Sample('Time', int(times[0]), 'seconds', metadata))

    return samples
def MakeSamplesFromEvalOutput(metadata, output, elapsed_seconds, use_tpu=True):
    """Create a sample containing evaluation metrics.

  Args:
    metadata: dict contains all the metadata that reports.
    output: string, command output
    elapsed_seconds: float, elapsed seconds from saved checkpoint.
    use_tpu: bool, whether tpu is used

  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/resnet_benchmark_test.py

  Returns:
    a Sample containing evaluation metrics
  """
    # TPU and non-TPU runs log the evaluation dict with different field
    # orders, so the capture groups are unpacked accordingly.
    if use_tpu:
        eval_regex = (r'Saving dict for global step \d+: global_step = (\d+), '
                      r'loss = (\d+\.\d+), top_1_accuracy = (\d+\.\d+), '
                      r'top_5_accuracy = (\d+\.\d+)')
        step_str, loss_str, top1_str, top5_str = (
            regex_util.ExtractExactlyOneMatch(eval_regex, output))
    else:
        eval_regex = (
            r'tensorflow:Saving dict for global step \d+: accuracy = (\d+\.\d+), '
            r'accuracy_top_5 = (\d+\.\d+), global_step = (\d+),'
            r' loss = (\d+\.\d+)')
        top1_str, top5_str, step_str, loss_str = (
            regex_util.ExtractExactlyOneMatch(eval_regex, output))

    global_step = int(step_str)
    eval_metadata = metadata.copy()
    eval_metadata['step'] = global_step
    eval_metadata['epoch'] = global_step / metadata['num_examples_per_epoch']
    eval_metadata['elapsed_seconds'] = elapsed_seconds
    # Top-1 counts a hit when the most probable class equals the target label;
    # top-5 counts a hit when the label appears among the five most probable
    # predicted classes.
    return [
        sample.Sample('Eval Loss', float(loss_str), '', eval_metadata),
        sample.Sample('Top 1 Accuracy',
                      float(top1_str) * 100, '%', eval_metadata),
        sample.Sample('Top 5 Accuracy',
                      float(top5_str) * 100, '%', eval_metadata)
    ]
# Example 4
def _Install(vm):
    """Installs the Mellanox OpenFabrics (MOFED) driver on the VM.

  Downloads and installs the MOFED driver matching the VM's OS, restarts the
  InfiniBand daemon, enables RDMA in the Azure waagent configuration, and
  raises the memlock/nofile limits needed by RDMA-capable instances.

  Args:
    vm: The virtual machine to install the driver on.

  Raises:
    ValueError: if the VM's OS type has no known MOFED mapping.
    errors.Setup.InvalidSetupError: if the driver download fails.
    errors.Benchmarks.PrepareException: if the installer does not report
      success.
  """
    if vm.OS_TYPE not in MOFED_OS_MAPPING:
        raise ValueError('OS type {} not in {}'.format(
            vm.OS_TYPE, sorted(MOFED_OS_MAPPING)))
    driver = MOFED_DRIVER.format(version=FLAGS.mofed_version,
                                 os=MOFED_OS_MAPPING[vm.OS_TYPE])
    vm.InstallPackages('libdapl2 libmlx4-1')
    try:
        vm.RemoteCommand('curl -fSsL {} | tar -zxpf -'.format(driver))
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are not
    # swallowed; `from e` preserves the underlying failure as the cause.
    except Exception as e:
        raise errors.Setup.InvalidSetupError(
            'Failed to download {}'.format(driver)) from e
    stdout, _ = vm.RemoteCommand(
        'cd MLNX_OFED_LINUX-* && sudo ./mlnxofedinstall '
        '--force')
    if not regex_util.ExtractExactlyOneMatch(
            r'Installation passed successfully', stdout):
        raise errors.Benchmarks.PrepareException(
            'Mellanox OpenFabrics driver isn\'t installed successfully.')
    vm.RemoteCommand('sudo /etc/init.d/openibd restart')
    # Enable RDMA support in the Azure Linux agent configuration.
    vm.RemoteCommand("sudo sed -i -e 's/# OS.EnableRDMA=y/"
                     "OS.EnableRDMA=y/g' /etc/waagent.conf")
    vm.RemoteCommand("sudo sed -i -e 's/# OS.UpdateRdmaDriver=y/"
                     "OS.UpdateRdmaDriver=y/g' /etc/waagent.conf")
    # https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes-hpc#rdma-capable-instances
    vm.RemoteCommand('cat << EOF | sudo tee -a /etc/security/limits.conf\n'
                     '*               hard    memlock         unlimited\n'
                     '*               soft    memlock         unlimited\n'
                     '*               hard    nofile          65535\n'
                     '*               soft    nofile          65535\n'
                     'EOF')
def ParseOpenSSLOutput(raw_result: str, version: str, parallelism: int):
    """Parses `openssl speed` evp output into one throughput sample per blocksize."""
    throughput_fields = regex_util.ExtractExactlyOneMatch(
        r'evp\s+(.*)', raw_result).split()
    samples = []
    for position, block_bytes in enumerate(BLOCKSIZES_IN_BYTES):
        # Each field looks like '<number><unit>', e.g. '123456.78k'.
        value, unit = regex_util.ExtractExactlyOneMatch(
            r'([\d\.]+)(\w+)', throughput_fields[position])
        samples.append(
            sample.Sample(
                'Throughput', float(value), unit, {
                    'duration': _OPENSSL_SPEED_DURATION.value,
                    'algorithm': _OPENSSL_SPEED_ALGORITHM.value,
                    'parallelism': parallelism,
                    'version': version,
                    'blocksize': block_bytes
                }))
    return samples
# Example 6
def MakeSamplesFromOutput(metadata, output):
    """Create samples containing metrics.

  Args:
    metadata: dict contains all the metadata that reports.
    output: string, command output
  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/nccl_benchmark_test.py

  Returns:
    Samples containing training metrics, and the bandwidth
  """
    samples = []
    # Fold the benchmark configuration (named groups of _SAMPLE_LINE_RE) into
    # the shared metadata. NOTE(review): assumes the header matches at the
    # start of the output; a non-match would raise AttributeError here.
    metadata.update(_SAMPLE_LINE_RE.match(output).groupdict())
    # Record which device each rank ran on, keyed by the 'Rank N' string.
    results = regex_util.ExtractAllMatches(r'(Rank\s+\d+) (.*)', output)
    for rank, device in results:
        metadata[rank] = device
    # One table row per message size; the 12 capture groups line up with
    # _METADATA_COLUMNS (size/count/type/redop plus out-of-place and in-place
    # time/bandwidth/error columns) -- confirm against that constant.
    results = regex_util.ExtractAllMatches(
        r'^\s*'
        r'(\d+)\s+'
        r'(\d+)\s+'
        r'(\w+)\s+'
        r'(\w+)\s+'
        r'(\d+(?:\.\d+)?)\s+'
        r'(\d+(?:\.\d+)?)\s+'
        r'(\d+(?:\.\d+)?)\s+'
        r'(\S+)\s+'
        r'(\d+(?:\.\d+)?)\s+'
        r'(\d+(?:\.\d+)?)\s+'
        r'(\d+(?:\.\d+)?)\s+'
        r'(\S+)', output, re.MULTILINE)
    max_out_of_place_algbw = 0
    for row in results:
        metadata_copy = metadata.copy()
        metadata_copy.update(zip(_METADATA_COLUMNS, row))
        # Emit one sample per configured metric name, read back from the
        # per-row metadata.
        for metric, metadata_key in sorted(_SAMPLE_NAMES.items()):
            samples.append(
                sample.Sample(metric, float(metadata_copy[metadata_key]),
                              'GB/s', metadata_copy))
        # Gbps is gigaBIT per second and GB/s is gigaBYTE per second
        max_out_of_place_algbw = max(
            max_out_of_place_algbw, float(metadata_copy['out_of_place_algbw']))

    avg_bus_bandwidth = regex_util.ExtractExactlyOneMatch(
        r'Avg bus bandwidth\s+: ([0-9\.]+)', output)
    samples.append(
        sample.Sample('avg_busbw', float(avg_bus_bandwidth), 'GB/s', metadata))
    # Convert GB/s to Gbps (x8) for the headline algorithm-bandwidth sample.
    samples.append(
        sample.Sample('max_out_of_place_algbw', max_out_of_place_algbw * 8,
                      'Gbps', metadata))
    return samples, max_out_of_place_algbw
# Example 7
  def PrepareService(self, location):
    """Creates the Azure storage account used by this service.

    Creates a ZRS (zone-redundant) storage account named after the run URI in
    the given location (falling back to DEFAULT_AZURE_REGION), then captures
    its primary access key for later authentication.

    Args:
      location: string or None, Azure location for the storage account.
    """
    self.storage_account = 'pkb%s' % FLAGS.run_uri
    vm_util.IssueCommand(
        ['azure', 'storage', 'account', 'create',
         '--type', 'ZRS',
         '-l', location or DEFAULT_AZURE_REGION,
         self.storage_account])

    output, _, _ = vm_util.IssueCommand(
        ['azure', 'storage', 'account',
         'keys', 'list', self.storage_account])

    # The key listing prints a 'Primary: <key>' line; keep only the key.
    self.azure_key = regex_util.ExtractExactlyOneMatch(
        r'Primary:* (.+)', output)
# Example 8
def _ExtractTfParameterServerPid(output):
  """Extract the process identification number from TensorFlow parameter server.

  Args:
    output: string, Remote command output

  Returns:
    string, process identification number from TensorFlow parameter server

  Raises:
    TFParsePsPidException: if the PID cannot be parsed from the output.
  """
  regex = r'{pid} (\S+)'.format(pid=PID_PREFIX)
  try:
    return regex_util.ExtractExactlyOneMatch(regex, output)
  # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are not
  # swallowed; `from e` keeps the original parse failure as the cause.
  except Exception as e:
    raise TFParsePsPidException('Unable to parse process identification number '
                                'of TensorFlow parameter server from remote '
                                'command output.') from e
def MakeSamplesFromOutput(metadata: Dict[str, Any],
                          output: str) -> List[sample.Sample]:
  """Create samples containing metrics.

  Parses each expected metadata column out of the MLPerf inference log into
  the shared metadata, then extracts the validated scheduled-samples-per-sec
  throughput.

  Args:
    metadata: dict contains all the metadata that reports.
    output: string, command output
  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/mlperf_inference_benchmark_test.py

  Returns:
    Samples containing training metrics.
  """
  for field in _METADATA_COLUMNS:
    metadata[f'mlperf {field}'] = regex_util.ExtractExactlyOneMatch(
        fr'{re.escape(field)} *: *(.*)', output)
  samples_per_sec = regex_util.ExtractFloat(
      r': result_scheduled_samples_per_sec *: *(.*), Result is VALID', output)
  return [
      sample.Sample('throughput', float(samples_per_sec), 'samples/s', metadata)
  ]
def _LastRunResults(bm_spec: benchmark_spec.BenchmarkSpec) -> str:
  """Finds the results of the last run.

  Args:
    bm_spec: The benchmark specification. Contains all data that is required to
      run the benchmark.

  Returns:
    The detail log.
  """
  vm = bm_spec.vms[0]
  # Inside the docker container, list all mlperf_log_detail.txt files
  # newest-first and keep only the most recent path.
  stdout, _ = vm.RobustRemoteCommand(
      f'{bm_spec.env_cmd} && make launch_docker DOCKER_COMMAND="find build/logs -name mlperf_log_detail.txt | xargs ls -t | head -n 1"',
      should_log=True)
  mlperf_log_detail_txt = regex_util.ExtractExactlyOneMatch(
      r'(build/logs/.*/mlperf_log_detail.txt)', stdout)
  # Dump the selected log's contents and return them to the caller.
  stdout, _ = vm.RobustRemoteCommand(
      f'{bm_spec.env_cmd} && make launch_docker DOCKER_COMMAND="cat {mlperf_log_detail_txt}"',
      should_log=True)
  return stdout
# Example 11
def MakePerformanceSamplesFromOutput(base_metadata: Dict[str, Any],
                                     output: str) -> List[sample.Sample]:
  """Create performance samples containing metrics.

  Args:
    base_metadata: dict contains all the metadata that reports.
    output: string, command output
  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/mlperf_inference_benchmark_test.py

  Returns:
    Samples containing training metrics.
  """
  # Collect the expected performance columns, then let the caller-supplied
  # metadata override any colliding keys.
  metadata: Dict[str, Any] = {
      f'mlperf {column}': regex_util.ExtractExactlyOneMatch(
          fr'{re.escape(column)} *: *(.*)', output)
      for column in _PERFORMANCE_METADATA
  }
  metadata.update(base_metadata)
  samples_per_sec = regex_util.ExtractFloat(
      r': result_scheduled_samples_per_sec: (\d+\.\d+)', output)
  return [
      sample.Sample('throughput', float(samples_per_sec), 'samples/s', metadata)
  ]
def MakeAccuracySamplesFromOutput(base_metadata: Dict[str, Any],
                                  output: str) -> List[sample.Sample]:
  """Creates accuracy samples containing metrics.

  Args:
    base_metadata: dict contains all the metadata that reports.
    output: string, command output

  Returns:
    Samples containing training metrics.
  """
  # Collect the expected accuracy columns, then add the threshold and finally
  # let caller-supplied metadata override any colliding keys.
  metadata: Dict[str, Any] = {
      f'mlperf {column}': regex_util.ExtractExactlyOneMatch(
          fr'{re.escape(column)} *: *(.*)', output)
      for column in _ACCURACY_METADATA
  }
  passed_accuracy = regex_util.ExtractFloat(
      r': Accuracy = (\d+\.\d+), Threshold = \d+\.\d+\. Accuracy test PASSED',
      output)
  metadata['Threshold'] = regex_util.ExtractFloat(
      r': Accuracy = \d+\.\d+, Threshold = (\d+\.\d+)\. Accuracy test PASSED',
      output)
  metadata.update(base_metadata)
  return [sample.Sample('accuracy', float(passed_accuracy), '%', metadata)]
# Example 13
def _MakeSamplesFromOutput(metadata, output):
    """Create a sample containing the measured throughput.

  Args:
    metadata: dict contains all the metadata that reports.
    output: output

  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/resnet_benchmark_test.py

  Returns:
    a Sample containing the throughput
  """
    samples = []
    # Step 0 has no preceding global_step/sec line, so seed the checkpoint
    # list with its loss and zeroed speed/duration fields.
    pattern = r'loss = (\d+.\d+), step = 0'
    loss = regex_util.ExtractAllMatches(pattern, output)
    checkpoints = [('0', '', loss.pop(), '0', '0')]
    # Each subsequent checkpoint: global speed, optional examples/sec line,
    # loss, step number and step duration.
    pattern = (r'global_step/sec: (\d+.\d+)\n(.*examples/sec: \d+.\d+\n)?.*'
               r'loss = (\d+.\d+), step = (\d+) \((\d+.\d+) sec\)')
    checkpoints.extend(regex_util.ExtractAllMatches(pattern, output))
    for global_speed, example_speed, loss, step, duration in checkpoints:
        metadata_copy = copy.deepcopy(metadata)
        metadata_copy['step'] = int(step)
        metadata_copy['duration'] = float(duration)
        samples.append(sample.Sample('Loss', float(loss), '', metadata_copy))
        samples.append(
            sample.Sample('Global Steps Per Second', float(global_speed),
                          'global_steps/sec', metadata_copy))
        if example_speed:
            # This benchmark only reports the "Examples Per Second" metric
            # when running on TPU.
            # NOTE(review): this re-extracts exactly one match from the WHOLE
            # output instead of using the per-checkpoint capture group; it
            # raises if multiple examples/sec lines exist -- confirm intent.
            pattern = r'examples/sec: (\d+.\d+)'
            example_speed = regex_util.ExtractExactlyOneMatch(pattern, output)
            samples.append(
                sample.Sample('Examples Per Second', float(example_speed),
                              'examples/sec', metadata_copy))

    pattern = r'Loss for final step: (\d+.\d+)'
    value = regex_util.ExtractExactlyOneMatch(pattern, output)
    samples.append(sample.Sample('Final Loss', float(value), '', metadata))
    if FLAGS.resnet_mode in ('eval', 'train_and_eval'):
        pattern = r'Eval results: {.*\'loss\': (\d+.\d+)'
        value = regex_util.ExtractExactlyOneMatch(pattern, output)
        samples.append(sample.Sample('Eval Loss', float(value), '', metadata))
        # In the case of top-1 score, the trained model checks if the top
        # class (the one having the highest probability) is the same as the
        # target label. In the case of top-5 score, the trained model checks
        # if the target label is one of the top 5 predictions (the 5 ones
        # with the highest probabilities).
        pattern = r'Eval results: {.*\'top_1_accuracy\': (\d+.\d+)'
        value = regex_util.ExtractExactlyOneMatch(pattern, output)
        samples.append(
            sample.Sample('Top 1 Accuracy',
                          float(value) * 100, '%', metadata))

        pattern = r'Eval results: {.*\'top_5_accuracy\': (\d+.\d+)'
        value = regex_util.ExtractExactlyOneMatch(pattern, output)
        samples.append(
            sample.Sample('Top 5 Accuracy',
                          float(value) * 100, '%', metadata))

        pattern = r'Elapsed seconds (\d+)'
        value = regex_util.ExtractExactlyOneMatch(pattern, output)
        samples.append(
            sample.Sample('Elapsed Seconds', int(value), 'seconds', metadata))
    return samples
 def testNonUniqueMatch(self):
     """ExtractExactlyOneMatch raises TooManyMatchesError on multiple hits."""
     with self.assertRaises(regex_util.TooManyMatchesError):
         regex_util.ExtractExactlyOneMatch('spam', 'spam spam spam')
 def testCapturingGroup(self):
     """With one capturing group, only the group's text is returned."""
     self.assertEqual(
         regex_util.ExtractExactlyOneMatch('ba(r+)', 'foo barrr baz'),
         'rrr')
def _MakeSamplesFromOutput(metadata, output):
    """Create a sample containing the measured throughput.

  Args:
    metadata: dict contains all the metadata that reports.
    output: output

  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/resnet_benchmark_test.py

  Returns:
    a Sample containing the throughput
  """
    samples = []
    # The first timestamp in the log is the time origin for all 'duration'
    # values computed below.
    pattern = r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6})'
    start_time = _ParseDateTime(
        regex_util.ExtractAllMatches(pattern, output)[0])

    if FLAGS.resnet_mode in ('train', 'train_and_eval'):
        # Training mode: parse per-checkpoint loss, global_steps_per_second
        # and (on TPU) examples_per_second.
        pattern = (
            r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6}).*Saving checkpoints for (\d+).*\n'
            r'.*loss = (\d+\.\d+), step = \d+\n')
        for wall_time, step, loss in regex_util.ExtractAllMatches(
                pattern, output):
            metadata_copy = metadata.copy()
            metadata_copy['step'] = int(step)
            metadata_copy['duration'] = (_ParseDateTime(wall_time) -
                                         start_time).seconds
            samples.append(
                sample.Sample('Loss', float(loss), '', metadata_copy))

        # The final checkpoint logs its loss on a 'Loss for final step' line
        # up to 9 lines after the checkpoint-save line.
        pattern = (
            r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6}).*Saving checkpoints for (\d+).*\n'
            r'((.*\n){9})?.*Loss for final step: (\d+\.\d+).')
        for wall_time, step, _, _, loss in regex_util.ExtractAllMatches(
                pattern, output):
            metadata_copy = metadata.copy()
            metadata_copy['step'] = int(step)
            metadata_copy['duration'] = (_ParseDateTime(wall_time) -
                                         start_time).seconds
            samples.append(
                sample.Sample('Loss', float(loss), '', metadata_copy))

        pattern = (
            r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6}).*Saving checkpoints for (\d+).*\n'
            r'.*global_step/sec: (\d+\.\d+)\n'
            r'(.*examples/sec: (\d+.\d+))?')
        for wall_time, step, global_step, _, examples_sec in (
                regex_util.ExtractAllMatches(pattern, output)):
            metadata_copy = metadata.copy()
            metadata_copy['step'] = int(step)
            metadata_copy['duration'] = (_ParseDateTime(wall_time) -
                                         start_time).seconds
            samples.append(
                sample.Sample('Global Steps Per Second', float(global_step),
                              'global_steps/sec', metadata_copy))
            if examples_sec:
                # This benchmark only reports the "Examples Per Second"
                # metric when running on TPU.
                samples.append(
                    sample.Sample('Examples Per Second', float(examples_sec),
                                  'examples/sec', metadata_copy))

    if FLAGS.resnet_mode in ('eval', 'train_and_eval'):
        # Evaluation mode: parse eval_loss, top_1_accuracy and top_5_accuracy
        # from each saved evaluation dict.
        pattern = (
            r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6}).*Saving dict for global step \d+: '
            r'global_step = (\d+), loss = (\d+\.\d+), top_1_accuracy = (\d+\.\d+), '
            r'top_5_accuracy = (\d+\.\d+)')
        for wall_time, step, loss, top_1_accuracy, top_5_accuracy in (
                regex_util.ExtractAllMatches(pattern, output)):
            metadata_copy = metadata.copy()
            metadata_copy['step'] = int(step)
            metadata_copy['duration'] = (_ParseDateTime(wall_time) -
                                         start_time).seconds
            samples.append(
                sample.Sample('Eval Loss', float(loss), '', metadata_copy))
            # In the case of top-1 score, the trained model checks if the top class (
            # the one having the highest probability) is the same as the target label.
            # In the case of top-5 score, the trained model checks if the target label
            # is one of your top 5 predictions (the 5 ones with the highest
            # probabilities).
            samples.append(
                sample.Sample('Top 1 Accuracy',
                              float(top_1_accuracy) * 100, '%', metadata_copy))
            samples.append(
                sample.Sample('Top 5 Accuracy',
                              float(top_5_accuracy) * 100, '%', metadata_copy))

        pattern = r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6}).*Elapsed seconds (\d+)'
        wall_time, value = regex_util.ExtractExactlyOneMatch(pattern, output)
        samples.append(
            sample.Sample('Elapsed Seconds', int(value), 'seconds', metadata))
    return samples
 def testNoMatch(self):
     """ExtractExactlyOneMatch raises NoMatchError when nothing matches."""
     with self.assertRaises(regex_util.NoMatchError):
         regex_util.ExtractExactlyOneMatch('foo', 'bar')