Example #1
def _CreateMetadataDict(benchmark_spec):
    """Create metadata dict to be used in run results.

  Args:
    benchmark_spec: benchmark spec

  Returns:
    metadata dict
  """
    vm = benchmark_spec.vms[0]
    metadata = dict()
    metadata.update(cuda_toolkit.GetMetadata(vm))
    metadata['benchmark_version'] = BENCHMARK_VERSION
    metadata['num_nodes'] = len(benchmark_spec.vms)
    metadata['total_gpus'] = int(benchmark_spec.total_gpus)
    metadata['model'] = benchmark_spec.model
    metadata['batch_size'] = benchmark_spec.batch_size
    metadata['num_steps'] = benchmark_spec.num_steps
    metadata['synthetic'] = benchmark_spec.synthetic
    metadata['precision'] = benchmark_spec.precision
    metadata['max_seq_len'] = benchmark_spec.max_seq_len
    metadata['nccl_version'] = benchmark_spec.nccl_version
    metadata['nccl_net_plugin'] = benchmark_spec.nccl_net_plugin
    metadata['cuda_visible_devices'] = benchmark_spec.cuda_visible_devices
    metadata['nccl_extra_params'] = benchmark_spec.nccl_extra_params
    return metadata
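The dict built above is typically attached to every sample emitted during the run phase. A minimal usage sketch (hypothetical helper, not part of the original module; the metric name, `throughput` value, and unit are illustrative):

def _MakeSamples(benchmark_spec, throughput):
    """Hypothetical sketch: attach the metadata dict to an emitted sample."""
    metadata = _CreateMetadataDict(benchmark_spec)
    return [
        sample.Sample('training_throughput', throughput, 'images/sec', metadata)
    ]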
def Run(bm_spec: benchmark_spec.BenchmarkSpec) -> List[sample.Sample]:
    """Run GPU PingPong test.

  It tests the latency between two GPUs in two VMs using the TensorFlow gRPC
  servers that were started during the prepare phase.

  Args:
    bm_spec: The benchmark specification

  Returns:
    A list of sample.Sample objects.
  """
    client_vm, server_vm = bm_spec.vms
    server_address = _SERVER_ADDR.format(hostname=server_vm.hostname,
                                         port=_PORT)
    base_metadata = cuda_toolkit.GetMetadata(client_vm)
    samples = []

    bws = _RunGpuPingpong(client_vm, server_address)
    for ping_bw, pong_bw in bws[1:]:
        metadata = {'ping': 32 / ping_bw, 'pong': 32 / pong_bw}
        metadata.update(base_metadata)
        samples.append(
            sample.Sample('latency', 32 / ping_bw + 32 / pong_bw,
                          'microseconds', metadata))
    return samples
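`_RunGpuPingpong` itself is not shown here; the loop above only assumes it returns one (ping_bw, pong_bw) bandwidth pair per round trip and drops the first pair as a warm-up via `bws[1:]`. Purely illustrative shape, with made-up numbers:

bws = [
    (0.9, 0.9),  # warm-up round, skipped by bws[1:]
    (1.1, 1.0),
    (1.2, 1.1),
]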
Example #3
def _CreateMetadataDict(benchmark_spec, model, batch_size, num_gpus):
  """Create metadata dict to be used in run results.

  Args:
    benchmark_spec: benchmark spec
    model: model which was run
    batch_size: batch size used
    num_gpus: number of GPUs used

  Returns:
    metadata dict
  """
  vm = benchmark_spec.vms[0]
  metadata = dict()
  if cuda_toolkit.CheckNvidiaGpuExists(vm):
    metadata.update(cuda_toolkit.GetMetadata(vm))
    metadata['num_gpus'] = num_gpus
  metadata['model'] = model
  metadata['batch_size'] = batch_size
  metadata['forward_only'] = benchmark_spec.forward_only
  metadata['data_name'] = benchmark_spec.data_name
  metadata['variable_update'] = benchmark_spec.variable_update
  metadata['local_parameter_device'] = benchmark_spec.local_parameter_device
  metadata['device'] = benchmark_spec.device
  metadata['data_format'] = benchmark_spec.data_format
  metadata['distortions'] = benchmark_spec.distortions
  metadata['benchmarks_commit_hash'] = benchmark_spec.benchmarks_commit_hash
  metadata['tensorflow_version'] = benchmark_spec.tensorflow_version
  metadata['tensorflow_cpu_pip_package'] = (
      benchmark_spec.tensorflow_cpu_pip_package)
  metadata['tensorflow_gpu_pip_package'] = (
      benchmark_spec.tensorflow_gpu_pip_package)
  metadata['distributed'] = benchmark_spec.distributed
  return metadata
def Run(benchmark_spec):
  """Runs the Stencil2D benchmark. GPU clock speeds must be set already.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
  vms = benchmark_spec.vms
  num_gpus = benchmark_spec.num_gpus
  master_vm = vms[0]
  num_iterations = FLAGS.stencil2d_iterations
  problem_sizes = FLAGS.stencil2d_problem_sizes
  num_processes = len(vms) * num_gpus

  metadata = {}
  metadata.update(cuda_toolkit.GetMetadata(master_vm))
  metadata['benchmark_version'] = BENCHMARK_VERSION
  metadata['num_iterations'] = num_iterations
  metadata['num_nodes'] = len(vms)
  metadata['num_processes'] = num_processes

  results = []
  for problem_size in problem_sizes:
    results.extend(
        _RunSingleIteration(master_vm, problem_size, num_processes,
                            num_iterations, metadata))
  return results
def _CreateMetadataDict(benchmark_spec):
    """Create metadata dict to be used in run results.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

  Returns:
    metadata dict
  """
    metadata = {
        'use_tpu': bool(benchmark_spec.tpus),
        'model_dir': benchmark_spec.model_dir,
        'model': benchmark_spec.benchmark,
        'version': MLPERF_VERSION,
    }
    vms = benchmark_spec.vms
    num_vms = len(vms)
    vm = vms[0]
    gpus_per_node = nvidia_driver.QueryNumberOfGpus(vm)
    total_gpus = gpus_per_node * num_vms
    metadata.update(cuda_toolkit.GetMetadata(vm))
    metadata['total_gpus'] = total_gpus
    if benchmark_spec.tpus:
        metadata.update({
            'train_tpu_num_shards':
            benchmark_spec.tpu_groups['train'].GetNumShards(),
            'train_tpu_accelerator_type':
            benchmark_spec.tpu_groups['train'].GetAcceleratorType()
        })
    return metadata
Example #6
def _CreateMetadataDict(benchmark_spec):
    """Create metadata dict to be used in run results.

  Args:
    benchmark_spec: benchmark spec

  Returns:
    metadata dict
  """
    vm = benchmark_spec.vms[0]
    metadata = {
        'batch_size': benchmark_spec.batch_size,
        'num_epochs': benchmark_spec.num_epochs,
        'device': benchmark_spec.device,
        'num_layers': benchmark_spec.num_layers,
        'model': benchmark_spec.model,
        'mxnet_version': benchmark_spec.mxnet_version,
        'precision': benchmark_spec.precision,
        'key_value_store': benchmark_spec.key_value_store,
        'image_shape': benchmark_spec.image_shape,
        'commit': mxnet_cnn.GetCommit(vm)
    }
    if benchmark_spec.device == GPU:
        metadata.update(cuda_toolkit.GetMetadata(vm))
    return metadata
Example #7
def _CollectGpuSamples(
        vm: virtual_machine.BaseVirtualMachine) -> List[sample.Sample]:
    """Run XGBoost on the cluster.

  Args:
    vm: The virtual machine on which to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
    cmd = [
        f'{FLAGS.xgboost_env}',
        'python3',
        f'{linux_packages.INSTALL_DIR}/xgboost/tests/benchmark/benchmark_tree.py',
        f'--tree_method={_TREE_METHOD.value}',
        f'--sparsity={_SPARSITY.value}',
        f'--rows={_ROWS.value}',
        f'--columns={_COLUMNS.value}',
        f'--iterations={_ITERATIONS.value}',
        f'--test_size={_TEST_SIZE.value}',
    ]
    if _PARAMS.value:
        cmd.append(f'--params="{_PARAMS.value}"')
    metadata = _MetadataFromFlags(vm)
    metadata.update(cuda_toolkit.GetMetadata(vm))
    metadata['command'] = ' '.join(cmd)

    stdout, stderr, exit_code = vm.RemoteCommandWithReturnCode(
        metadata['command'], ignore_failure=True)
    if exit_code:
        logging.warning('Error with getting XGBoost stats: %s', stderr)
    training_time = regex_util.ExtractFloat(r'Train Time: ([\d\.]+) seconds',
                                            stdout)
    return [
        sample.Sample('training_time', training_time, 'seconds', metadata)
    ]
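`_MetadataFromFlags` is not shown in this example; a plausible sketch, assuming it simply mirrors the module flags used to build the command line above (the `vm` argument is unused in this sketch):

def _MetadataFromFlags(vm):
    """Hypothetical sketch, not the module's actual implementation."""
    del vm  # unused in this sketch
    return {
        'tree_method': _TREE_METHOD.value,
        'sparsity': _SPARSITY.value,
        'rows': _ROWS.value,
        'columns': _COLUMNS.value,
        'iterations': _ITERATIONS.value,
        'test_size': _TEST_SIZE.value,
        'params': _PARAMS.value,
    }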
Example #8
def _CreateMetadataDict(vms):
    """Create metadata dict to be used in run results.

  Args:
    vms: A list of worker VMs.

  Returns:
    metadata dict
  """
    vm = vms[0]
    gpus_per_node = nvidia_driver.QueryNumberOfGpus(vm)
    num_vms = len(vms)
    total_gpus = gpus_per_node * num_vms

    metadata = dict()
    metadata.update(cuda_toolkit.GetMetadata(vm))
    metadata['benchmark_version'] = BENCHMARK_VERSION
    metadata['num_nodes'] = len(vms)
    metadata['total_gpus'] = int(total_gpus)
    metadata['model'] = FLAGS.horovod_model
    metadata['batch_size'] = FLAGS.horovod_batch_size
    metadata['num_steps'] = FLAGS.horovod_num_steps
    metadata['synthetic'] = FLAGS.horovod_synthetic
    metadata['precision'] = FLAGS.horovod_precision
    metadata['max_seq_len'] = int(FLAGS.horovod_max_seq_len)
    metadata['nccl_version'] = FLAGS.nccl_version
    metadata['nccl_net_plugin'] = FLAGS.nccl_net_plugin
    metadata['cuda_visible_devices'] = FLAGS.nccl_cuda_visible_devices
    metadata['nccl_extra_params'] = FLAGS.nccl_extra_params
    return metadata
def _CollectGpuSamples(
        vm: virtual_machine.BaseVirtualMachine) -> List[sample.Sample]:
    """Run CUDA memcopy on the cluster.

  Args:
    vm: The virtual machine on which to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
    if not nvidia_driver.CheckNvidiaGpuExists(vm):
        return []
    if not nvidia_driver.CheckNvidiaSmiExists(vm):
        return []
    global_metadata = _MetadataFromFlags()
    global_metadata.update(cuda_toolkit.GetMetadata(vm))
    global_cmd = [
        BANDWIDTH_TEST_PATH, '--csv', f'--memory={_MEMORY.value}',
        f'--mode={_MODE.value}'
    ]
    if _HTOD.value:
        global_cmd.append('--htod')
    if _DTOH.value:
        global_cmd.append('--dtoh')
    if _DTOD.value:
        global_cmd.append('--dtod')
    if _WC.value:
        global_cmd.append('--wc')

    num_gpus = nvidia_driver.QueryNumberOfGpus(vm)
    devices = list(range(num_gpus)) + (['all'] if num_gpus > 1 else [])
    samples = []
    for device in devices:
        cmd = ' '.join(global_cmd + [f'--device={device}'])
        stdout, stderr, exit_code = vm.RemoteCommandWithReturnCode(
            cmd, ignore_failure=True)
        if exit_code:
            logging.warning('Error with getting GPU stats: %s', stderr)
            continue
        results = regex_util.ExtractAllMatches(
            r'bandwidthTest-(\S+), '
            r'Bandwidth = ([\d\.]+) (\S+), '
            r'Time = ([\d\.]+) s, '
            r'Size = (\d+) bytes, '
            r'NumDevsUsed = (\d+)', stdout)

        for metric, bandwidth, unit, time, size, num_devs_used in results:
            metadata = {
                'time': float(time),
                'size': int(size),
                'NumDevsUsed': num_devs_used,
                'device': device,
                'command': cmd,
            }
            metadata.update(global_metadata)
            samples.append(
                sample.Sample(metric, float(bandwidth), unit, metadata))
    return samples
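For reference, the regex above expects one CSV-mode bandwidthTest line per measurement. A self-contained illustration with made-up numbers (the 'H2D-Pinned' label is an assumption about bandwidthTest's CSV metric names):

line = ('bandwidthTest-H2D-Pinned, Bandwidth = 25.8 GB/s, '
        'Time = 0.00124 s, Size = 32000000 bytes, NumDevsUsed = 1')
matches = regex_util.ExtractAllMatches(
    r'bandwidthTest-(\S+), '
    r'Bandwidth = ([\d\.]+) (\S+), '
    r'Time = ([\d\.]+) s, '
    r'Size = (\d+) bytes, '
    r'NumDevsUsed = (\d+)', line)
# matches == [('H2D-Pinned', '25.8', 'GB/s', '0.00124', '32000000', '1')]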
def _CreateMetadataDict(benchmark_spec, model, batch_size):
    """Create metadata dict to be used in run results.

  Args:
    benchmark_spec: benchmark spec
    model: model which was run
    batch_size: batch size used

  Returns:
    metadata dict
  """
    vm = benchmark_spec.vms[0]
    metadata = {}
    if cuda_toolkit.CheckNvidiaGpuExists(vm):
        metadata.update(cuda_toolkit.GetMetadata(vm))

    metadata['command_line'] = benchmark_spec.tf_cnn_benchmark_cmd
    metadata['cnn_benchmarks_branch'] = benchmark_spec.cnn_benchmarks_branch
    metadata['tensorflow_version'] = benchmark_spec.tensorflow_version
    metadata['tensorflow_cpu_pip_package'] = (
        benchmark_spec.tensorflow_cpu_pip_package)
    metadata['tensorflow_gpu_pip_package'] = (
        benchmark_spec.tensorflow_gpu_pip_package)
    # If we ran a custom command line through the benchmark_args flag,
    # add the metadata from that command and return. We don't need any more
    # metadata from this function, as it is likely invalid.
    if getattr(benchmark_spec, 'benchmark_args', None):
        metadata.update(
            _GetMetadataFromBenchmarkArgs(benchmark_spec.benchmark_args))
        return metadata

    metadata['model'] = model
    metadata['batch_size'] = batch_size
    metadata['forward_only'] = benchmark_spec.forward_only
    metadata['data_name'] = benchmark_spec.data_name
    metadata['data_dir'] = benchmark_spec.data_dir
    metadata['use_local_data'] = benchmark_spec.use_local_data
    metadata['variable_update'] = benchmark_spec.variable_update
    metadata['local_parameter_device'] = benchmark_spec.local_parameter_device
    metadata['device'] = benchmark_spec.device
    metadata['data_format'] = benchmark_spec.data_format
    metadata['distortions'] = benchmark_spec.distortions
    metadata['distributed'] = benchmark_spec.distributed
    metadata['precision'] = benchmark_spec.precision
    metadata['num_gpus'] = benchmark_spec.num_gpus
    return metadata
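`_GetMetadataFromBenchmarkArgs` is not shown above; a minimal sketch, assuming the custom arguments arrive as a single string of `--key=value` tokens:

def _GetMetadataFromBenchmarkArgs(benchmark_args):
    """Hypothetical sketch, not the module's actual implementation."""
    metadata = {}
    for arg in benchmark_args.split():
        key, _, value = arg.lstrip('-').partition('=')
        metadata[key] = value or True  # record bare flags as True
    return metadata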
Example #11
def Run(benchmark_spec):
    """Sets the GPU clock speed and runs the CUDA PCIe benchmark.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
    vm = benchmark_spec.vms[0]
    # Note:  The clock speed is set in this function rather than Prepare()
    # so that the user can perform multiple runs with a specified
    # clock speed without having to re-prepare the VM.
    cuda_toolkit.SetAndConfirmGpuClocks(vm)
    num_iterations = FLAGS.gpu_pcie_bandwidth_iterations
    mode = FLAGS.gpu_pcie_bandwidth_mode
    transfer_size_range = FLAGS.gpu_pcie_bandwidth_transfer_sizes
    raw_results = []
    metadata = {}
    metadata.update(cuda_toolkit.GetMetadata(vm))
    metadata['num_iterations'] = num_iterations
    metadata['mode'] = mode
    if mode == 'range':
        metadata['range_start'] = transfer_size_range[0]
        metadata['range_stop'] = transfer_size_range[1]
        metadata['range_step'] = transfer_size_range[2]

    run_command = ('%s/extras/demo_suite/bandwidthTest --device=all' %
                   cuda_toolkit.CUDA_TOOLKIT_INSTALL_DIR)
    if mode == 'range':
        run_command += (
            ' --mode=range --start={0} --end={1} --increment={2}'.format(
                transfer_size_range[0], transfer_size_range[1],
                transfer_size_range[2]))

    for i in range(num_iterations):
        stdout, _ = vm.RemoteCommand(run_command, should_log=True)
        raw_results.append(_ParseOutputFromSingleIteration(stdout))
        if 'device_info' not in metadata:
            metadata['device_info'] = _ParseDeviceInfo(stdout)
    return _CalculateMetricsOverAllIterations(raw_results, metadata)
def _CreateMetadataDict(benchmark_spec):
    """Create metadata dict to be used in run results.

  Args:
    benchmark_spec: benchmark spec

  Returns:
    metadata dict
  """
    vm = benchmark_spec.vms[0]
    metadata = dict()
    metadata.update(cuda_toolkit.GetMetadata(vm))
    metadata['benchmark_version'] = BENCHMARK_VERSION
    metadata['num_nodes'] = len(benchmark_spec.vms)
    metadata['total_gpus'] = int(benchmark_spec.total_gpus)
    metadata['model'] = benchmark_spec.model
    metadata['batch_size'] = benchmark_spec.batch_size
    metadata['deep_learning_examples_commit'] = (
        benchmark_spec.deep_learning_examples_commit)
    return metadata
def _CreateMetadataDict(benchmark_spec):
    """Create metadata dict to be used in run results.

  Args:
    benchmark_spec: benchmark spec

  Returns:
    metadata dict
  """
    vm = benchmark_spec.vms[0]
    metadata = dict()
    if benchmark_spec.device == GPU:
        metadata.update(cuda_toolkit.GetMetadata(vm))
    metadata['batch_size'] = benchmark_spec.batch_size
    metadata['num_epochs'] = benchmark_spec.num_epochs
    metadata['device'] = benchmark_spec.device
    metadata['num_layers'] = benchmark_spec.num_layers
    metadata['model'] = benchmark_spec.model
    metadata['mxnet_version'] = benchmark_spec.mxnet_version
    metadata['commit'] = mxnet_cnn.GetCommit(vm)
    return metadata
def _CreateMetadataDict(
    bm_spec: benchmark_spec.BenchmarkSpec) -> Dict[str, Any]:
  """Creates metadata dict to be used in run results.

  Args:
    bm_spec: The benchmark specification. Contains all data that is required to
      run the benchmark.

  Returns:
    metadata dict
  """
  metadata = {
      'model': FLAGS.mlperf_benchmark,
      'version': MLPERF_INFERENCE_VERSION,
  }
  vms = bm_spec.vms
  num_vms = len(vms)
  vm = vms[0]
  gpus_per_node = nvidia_driver.QueryNumberOfGpus(vm)
  total_gpus = gpus_per_node * num_vms
  metadata.update(cuda_toolkit.GetMetadata(vm))
  metadata['total_gpus'] = total_gpus
  return metadata
Example #15
def _CreateMetadataDict(benchmark_spec):
  """Create metadata dict to be used in run results.

  Args:
    benchmark_spec: benchmark spec

  Returns:
    metadata dict
  """
  vm = benchmark_spec.vms[0]
  metadata = dict()
  metadata.update(cuda_toolkit.GetMetadata(vm))
  metadata['num_nodes'] = len(benchmark_spec.vms)
  metadata['cpus_per_rank'] = int(benchmark_spec.cpus_per_rank)
  metadata['total_gpus'] = int(benchmark_spec.total_gpus)
  metadata['benchmark_version'] = BENCHMARK_VERSION
  metadata['runtime'] = int(benchmark_spec.hpcg_runtime)
  metadata['run_as_root'] = benchmark_spec.run_as_root
  metadata['problem_size'] = '%s,%s,%s' % (benchmark_spec.hpcg_problem_size[0],
                                           benchmark_spec.hpcg_problem_size[1],
                                           benchmark_spec.hpcg_problem_size[2])

  return metadata
def Run(benchmark_spec):
  """Sets the GPU clock speed and runs the CUDA PCIe benchmark.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
  vm = benchmark_spec.vms[0]
  num_iterations = FLAGS.gpu_pcie_bandwidth_iterations
  mode = FLAGS.gpu_pcie_bandwidth_mode
  transfer_size_range = FLAGS.gpu_pcie_bandwidth_transfer_sizes
  raw_results = []
  metadata = {}
  metadata.update(cuda_toolkit.GetMetadata(vm))
  metadata['num_iterations'] = num_iterations
  metadata['mode'] = mode
  if mode == 'range':
    metadata['range_start'] = transfer_size_range[0]
    metadata['range_stop'] = transfer_size_range[1]
    metadata['range_step'] = transfer_size_range[2]

  run_command = ('%s/extras/demo_suite/bandwidthTest --device=all' %
                 metadata['cuda_toolkit_home'])
  if mode == 'range':
    run_command += (' --mode=range --start={0} --end={1} --increment={2}'
                    .format(transfer_size_range[0], transfer_size_range[1],
                            transfer_size_range[2]))

  for i in range(num_iterations):
    stdout, _ = vm.RemoteCommand(run_command, should_log=True)
    raw_results.append(_ParseOutputFromSingleIteration(stdout))
    if 'device_info' not in metadata:
      metadata['device_info'] = _ParseDeviceInfo(stdout)
  return _CalculateMetricsOverAllIterations(raw_results, metadata)
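`_ParseOutputFromSingleIteration` and `_CalculateMetricsOverAllIterations` are not shown; a minimal sketch of the aggregation step, assuming each per-iteration result maps a metric name to a bandwidth in MB/s:

import collections
import statistics


def _CalculateMetricsOverAllIterations(raw_results, metadata):
  """Hypothetical sketch, not the module's actual implementation."""
  per_metric = collections.defaultdict(list)
  for iteration_result in raw_results:
    for metric, bandwidth in iteration_result.items():
      per_metric[metric].append(bandwidth)
  samples = []
  for metric, values in per_metric.items():
    samples.append(sample.Sample(
        '%s, mean' % metric, statistics.mean(values), 'MB/s', metadata))
    samples.append(sample.Sample(
        '%s, max' % metric, max(values), 'MB/s', metadata))
  return samples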