def Prepare(bm_spec: benchmark_spec.BenchmarkSpec) -> None:
  """Clones the MLPerf Inference results repo and builds it on the first VM.

  In order: clone the results repository, patch its NVIDIA Makefile and pip
  requirements, install the GPU software stack when a GPU is detected, then
  download data and models, preprocess the data and build, all through the
  repository's `make launch_docker` target.

  Args:
    bm_spec: The benchmark specification. Its first VM is the install target
      and its env_cmd attribute is set by this function.
  """
  target_vm = bm_spec.vms[0]

  repo_name = f'inference_results_{MLPERF_INFERENCE_VERSION}'
  nvidia_dir = f'{repo_name}/closed/NVIDIA'
  target_vm.RemoteCommand(
      f'git clone https://github.com/mlcommons/{repo_name}.git',
      should_log=True)

  # Patch the checkout in place before anything is built from it.
  vm_util.ReplaceText(target_vm, 'shell uname -p', 'shell uname -m',
                      f'{nvidia_dir}/Makefile')
  vm_util.ReplaceText(target_vm, 'opencv-python-headless==4.5.2.52',
                      'opencv-python-headless==4.5.3.56',
                      f'{nvidia_dir}/docker/requirements.1')

  if nvidia_driver.CheckNvidiaGpuExists(target_vm):
    for package in ('cuda_toolkit', 'nvidia_driver', 'nvidia_docker'):
      target_vm.Install(package)

  def _LaunchDocker(docker_command):
    """Wraps a command so that it runs via the Makefile's launch_docker."""
    return f'make launch_docker DOCKER_COMMAND="{docker_command}"'

  model_name = FLAGS.mlperf_benchmark
  bm_spec.env_cmd = f'export MLPERF_SCRATCH_PATH=/scratch && cd {nvidia_dir}'

  # Build the container, then sanity-check and reset the workspace inside it.
  setup_steps = [
      bm_spec.env_cmd,
      'make build_docker NO_BUILD=1',
      'make docker_add_user',
      _LaunchDocker('echo $MLPERF_SCRATCH_PATH'),
      _LaunchDocker('ls -al $MLPERF_SCRATCH_PATH'),
      _LaunchDocker('make clean'),
      _LaunchDocker('make link_dirs'),
      _LaunchDocker('ls -al build/'),
  ]
  target_vm.RobustRemoteCommand(' && '.join(setup_steps), should_log=True)

  download_data_steps = [
      bm_spec.env_cmd,
      _LaunchDocker(f'make download_data BENCHMARKS={model_name}'),
  ]
  target_vm.RobustRemoteCommand(
      ' && '.join(download_data_steps), should_log=True)

  build_steps = [
      bm_spec.env_cmd,
      _LaunchDocker(f'make download_model BENCHMARKS={model_name}'),
      _LaunchDocker(f'make preprocess_data BENCHMARKS={model_name}'),
      _LaunchDocker('make build'),
  ]
  target_vm.RobustRemoteCommand(' && '.join(build_steps), should_log=True)
def _UseMpi(vm, num_processes):
    """Rewrite the run script so that parallel steps are launched with mpirun.

    'runParallel' in the run script is replaced by an mpirun invocation that
    uses the machine file and num_processes. A second substitution then
    rewrites lines beginning with 'mpirun' as '& -parallel' (in sed, '&'
    stands for the matched text).

    Args:
        vm: The VM whose run script is edited.
        num_processes: Value passed to mpirun's -np flag.
    """
    script_path = _GetPath(_RUNSCRIPT)
    hostfile_path = _GetPath(_MACHINEFILE)
    mpirun_command = (f'mpirun -hostfile {hostfile_path} -mca btl ^openib '
                      f'--map-by node -np {num_processes}')
    # '|' is passed as the last positional argument; presumably it is the sed
    # delimiter, chosen because the replacement contains '/' -- TODO confirm.
    vm_util.ReplaceText(vm, 'runParallel', mpirun_command, script_path, '|')
    vm_util.ReplaceText(vm, '^mpirun.*', '& -parallel', script_path)
def _SetMeshDimensions(vm, dimensions):
    """Replace the 'medium' motorBike mesh dimensions with the requested ones.

    Args:
        vm: The VM whose blockMeshDict file is edited.
        dimensions: Value substituted into the second parenthesised group of
            the 'hex ... simpleGrading' line.
    """
    hex_template = 'hex (0 1 2 3 4 5 6 7) ({}) simpleGrading (1 1 1)'
    current_line, requested_line = (
        hex_template.format(value)
        for value in (_MOTORBIKE_DIMENSIONS['medium'], dimensions))
    vm_util.ReplaceText(
        vm, current_line, requested_line, _GetPath(_BLOCKMESHDICT))
Ejemplo n.º 4
0
def BuildDockerImages(vm):
    """Clone TensorFlow Serving onto the VM and build two Docker images from it.

    The repository is cloned at FLAGS.tf_serving_branch, Dockerfile.devel is
    patched to use that same branch, and then the devel image
    (benchmarks/tensorflow-serving-devel) and the serving image
    (benchmarks/tensorflow-serving) are built, in that order.

    Args:
        vm: The VM on which the git and docker commands are executed.
    """
    vm.InstallPackages('git')

    install_dir = linux_packages.INSTALL_DIR
    branch = FLAGS.tf_serving_branch
    vm.RemoteHostCommand(
        f'cd {install_dir} && git clone -b {branch} '
        'https://github.com/tensorflow/serving')

    # Make Dockerfile.devel check out the same branch that was just cloned.
    devel_dockerfile = posixpath.join(
        install_dir,
        'serving/tensorflow_serving/tools/docker/Dockerfile.devel')
    vm_util.ReplaceText(
        vm,
        'ARG TF_SERVING_VERSION_GIT_BRANCH=master',
        f'ARG TF_SERVING_VERSION_GIT_BRANCH={branch}',
        devel_dockerfile)

    docker_dir = f'{TF_SERVING_BASE_DIRECTORY}/tensorflow_serving/tools/docker'

    # First image: stop at the binary_build stage of the devel Dockerfile.
    vm.RemoteHostCommand(
        'sudo docker build --target binary_build '
        '-t benchmarks/tensorflow-serving-devel '
        f'-f {docker_dir}/Dockerfile.devel '
        f'{docker_dir}/')

    # Second image: the serving image, using the devel image as build image.
    vm.RemoteHostCommand(
        'sudo docker build '
        '-t benchmarks/tensorflow-serving '
        '--build-arg '
        'TF_SERVING_BUILD_IMAGE=benchmarks/tensorflow-serving-devel '
        f'-f {docker_dir}/Dockerfile '
        f'{docker_dir}/')
Ejemplo n.º 5
0
 def testReplaceTextUsesCorrectCommand(self):
     """Checks that ReplaceText issues the expected in-place sed command."""
     expected_command = 'sed -i -r "s|current|new|" test_file'
     vm_util.ReplaceText(
         self.mock_vm, 'current', 'new', 'test_file', regex_char='|')
     remote_command = self.mock_vm.RemoteCommand
     remote_command.assert_called_with(expected_command)
Ejemplo n.º 6
0
def _UseMpi(vm, num_processes, mapping):
    """Rewrite the run script so that parallel steps are launched with mpirun.

    'runParallel' in the run script is replaced by an mpirun command, and a
    second substitution rewrites lines beginning with 'mpirun' as
    '& -parallel' (in sed, '&' stands for the matched text).

    Args:
        vm: The worker VM to use MPI on.
        num_processes: An integer representing the total number of processes
            for the MPI job.
        mapping: A string for the mpirun --map-by flag.
    """
    script_path = _GetPath(_RUN_SCRIPT)
    mpirun_parts = [
        'mpirun',
        f'-hostfile {_MACHINE_FILE}',
        '-mca btl ^openib',
        f'--map-by {mapping}',
        f'-np {num_processes}',
    ]
    # '|' is passed as the last positional argument; presumably it is the sed
    # delimiter for this substitution -- TODO confirm.
    vm_util.ReplaceText(
        vm, 'runParallel', ' '.join(mpirun_parts), script_path, '|')
    vm_util.ReplaceText(vm, '^mpirun.*', '& -parallel', script_path)
def PrepareBenchmark(benchmark_spec, vm=None):
    """Clone the MLPerf training results repo and patch its launch scripts.

    The repository is cloned only when its directory is not already present
    under $HOME. When HYPERTHREADS is off, the launch script of the first
    matching benchmark (BERT, then MASK, then RESNET) is patched with the
    corresponding no-hyperthreading option.

    Args:
        benchmark_spec: The benchmark specification.
        vm: The VM to work on. Falls back to the first VM of the spec.

    Raises:
        errors.Config.InvalidValue: If the spec has TPUs and the VM also has
            an NVIDIA GPU.
    """
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
    if not vm:
        vm = benchmark_spec.vms[0]

    if benchmark_spec.tpus:
        if nvidia_driver.CheckNvidiaGpuExists(vm):
            raise errors.Config.InvalidValue(
                'Invalid configuration. GPUs and TPUs can not both present in the config.'
            )

    results_dir = f'training_results_{MLPERF_VERSION}'
    vm.RemoteCommand(
        f'if [ ! -d "$HOME/{results_dir}" ]; then '
        f'  git clone https://github.com/mlcommons/{results_dir}.git ; '
        'fi',
        should_log=True)
    vm.Install('pip3')

    if HYPERTHREADS.value:
        return

    # (benchmark keyword, text to find, replacement, script under benchmarks/)
    no_hyperthread_patches = (
        (BERT, "'bind_pyt'", "'bind_pyt' '--no_hyperthreads'",
         'bert/implementations/pytorch/run_with_docker.sh'),
        (MASK, "'bind_launch'", "'bind_launch' '--no_hyperthreads'",
         'maskrcnn/implementations/pytorch/run_and_time.sh'),
        (RESNET, '--cpu=exclusive', '--cpu=exclusive,nosmt',
         'resnet/implementations/mxnet/run_and_time.sh'),
    )
    for keyword, current_text, new_text, script in no_hyperthread_patches:
        if keyword in benchmark_spec.benchmark:
            vm_util.ReplaceText(
                vm, current_text, new_text,
                f'{results_dir}/NVIDIA/benchmarks/{script}')
            # Only the first matching benchmark is patched.
            break
def _SetDimensions(vm, dimensions):
  """Writes new mesh dimensions into blockMeshDict.

  A line such as
  hex (0 1 2 3 4 5 6 7) (20 8 8) simpleGrading (1 1 1)

  becomes
  hex (0 1 2 3 4 5 6 7) (dimensions) simpleGrading (1 1 1)

  Args:
    vm: The vm to make the replacement on.
    dimensions: String, new mesh dimensions to run with.
  """
  logging.info('Using dimensions (%s) in blockMeshDict', dimensions)
  # Group 1 is everything up to the second '(' and group 2 everything from
  # its ')'; only the text between the two groups is replaced.
  hex_line_regex = r'(hex \(.*\) \().*(\) .* \(.*\))'
  replacement = rf'\1{dimensions}\2'
  vm_util.ReplaceText(
      vm,
      hex_line_regex,
      replacement,
      _GetPath(_BLOCKMESHDICT),
      regex_char='|')
def _SetDimensions(vm, dimensions):
  """Writes new mesh dimensions into blockMeshDict.

  A line such as
  hex (0 1 2 3 4 5 6 7) (20 8 8) simpleGrading (1 1 1)

  becomes
  hex (0 1 2 3 4 5 6 7) (dimensions) simpleGrading (1 1 1)

  Whatever currently sits inside the second pair of parentheses is
  discarded; its contents are not inspected.

  Args:
    vm: The VM to make the replacement on.
    dimensions: String, new mesh dimensions to run with.
  """
  logging.info('Using dimensions (%s) in blockMeshDict', dimensions)
  mesh_file = _GetPath(_BLOCKMESHDICT)
  # The two capture groups keep the text on either side of the dimensions.
  keep_before_and_after = r'(hex \(.*\) \().*(\) .* \(.*\))'
  with_new_dimensions = rf'\1{dimensions}\2'
  vm_util.ReplaceText(vm, keep_before_and_after, with_new_dimensions,
                      mesh_file, regex_char='|')
def _UpdateScripts(benchmark_spec, node_rank):
    """Generate patched copies of the MLPerf config and run scripts on one VM.

    Three lists of (pattern, replacement) sed pairs are assembled -- for the
    config file(s), run_and_time.sh and run_with_docker.sh -- passed through a
    per-model helper when the benchmark matches one, and then applied with sed
    on the VM. The results are written to CONFIG, run_and_time1.sh and
    run_with_docker1.sh in the benchmark's implementation directory. Depending
    on flags, the Dockerfile in that directory is edited as well.

    Args:
      benchmark_spec: The benchmark specification.
      node_rank: int, The rank of the node for multi-node distributed
        training. It is used as the index into benchmark_spec.vms.
    """
    vm = benchmark_spec.vms[node_rank]
    benchmark = benchmark_spec.benchmark

    # TODO(tohaowu) Change config and script using a patch file.
    # request pairs to the sed command
    # each pair('str_A', 'str_B') indicates a request "replace anything
    # matching str_A to str_B" for a specific file
    # Overrides for the DGX* variable assignments in the config file(s).
    config_sed = []
    config_sed += [(r'DGXSYSTEM=.*', fr'DGXSYSTEM=\"{DGXSYSTEM}\"')]
    config_sed += [
        (r'DGXNNODES=.*',
         r'DGXNNODES={num_vms}'.format(num_vms=benchmark_spec.num_vms))
    ]
    config_sed += [(r'DGXNGPU=.*', r'DGXNGPU={gpus_per_vm}'.format(
        gpus_per_vm=benchmark_spec.gpus_per_vm))]
    # Socket and core counts are taken from vm.CheckLsCpu() on the target VM.
    config_sed += [(r'DGXNSOCKET=.*', r'DGXNSOCKET={nsockets}'.format(
        nsockets=vm.CheckLsCpu().socket_count))]
    config_sed += [(r'DGXSOCKETCORES=.*', r'DGXSOCKETCORES={ncores}'.format(
        ncores=vm.CheckLsCpu().cores_per_socket))]

    # Edits for run_and_time.sh. run_training1.sh is not created in this
    # function (presumably by a per-model helper -- TODO confirm).
    run_and_time_sed = []
    run_and_time_sed += [(r'run_training.sh', r'run_training1.sh')]
    run_and_time_sed += [(r'DGXSYSTEM=.*', fr'DGXSYSTEM=\"{DGXSYSTEM}\"')]

    if FLAGS.mlperf_keep_nccl_log:
        # Follow the shebang with NCCL debug exports; the debug file is
        # placed under /results.
        # NOTE(review): this pattern escapes '!' ('#\!') while the pair added
        # just below does not ('#!') -- confirm both match as intended.
        run_and_time_sed += [(r'#\!\/bin\/bash', r'#\!\/bin\/bash\n'
                              r'export NCCL_DEBUG=INFO\n'
                              r'export NCCL_DEBUG_SUBSYS=ALL\n'
                              r'export NCCL_DEBUG_FILE=\/results\/%h.%p.nccl')]

    # This pair is always added; without FLAGS.nccl_extra_params the
    # replacement is just the shebang followed by '\n'.
    nccl_exports = _GetNcclParams() if FLAGS.nccl_extra_params else r''
    run_and_time_sed += [(r'#!\/bin\/bash', r'#!\/bin\/bash\n'
                          fr'{nccl_exports}')]

    # Edits for run_with_docker.sh.
    run_sed = []
    run_sed += [(r'SYSLOGGING=1', r'SYSLOGGING=0')]
    # Replace the SLURM-specific commands (env | grep SLURM, scontrol, srun)
    # with an explicit node count and mpirun-based equivalents.
    run_sed += [(r'env [|] grep SLURM',
                 r'export SLURM_NNODES={num_vms}'.format(
                     num_vms=benchmark_spec.num_vms))]
    # Additionally mount the current directory at /workspace/<benchmark>1.
    run_sed += [(r'data -v \$LOGDIR',
                 r'data -v \$(pwd):\/workspace\/{model}1 -v \$LOGDIR'.format(
                     model=benchmark))]
    run_sed += [(r'scontrol show hostname',
                 r'mpirun -hostfile \$HOME\/{hostfile} -N 1 hostname -I '
                 r'\| awk \'{{print \$1}}\' '.format(hostfile=HOSTFILE))]
    run_sed += [(r'srun --mem=0 -N 1 -n 1 -w \$hostn',
                 r'mpirun -N 1 -n 1 -H \$hostn')]
    run_sed += [(r'sleep 30', r'sleep 60')]
    run_sed += [(r'docker exec -it', r'docker exec -t')]
    # Make the docker script call the generated run_and_time1.sh.
    run_sed += [(r'run_and_time.sh', r'run_and_time1.sh')]

    # Prefix the docker invocations with sudo.
    run_sed += [(r'nvidia-docker', r'sudo nvidia-docker')]
    run_sed += [(r'docker exec', r'sudo docker exec')]
    run_sed += [(r'docker container', r'sudo docker container')]

    if FLAGS.aws_efa or FLAGS.azure_infiniband:
        # Pass every entry under /dev/infiniband to the container as a
        # --device argument. '/' is escaped, presumably because the generated
        # sed expression uses '/' as its delimiter -- TODO confirm in
        # SedPairsToString.
        stdout, _ = vm.RemoteCommand('ls -d /dev/infiniband/*')
        devices = [device.replace('/', '\\/') for device in stdout.split()]
        device_args = ' '.join(f'--device={device}' for device in devices)
        run_sed += [(r'nvidia-docker run', fr'nvidia-docker run {device_args}')
                    ]

    if FLAGS.azure_infiniband:
        # Add an /opt/microsoft volume as the first _cont_mounts entry.
        run_sed += [
            (r'_cont_mounts=(',
             r'_cont_mounts=(\"--volume=\/opt\/microsoft:\/opt\/microsoft\" ')
        ]

    # Handed to the per-model helpers below; not used directly here.
    nvprof_flags = r'-f -o \/results\/%h.%p.nvprof --profile-child-processes'

    # Implementation directory of the benchmark: the model directory is
    # 'maskrcnn' for MASK benchmarks and the benchmark name otherwise; the
    # framework is 'mxnet' for RESNET and 'pytorch' otherwise.
    script_path = (
        r'$HOME/training_results_{version}/NVIDIA/benchmarks/{model}'
        r'/implementations/{framework}'.format(
            version=mlperf_benchmark.MLPERF_VERSION,
            model='maskrcnn'
            if mlperf_benchmark.MASK in benchmark else benchmark,
            framework='mxnet'
            if mlperf_benchmark.RESNET in benchmark else 'pytorch'))

    # Input config file(s) for the config sed step; MASK, RESNET and BERT
    # replace this default with their own list. The first matching benchmark
    # keyword wins, and its helper returns the three (possibly extended) lists.
    config_files = [CONFIG]
    if mlperf_benchmark.TRANSFORMER in benchmark:
        config_sed, run_sed, run_and_time_sed = _GetChangesForTransformer(
            benchmark_spec, vm, script_path, nvprof_flags, config_sed, run_sed,
            run_and_time_sed)

    elif mlperf_benchmark.SSD in benchmark:
        config_sed, run_sed, run_and_time_sed = _GetChangesForSSD(
            benchmark_spec, nvprof_flags, config_sed, run_sed,
            run_and_time_sed)

    elif mlperf_benchmark.GNMT in benchmark:
        config_sed, run_sed, run_and_time_sed = _GetChangesForGNMT(
            benchmark_spec, nvprof_flags, config_sed, run_sed,
            run_and_time_sed)

    elif mlperf_benchmark.MASK in benchmark:
        config_sed, run_sed, run_and_time_sed = _GetChangesForMask(
            benchmark_spec, node_rank, script_path, nvprof_flags, config_sed,
            run_sed, run_and_time_sed)

        config_files = ['config_DGXA100_multi_4x8x4.sh']

    elif mlperf_benchmark.RESNET in benchmark:
        config_sed, run_sed, run_and_time_sed = _GetChangesForResnet(
            benchmark_spec, node_rank, nvprof_flags, config_sed, run_sed,
            run_and_time_sed)

        config_files = [
            'config_DGXA100_common.sh', 'config_DGXA100_multi_8x8x204.sh'
        ]

    elif mlperf_benchmark.BERT in benchmark:
        config_sed, run_sed, run_and_time_sed = _GetChangesForBert(
            benchmark_spec, node_rank, nvprof_flags, config_sed, run_sed,
            run_and_time_sed)

        config_files = [
            'config_DGXA100_common.sh', 'config_DGXA100_8x8x48x1.sh'
        ]

    # Run the config edits over the selected config file(s) and write sed's
    # output to CONFIG, made executable.
    # NOTE(review): when config_files is still [CONFIG], input and output are
    # the same path, and the shell redirect would truncate it before sed
    # reads it -- confirm this case is handled.
    vm.RemoteCommand(f'cd {script_path} && '
                     f'sed "{mlperf_benchmark.SedPairsToString(config_sed)}" '
                     f'{" ".join(config_files)} > {CONFIG} && '
                     f'chmod 755 {CONFIG} ')

    # Patch run_and_time.sh, insert a 'source CONFIG' line at line 2 and save
    # the result as run_and_time1.sh.
    vm.RemoteCommand(
        f'cd {script_path} && '
        f'sed "{mlperf_benchmark.SedPairsToString(run_and_time_sed)}" '
        f'run_and_time.sh | sed "2 i source {CONFIG}" > run_and_time1.sh && '
        'chmod 755 run_and_time1.sh ')

    # Same treatment for run_with_docker.sh -> run_with_docker1.sh.
    vm.RemoteCommand(
        f'cd {script_path} && '
        f'sed "{mlperf_benchmark.SedPairsToString(run_sed)}" run_with_docker.sh '
        f'| sed "2 i source {CONFIG}" > run_with_docker1.sh && '
        'chmod 755 run_with_docker1.sh')

    docker_file = posixpath.join(script_path, 'Dockerfile')
    if FLAGS.nccl_net_plugin:
        # Before the Dockerfile's 'RUN apt-get update': add the
        # google-fast-socket apt source and key and remove the hpcx
        # libnccl-net.so; then add google-fast-socket to the install command.
        vm_util.ReplaceText(
            vm, 'RUN apt-get update',
            r'RUN echo \"deb https:\/\/packages.cloud.google.com\/apt '
            r'google-fast-socket main\" | '
            r'tee \/etc\/apt\/sources.list.d\/google-fast-socket.list\n'
            r'RUN curl -s -L '
            r'https:\/\/packages.cloud.google.com\/apt\/doc\/apt-key.gpg | '
            r'apt-key add -\n'
            r'RUN rm -f \/opt\/hpcx\/nccl_rdma_sharp_plugin\/lib\/libnccl-net.so\n'
            r'RUN apt-get update', docker_file)
        vm_util.ReplaceText(
            vm, 'apt-get install -y --no-install-recommends',
            'apt-get install -y --no-install-recommends google-fast-socket',
            docker_file)

    if FLAGS.aws_efa:
        # Append the EFA Dockerfile from the cloned pipeline repository to the
        # benchmark's Dockerfile, then strip or rewrite the listed patterns in
        # the combined file.
        vm.RemoteCommand(f'git clone {AWS_EFA_NCCL_BASEAMI_PIPELINE_URL}')
        vm.RemoteCommand(f'cat {NVIDIA_EFA_DOCKERFILE} >> {docker_file}')
        vm_util.ReplaceText(vm, 'FROM nvcr.*', '', docker_file)
        vm_util.ReplaceText(vm, 'yum-utils.*', '', docker_file)
        vm_util.ReplaceText(vm, 'python3-distutils.*', 'python3-distutils',
                            docker_file)
        vm_util.ReplaceText(vm, 'cmake', '', docker_file)
def Prepare(bm_spec: benchmark_spec.BenchmarkSpec) -> None:
  """Installs and sets up MLPerf Inference on the first VM of the spec.

  In order: clone the results repository, patch its Makefile, requirements
  files and (optionally) the server target QPS, install the GPU software
  stack when a GPU is detected, build the docker container, fetch data and
  models (preprovisioned for DLRM and BERT, via make targets otherwise),
  then build and generate engines.

  Args:
    bm_spec: The benchmark specification. Its env_cmd attribute is set by
      this function.
  """
  vm = bm_spec.vms[0]

  repository = f'inference_results_{MLPERF_INFERENCE_VERSION}'
  nvidia_dir = f'{repository}/closed/NVIDIA'
  vm.RemoteCommand(f'git clone https://github.com/mlcommons/{repository}.git')

  vm_util.ReplaceText(vm, 'shell uname -p', 'shell uname -m',
                      f'{nvidia_dir}/Makefile')

  requirements_files = tuple(
      f'{nvidia_dir}/docker/requirements.{suffix}' for suffix in (1, 2))
  vm_util.ReplaceText(vm, 'opencv-python-headless==4.5.2.52',
                      'opencv-python-headless==4.5.3.56',
                      requirements_files[0])

  benchmark = FLAGS.mlperf_benchmark
  target_qps = _SERVER_TARGET_QPS.value
  if target_qps:
    vm_util.ReplaceText(
        vm, 'server_target_qps = .*', f'server_target_qps = {target_qps}',
        f'{nvidia_dir}/configs/{benchmark}/Server/__init__.py')

  # Switch 'git:' URLs in both requirements files to 'https:'.
  for requirements_file in requirements_files:
    vm_util.ReplaceText(vm, 'git:', 'https:', requirements_file)

  if nvidia_driver.CheckNvidiaGpuExists(vm):
    for package in ('cuda_toolkit', 'nvidia_driver', 'nvidia_docker'):
      vm.Install(package)

  def _LaunchDocker(docker_command):
    """Wraps a command so that it runs via the Makefile's launch_docker."""
    return f'make launch_docker DOCKER_COMMAND="{docker_command}"'

  def _DownloadAndUntar(directory, module, archive):
    """Fetches a preprovisioned tarball, extracts it and deletes it."""
    vm.DownloadPreprovisionedData(directory, module, archive)
    vm.RemoteCommand(f'cd {directory} && '
                     f'tar -zxvf {archive} && '
                     f'rm -f {archive}')

  bm_spec.env_cmd = (f'export MLPERF_SCRATCH_PATH={_MLPERF_SCRATCH_PATH} && '
                     f'cd {nvidia_dir}')
  docker.AddUser(vm)
  container_setup = [
      bm_spec.env_cmd,
      'make build_docker NO_BUILD=1',
      'make docker_add_user',
      _LaunchDocker('make clean'),
      _LaunchDocker('make link_dirs'),
  ]
  vm.RobustRemoteCommand(' && '.join(container_setup), should_log=True)

  if benchmark == mlperf_benchmark.DLRM:
    # Raw data: a gzip file that is decompressed in place.
    data_dir = posixpath.join(_MLPERF_SCRATCH_PATH, 'data', _DLRM_DATA_MODULE)
    vm.DownloadPreprovisionedData(data_dir, _DLRM_DATA_MODULE, _DLRM_DATA)
    vm.RemoteCommand(f'cd {data_dir} && gzip -d {_DLRM_DATA}')

    # Model tarball plus the row frequency file.
    model_dir = posixpath.join(_MLPERF_SCRATCH_PATH, 'models', benchmark)
    _DownloadAndUntar(model_dir, benchmark, _DLRM_MODEL)
    vm.DownloadPreprovisionedData(model_dir, benchmark, _DLRM_ROW_FREQ)

    # Already-preprocessed data tarball.
    preprocessed_data_dir = posixpath.join(
        _MLPERF_SCRATCH_PATH, 'preprocessed_data', _DLRM_DATA_MODULE)
    _DownloadAndUntar(preprocessed_data_dir, _DLRM_DATA_MODULE,
                      _DLRM_PREPROCESSED_DATA)
  elif benchmark == mlperf_benchmark.BERT:
    # (directory under the scratch path, files to place there), in the order
    # data, model, preprocessed data.
    bert_downloads = (
        (('data', 'squad'), ('dev-v1.1.json',)),
        (('models', benchmark),
         ('bert_large_v1_1.onnx', 'bert_large_v1_1_fake_quant.onnx',
          'vocab.txt')),
        (('preprocessed_data', 'squad_tokenized'),
         ('input_ids.npy', 'input_mask.npy', 'segment_ids.npy')),
    )
    for subdirectory, filenames in bert_downloads:
      directory = posixpath.join(_MLPERF_SCRATCH_PATH, *subdirectory)
      for filename in filenames:
        vm.DownloadPreprovisionedData(directory, benchmark, filename)
  else:
    # No preprovisioned data: let the repository's make targets do the work,
    # one remote command per target.
    for make_target in ('download_data', 'download_model', 'preprocess_data'):
      vm.RobustRemoteCommand(
          f'{bm_spec.env_cmd} && '
          + _LaunchDocker(f'make {make_target} BENCHMARKS={benchmark}'),
          should_log=True)

  run_args = (f'--benchmarks={FLAGS.mlperf_benchmark} '
              f'--scenarios={_SCENARIOS.value}')
  build_steps = [
      bm_spec.env_cmd,
      _LaunchDocker('make build'),
      _LaunchDocker(f"make generate_engines RUN_ARGS='{run_args}'"),
  ]
  vm.RobustRemoteCommand(' && '.join(build_steps), should_log=True)
def _SetNumProcesses(vm, num_processes):
    """Set numberOfSubdomains in the decompose dict file to num_processes.

    Args:
        vm: The VM whose decompose dict file is edited.
        num_processes: Number of subdomains to write.
    """
    logging.info('Decomposing into %s subdomains', num_processes)
    subdomains_entry = f'numberOfSubdomains {num_processes!s};'
    vm_util.ReplaceText(
        vm, 'numberOfSubdomains.*', subdomains_entry, _GetPath(_DECOMPOSEDICT))
Ejemplo n.º 13
0
def Run(benchmark_spec):
  """Run MLPerf on the cluster.

  Builds the shell command for either the TPU or the GPU implementation of
  MLPerf v0.6, runs it on the first VM and converts its output into samples.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
      required to run the benchmark.

  Returns:
    A list of sample.Sample objects.

  Raises:
    ValueError: If TPUs are configured but the accelerator type is not one of
      the supported v3 slices, or if no TPUs are configured and the benchmark
      name matches none of the known GPU benchmarks.
  """
  _UpdateBenchmarkSpecWithFlags(benchmark_spec)
  vm = benchmark_spec.vms[0]
  benchmark = benchmark_spec.benchmark

  if benchmark_spec.tpus:
    # For MLPerf v0.6, the benchmark code differs between hardware types, so
    # the accelerator type is part of the script paths.
    supported_tpu_types = ('v3-32', 'v3-128', 'v3-256', 'v3-512', 'v3-1024',
                           'v3-2048')
    tpu_type = benchmark_spec.tpu_groups['train'].GetAcceleratorType()
    if tpu_type not in supported_tpu_types:
      raise ValueError(
          'MLPerf configurations do not support the hardware in PKB. PKB may '
          'need to be updated if this is a new TPU type.')

    google_benchmarks = '$HOME/training_results_v0.6/Google/benchmarks'
    run_path = f'{google_benchmarks}/{benchmark}/tpu-{tpu_type}'
    code_path = (f'{google_benchmarks}/{benchmark}/implementations/'
                 f'tpu-{tpu_type}-{benchmark}')

    # Name of the model directory inside code_path.
    if 'mask' in benchmark:
      model = 'mask_rcnn'
    elif 'gnmt' in benchmark:
      model = 'nmt'
    else:
      model = benchmark

    mlperf_benchmark_cmd = (f'cd {code_path} && '
                            f'export PYTHONPATH=$(pwd):$(pwd)/{model} && '
                            f'cd {model} && '
                            f'{run_path}/run_and_time.sh')

    if 'ssd' in benchmark:
      mlperf_benchmark_cmd = (
          'export '
          f'MLP_GCS_RESNET_CHECKPOINT={FLAGS.mlperf_gcs_resnet_checkpoint}'
          f' && {mlperf_benchmark_cmd}')

  else:
    benchmark_path = '$HOME/training_results_v0.6/NVIDIA/benchmarks'
    common_env = 'DGXSYSTEM=DGX1 NEXP=1'
    # (keyword in the benchmark name, implementation directory, environment).
    # The first keyword found in the benchmark name wins.
    gpu_benchmarks = (
        ('resnet', 'resnet/implementations/mxnet',
         'DATADIR=/data/imagenet LOGDIR=/tmp/resnet PULL=0'),
        ('transformer', 'transformer/implementations/pytorch',
         'DATADIR=/data/wmt/utf8 LOGDIR=/tmp/transformer PULL=0'),
        ('minigo', 'minigo/implementations/tensorflow',
         'LOGDIR=/tmp/minigo CONT=mlperf-nvidia:minigo'),
        ('mask', 'maskrcnn/implementations/pytorch',
         'LOGDIR=/tmp/mask DATADIR=/data PULL=0'),
        ('gnmt', 'gnmt/implementations/pytorch',
         'LOGDIR=/tmp/gnmt DATADIR=/data/gnmt PULL=0'),
        ('ssd', 'ssd/implementations/pytorch',
         'LOGDIR=/tmp/ssd DATADIR=/data PULL=0'),
    )
    for keyword, implementation_dir, env in gpu_benchmarks:
      if keyword in benchmark:
        run_path = posixpath.join(benchmark_path, implementation_dir)
        break
    else:
      # Without this check an unrecognised benchmark would leave run_path and
      # env unbound and fail below with an UnboundLocalError.
      raise ValueError(
          f'Unsupported MLPerf benchmark for a GPU run: {benchmark!r}. '
          'Expected a name containing one of: '
          + ', '.join(keyword for keyword, _, _ in gpu_benchmarks) + '.')

    run_script = posixpath.join(run_path, 'run.sub')
    vm_util.ReplaceText(vm, 'SYSLOGGING=1', 'SYSLOGGING=0', run_script)
    mlperf_benchmark_cmd = (
        f'cd {run_path} && chmod 755 run.sub && sudo {common_env} {env} '
        './run.sub')

  if nvidia_driver.CheckNvidiaGpuExists(vm):
    # Prefix the command with the environment variables returned for this VM.
    mlperf_benchmark_cmd = (
        f'{tensorflow.GetEnvironmentVars(vm)} {mlperf_benchmark_cmd}')

  samples = []
  metadata = _CreateMetadataDict(benchmark_spec)
  stdout, _ = vm.RobustRemoteCommand(mlperf_benchmark_cmd, should_log=True)
  samples.extend(
      MakeSamplesFromOutput(
          metadata,
          stdout,
          use_tpu=bool(benchmark_spec.tpus),
          model=benchmark_spec.benchmark))
  return samples
def _SetDecomposeMethod(vm, decompose_method):
    """Write the decomposition method into the decompose dict file.

    Args:
        vm: The VM whose decompose dict file is edited.
        decompose_method: Name of the decomposition method to write.
    """
    logging.info('Using %s decomposition', decompose_method)
    method_entry = f'method {decompose_method!s};'
    vm_util.ReplaceText(
        vm, 'method.*', method_entry, _GetPath(_DECOMPOSEDICT))
Ejemplo n.º 15
0
def PrepareRunner(benchmark_spec, vm=None):
    """Install and set up MLPerf on the target vm.

    TPU configurations: the first VM creates the GCS bucket and records it on
    the spec; every VM then installs the Python dependencies, runs the
    benchmark's `setup.sh`, and rewrites the flags in `run_and_time.sh` so
    they point at the bucket, data directory, and TPU of this run.

    Non-TPU configurations: installs the CUDA toolkit (when an NVIDIA GPU is
    present) and nvidia-docker, links /scratch to /data if /data is missing,
    then builds the docker image and downloads the data for each benchmark
    whose name matches.

    Args:
      benchmark_spec: The benchmark specification
      vm: The VM to work on. Defaults to the first VM of the spec.

    Raises:
      ValueError: On TPU configurations, if the accelerator type is not one of
        the supported v3 slices, or if the benchmark name matches none of the
        known models when choosing the data directory.
    """
    vm = vm or benchmark_spec.vms[0]
    if benchmark_spec.tpus:
        if vm == benchmark_spec.vms[0]:
            # Only the first VM creates the bucket; it is stored on the spec
            # so that the remaining VMs can look it up below.
            storage_service = gcs.GoogleCloudStorageService()
            benchmark_spec.storage_service = storage_service
            if FLAGS.mlperf_bucket:
                bucket = FLAGS.mlperf_bucket
                benchmark_spec.model_dir = f'gs://{bucket}/pkb-{FLAGS.run_uri}'
            else:
                bucket = f'pkb-{FLAGS.run_uri}'
                benchmark_spec.model_dir = f'gs://{bucket}'

            benchmark_spec.bucket = bucket
            location = benchmark_spec.tpu_groups['train'].GetZone()
            storage_service.PrepareService(util.GetRegionFromZone(location))
            storage_service.MakeBucket(bucket)
            storage_service.AclBucket(benchmark_spec.gcp_service_account,
                                      gcs.WRITER, bucket)

        # Read the bucket back from the spec: the local name above is only
        # bound for the first VM, yet the script rewrites below need it for
        # every VM.
        bucket = benchmark_spec.bucket

        # For MLPerf 1.0, the benchmark code differs per hardware type.
        supported_accelerators = ('v3-32', 'v3-128', 'v3-256', 'v3-512',
                                  'v3-1024', 'v3-2048')
        accelerator_type = (
            benchmark_spec.tpu_groups['train'].GetAcceleratorType())
        if accelerator_type not in supported_accelerators:
            raise ValueError(
                'MLPerf configurations do not support the hardware in PKB. PKB may '
                'need to be updated if this is a new TPU type.')

        benchmark = benchmark_spec.benchmark
        run_path = (
            f'$HOME/training_results_{MLPERF_VERSION}/Google/benchmarks/'
            f'{benchmark}/tpu-{accelerator_type}')

        # Sub-directory of the implementation that holds the model code.
        if MASK in benchmark:
            model = 'mask_rcnn'
        elif GNMT in benchmark:
            model = 'nmt'
        else:
            model = benchmark

        code_path = (
            f'$HOME/training_results_{MLPERF_VERSION}/Google/benchmarks/'
            f'{benchmark}/implementations/tpu-{accelerator_type}-{benchmark}')

        vm.RemoteCommand('pip3 install --upgrade pyyaml==3.13 ')
        vm.RemoteCommand('pip3 install cloud-tpu-profiler==1.12')
        if MASK in benchmark or SSD in benchmark:
            # Install the coco package, to load the coco dataset for Mask-RCNN
            # and SSD benchmarks.
            # TODO(user): coco whl package for python 3.5
            vm.RemoteCommand(
                'cd /tmp && '
                f'wget https://storage.cloud.google.com/mlperf_artifcats/{MLPERF_VERSION}_training/coco-1.1-cp36-cp36m-linux_x86_64.whl'
            )

        setup_script = posixpath.join(run_path, 'setup.sh')
        vm_util.ReplaceText(vm, '--progress-bar off', ' ', setup_script)
        vm_util.ReplaceText(vm, 'pip ', 'pip3 ', setup_script)
        vm.RemoteCommand(f'chmod 755 {setup_script} && {setup_script}')

        if MASK not in benchmark:
            vm.RemoteCommand(
                'pip3 uninstall -y tf-estimator-nightly && '
                'pip3 install tf-estimator-nightly==1.14.0.dev2019051801')

        if RESNET in benchmark:
            data_dir = benchmark_spec.imagenet_data_dir
        elif TRANSFORMER in benchmark:
            data_dir = benchmark_spec.wmt_data_dir
        elif MASK in benchmark:
            data_dir = benchmark_spec.coco_data_dir
        elif GNMT in benchmark:
            data_dir = benchmark_spec.gnmt_data_dir
        elif SSD in benchmark:
            data_dir = benchmark_spec.coco_data_dir
        elif BERT in benchmark:
            data_dir = benchmark_spec.bert_data_dir
        else:
            raise ValueError(
                f'Unknown operation, cannot find {benchmark} in benchmark')

        run_script = posixpath.join(run_path, 'run_and_time.sh')
        # Slashes are pre-escaped because these values are spliced into the
        # replacement text handed to vm_util.ReplaceText.
        data_dir = data_dir.replace('/', r'\/')
        checkpoint = FLAGS.mlperf_gcs_resnet_checkpoint.replace('/', r'\/')
        decode_dir = FLAGS.mlperf_transformer_decode_dir.replace('/', r'\/')
        tpu = benchmark_spec.tpu_groups['train'].GetName()

        # (pattern, replacement) pairs applied to run_and_time.sh, in order.
        script_rewrites = [
            ('--model_dir=.*',
             r'--model_dir=gs:\/\/{} \\\\'.format(bucket)),
            ('--data_dir=.*',
             r'--data_dir={} \\\\'.format(data_dir)),
            ('--training_file_pattern=.*',
             r'--training_file_pattern={}\/train-* \\\\'.format(data_dir)),
            ('--validation_file_pattern=.*',
             r'--validation_file_pattern={}\/val-* \\\\'.format(data_dir)),
            ('--val_json_file=.*',
             r'--val_json_file={}\/instances_val2017.json \\\\'.format(
                 data_dir)),
            ('--resnet_checkpoint=.*',
             r'--resnet_checkpoint={} \\\\'.format(checkpoint)),
            ('--decode_from_file=.*',
             r'--decode_from_file={}\/wmt14-en-de.src \\\\'.format(decode_dir)),
            ('--decode_reference=.*',
             r'--decode_reference={}\/wmt14-en-de.ref \\\\'.format(decode_dir)),
            ('--decode_to_file=.*',
             (r'--decode_to_file={}\/decode.transformer_mlperf_tpu.'
              r'translate_ende_wmt32k_packed.2x2_log_1018_2 \\\\'
             ).format(bucket)),
            ('--tpu=.*',
             r'--tpu={} \\\\'.format(tpu)),
            ('--output_dir=.*',
             r'--output_dir=gs:\/\/{} \\\\'.format(bucket)),
            ('--cloud_tpu_name=.*',
             r'--cloud_tpu_name={} \\\\'.format(tpu)),
            ('--out_dir=.*',
             r'--out_dir=gs:\/\/{} \\\\'.format(bucket)),
            ('--tpu_name=.*',
             r'--tpu_name={} \\\\'.format(tpu)),
        ]
        for pattern, replacement in script_rewrites:
            vm_util.ReplaceText(vm, pattern, replacement, run_script)
        vm.RemoteCommand(f'chmod 755 {run_script}')

        if GNMT in benchmark:
            metric_script = posixpath.join(code_path, model, 'metric.py')
            vm_util.ReplaceText(vm, ' sacrebleu -t',
                                ' python3 -m sacrebleu -t', metric_script)
    else:
        benchmark_spec.model_dir = '/tmp'

        if nvidia_driver.CheckNvidiaGpuExists(vm):
            vm.Install('cuda_toolkit')

        vm.Install('nvidia_docker')
        vm.RemoteCommand(
            'if [ ! -d "/data" ]; then sudo ln -s /scratch /data; fi')

        benchmark = benchmark_spec.benchmark
        nvidia_benchmarks = (
            f'training_results_{MLPERF_VERSION}/NVIDIA/benchmarks')

        if RESNET in benchmark:
            vm.RemoteCommand(
                f'cd {nvidia_benchmarks}/resnet/implementations/mxnet &&'
                ' sudo docker build --network=host . -t mlperf-nvidia:image_classification',
                should_log=True)
            _DownloadData(benchmark_spec.imagenet_data_dir,
                          posixpath.join('/data', 'imagenet'), vm)

        if TRANSFORMER in benchmark:
            vm.RemoteCommand(
                f'cd {nvidia_benchmarks}/transformer/implementations/pytorch &&'
                ' sudo docker build --network=host . -t mlperf-nvidia:translation',
                should_log=True)
            _DownloadData(benchmark_spec.wmt_data_dir,
                          posixpath.join('/data', 'wmt'), vm)

        if MINIGO in benchmark:
            build_path = f'{nvidia_benchmarks}/minigo/implementations/tensorflow'
            run_script = posixpath.join(build_path, 'run_and_time.sh')
            vm_util.ReplaceText(
                vm, 'get_data.py', 'get_data.py --src_dir={}'.format(
                    FLAGS.minigo_model_dir.replace('/', r'\/')), run_script)
            vm.RemoteCommand('cd {} && sudo docker build --network=host -t '
                             'mlperf-nvidia:minigo .'.format(build_path),
                             should_log=True)

        if MASK in benchmark:
            vm.RemoteCommand(
                f'cd {nvidia_benchmarks}/maskrcnn/implementations/pytorch && '
                'sudo docker build --network=host -t mlperf-nvidia:object_detection . ',
                should_log=True)
            _DownloadData(benchmark_spec.coco_data_dir,
                          posixpath.join('/data', 'coco2017'), vm)

        if GNMT in benchmark:
            vm.RemoteCommand(
                f'cd {nvidia_benchmarks}/gnmt/implementations/pytorch && '
                'sudo docker build --network=host -t mlperf-nvidia:rnn_translator . ',
                should_log=True)
            _DownloadData(benchmark_spec.gnmt_data_dir,
                          posixpath.join('/data', 'gnmt'), vm)

        if SSD in benchmark:
            vm.RemoteCommand(
                f'cd {nvidia_benchmarks}/ssd/implementations/pytorch && '
                'sudo docker build --network=host -t mlperf-nvidia:single_stage_detector . ',
                should_log=True)
            _DownloadData(benchmark_spec.coco_data_dir,
                          posixpath.join('/data', 'coco2017'), vm)

        if BERT in benchmark:
            vm.RemoteCommand(
                f'cd {nvidia_benchmarks}/bert/implementations/pytorch && '
                'sudo docker build --network=host -t mlperf-nvidia:language_model . ',
                should_log=True)
            _DownloadData(benchmark_spec.bert_data_dir,
                          posixpath.join('/data', 'bert_data'), vm)
def _SetParallelDecompositionMethod(vm, decompose_method):
    """Record the parallel decomposition method to use on the VM.

    Hands `vm_util.ReplaceText` the pattern ``method.*`` and the replacement
    ``method <decompose_method>;`` for the file located at
    ``_GetPath(_DECOMPOSEDICT)``.

    Args:
      vm: The VM whose decomposition dictionary file is edited.
      decompose_method: Name of the decomposition method to write.
    """
    method_entry = 'method %s;' % decompose_method
    dict_path = _GetPath(_DECOMPOSEDICT)
    vm_util.ReplaceText(vm, 'method.*', method_entry, dict_path)
# Example no. 17
# 0
def _DownloadData(benchmark_spec, rank):
    """Fetch and preprocess the training data on one VM.

    Steps, all executed remotely on the selected VM:
      1. Install pip and wget, clone fairseq if $HOME/fairseq is absent, pin
         the torch requirement in its setup.py, and install it in editable
         mode together with pyarrow.
      2. Download and unzip the WIKI_TEXT archive into DATA_PATH, and fetch
         the encoder, vocabulary, and dictionary files into
         DATA_PATH/gpt2_bpe.
      3. BPE-encode the train, valid, and test splits and run
         fairseq-preprocess over them.
      4. Append WORD_COUNT entries to mlm-w103/dict.txt and make
         `benchmark_spec.num_copies` copies of the preprocessed data, each
         with that dict.txt.

    Args:
      benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.
      rank: integer, the node rank in distributed training; used as the index
        into benchmark_spec.vms.
    """
    vm = benchmark_spec.vms[rank]
    vm.InstallPackages('python3-pip')
    vm.Install('wget')

    # fairseq checkout and installation.
    vm.RemoteCommand(
        f'[ -d $HOME/fairseq ] || git clone {FAIRSEQ_GIT} -b {FAIRSEQ_BRANCH}')
    fairseq_setup = posixpath.join('fairseq', 'setup.py')
    torch_requirement = f"'torch >= {FLAGS.robertammlm_torch_version}'"
    vm_util.ReplaceText(vm, "'torch'", torch_requirement, fairseq_setup)
    # Prefix that puts the conda binaries first on PATH for each command.
    conda_path = 'PATH=/opt/conda/bin:$PATH'
    vm.RemoteCommand(f'{conda_path} python3 -m pip install pyarrow')
    vm.RemoteCommand(
        f'cd fairseq && {conda_path} python3 -m pip install --editable .')

    # Raw text archive.
    vm.RemoteCommand(f'mkdir -p {DATA_PATH}')
    archive_path = posixpath.join(DATA_PATH, posixpath.basename(WIKI_TEXT))
    vm.RemoteCommand(f'wget -O {archive_path} {WIKI_TEXT}')
    vm.RemoteCommand(f'unzip {archive_path} -d {DATA_PATH}')

    # GPT-2 BPE encoder files.
    gpt2_bpe_dir = posixpath.join(DATA_PATH, 'gpt2_bpe')
    vm.RemoteCommand(f'mkdir -p {gpt2_bpe_dir}')
    vm.RemoteCommand(f'wget -O {gpt2_bpe_dir}/encoder.json {ENCODER_JSON}')
    vm.RemoteCommand(f'wget -O {gpt2_bpe_dir}/vocab.bpe {VOCAB_BPE}')

    for split in ('train', 'valid', 'test'):
        encode_cmd = (f'cd {DATA_PATH} && {conda_path} python3 -m '
                      'examples.roberta.multiprocessing_bpe_encoder '
                      '--encoder-json gpt2_bpe/encoder.json '
                      '--vocab-bpe gpt2_bpe/vocab.bpe '
                      f'--inputs wikitext-103-raw/wiki.{split}.raw '
                      f'--outputs wikitext-103-raw/wiki.{split}.bpe '
                      '--keep-empty '
                      '--workers 60 ')
        vm.RemoteCommand(encode_cmd)

    # Binarize the encoded splits.
    vm.RemoteCommand(f'wget -O {gpt2_bpe_dir}/dict.txt {FAIRSEQ_DICT}')
    preprocess_cmd = (f'cd {DATA_PATH} && {conda_path} fairseq-preprocess '
                      '--only-source  --srcdict gpt2_bpe/dict.txt '
                      '--trainpref wikitext-103-raw/wiki.train.bpe '
                      '--validpref wikitext-103-raw/wiki.valid.bpe '
                      '--testpref wikitext-103-raw/wiki.test.bpe '
                      '--destdir data-bin/wikitext-103 '
                      '--workers 60')
    vm.RemoteCommand(preprocess_cmd)

    # Dictionary with one "<index> 1" line per entry, appended to dict.txt.
    data_bin = posixpath.join(DATA_PATH, 'data-bin')
    vm.RemoteCommand(f'mkdir -p {data_bin}/mlm-w103')
    vm.RemoteCommand(f'for x in `seq 1 {WORD_COUNT}`;'
                     f'do echo "$x 1" >> {data_bin}/mlm-w103/dict.txt;'
                     'done')

    # One numbered copy of the preprocessed data per requested copy.
    for copy_index in range(benchmark_spec.num_copies):
        copy_dir = f'{data_bin}/mlm-w103/{copy_index}'
        vm.RemoteCommand(f'cp -r {data_bin}/wikitext-103 {copy_dir}')
        vm.RemoteCommand(f'cp {data_bin}/mlm-w103/dict.txt {copy_dir}')
# Example no. 18
# 0
def Run(benchmark_spec):
    """Run MLPerf on the cluster.

    Builds one shell command for the first VM and runs it: on TPU
    configurations it invokes the benchmark's `run_and_time.sh`; otherwise it
    patches and invokes `run_with_docker.sh` from the NVIDIA implementation.
    Samples are parsed from the command's stdout only when the profiler flag
    contains NONE.

    Args:
      benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

    Returns:
      A list of sample.Sample objects.

    Raises:
      ValueError: On TPU configurations, if the accelerator type is not one of
        the supported v3 slices.
      KeyError: On non-TPU configurations, if the benchmark name has no entry
        in the implementation path table.
    """
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
    vm = benchmark_spec.vms[0]
    benchmark = benchmark_spec.benchmark
    if benchmark_spec.tpus:
        # For MLPerf 1.0, the benchmark code differs per hardware type.
        supported_accelerators = ('v3-32', 'v3-128', 'v3-256', 'v3-512',
                                  'v3-1024', 'v3-2048')
        accelerator_type = (
            benchmark_spec.tpu_groups['train'].GetAcceleratorType())
        if accelerator_type not in supported_accelerators:
            raise ValueError(
                'MLPerf configurations do not support the hardware in PKB. PKB may '
                'need to be updated if this is a new TPU type.')

        run_path = (
            f'$HOME/training_results_{MLPERF_VERSION}/Google/benchmarks/'
            f'{benchmark}/tpu-{accelerator_type}')
        code_path = (
            f'$HOME/training_results_{MLPERF_VERSION}/Google/benchmarks/'
            f'{benchmark}/implementations/tpu-{accelerator_type}-{benchmark}')

        # Sub-directory of the implementation that holds the model code.
        if MASK in benchmark:
            model = 'mask_rcnn'
        elif GNMT in benchmark:
            model = 'nmt'
        else:
            model = benchmark

        mlperf_benchmark_cmd = (
            f'cd {code_path} && '
            f'export PYTHONPATH=$(pwd):$(pwd)/{model} && '
            f'cd {model} && '
            f'{run_path}/run_and_time.sh')

        if SSD in benchmark:
            mlperf_benchmark_cmd = (
                'export '
                f'MLP_GCS_RESNET_CHECKPOINT={FLAGS.mlperf_gcs_resnet_checkpoint}'
                f' && {mlperf_benchmark_cmd}')

    else:
        run_sub_paths = {
            RESNET: 'resnet/implementations/mxnet',
            TRANSFORMER: 'transformer/implementations/pytorch',
            MINIGO: 'minigo/implementations/tensorflow',
            MASK: 'maskrcnn/implementations/pytorch',
            GNMT: 'gnmt/implementations/pytorch',
            SSD: 'ssd/implementations/pytorch',
            BERT: 'bert/implementations/pytorch',
        }
        benchmark_path = f'$HOME/training_results_{MLPERF_VERSION}/NVIDIA/benchmarks'
        run_path = posixpath.join(benchmark_path, run_sub_paths[benchmark])

        # Environment shared by every benchmark, then per-benchmark additions.
        env_vars = {
            'DGXSYSTEM': DGXSYSTEM,
            'NEXP': 1,
            'PULL': 0,
            'LOGDIR': f'/tmp/{benchmark}',
        }
        benchmark_env_vars = {
            RESNET: {},
            TRANSFORMER: {
                'DATADIR': '/data/wmt/utf8'
            },
            MINIGO: {
                'CONT': 'mlperf-nvidia:minigo'
            },
            MASK: {},
            GNMT: {
                'DATADIR': '/data/gnmt'
            },
            SSD: {
                'DATADIR': '/data'
            },
            BERT: {}
        }
        env_vars.update(benchmark_env_vars[benchmark])

        run_script = posixpath.join(run_path, 'run_with_docker.sh')
        # Order matters: 'docker exec -it' must be rewritten before the
        # broader 'docker exec' pattern is prefixed with sudo.
        script_rewrites = [
            ('SYSLOGGING=1', 'SYSLOGGING=0'),
            ('docker exec -it', 'docker exec -t'),
            ('nvidia-docker', 'sudo nvidia-docker'),
            ('docker exec', 'sudo docker exec'),
            ('docker container', 'sudo docker container'),
        ]
        if benchmark == MASK:
            script_rewrites.append(
                (r'_cont_mounts=\(',
                 r'_cont_mounts=\(\"--volume=\${PKLDIR}:\/pkl_coco\" '))
        for pattern, replacement in script_rewrites:
            vm_util.ReplaceText(vm, pattern, replacement, run_script)

        env_prefix = ' '.join(
            f'{key}={value}' for key, value in env_vars.items())
        if nvidia_driver.CheckNvidiaGpuExists(vm):
            env_prefix = f'{tensorflow.GetEnvironmentVars(vm)} {env_prefix}'

        mlperf_benchmark_cmd = (f'chmod 755 {run_script} && '
                                f'cd {run_path} && '
                                f'{env_prefix} {run_script}')

    samples = []
    metadata = _CreateMetadataDict(benchmark_spec)
    stdout, _ = vm.RobustRemoteCommand(mlperf_benchmark_cmd, should_log=True)
    # Output is only parsed into samples when no profiler is selected.
    if NONE in FLAGS.mlperf_profiler:
        samples.extend(
            MakeSamplesFromOutput(metadata,
                                  stdout,
                                  use_tpu=bool(benchmark_spec.tpus),
                                  model=benchmark))
    return samples