Code example #1
def Run(benchmark_spec):
    """Run MNIST on the cluster.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
    vm = benchmark_spec.vms[0]
    mnist_benchmark_dir = 'tpu-demos/cloud_tpu/models/mnist'
    mnist_benchmark_cmd = (
        'python mnist.py --master={master} --train_file={train_file} '
        '--use_tpu={use_tpu} '
        '--train_steps={train_steps}'.format(
            master=benchmark_spec.master,
            train_file=benchmark_spec.train_file,
            use_tpu=benchmark_spec.use_tpu,
            train_steps=benchmark_spec.train_steps))
    if benchmark_spec.model_dir:
        mnist_benchmark_cmd = '{cmd} --model_dir {model_dir}'.format(
            cmd=mnist_benchmark_cmd, model_dir=benchmark_spec.model_dir)
    if FLAGS.tf_device == 'gpu':
        mnist_benchmark_cmd = '%s %s' % (tensorflow.GetEnvironmentVars(vm),
                                         mnist_benchmark_cmd)
    run_command = 'cd %s && %s' % (mnist_benchmark_dir, mnist_benchmark_cmd)
    stdout, stderr = vm.RobustRemoteCommand(run_command, should_log=True)
    return _MakeSamplesFromOutput(benchmark_spec, stdout + stderr)
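All of the examples in this collection follow the same shape: format a shell command from fields on benchmark_spec, optionally prefix TensorFlow environment variables, and run it over SSH with vm.RobustRemoteCommand. Below is a self-contained sketch of just the string-assembly step; the stub spec and its values are hypothetical stand-ins for a real PKB benchmark_spec.

class _StubSpec(object):
  """Hypothetical stand-in for the fields Run() reads off benchmark_spec."""
  master = 'grpc://10.240.1.2:8470'
  train_file = 'gs://some-bucket/mnist/train.tfrecords'
  use_tpu = True
  train_steps = 2000
  model_dir = 'gs://some-bucket/mnist/model'

spec = _StubSpec()
cmd = ('python mnist.py --master={master} --train_file={train_file} '
       '--use_tpu={use_tpu} --train_steps={train_steps}'.format(
           master=spec.master, train_file=spec.train_file,
           use_tpu=spec.use_tpu, train_steps=spec.train_steps))
if spec.model_dir:
  cmd = '{cmd} --model_dir {model_dir}'.format(
      cmd=cmd, model_dir=spec.model_dir)
print(cmd)  # exactly the string handed to vm.RobustRemoteCommand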
Code example #2
def Run(benchmark_spec):
    """Run MNIST on the cluster.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
    vm = benchmark_spec.vms[0]
    mnist_benchmark_script = 'tpu/cloud_tpu/models/mnist/mnist.py'
    mnist_benchmark_cmd = ('python {script} '
                           '--master={master} '
                           '--train_file={train_file} '
                           '--use_tpu={use_tpu} '
                           '--train_steps={train_steps} '
                           '--iterations={iterations} '
                           '--model_dir={model_dir}'.format(
                               script=mnist_benchmark_script,
                               master=benchmark_spec.master,
                               train_file=benchmark_spec.train_file,
                               use_tpu=benchmark_spec.use_tpu,
                               train_steps=benchmark_spec.train_steps,
                               iterations=benchmark_spec.iterations,
                               model_dir=benchmark_spec.model_dir))
    if cuda_toolkit.CheckNvidiaGpuExists(vm):
        mnist_benchmark_cmd = '{env} {cmd}'.format(
            env=tensorflow.GetEnvironmentVars(vm), cmd=mnist_benchmark_cmd)
    stdout, stderr = vm.RobustRemoteCommand(mnist_benchmark_cmd,
                                            should_log=True)
    return MakeSamplesFromOutput(_CreateMetadataDict(benchmark_spec),
                                 stdout + stderr)
Code example #3
def Run(benchmark_spec):
    """Run MLPerf on the cluster.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
      required to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
    vm = benchmark_spec.vms[0]
    if benchmark_spec.tpus:
        # For MLPerf v0.5, the benchmark code differs for each hardware configuration.
        if benchmark_spec.tpu_groups['train'].GetNumShards() > 8:
            code_path = 'cloud_v2.512/resnet-tpuv2-512/code/resnet/model'
        elif benchmark_spec.tpu_groups['train'].GetAcceleratorType() == 'v2-8':
            code_path = 'cloud_v2.8/resnet-tpuv2-8/code/resnet/model'
        elif benchmark_spec.tpu_groups['train'].GetAcceleratorType() == 'v3-8':
            code_path = 'cloud_v3.8/resnet-tpuv3-8/code/resnet/model'
        else:
            raise ValueError(
                'MLPerf configurations do not support the hardware in PKB. PKB may '
                'need to be updated if this is a new TPU type.')
        cmd = 'bash run_helper.sh 2>&1 | tee output.txt'
    else:
        code_path = 'cloud_v100x8/code/resnet'
        cmd = ('sudo nvidia-docker build . -t foo && '
               'sudo nvidia-docker run -v $MLP_HOST_DATA_DIR:/data -v '
               '$MLP_HOST_OUTPUT_DIR:/output -v /proc:/host_proc -t '
               'foo:latest run_helper_8xV100.sh 2>&1 | tee output.txt')
    mlperf_benchmark_cmd = (
        'export MLP_GCS_MODEL_DIR={model_dir} && '
        'export MLP_PATH_GCS_IMAGENET={data_dir} && '
        'export MLP_TPU_NAME={tpu_train} && '
        'export MLP_PATH_GCS_EUW_IMAGENET={data_dir} && '
        'export MLP_GCS_EUW_MODEL_DIR={model_dir} && '
        'export MLP_TPU_SIDECAR_NAME={tpu_eval} && '
        'export MLP_HOST_DATA_DIR=/data && '
        'export MLP_HOST_OUTPUT_DIR=`pwd`/output && '
        'export PYTHONPATH=$PYTHONPATH:$PWD/tpu/models && '
        'cd results/v0.5.0/google/{code_path} && '
        'sed -i "s/python /python3 /g" run_helper*.sh && '
        'mkdir -p $MLP_HOST_OUTPUT_DIR && '
        '{cmd}'.format(model_dir=benchmark_spec.model_dir,
                       data_dir=benchmark_spec.data_dir,
                       tpu_train=(benchmark_spec.tpu_groups['train'].GetName()
                                  if benchmark_spec.tpus else ''),
                       tpu_eval=(benchmark_spec.tpu_groups['eval'].GetName()
                                 if benchmark_spec.tpus else ''),
                       code_path=code_path,
                       cmd=cmd))
    if cuda_toolkit.CheckNvidiaGpuExists(vm):
        mlperf_benchmark_cmd = '{env} {cmd}'.format(
            env=tensorflow.GetEnvironmentVars(vm), cmd=mlperf_benchmark_cmd)
    samples = []
    metadata = _CreateMetadataDict(benchmark_spec)
    stdout, _ = vm.RobustRemoteCommand(mlperf_benchmark_cmd, should_log=True)
    samples.extend(MakeSamplesFromOutput(metadata, stdout))
    return samples
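MakeSamplesFromOutput is defined elsewhere in the module and is not shown here. A hypothetical sketch of the kind of parsing it implies, pulling one metric out of the captured output with a regex; the pattern, metric name, and tuple shape are invented for illustration (the real helper returns sample.Sample objects):

import re

def _make_samples_sketch(metadata, output):
  # Hypothetical parser: collect every 'Run time: <seconds>' line from the
  # benchmark output as a (metric, value, unit, metadata) tuple.
  samples = []
  for match in re.finditer(r'Run time:\s*([\d.]+)', output):
    samples.append(('run_time', float(match.group(1)), 'seconds', metadata))
  return samples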
Code example #4
def Run(benchmark_spec):
  """Run MNIST on the cluster.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
  _UpdateBenchmarkSpecWithFlags(benchmark_spec)
  vm = benchmark_spec.vms[0]
  mnist_benchmark_script = 'mnist_tpu.py'
  mnist_benchmark_cmd = (
      'cd models/official/mnist && '
      'python {script} '
      '--data_dir={data_dir} '
      '--iterations={iterations} '
      '--model_dir={model_dir} '
      '--batch_size={batch_size}'.format(
          script=mnist_benchmark_script,
          data_dir=benchmark_spec.data_dir,
          iterations=benchmark_spec.iterations,
          model_dir=benchmark_spec.model_dir,
          batch_size=benchmark_spec.batch_size))
  if cuda_toolkit.CheckNvidiaGpuExists(vm):
    mnist_benchmark_cmd = '{env} {cmd}'.format(
        env=tensorflow.GetEnvironmentVars(vm), cmd=mnist_benchmark_cmd)
  samples = []
  metadata = CreateMetadataDict(benchmark_spec)
  elapsed_seconds = 0  # Defined up front so the eval-only path can use it.
  if benchmark_spec.train_steps:
    if benchmark_spec.tpus:
      tpu = benchmark_spec.tpu_groups['train'].GetName()
      num_shards = '--num_shards={}'.format(
          benchmark_spec.tpu_groups['train'].GetNumShards())
    else:
      tpu = num_shards = ''
    mnist_benchmark_train_cmd = (
        '{cmd} --tpu={tpu} --use_tpu={use_tpu} --train_steps={train_steps} '
        '{num_shards} --noenable_predict'.format(
            cmd=mnist_benchmark_cmd, tpu=tpu, use_tpu=bool(benchmark_spec.tpus),
            train_steps=benchmark_spec.train_steps, num_shards=num_shards))
    start = time.time()
    stdout, stderr = vm.RobustRemoteCommand(mnist_benchmark_train_cmd,
                                            should_log=True)
    elapsed_seconds = (time.time() - start)
    samples.extend(MakeSamplesFromTrainOutput(
        metadata, stdout + stderr, elapsed_seconds, benchmark_spec.train_steps))
  if benchmark_spec.eval_steps:
    mnist_benchmark_eval_cmd = (
        '{cmd} --tpu="" --use_tpu=False --eval_steps={eval_steps}'.format(
            cmd=mnist_benchmark_cmd, eval_steps=benchmark_spec.eval_steps))
    stdout, stderr = vm.RobustRemoteCommand(mnist_benchmark_eval_cmd,
                                            should_log=True)
    samples.extend(MakeSamplesFromEvalOutput(metadata, stdout + stderr,
                                             elapsed_seconds))
  return samples
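Example #4 brackets the training command with time.time() calls so the wall-clock duration can be attached to the parsed samples. The same pattern factored into a reusable helper (a sketch; this helper is not part of PKB):

import time

def _timed_remote_command(vm, cmd):
  """Runs cmd on vm and returns (stdout, stderr, wall-clock seconds)."""
  start = time.time()
  stdout, stderr = vm.RobustRemoteCommand(cmd, should_log=True)
  return stdout, stderr, time.time() - start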
Code example #5
def Run(benchmark_spec):
  """Run ResNet on the cluster.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
  _UpdateBenchmarkSpecWithFlags(benchmark_spec)
  vm = benchmark_spec.vms[0]
  resnet_benchmark_script = 'resnet_main.py'
  resnet_benchmark_cmd = (
      'cd tpu/models/official/resnet && '
      'python {script} '
      '--use_tpu={use_tpu} '
      '--tpu={tpu} '
      '--data_dir={data_dir} '
      '--model_dir={model_dir} '
      '--resnet_depth={depth} '
      '--mode={mode} '
      '--train_steps={train_steps} '
      '--train_batch_size={train_batch_size} '
      '--eval_batch_size={eval_batch_size} '
      '--iterations_per_loop={iterations} '
      '--num_cores={num_cores} '
      '--data_format={data_format} '
      '--precision={precision} '
      '--skip_host_call={skip_host_call} '
      '--num_train_images={num_train_images} '
      '--num_eval_images={num_eval_images}'.format(
          script=resnet_benchmark_script,
          use_tpu=benchmark_spec.use_tpu,
          tpu=benchmark_spec.tpu,
          data_dir=benchmark_spec.data_dir,
          model_dir=benchmark_spec.model_dir,
          depth=benchmark_spec.depth,
          mode=benchmark_spec.mode,
          train_steps=benchmark_spec.train_steps,
          train_batch_size=benchmark_spec.train_batch_size,
          eval_batch_size=benchmark_spec.eval_batch_size,
          iterations=benchmark_spec.iterations,
          num_cores=benchmark_spec.num_shards,
          data_format=benchmark_spec.data_format,
          precision=benchmark_spec.precision,
          skip_host_call=benchmark_spec.skip_host_call,
          num_train_images=benchmark_spec.num_train_images,
          num_eval_images=benchmark_spec.num_eval_images
      ))
  if FLAGS.tf_device == 'gpu':
    resnet_benchmark_cmd = '{env} {cmd}'.format(
        env=tensorflow.GetEnvironmentVars(vm), cmd=resnet_benchmark_cmd)
  stdout, stderr = vm.RobustRemoteCommand(resnet_benchmark_cmd,
                                          should_log=True)
  return _MakeSamplesFromOutput(_CreateMetadataDict(benchmark_spec),
                                stdout + stderr)
Code example #6
def _RunModelOnVm(vm, model, benchmark_spec, args='', job_name=''):
    """Runs a TensorFlow benchmark on a single VM.

  Args:
    vm: VM to run on
    model: string, the name of model to run
    benchmark_spec: BenchmarkSpec object
    args: string, distributed arguments
    job_name: string, distributed job name

  Returns:
    a Sample containing the TensorFlow throughput, or the process
    identification number of the TensorFlow parameter server.
  """
    tf_cnn_benchmark_dir = 'benchmarks/scripts/tf_cnn_benchmarks'
    batch_size = _GetBatchSize(model)
    tf_cnn_benchmark_cmd = (
        'python tf_cnn_benchmarks.py '
        '--local_parameter_device={local_parameter_device} '
        '--batch_size={batch_size} '
        '--model={model} '
        '--data_name={data_name} '
        '--variable_update={variable_update} '
        '--distortions={distortions} '
        '--device={device} '
        '--data_format={data_format} '
        '--forward_only={forward_only} '
        '--flush_stdout=true'.format(
            local_parameter_device=benchmark_spec.local_parameter_device,
            batch_size=batch_size,
            model=model,
            data_name=benchmark_spec.data_name,
            variable_update=benchmark_spec.variable_update,
            distortions=benchmark_spec.distortions,
            device=benchmark_spec.device,
            data_format=benchmark_spec.data_format,
            forward_only=benchmark_spec.forward_only))
    if benchmark_spec.device == GPU:
        num_gpus = cuda_toolkit.QueryNumberOfGpus(vm)
        tf_cnn_benchmark_cmd = '{env} {cmd} --num_gpus={gpus}'.format(
            env=tensorflow.GetEnvironmentVars(vm),
            cmd=tf_cnn_benchmark_cmd,
            gpus=num_gpus)
    else:
        num_gpus = 0
    if args:
        tf_cnn_benchmark_cmd = '{cmd} --job_name={job} {args}'.format(
            cmd=tf_cnn_benchmark_cmd, job=job_name, args=args)
    run_command = 'cd {path} ; {cmd}'.format(path=tf_cnn_benchmark_dir,
                                             cmd=tf_cnn_benchmark_cmd)
    output, _ = vm.RobustRemoteCommand(run_command, should_log=True)
    if job_name == 'ps':
        return _ExtractTfParameterServerPid(output)
    else:
        return _MakeSamplesFromOutput(benchmark_spec, output, model,
                                      batch_size, num_gpus)
Code example #7
def Run(benchmark_spec):
    """Run Inception V3 on the cluster.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
    vm = benchmark_spec.vms[0]
    inception3_benchmark_script = (
        'tpu/models/experimental/inception/inception_v3.py')
    inception3_benchmark_cmd = (
        'python {script} '
        '--tpu={tpu} '
        '--learning_rate={learning_rate} '
        '--train_steps={train_steps} '
        '--iterations={iterations} '
        '--use_tpu={use_tpu} '
        '--use_data={use_data} '
        '--mode={mode} '
        '--train_steps_per_eval={train_steps_per_eval} '
        '--data_dir={data_dir} '
        '--model_dir={model_dir} '
        '--save_checkpoints_secs={save_checkpoints_secs} '
        '--train_batch_size={train_batch_size} '
        '--eval_batch_size={eval_batch_size} '
        '--num_shards={num_shards}'.format(
            script=inception3_benchmark_script,
            tpu=benchmark_spec.tpu,
            learning_rate=benchmark_spec.learning_rate,
            train_steps=benchmark_spec.train_steps,
            iterations=benchmark_spec.iterations,
            use_tpu=benchmark_spec.use_tpu,
            use_data=benchmark_spec.use_data,
            mode=benchmark_spec.mode,
            train_steps_per_eval=benchmark_spec.train_steps_per_eval,
            data_dir=benchmark_spec.data_dir,
            model_dir=benchmark_spec.model_dir,
            save_checkpoints_secs=benchmark_spec.save_checkpoints_secs,
            train_batch_size=benchmark_spec.train_batch_size,
            eval_batch_size=benchmark_spec.eval_batch_size,
            num_shards=benchmark_spec.num_shards))
    if FLAGS.tf_device == 'gpu':
        inception3_benchmark_cmd = '{env} {cmd}'.format(
            env=tensorflow.GetEnvironmentVars(vm),
            cmd=inception3_benchmark_cmd)
    stdout, stderr = vm.RobustRemoteCommand(inception3_benchmark_cmd,
                                            should_log=True)
    return mnist_benchmark.MakeSamplesFromOutput(
        _CreateMetadataDict(benchmark_spec), stdout + stderr)
Code example #8
def _RunOnVm(vm, benchmark_spec):
    """Runs a TensorFlow benchmark on a single VM.

  Args:
    vm: VM to run on
    benchmark_spec: benchmark_spec object

  Returns:
    A list of samples
  """
    tf_cnn_benchmark_dir = 'benchmarks/scripts/tf_cnn_benchmarks'

    results = []
    for model in FLAGS.tf_models:
        batch_size = _GetBatchSize(model)
        tf_cnn_benchmark_cmd = (
            'python tf_cnn_benchmarks.py --local_parameter_device=%s '
            '--batch_size=%s --model=%s --data_name=%s --variable_update=%s '
            '--use_nccl=%s --distortions=%s --device=%s --data_format=%s '
            '--forward_only=%s') % (
                benchmark_spec.local_parameter_device, batch_size, model,
                benchmark_spec.data_name, benchmark_spec.variable_update,
                benchmark_spec.use_nccl, benchmark_spec.distortions,
                benchmark_spec.device, benchmark_spec.data_format,
                benchmark_spec.forward_only)
        if benchmark_spec.device == GPU:
            num_gpus = cuda_toolkit_8.QueryNumberOfGpus(vm)
            tf_cnn_benchmark_cmd = '%s %s --num_gpus=%s' % (
                tensorflow.GetEnvironmentVars(vm), tf_cnn_benchmark_cmd,
                num_gpus)
        else:
            num_gpus = 0
        run_command = 'cd %s && %s' % (tf_cnn_benchmark_dir,
                                       tf_cnn_benchmark_cmd)
        output, _ = vm.RobustRemoteCommand(run_command, should_log=True)
        results.extend(
            _MakeSamplesFromOutput(benchmark_spec, output, model, batch_size,
                                   num_gpus))

    return results
Code example #9
def Run(benchmark_spec):
    """Run MLPerf on the cluster.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
      required to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
    vms = benchmark_spec.vms
    master_vm = vms[0]
    benchmark = benchmark_spec.benchmark

    env_params = {}
    env_params['SLURM_JOB_ID'] = r'{uri}'.format(uri=FLAGS.run_uri)
    env_params['PULL'] = 0
    env_params['DGXSYSTEM'] = DGXSYSTEM
    env_params['NEXP'] = 1
    env_params['LOGDIR'] = posixpath.join(vm_util.VM_TMP_DIR, benchmark)

    script_path = ('$HOME/training_results_{version}/NVIDIA/benchmarks/{model}'
                   r'/implementations/{framework}'.format(
                       version=mlperf_benchmark.MLPERF_VERSION,
                       model='maskrcnn'
                       if mlperf_benchmark.MASK in benchmark else benchmark,
                       framework='mxnet'
                       if mlperf_benchmark.RESNET in benchmark else 'pytorch'))

    benchmark_env_params = {
        mlperf_benchmark.TRANSFORMER: {
            'CONT': r'"mlperf-nvidia:translation"',
            'DATADIR': r'/data/wmt/utf8'
        },
        mlperf_benchmark.SSD: {
            'CONT': r'"mlperf-nvidia:single_stage_detector"',
            'DATADIR': '/data'
        },
        mlperf_benchmark.GNMT: {
            'CONT': r'"mlperf-nvidia:rnn_translator"',
            'DATADIR': r'/data/gnmt'
        },
        mlperf_benchmark.MASK: {},
        mlperf_benchmark.RESNET: {},
        mlperf_benchmark.BERT: {},
    }
    env_params.update(benchmark_env_params.get(benchmark, {}))
    if mlperf_benchmark.RESNET in benchmark:
        env_params['SLURM_JOB_NUM_NODES'] = benchmark_spec.num_vms

    env = r''
    if nvidia_driver.CheckNvidiaGpuExists(master_vm):
        env = tensorflow.GetEnvironmentVars(master_vm)

    cmd = (f'cd {script_path} && '
           f'{env} {_DictToString(env_params)} '
           f'{FLAGS.nccl_mpi} '
           '--allow-run-as-root '
           '-hostfile $HOME/HOSTFILE '
           '--mca pml ^cm '
           '--mca btl tcp,self '
           '--mca btl_tcp_if_exclude docker0,lo '
           '--bind-to none '
           '-N 1 '
           './run_with_docker1.sh')
    if (mlperf_benchmark.NVPROF in FLAGS.mlperf_profiler
            or FLAGS.mlperf_keep_nccl_log):
        cmd += (r' && cp /tmp/pkb/cmd* {logdir}'.format(
            logdir=posixpath.join(vm_util.VM_TMP_DIR, benchmark)))

    samples = []
    metadata = _CreateMetadataDict(benchmark_spec)
    stdout, _ = master_vm.RobustRemoteCommand(cmd, should_log=True)
    if mlperf_benchmark.NONE in FLAGS.mlperf_profiler:
        samples.extend(MakeSamplesFromOutput(metadata, stdout,
                                             model=benchmark))

    if (mlperf_benchmark.NVPROF in FLAGS.mlperf_profiler
            or FLAGS.mlperf_keep_nccl_log):
        master_vm.RemoteCommand(
            r'mkdir -p /data/aggregated/{model}'.format(model=benchmark))
        master_vm.RemoteCommand(
            r'mpirun -hostfile $HOME/{hostfile} -N 1 scp -r {logdir} '
            r'{master_ip}:/data/aggregated/'.format(
                hostfile=HOSTFILE,
                logdir=posixpath.join(vm_util.VM_TMP_DIR, benchmark),
                master_ip=master_vm.internal_ip))

    return samples
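The _DictToString(env_params) call above relies on a helper that is not shown. A plausible one-liner, assuming it renders the dict as space-separated KEY=value assignments suitable as a shell command prefix (the sorting is an assumption, added here for deterministic output):

def _DictToString(dictionary):
  """Renders e.g. {'PULL': 0, 'NEXP': 1} as 'NEXP=1 PULL=0'."""
  return ' '.join('{}={}'.format(k, v) for k, v in sorted(dictionary.items()))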
Code example #10
def Run(benchmark_spec):
    """Run MLPerf on the cluster.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
      required to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
    vm = benchmark_spec.vms[0]
    if benchmark_spec.tpus:
        # For MLPerf v0.6, the benchmark code differs for each hardware configuration.
        if (benchmark_spec.tpu_groups['train'].GetAcceleratorType() in
                ('v3-32', 'v3-128', 'v3-256', 'v3-512', 'v3-1024', 'v3-2048')):
            run_path = (
                '$HOME/training_results_v0.6/Google/benchmarks/{model}/tpu-{tpus}'
                .format(model=benchmark_spec.benchmark,
                        tpus=benchmark_spec.tpu_groups['train'].
                        GetAcceleratorType()))
            code_path = (
                '$HOME/training_results_v0.6/Google/benchmarks/{model}/implementations/tpu-{tpus}-{model}'
                .format(model=benchmark_spec.benchmark,
                        tpus=benchmark_spec.tpu_groups['train'].
                        GetAcceleratorType()))

            if 'mask' in benchmark_spec.benchmark:
                model = 'mask_rcnn'
            elif 'gnmt' in benchmark_spec.benchmark:
                model = 'nmt'
            else:
                model = benchmark_spec.benchmark

            mlperf_benchmark_cmd = (
                'cd {code_path} && '
                'export PYTHONPATH=$(pwd):$(pwd)/{model} && '
                'cd {model} && '
                '{run_path}/run_and_time1.sh'.format(code_path=code_path,
                                                     model=model,
                                                     run_path=run_path))

            if 'ssd' in benchmark_spec.benchmark:
                mlperf_benchmark_cmd = (
                    'export '
                    'MLP_GCS_RESNET_CHECKPOINT=gs://download.tensorflow.org/models/mlperf/v0.5.0/resnet34_ssd_checkpoint'
                    ' && {cmd}'.format(cmd=mlperf_benchmark_cmd))

        else:
            raise ValueError(
                'MLPerf configurations do not support the hardware in PKB. PKB may '
                'need to be updated if this is a new TPU type.')

    else:
        if 'resnet' in benchmark_spec.benchmark:
            mlperf_benchmark_cmd = (
                'cd '
                'training_results_v0.6/NVIDIA/benchmarks/resnet/implementations/mxnet'
                ' && sed \'s/SYSLOGGING=1/SYSLOGGING=0/g\' ./run.sub > ./run1.sub &&'
                ' chmod 755 ./run1.sub && sudo DATADIR=/data/imagenet '
                'LOGDIR=/tmp/resnet PULL=0 DGXSYSTEM=DGX1 NEXP=1 ./run1.sub ')

        if 'transformer' in benchmark_spec.benchmark:
            mlperf_benchmark_cmd = (
                'cd '
                'training_results_v0.6/NVIDIA/benchmarks/transformer/implementations/pytorch'
                ' && sed \'s/SYSLOGGING=1/SYSLOGGING=0/g\' ./run.sub > ./run1.sub &&'
                ' chmod 755 ./run1.sub && sudo DATADIR=/data/wmt/utf8 '
                'LOGDIR=/tmp/transformer PULL=0 DGXSYSTEM=DGX1 NEXP=1 ./run1.sub '
            )

        if 'minigo' in benchmark_spec.benchmark:
            mlperf_benchmark_cmd = (
                'cd '
                '$HOME/training_results_v0.6/NVIDIA/benchmarks/minigo/implementations/tensorflow'
                ' && sed \'s/SYSLOGGING=1/SYSLOGGING=0/g\' ./run.sub > run1.sub && '
                'chmod 755 ./run1.sub && sudo LOGDIR=/tmp/minigo '
                'CONT=mlperf-nvidia:minigo DGXSYSTEM=DGX1 NEXP=1 ./run1.sub ')

        if 'mask' in benchmark_spec.benchmark:
            mlperf_benchmark_cmd = (
                'cd '
                '$HOME/training_results_v0.6/NVIDIA/benchmarks/maskrcnn/implementations/pytorch'
                ' && sed "s/SYSLOGGING=1/SYSLOGGING=0/g" ./run.sub > ./run1.sub && '
                'chmod 755 ./run1.sub && sudo LOGDIR=/tmp/mask DATADIR=/data PULL=0 '
                'DGXSYSTEM=DGX1 NEXP=1 ./run1.sub ')

        if 'gnmt' in benchmark_spec.benchmark:
            mlperf_benchmark_cmd = (
                'cd '
                '$HOME/training_results_v0.6/NVIDIA/benchmarks/gnmt/implementations/pytorch'
                ' && sed "s/SYSLOGGING=1/SYSLOGGING=0/g" ./run.sub > ./run1.sub && '
                'chmod 755 ./run1.sub && sudo LOGDIR=/tmp/gnmt DATADIR=/data/gnmt '
                'PULL=0 DGXSYSTEM=DGX1 NEXP=1 ./run1.sub ')

        if 'ssd' in benchmark_spec.benchmark:
            mlperf_benchmark_cmd = (
                'cd '
                '$HOME/training_results_v0.6/NVIDIA/benchmarks/ssd/implementations/pytorch'
                ' && sed "s/SYSLOGGING=1/SYSLOGGING=0/g" ./run.sub > ./run1.sub && '
                'chmod 755 ./run1.sub && sudo LOGDIR=/tmp/ssd DATADIR=/data PULL=0 '
                'DGXSYSTEM=DGX1 NEXP=1 ./run1.sub ')

    if cuda_toolkit.CheckNvidiaGpuExists(vm):
        mlperf_benchmark_cmd = '{env} {cmd}'.format(
            env=tensorflow.GetEnvironmentVars(vm), cmd=mlperf_benchmark_cmd)

    samples = []
    metadata = _CreateMetadataDict(benchmark_spec)
    stdout, _ = vm.RobustRemoteCommand(mlperf_benchmark_cmd, should_log=True)
    samples.extend(
        MakeSamplesFromOutput(metadata,
                              stdout,
                              use_tpu=bool(benchmark_spec.tpus),
                              model=benchmark_spec.benchmark))
    return samples
Code example #11
def _GetTfCnnBenchmarkCommand(vm,
                              model,
                              batch_size,
                              benchmark_spec,
                              args='',
                              job_name=''):
    """Create the command used to run the tf_cnn_benchmarks script.

  The command is either formulated using flag values stored on the
  benchmark_spec, or is essentially provided outright through the
  benchmark_args flag.

  Args:
    vm: the VM to run on.
    model: name of the model to run.
    batch_size: batch size to use for training.
    benchmark_spec: the benchmark spec object.
    args: string, distributed arguments
    job_name: string, distributed job name

  Returns:
    A string that runs the tf_cnn_benchmarks.py script
    with the desired arguments.
  """
    num_gpus = (nvidia_driver.QueryNumberOfGpus(vm)
                if nvidia_driver.CheckNvidiaGpuExists(vm) else 0)
    benchmark_spec.num_gpus = num_gpus

    if benchmark_spec.benchmark_args is not None:
        cmd = 'python tf_cnn_benchmarks.py ' + benchmark_spec.benchmark_args
        # If the user didn't specify num_gpus in the benchmark_args string,
        # use all the GPUs on the system.
        if '--num_gpus' not in benchmark_spec.benchmark_args and num_gpus:
            cmd = '{cmd} --num_gpus={num_gpus}'.format(cmd=cmd,
                                                       num_gpus=num_gpus)
        return cmd

    benchmark_spec.local_parameter_device = FLAGS.tf_local_parameter_device
    benchmark_spec.device = FLAGS.tf_device
    benchmark_spec.data_format = FLAGS.tf_data_format
    if num_gpus == 0:
        benchmark_spec.local_parameter_device = CPU
        benchmark_spec.device = CPU
        benchmark_spec.data_format = NHWC

    cmd = ('{env_vars} python tf_cnn_benchmarks.py '
           '--local_parameter_device={local_parameter_device} '
           '--batch_size={batch_size} '
           '--model={model} '
           '{data} '
           '--data_name={data_name} '
           '--variable_update={variable_update} '
           '--distortions={distortions} '
           '--device={device} '
           '--data_format={data_format} '
           '--forward_only={forward_only} '
           '--use_fp16={use_fp16} '
           '{num_gpus} '
           '{job_name}'.format(
               env_vars=tensorflow.GetEnvironmentVars(vm),
               local_parameter_device=benchmark_spec.local_parameter_device,
               batch_size=batch_size,
               model=model,
               data=('--data_dir={}'.format(benchmark_spec.data_dir)
                     if benchmark_spec.data_dir else ''),
               data_name=benchmark_spec.data_name,
               variable_update=benchmark_spec.variable_update,
               distortions=benchmark_spec.distortions,
               device=benchmark_spec.device,
               data_format=benchmark_spec.data_format,
               forward_only=benchmark_spec.forward_only,
               use_fp16=(benchmark_spec.precision == FP16),
               num_gpus='--num_gpus={}'.format(num_gpus) if num_gpus else '',
               job_name='--job_name={0} {1}'.format(job_name, args)
               if args else ''))
    return cmd
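The pass-through branch above lets the benchmark_args flag override flag assembly entirely, with --num_gpus appended only when the user did not set it. That precedence rule in isolation, as a hypothetical standalone function:

def _append_num_gpus_sketch(benchmark_args, num_gpus):
  """Mirrors the pass-through branch: user-supplied arguments win."""
  cmd = 'python tf_cnn_benchmarks.py ' + benchmark_args
  if '--num_gpus' not in benchmark_args and num_gpus:
    cmd = '{cmd} --num_gpus={num_gpus}'.format(cmd=cmd, num_gpus=num_gpus)
  return cmd

assert '--num_gpus=8' in _append_num_gpus_sketch('--model=resnet50', 8)
assert _append_num_gpus_sketch('--num_gpus=1 --model=vgg16', 8).count('--num_gpus') == 1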
Code example #12
def Run(benchmark_spec):
  """Run ResNet on the cluster.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
  _UpdateBenchmarkSpecWithFlags(benchmark_spec)
  vm = benchmark_spec.vms[0]
  resnet_benchmark_script = 'resnet_main.py'
  resnet_benchmark_cmd = (
      '{env_cmd} && cd tpu/models/official/resnet && '
      'python {script} '
      '--use_tpu={use_tpu} '
      '--data_dir={data_dir} '
      '--model_dir={model_dir} '
      '--resnet_depth={depth} '
      '--train_batch_size={train_batch_size} '
      '--eval_batch_size={eval_batch_size} '
      '--iterations_per_loop={iterations} '
      '--data_format={data_format} '
      '--precision={precision} '
      '--skip_host_call={skip_host_call} '
      '--num_train_images={num_train_images} '
      '--num_eval_images={num_eval_images}'.format(
          env_cmd=benchmark_spec.env_cmd,
          script=resnet_benchmark_script,
          use_tpu=bool(benchmark_spec.tpus),
          data_dir=benchmark_spec.data_dir,
          model_dir=benchmark_spec.model_dir,
          depth=benchmark_spec.depth,
          train_batch_size=benchmark_spec.train_batch_size,
          eval_batch_size=benchmark_spec.eval_batch_size,
          iterations=benchmark_spec.iterations,
          data_format=benchmark_spec.data_format,
          precision=benchmark_spec.precision,
          skip_host_call=benchmark_spec.skip_host_call,
          num_train_images=benchmark_spec.num_train_images,
          num_eval_images=benchmark_spec.num_eval_images
      ))
  if FLAGS.tf_device == 'gpu':
    resnet_benchmark_cmd = '{env} {cmd}'.format(
        env=tensorflow.GetEnvironmentVars(vm), cmd=resnet_benchmark_cmd)
  samples = []
  metadata = _CreateMetadataDict(benchmark_spec)
  elapsed_seconds = 0
  steps_per_eval = benchmark_spec.steps_per_eval
  train_steps = benchmark_spec.train_steps
  for step in range(steps_per_eval, train_steps + steps_per_eval,
                    steps_per_eval):
    step = min(step, train_steps)
    resnet_benchmark_cmd_step = '{cmd} --train_steps={step}'.format(
        cmd=resnet_benchmark_cmd, step=step)
    if benchmark_spec.mode in ('train', 'train_and_eval'):
      if benchmark_spec.tpus:
        tpu = benchmark_spec.tpu_groups['train'].GetName()
        num_cores = '--num_cores={}'.format(
            benchmark_spec.tpu_groups['train'].GetNumShards())
      else:
        tpu = num_cores = ''
      resnet_benchmark_train_cmd = (
          '{cmd} --tpu={tpu} --mode=train {num_cores}'.format(
              cmd=resnet_benchmark_cmd_step,
              tpu=tpu, num_cores=num_cores))
      start = time.time()
      stdout, stderr = vm.RobustRemoteCommand(resnet_benchmark_train_cmd,
                                              should_log=True)
      elapsed_seconds += (time.time() - start)
      samples.extend(mnist_benchmark.MakeSamplesFromTrainOutput(
          metadata, stdout + stderr, elapsed_seconds, step))
    if benchmark_spec.mode in ('train_and_eval', 'eval'):
      if benchmark_spec.tpus:
        tpu = benchmark_spec.tpu_groups['eval'].GetName()
        num_cores = '--num_cores={}'.format(
            benchmark_spec.tpu_groups['eval'].GetNumShards())
      else:
        tpu = num_cores = ''
      resnet_benchmark_eval_cmd = (
          '{cmd} --tpu={tpu} --mode=eval {num_cores}'.format(
              cmd=resnet_benchmark_cmd_step,
              tpu=tpu, num_cores=num_cores))
      stdout, stderr = vm.RobustRemoteCommand(resnet_benchmark_eval_cmd,
                                              should_log=True)
      samples.extend(MakeSamplesFromEvalOutput(
          metadata, stdout + stderr, elapsed_seconds))
  return samples
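Examples #12, #13, and #15 share the same train/eval cadence: raise --train_steps in increments of steps_per_eval, clamp the final increment to train_steps, and evaluate after each round. The schedule in isolation:

def _eval_schedule(train_steps, steps_per_eval):
  """Yields the cumulative --train_steps value passed to each round."""
  for step in range(steps_per_eval, train_steps + steps_per_eval,
                    steps_per_eval):
    yield min(step, train_steps)

# 5000 total steps evaluated every 2000 steps -> rounds at 2000, 4000, 5000.
assert list(_eval_schedule(5000, 2000)) == [2000, 4000, 5000]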
Code example #13
def Run(benchmark_spec):
    """Run Inception V3 on the cluster.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
    vm = benchmark_spec.vms[0]
    inception3_benchmark_script = (
        'tpu/models/experimental/inception/inception_v3.py')
    inception3_benchmark_cmd = (
        '{env_cmd} && python {script} '
        '--learning_rate={learning_rate} '
        '--iterations={iterations} '
        '--use_tpu={use_tpu} '
        '--use_data={use_data} '
        '--train_steps_per_eval={steps_per_eval} '
        '--data_dir={data_dir} '
        '--model_dir={model_dir} '
        '--save_checkpoints_secs={save_checkpoints_secs} '
        '--train_batch_size={train_batch_size} '
        '--eval_batch_size={eval_batch_size} '
        '--precision={precision}'.format(
            env_cmd=benchmark_spec.env_cmd,
            script=inception3_benchmark_script,
            learning_rate=benchmark_spec.learning_rate,
            iterations=benchmark_spec.iterations,
            use_tpu=bool(benchmark_spec.tpus),
            use_data=benchmark_spec.use_data,
            steps_per_eval=benchmark_spec.steps_per_eval,
            data_dir=benchmark_spec.data_dir,
            model_dir=benchmark_spec.model_dir,
            save_checkpoints_secs=benchmark_spec.save_checkpoints_secs,
            train_batch_size=benchmark_spec.train_batch_size,
            eval_batch_size=benchmark_spec.eval_batch_size,
            precision=benchmark_spec.precision))
    if FLAGS.tf_device == 'gpu':
        inception3_benchmark_cmd = '{env} {cmd}'.format(
            env=tensorflow.GetEnvironmentVars(vm),
            cmd=inception3_benchmark_cmd)
    samples = []
    metadata = _CreateMetadataDict(benchmark_spec)
    elapsed_seconds = 0
    steps_per_eval = benchmark_spec.steps_per_eval
    train_steps = benchmark_spec.train_steps
    for step in range(steps_per_eval, train_steps + steps_per_eval,
                      steps_per_eval):
        step = min(step, train_steps)
        inception3_benchmark_cmd_step = '{cmd} --train_steps={step}'.format(
            cmd=inception3_benchmark_cmd, step=step)
        if benchmark_spec.mode in ('train', 'train_and_eval'):
            if benchmark_spec.tpus:
                tpu = benchmark_spec.tpu_groups['train'].GetName()
                num_shards = '--num_shards={}'.format(
                    benchmark_spec.tpu_groups['train'].GetNumShards())
            else:
                tpu = num_shards = ''
            inception3_benchmark_train_cmd = (
                '{cmd} --tpu={tpu} --mode=train {num_shards}'.format(
                    cmd=inception3_benchmark_cmd_step,
                    tpu=tpu,
                    num_shards=num_shards))
            start = time.time()
            stdout, stderr = vm.RobustRemoteCommand(
                inception3_benchmark_train_cmd, should_log=True)
            elapsed_seconds += (time.time() - start)
            samples.extend(
                mnist_benchmark.MakeSamplesFromTrainOutput(
                    metadata, stdout + stderr, elapsed_seconds, step))
        if benchmark_spec.mode in ('train_and_eval', 'eval'):
            if benchmark_spec.tpus:
                tpu = benchmark_spec.tpu_groups['eval'].GetName()
                num_shards = '--num_shards={}'.format(
                    benchmark_spec.tpu_groups['eval'].GetNumShards())
            else:
                tpu = num_shards = ''
            inception3_benchmark_eval_cmd = (
                '{cmd} --tpu={tpu} --mode=eval {num_shards}'.format(
                    cmd=inception3_benchmark_cmd_step,
                    tpu=tpu,
                    num_shards=num_shards))
            stdout, stderr = vm.RobustRemoteCommand(
                inception3_benchmark_eval_cmd, should_log=True)
            samples.extend(
                resnet_benchmark.MakeSamplesFromEvalOutput(
                    metadata, stdout + stderr, elapsed_seconds))
    return samples
Code example #14
def Run(benchmark_spec):
  """Run MLPerf on the cluster.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
      required to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
  _UpdateBenchmarkSpecWithFlags(benchmark_spec)
  vm = benchmark_spec.vms[0]
  if benchmark_spec.tpus:
    # For MLPerf v0.6, the benchmark code differs for each hardware configuration.
    if (benchmark_spec.tpu_groups['train'].GetAcceleratorType() in
        ('v3-32', 'v3-128', 'v3-256', 'v3-512', 'v3-1024', 'v3-2048')):
      run_path = (
          '$HOME/training_results_v0.6/Google/benchmarks/{model}/tpu-{tpus}'
          .format(
              model=benchmark_spec.benchmark,
              tpus=benchmark_spec.tpu_groups['train'].GetAcceleratorType()))
      code_path = (
          '$HOME/training_results_v0.6/Google/benchmarks/{model}/implementations/tpu-{tpus}-{model}'
          .format(
              model=benchmark_spec.benchmark,
              tpus=benchmark_spec.tpu_groups['train'].GetAcceleratorType()))

      if 'mask' in benchmark_spec.benchmark:
        model = 'mask_rcnn'
      elif 'gnmt' in benchmark_spec.benchmark:
        model = 'nmt'
      else:
        model = benchmark_spec.benchmark

      mlperf_benchmark_cmd = ('cd {code_path} && '
                              'export PYTHONPATH=$(pwd):$(pwd)/{model} && '
                              'cd {model} && '
                              '{run_path}/run_and_time.sh'.format(
                                  code_path=code_path,
                                  model=model,
                                  run_path=run_path))

      if 'ssd' in benchmark_spec.benchmark:
        mlperf_benchmark_cmd = (
            'export '
            'MLP_GCS_RESNET_CHECKPOINT={checkpoint}'
            ' && {cmd}'.format(
                checkpoint=FLAGS.mlperf_gcs_resnet_checkpoint,
                cmd=mlperf_benchmark_cmd))
    else:
      raise ValueError(
          'MLPerf configurations do not support the hardware in PKB. PKB may '
          'need to be updated if this is a new TPU type.')

  else:
    benchmark_path = '$HOME/training_results_v0.6/NVIDIA/benchmarks'
    common_env = 'DGXSYSTEM=DGX1 NEXP=1'
    if 'resnet' in benchmark_spec.benchmark:
      run_path = posixpath.join(benchmark_path, 'resnet/implementations/mxnet')
      env = 'DATADIR=/data/imagenet LOGDIR=/tmp/resnet PULL=0'
    elif 'transformer' in benchmark_spec.benchmark:
      run_path = posixpath.join(benchmark_path,
                                'transformer/implementations/pytorch')
      env = 'DATADIR=/data/wmt/utf8 LOGDIR=/tmp/transformer PULL=0'
    elif 'minigo' in benchmark_spec.benchmark:
      run_path = posixpath.join(benchmark_path,
                                'minigo/implementations/tensorflow')
      env = 'LOGDIR=/tmp/minigo CONT=mlperf-nvidia:minigo'
    elif 'mask' in benchmark_spec.benchmark:
      run_path = posixpath.join(benchmark_path,
                                'maskrcnn/implementations/pytorch')
      env = 'LOGDIR=/tmp/mask DATADIR=/data PULL=0'
    elif 'gnmt' in benchmark_spec.benchmark:
      run_path = posixpath.join(benchmark_path, 'gnmt/implementations/pytorch')
      env = 'LOGDIR=/tmp/gnmt DATADIR=/data/gnmt PULL=0'
    elif 'ssd' in benchmark_spec.benchmark:
      run_path = posixpath.join(benchmark_path, 'ssd/implementations/pytorch')
      env = 'LOGDIR=/tmp/ssd DATADIR=/data PULL=0'

    run_script = posixpath.join(run_path, 'run.sub')
    vm_util.ReplaceText(vm, 'SYSLOGGING=1', 'SYSLOGGING=0', run_script)
    mlperf_benchmark_cmd = (
        'cd {run_path} && chmod 755 run.sub && sudo {common_env} {env} '
        './run.sub'.format(run_path=run_path, common_env=common_env, env=env))

  if nvidia_driver.CheckNvidiaGpuExists(vm):
    mlperf_benchmark_cmd = '{env} {cmd}'.format(
        env=tensorflow.GetEnvironmentVars(vm), cmd=mlperf_benchmark_cmd)

  samples = []
  metadata = _CreateMetadataDict(benchmark_spec)
  stdout, _ = vm.RobustRemoteCommand(mlperf_benchmark_cmd, should_log=True)
  samples.extend(
      MakeSamplesFromOutput(
          metadata,
          stdout,
          use_tpu=bool(benchmark_spec.tpus),
          model=benchmark_spec.benchmark))
  return samples
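Example #14 replaces the inline sed pipelines of example #10 with vm_util.ReplaceText, keeping the command strings readable. A sketch of what such a helper could look like, assuming it shells out to sed on the VM; the name and signature here are hypothetical, not PKB's actual vm_util API:

def _replace_text_sketch(vm, from_str, to_str, file_path):
  """Hypothetical in-place substitution on a remote file via sed.

  Uses '/' as the sed delimiter, so from_str and to_str must not contain '/'.
  """
  vm.RemoteCommand('sed -i "s/{}/{}/g" {}'.format(from_str, to_str, file_path))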
Code example #15
def Run(benchmark_spec):
  """Run ResNet on the cluster.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
  _UpdateBenchmarkSpecWithFlags(benchmark_spec)
  vm = benchmark_spec.vms[0]
  if benchmark_spec.tpus:
    resnet_benchmark_script = 'resnet_main.py'
    resnet_benchmark_cmd = (
        '{env_cmd} && '
        'cd tpu/models && '
        'export PYTHONPATH=$(pwd) && '
        'cd official/resnet && '
        'python {script} '
        '--use_tpu={use_tpu} '
        '--data_dir={data_dir} '
        '--model_dir={model_dir} '
        '--resnet_depth={depth} '
        '--train_batch_size={train_batch_size} '
        '--eval_batch_size={eval_batch_size} '
        '--iterations_per_loop={iterations} '
        '--data_format={data_format} '
        '--precision={precision} '
        '--skip_host_call={skip_host_call} '
        '--num_train_images={num_train_images} '
        '--num_eval_images={num_eval_images}'.format(
            env_cmd=benchmark_spec.env_cmd,
            script=resnet_benchmark_script,
            use_tpu=bool(benchmark_spec.tpus),
            data_dir=benchmark_spec.data_dir,
            model_dir=benchmark_spec.model_dir,
            depth=benchmark_spec.depth,
            train_batch_size=benchmark_spec.train_batch_size,
            eval_batch_size=benchmark_spec.eval_batch_size,
            iterations=benchmark_spec.iterations,
            data_format=benchmark_spec.data_format,
            precision=benchmark_spec.precision,
            skip_host_call=benchmark_spec.skip_host_call,
            num_train_images=benchmark_spec.num_train_images,
            num_eval_images=benchmark_spec.num_eval_images))
  else:
    resnet_benchmark_script = 'imagenet_main.py'
    resnet_benchmark_cmd = ('{env_cmd} && '
                            'cd models && '
                            'export PYTHONPATH=$(pwd) && '
                            'cd official/r1/resnet && '
                            'python {script} '
                            '--data_dir=/data/imagenet '
                            '--model_dir={model_dir} '
                            '--resnet_size={resnet_size} '
                            '--batch_size={batch_size} '
                            '--data_format={data_format} '.format(
                                env_cmd=benchmark_spec.env_cmd,
                                script=resnet_benchmark_script,
                                model_dir=benchmark_spec.model_dir,
                                resnet_size=benchmark_spec.depth,
                                batch_size=benchmark_spec.train_batch_size,
                                data_format=benchmark_spec.data_format))
    precision = benchmark_spec.precision
    if precision == 'bfloat16':
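      # Assumption: the GPU script's --dtype accepts only fp16/fp32, so
      # bfloat16 is mapped to fp16 here.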
      resnet_benchmark_cmd = '{cmd} --dtype=fp16'.format(
          cmd=resnet_benchmark_cmd)
    else:
      resnet_benchmark_cmd = '{cmd} --dtype=fp32'.format(
          cmd=resnet_benchmark_cmd)

    if nvidia_driver.CheckNvidiaGpuExists(vm):
      resnet_benchmark_cmd = '{env} {cmd} --num_gpus={num_gpus}'.format(
          env=tensorflow.GetEnvironmentVars(vm),
          cmd=resnet_benchmark_cmd,
          num_gpus=nvidia_driver.QueryNumberOfGpus(vm))

  samples = []
  metadata = _CreateMetadataDict(benchmark_spec)
  elapsed_seconds = 0
  steps_per_eval = benchmark_spec.steps_per_eval
  train_steps = benchmark_spec.train_steps
  for step in range(steps_per_eval, train_steps + steps_per_eval,
                    steps_per_eval):
    step = min(step, train_steps)
    resnet_benchmark_cmd_step = '{cmd} --train_steps={step}'.format(
        cmd=resnet_benchmark_cmd, step=step)

    if benchmark_spec.mode in ('train', 'train_and_eval'):
      if benchmark_spec.tpus:
        tpu = benchmark_spec.tpu_groups['train'].GetName()
        num_cores = '--num_cores={}'.format(
            benchmark_spec.tpu_groups['train'].GetNumShards())
        resnet_benchmark_train_cmd = (
            '{cmd} --tpu={tpu} --mode=train {num_cores}'.format(
                cmd=resnet_benchmark_cmd_step, tpu=tpu, num_cores=num_cores))
      else:
        resnet_benchmark_train_cmd = (
            '{cmd} --max_train_steps={max_train_steps} '
            '--train_epochs={train_epochs} --noeval_only'.format(
                cmd=resnet_benchmark_cmd,
                train_epochs=benchmark_spec.epochs_per_eval,
                max_train_steps=step))

      start = time.time()
      stdout, stderr = vm.RobustRemoteCommand(resnet_benchmark_train_cmd,
                                              should_log=True)
      elapsed_seconds += (time.time() - start)
      samples.extend(mnist_benchmark.MakeSamplesFromTrainOutput(
          metadata, stdout + stderr, elapsed_seconds, step))

    if benchmark_spec.mode in ('train_and_eval', 'eval'):
      if benchmark_spec.tpus:
        tpu = benchmark_spec.tpu_groups['eval'].GetName()
        num_cores = '--num_cores={}'.format(
            benchmark_spec.tpu_groups['eval'].GetNumShards())
        resnet_benchmark_eval_cmd = (
            '{cmd} --tpu={tpu} --mode=eval {num_cores}'.format(
                cmd=resnet_benchmark_cmd_step, tpu=tpu, num_cores=num_cores))
      else:
        resnet_benchmark_eval_cmd = ('{cmd} --eval_only'.format(
            cmd=resnet_benchmark_cmd))

      stdout, stderr = vm.RobustRemoteCommand(resnet_benchmark_eval_cmd,
                                              should_log=True)
      samples.extend(
          MakeSamplesFromEvalOutput(
              metadata,
              stdout + stderr,
              elapsed_seconds,
              use_tpu=bool(benchmark_spec.tpus)))
  return samples