コード例 #1
0
def Prepare(benchmark_spec):
  """Set up the first VM of the benchmark for a ResNet run.

  Rejects a configuration that has TPUs while the VM also has an NVIDIA GPU,
  calls mnist_benchmark.Prepare and _UpdateBenchmarkSpecWithFlags, installs
  pyyaml, reinstalls the requests lib and, when no TPUs are configured, copies
  the contents of benchmark_spec.data_dir to /data/imagenet on the VM.

  Args:
    benchmark_spec: The benchmark specification. Only the first entry of
      benchmark_spec.vms is prepared.

  Raises:
    errors.Config.InvalidValue: If benchmark_spec.tpus is set and
      nvidia_driver.CheckNvidiaGpuExists reports a GPU on the VM.
  """
  vm = benchmark_spec.vms[0]

  # The GPU probe is only evaluated when TPUs are configured (short-circuit).
  if benchmark_spec.tpus and nvidia_driver.CheckNvidiaGpuExists(vm):
    raise errors.Config.InvalidValue(
        'Invalid configuration. '
        'GPUs and TPUs can not both present in the config.')

  mnist_benchmark.Prepare(benchmark_spec)
  _UpdateBenchmarkSpecWithFlags(benchmark_spec)

  vm.Install('pyyaml')
  # Reinstall the requests lib from scratch; without a correct install the
  # experiment won't run.
  for pip_action in ('uninstall -y', 'install'):
    vm.RemoteCommand('sudo pip {} requests'.format(pip_action))

  # TPU configurations do not get a local copy of the data.
  if benchmark_spec.tpus:
    return

  local_data_path = posixpath.join('/data', 'imagenet')
  # Create a world-writable target directory, then copy the data with gsutil.
  copy_steps = (
      'sudo mkdir -p {data_path}',
      'sudo chmod a+w {data_path}',
      'gsutil -m cp -r {data_dir}/* {data_path}',
  )
  copy_cmd = ' && '.join(copy_steps).format(
      data_dir=benchmark_spec.data_dir, data_path=local_data_path)
  vm.RemoteCommand(copy_cmd)
コード例 #2
0
def Prepare(benchmark_spec):
  """Prepare the target vm for the Tensor2Tensor benchmark.

  All of the work is delegated: mnist_benchmark.Prepare runs first, and
  _UpdateBenchmarkSpecWithFlags is then called on the same specification.

  Args:
    benchmark_spec: The benchmark specification, passed unchanged to both
      calls.
  """
  # Reuse the MNIST benchmark's preparation step.
  mnist_benchmark.Prepare(benchmark_spec)
  # Runs after the shared preparation, matching the order the other
  # benchmarks use.
  _UpdateBenchmarkSpecWithFlags(benchmark_spec)
コード例 #3
0
def Prepare(benchmark_spec):
    """Prepare the first VM of the benchmark for an MLPerf run.

    Calls mnist_benchmark.Prepare and _UpdateBenchmarkSpecWithFlags, clones
    the MLPerf results repository, installs pip3 and mlperf_compliance, then
    either installs the pinned TPU-side Python packages or (when no TPUs are
    configured) installs nvidia_docker and unpacks the ImageNet archive under
    /data/imagenet/combined. Finally the page cache is dropped and the event
    is logged through mlperf_compliance.

    Args:
      benchmark_spec: The benchmark specification. Only the first entry of
        benchmark_spec.vms is prepared.
    """
    mnist_benchmark.Prepare(benchmark_spec)
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
    vm = benchmark_spec.vms[0]

    vm.RemoteCommand(
        'git clone https://github.com/mlperf/results.git', should_log=True)
    vm.InstallPackages('python3-pip')
    vm.RemoteCommand('pip3 install mlperf_compliance==0.0.10')

    if not benchmark_spec.tpus:
        vm.Install('nvidia_docker')
        # Expose the scratch disk under /data before creating the dataset dir.
        vm.RemoteCommand('sudo ln -s /scratch /data')
        imagenet_data_dir = posixpath.join('/data', 'imagenet', 'combined')
        vm.RemoteCommand('sudo mkdir -p {}'.format(imagenet_data_dir))
        vm.RemoteCommand('sudo chmod a+w {}'.format(imagenet_data_dir))
        vm.InstallPreprovisionedBenchmarkData(
            BENCHMARK_NAME, [_ILSVRC2012_TAR], imagenet_data_dir)
        archive_path = posixpath.join(imagenet_data_dir, _ILSVRC2012_TAR)
        vm.RemoteCommand('sudo tar -xvf {tar} -C {data_dir}'.format(
            tar=archive_path, data_dir=imagenet_data_dir))
        # Some of the data ends up in sub directories after extraction; move
        # every shard file up into the top-level data directory.
        flatten_cmd = (
            'find {0} -name "*-*-of-*" -exec mv {{}} {0} \\;'.format(
                imagenet_data_dir))
        vm.RemoteCommand(flatten_cmd)
    else:
        pinned_packages = (
            'pyyaml==3.13',
            'oauth2client==4.1.3',
            'google-api-python-client==1.7.4',
            'google-cloud==0.34.0',
        )
        vm.RemoteCommand(
            'pip3 install --upgrade ' + ' '.join(pinned_packages))
        vm.RemoteCommand('pip3 install cloud-tpu-profiler==1.12')

    # Clearing caches.
    # https://github.com/mlperf/results/blob/master/v0.5.0/google/cloud_v2.8/resnet-tpuv2-8/code/resnet/model/main.sh#L133
    vm.RemoteCommand('sync && echo 3 | sudo tee /proc/sys/vm/drop_caches')
    # Record the cache clear via the benchmark-specific mlperf_log printer.
    log_call = (
        "mlperf_compliance.mlperf_log.{}_print(key='run_clear_caches')".format(
            benchmark_spec.benchmark))
    vm.RemoteCommand(
        'python3 -c "import mlperf_compliance;{}"'.format(log_call))