def Prepare(benchmark_spec):
    """Prepare the first VM of the benchmark spec for a ResNet run.

    Rejects a spec that lists TPUs while the VM also reports an NVIDIA GPU,
    then calls mnist_benchmark.Prepare and _UpdateBenchmarkSpecWithFlags,
    installs pyyaml and reinstalls the requests pip package. When the spec has
    no TPUs, the contents of benchmark_spec.data_dir are copied with gsutil
    into /data/imagenet on the VM.

    Args:
      benchmark_spec: The benchmark specification.

    Raises:
      errors.Config.InvalidValue: If benchmark_spec.tpus is non-empty and
        nvidia_driver.CheckNvidiaGpuExists reports a GPU on the VM.
    """
    vm = benchmark_spec.vms[0]

    tpus_configured = bool(benchmark_spec.tpus)
    if tpus_configured and nvidia_driver.CheckNvidiaGpuExists(vm):
        raise errors.Config.InvalidValue(
            'Invalid configuration. GPUs and TPUs can not both present in the config.'
        )

    mnist_benchmark.Prepare(benchmark_spec)
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
    vm.Install('pyyaml')

    # Remove and then reinstall the requests lib so it is installed correctly;
    # otherwise the experiment won't run.
    for pip_command in ('sudo pip uninstall -y requests',
                        'sudo pip install requests'):
        vm.RemoteCommand(pip_command)

    # benchmark_spec.tpus is read again here (not cached from above) because
    # the setup calls in between receive the spec.
    if benchmark_spec.tpus:
        return

    # Without TPUs, stage the dataset on the VM's local disk.
    local_data_path = posixpath.join('/data', 'imagenet')
    stage_data_template = ('sudo mkdir -p {data_path} && '
                           'sudo chmod a+w {data_path} && '
                           'gsutil -m cp -r {data_dir}/* {data_path}')
    vm.RemoteCommand(stage_data_template.format(
        data_dir=benchmark_spec.data_dir,
        data_path=local_data_path))
def Prepare(benchmark_spec):
    """Prepare the target VM for the Tensor2Tensor benchmark.

    All of the work is delegated: mnist_benchmark.Prepare is called first,
    followed by _UpdateBenchmarkSpecWithFlags, both with the same spec.

    Args:
      benchmark_spec: The benchmark specification.
    """
    # Shared setup provided by the MNIST benchmark module.
    mnist_benchmark.Prepare(benchmark_spec)

    # Runs after the shared setup, on the same spec object.
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
def Prepare(benchmark_spec):
    """Prepare the first VM of the benchmark spec for an MLPerf run.

    Calls mnist_benchmark.Prepare and _UpdateBenchmarkSpecWithFlags, clones
    the mlperf/results repository and installs python3-pip and
    mlperf_compliance. With TPUs in the spec, extra pip packages and
    cloud-tpu-profiler are installed; otherwise nvidia_docker is installed and
    the preprovisioned ImageNet archive is unpacked under
    /data/imagenet/combined. Finally the page cache is dropped and the
    'run_clear_caches' compliance log entry is printed.

    Args:
      benchmark_spec: The benchmark specification.
    """
    mnist_benchmark.Prepare(benchmark_spec)
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
    vm = benchmark_spec.vms[0]

    vm.RemoteCommand('git clone https://github.com/mlperf/results.git',
                     should_log=True)
    vm.InstallPackages('python3-pip')
    vm.RemoteCommand('pip3 install mlperf_compliance==0.0.10')

    if not benchmark_spec.tpus:
        vm.Install('nvidia_docker')
        # Expose /scratch under the /data path used below.
        vm.RemoteCommand('sudo ln -s /scratch /data')

        imagenet_dir = posixpath.join('/data', 'imagenet', 'combined')
        vm.RemoteCommand('sudo mkdir -p {}'.format(imagenet_dir))
        vm.RemoteCommand('sudo chmod a+w /data/imagenet/combined')

        vm.InstallPreprovisionedBenchmarkData(BENCHMARK_NAME,
                                              [_ILSVRC2012_TAR],
                                              imagenet_dir)
        archive_path = posixpath.join(imagenet_dir, _ILSVRC2012_TAR)
        vm.RemoteCommand('sudo tar -xvf {tar} -C {data_dir}'.format(
            tar=archive_path, data_dir=imagenet_dir))

        # Some of the data are in a sub directory. Move all of the shard
        # files up into the data directory itself.
        flatten_command = (
            'find {data_dir} -name "*-*-of-*" -exec mv {{}} {data_dir}'
            ' \\;'.format(data_dir=imagenet_dir))
        vm.RemoteCommand(flatten_command)
    else:
        tpu_pip_command = ('pip3 install --upgrade '
                           'pyyaml==3.13 '
                           'oauth2client==4.1.3 '
                           'google-api-python-client==1.7.4 '
                           'google-cloud==0.34.0')
        vm.RemoteCommand(tpu_pip_command)
        vm.RemoteCommand('pip3 install cloud-tpu-profiler==1.12')

    # Clearing caches.
    # https://github.com/mlperf/results/blob/master/v0.5.0/google/cloud_v2.8/resnet-tpuv2-8/code/resnet/model/main.sh#L133
    # NOTE(review): assumed to run for both the TPU and the GPU setup, not
    # only the GPU branch -- confirm against the intended indentation.
    vm.RemoteCommand('sync && echo 3 | sudo tee /proc/sys/vm/drop_caches')
    log_command = ('python3 -c "import mlperf_compliance;mlperf_compliance.'
                   'mlperf_log.{}_print(key=\'run_clear_caches\')"'.format(
                       benchmark_spec.benchmark))
    vm.RemoteCommand(log_command)