def _SetAndConfirmGpuClocks(vm):
    """Applies the flag-specified GPU clocks and verifies each GPU took them.

    The target clocks are read from the gpu_pcie_bandwidth_clock_speeds flag:
    element 0 is used as the memory clock and element 1 as the graphics
    clock. After the clocks are set, every GPU on the VM is queried; if a
    GPU's reported clock pair does not align with the pair that was just
    requested, an exception is raised.

    Args:
      vm: the virtual machine to operate on.

    Raises:
      UnsupportedClockSpeedException: if a GPU did not accept the provided
        clock speeds.
    """
    clock_speeds = FLAGS.gpu_pcie_bandwidth_clock_speeds
    memory_clock = clock_speeds[0]
    graphics_clock = clock_speeds[1]
    requested = (memory_clock, graphics_clock)

    cuda_toolkit_8.SetGpuClockSpeed(vm, memory_clock, graphics_clock)

    # Read back every device; the first mismatch aborts the whole operation.
    for gpu_index in range(cuda_toolkit_8.QueryNumberOfGpus(vm)):
        reported = cuda_toolkit_8.QueryGpuClockSpeed(vm, gpu_index)
        if reported != requested:
            raise UnsupportedClockSpeedException(
                'Unrecoverable error setting '
                'GPU #{} clock speed to {},{}'.format(
                    gpu_index, memory_clock, graphics_clock))
def Run(benchmark_spec):
    """Runs the tf_cnn_benchmarks script on the first VM of the cluster.

    The benchmark_spec is first refreshed from the command line flags. When
    the spec's device is GPU, the number of GPUs is queried from the VM,
    stored on the spec, and passed to the script together with the
    environment variables returned by _GetEnvironmentVars.

    Args:
      benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

    Returns:
      A list of sample.Sample objects.
    """
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
    master_vm = benchmark_spec.vms[0]
    script_dir = 'benchmarks/scripts/tf_cnn_benchmarks'

    command_template = (
        'python tf_cnn_benchmarks.py --local_parameter_device=%s '
        '--batch_size=%s --model=%s --data_name=%s --variable_update=%s '
        '--use_nccl=%s --distortions=%s --device=%s --data_format=%s '
        '--forward_only=%s')
    command_args = (
        benchmark_spec.local_parameter_device,
        benchmark_spec.batch_size,
        benchmark_spec.model,
        benchmark_spec.data_name,
        benchmark_spec.variable_update,
        benchmark_spec.use_nccl,
        benchmark_spec.distortions,
        benchmark_spec.device,
        benchmark_spec.data_format,
        benchmark_spec.forward_only,
    )
    command = command_template % command_args

    if benchmark_spec.device == GPU:
        # GPU runs get the environment variables prepended and the GPU count
        # appended as an extra flag.
        benchmark_spec.num_gpus = cuda_toolkit_8.QueryNumberOfGpus(master_vm)
        command = '%s %s --num_gpus=%s' % (
            _GetEnvironmentVars(master_vm), command, benchmark_spec.num_gpus)

    output, _ = master_vm.RobustRemoteCommand(
        'cd %s && %s' % (script_dir, command), should_log=True)
    return _MakeSamplesFromOutput(benchmark_spec, output)
def Run(benchmark_spec):
    """Sets the GPU clock speed and runs the CUDA PCIe bandwidth benchmark.

    The bandwidthTest binary from the CUDA toolkit's demo suite is executed
    gpu_pcie_bandwidth_iterations times on the first VM. Each run's output is
    parsed individually and the parsed results are then aggregated.

    Args:
      benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

    Returns:
      A list of sample.Sample objects.
    """
    vm = benchmark_spec.vms[0]

    # The clocks are applied here instead of in Prepare() so that a user can
    # perform several runs at a chosen clock speed without re-preparing the
    # VM.
    cuda_toolkit_8.SetAndConfirmGpuClocks(vm)

    num_iterations = FLAGS.gpu_pcie_bandwidth_iterations
    metadata = dict(cuda_toolkit_8.GetMetadataFromFlags())
    metadata['num_iterations'] = num_iterations
    metadata['num_gpus'] = cuda_toolkit_8.QueryNumberOfGpus(vm)

    bandwidth_test_cmd = ('%s/extras/demo_suite/bandwidthTest --device=all' %
                          cuda_toolkit_8.CUDA_TOOLKIT_INSTALL_DIR)

    per_iteration_results = []
    for unused_iteration in range(num_iterations):
        stdout, _ = vm.RemoteCommand(bandwidth_test_cmd, should_log=True)
        per_iteration_results.append(_ParseOutputFromSingleIteration(stdout))
        # Device info is recorded once, from the first output that is seen
        # while the key is still absent.
        if 'device_info' not in metadata:
            metadata['device_info'] = _ParseDeviceInfo(stdout)

    return _CalculateMetricsOverAllIterations(per_iteration_results, metadata)
def Prepare(benchmark_spec):
    """Installs SHOC on every VM and pushes the machinefile.

    _InstallAndAuthenticateVm is run across all VMs via vm_util.RunThreaded.
    The GPU count is then queried from the first VM, stored on the
    benchmark_spec as num_gpus, and used when creating the machinefile.

    Args:
      benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.
    """
    all_vms = benchmark_spec.vms
    vm_util.RunThreaded(_InstallAndAuthenticateVm, all_vms)

    gpu_count = cuda_toolkit_8.QueryNumberOfGpus(all_vms[0])
    benchmark_spec.num_gpus = gpu_count
    _CreateAndPushMachineFile(all_vms, gpu_count)
def Prepare(benchmark_spec):
    """Installs and sets up TensorFlow on the target vm.

    Only the first VM of the spec is prepared. The CUDA toolkit is installed
    first so that the GPU count can be queried and stored on the
    benchmark_spec; cudnn and tensorflow are installed afterwards.

    Args:
      benchmark_spec: The benchmark specification
    """
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
    master_vm = benchmark_spec.vms[0]

    logging.info('Installing CUDA Toolkit 8.0 on %s', master_vm)
    master_vm.Install('cuda_toolkit_8')
    benchmark_spec.num_gpus = cuda_toolkit_8.QueryNumberOfGpus(master_vm)

    for package in ('cudnn', 'tensorflow'):
        master_vm.Install(package)
def AssertCorrectNumberOfGpus(vm):
    """Assert that the VM is reporting the correct number of GPUs.

    The expected count is looked up in num_gpus_map_util.gpus_per_vm using
    the VM's machine_type and compared against the count queried from the VM.

    Args:
      vm: the virtual machine to check.

    Returns:
      number of GPUs on the VM

    Raises:
      Exception: if VM reports incorrect number of GPUs
    """
    expected_num_gpus = num_gpus_map_util.gpus_per_vm[vm.machine_type]
    actual_num_gpus = cuda_toolkit_8.QueryNumberOfGpus(vm)
    if actual_num_gpus != expected_num_gpus:
        # The two literals are joined by implicit concatenation (no comma) so
        # the exception carries one formatted message string instead of a
        # tuple of two separate arguments.
        raise Exception(
            'VM reported incorrect number of GPUs. '
            'Expected %s, received %s' % (expected_num_gpus, actual_num_gpus))
    return actual_num_gpus
def Prepare(benchmark_spec):
    """Installs and sets up TensorFlow on the target vm.

    Only the first VM of the spec is prepared. After the CUDA toolkit is
    installed the GPU count is queried and stored on the benchmark_spec;
    cudnn and tensorflow are installed next, and finally the TensorFlow
    benchmarks repository is cloned onto the VM.

    Args:
      benchmark_spec: The benchmark specification
    """
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
    master_vm = benchmark_spec.vms[0]

    logging.info('Installing CUDA Toolkit 8.0 on %s', master_vm)
    master_vm.Install('cuda_toolkit_8')
    benchmark_spec.num_gpus = cuda_toolkit_8.QueryNumberOfGpus(master_vm)

    for package in ('cudnn', 'tensorflow'):
        master_vm.Install(package)

    clone_command = 'git clone https://github.com/tensorflow/benchmarks.git'
    master_vm.RemoteCommand(clone_command, should_log=True)
def _UpdateBenchmarkSpecWithFlags(benchmark_spec):
    """Copies HPCG settings from the command line flags onto the spec.

    gpus_per_node comes from the hpcg_gpus_per_node flag; when that flag is
    falsy the count is queried from the first VM instead. cpus_per_rank is
    the first VM's num_cpus divided by gpus_per_node, converted with int().
    total_gpus is gpus_per_node multiplied by the number of VMs.

    Args:
      benchmark_spec: benchmark specification to update
    """
    first_vm = benchmark_spec.vms[0]

    gpus_per_node = FLAGS.hpcg_gpus_per_node
    if not gpus_per_node:
        gpus_per_node = cuda_toolkit_8.QueryNumberOfGpus(first_vm)

    # All derived values are computed before the spec is touched.
    cpus_per_rank = int(first_vm.num_cpus / gpus_per_node)
    vm_count = len(benchmark_spec.vms)
    gpu_total = gpus_per_node * vm_count

    benchmark_spec.gpus_per_node = gpus_per_node
    benchmark_spec.cpus_per_rank = cpus_per_rank
    benchmark_spec.num_vms = vm_count
    benchmark_spec.total_gpus = gpu_total
    benchmark_spec.hpcg_problem_size = FLAGS.hpcg_problem_size
    benchmark_spec.hpcg_runtime = FLAGS.hpcg_runtime
def _RunOnVm(vm, benchmark_spec):
    """Runs the TensorFlow benchmark once per requested model on one VM.

    For every model in the tf_models flag, the tf_cnn_benchmarks script is
    invoked with that model and its batch size. On GPU devices the command
    additionally carries the TensorFlow environment variables and the number
    of GPUs queried from the VM; otherwise the GPU count is reported as 0.

    Args:
      vm: VM to run on
      benchmark_spec: benchmark_spec object

    Returns:
      A list of samples
    """
    script_dir = 'benchmarks/scripts/tf_cnn_benchmarks'
    command_template = (
        'python tf_cnn_benchmarks.py --local_parameter_device=%s '
        '--batch_size=%s --model=%s --data_name=%s --variable_update=%s '
        '--use_nccl=%s --distortions=%s --device=%s --data_format=%s '
        '--forward_only=%s')

    samples = []
    for model in FLAGS.tf_models:
        batch_size = _GetBatchSize(model)
        command = command_template % (
            benchmark_spec.local_parameter_device,
            batch_size,
            model,
            benchmark_spec.data_name,
            benchmark_spec.variable_update,
            benchmark_spec.use_nccl,
            benchmark_spec.distortions,
            benchmark_spec.device,
            benchmark_spec.data_format,
            benchmark_spec.forward_only,
        )

        # Default for non-GPU devices; overridden below for GPU runs.
        num_gpus = 0
        if benchmark_spec.device == GPU:
            num_gpus = cuda_toolkit_8.QueryNumberOfGpus(vm)
            command = '%s %s --num_gpus=%s' % (
                tensorflow._GetEnvironmentVars(vm), command, num_gpus)

        output, _ = vm.RobustRemoteCommand(
            'cd %s && %s' % (script_dir, command), should_log=True)
        samples.extend(
            _MakeSamplesFromOutput(benchmark_spec, output, model, batch_size,
                                   num_gpus))
    return samples
def Run(benchmark_spec):
    """Run MXNet on the cluster for each model specified.

    For every model in the mx_models flag, train_imagenet.py is executed in
    benchmark mode on the first VM. The current model, its batch size and
    (when truthy) its layer count are written onto the benchmark_spec before
    the output is converted into samples.

    Args:
      benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

    Returns:
      A list of sample.Sample objects.
    """
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
    vm = benchmark_spec.vms[0]
    mx_benchmark_dir = 'incubator-mxnet/example/image-classification'
    results = []
    for model in FLAGS.mx_models:
        num_layers = _GetNumLayers(model)
        batch_size = _GetBatchSize(model, num_layers)
        # Record the per-model settings on the spec; it is handed to
        # _MakeSamplesFromOutput below.
        benchmark_spec.model = model
        benchmark_spec.batch_size = batch_size
        mx_benchmark_cmd = (
            'python train_imagenet.py --benchmark 1 --network %s --batch-size %s '
            '--image-shape %s --num-epochs %s --kv-store device') % (
                model, batch_size, IMAGENET_SHAPE, benchmark_spec.num_epochs)
        if benchmark_spec.device == GPU:
            gpus = cuda_toolkit_8.QueryNumberOfGpus(vm)
            # --gpus receives the comma-separated indices 0..gpus-1, and the
            # MXNet environment variables are prepended to the command.
            mx_benchmark_cmd = '%s %s --gpus %s' % (mxnet.GetEnvironmentVars(
                vm), mx_benchmark_cmd, ','.join(str(n) for n in range(gpus)))
        if num_layers:
            mx_benchmark_cmd = '%s --num-layers %s' % (mx_benchmark_cmd, num_layers)
            # NOTE(review): assumed to belong to the `if num_layers:` branch;
            # confirm the nesting against the original file's indentation.
            benchmark_spec.num_layers = num_layers
        run_command = 'cd %s && %s' % (mx_benchmark_dir, mx_benchmark_cmd)
        stdout, stderr = vm.RobustRemoteCommand(run_command, should_log=True)
        # stderr is parsed instead when stdout is empty.
        results.append(_MakeSamplesFromOutput(benchmark_spec, stdout or stderr))
    return results
def testQueryNumberOfGpus(self):
    """Checks that QueryNumberOfGpus reports 8 for a stubbed command output."""
    # RemoteCommand is stubbed to return a (stdout, stderr) style pair whose
    # first element is a header line followed by the count.
    stubbed_result = ("count\n8", None)
    fake_vm = mock.MagicMock()
    fake_vm.RemoteCommand = mock.MagicMock(return_value=stubbed_result)

    reported_gpus = cuda_toolkit_8.QueryNumberOfGpus(fake_vm)

    self.assertEqual(8, reported_gpus)