def Run(benchmark_spec):
  """Run MXNet on the cluster for each model specified.

  For every model in FLAGS.mx_models this builds a `train_imagenet.py`
  benchmark command line, runs it on the first VM of the benchmark spec and
  turns the command output into a result via _MakeSamplesFromOutput.

  The per-model settings (model, batch_size, num_layers, image_shape) are
  written onto benchmark_spec before each run so that they are available when
  the output is converted into a sample.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
      required to run the benchmark.

  Returns:
    A list of sample.Sample objects, one entry per model in FLAGS.mx_models.
  """
  _UpdateBenchmarkSpecWithFlags(benchmark_spec)
  vm = benchmark_spec.vms[0]
  mx_benchmark_dir = 'incubator-mxnet/example/image-classification'
  results = []
  for model in FLAGS.mx_models:
    num_layers = _GetNumLayers(model)
    batch_size = _GetBatchSize(model, num_layers)
    benchmark_spec.model = model
    benchmark_spec.batch_size = batch_size
    benchmark_spec.num_layers = num_layers
    benchmark_spec.image_shape = _GetImageShape(model)
    mx_benchmark_cmd = (
        'python train_imagenet.py '
        '--benchmark=1 '
        '--network={network} '
        '--batch-size={batch_size} '
        '--image-shape={image_shape} '
        '--num-epochs={num_epochs} '
        '--dtype={precision} '
        '--kv-store={key_value_store}').format(
            network=model,
            batch_size=batch_size,
            image_shape=benchmark_spec.image_shape,
            num_epochs=benchmark_spec.num_epochs,
            precision=benchmark_spec.precision,
            key_value_store=benchmark_spec.key_value_store)
    if benchmark_spec.device == GPU:
      # Train on every GPU the VM reports: "--gpus 0,1,...,N-1".
      num_gpus = cuda_toolkit.QueryNumberOfGpus(vm)
      mx_benchmark_cmd = '{env} {cmd} --gpus {gpus}'.format(
          env=mxnet.GetEnvironmentVars(vm),
          cmd=mx_benchmark_cmd,
          gpus=','.join(str(n) for n in range(num_gpus)))
    elif benchmark_spec.device == CPU:
      # Specifies the number of threads to use in CPU test.
      # https://mxnet.incubator.apache.org/faq/perf.html
      # Half of the reported CPUs are used, clamped to at least one thread:
      # on a single-CPU VM the integer division yields 0, and
      # OMP_NUM_THREADS must be a positive integer.
      omp_num_threads = max(1, vm.NumCpusForBenchmark() // 2)
      mx_benchmark_cmd = 'OMP_NUM_THREADS={omp_num_threads} {cmd}'.format(
          omp_num_threads=omp_num_threads,
          cmd=mx_benchmark_cmd)

    # Only pass --num-layers when a layer count was resolved for this model.
    if num_layers:
      mx_benchmark_cmd = '%s --num-layers %s' % (mx_benchmark_cmd, num_layers)

    run_command = 'cd %s && %s' % (mx_benchmark_dir, mx_benchmark_cmd)
    stdout, stderr = vm.RobustRemoteCommand(run_command, should_log=True)

    # Parse stdout when it is non-empty, otherwise fall back to stderr.
    results.append(_MakeSamplesFromOutput(benchmark_spec, stdout or stderr))

  return results
def Run(benchmark_spec):
  """Run MXNet on the cluster for each model specified.

  For every model in FLAGS.mx_models this builds a `train_imagenet.py`
  benchmark command line, runs it on the first VM of the benchmark spec and
  turns the command output into a result via _MakeSamplesFromOutput.

  The per-model settings (model, batch_size, num_layers) are written onto
  benchmark_spec before each run so that they are available when the output
  is converted into a sample.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
      required to run the benchmark.

  Returns:
    A list of sample.Sample objects, one entry per model in FLAGS.mx_models.
  """
  _UpdateBenchmarkSpecWithFlags(benchmark_spec)
  vm = benchmark_spec.vms[0]
  mx_benchmark_dir = 'incubator-mxnet/example/image-classification'
  results = []
  for model in FLAGS.mx_models:
    num_layers = _GetNumLayers(model)
    batch_size = _GetBatchSize(model, num_layers)
    benchmark_spec.model = model
    benchmark_spec.batch_size = batch_size
    # Always record the layer count for the current model, even when it is
    # empty. Assigning it only when truthy would let a model without a layer
    # count inherit the value left over from a previous iteration.
    benchmark_spec.num_layers = num_layers
    mx_benchmark_cmd = (
        'python train_imagenet.py --benchmark 1 --network %s --batch-size %s '
        '--image-shape %s --num-epochs %s --kv-store device') % (
            model, batch_size, IMAGENET_SHAPE, benchmark_spec.num_epochs)
    if benchmark_spec.device == GPU:
      # Train on every GPU the VM reports: "--gpus 0,1,...,N-1".
      gpus = cuda_toolkit_8.QueryNumberOfGpus(vm)
      mx_benchmark_cmd = '%s %s --gpus %s' % (
          mxnet.GetEnvironmentVars(vm),
          mx_benchmark_cmd,
          ','.join(str(n) for n in range(gpus)))

    # Only pass --num-layers when a layer count was resolved for this model.
    if num_layers:
      mx_benchmark_cmd = '%s --num-layers %s' % (mx_benchmark_cmd, num_layers)

    run_command = 'cd %s && %s' % (mx_benchmark_dir, mx_benchmark_cmd)
    stdout, stderr = vm.RobustRemoteCommand(run_command, should_log=True)

    # Parse stdout when it is non-empty, otherwise fall back to stderr.
    results.append(_MakeSamplesFromOutput(benchmark_spec, stdout or stderr))

  return results