def build_graph(tower_devices, tensor_shapes, variable_mgr, num_iters):
    """Construct the graph used by the all-reduce benchmark.

    For every device one float32 variable is created per entry of
    `tensor_shapes`, initialized from `tf.random_normal`. The per-device
    variable lists are then handed to `build_all_reduce_iterations`.

    Args:
      tower_devices: A list of device strings of the devices to run the
        all-reduce benchmark on.
      tensor_shapes: A list of shapes of the tensors that will be aggregated
        for the all-reduce.
      variable_mgr: The VariableMgr to perform the all-reduce.
      num_iters: Number of iterations to aggregate tensors for.

    Returns:
      An op that runs the benchmark.
    """

    def _variables_for_device(device_index):
        # Variable names encode both the shape index and the device index.
        return [
            tf.Variable(
                tf.random_normal(shape, dtype=tf.float32),
                name='tensor_%d_on_device_%d' % (shape_index, device_index))
            for shape_index, shape in enumerate(tensor_shapes)
        ]

    all_device_tensors = []
    for device_index, tower_device in enumerate(tower_devices):
        with tf.device(tower_device):
            all_device_tensors.append(_variables_for_device(device_index))

    log_fn('Building all-reduce ops')
    benchmark_op = build_all_reduce_iterations(
        all_device_tensors, tower_devices, variable_mgr, num_iters)
    log_fn('Done building all-reduce ops')
    return benchmark_op
def main(positional_arguments):
    """Build the GTSRB benchmark from flags and print its configuration.

    Flag-derived params are overridden with the values in `options`, the
    dataset is chosen from `options.data_dir`, and a `BenchmarkCNN` is
    constructed around a `Model_Builder` model. The default graph is reset
    at the end.

    Args:
      positional_arguments: Command-line arguments left after flag parsing;
        only the first element is accepted.

    Raises:
      ValueError: If more than one positional argument is received.
    """
    # Command-line arguments like '--distortions False' are equivalent to
    # '--distortions=True False', where False is a positional argument. To
    # prevent this from silently running with distortions, we do not allow
    # positional arguments.
    assert len(positional_arguments) >= 1
    unexpected_arguments = positional_arguments[1:]
    if unexpected_arguments:
        raise ValueError('Received unknown positional arguments: %s'
                         % unexpected_arguments)

    options = make_options_from_flags(FLAGS)

    # Disabled overrides kept for reference:
    #   all_reduce_spec='nccl', bottom_file=options.bottom_file,
    #   affine_files=options.affine_files,
    #   affine_classes=options.affine_classes, print_training_accuracy=True,
    #   summary_verbosity=1, save_summaries_steps=10,
    #   save_model_secs=3600 (save every hour).
    overrides = dict(
        batch_size=options.batch_size,
        model='MY_GTSRB',
        num_epochs=options.num_epochs,
        num_gpus=options.num_gpus,
        data_format='NHWC',
        train_dir=options.checkpoint_folder,
        allow_growth=True,
        variable_update='replicated',
        local_parameter_device='gpu',
        use_tf_layers=False,
        optimizer=options.optimizer,
        weight_decay=options.weight_decay,
        backbone_model_path=options.backbone_model_path,
        # Checkpoints are written every 60 seconds.
        save_model_secs=60,
    )
    params = benchmark_cnn.make_params_from_flags()._replace(**overrides)
    params = benchmark_cnn.setup(params)

    # A data directory containing 'test' selects the test split.
    dataset_cls = (GTSRBTestDataset if 'test' in options.data_dir
                   else GTSRBDataset)
    dataset = dataset_cls(options)
    model = Model_Builder(options.model_name, dataset.num_classes, options,
                          params)
    bench = benchmark_cnn.BenchmarkCNN(params, dataset=dataset, model=model)

    version = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow: %i.%i' % (version[0], version[1]))
    bench.print_info()
    tf.reset_default_graph()
def main(positional_arguments):
    """Build a BenchmarkCNN from flags and print its configuration.

    Args:
      positional_arguments: Command-line arguments left after flag parsing;
        only the first element is accepted.

    Raises:
      ValueError: If more than one positional argument is received.
    """
    # Command-line arguments like '--distortions False' are equivalent to
    # '--distortions=True False', where False is a positional argument. To
    # prevent this from silently running with distortions, we do not allow
    # positional arguments.
    assert len(positional_arguments) >= 1
    if len(positional_arguments) > 1:
        raise ValueError('Received unknown positional arguments: %s' % positional_arguments[1:])
    params = benchmark_cnn.make_params_from_flags()
    params = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)
    tfversion = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow: %i.%i' % (tfversion[0], tfversion[1]))
    bench.print_info()
    # NOTE(review): the body of this `with` statement is not present in the
    # visible source, so the function is incomplete as shown -- confirm what
    # is meant to run inside the logging context.
    with log_context(LOGGER_URL, LOGGER_USRENAME, LOGGER_PASSWORD, LOGGER_DB, LOGGER_SERIES, machine=LOGGER_VM):
def postprocess(self, results):
    """Log the batch-mean character and word error rates.

    Each of the `self.batch_size` examples is decoded from
    `results[self.PROBABILITY_TENSOR]` and compared with the transcript
    decoded from `results[self.LABEL_TENSOR]`. The per-example CER is
    normalized by the transcript length in characters and the WER by its
    length in words.

    Args:
      results: Mapping holding the probability and label tensors.

    Returns:
      A dict with 'top_1_accuracy' and 'top_5_accuracy' both set to 0.
    """
    probabilities = results[self.PROBABILITY_TENSOR]
    decoder = DeepSpeechDecoder(" abcdefghijklmnopqrstuvwxyz'-")
    # The ground truth transcripts.
    references = results[self.LABEL_TENSOR]

    # WER (Word Error Rate) and CER (Character Error Rate) are the metrics.
    cer_sum = 0
    wer_sum = 0
    for example in range(self.batch_size):
        hypothesis = decoder.decode_logits(probabilities[example])
        reference = decoder.decode(references[example])
        cer_sum += decoder.cer(hypothesis, reference) / len(reference)
        wer_sum += decoder.wer(hypothesis, reference) / len(reference.split())

    # Average over the batch.
    mean_cer = cer_sum / self.batch_size
    mean_wer = wer_sum / self.batch_size
    log_fn('total CER: {:f}; total WER: {:f}; total example: {:d}.'.format(
        mean_cer, mean_wer, self.batch_size))

    # TODO(laigd): get rid of the top_N_accuracy bindings.
    return {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
def run_benchmark(bench_cnn, num_iters):
    """Runs the all-reduce benchmark.

    Args:
      bench_cnn: The BenchmarkCNN where params, the variable manager, and
        other attributes are obtained.
      num_iters: Number of iterations to do all-reduce for.

    Raises:
      ValueError: Invalid params of bench_cnn.
    """
    if bench_cnn.params.variable_update != 'replicated':
        # The literals are joined by implicit concatenation, so the first one
        # must end with a space.
        raise ValueError('--variable_update=replicated must be specified to '
                         'use the all-reduce benchmark')
    if bench_cnn.params.variable_consistency == 'relaxed':
        raise ValueError('--variable_consistency=relaxed is not supported')

    benchmark_op = build_graph(bench_cnn.raw_devices,
                               get_var_shapes(bench_cnn.model),
                               bench_cnn.variable_mgr, num_iters)
    init_ops = [
        tf.global_variables_initializer(),
        bench_cnn.variable_mgr.get_post_init_ops()
    ]
    # run_graph requires a loss op, but its value is never used.
    loss_op = tf.no_op()

    if bench_cnn.graph_file:
        path, filename = os.path.split(bench_cnn.graph_file)
        # A file name ending in 'txt' selects the text format.
        as_text = filename.endswith('txt')
        log_fn('Writing GraphDef as %s to %s' % (
            'text' if as_text else 'binary', bench_cnn.graph_file))
        tf.train.write_graph(
            tf.get_default_graph().as_graph_def(add_shapes=True),
            path, filename, as_text)

    run_graph(benchmark_op, bench_cnn, init_ops, loss_op)
def main(positional_arguments):
    """Dump the environment, then build a BenchmarkCNN and print its info.

    Every environment variable is written to the debug log. Setup and
    construction of the benchmark happen inside the MLPerf logger context.

    Args:
      positional_arguments: Command-line arguments left after flag parsing;
        only the first element is accepted.

    Raises:
      ValueError: If more than one positional argument is received.
    """
    # Command-line arguments like '--distortions False' are equivalent to
    # '--distortions=True False', where False is a positional argument. To
    # prevent this from silently running with distortions, we do not allow
    # positional arguments.
    assert len(positional_arguments) >= 1
    unexpected_arguments = positional_arguments[1:]
    if unexpected_arguments:
        raise ValueError('Received unknown positional arguments: %s'
                         % unexpected_arguments)

    params = benchmark_cnn.make_params_from_flags()

    # Print ENV Variables
    rule = '=' * 20
    tf.logging.debug(rule + ' Environment Variables ' + rule)
    for name, value in os.environ.items():
        tf.logging.debug('{}: {}'.format(name, value))

    compliance_logging = absl_flags.FLAGS.ml_perf_compliance_logging
    with mlperf.mlperf_logger(compliance_logging, params.model):
        params = benchmark_cnn.setup(params)
        bench = benchmark_cnn.BenchmarkCNN(params)
        version = cnn_util.tensorflow_version_tuple()
        log_fn('TensorFlow: %i.%i' % (version[0], version[1]))
        bench.print_info()
def main(positional_arguments):
    """Build a BenchmarkCNN, print its info and the threading parameters.

    Args:
      positional_arguments: Command-line arguments left after flag parsing;
        only the first element is accepted.

    Raises:
      ValueError: If more than one positional argument is received.
    """
    # Command-line arguments like '--distortions False' are equivalent to
    # '--distortions=True False', where False is a positional argument. To
    # prevent this from silently running with distortions, we do not allow
    # positional arguments.
    assert len(positional_arguments) >= 1
    unexpected_arguments = positional_arguments[1:]
    if unexpected_arguments:
        raise ValueError('Received unknown positional arguments: %s'
                         % unexpected_arguments)

    params = benchmark_cnn.setup(benchmark_cnn.make_params_from_flags())
    bench = benchmark_cnn.BenchmarkCNN(params)
    version = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow: %i.%i' % (version[0], version[1]))
    bench.print_info()

    # Echo the threading and dataset prefetch settings, one per line.
    reported_fields = (
        'num_inter_threads',
        'num_intra_threads',
        'datasets_num_private_threads',
        'datasets_use_prefetch',
        'datasets_prefetch_buffer_size',
    )
    for field in reported_fields:
        print(field + ': ' + str(getattr(params, field)))
def main(positional_arguments):
    """Build a BenchmarkCNN, loading the DMO file system first if enabled.

    When `params.enable_dmo` is set, `LoadFileSystem()` is called and the
    process exits with status -1 if it reports failure.

    Args:
      positional_arguments: Command-line arguments left after flag parsing;
        only the first element is accepted.

    Raises:
      ValueError: If more than one positional argument is received.
    """
    # Command-line arguments like '--distortions False' are equivalent to
    # '--distortions=True False', where False is a positional argument. To
    # prevent this from silently running with distortions, we do not allow
    # positional arguments.
    assert len(positional_arguments) >= 1
    unexpected_arguments = positional_arguments[1:]
    if unexpected_arguments:
        raise ValueError('Received unknown positional arguments: %s'
                         % unexpected_arguments)

    params = benchmark_cnn.setup(benchmark_cnn.make_params_from_flags())

    import sys
    # The explicit comparisons against True/False are kept as they are:
    # a truthiness test would treat other return values (e.g. None)
    # differently.
    if params.enable_dmo == True:  # noqa: E712
        if LoadFileSystem() == False:  # noqa: E712
            sys.exit(-1)
        print("\n*******DMO enabled********\n")
        # sys.exit(0)

    bench = benchmark_cnn.BenchmarkCNN(params)
    version = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow: %i.%i' % (version[0], version[1]))
    bench.print_info()
def build_network(self, images, phase_train=True, nclass=1001, image_depth=3, data_type=tf.float32, data_format='NCHW', use_tf_layers=True, fp16_vars=False):
    """Returns logits and aux_logits from images.

    Args:
      images: Input image tensor. It is transposed with permutation
        [0, 3, 1, 2] when `data_format` is 'NCHW' (presumably the input
        arrives as NHWC -- confirm against the caller).
      phase_train: Forwarded to the ConvNetBuilder.
      nclass: Number of outputs of the final affine layer(s).
      image_depth: Forwarded to the ConvNetBuilder.
      data_type: Forwarded to the ConvNetBuilder; with tf.float16 the
        returned logits are cast to tf.float32.
      data_format: 'NCHW' triggers the transpose described above.
      use_tf_layers: Forwarded to the ConvNetBuilder.
      fp16_vars: If true and `data_type` is tf.float16, tf.float16 is passed
        to the builder as the variable type instead of tf.float32.

    Returns:
      A (logits, aux_logits) tuple; aux_logits is None when the network has
      no auxiliary top layer.
    """
    if data_format == 'NCHW':
        images = tf.transpose(images, [0, 3, 1, 2])
    # Variable type is float32 unless fp16 data and fp16 variables are both
    # requested.
    var_type = tf.float32
    if data_type == tf.float16 and fp16_vars:
        var_type = tf.float16
    network = convnet_builder.ConvNetBuilder(
        images, image_depth, phase_train, use_tf_layers, data_format, data_type, var_type)
    # NOTE(review): the original indentation is not recoverable from the
    # visible source; the layer-building statements below are assumed to
    # belong inside the 'cg' variable scope -- confirm.
    with tf.variable_scope('cg', custom_getter=network.get_custom_getter()):
        self.add_inference(network)
        log_fn("Number of parameters: %d" % network.n_parameters)
        # Add the final fully-connected class layer
        logits = (network.affine(nclass, activation='linear') if not self.skip_final_affine_layer() else network.top_layer)
        aux_logits = None
        if network.aux_top_layer is not None:
            with network.switch_to_aux_top_layer():
                aux_logits = network.affine(
                    nclass, activation='linear', stddev=0.001)
    if data_type == tf.float16:
        # TODO(reedwm): Determine if we should do this cast here.
        logits = tf.cast(logits, tf.float32)
        if aux_logits is not None:
            aux_logits = tf.cast(aux_logits, tf.float32)
    return logits, aux_logits
def main(_):
    """Build a BenchmarkCNN from flags and print its configuration.

    Args:
      _: Unused positional command-line arguments.
    """
    flag_params = benchmark_cnn.make_params_from_flags()
    bench = benchmark_cnn.BenchmarkCNN(benchmark_cnn.setup(flag_params))

    version = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow: %i.%i' % (version[0], version[1]))
    bench.print_info()
def _eval_once(self, saver, image_producer_ops, fetches,
               local_var_init_op_group, nth_ckpt):
    """Evaluate one checkpoint and log top-1 / top-5 accuracy.

    The `nth_ckpt`-th checkpoint in `self.params.checkpoint_dir` is restored
    into a fresh session, the image producer ops are driven by queue-runner
    threads, and `fetches` is run for `self.num_batches_for_eval` steps.

    Args:
      saver: Saver used to restore the checkpoint.
      image_producer_ops: Ops run by the queue runner's enqueue threads.
      fetches: Fetches run once per step; the result must provide the keys
        'top_1_accuracy' and 'top_5_accuracy'.
      local_var_init_op_group: Not used by the visible body.
        NOTE(review): presumably meant to be run before evaluation starts --
        confirm against the original implementation.
      nth_ckpt: Index of the checkpoint to evaluate.

    Raises:
      ValueError: If no checkpoint directory is configured.
      SystemExit: If the requested checkpoint cannot be found.
    """
    with tf.Session(config=create_config_proto(self.params)) as sess:
        coord = tf.train.Coordinator()
        if self.params.checkpoint_dir is None:
            raise ValueError(
                'Checkpoint directory for evaluation is not specified')
        try:
            global_step = load_checkpoint(saver, sess,
                                          self.params.checkpoint_dir,
                                          nth_ckpt)
        except CheckpointNotFoundException:
            log_fn('Checkpoint not found in %s' % self.params.checkpoint_dir)
            # sys.exit raises SystemExit, so nothing after it can run.
            sys.exit(-1)

        log_fn('[Evaluation] START')
        assert not self.use_synthetic_gpu_images

        # The dummy queue only serves as the carrier required by QueueRunner;
        # the threads of interest are those running image_producer_ops.
        dummy_queue = tf.FIFOQueue(1, [tf.bool], shapes=[[]],
                                   name='dummy_queue',
                                   shared_name='dummy_queue')
        qr = tf.train.QueueRunner(dummy_queue, image_producer_ops)
        tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, qr)
        enqueue_threads = qr.create_threads(sess=sess, coord=coord,
                                            daemon=True)
        for thread in enqueue_threads:
            thread.start()

        top_1_accuracy_sum = 0.0
        top_5_accuracy_sum = 0.0
        total_eval_count = self.num_batches_for_eval * self.batch_size
        for step in xrange(self.num_batches_for_eval):
            # NOTE(review): the fetch expression was missing in the visible
            # source; running `fetches` in the session is the reading under
            # which `fetches` and `results[...]` below are consistent.
            results = sess.run(fetches)
            top_1_accuracy_sum += results['top_1_accuracy']
            top_5_accuracy_sum += results['top_5_accuracy']
            steps_done = step + 1
            if steps_done % self.params.display_every_for_eval == 0:
                # Average over the batches seen so far. Dividing by `step`
                # would be off by one and divide by zero on the first step.
                log_fn('%i\ttop_1_accuracy: %.4f' % (
                    steps_done, top_1_accuracy_sum / steps_done))
                log_fn('%i\ttop_5_accuracy: %.4f' % (
                    steps_done, top_5_accuracy_sum / steps_done))

        accuracy_at_1 = top_1_accuracy_sum / self.num_batches_for_eval
        accuracy_at_5 = top_5_accuracy_sum / self.num_batches_for_eval
        log_fn(
            '[SUMMARY] Global step: %d Accuracy @ 1 = %.4f Accuracy @ 5 = %.4f [%d examples]'
            % (global_step, accuracy_at_1, accuracy_at_5, total_eval_count))
        coord.request_stop()
def main(_):
    """Build the single-GPU benchmark graph and hand it to parallax.

    NOTE(review): several expressions in this function are missing from the
    visible source (empty subscripts, an empty `isinstance` argument, the
    right-hand side of `results =`, the callee of the log call and the
    fallback value for 'lr'). They are left untouched here and must be
    restored from the original file before this code can run.

    Args:
      _: Unused positional command-line arguments.
    """
    # Build benchmark_cnn model
    params = benchmark_cnn.make_params_from_flags()
    params, sess_config = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)

    # Print information
    tfversion = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow: %i.%i' % (tfversion[0], tfversion[1]))
    bench.print_info()

    # Build single-GPU benchmark_cnn model
    with tf.Graph().as_default() as single_gpu_graph:
        bench.build_model()

    # Callback passed to parallax.parallel_run below.
    def run(sess, num_iters, tensor_or_op_name_to_replica_names, num_workers, worker_id, num_replicas_per_worker):
        # NOTE(review): the lookup keys inside the empty `[]` are missing.
        fetches = {
            'global_step': tensor_or_op_name_to_replica_names[][0],
            'cost': tensor_or_op_name_to_replica_names[][0],
            'train_op': tensor_or_op_name_to_replica_names[][0],
        }
        # NOTE(review): the object tested by isinstance is missing.
        if isinstance(, tf.Tensor):
            fetches['lr'] = tensor_or_op_name_to_replica_names[][0]
        start = time.time()
        for i in range(num_iters):
            # NOTE(review): the right-hand side is missing.
            results =
            if i % FLAGS.log_frequency == 0:
                end = time.time()
                # Steps per second over the last log_frequency iterations.
                throughput = float(FLAGS.log_frequency) / float(end - start)
                # NOTE(review): the logging call that takes this message and
                # the `else` value for 'lr' are missing.
                "global step: %d, lr: %f, loss: %f, "
                "throughput: %f steps/sec" % (results['global_step'], results['lr'] if 'lr' in results else, results['cost'], throughput))
                start = time.time()

    config = parallax_config.build_config()
    config.sess_config = sess_config
    parallax.parallel_run(single_gpu_graph, run, FLAGS.resource_info_file, FLAGS.max_steps, sync=FLAGS.sync, parallax_config=config)
def main(positional_arguments):
    """Build the Cifar benchmark from flags and print its configuration.

    Flag-derived params are overridden with the values in `options`, then a
    `BenchmarkCNN` is constructed around a `CifarDataset` and a
    `Model_Builder` model.

    Args:
      positional_arguments: Command-line arguments left after flag parsing;
        only the first element is accepted.

    Raises:
      ValueError: If more than one positional argument is received.
    """
    assert len(positional_arguments) >= 1
    unexpected_arguments = positional_arguments[1:]
    if unexpected_arguments:
        raise ValueError('Received unknown positional arguments: %s'
                         % unexpected_arguments)

    options = make_options_from_flags(FLAGS)

    # Disabled overrides kept for reference:
    #   all_reduce_spec='nccl', bottom_file=options.bottom_file,
    #   affine_files=options.affine_files,
    #   affine_classes=options.affine_classes, summary_verbosity=1,
    #   save_summaries_steps=10, save_model_secs=300 (save every 5 min).
    overrides = dict(
        batch_size=options.batch_size,
        model='MY_GTSRB',
        num_epochs=options.num_epochs,
        num_gpus=options.num_gpus,
        data_format='NHWC',
        train_dir=options.checkpoint_folder,
        allow_growth=True,
        variable_update='replicated',
        local_parameter_device='gpu',
        use_tf_layers=False,
        optimizer=options.optimizer,
        weight_decay=options.weight_decay,
        print_training_accuracy=True,
        backbone_model_path=options.backbone_model_path,
        # Checkpoints are written every hour.
        save_model_secs=3600,
    )
    params = benchmark_cnn.make_params_from_flags()._replace(**overrides)
    params = benchmark_cnn.setup(params)

    dataset = CifarDataset(options)
    model = Model_Builder(options.model_name, dataset.num_classes, options,
                          params)
    bench = benchmark_cnn.BenchmarkCNN(params, dataset=dataset, model=model)

    version = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow: %i.%i' % (version[0], version[1]))
    bench.print_info()
def postprocess(self, results):
    """Collect SSD predictions and compute COCO mAP once all are gathered.

    Predictions of the current batch are stored in `self.predictions`, keyed
    by integer source id. Once at least
    `ssd_constants.COCO_NUM_VAL_IMAGES` predictions are stored, mAP is
    computed with `coco_metric.compute_map`.

    Args:
      results: Mapping with the predicted boxes, predicted scores, source
        ids and raw shapes under the corresponding `ssd_constants` keys.

    Returns:
      A dict with dummy 'top_1_accuracy' and 'top_5_accuracy' values of 0.
      When mAP was computed it also holds one 'simple_value:<metric>' entry
      per evaluation metric.

    Raises:
      ImportError: If the `coco_metric` module cannot be imported.
    """
    try:
        import coco_metric  # pylint: disable=g-import-not-at-top
    except ImportError:
        # Adjacent literals are concatenated without a separator, so each
        # fragment must carry its own trailing space.
        # NOTE(review): the message appears to have lost the URLs it referred
        # to (after "clone the", "following" and "from") -- restore them from
        # the original source if available.
        raise ImportError('To use the COCO dataset, you must clone the '
                          'repo and add '
                          'tensorflow/models and tensorflow/models/research to '
                          'the PYTHONPATH, and compile the protobufs by '
                          'following '
                          'master/research/object_detection/g3doc/'
                          '#protobuf-compilation ; To evaluate using COCO '
                          'metric, download and install Python COCO API from'
                          '')

    pred_boxes = results[ssd_constants.PRED_BOXES]
    pred_scores = results[ssd_constants.PRED_SCORES]
    # TODO(haoyuzhang): maybe use these values for visualization.
    # gt_boxes = results['gt_boxes']
    # gt_classes = results['gt_classes']
    source_id = results[ssd_constants.SOURCE_ID]
    raw_shape = results[ssd_constants.RAW_SHAPE]

    for i in range(self.get_batch_size()):
        self.predictions[int(source_id[i])] = {
            ssd_constants.PRED_BOXES: pred_boxes[i],
            ssd_constants.PRED_SCORES: pred_scores[i],
            ssd_constants.SOURCE_ID: source_id[i],
            ssd_constants.RAW_SHAPE: raw_shape[i]
        }

    # COCO metric calculates mAP only after a full epoch of evaluation; until
    # then only dummy top_N_accuracy values are returned.
    if len(self.predictions) >= ssd_constants.COCO_NUM_VAL_IMAGES:
        annotation_file = os.path.join(self.params.data_dir,
                                       ssd_constants.ANNOTATION_FILE)
        eval_results = coco_metric.compute_map(self.predictions.values(),
                                               annotation_file)
        ret = {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
        for metric_key, metric_value in eval_results.items():
            ret['simple_value:' + metric_key] = metric_value
        return ret

    log_fn('Got {:d} out of {:d} eval examples.'
           ' Waiting for the remaining to calculate mAP...'.format(
               len(self.predictions), ssd_constants.COCO_NUM_VAL_IMAGES))
    return {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
def main(positional_arguments):
    """Build a BenchmarkCNN from flags and run the all-reduce benchmark.

    Args:
      positional_arguments: Command-line arguments left after flag parsing;
        only the first element is accepted.

    Raises:
      ValueError: If more than one positional argument is received.
    """
    # Command-line arguments like '--distortions False' are equivalent to
    # '--distortions=True False', where False is a positional argument. To
    # prevent this from silently running with distortions, we do not allow
    # positional arguments.
    assert len(positional_arguments) >= 1
    unexpected_arguments = positional_arguments[1:]
    if unexpected_arguments:
        raise ValueError('Received unknown positional arguments: %s'
                         % unexpected_arguments)

    flag_params = benchmark_cnn.make_params_from_flags()
    bench = benchmark_cnn.BenchmarkCNN(benchmark_cnn.setup(flag_params))

    version = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow: %i.%i' % (version[0], version[1]))
    run_benchmark(bench, absl_flags.FLAGS.iters_per_step)
def main(extra_flags):
    """Build a BenchmarkCNN from flags and print its configuration.

    Args:
      extra_flags: Command-line arguments not defined in tf.flags.FLAGS;
        element 0 is the program name.

    Raises:
      ValueError: If any flag beyond the program name is received.
    """
    # extra_flags is a list of command line arguments, excluding those
    # defined in tf.flags.FLAGS. extra_flags[0] is always the program name.
    # Supplying flags not defined with tf.flags.FLAGS is an error.
    assert len(extra_flags) >= 1
    unknown_flags = extra_flags[1:]
    if unknown_flags:
        raise ValueError('Received unknown flags: %s' % unknown_flags)

    params = benchmark_cnn.make_params_from_flags()
    # The return value of setup() is not used here.
    benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)

    version = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow: %i.%i' % (version[0], version[1]))
    bench.print_info()
def main(positional_arguments):
    """Build a BenchmarkCNN inside the MLPerf logger and print its info.

    Args:
      positional_arguments: Command-line arguments left after flag parsing;
        only the first element is accepted.

    Raises:
      ValueError: If more than one positional argument is received.
    """
    # Command-line arguments like '--distortions False' are equivalent to
    # '--distortions=True False', where False is a positional argument. To
    # prevent this from silently running with distortions, we do not allow
    # positional arguments.
    assert len(positional_arguments) >= 1
    unexpected_arguments = positional_arguments[1:]
    if unexpected_arguments:
        raise ValueError("Received unknown positional arguments: %s"
                         % unexpected_arguments)

    params = benchmark_cnn.make_params_from_flags()
    compliance_logging = absl_flags.FLAGS.ml_perf_compliance_logging
    with mlperf.mlperf_logger(compliance_logging, params.model):
        bench = benchmark_cnn.BenchmarkCNN(benchmark_cnn.setup(params))
        version = cnn_util.tensorflow_version_tuple()
        log_fn("TensorFlow: %i.%i" % (version[0], version[1]))
        bench.print_info()
def main(_):
    """Build the single-GPU benchmark graph and train it through parallax.

    NOTE(review): the right-hand side of `results =` and the callee of the
    log call are missing from the visible source. They are left untouched
    here and must be restored from the original file before this code can
    run. The original nesting under the `with` block is also not
    recoverable from the visible source -- confirm.

    Args:
      _: Unused positional command-line arguments.
    """
    # Build benchmark_cnn model
    params = benchmark_cnn.make_params_from_flags()
    params, sess_config = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)

    # Print information
    tfversion = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow: %i.%i' % (tfversion[0], tfversion[1]))
    bench.print_info()

    # Build single-GPU benchmark_cnn model
    single_gpu_graph = tf.Graph()
    with single_gpu_graph.as_default():
        bench.build_model()

    config = parallax_config.build_config()
    config.sess_config = sess_config
    sess, num_workers, worker_id, num_replicas_per_worker = \
        parallax.parallel_run(single_gpu_graph, FLAGS.resource_info_file, sync=FLAGS.sync, parallax_config=config)
    fetches = {
        'global_step': bench.global_step,
        'cost': bench.cost,
        'train_op': bench.train_op,
    }
    start = time.time()
    for i in range(FLAGS.max_steps):
        # NOTE(review): the right-hand side is missing.
        results =
        if (i + 1) % FLAGS.log_frequency == 0:
            end = time.time()
            # Steps per second over the last log_frequency iterations.
            throughput = float(FLAGS.log_frequency) / float(end - start)
            # NOTE(review): the logging call that takes this message is
            # missing.
            "global step: %d, loss: %f, throughput: %f steps/sec" % (results['global_step'][0] + 1, results['cost'][0], throughput))
            start = time.time()
def load_checkpoint(saver, sess, checkpoint_dir, nth_ckpt):
    """Restore the `nth_ckpt`-th checkpoint listed for `checkpoint_dir`.

    Args:
      saver: Saver used to restore the variables.
      sess: Session the checkpoint is restored into.
      checkpoint_dir: Directory whose checkpoint state is read.
      nth_ckpt: Index into the state's `all_model_checkpoint_paths`.

    Returns:
      The global step parsed from the text after the last '-' of the
      checkpoint file name, or 0 if that text is not all digits.

    Raises:
      CheckpointNotFoundException: If the directory has no checkpoint state,
        `nth_ckpt` is past the last checkpoint, or the selected path is
        empty.
      ValueError: If the selected checkpoint path is not absolute.
    """
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    # The state must be validated before it is dereferenced: a missing state
    # would otherwise fail with AttributeError instead of the exception
    # callers handle.
    if not ckpt:
        raise CheckpointNotFoundException('No checkpoint file found.')

    checkpoint_paths = ckpt.all_model_checkpoint_paths
    if nth_ckpt >= len(checkpoint_paths):
        raise CheckpointNotFoundException('No more checkpoint file.')
    log_fn("Evaluate checkpoint file [%d/%d]" % (
        nth_ckpt, len(checkpoint_paths)))

    model_checkpoint_path = checkpoint_paths[nth_ckpt]
    if not model_checkpoint_path:
        raise CheckpointNotFoundException('No checkpoint file found.')
    if not os.path.isabs(model_checkpoint_path):
        raise ValueError('Checkpoint path should be absolute path.')

    # The step is the text after the last '-' of the file name.
    step_text = model_checkpoint_path.split('/')[-1].split('-')[-1]
    global_step = int(step_text) if step_text.isdigit() else 0

    saver.restore(sess, model_checkpoint_path)
    log_fn('Successfully loaded model from %s.' % model_checkpoint_path)
    return global_step
def run_graph(benchmark_op, bench_cnn, init_ops, dummy_loss_op):
    """Runs the graph for the benchmark.

    Args:
      benchmark_op: An op that runs the benchmark.
      bench_cnn: The BenchmarkCNN where params and other attributes are
        obtained.
      init_ops: A list of ops that are run before `benchmark_op` for
        initialization.
      dummy_loss_op: Any op. We must pass a loss op to
        `benchmark_cnn.benchmark_one_step`, but the result of the op is never
        actually used.
    """
    config = benchmark_cnn.create_config_proto(bench_cnn.params)
    with tf.Session(config=config) as sess:
        # Initialization must actually execute before the benchmark op; the
        # loop over init_ops had no statement running them.
        for op in init_ops:
            sess.run(op)

        step_train_times = []
        fetches = {'average_loss': dummy_loss_op, 'benchmark_op': benchmark_op}
        log_fn('Running warmup')
        # Negative steps are warmup; timing starts at step 0.
        for i in range(-bench_cnn.num_warmup_batches, bench_cnn.num_batches):
            if i == 0:
                log_fn('Running all-reduce ops')
                start = time.time()
            if i > 0 and i % bench_cnn.params.display_every == 0:
                log_fn('Iteration: %d. Average time per step so far: %s' %
                       (i, (time.time() - start) / i))
            # Call benchmark_one_step instead of running the fetches directly
            # in the session, to potentially get a trace file, partitioned
            # graphs, etc.
            benchmark_cnn.benchmark_one_step(
                sess=sess,
                fetches=fetches,
                step=i,
                # The batch size is only used for the images/sec calculation,
                # which is not actually calculated because we pass
                # show_images_per_sec=False.
                batch_size=None,
                step_train_times=step_train_times,
                trace_filename=bench_cnn.trace_filename,
                partitioned_graph_file_prefix=(
                    bench_cnn.params.partitioned_graph_file_prefix),
                profiler=None,
                image_producer=None,
                params=bench_cnn.params,
                show_images_per_sec=False)
        log_fn('Average time per step: %s' %
               ((time.time() - start) / bench_cnn.num_batches))
def postprocess(self, results):
    """Collect SSD predictions and compute COCO mAP once all are gathered.

    Predictions are stored in `self.predictions`, keyed by integer source
    id, and are discarded whenever `results['global_step']` moves past
    `self.eval_global_step`. Once at least
    `ssd_constants.COCO_NUM_VAL_IMAGES` predictions are stored, mAP is
    computed with `coco_metric.compute_map` and the store is cleared.

    Args:
      results: Mapping with 'global_step' plus the predicted boxes, predicted
        scores, source ids and raw shapes under the corresponding
        `ssd_constants` keys.

    Returns:
      A dict with dummy 'top_1_accuracy' and 'top_5_accuracy' values of 0.
      When mAP was computed it also holds one entry per evaluation metric,
      keyed with `constants.SIMPLE_VALUE_RESULT_PREFIX`.

    Raises:
      ImportError: If the `coco_metric` module cannot be imported.
    """
    try:
        import coco_metric  # pylint: disable=g-import-not-at-top
    except ImportError:
        # Adjacent literals are concatenated without a separator, so each
        # fragment must carry its own trailing space.
        # NOTE(review): the message appears to have lost the URLs it referred
        # to (after "clone the", "following" and "from") -- restore them from
        # the original source if available.
        raise ImportError(
            'To use the COCO dataset, you must clone the '
            'repo and add '
            'tensorflow/models and tensorflow/models/research to '
            'the PYTHONPATH, and compile the protobufs by '
            'following '
            'master/research/object_detection/g3doc/'
            '#protobuf-compilation ; To evaluate using COCO '
            'metric, download and install Python COCO API from'
            '')

    pred_boxes = results[ssd_constants.PRED_BOXES]
    pred_scores = results[ssd_constants.PRED_SCORES]
    # TODO(haoyuzhang): maybe use these values for visualization.
    # gt_boxes = results['gt_boxes']
    # gt_classes = results['gt_classes']
    source_id = results[ssd_constants.SOURCE_ID]
    raw_shape = results[ssd_constants.RAW_SHAPE]

    # COCO evaluation requires processing COCO_NUM_VAL_IMAGES exactly once.
    # Due to rounding errors (i.e., COCO_NUM_VAL_IMAGES % batch_size != 0),
    # setting `num_eval_epochs` to 1 is not enough and will often miss some
    # images. We expect user to set `num_eval_epochs` to >1, which will leave
    # some unused images from previous steps in `predictions`. Here we check
    # if we are doing eval at a new global step.
    if results['global_step'] > self.eval_global_step:
        self.eval_global_step = results['global_step']
        self.predictions.clear()

    for i, sid in enumerate(source_id):
        self.predictions[int(sid)] = {
            ssd_constants.PRED_BOXES: pred_boxes[i],
            ssd_constants.PRED_SCORES: pred_scores[i],
            ssd_constants.SOURCE_ID: source_id[i],
            ssd_constants.RAW_SHAPE: raw_shape[i]
        }

    # COCO metric calculates mAP only after a full epoch of evaluation; until
    # then only dummy top_N_accuracy values are returned.
    if len(self.predictions) >= ssd_constants.COCO_NUM_VAL_IMAGES:
        log_fn('Got results for all {:d} eval examples. Calculate mAP...'.
               format(ssd_constants.COCO_NUM_VAL_IMAGES))
        annotation_file = os.path.join(self.params.data_dir,
                                       ssd_constants.ANNOTATION_FILE)
        eval_results = coco_metric.compute_map(self.predictions.values(),
                                               annotation_file)
        self.predictions.clear()
        ret = {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
        for metric_key, metric_value in eval_results.items():
            ret[constants.SIMPLE_VALUE_RESULT_PREFIX + metric_key] = metric_value
        return ret

    log_fn('Got {:d} out of {:d} eval examples.'
           ' Waiting for the remaining to calculate mAP...'.format(
               len(self.predictions), ssd_constants.COCO_NUM_VAL_IMAGES))
    return {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
def postprocess(self, results):
    """Collect SSD predictions and evaluate COCO mAP, optionally async.

    Predictions are stored in `self.predictions`, keyed by integer source
    id, and are discarded whenever `results['global_step']` moves past
    `self.eval_global_step`. Once at least
    `ssd_constants.COCO_NUM_VAL_IMAGES` predictions are stored they are
    decoded and evaluated: inline via `coco_metric.compute_map`, or, when
    `self.params.collect_eval_results_async` is set, by a separate process
    fed through a queue.

    Args:
      results: Mapping with 'global_step' plus the predicted boxes, predicted
        scores, source ids and raw shapes under the corresponding
        `ssd_constants` keys.

    Returns:
      A dict with 'top_1_accuracy' and 'top_5_accuracy'. After an inline
      evaluation 'top_1_accuracy' carries the COCO AP and the dict also holds
      one entry per evaluation metric, keyed with
      `constants.SIMPLE_VALUE_RESULT_PREFIX`. While predictions are still
      being collected 'top_1_accuracy' is `self.eval_coco_ap`. Right after
      handing predictions to the async evaluator both values are 0.

    Raises:
      ImportError: If the `coco_metric` module cannot be imported.
    """
    try:
        import coco_metric  # pylint: disable=g-import-not-at-top
    except ImportError:
        # Adjacent literals are concatenated without a separator, so each
        # fragment must carry its own trailing space.
        # NOTE(review): the message appears to have lost the URLs it referred
        # to (after "clone the", "following" and "from") -- restore them from
        # the original source if available.
        raise ImportError('To use the COCO dataset, you must clone the '
                          'repo and add '
                          'tensorflow/models and tensorflow/models/research to '
                          'the PYTHONPATH, and compile the protobufs by '
                          'following '
                          'master/research/object_detection/g3doc/'
                          '#protobuf-compilation ; To evaluate using COCO '
                          'metric, download and install Python COCO API from'
                          '')

    pred_boxes = results[ssd_constants.PRED_BOXES]
    pred_scores = results[ssd_constants.PRED_SCORES]
    # TODO(haoyuzhang): maybe use these values for visualization.
    # gt_boxes = results['gt_boxes']
    # gt_classes = results['gt_classes']
    source_id = results[ssd_constants.SOURCE_ID]
    raw_shape = results[ssd_constants.RAW_SHAPE]

    # COCO evaluation requires processing COCO_NUM_VAL_IMAGES exactly once.
    # Due to rounding errors (i.e., COCO_NUM_VAL_IMAGES % batch_size != 0),
    # setting `num_eval_epochs` to 1 is not enough and will often miss some
    # images. We expect user to set `num_eval_epochs` to >1, which will leave
    # some unused images from previous steps in `predictions`. Here we check
    # if we are doing eval at a new global step.
    if results['global_step'] > self.eval_global_step:
        self.eval_global_step = results['global_step']
        self.predictions.clear()

    for i, sid in enumerate(source_id):
        self.predictions[int(sid)] = {
            ssd_constants.PRED_BOXES: pred_boxes[i],
            ssd_constants.PRED_SCORES: pred_scores[i],
            ssd_constants.SOURCE_ID: source_id[i],
            ssd_constants.RAW_SHAPE: raw_shape[i]
        }

    # COCO metric calculates mAP only after a full epoch of evaluation; until
    # then no new accuracy value is produced.
    if len(self.predictions) >= ssd_constants.COCO_NUM_VAL_IMAGES:
        log_fn('Got results for all {:d} eval examples. Calculate mAP...'.format(
            ssd_constants.COCO_NUM_VAL_IMAGES))
        annotation_file = os.path.join(self.params.data_dir,
                                       ssd_constants.ANNOTATION_FILE)
        # Size of predictions before decoding about 15--30GB, while size after
        # decoding is 100--200MB. When using async eval mode, decoding takes
        # 20--30 seconds of main thread time but is necessary to avoid OOM
        # during inter-process communication.
        decoded_preds = coco_metric.decode_predictions(
            self.predictions.values())
        self.predictions.clear()

        if self.params.collect_eval_results_async:
            def _eval_results_getter():
                """Iteratively get eval results from async eval process."""
                while True:
                    step, eval_results = self.async_eval_results_queue.get()
                    self.eval_coco_ap = eval_results['COCO/AP']
                    mlperf.logger.log_eval_accuracy(
                        self.eval_coco_ap, step,
                        self.batch_size * self.params.num_gpus,
                        ssd_constants.COCO_NUM_TRAIN_IMAGES)
                    if self.reached_target():
                        # Reached target, clear all pending messages in
                        # predictions queue and insert poison pill to stop
                        # the async eval process.
                        while not self.async_eval_predictions_queue.empty():
                            self.async_eval_predictions_queue.get()
                        self.async_eval_predictions_queue.put('STOP')
                        break

            if not self.async_eval_process:
                # Limiting the number of messages in predictions queue to
                # prevent OOM. Each message (predictions data) can
                # potentially consume a lot of memory, and normally there
                # should only be few messages in the queue. If often blocked
                # on this, consider reducing eval frequency.
                self.async_eval_predictions_queue = multiprocessing.Queue(2)
                self.async_eval_results_queue = multiprocessing.Queue()

                # Reason to use a Process as opposed to Thread is mainly the
                # computationally intensive eval runner. Python
                # multithreading is not truly running in parallel, a runner
                # thread would get significantly delayed (or alternatively
                # delay the main thread).
                self.async_eval_process = multiprocessing.Process(
                    target=coco_metric.async_eval_runner,
                    args=(self.async_eval_predictions_queue,
                          self.async_eval_results_queue,
                          annotation_file))
                self.async_eval_process.daemon = True
                self.async_eval_process.start()

                self.async_eval_results_getter_thread = threading.Thread(
                    target=_eval_results_getter, args=())
                self.async_eval_results_getter_thread.daemon = True
                self.async_eval_results_getter_thread.start()

            self.async_eval_predictions_queue.put(
                (self.eval_global_step, decoded_preds))
            return {'top_1_accuracy': 0, 'top_5_accuracy': 0.}

        eval_results = coco_metric.compute_map(decoded_preds, annotation_file)
        self.eval_coco_ap = eval_results['COCO/AP']
        ret = {'top_1_accuracy': self.eval_coco_ap, 'top_5_accuracy': 0.}
        for metric_key, metric_value in eval_results.items():
            ret[constants.SIMPLE_VALUE_RESULT_PREFIX + metric_key] = metric_value
        mlperf.logger.log_eval_accuracy(self.eval_coco_ap,
                                        self.eval_global_step,
                                        self.batch_size * self.params.num_gpus,
                                        ssd_constants.COCO_NUM_TRAIN_IMAGES)
        return ret

    log_fn('Got {:d} out of {:d} eval examples.'
           ' Waiting for the remaining to calculate mAP...'.format(
               len(self.predictions), ssd_constants.COCO_NUM_VAL_IMAGES))
    return {'top_1_accuracy': self.eval_coco_ap, 'top_5_accuracy': 0.}
def main(positional_arguments):
    """Sweep a fixed list of models over several GPU counts and print stats.

    For every entry of `tests_models` and every value of `test_gpus` a
    BenchmarkCNN is built with the matching params, and the collected
    results are printed as a summary followed by a comma-separated table of
    'images_per_sec' values.

    NOTE(review): the right-hand side of `results =` is missing from the
    visible source; it is left untouched here and must be restored from the
    original file before this code can run.

    Args:
      positional_arguments: Command-line arguments left after flag parsing;
        only the first element is accepted.

    Raises:
      ValueError: If more than one positional argument is received.
    """
    # Command-line arguments like '--distortions False' are equivalent to
    # '--distortions=True False', where False is a positional argument. To
    # prevent this from silently running with distortions, we do not allow
    # positional arguments.
    # For DGX servers use hierarchical_copy=True argument
    assert len(positional_arguments) >= 1
    if len(positional_arguments) > 1:
        raise ValueError('Received unknown positional arguments: %s' % positional_arguments[1:])
    # 'num_gpus' is a placeholder that the sweep below overwrites.
    tests_models = [
        { 'num_gpus': None, 'batch_size': 64, 'variable_update': 'parameter_server', 'model': 'inception3' },
        { 'num_gpus': None, 'batch_size': 64, 'variable_update': 'parameter_server', 'model': 'resnet50' },
        { 'num_gpus': None, 'batch_size': 32, 'variable_update': 'parameter_server', 'model': 'resnet152' },  #batch=64 crashes
        { 'num_gpus': None, 'batch_size': 64, 'variable_update': 'replicated', 'model': 'vgg16' },
        { 'num_gpus': None, 'batch_size': 512, 'variable_update': 'replicated', 'model': 'alexnet' }
    ]
    test_gpus = [1, 2, 4, 8]
    stats = []
    for test in tests_models:
        for num_gpus in test_gpus:
            test['num_gpus'] = num_gpus
            params = benchmark_cnn.make_params_from_flags()
            params = benchmark_cnn.setup(params)
            # force --hierarchical_copy to False when using 1 GPU
            if num_gpus == 1:
                params = params._replace(hierarchical_copy=False)
            params = params._replace(num_gpus=test['num_gpus'], batch_size=test['batch_size'], model=test['model'], variable_update=test['variable_update'])
            bench = benchmark_cnn.BenchmarkCNN(params)
            tfversion = cnn_util.tensorflow_version_tuple()
            log_fn('TensorFlow: %i.%i' % (tfversion[0], tfversion[1]))
            bench.print_info()
            # NOTE(review): the right-hand side is missing.
            results =
            # result
            # {
            #     'average_wall_time': 0.6646941304206848,
            #     'images_per_sec': 385.1395525908701,
            #     'last_average_loss': 7.256145,
            #     'num_steps': 100,
            #     'num_workers': 1
            # }
            # The test dict is copied because it is mutated on every
            # iteration of the inner loop.
            stats.append({'test': test.copy(), 'result': results})
    # summary
    print('summary:')
    print('==========')
    pprint.pprint(stats)
    print('==========')
    # `stats` is ordered model-major, so the entry for GPU index i and model
    # index j is at i + j * len(test_gpus); each output row is one GPU count.
    s = ''
    for i in range(len(test_gpus)):
        for j in range(len(tests_models)):
            s += str(stats[i + j * len(test_gpus)]['result']['images_per_sec'])
            s += ', '
        s += '\n'
    print(s)
    print('==========')
def print_info(self):
    """Print basic information.

    Logs the model, dataset, mode, per-device batch size, data format and
    optimizer through `log_fn`.

    NOTE(review): the right-hand side of `dataset_name =` is missing from
    the visible source; it is left untouched here and must be restored from
    the original file before this code can run.
    """
    log_fn('Model: %s' % self.model.get_model())
    # NOTE(review): the right-hand side is missing.
    dataset_name =
    if self.dataset.use_synthetic_gpu_images():
        dataset_name += ' (synthetic)'
    log_fn('Dataset: %s' % dataset_name)
    log_fn('Mode: %s' % get_mode_from_params(self.params))
    log_fn('Batch size: %s per device' % self.batch_size)
    # The group size is only reported when batches are actually grouped.
    if self.batch_group_size > 1:
        log_fn(' %d batches per prepocessing group' % self.batch_group_size)
    log_fn('Data format: %s' % self.data_format)
    log_fn('Optimizer: %s' % self.params.optimizer)
    log_fn('==========')