def build_graph(tower_devices, tensor_shapes, variable_mgr, num_iters):
    """Builds the graph for the benchmark.

  Args:
    tower_devices: A list of device strings of the devices to run the all-reduce
      benchmark on.
    tensor_shapes: A list of shapes of the tensors that will be aggregated for
      the all-reduce.
    variable_mgr: The VariableMgr to perform the all-reduce.
    num_iters: Number of iterations to aggregate tensors for.
  Returns:
    An op that runs the benchmark.
  """
    all_device_tensors = []
    for i, tower_device in enumerate(tower_devices):
        with tf.device(tower_device):
            device_tensors = []
            for j, shape in enumerate(tensor_shapes):
                tensor = tf.Variable(tf.random_normal(shape, dtype=tf.float32),
                                     name='tensor_%d_on_device_%d' % (j, i))
                device_tensors.append(tensor)
        all_device_tensors.append(device_tensors)

    log_fn('Building all-reduce ops')
    benchmark_op = build_all_reduce_iterations(all_device_tensors,
                                               tower_devices, variable_mgr,
                                               num_iters)
    log_fn('Done building all-reduce ops')
    return benchmark_op
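As a point of reference, here is a minimal NumPy-only sketch (not part of the benchmark itself) of what a single sum all-reduce step computes: every device copy of a tensor ends up holding the element-wise sum of all copies.

import numpy as np

# Three hypothetical "devices", each holding its own copy of a tensor.
device_tensors = [np.random.randn(4).astype(np.float32) for _ in range(3)]

# A sum all-reduce replaces every copy with the element-wise sum of all copies.
reduced = np.sum(device_tensors, axis=0)
all_device_tensors = [reduced.copy() for _ in device_tensors]

assert all(np.allclose(t, reduced) for t in all_device_tensors)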
Example #2
def main(positional_arguments):
    # Command-line arguments like '--distortions False' are equivalent to
    # '--distortions=True False', where False is a positional argument. To prevent
    # this from silently running with distortions, we do not allow positional
    # arguments.
    assert len(positional_arguments) >= 1
    if len(positional_arguments) > 1:
        raise ValueError('Received unknown positional arguments: %s' %
                         positional_arguments[1:])

    options = make_options_from_flags(FLAGS)

    params = benchmark_cnn.make_params_from_flags()
    params = params._replace(batch_size=options.batch_size)
    params = params._replace(model='MY_GTSRB')
    params = params._replace(num_epochs=options.num_epochs)
    params = params._replace(num_gpus=options.num_gpus)
    params = params._replace(data_format='NHWC')
    params = params._replace(train_dir=options.checkpoint_folder)
    params = params._replace(allow_growth=True)
    params = params._replace(variable_update='replicated')
    params = params._replace(local_parameter_device='gpu')
    params = params._replace(use_tf_layers=False)
    # params = params._replace(all_reduce_spec='nccl')

    # params = params._replace(bottom_file=options.bottom_file)
    # params = params._replace(affine_files=options.affine_files)
    # params = params._replace(affine_classes=options.affine_classes)

    params = params._replace(optimizer=options.optimizer)
    params = params._replace(weight_decay=options.weight_decay)

    # params = params._replace(print_training_accuracy=True)
    params = params._replace(backbone_model_path=options.backbone_model_path)
    # Summary and Save & load checkpoints.
    # params = params._replace(summary_verbosity=1)
    # params = params._replace(save_summaries_steps=10)
    # params = params._replace(save_model_secs=3600)  # save every 1 hour
    params = params._replace(save_model_secs=60)  # save every 1 minute
    params = benchmark_cnn.setup(params)

    #testtest(params)
    #exit(0)

    if 'test' in options.data_dir:
        dataset = GTSRBTestDataset(options)
    else:
        dataset = GTSRBDataset(options)
    model = Model_Builder(options.model_name, dataset.num_classes, options,
                          params)

    bench = benchmark_cnn.BenchmarkCNN(params, dataset=dataset, model=model)

    tfversion = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow:  %i.%i' % (tfversion[0], tfversion[1]))

    bench.print_info()
    bench.run()

    tf.reset_default_graph()
def main(positional_arguments):
    # Command-line arguments like '--distortions False' are equivalent to
    # '--distortions=True False', where False is a positional argument. To prevent
    # this from silently running with distortions, we do not allow positional
    # arguments.
    assert len(positional_arguments) >= 1
    if len(positional_arguments) > 1:
        raise ValueError('Received unknown positional arguments: %s' %
                         positional_arguments[1:])

    params = benchmark_cnn.make_params_from_flags()
    params = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)

    tfversion = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow:  %i.%i' % (tfversion[0], tfversion[1]))

    bench.print_info()
    with log_context(LOGGER_URL,
                     LOGGER_USRENAME,
                     LOGGER_PASSWORD,
                     LOGGER_DB,
                     LOGGER_SERIES,
                     machine=LOGGER_VM):
        bench.run()
Example #4
    def postprocess(self, results):
        """Postprocess results returned from model in Python."""
        probs = results[self.PROBABILITY_TENSOR]

        total_wer, total_cer = 0, 0
        speech_labels = " abcdefghijklmnopqrstuvwxyz'-"
        greedy_decoder = DeepSpeechDecoder(speech_labels)

        # Evaluate the performance using WER (Word Error Rate) and CER (Character
        # Error Rate) as metrics.
        targets = results[self.LABEL_TENSOR]  # The ground truth transcript
        for i in range(self.batch_size):
            # Decode string.
            predicted_str = greedy_decoder.decode_logits(probs[i])
            expected_str = greedy_decoder.decode(targets[i])
            # Compute CER.
            total_cer += (greedy_decoder.cer(predicted_str, expected_str) /
                          len(expected_str))
            # Compute WER.
            total_wer += (greedy_decoder.wer(predicted_str, expected_str) /
                          len(expected_str.split()))

        # Get mean value
        total_cer /= self.batch_size
        total_wer /= self.batch_size

        log_fn('total CER: {:f}; total WER: {:f}; total example: {:d}.'.format(
            total_cer, total_wer, self.batch_size))
        # TODO(laigd): get rid of top_N_accuracy bindings in benchmark_cnn.py
        return {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
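For reference, a self-contained sketch of how such per-example CER and WER values can be computed with a plain Levenshtein edit distance. The DeepSpeechDecoder API above is assumed; this is only a standalone illustration.

def _edit_distance(ref, hyp):
    # Space-optimized Levenshtein distance between two sequences.
    dp = list(range(len(hyp) + 1))
    for i, r in enumerate(ref, 1):
        prev, dp[0] = dp[0], i
        for j, h in enumerate(hyp, 1):
            prev, dp[j] = dp[j], min(dp[j] + 1, dp[j - 1] + 1, prev + (r != h))
    return dp[-1]

def char_error_rate(predicted_str, expected_str):
    return _edit_distance(expected_str, predicted_str) / len(expected_str)

def word_error_rate(predicted_str, expected_str):
    words = expected_str.split()
    return _edit_distance(words, predicted_str.split()) / len(words)

# char_error_rate('helo wrld', 'hello world') -> ~0.18 (2 edits / 11 characters)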
def run_benchmark(bench_cnn, num_iters):
  """Runs the all-reduce benchmark.

  Args:
    bench_cnn: The BenchmarkCNN where params, the variable manager, and other
      attributes are obtained.
    num_iters: Number of iterations to run the all-reduce for.

  Raises:
    ValueError: Invalid params of bench_cnn.
  """
  if bench_cnn.params.variable_update != 'replicated':
    raise ValueError('--variable_update=replicated must be specified to use '
                     'the all-reduce benchmark')
  if bench_cnn.params.variable_consistency == 'relaxed':
    raise ValueError('--variable_consistency=relaxed is not supported')

  benchmark_op = build_graph(bench_cnn.raw_devices,
                             get_var_shapes(bench_cnn.model),
                             bench_cnn.variable_mgr, num_iters)
  init_ops = [
      tf.global_variables_initializer(),
      bench_cnn.variable_mgr.get_post_init_ops()
  ]
  loss_op = tf.no_op()

  if bench_cnn.graph_file:
    path, filename = os.path.split(bench_cnn.graph_file)
    as_text = filename.endswith('txt')
    log_fn('Writing GraphDef as %s to %s' % (
        'text' if as_text else 'binary', bench_cnn.graph_file))
    tf.train.write_graph(tf.get_default_graph().as_graph_def(add_shapes=True),
                         path, filename, as_text)

  run_graph(benchmark_op, bench_cnn, init_ops, loss_op)
Example #6
def main(positional_arguments):
    # Command-line arguments like '--distortions False' are equivalent to
    # '--distortions=True False', where False is a positional argument. To prevent
    # this from silently running with distortions, we do not allow positional
    # arguments.
    assert len(positional_arguments) >= 1
    if len(positional_arguments) > 1:
        raise ValueError('Received unknown positional arguments: %s' %
                         positional_arguments[1:])

    params = benchmark_cnn.make_params_from_flags()

    # Print ENV Variables
    tf.logging.debug('=' * 20 + ' Environment Variables ' + '=' * 20)
    for k, v in os.environ.items():
        tf.logging.debug('{}: {}'.format(k, v))

    with mlperf.mlperf_logger(absl_flags.FLAGS.ml_perf_compliance_logging,
                              params.model):
        params = benchmark_cnn.setup(params)
        bench = benchmark_cnn.BenchmarkCNN(params)

        tfversion = cnn_util.tensorflow_version_tuple()

        log_fn('TensorFlow:  %i.%i' % (tfversion[0], tfversion[1]))

        bench.print_info()
        bench.run()
def main(positional_arguments):
    # Command-line arguments like '--distortions False' are equivalent to
    # '--distortions=True False', where False is a positional argument. To prevent
    # this from silently running with distortions, we do not allow positional
    # arguments.
    assert len(positional_arguments) >= 1
    if len(positional_arguments) > 1:
        raise ValueError('Received unknown positional arguments: %s' %
                         positional_arguments[1:])

    params = benchmark_cnn.make_params_from_flags()
    params = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)

    tfversion = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow:  %i.%i' % (tfversion[0], tfversion[1]))

    bench.print_info()
    print('num_inter_threads: ' + str(params.num_inter_threads))
    print('num_intra_threads: ' + str(params.num_intra_threads))
    print('datasets_num_private_threads: ' +
          str(params.datasets_num_private_threads))
    print('datasets_use_prefetch: ' + str(params.datasets_use_prefetch))
    print('datasets_prefetch_buffer_size: ' +
          str(params.datasets_prefetch_buffer_size))

    bench.run()
def main(positional_arguments):
  # Command-line arguments like '--distortions False' are equivalent to
  # '--distortions=True False', where False is a positional argument. To prevent
  # this from silently running with distortions, we do not allow positional
  # arguments.
  assert len(positional_arguments) >= 1
  if len(positional_arguments) > 1:
    raise ValueError('Received unknown positional arguments: %s'
                     % positional_arguments[1:])

  params = benchmark_cnn.make_params_from_flags()
  params = benchmark_cnn.setup(params)

  import sys
  if params.enable_dmo:
    if not LoadFileSystem():
      sys.exit(-1)
    else:
      print("\n*******DMO enabled********\n")
      # sys.exit(0)

  bench = benchmark_cnn.BenchmarkCNN(params)

  tfversion = cnn_util.tensorflow_version_tuple()
  log_fn('TensorFlow:  %i.%i' % (tfversion[0], tfversion[1]))

  bench.print_info()
  bench.run()
Example #9
  def build_network(self, images, phase_train=True, nclass=1001, image_depth=3,
                    data_type=tf.float32, data_format='NCHW',
                    use_tf_layers=True, fp16_vars=False):
    """Returns logits and aux_logits from images."""
    if data_format == 'NCHW':
      images = tf.transpose(images, [0, 3, 1, 2])
    var_type = tf.float32
    if data_type == tf.float16 and fp16_vars:
      var_type = tf.float16
    network = convnet_builder.ConvNetBuilder(
        images, image_depth, phase_train, use_tf_layers,
        data_format, data_type, var_type)
    with tf.variable_scope('cg', custom_getter=network.get_custom_getter()):
      self.add_inference(network)
      log_fn("Number of parameters: %d" % network.n_parameters)
      # Add the final fully-connected class layer
      logits = (network.affine(nclass, activation='linear')
                if not self.skip_final_affine_layer()
                else network.top_layer)
      aux_logits = None
      if network.aux_top_layer is not None:
        with network.switch_to_aux_top_layer():
          aux_logits = network.affine(
              nclass, activation='linear', stddev=0.001)
    if data_type == tf.float16:
      # TODO(reedwm): Determine if we should do this cast here.
      logits = tf.cast(logits, tf.float32)
      if aux_logits is not None:
        aux_logits = tf.cast(aux_logits, tf.float32)
    return logits, aux_logits
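A quick standalone check (NumPy only, shapes made up for illustration) of the [0, 3, 1, 2] permutation used above: it moves the channel axis of an NHWC batch into position 1, yielding NCHW.

import numpy as np

images_nhwc = np.zeros((8, 224, 224, 3), dtype=np.float32)  # NHWC batch
images_nchw = np.transpose(images_nhwc, [0, 3, 1, 2])       # same permutation as above
assert images_nchw.shape == (8, 3, 224, 224)                # NCHW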
Example #10
def main(_):
    params = benchmark_cnn.make_params_from_flags()
    params = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)

    tfversion = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow:  %i.%i' % (tfversion[0], tfversion[1]))

    bench.print_info()
    bench.run()
Example #11
    def _eval_once(self, saver, image_producer_ops, fetches,
                   local_var_init_op_group, nth_ckpt):
        with tf.Session(config=create_config_proto(self.params)) as sess:

            coord = tf.train.Coordinator()
            if self.params.checkpoint_dir is None:
                raise ValueError(
                    'Checkpoint directory for evaluation is not specified')
            try:
                global_step = load_checkpoint(saver, sess,
                                              self.params.checkpoint_dir,
                                              nth_ckpt)
            except CheckpointNotFoundException:
                log_fn(
                    'Checkpoint not found in %s' % self.params.checkpoint_dir)
                sys.exit(-1)
                return
            log_fn('[Evaluation] START')
            sess.run(local_var_init_op_group)

            assert not self.use_synthetic_gpu_images

            dummy_queue = tf.FIFOQueue(1, [tf.bool], shapes=[[]],
                                       name='dummy_queue',
                                       shared_name='dummy_queue')

            qr = tf.train.QueueRunner(dummy_queue, image_producer_ops)
            tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, qr)
            enqueue_threads = qr.create_threads(sess=sess, coord=coord,
                                                daemon=True)
            for thread in enqueue_threads:
                thread.start()

            top_1_accuracy_sum = 0.0
            top_5_accuracy_sum = 0.0
            total_eval_count = self.num_batches_for_eval * self.batch_size
            for step in xrange(self.num_batches_for_eval):
                results = sess.run(fetches)
                top_1_accuracy_sum += results['top_1_accuracy']
                top_5_accuracy_sum += results['top_5_accuracy']
                if (step + 1) % self.params.display_every_for_eval == 0:
                    log_fn('%i\ttop_1_accuracy: %.4f' % (
                        step + 1, top_1_accuracy_sum / (step + 1)))
                    log_fn('%i\ttop_5_accuracy: %.4f' % (
                        step + 1, top_5_accuracy_sum / (step + 1)))
            accuracy_at_1 = top_1_accuracy_sum / self.num_batches_for_eval
            accuracy_at_5 = top_5_accuracy_sum / self.num_batches_for_eval
            log_fn(
                '[SUMMARY] Global step: %d Accuracy @ 1 = %.4f Accuracy @ 5 = %.4f [%d examples]' %
                (global_step, accuracy_at_1, accuracy_at_5, total_eval_count))
            sess.run(dummy_queue.close(cancel_pending_enqueues=True))
            coord.request_stop()
def main(_):
    # Build benchmark_cnn model
    params = benchmark_cnn.make_params_from_flags()
    params, sess_config = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)

    # Print information
    tfversion = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow:  %i.%i' % (tfversion[0], tfversion[1]))
    bench.print_info()

    # Build single-GPU benchmark_cnn model
    with tf.Graph().as_default() as single_gpu_graph:
        bench.build_model()

    def run(sess, num_iters, tensor_or_op_name_to_replica_names, num_workers,
            worker_id, num_replicas_per_worker):
        fetches = {
            'global_step':
            tensor_or_op_name_to_replica_names[bench.global_step.name][0],
            'cost':
            tensor_or_op_name_to_replica_names[bench.cost.name][0],
            'train_op':
            tensor_or_op_name_to_replica_names[bench.train_op.name][0],
        }
        if isinstance(bench.lr, tf.Tensor):
            fetches['lr'] = tensor_or_op_name_to_replica_names[
                bench.lr.name][0]

        start = time.time()
        for i in range(num_iters):
            results = sess.run(fetches)
            if i % FLAGS.log_frequency == 0:
                end = time.time()
                throughput = float(FLAGS.log_frequency) / float(end - start)
                parallax.log.info(
                    "global step: %d, lr: %f, loss: %f, "
                    "throughput: %f steps/sec" %
                    (results['global_step'], results['lr'] if 'lr' in results
                     else bench.lr, results['cost'], throughput))
                start = time.time()

    config = parallax_config.build_config()
    config.sess_config = sess_config

    parallax.parallel_run(single_gpu_graph,
                          run,
                          FLAGS.resource_info_file,
                          FLAGS.max_steps,
                          sync=FLAGS.sync,
                          parallax_config=config)
Example #13
def main(positional_arguments):
  assert len(positional_arguments) >= 1
  if len(positional_arguments) > 1:
    raise ValueError('Received unknown positional arguments: %s'
                     % positional_arguments[1:])

  options = make_options_from_flags(FLAGS)

  params = benchmark_cnn.make_params_from_flags()
  params = params._replace(batch_size=options.batch_size)
  params = params._replace(model='MY_GTSRB')
  params = params._replace(num_epochs=options.num_epochs)
  params = params._replace(num_gpus=options.num_gpus)
  params = params._replace(data_format='NHWC')
  params = params._replace(train_dir=options.checkpoint_folder)
  params = params._replace(allow_growth=True)
  params = params._replace(variable_update='replicated')
  params = params._replace(local_parameter_device='gpu')
  params = params._replace(use_tf_layers=False)
  # params = params._replace(all_reduce_spec='nccl')

  # params = params._replace(bottom_file=options.bottom_file)
  # params = params._replace(affine_files=options.affine_files)
  # params = params._replace(affine_classes=options.affine_classes)

  params = params._replace(optimizer=options.optimizer)
  params = params._replace(weight_decay=options.weight_decay)

  params = params._replace(print_training_accuracy=True)
  params = params._replace(backbone_model_path=options.backbone_model_path)
  # Summary and Save & load checkpoints.
  # params = params._replace(summary_verbosity=1)
  # params = params._replace(save_summaries_steps=10)
  params = params._replace(save_model_secs=3600)  # save every 1 hour
  # params = params._replace(save_model_secs=300) #save every 5 min
  params = benchmark_cnn.setup(params)

  dataset = CifarDataset(options)
  model = Model_Builder(options.model_name, dataset.num_classes, options, params)

  bench = benchmark_cnn.BenchmarkCNN(params, dataset=dataset, model=model)

  tfversion = cnn_util.tensorflow_version_tuple()
  log_fn('TensorFlow:  %i.%i' % (tfversion[0], tfversion[1]))

  bench.print_info()
  bench.run()
Example #14
  def postprocess(self, results):
    """Postprocess results returned from model."""
    try:
      import coco_metric  # pylint: disable=g-import-not-at-top
    except ImportError:
      raise ImportError('To use the COCO dataset, you must clone the '
                        'repo https://github.com/tensorflow/models and add '
                        'tensorflow/models and tensorflow/models/research to '
                        'the PYTHONPATH, and compile the protobufs by '
                        'following https://github.com/tensorflow/models/blob/'
                        'master/research/object_detection/g3doc/installation.md'
                        '#protobuf-compilation ; To evaluate using COCO '
                        'metric, download and install Python COCO API from '
                        'https://github.com/cocodataset/cocoapi')

    pred_boxes = results[ssd_constants.PRED_BOXES]
    pred_scores = results[ssd_constants.PRED_SCORES]
    # TODO(haoyuzhang): maybe use these values for visualization.
    # gt_boxes = results['gt_boxes']
    # gt_classes = results['gt_classes']
    source_id = results[ssd_constants.SOURCE_ID]
    raw_shape = results[ssd_constants.RAW_SHAPE]

    for i in range(self.get_batch_size()):
      self.predictions[int(source_id[i])] = {
          ssd_constants.PRED_BOXES: pred_boxes[i],
          ssd_constants.PRED_SCORES: pred_scores[i],
          ssd_constants.SOURCE_ID: source_id[i],
          ssd_constants.RAW_SHAPE: raw_shape[i]
      }

    # COCO metric calculates mAP only after a full epoch of evaluation. Return
    # dummy results for top_N_accuracy to be compatible with benchmark_cnn.py.
    if len(self.predictions) >= ssd_constants.COCO_NUM_VAL_IMAGES:
      annotation_file = os.path.join(self.params.data_dir,
                                     ssd_constants.ANNOTATION_FILE)
      eval_results = coco_metric.compute_map(self.predictions.values(),
                                             annotation_file)
      ret = {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
      for metric_key, metric_value in eval_results.items():
        ret['simple_value:' + metric_key] = metric_value
      return ret
    log_fn('Got {:d} out of {:d} eval examples.'
           ' Waiting for the remaining to calculate mAP...'.format(
               len(self.predictions), ssd_constants.COCO_NUM_VAL_IMAGES))
    return {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
def main(positional_arguments):
    # Command-line arguments like '--distortions False' are equivalent to
    # '--distortions=True False', where False is a positional argument. To prevent
    # this from silently running with distortions, we do not allow positional
    # arguments.
    assert len(positional_arguments) >= 1
    if len(positional_arguments) > 1:
        raise ValueError('Received unknown positional arguments: %s' %
                         positional_arguments[1:])

    params = benchmark_cnn.make_params_from_flags()
    params = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)

    tfversion = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow:  %i.%i' % (tfversion[0], tfversion[1]))

    run_benchmark(bench, absl_flags.FLAGS.iters_per_step)
Example #16
def main(extra_flags):
  # extra_flags is a list of command line arguments, excluding those defined
  # in tf.flags.FLAGS. extra_flags[0] is always the program name. It is an error
  # to supply flags not defined with tf.flags.FLAGS, so we raise a ValueError
  # in that case.
  assert len(extra_flags) >= 1
  if len(extra_flags) > 1:
    raise ValueError('Received unknown flags: %s' % extra_flags[1:])

  params = benchmark_cnn.make_params_from_flags()
  benchmark_cnn.setup(params)
  bench = benchmark_cnn.BenchmarkCNN(params)

  tfversion = cnn_util.tensorflow_version_tuple()
  log_fn('TensorFlow:  %i.%i' % (tfversion[0], tfversion[1]))

  bench.print_info()
  bench.run()
def main(positional_arguments):
    # Command-line arguments like '--distortions False' are equivalent to
    # '--distortions=True False', where False is a positional argument. To prevent
    # this from silently running with distortions, we do not allow positional
    # arguments.
    assert len(positional_arguments) >= 1
    if len(positional_arguments) > 1:
        raise ValueError("Received unknown positional arguments: %s" % positional_arguments[1:])

    params = benchmark_cnn.make_params_from_flags()
    with mlperf.mlperf_logger(absl_flags.FLAGS.ml_perf_compliance_logging, params.model):
        params = benchmark_cnn.setup(params)
        bench = benchmark_cnn.BenchmarkCNN(params)

    tfversion = cnn_util.tensorflow_version_tuple()
    log_fn("TensorFlow:  %i.%i" % (tfversion[0], tfversion[1]))

    bench.print_info()
    bench.run()
def main(_):
    # Build benchmark_cnn model
    params = benchmark_cnn.make_params_from_flags()
    params, sess_config = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)

    # Print information
    tfversion = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow:  %i.%i' % (tfversion[0], tfversion[1]))
    bench.print_info()

    # Build single-GPU benchmark_cnn model
    single_gpu_graph = tf.Graph()
    with single_gpu_graph.as_default():
        bench.build_model()

    config = parallax_config.build_config()
    config.sess_config = sess_config

    sess, num_workers, worker_id, num_replicas_per_worker = \
        parallax.parallel_run(single_gpu_graph,
                              FLAGS.resource_info_file,
                              sync=FLAGS.sync,
                              parallax_config=config)

    fetches = {
        'global_step': bench.global_step,
        'cost': bench.cost,
        'train_op': bench.train_op,
    }

    start = time.time()
    for i in range(FLAGS.max_steps):
        results = sess.run(fetches)
        if (i + 1) % FLAGS.log_frequency == 0:
            end = time.time()
            throughput = float(FLAGS.log_frequency) / float(end - start)
            parallax.log.info(
                "global step: %d, loss: %f, throughput: %f steps/sec" %
                (results['global_step'][0] + 1, results['cost'][0],
                 throughput))
            start = time.time()
Example #19
def load_checkpoint(saver, sess, checkpoint_dir, nth_ckpt):
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if nth_ckpt >= len(ckpt.all_model_checkpoint_paths):
        raise CheckpointNotFoundException('No more checkpoint file.')
    log_fn("Evaluate checkpoint file [%d/%d]" % (
        nth_ckpt, len(ckpt.all_model_checkpoint_paths)))
    if ckpt and ckpt.all_model_checkpoint_paths[nth_ckpt]:
        if os.path.isabs(ckpt.all_model_checkpoint_paths[nth_ckpt]):
            model_checkpoint_path = ckpt.all_model_checkpoint_paths[nth_ckpt]
        else:
            raise ValueError('Checkpoint path should be absolute path.')
        global_step = \
            ckpt.all_model_checkpoint_paths[nth_ckpt].split('/')[-1].split('-')[-1]
        if not global_step.isdigit():
            global_step = 0
        else:
            global_step = int(global_step)
        saver.restore(sess, model_checkpoint_path)
        log_fn('Successfully loaded model from %s.' % model_checkpoint_path)
        return global_step
    else:
        raise CheckpointNotFoundException('No checkpoint file found.')
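The global step above is recovered purely from the checkpoint filename. A minimal illustration of that parsing, using a hypothetical path:

checkpoint_path = '/tmp/train_dir/model.ckpt-12345'  # hypothetical checkpoint path
step_str = checkpoint_path.split('/')[-1].split('-')[-1]
global_step = int(step_str) if step_str.isdigit() else 0
assert global_step == 12345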
def run_graph(benchmark_op, bench_cnn, init_ops, dummy_loss_op):
    """Runs the graph for the benchmark.

  Args:
    benchmark_op: An op that runs the benchmark.
    bench_cnn: The BenchmarkCNN where params and other attributes are obtained.
    init_ops: A list of ops that are run before `benchmark_op` for
      initialization.
    dummy_loss_op: Any op. We must pass a loss op to
      `benchmark_cnn.benchmark_one_step`, but the result of the op is never
      actually used.
  """
    config = benchmark_cnn.create_config_proto(bench_cnn.params)
    with tf.Session(config=config) as sess:
        for op in init_ops:
            sess.run(op)
        step_train_times = []
        fetches = {'average_loss': dummy_loss_op, 'benchmark_op': benchmark_op}
        log_fn('Running warmup')
        for i in range(-bench_cnn.num_warmup_batches, bench_cnn.num_batches):
            if i == 0:
                log_fn('Running all-reduce ops')
                start = time.time()
            if i > 0 and i % bench_cnn.params.display_every == 0:
                log_fn('Iteration: %d. Average time per step so far: %s' %
                       (i, (time.time() - start) / i))
            # Call benchmark_one_step instead of directly calling sess.run(...), to
            # potentially get a trace file, partitioned graphs, etc.
            benchmark_cnn.benchmark_one_step(
                sess=sess,
                fetches=fetches,
                step=i,
                # The batch size is only used for the images/sec calculation, which is
                # not actually calculated because we pass show_images_per_sec=False.
                batch_size=None,
                step_train_times=step_train_times,
                trace_filename=bench_cnn.trace_filename,
                partitioned_graph_file_prefix=(
                    bench_cnn.params.partitioned_graph_file_prefix),
                profiler=None,
                image_producer=None,
                params=bench_cnn.params,
                show_images_per_sec=False)
        log_fn('Average time per step: %s' %
               ((time.time() - start) / bench_cnn.num_batches))
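The timing pattern in run_graph (warmup iterations at negative step indices, timer started at step 0) can be reduced to a standalone sketch, with a sleep standing in for the real sess.run call:

import time

num_warmup_batches, num_batches = 5, 20

def fake_step():
    # Stand-in for sess.run(fetches) / benchmark_one_step.
    time.sleep(0.01)

for i in range(-num_warmup_batches, num_batches):
    if i == 0:
        start = time.time()  # warmup iterations are excluded from the average
    fake_step()
print('Average time per step: %s' % ((time.time() - start) / num_batches))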
Example #21
    def postprocess(self, results):
        """Postprocess results returned from model."""
        try:
            import coco_metric  # pylint: disable=g-import-not-at-top
        except ImportError:
            raise ImportError(
                'To use the COCO dataset, you must clone the '
                'repo https://github.com/tensorflow/models and add '
                'tensorflow/models and tensorflow/models/research to '
                'the PYTHONPATH, and compile the protobufs by '
                'following https://github.com/tensorflow/models/blob/'
                'master/research/object_detection/g3doc/installation.md'
                '#protobuf-compilation ; To evaluate using COCO '
                'metric, download and install Python COCO API from '
                'https://github.com/cocodataset/cocoapi')

        pred_boxes = results[ssd_constants.PRED_BOXES]
        pred_scores = results[ssd_constants.PRED_SCORES]
        # TODO(haoyuzhang): maybe use these values for visualization.
        # gt_boxes = results['gt_boxes']
        # gt_classes = results['gt_classes']
        source_id = results[ssd_constants.SOURCE_ID]
        raw_shape = results[ssd_constants.RAW_SHAPE]

        # COCO evaluation requires processing COCO_NUM_VAL_IMAGES exactly once. Due
        # to rounding errors (i.e., COCO_NUM_VAL_IMAGES % batch_size != 0), setting
        # `num_eval_epochs` to 1 is not enough and will often miss some images. We
        # expect user to set `num_eval_epochs` to >1, which will leave some unused
        # images from previous steps in `predictions`. Here we check if we are doing
        # eval at a new global step.
        if results['global_step'] > self.eval_global_step:
            self.eval_global_step = results['global_step']
            self.predictions.clear()

        for i, sid in enumerate(source_id):
            self.predictions[int(sid)] = {
                ssd_constants.PRED_BOXES: pred_boxes[i],
                ssd_constants.PRED_SCORES: pred_scores[i],
                ssd_constants.SOURCE_ID: source_id[i],
                ssd_constants.RAW_SHAPE: raw_shape[i]
            }

        # COCO metric calculates mAP only after a full epoch of evaluation. Return
        # dummy results for top_N_accuracy to be compatible with benchmark_cnn.py.
        if len(self.predictions) >= ssd_constants.COCO_NUM_VAL_IMAGES:
            log_fn('Got results for all {:d} eval examples. Calculate mAP...'.
                   format(ssd_constants.COCO_NUM_VAL_IMAGES))
            annotation_file = os.path.join(self.params.data_dir,
                                           ssd_constants.ANNOTATION_FILE)
            eval_results = coco_metric.compute_map(self.predictions.values(),
                                                   annotation_file)
            self.predictions.clear()
            ret = {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
            for metric_key, metric_value in eval_results.items():
                ret[constants.SIMPLE_VALUE_RESULT_PREFIX +
                    metric_key] = metric_value
            return ret
        log_fn('Got {:d} out of {:d} eval examples.'
               ' Waiting for the remaining to calculate mAP...'.format(
                   len(self.predictions), ssd_constants.COCO_NUM_VAL_IMAGES))
        return {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
Example #22
  def postprocess(self, results):
    """Postprocess results returned from model."""
    try:
      import coco_metric  # pylint: disable=g-import-not-at-top
    except ImportError:
      raise ImportError('To use the COCO dataset, you must clone the '
                        'repo https://github.com/tensorflow/models and add '
                        'tensorflow/models and tensorflow/models/research to '
                        'the PYTHONPATH, and compile the protobufs by '
                        'following https://github.com/tensorflow/models/blob/'
                        'master/research/object_detection/g3doc/installation.md'
                        '#protobuf-compilation ; To evaluate using COCO '
                        'metric, download and install Python COCO API from '
                        'https://github.com/cocodataset/cocoapi')

    pred_boxes = results[ssd_constants.PRED_BOXES]
    pred_scores = results[ssd_constants.PRED_SCORES]
    # TODO(haoyuzhang): maybe use these values for visualization.
    # gt_boxes = results['gt_boxes']
    # gt_classes = results['gt_classes']
    source_id = results[ssd_constants.SOURCE_ID]
    raw_shape = results[ssd_constants.RAW_SHAPE]

    # COCO evaluation requires processing COCO_NUM_VAL_IMAGES exactly once. Due
    # to rounding errors (i.e., COCO_NUM_VAL_IMAGES % batch_size != 0), setting
    # `num_eval_epochs` to 1 is not enough and will often miss some images. We
    # expect user to set `num_eval_epochs` to >1, which will leave some unused
    # images from previous steps in `predictions`. Here we check if we are doing
    # eval at a new global step.
    if results['global_step'] > self.eval_global_step:
      self.eval_global_step = results['global_step']
      self.predictions.clear()

    for i, sid in enumerate(source_id):
      self.predictions[int(sid)] = {
          ssd_constants.PRED_BOXES: pred_boxes[i],
          ssd_constants.PRED_SCORES: pred_scores[i],
          ssd_constants.SOURCE_ID: source_id[i],
          ssd_constants.RAW_SHAPE: raw_shape[i]
      }

    # COCO metric calculates mAP only after a full epoch of evaluation. Return
    # dummy results for top_N_accuracy to be compatible with benchmark_cnn.py.
    if len(self.predictions) >= ssd_constants.COCO_NUM_VAL_IMAGES:
      log_fn('Got results for all {:d} eval examples. Calculate mAP...'.format(
          ssd_constants.COCO_NUM_VAL_IMAGES))

      annotation_file = os.path.join(self.params.data_dir,
                                     ssd_constants.ANNOTATION_FILE)
      # Size of predictions before decoding is about 15--30GB, while after
      # decoding it is 100--200MB. When using async eval mode, decoding takes
      # 20--30 seconds of main thread time but is necessary to avoid OOM during
      # inter-process communication.
      decoded_preds = coco_metric.decode_predictions(self.predictions.values())
      self.predictions.clear()

      if self.params.collect_eval_results_async:
        def _eval_results_getter():
          """Iteratively get eval results from async eval process."""
          while True:
            step, eval_results = self.async_eval_results_queue.get()
            self.eval_coco_ap = eval_results['COCO/AP']
            mlperf.logger.log_eval_accuracy(
                self.eval_coco_ap, step, self.batch_size * self.params.num_gpus,
                ssd_constants.COCO_NUM_TRAIN_IMAGES)
            if self.reached_target():
              # Reached target, clear all pending messages in predictions queue
              # and insert poison pill to stop the async eval process.
              while not self.async_eval_predictions_queue.empty():
                self.async_eval_predictions_queue.get()
              self.async_eval_predictions_queue.put('STOP')
              break

        if not self.async_eval_process:
          # Limiting the number of messages in predictions queue to prevent OOM.
          # Each message (predictions data) can potentially consume a lot of
          # memory, and normally there should only be few messages in the queue.
          # If often blocked on this, consider reducing eval frequency.
          self.async_eval_predictions_queue = multiprocessing.Queue(2)
          self.async_eval_results_queue = multiprocessing.Queue()

          # Reason to use a Process as opposed to Thread is mainly the
          # computationally intensive eval runner. Python multithreading does not
          # truly run in parallel, so a runner thread would get significantly
          # delayed (or alternatively delay the main thread).
          self.async_eval_process = multiprocessing.Process(
              target=coco_metric.async_eval_runner,
              args=(self.async_eval_predictions_queue,
                    self.async_eval_results_queue,
                    annotation_file))
          self.async_eval_process.daemon = True
          self.async_eval_process.start()

          self.async_eval_results_getter_thread = threading.Thread(
              target=_eval_results_getter, args=())
          self.async_eval_results_getter_thread.daemon = True
          self.async_eval_results_getter_thread.start()

        self.async_eval_predictions_queue.put(
            (self.eval_global_step, decoded_preds))
        return {'top_1_accuracy': 0, 'top_5_accuracy': 0.}

      eval_results = coco_metric.compute_map(decoded_preds, annotation_file)
      self.eval_coco_ap = eval_results['COCO/AP']
      ret = {'top_1_accuracy': self.eval_coco_ap, 'top_5_accuracy': 0.}
      for metric_key, metric_value in eval_results.items():
        ret[constants.SIMPLE_VALUE_RESULT_PREFIX + metric_key] = metric_value
      mlperf.logger.log_eval_accuracy(self.eval_coco_ap, self.eval_global_step,
                                      self.batch_size * self.params.num_gpus,
                                      ssd_constants.COCO_NUM_TRAIN_IMAGES)
      return ret
    log_fn('Got {:d} out of {:d} eval examples.'
           ' Waiting for the remaining to calculate mAP...'.format(
               len(self.predictions), ssd_constants.COCO_NUM_VAL_IMAGES))
    return {'top_1_accuracy': self.eval_coco_ap, 'top_5_accuracy': 0.}
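Stripped of the COCO and multiprocessing machinery, the accumulate-until-a-full-epoch pattern used by these postprocess methods looks roughly like this; the constants and names below are made up for illustration.

NUM_VAL_IMAGES = 8  # stands in for ssd_constants.COCO_NUM_VAL_IMAGES
predictions = {}

def postprocess_batch(source_ids, batch_outputs):
  """Collect per-image outputs; return them only once a full epoch is seen."""
  for sid, out in zip(source_ids, batch_outputs):
    predictions[int(sid)] = out      # keyed by image id, so repeats just overwrite
  if len(predictions) >= NUM_VAL_IMAGES:
    full_epoch = dict(predictions)   # this is where compute_map would be called
    predictions.clear()
    return full_epoch
  return None                        # still waiting for the remaining images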
Example #23
def main(positional_arguments):
    # Command-line arguments like '--distortions False' are equivalent to
    # '--distortions=True False', where False is a positional argument. To prevent
    # this from silently running with distortions, we do not allow positional
    # arguments.

    # For DGX servers, use the hierarchical_copy=True argument.

    assert len(positional_arguments) >= 1
    if len(positional_arguments) > 1:
        raise ValueError('Received unknown positional arguments: %s' %
                         positional_arguments[1:])

    tests_models = [
        {
            'num_gpus': None,
            'batch_size': 64,
            'variable_update': 'parameter_server',
            'model': 'inception3'
        },
        {
            'num_gpus': None,
            'batch_size': 64,
            'variable_update': 'parameter_server',
            'model': 'resnet50'
        },
        {
            'num_gpus': None,
            'batch_size': 32,
            'variable_update': 'parameter_server',
            'model': 'resnet152'
        },  #batch=64 crashes
        {
            'num_gpus': None,
            'batch_size': 64,
            'variable_update': 'replicated',
            'model': 'vgg16'
        },
        {
            'num_gpus': None,
            'batch_size': 512,
            'variable_update': 'replicated',
            'model': 'alexnet'
        }
    ]

    test_gpus = [1, 2, 4, 8]

    stats = []
    for test in tests_models:
        for num_gpus in test_gpus:
            test['num_gpus'] = num_gpus

            params = benchmark_cnn.make_params_from_flags()
            params = benchmark_cnn.setup(params)

            # force --hierarchical_copy to False when using 1 GPU
            if num_gpus == 1:
                params = params._replace(hierarchical_copy=False)

            params = params._replace(num_gpus=test['num_gpus'],
                                     batch_size=test['batch_size'],
                                     model=test['model'],
                                     variable_update=test['variable_update'])

            bench = benchmark_cnn.BenchmarkCNN(params)

            tfversion = cnn_util.tensorflow_version_tuple()
            log_fn('TensorFlow:  %i.%i' % (tfversion[0], tfversion[1]))

            bench.print_info()
            results = bench.run()
            # result
            # {
            #     'average_wall_time': 0.6646941304206848,
            #     'images_per_sec': 385.1395525908701,
            #     'last_average_loss': 7.256145,
            #     'num_steps': 100,
            #     'num_workers': 1
            # }
            stats.append({'test': test.copy(), 'result': results})

    # summary
    print('summary:')
    print('==========')
    pprint.pprint(stats)

    print('==========')
    s = ''
    for i in range(len(test_gpus)):
        for j in range(len(tests_models)):
            s += str(stats[i + j * len(test_gpus)]['result']['images_per_sec'])
            s += ', '
        s += '\n'
    print(s)
    print('==========')
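The summary loop above relies on stats being appended model-major (all GPU counts for one model before the next), so row i of the printout corresponds to test_gpus[i]. A tiny check of that indexing with placeholder values:

test_gpus = [1, 2, 4, 8]
models = ['inception3', 'resnet50']

# Same append order as the nested loops above: models outer, GPU counts inner.
stats = [(m, g) for m in models for g in test_gpus]

for i in range(len(test_gpus)):
    row = [stats[i + j * len(test_gpus)] for j in range(len(models))]
    assert all(gpus == test_gpus[i] for _, gpus in row)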
Example #24
  def print_info(self):
    """Print basic information."""
    log_fn('Model:       %s' % self.model.get_model())
    dataset_name = self.dataset.name
    if self.dataset.use_synthetic_gpu_images():
      dataset_name += ' (synthetic)'
    log_fn('Dataset:     %s' % dataset_name)
    log_fn('Mode:        %s' % get_mode_from_params(self.params))
    log_fn('Batch size:  %s per device' % self.batch_size)
    if self.batch_group_size > 1:
      log_fn('             %d batches per preprocessing group' %
             self.batch_group_size)
    log_fn('Data format: %s' % self.data_format)
    log_fn('Optimizer:   %s' % self.params.optimizer)
    log_fn('==========')