Beispiel #1
0
 def _benchmark_eager_apply(self,
                            label,
                            device_and_format,
                            defun=False,
                            execution_mode=None,
                            compiled=False):
     """Times repeated ResNet50 inference calls and reports the result.

     Args:
       label: Name handed through to `self._report`.
       device_and_format: A `(device, data_format)` pair.
       defun: When true, `model.call` is wrapped with `tfe.defun`.
       execution_mode: Value given to `tfe.execution_mode`; when truthy,
         `tfe.async_wait()` is called after each loop.
       compiled: Forwarded to `tfe.defun` when `defun` is set.
     """
     batch_size = 64
     warmup_steps = 5
     timed_steps = 30
     with tfe.execution_mode(execution_mode):
         device, data_format = device_and_format
         model = resnet50.ResNet50(data_format)
         if defun:
             model.call = tfe.defun(model.call, compiled=compiled)
         with tf.device(device):
             images, unused_labels = random_batch(batch_size, data_format)
             # Untimed warm-up passes.
             for _ in xrange(warmup_steps):
                 model(images, training=False).cpu()
             if execution_mode:
                 tfe.async_wait()
             # Collect garbage before starting the clock.
             gc.collect()
             start = time.time()
             for _ in xrange(timed_steps):
                 model(images, training=False).cpu()
             # Wait for pending work before the report is made.
             if execution_mode:
                 tfe.async_wait()
             self._report(label, start, timed_steps, device, batch_size,
                          data_format)
Beispiel #2
0
    def _benchmark_eager_train(self, label, make_iterator, defun=False):
        """Times eager training steps for every configured batch size.

        Args:
          label: Name handed through to `self._report`.
          make_iterator: Callable that receives an `(images, labels)` tuple
            and returns an object whose `.next()` yields such tuples.
          defun: When true, `model.call` is wrapped with `tfe.defun`.
        """
        warmup_steps = 3
        timed_steps = 10
        device, data_format = device_and_data_format()
        for batch_size in self._train_batch_sizes():
            images, labels = random_batch(batch_size)
            model = resnet50.ResNet50(data_format)
            if defun:
                model.call = tfe.defun(model.call)
            optimizer = tf.train.GradientDescentOptimizer(0.1)

            with tf.device(device):
                batches = make_iterator((images, labels))

                # Untimed warm-up steps.
                for _ in xrange(warmup_steps):
                    images, labels = batches.next()
                    train_one_step(model, images, labels, optimizer)
                self._force_gpu_sync()
                gc.collect()

                # Timed steps; sync again before reporting.
                start = time.time()
                for _ in xrange(timed_steps):
                    images, labels = batches.next()
                    train_one_step(model, images, labels, optimizer)
                self._force_gpu_sync()
                self._report(label, start, timed_steps, device, batch_size,
                             data_format)
Beispiel #3
0
 def test_apply_with_pooling(self):
   """Checks the output shape of a top-less ResNet50 built with 'avg' pooling."""
   batch_size = 2
   device, data_format = device_and_data_format()
   model = resnet50.ResNet50(data_format, include_top=False, pooling='avg')
   with tf.device(device):
     images, unused_labels = random_batch(batch_size)
     pooled = model(images)
   self.assertEqual((batch_size, 2048), pooled.shape)
Beispiel #4
0
  def testTrainWithSummary(self):
    """Runs one graph-mode training step and checks the written 'loss' summary.

    Builds ResNet50 with a softmax cross-entropy loss inside a summary file
    writer, runs a single `train_op` together with all summary ops, and then
    reads the events back from the log directory.
    """
    import shutil  # Local import: only needed for temp-dir cleanup.

    with tf.Graph().as_default():
      images = tf.placeholder(tf.float32, image_shape(None), name='images')
      labels = tf.placeholder(tf.float32, [None, 1000], name='labels')

      tf.train.get_or_create_global_step()
      logdir = tempfile.mkdtemp()
      # Remove the event directory once the test finishes (pass or fail) so
      # repeated runs do not accumulate temp directories.
      self.addCleanup(shutil.rmtree, logdir, ignore_errors=True)
      with tf.contrib.summary.always_record_summaries():
        with tf.contrib.summary.create_file_writer(
            logdir, max_queue=0,
            name='t0').as_default():
          model = resnet50.ResNet50(data_format())
          logits = model(images, training=True)
          loss = tf.losses.softmax_cross_entropy(
              logits=logits, onehot_labels=labels)
          tf.contrib.summary.scalar(name='loss', tensor=loss)
          optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
          train_op = optimizer.minimize(loss)

      init = tf.global_variables_initializer()
      self.assertEqual(321, len(tf.global_variables()))

      # Use small batches for tests because the OSS version runs
      # in constrained GPU environment with 1-2GB of memory.
      batch_size = 2
      with tf.Session() as sess:
        sess.run(init)
        sess.run(tf.contrib.summary.summary_writer_initializer_op())
        np_images, np_labels = random_batch(batch_size)
        sess.run([train_op, tf.contrib.summary.all_summary_ops()],
                 feed_dict={images: np_images, labels: np_labels})

      # Two events are expected; the second one carries the 'loss' scalar.
      events = summary_test_util.events_from_logdir(logdir)
      self.assertEqual(len(events), 2)
      self.assertEqual(events[1].summary.value[0].tag, 'loss')
Beispiel #5
0
 def test_no_garbage(self):
     """Asserts that steady-state training steps leave nothing for the GC.

     Automatic collection is disabled and `gc.DEBUG_SAVEALL` is set so that
     anything the collector finds ends up in `gc.garbage`. The global GC state
     is restored in a `finally` block so that a failing assertion (or an error
     in a training step) does not leave collection disabled or the debug flags
     altered for whatever runs next in the same process.
     """
     device, data_format = device_and_data_format()
     model = resnet50.ResNet50(data_format)
     optimizer = tf.train.GradientDescentOptimizer(0.1)
     with tf.device(device):
         images, labels = random_batch(2, data_format)
         previous_gc_debug_flags = gc.get_debug()
         gc.disable()
         try:
             # Warm up. Note that this first run does create significant
             # amounts of garbage to be collected. The hope is that this is a
             # build-only effect, and a subsequent training loop will create
             # nothing which needs to be collected.
             apply_gradients(model, optimizer,
                             compute_gradients(model, images, labels))
             gc.collect()
             gc.set_debug(gc.DEBUG_SAVEALL)
             for _ in range(2):
                 # Run twice to ensure that garbage that is created on the
                 # first iteration is no longer accessible.
                 apply_gradients(model, optimizer,
                                 compute_gradients(model, images, labels))
             gc.collect()
             # There should be no garbage requiring collection.
             self.assertEqual(0, len(gc.garbage))
         finally:
             gc.set_debug(previous_gc_debug_flags)
             # Drop anything DEBUG_SAVEALL stored; this is a no-op when the
             # assertion above passed.
             del gc.garbage[:]
             gc.enable()
Beispiel #6
0
 def test_apply(self):
     """Checks that ResNet50 yields a (batch, 1000) output for a random batch."""
     batch_size = 2
     device, data_format = device_and_data_format()
     model = resnet50.ResNet50(data_format)
     with tf.device(device):
         images, unused_labels = random_batch(batch_size)
         predictions = model(images)
     self.assertEqual((batch_size, 1000), predictions.shape)
Beispiel #7
0
    def benchmark_graph_train(self):
        """Times graph-mode training steps for batch sizes 16, 32 and 64.

        Each batch size gets a fresh graph fed by a repeating single-element
        dataset; 5 untimed steps are followed by 10 timed ones.
        """
        warmup_steps, timed_steps = 5, 10
        for batch_size in [16, 32, 64]:
            with tf.Graph().as_default():
                np_images, np_labels = random_batch(batch_size)
                repeated = tf.data.Dataset.from_tensors(
                    (np_images, np_labels)).repeat()
                images, labels = repeated.make_one_shot_iterator().get_next()

                model = resnet50.ResNet50(data_format())
                logits = model(images, training=True)
                loss = tf.losses.softmax_cross_entropy(
                    logits=logits, onehot_labels=labels)
                sgd = tf.train.GradientDescentOptimizer(learning_rate=1.0)
                train_op = sgd.minimize(loss)

                init_op = tf.global_variables_initializer()
                with tf.Session() as sess:
                    sess.run(init_op)
                    # Untimed warm-up steps.
                    for _ in range(warmup_steps):
                        sess.run(train_op)
                    start = time.time()
                    for _ in range(timed_steps):
                        sess.run(train_op)
                    self._report('train', start, timed_steps, batch_size)
Beispiel #8
0
 def _apply(self, defun=False):
     """Runs ResNet50 on a random batch of 2 and checks the output shape.

     Args:
       defun: When true, `model.call` is wrapped with `tfe.defun` first.
     """
     batch_size = 2
     device, data_format = device_and_data_format()
     model = resnet50.ResNet50(data_format)
     if defun:
         model.call = tfe.defun(model.call)
     with tf.device(device):
         images, unused_labels = random_batch(batch_size)
         predictions = model(images)
     self.assertEqual((batch_size, 1000), predictions.shape)
Beispiel #9
0
 def test_apply_no_top(self):
     """Checks the feature-map shape of ResNet50 built with include_top=False."""
     device, data_format = device_and_data_format()
     model = resnet50.ResNet50(data_format, include_top=False)
     with tf.device(device):
         images, unused_labels = random_batch(2)
         features = model(images)
     # The position of the 2048-wide axis depends on the data format.
     if data_format == 'channels_first':
         expected_shape = (2, 2048, 1, 1)
     else:
         expected_shape = (2, 1, 1, 2048)
     self.assertEqual(expected_shape, features.shape)
Beispiel #10
0
 def _apply(self, defun=False, execution_mode=None):
     """Runs ResNet50 inference on a random batch of 2 and checks the shape.

     Args:
       defun: When true, `model.call` is wrapped with `tfe.defun` first.
       execution_mode: Value given to `tfe.execution_mode` around the call.
     """
     batch_size = 2
     device, data_format = device_and_data_format()
     model = resnet50.ResNet50(data_format)
     if defun:
         model.call = tfe.defun(model.call)
     with tf.device(device):
         with tfe.execution_mode(execution_mode):
             images, unused_labels = random_batch(batch_size, data_format)
             predictions = model(images, training=False)
             # Called unconditionally, before leaving the execution-mode scope.
             tfe.async_wait()
     self.assertEqual((batch_size, 1000), predictions.shape)
Beispiel #11
0
    def testApply(self):
        """Feeds a random batch of 64 through graph-mode ResNet50 and checks
        that the fetched output has shape [64, 1000]."""
        batch_size = 64
        with tf.Graph().as_default():
            images = tf.placeholder(tf.float32, image_shape(None))
            model = resnet50.ResNet50(data_format())
            predictions = model(images)

            init_op = tf.global_variables_initializer()

            with tf.Session() as sess:
                sess.run(init_op)
                np_images, unused_labels = random_batch(batch_size)
                fetched = sess.run(predictions,
                                   feed_dict={images: np_images})
                self.assertAllEqual([batch_size, 1000], fetched.shape)
Beispiel #12
0
 def test_train(self):
     """Runs one training step under a summary writer and checks the events.

     Verifies the model's variable count after the step and that the second
     event read back from the log directory carries the 'loss' tag.
     """
     import shutil  # Local import: only needed for temp-dir cleanup.

     device, data_format = device_and_data_format()
     model = resnet50.ResNet50(data_format)
     tf.train.get_or_create_global_step()
     logdir = tempfile.mkdtemp()
     # Remove the event directory once the test finishes (pass or fail) so
     # repeated runs do not accumulate temp directories.
     self.addCleanup(shutil.rmtree, logdir, ignore_errors=True)
     with tf.contrib.summary.create_file_writer(
             logdir, max_queue=0, name='t0').as_default(
             ), tf.contrib.summary.always_record_summaries():
         with tf.device(device):
             optimizer = tf.train.GradientDescentOptimizer(0.1)
             images, labels = random_batch(2)
             train_one_step(model, images, labels, optimizer)
             self.assertEqual(320, len(model.variables))
     # Two events are expected; the second one carries the 'loss' summary.
     events = summary_test_util.events_from_logdir(logdir)
     self.assertEqual(len(events), 2)
     self.assertEqual(events[1].summary.value[0].tag, 'loss')
Beispiel #13
0
  def testApply(self):
    """Feeds a small random batch through graph-mode ResNet50 inference and
    checks that the fetched output has shape [batch_size, 1000]."""
    # Use small batches for tests because the OSS version runs
    # in constrained GPU environment with 1-2GB of memory.
    batch_size = 8
    expected_shape = [batch_size, 1000]
    with tf.Graph().as_default():
      images = tf.placeholder(tf.float32, image_shape(None))
      model = resnet50.ResNet50(data_format())
      predictions = model(images, training=False)

      init_op = tf.global_variables_initializer()

      with tf.Session() as sess:
        sess.run(init_op)
        np_images, unused_labels = random_batch(batch_size)
        fetched = sess.run(predictions, feed_dict={images: np_images})
        self.assertAllEqual(expected_shape, fetched.shape)
Beispiel #14
0
 def benchmark_eager_apply(self):
     """Times 30 eager ResNet50 calls (after 5 warm-ups) on a batch of 64."""
     batch_size = 64
     warmup_steps = 5
     timed_steps = 30
     device, data_format = device_and_data_format()
     model = resnet50.ResNet50(data_format)
     with tf.device(device):
         images, unused_labels = random_batch(batch_size)
         # Untimed warm-up passes.
         for _ in xrange(warmup_steps):
             model(images).cpu()
         # Collect garbage before starting the clock.
         gc.collect()
         start = time.time()
         for _ in xrange(timed_steps):
             model(images).cpu()
         self._report('eager_apply', start, timed_steps, device, batch_size,
                      data_format)
Beispiel #15
0
 def _benchmark_eager_apply(self, label, defun=False):
   """Times 30 eager ResNet50 calls (after 5 warm-ups) on a batch of 64.

   Args:
     label: Name handed through to `self._report`.
     defun: When true, `model.call` is wrapped with `tfe.defun` first.
   """
   batch_size = 64
   warmup_steps = 5
   timed_steps = 30
   device, data_format = device_and_data_format()
   model = resnet50.ResNet50(data_format)
   if defun:
     model.call = tfe.defun(model.call)
   with tf.device(device):
     images, unused_labels = random_batch(batch_size)
     # Untimed warm-up passes.
     for _ in xrange(warmup_steps):
       model(images).cpu()
     # Collect garbage before starting the clock.
     gc.collect()
     start = time.time()
     for _ in xrange(timed_steps):
       model(images).cpu()
     self._report(label, start, timed_steps, device, batch_size, data_format)
    def _benchmark_eager_train(self,
                               label,
                               make_iterator,
                               device_and_format,
                               defun=False,
                               execution_mode=None,
                               compiled=False):
        """Times eager training steps for every configured batch size.

        Args:
          label: Name handed through to `self._report`.
          make_iterator: Callable that receives an `(images, labels)` tuple
            and returns an object whose `.next()` yields such tuples.
          device_and_format: A `(device, data_format)` pair.
          defun: When true, both `model.call` and the gradient-application
            function are wrapped with `tfe.defun`.
          execution_mode: Value given to `tfe.execution_mode`; when truthy,
            `tfe.async_wait()` is called after each loop.
          compiled: Forwarded to `tfe.defun` when `defun` is set.
        """
        warmup_steps = 3
        timed_steps = 10
        with tfe.execution_mode(execution_mode):
            device, data_format = device_and_format
            for batch_size in self._train_batch_sizes():
                images, labels = random_batch(batch_size, data_format)
                model = resnet50.ResNet50(data_format)
                optimizer = tf.train.GradientDescentOptimizer(0.1)
                if defun:
                    model.call = tfe.defun(model.call, compiled=compiled)
                    apply_grads = tfe.defun(apply_gradients,
                                            compiled=compiled)
                else:
                    apply_grads = apply_gradients

                with tf.device(device):
                    batches = make_iterator((images, labels))

                    # Untimed warm-up steps.
                    for _ in xrange(warmup_steps):
                        images, labels = batches.next()
                        grads = compute_gradients(model, images, labels)
                        apply_grads(model, optimizer, grads)
                    if execution_mode:
                        tfe.async_wait()
                    self._force_device_sync()
                    gc.collect()

                    # Timed steps; wait and sync again before reporting.
                    start = time.time()
                    for _ in xrange(timed_steps):
                        images, labels = batches.next()
                        grads = compute_gradients(model, images, labels)
                        apply_grads(model, optimizer, grads)
                    if execution_mode:
                        tfe.async_wait()
                    self._force_device_sync()
                    self._report(label, start, timed_steps, device,
                                 batch_size, data_format)
Beispiel #17
0
  def benchmark_graph_apply(self):
    """Times 30 graph-mode inference runs (after 3 warm-ups) on a batch of 64."""
    batch_size = 64
    warmup_steps, timed_steps = 3, 30
    with tf.Graph().as_default():
      images = tf.placeholder(tf.float32, image_shape(None))
      model = resnet50.ResNet50(data_format())
      predictions = model(images, training=False)

      init_op = tf.global_variables_initializer()

      with tf.Session() as sess:
        sess.run(init_op)
        np_images, unused_labels = random_batch(batch_size)
        # Untimed warm-up runs.
        for _ in range(warmup_steps):
          sess.run(predictions, feed_dict={images: np_images})
        start = time.time()
        for _ in range(timed_steps):
          # Comparison with the eager execution benchmark in resnet50_test.py
          # isn't entirely fair as the time here includes the cost of copying
          # the feeds from CPU memory to GPU.
          sess.run(predictions, feed_dict={images: np_images})
        self._report('apply', start, timed_steps, batch_size)