Ejemplo n.º 1
0
 def _benchmark_eager_apply(self, label, device_and_format, defun=False,
                            execution_mode=None, compiled=False):
   """Times inference-mode forward passes of a DenseNet and reports them.

   A batch from `random_batch(64, data_format)` is pushed through the model
   5 times as warm-up and then 30 more times under the clock; the start time
   and iteration count are handed to `self._report`.

   Args:
     label: Passed through to `self._report`.
     device_and_format: A `(device, data_format)` pair.
     defun: If truthy, `model.call` is wrapped with `tfe.defun`.
     execution_mode: Given to `tfe.execution_mode`; when truthy,
       `tfe.async_wait()` is called after the warm-up and after the timed loop.
     compiled: Forwarded to `tfe.defun` as its `compiled` argument.
   """
   batch_size = 64
   warmup_steps = 5
   timed_steps = 30

   def run_inference(step_count):
     # `.cpu()` is called on every output -- presumably so each result is
     # fully materialized before the next pass; confirm against the tensor API.
     for _ in xrange(step_count):
       model(images, training=False).cpu()

   def wait_for_pending_ops():
     if execution_mode:
       tfe.async_wait()

   with tfe.execution_mode(execution_mode):
     device, data_format = device_and_format
     model = densenet.DenseNet(
         self.depth, self.growth_rate, self.num_blocks, self.output_classes,
         self.num_layers_in_each_block, data_format,
         bottleneck=True, compression=0.5, weight_decay=1e-4, dropout_rate=0,
         pool_initial=True, include_top=True)
     if defun:
       model.call = tfe.defun(model.call, compiled=compiled)
     with tf.device(device):
       images, _ = random_batch(batch_size, data_format)
       run_inference(warmup_steps)
       wait_for_pending_ops()
       # Collect garbage before the clock starts.
       gc.collect()
       start = time.time()
       run_inference(timed_steps)
       wait_for_pending_ops()
       self._report(label, start, timed_steps, device, batch_size,
                    data_format)
Ejemplo n.º 2
0
 def _benchmark_eager_apply(self, label, device_and_format, defun=False,
                            execution_mode=None, compiled=False):
     """Benchmarks ResNet50 forward passes run with `training=False`.

     Five untimed passes are followed by thirty timed ones on a batch from
     `random_batch(64, data_format)`; the timing is handed to `self._report`.

     Args:
       label: Passed through to `self._report`.
       device_and_format: A `(device, data_format)` pair.
       defun: If truthy, `model.call` is wrapped with `tfe.defun`.
       execution_mode: Given to `tfe.execution_mode`; when truthy,
         `tfe.async_wait()` is called after each group of passes.
       compiled: Forwarded to `tfe.defun` as its `compiled` argument.
     """

     def forward_passes(net, batch, count):
         # Each output has `.cpu()` called on it and is then discarded.
         for _ in xrange(count):
             net(batch, training=False).cpu()

     with tfe.execution_mode(execution_mode):
         device, data_format = device_and_format
         resnet = resnet50.ResNet50(data_format)
         if defun:
             resnet.call = tfe.defun(resnet.call, compiled=compiled)
         batch_size, warmup_iters, timed_iters = 64, 5, 30
         with tf.device(device):
             images, _ = random_batch(batch_size, data_format)

             # Warm-up phase (not timed).
             forward_passes(resnet, images, warmup_iters)
             if execution_mode:
                 tfe.async_wait()
             gc.collect()

             # Timed phase.
             started_at = time.time()
             forward_passes(resnet, images, timed_iters)
             if execution_mode:
                 tfe.async_wait()
             self._report(label, started_at, timed_iters, device,
                          batch_size, data_format)
Ejemplo n.º 3
0
  def _benchmark_eager_train(self, label, make_iterator, defun=False,
                             execution_mode=None):
    """Times ResNet50 training steps for every configured batch size.

    For each size yielded by `self._train_batch_sizes()`, a new model and
    optimizer are built, 3 warm-up steps are run, and then 10 steps are timed
    and handed to `self._report`.

    Args:
      label: Passed through to `self._report`.
      make_iterator: Callable that receives an `(images, labels)` tuple and
        returns an object whose `next()` yields `(images, labels)` pairs.
      defun: If truthy, `model.call` is wrapped with `tfe.defun`.
      execution_mode: Given to `tfe.execution_mode`; when truthy,
        `tfe.async_wait()` is called after each group of steps.
    """
    warmup_steps = 3
    timed_steps = 10

    def train_for(step_count):
      for _ in xrange(step_count):
        step_images, step_labels = iterator.next()
        train_one_step(model, step_images, step_labels, optimizer)

    def wait_until_idle():
      if execution_mode:
        tfe.async_wait()
      self._force_gpu_sync()

    with tfe.execution_mode(execution_mode):
      device, data_format = device_and_data_format()
      for batch_size in self._train_batch_sizes():
        images, labels = random_batch(batch_size)
        model = resnet50.ResNet50(data_format)
        if defun:
          model.call = tfe.defun(model.call)
        optimizer = tf.train.GradientDescentOptimizer(0.1)

        with tf.device(device):
          iterator = make_iterator((images, labels))
          train_for(warmup_steps)
          wait_until_idle()
          gc.collect()

          start = time.time()
          train_for(timed_steps)
          wait_until_idle()
          self._report(label, start, timed_steps, device, batch_size,
                       data_format)
Ejemplo n.º 4
0
 def _apply(self, defun=False, execution_mode=None):
     """Runs one ResNet50 forward pass and checks the output shape.

     Args:
       defun: If truthy, `model.call` is wrapped with `tfe.defun`.
       execution_mode: Given to `tfe.execution_mode` for the forward pass.
     """
     expected_shape = (2, 1000)
     device, data_format = device_and_data_format()
     net = resnet50.ResNet50(data_format)
     if defun:
         net.call = tfe.defun(net.call)
     with tf.device(device):
         with tfe.execution_mode(execution_mode):
             batch, _ = random_batch(2, data_format)
             result = net(batch, training=False)
             # Called unconditionally, whatever `execution_mode` is.
             tfe.async_wait()
     self.assertEqual(expected_shape, result.shape)
Ejemplo n.º 5
0
 def _apply(self, defun=False, execution_mode=None):
   """Checks that a ResNet50 forward pass on 2 images yields shape (2, 1000).

   Args:
     defun: If truthy, `model.call` is wrapped with `tfe.defun`.
     execution_mode: Given to `tfe.execution_mode` around the forward pass.
   """
   num_images = 2
   target_device, data_format = device_and_data_format()
   resnet = resnet50.ResNet50(data_format)
   if defun:
     resnet.call = tfe.defun(resnet.call)
   with tf.device(target_device), tfe.execution_mode(execution_mode):
     inputs, _ = random_batch(num_images, data_format)
     predictions = resnet(inputs, training=False)
     # The wait is issued regardless of the execution mode.
     tfe.async_wait()
   self.assertEqual((num_images, 1000), predictions.shape)
Ejemplo n.º 6
0
    def _benchmark_eager_train(self, label, make_iterator, device_and_format,
                               defun=False, execution_mode=None,
                               compiled=False):
        """Times DenseNet gradient steps for every configured batch size.

        For each size from `self._train_batch_sizes()`, a new model and
        optimizer are created, 3 warm-up steps are run, and 10 further steps
        are timed and handed to `self._report`.

        Args:
          label: Passed through to `self._report`.
          make_iterator: Callable that receives an `(images, labels)` tuple
            and returns an object whose `next()` yields such pairs.
          device_and_format: A `(device, data_format)` pair.
          defun: If truthy, both `model.call` and `apply_gradients` are
            wrapped with `tfe.defun`.
          execution_mode: Given to `tfe.execution_mode`; when truthy,
            `tfe.async_wait()` is called after each group of steps.
          compiled: Forwarded to `tfe.defun` as its `compiled` argument.
        """
        warmup_steps, timed_steps = 3, 10

        def run_steps(count):
            for _ in xrange(count):
                batch_images, batch_labels = iterator.next()
                apply_grads(
                    model, optimizer,
                    compute_gradients(model, batch_images, batch_labels))

        def settle():
            if execution_mode:
                tfe.async_wait()
            self._force_device_sync()

        with tfe.execution_mode(execution_mode):
            device, data_format = device_and_format
            for batch_size in self._train_batch_sizes():
                images, labels = random_batch(batch_size, data_format)
                model = densenet.DenseNet(
                    self.depth, self.growth_rate, self.num_blocks,
                    self.output_classes, self.num_layers_in_each_block,
                    data_format, bottleneck=True, compression=0.5,
                    weight_decay=1e-4, dropout_rate=0, pool_initial=True,
                    include_top=True)
                optimizer = tf.train.GradientDescentOptimizer(0.1)
                if defun:
                    model.call = tfe.defun(model.call, compiled=compiled)
                    apply_grads = tfe.defun(apply_gradients,
                                            compiled=compiled)
                else:
                    apply_grads = apply_gradients

                with tf.device(device):
                    iterator = make_iterator((images, labels))
                    run_steps(warmup_steps)
                    settle()
                    gc.collect()

                    start = time.time()
                    run_steps(timed_steps)
                    settle()
                    self._report(label, start, timed_steps, device,
                                 batch_size, data_format)
Ejemplo n.º 7
0
 def _test_train(self, execution_mode=None):
     """Runs a single training step and checks variables and summaries.

     One call to `train_one_step` is made on a batch from
     `random_batch(2, data_format)` while a summary file writer for a fresh
     temporary directory is the default. The test then expects the model to
     have 320 variables, the log directory to yield exactly two events, and
     the first value of the second event to be tagged 'loss'.

     Args:
       execution_mode: Given to `tfe.execution_mode` for the training step.
     """
     # Function-scope import so this method does not rely on the module's
     # import block already providing `shutil`.
     import shutil

     device, data_format = device_and_data_format()
     model = resnet50.ResNet50(data_format)
     tf.train.get_or_create_global_step()
     logdir = tempfile.mkdtemp()
     # mkdtemp() leaves deletion to the caller. Register the removal right
     # away so the directory is cleaned up even if an assertion below fails.
     self.addCleanup(shutil.rmtree, logdir, ignore_errors=True)
     with tf.contrib.summary.create_file_writer(
             logdir, max_queue=0, name='t0').as_default(), \
             tf.contrib.summary.always_record_summaries():
         with tf.device(device), tfe.execution_mode(execution_mode):
             optimizer = tf.train.GradientDescentOptimizer(0.1)
             images, labels = random_batch(2, data_format)
             train_one_step(model, images, labels, optimizer)
             self.assertEqual(320, len(model.variables))
             tfe.async_wait()
     events = summary_test_util.events_from_logdir(logdir)
     self.assertEqual(len(events), 2)
     self.assertEqual(events[1].summary.value[0].tag, 'loss')
Ejemplo n.º 8
0
 def _test_train(self, execution_mode=None):
   """Trains ResNet50 for one step and verifies variables and summaries.

   A summary file writer pointed at a new temporary directory is made the
   default while `train_one_step` runs once on `random_batch(2, data_format)`.
   Afterwards the model must report 320 variables, and the log directory must
   contain exactly two events, the second of which has 'loss' as the tag of
   its first summary value.

   Args:
     execution_mode: Given to `tfe.execution_mode` for the training step.
   """
   # Imported locally so the method works without a module-level `shutil`
   # import.
   import shutil

   device, data_format = device_and_data_format()
   model = resnet50.ResNet50(data_format)
   tf.train.get_or_create_global_step()
   logdir = tempfile.mkdtemp()
   # The caller owns directories made by mkdtemp(); schedule the removal now
   # so it also happens when one of the assertions below fails.
   self.addCleanup(shutil.rmtree, logdir, ignore_errors=True)
   with tf.contrib.summary.create_file_writer(
       logdir, max_queue=0,
       name='t0').as_default(), tf.contrib.summary.always_record_summaries():
     with tf.device(device), tfe.execution_mode(execution_mode):
       optimizer = tf.train.GradientDescentOptimizer(0.1)
       images, labels = random_batch(2, data_format)
       train_one_step(model, images, labels, optimizer)
       self.assertEqual(320, len(model.variables))
       tfe.async_wait()
   events = summary_test_util.events_from_logdir(logdir)
   self.assertEqual(len(events), 2)
   self.assertEqual(events[1].summary.value[0].tag, 'loss')
Ejemplo n.º 9
0
  def _benchmark_eager_train(self, label, make_iterator, device_and_format,
                             defun=False, execution_mode=None,
                             compiled=False):
    """Benchmarks DenseNet training steps over the configured batch sizes.

    Every batch size from `self._train_batch_sizes()` gets its own model and
    optimizer. Three steps are run untimed, then ten steps are timed and the
    result is handed to `self._report`.

    Args:
      label: Passed through to `self._report`.
      make_iterator: Callable that receives an `(images, labels)` tuple and
        returns an object whose `next()` yields such pairs.
      device_and_format: A `(device, data_format)` pair.
      defun: If truthy, `model.call` and `apply_gradients` are both wrapped
        with `tfe.defun`.
      execution_mode: Given to `tfe.execution_mode`; when truthy,
        `tfe.async_wait()` is called after each group of steps.
      compiled: Forwarded to `tfe.defun` as its `compiled` argument.
    """
    num_burn = 3
    num_iters = 10
    model_options = dict(bottleneck=True, compression=0.5, weight_decay=1e-4,
                         dropout_rate=0, pool_initial=True, include_top=True)

    def one_step():
      step_images, step_labels = iterator.next()
      apply_grads(model, optimizer,
                  compute_gradients(model, step_images, step_labels))

    with tfe.execution_mode(execution_mode):
      device, data_format = device_and_format
      for batch_size in self._train_batch_sizes():
        images, labels = random_batch(batch_size, data_format)
        model = densenet.DenseNet(self.depth, self.growth_rate,
                                  self.num_blocks, self.output_classes,
                                  self.num_layers_in_each_block, data_format,
                                  **model_options)
        optimizer = tf.train.GradientDescentOptimizer(0.1)
        if defun:
          model.call = tfe.defun(model.call, compiled=compiled)
          apply_grads = tfe.defun(apply_gradients, compiled=compiled)
        else:
          apply_grads = apply_gradients

        with tf.device(device):
          iterator = make_iterator((images, labels))

          # Untimed warm-up.
          for _ in xrange(num_burn):
            one_step()
          if execution_mode:
            tfe.async_wait()
          self._force_device_sync()
          gc.collect()

          # Timed section.
          start = time.time()
          for _ in xrange(num_iters):
            one_step()
          if execution_mode:
            tfe.async_wait()
          self._force_device_sync()
          self._report(label, start, num_iters, device, batch_size,
                       data_format)
Ejemplo n.º 10
0
    def _benchmark_eager_train(self, label, make_iterator, device_and_format,
                               defun=False, execution_mode=None,
                               compiled=False):
        """Times ResNet50 gradient steps for every configured batch size.

        For each size from `self._train_batch_sizes()`, a new model and
        optimizer are built, 3 warm-up steps are run, and 10 further steps
        are timed and handed to `self._report`.

        Args:
          label: Passed through to `self._report`.
          make_iterator: Callable that receives an `(images, labels)` tuple
            and returns an object whose `next()` yields such pairs.
          device_and_format: A `(device, data_format)` pair.
          defun: If truthy, both `model.call` and `apply_gradients` are
            wrapped with `tfe.defun`.
          execution_mode: Given to `tfe.execution_mode`; when truthy,
            `tfe.async_wait()` is called after each group of steps.
          compiled: Forwarded to `tfe.defun` as its `compiled` argument.
        """
        warmup_count = 3
        measured_count = 10

        def run_phase(step_count):
            # Runs the steps, then waits for outstanding work to finish.
            for _ in xrange(step_count):
                batch_images, batch_labels = iterator.next()
                apply_grads(
                    model, optimizer,
                    compute_gradients(model, batch_images, batch_labels))
            if execution_mode:
                tfe.async_wait()
            self._force_device_sync()

        with tfe.execution_mode(execution_mode):
            device, data_format = device_and_format
            for batch_size in self._train_batch_sizes():
                images, labels = random_batch(batch_size, data_format)
                model = resnet50.ResNet50(data_format)
                optimizer = tf.train.GradientDescentOptimizer(0.1)
                if defun:
                    model.call = tfe.defun(model.call, compiled=compiled)
                apply_grads = (
                    tfe.defun(apply_gradients, compiled=compiled)
                    if defun else apply_gradients)

                with tf.device(device):
                    iterator = make_iterator((images, labels))
                    run_phase(warmup_count)
                    gc.collect()

                    start = time.time()
                    run_phase(measured_count)
                    self._report(label, start, measured_count, device,
                                 batch_size, data_format)
Ejemplo n.º 11
0
  def _benchmark_eager_train(self, label, make_iterator, device_and_format,
                             defun=False, execution_mode=None):
    """Times ResNet50 gradient steps for every configured batch size.

    For each size from `self._train_batch_sizes()`, a new model and optimizer
    are built, 3 warm-up steps are run, and 10 further steps are timed and
    handed to `self._report`.

    Args:
      label: Passed through to `self._report`.
      make_iterator: Callable that receives an `(images, labels)` tuple and
        returns an object whose `next()` yields such pairs.
      device_and_format: A `(device, data_format)` pair.
      defun: If truthy, `model.call` is wrapped with `tfe.function` and
        `apply_gradients` with `tfe.defun`.
      execution_mode: Given to `tfe.execution_mode`; when truthy,
        `tfe.async_wait()` is called after each group of steps.
    """
    warmup_steps = 3
    timed_steps = 10

    def train(step_count):
      for _ in xrange(step_count):
        step_images, step_labels = iterator.next()
        apply_grads(model, optimizer,
                    compute_gradients(model, step_images, step_labels))

    def drain():
      if execution_mode:
        tfe.async_wait()
      self._force_device_sync()

    with tfe.execution_mode(execution_mode):
      device, data_format = device_and_format
      for batch_size in self._train_batch_sizes():
        images, labels = random_batch(batch_size, data_format)
        model = resnet50.ResNet50(data_format)
        optimizer = tf.train.GradientDescentOptimizer(0.1)
        if defun:
          model.call = tfe.function(model.call)
          # TODO(apassos) enable tf.function here
          apply_grads = tfe.defun(apply_gradients)
        else:
          apply_grads = apply_gradients

        with tf.device(device):
          iterator = make_iterator((images, labels))
          train(warmup_steps)
          drain()
          gc.collect()

          start = time.time()
          train(timed_steps)
          drain()
          self._report(label, start, timed_steps, device, batch_size,
                       data_format)
Ejemplo n.º 12
0
 def _benchmark_eager_apply(self, label, device_and_format, defun=False,
                            execution_mode=None, compiled=False):
     """Benchmarks DenseNet forward passes run with `training=False`.

     Five untimed passes are followed by thirty timed ones on a batch from
     `random_batch(64, data_format)`; the timing is handed to `self._report`.

     Args:
       label: Passed through to `self._report`.
       device_and_format: A `(device, data_format)` pair.
       defun: If truthy, `model.call` is wrapped with `tfe.defun`.
       execution_mode: Given to `tfe.execution_mode`; when truthy,
         `tfe.async_wait()` is called after each group of passes.
       compiled: Forwarded to `tfe.defun` as its `compiled` argument.
     """
     batch_size = 64
     burn_in_passes = 5
     measured_passes = 30
     model_options = dict(bottleneck=True,
                          compression=0.5,
                          weight_decay=1e-4,
                          dropout_rate=0,
                          pool_initial=True,
                          include_top=True)

     def run_phase(pass_count):
         # Runs the passes, then waits if an execution mode was given.
         for _ in xrange(pass_count):
             model(images, training=False).cpu()
         if execution_mode:
             tfe.async_wait()

     with tfe.execution_mode(execution_mode):
         device, data_format = device_and_format
         model = densenet.DenseNet(self.depth, self.growth_rate,
                                   self.num_blocks, self.output_classes,
                                   self.num_layers_in_each_block,
                                   data_format, **model_options)
         if defun:
             model.call = tfe.defun(model.call, compiled=compiled)
         with tf.device(device):
             images, _ = random_batch(batch_size, data_format)
             run_phase(burn_in_passes)
             gc.collect()
             start = time.time()
             run_phase(measured_passes)
             self._report(label, start, measured_passes, device,
                          batch_size, data_format)
Ejemplo n.º 13
0
 def _benchmark_eager_apply(self, label, device_and_format, defun=False,
                            execution_mode=None, compiled=False):
   """Times inference-mode forward passes of ResNet50 and reports them.

   A batch from `random_batch(64, data_format)` goes through the model 5
   times untimed and then 30 times under the clock; the start time and
   iteration count are handed to `self._report`.

   Args:
     label: Passed through to `self._report`.
     device_and_format: A `(device, data_format)` pair.
     defun: If truthy, `model.call` is wrapped with `tfe.defun`.
     execution_mode: Given to `tfe.execution_mode`; when truthy,
       `tfe.async_wait()` is called after the warm-up and after the timed loop.
     compiled: Forwarded to `tfe.defun` as its `compiled` argument.
   """
   batch_size, warmup_passes, timed_passes = 64, 5, 30

   def run_phase(pass_count):
     # Runs the passes, then waits if an execution mode was given.
     for _ in xrange(pass_count):
       resnet(images, training=False).cpu()
     if execution_mode:
       tfe.async_wait()

   with tfe.execution_mode(execution_mode):
     device, data_format = device_and_format
     resnet = resnet50.ResNet50(data_format)
     if defun:
       resnet.call = tfe.defun(resnet.call, compiled=compiled)
     with tf.device(device):
       images, _ = random_batch(batch_size, data_format)
       run_phase(warmup_passes)
       gc.collect()
       began = time.time()
       run_phase(timed_passes)
       self._report(label, began, timed_passes, device, batch_size,
                    data_format)
Ejemplo n.º 14
0
  def _benchmark_eager_train(self, label, make_iterator, device_and_format,
                             defun=False, execution_mode=None,
                             compiled=False):
    """Benchmarks `train_one_step` on ResNet50 over the configured batch sizes.

    Every batch size from `self._train_batch_sizes()` gets its own model and
    optimizer. Three steps are run untimed, then ten steps are timed and the
    result is handed to `self._report`.

    Args:
      label: Passed through to `self._report`.
      make_iterator: Callable that receives an `(images, labels)` tuple and
        returns an object whose `next()` yields such pairs.
      device_and_format: A `(device, data_format)` pair.
      defun: If truthy, `model.call` is wrapped with `tfe.defun`.
      execution_mode: Given to `tfe.execution_mode`; when truthy,
        `tfe.async_wait()` is called after each group of steps.
      compiled: Forwarded to `tfe.defun` as its `compiled` argument.
    """
    burn_in_steps = 3
    measured_steps = 10

    def run_phase(step_count):
      # Runs the steps, then waits for outstanding work to finish.
      for _ in xrange(step_count):
        step_images, step_labels = iterator.next()
        train_one_step(model, step_images, step_labels, optimizer)
      if execution_mode:
        tfe.async_wait()
      self._force_device_sync()

    with tfe.execution_mode(execution_mode):
      device, data_format = device_and_format
      for batch_size in self._train_batch_sizes():
        images, labels = random_batch(batch_size, data_format)
        model = resnet50.ResNet50(data_format)
        if defun:
          model.call = tfe.defun(model.call, compiled=compiled)
        optimizer = tf.train.GradientDescentOptimizer(0.1)

        with tf.device(device):
          iterator = make_iterator((images, labels))
          run_phase(burn_in_steps)
          gc.collect()

          start = time.time()
          run_phase(measured_steps)
          self._report(label, start, measured_steps, device, batch_size,
                       data_format)