Example #1
    def _next_internal(self):
        autograph_status = autograph_ctx.control_status_ctx().status
        autograph_disabled = autograph_status == autograph_ctx.Status.DISABLED
        if not context.executing_eagerly() and autograph_disabled:
            self._get_next_call_count += 1
            if self._get_next_call_count > GET_NEXT_CALL_ERROR_THRESHOLD:
                raise ValueError(GET_NEXT_CALL_ERROR_MESSAGE)

        if not context.executing_eagerly():
            # TODO(b/169442955): Investigate the need for this colocation constraint.
            with ops.colocate_with(self._iterator_resource):
                ret = gen_dataset_ops.iterator_get_next(
                    self._iterator_resource,
                    output_types=self._flat_output_types,
                    output_shapes=self._flat_output_shapes)
            return structure.from_compatible_tensor_list(
                self._element_spec, ret)

        # TODO(b/77291417): This runs in sync mode as iterators use an error status
        # to communicate that there is no more data to iterate over.
        with context.execution_mode(context.SYNC):
            ret = gen_dataset_ops.iterator_get_next(
                self._iterator_resource,
                output_types=self._flat_output_types,
                output_shapes=self._flat_output_shapes)

            try:
                # Fast path for the case `self._structure` is not a nested structure.
                return self._element_spec._from_compatible_tensor_list(ret)  # pylint: disable=protected-access
            except AttributeError:
                return structure.from_compatible_tensor_list(
                    self._element_spec, ret)
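For orientation: `context.execution_mode` is a context manager that switches the eager executor between `context.SYNC` and `context.ASYNC` (blocking vs. fire-and-forget op dispatch) and restores the previous mode on exit. A minimal sketch of the pattern, assuming only that behavior (the `tf.add` workload is illustrative, not from the source):

import tensorflow as tf
from tensorflow.python.eager import context

with context.execution_mode(context.SYNC):
    total = tf.add(1, 2)  # dispatched and completed before the call returns
print(total.numpy())  # 3; the previous execution mode is back in force here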
Example #2
 def testExecuteBasicAsync(self):
     with context.execution_mode(context.ASYNC):
         three = constant_op.constant(3)
         five = constant_op.constant(5)
         product = execute(b'Mul',
                           num_outputs=1,
                           inputs=[three, five],
                           attrs=('T', three.dtype.as_datatype_enum))[0]
         self.assertAllEqual(15, product)
     # Error: Invalid arguments
     # TODO(b/149995282): When an exception is thrown in ASYNC mode, it seems
     # there are things left over that cause mutex corruption when
     # _reset_context() is called before the next test is executed.
     #
     # context.set_execution_mode(context.ASYNC)
     # with self.assertRaises(errors.InvalidArgumentError):
     #   execute(
     #       b'MatMul',
     #       num_outputs=1,
     #       inputs=[three, five],
     #       attrs=('transpose_a', False, 'transpose_b', False, 'T',
     #              three.dtype.as_datatype_enum))
     #   context.context().executor.wait()
     #
     context.context().executor.clear_error()
     context.context().execution_mode = context.SYNC
Example #3
    def _next_internal(self):
        """Returns a nested structure of `tf.Tensor`s containing the next element.
    """
        if not context.executing_eagerly():
            with ops.device(self._device):
                ret = gen_dataset_ops.iterator_get_next(
                    self._iterator_resource,
                    output_types=self._flat_output_types,
                    output_shapes=self._flat_output_shapes)
            return structure.from_compatible_tensor_list(
                self._element_spec, ret)

        # This runs in sync mode as iterators use an error status to communicate
        # that there is no more data to iterate over.
        # TODO(b/77291417): Fix
        with context.execution_mode(context.SYNC):
            with ops.device(self._device):
                # TODO(ashankar): Consider removing this ops.device() contextmanager
                # and instead mimic ops placement in graphs: Operations on resource
                # handles execute on the same device as where the resource is placed.
                ret = gen_dataset_ops.iterator_get_next(
                    self._iterator_resource,
                    output_types=self._flat_output_types,
                    output_shapes=self._flat_output_shapes)

            try:
                # Fast path for the case `self._structure` is not a nested structure.
                return self._element_spec._from_compatible_tensor_list(ret)  # pylint: disable=protected-access
            except AttributeError:
                return structure.from_compatible_tensor_list(
                    self._element_spec, ret)
Example #4
 def _benchmark_eager_apply(self,
                            label,
                            device_and_format,
                            defun=False,
                            execution_mode=None):
     with context.execution_mode(execution_mode):
         device, data_format = device_and_format
         model = resnet50.ResNet50(data_format)
         if defun:
             model.call = tf.function(model.call)
         batch_size = 64
         num_burn = 5
         num_iters = 30
         with tf.device(device):
             images, _ = resnet50_test_util.random_batch(
                 batch_size, data_format)
             for _ in xrange(num_burn):
                 model(images, training=False).cpu()
             if execution_mode:
                 context.async_wait()
             gc.collect()
             start = time.time()
             for _ in xrange(num_iters):
                 model(images, training=False).cpu()
             if execution_mode:
                 context.async_wait()
             self._report(label, start, num_iters, device, batch_size,
                          data_format)
Example #5
 def testDatasetEagerIteration(self, execution_mode):
   with context.eager_mode(), context.execution_mode(execution_mode):
     val = 0
     dataset = dataset_ops.Dataset.range(10)
     for foo in dataset:
       self.assertEqual(val, foo.numpy())
       val += 1
Example #6
  def decorator(self, *args, **kwargs):
    # TODO(b/117110239): Re-enable.
    # with context.execution_mode(context.ASYNC):
    #   f(self, *args, **kwargs)

    with context.execution_mode(context.SYNC):
      f(self, *args, **kwargs)
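This `decorator` body is ordinarily returned by a wrapper factory that reruns a test method under each execution mode. A hypothetical reconstruction of the enclosing function (the name `run_sync_and_async` is an assumption, not from the source):

import functools

from tensorflow.python.eager import context

def run_sync_and_async(f):
    # Hypothetical factory enclosing the decorator above: it reruns the
    # wrapped test method once per execution mode (SYNC only, while the
    # ASYNC path is TODO-disabled).
    @functools.wraps(f)
    def decorator(self, *args, **kwargs):
        with context.execution_mode(context.SYNC):
            f(self, *args, **kwargs)
    return decorator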
Example #7
    def _next_internal(self):
        if not context.executing_eagerly():
            # TODO(b/169442955): Investigate the need for this colocation constraint.
            with ops.colocate_with(self._iterator_resource):
                ret = gen_dataset_ops.iterator_get_next(
                    self._iterator_resource,
                    output_types=self._flat_output_types,
                    output_shapes=self._flat_output_shapes)
            return structure.from_compatible_tensor_list(
                self._element_spec, ret)

        # TODO(b/77291417): This runs in sync mode as iterators use an error status
        # to communicate that there is no more data to iterate over.
        with context.execution_mode(context.SYNC):
            ret = gen_dataset_ops.iterator_get_next(
                self._iterator_resource,
                output_types=self._flat_output_types,
                output_shapes=self._flat_output_shapes)

            try:
                # Fast path for the case `self._structure` is not a nested structure.
                return self._element_spec._from_compatible_tensor_list(ret)  # pylint: disable=protected-access
            except AttributeError:
                return structure.from_compatible_tensor_list(
                    self._element_spec, ret)
Example #8
  def _next_internal(self):
    """Returns a nested structure of `tf.Tensor`s containing the next element.
    """
    if not context.executing_eagerly():
      with ops.device(self._device):
        ret = gen_dataset_ops.iterator_get_next(
            self._iterator_resource,
            output_types=self._flat_output_types,
            output_shapes=self._flat_output_shapes)
      return self._structure._from_compatible_tensor_list(ret)  # pylint: disable=protected-access

    # This runs in sync mode as iterators use an error status to communicate
    # that there is no more data to iterate over.
    # TODO(b/77291417): Fix
    with context.execution_mode(context.SYNC):
      with ops.device(self._device):
        # TODO(ashankar): Consider removing this ops.device() contextmanager
        # and instead mimic ops placement in graphs: Operations on resource
        # handles execute on the same device as where the resource is placed.
        # NOTE(mrry): Here we use the "_sync" variant of `iterator_get_next`
        # because in eager mode this code will run synchronously on the calling
        # thread. Therefore we do not need to make a defensive context switch
        # to a background thread, and can achieve a small constant performance
        # boost by invoking the iterator synchronously.
        ret = gen_dataset_ops.iterator_get_next_sync(
            self._iterator_resource,
            output_types=self._flat_output_types,
            output_shapes=self._flat_output_shapes)

      return self._structure._from_compatible_tensor_list(ret)  # pylint: disable=protected-access
Example #9
 def testEagerIteratorAsync(self):
   with context.eager_mode(), context.execution_mode(context.ASYNC):
     val = 0
     dataset = dataset_ops.Dataset.range(10)
     for foo in dataset:
       self.assertEqual(val, foo.numpy())
       val += 1
Example #10
  def _next_internal(self):
    """Returns a nested structure of `tf.Tensor`s containing the next element.
    """
    if not context.executing_eagerly():
      with ops.device(self._device):
        ret = gen_dataset_ops.iterator_get_next(
            self._iterator_resource,
            output_types=self._flat_output_types,
            output_shapes=self._flat_output_shapes)
      return structure.from_compatible_tensor_list(self._element_spec, ret)

    # This runs in sync mode as iterators use an error status to communicate
    # that there is no more data to iterate over.
    # TODO(b/77291417): Fix
    with context.execution_mode(context.SYNC):
      with ops.device(self._device):
        # TODO(ashankar): Consider removing this ops.device() contextmanager
        # and instead mimic ops placement in graphs: Operations on resource
        # handles execute on the same device as where the resource is placed.
        # NOTE(mrry): Here we use the "_sync" variant of `iterator_get_next`
        # because in eager mode this code will run synchronously on the calling
        # thread. Therefore we do not need to make a defensive context switch
        # to a background thread, and can achieve a small constant performance
        # boost by invoking the iterator synchronously.
        ret = gen_dataset_ops.iterator_get_next_sync(
            self._iterator_resource,
            output_types=self._flat_output_types,
            output_shapes=self._flat_output_shapes)

      try:
        # Fast path for the case `self._structure` is not a nested structure.
        return self._element_spec._from_compatible_tensor_list(ret)  # pylint: disable=protected-access
      except AttributeError:
        return structure.from_compatible_tensor_list(self._element_spec, ret)
Example #11
 def _next_internal(self):
   """Returns a nested structure of `tf.Tensor`s containing the next element.
   """
   # This runs in sync mode as iterators use an error status to communicate
   # that there is no more data to iterate over.
   # TODO(b/77291417): Fix
   with context.execution_mode(context.SYNC):
     return super(Iterator, self)._next_internal()
Example #12
  def benchmarkAddScalars(self):
    with context.execution_mode(context.GRAPH_MODE):
      x = array_ops.placeholder(shape=[], dtype=dtypes.float32, name="x")
      y = array_ops.placeholder(shape=[], dtype=dtypes.float32, name="y")

      def bench():
        return gen_math_ops.add(x, y)

      self._run_and_report(bench, 1000)
Example #13
 def _apply(self, defun=False, execution_mode=None):
     device, data_format = resnet50_test_util.device_and_data_format()
     model = resnet50.ResNet50(data_format)
     if defun:
         model.call = tf.function(model.call)
     with tf.device(device), context.execution_mode(execution_mode):
         images, _ = resnet50_test_util.random_batch(2, data_format)
         output = model(images, training=False)
         context.async_wait()
     self.assertEqual((2, 1000), output.shape)
Example #14
  def benchmarkMatMul(self):
    with context.execution_mode(context.GRAPH_MODE):
      x = array_ops.placeholder(
          shape=[784, 1000], dtype=dtypes.float32, name="x")
      y = array_ops.placeholder(
          shape=[1000, 1000], dtype=dtypes.float32, name="y")

      def bench():
        return gen_math_ops.mat_mul(x, y)

      self._run_and_report(bench, 1000)
Example #15
 def _next_internal(self):
   """Returns a nested structure of `tf.Tensor`s containing the next element.
   """
   # This runs in sync mode as iterators use an error status to communicate
   # that there is no more data to iterate over.
   # TODO(b/77291417): Fix
   with context.execution_mode(context.SYNC):
     with ops.device(self._device):
       flat_ret = ged_ops.experimental_function_buffering_resource_get_next(
           function_buffer_resource=self._buffering_resource,
           output_types=self._flat_output_types)
     return self._element_structure._from_tensor_list(flat_ret)
Example #16
    def _test_train(self, execution_mode=None):
        start = time.process_time()
        model = mnist.custom_model()

        with tf.device("CPU"), context.execution_mode(execution_mode):
            optimizer = tf.keras.optimizers.SGD(0.1)
            images, labels = random_batch(1000)
            apply_gradients(model, optimizer,
                            compute_gradients(model, images, labels))
            context.async_wait()
        end = time.process_time()
        print("time: ", end - start)
Example #17
 def _test_train(self, execution_mode=None):
     start = time.process_time()
     device, data_format = device_and_data_format()
     model = resnet50.ResNet50(data_format)
     for i in range(10):
         with tf.device(device), context.execution_mode(execution_mode):
             optimizer = tf.keras.optimizers.SGD(0.1)
             images, labels = random_batch(32, data_format)
             apply_gradients(model, optimizer,
                             compute_gradients(model, images, labels))
             context.async_wait()
     end = time.process_time()
     print("time: ", end - start)
Example #18
 def _next_internal(self):
   """Returns a nested structure of `tf.Tensor`s containing the next element.
   """
   # This runs in sync mode as iterators use an error status to communicate
   # that there is no more data to iterate over.
   # TODO(b/77291417): Fix
   with context.execution_mode(context.SYNC):
     with ops.device(self._device):
       ret = ged_ops.experimental_function_buffering_resource_get_next(
           function_buffer_resource=self._buffering_resource,
           output_types=self._flat_output_types)
     return sparse.deserialize_sparse_tensors(
         nest.pack_sequence_as(self._output_types, ret), self._output_types,
         self._output_shapes, self._output_classes)
Example #19
def _run_benchmark(func, num_iters, execution_mode=None):
    with context.execution_mode(execution_mode):
        # call func to warm up
        func()
        if execution_mode == context.ASYNC:
            get_executor().wait()
        start = time.time()
        for _ in range(num_iters):
            func()
        if execution_mode == context.ASYNC:
            get_executor().wait()
        end = time.time()

        return end - start
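A hypothetical invocation of `_run_benchmark` (the matmul workload, shapes, and iteration count are illustrative; `get_executor` above is assumed to be a module-local helper returning the current eager executor):

import tensorflow as tf
from tensorflow.python.eager import context

a = tf.random.uniform([256, 256])
b = tf.random.uniform([256, 256])

# Time the same workload under both dispatch modes.
sync_seconds = _run_benchmark(lambda: tf.matmul(a, b), 100, context.SYNC)
async_seconds = _run_benchmark(lambda: tf.matmul(a, b), 100, context.ASYNC)
print("sync: %.4fs  async: %.4fs" % (sync_seconds, async_seconds))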
Example #20
  def testCopyBetweenDevicesAsync(self):
    with context.execution_mode(context.ASYNC):
      x = constant_op.constant([[1., 2.], [3., 4.]])
      x = x.cpu()
      x = x.gpu()
      x = x.gpu()
      x = x.cpu()
      context.async_wait()

    # Invalid device
    with self.assertRaises(RuntimeError):
      x.gpu(context.context().num_gpus() + 1)
      context.async_wait()
    context.async_clear_error()
Example #21
    def testCopyBetweenDevicesAsync(self):
        with context.execution_mode(context.ASYNC):
            x = constant_op.constant([[1., 2.], [3., 4.]])
            x = x.cpu()
            x = x.gpu()
            x = x.gpu()
            x = x.cpu()
            context.context().executor.wait()

        # Invalid device
        with self.assertRaises(RuntimeError):
            x.gpu(context.context().num_gpus() + 1)
            context.context().executor.wait()
        context.context().executor.clear_error()
Example #22
 def _next_internal(self):
   """Returns a nested structure of `tf.Tensor`s containing the next element.
   """
   # This runs in sync mode as iterators use an error status to communicate
   # that there is no more data to iterate over.
   # TODO(b/77291417): Fix
   with context.execution_mode(context.SYNC):
     with ops.device(self._device):
       ret = gen_dataset_ops.function_buffering_resource_get_next(
           function_buffer_resource=self._buffering_resource,
           output_types=self._flat_output_types)
     return sparse.deserialize_sparse_tensors(
         nest.pack_sequence_as(self._output_types, ret), self._output_types,
         self._output_shapes, self._output_classes)
Example #23
def run_benchmark(func, num_iters, execution_mode=None):
    ctx = context.context()
    with context.execution_mode(execution_mode):
        # call func to maybe warm up the GPU
        func()
        if execution_mode == context.ASYNC:
            ctx.async_wait()
        start = time.time()
        for _ in xrange(num_iters):
            func()
        if execution_mode == context.ASYNC:
            ctx.async_wait()
        end = time.time()

        return end - start
Example #24
    def testCopyBetweenDevicesAsync(self):
        if not context.context().num_gpus():
            self.skipTest('No GPUs found')
        with context.execution_mode(context.ASYNC):
            x = constant_op.constant([[1., 2.], [3., 4.]])
            x = x.cpu()
            x = x.gpu()
            x = x.gpu()
            x = x.cpu()
            context.async_wait()

        # Invalid device
        with self.assertRaises(RuntimeError):
            x.gpu(context.context().num_gpus() + 1)
            context.async_wait()
        context.async_clear_error()
Example #25
    def _call_for_each_replica(self, fn, args, kwargs):
        # For now, `fn` must be an @tf.function.
        # TODO(josh11b): Relax this restriction?  Main problem is if
        # (a) executing eagerly, (b) `fn` not @tf.function, and
        # (c) executed frequently.
        assert isinstance(fn, def_function.Function)

        if _outside_run_graph() is not None:
            # Nested case, should just use outer function's context for things like
            # the current replica index.
            # TODO(josh11b): Test this case!
            with MirroredFunctionReplicaContext(self._container_strategy()):
                results = fn(*nest.map_structure(_unwrap_tensors, args),
                             **nest.map_structure(_unwrap_tensors, kwargs))
                return nest.map_structure(_wrap_tensors, results)

        _replica_index.graph_outside_run = ops.get_default_graph()
        return_values = []

        try:
            with MirroredFunctionReplicaContext(self._container_strategy()):
                for index, device in enumerate(self._devices):
                    _replica_index.current = index
                    with ops.device(device):
                        if context.executing_eagerly():
                            # NOTE: These functions need to execute concurrently if they
                            # use a collective op. This is a particular concern with eager
                            # execution.
                            with context.execution_mode(context.ASYNC):
                                return_values.append(
                                    fn(
                                        *distribute_utils.select_replica(
                                            index, args),
                                        **distribute_utils.select_replica(
                                            index, kwargs)))
                        else:
                            return_values.append(
                                fn(
                                    *distribute_utils.select_replica(
                                        index, args),
                                    **distribute_utils.select_replica(
                                        index, kwargs)))
        finally:
            _replica_index.graph_outside_run = None
            _replica_index.current = None

        return distribute_utils.regroup(return_values)
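For context on the ASYNC branch above: under `context.ASYNC`, op dispatch enqueues the kernel and returns immediately, which is what lets per-replica calls containing collective ops make progress concurrently instead of blocking on one another. A minimal illustration of the dispatch-then-wait pattern (shapes are arbitrary):

import tensorflow as tf
from tensorflow.python.eager import context

with context.execution_mode(context.ASYNC):
    # Dispatch returns right away; the result is materialized in the background.
    product = tf.matmul(tf.ones([512, 512]), tf.ones([512, 512]))
context.async_wait()  # block until all pending async ops have completed
print(product.shape)  # (512, 512)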
Example #26
 def _run(self, func, num_iters, execution_mode=None):
     # call func to maybe warm up the GPU
     ctx = context.context()
     with context.execution_mode(execution_mode):
         func()
         if execution_mode == context.ASYNC:
             ctx.async_wait()
         start = time.time()
         for _ in xrange(num_iters):
             func()
         if execution_mode == context.ASYNC:
             ctx.async_wait()
         end = time.time()
         mean_us = (end - start) * 1e6 / num_iters
         self.report_benchmark(
             iters=num_iters,
             wall_time=mean_us,
             extras={"examples_per_sec": num_iters / (end - start)})
Example #27
 def _test_train(self, execution_mode=None):
     device, data_format = device_and_data_format()
     model = resnet50.ResNet50(data_format)
     tf.compat.v2.summary.experimental.set_step(
         tf.train.get_or_create_global_step())
     logdir = tempfile.mkdtemp()
     with tf.compat.v2.summary.create_file_writer(
             logdir, max_queue=0,
             name='t0').as_default(), tf.compat.v2.summary.record_if(True):
         with tf.device(device), context.execution_mode(execution_mode):
             optimizer = tf.train.GradientDescentOptimizer(0.1)
             images, labels = random_batch(2, data_format)
             apply_gradients(model, optimizer,
                             compute_gradients(model, images, labels))
             self.assertEqual(320, len(model.variables))
             context.async_wait()
     events = events_from_logdir(logdir)
     self.assertEqual(len(events), 2)
     self.assertEqual(events[1].summary.value[0].tag, 'loss')
Example #28
    def _benchmark_eager_train(self,
                               label,
                               make_iterator,
                               device_and_format,
                               defun=False,
                               execution_mode=None):
        with context.execution_mode(execution_mode):
            device, data_format = device_and_format
            for batch_size in self._train_batch_sizes():
                (images, labels) = resnet50_test_util.random_batch(
                    batch_size, data_format)
                model = resnet50.ResNet50(data_format)
                # TODO(b/161911585): tf_to_corert MLIR lowering pipeline should handle
                # case when momentum is not set.
                optimizer = tf.keras.optimizers.SGD(0.1, 0.1)
                apply_grads = apply_gradients
                if defun:
                    model.call = tf.function(model.call)
                    apply_grads = tf.function(apply_gradients)

                num_burn = 3
                num_iters = 10
                with tf.device(device):
                    iterator = make_iterator((images, labels))
                    for _ in xrange(num_burn):
                        (images, labels) = iterator.next()
                        apply_grads(model, optimizer,
                                    compute_gradients(model, images, labels))
                    if execution_mode:
                        context.async_wait()
                    self._force_device_sync()
                    gc.collect()

                    start = time.time()
                    for _ in xrange(num_iters):
                        (images, labels) = iterator.next()
                        apply_grads(model, optimizer,
                                    compute_gradients(model, images, labels))
                    if execution_mode:
                        context.async_wait()
                    self._force_device_sync()
                    self._report(label, start, num_iters, device, batch_size,
                                 data_format)
Example #29
 def _next_internal(self):
     """Returns a nested structure of `tf.Tensor`s containing the next element.
 """
     # This runs in sync mode as iterators use an error status to communicate
     # that there is no more data to iterate over.
     # TODO (b/77291417): Fix id:669
     # https://github.com/imdone/tensorflow/issues/670
     with context.execution_mode(context.SYNC):
         if self._buffer_resource_handle is not None:
             with ops.device(self._device):
                 ret = prefetching_ops.function_buffering_resource_get_next(
                     function_buffer_resource=self._buffer_resource_handle,
                     output_types=self._flat_output_types)
             return sparse.deserialize_sparse_tensors(
                 nest.pack_sequence_as(self._output_types,
                                       ret), self._output_types,
                 self._output_shapes, self._output_classes)
         else:
             return super(Iterator, self)._next_internal()
Example #30
 def testExecuteBasicAsync(self):
     with context.execution_mode(context.ASYNC):
         three = constant_op.constant(3)
         five = constant_op.constant(5)
         product = execute(b'Mul',
                           num_outputs=1,
                           inputs=[three, five],
                           attrs=('T', three.dtype.as_datatype_enum))[0]
         self.assertAllEqual(15, product)
     # Error: Invalid arguments
     context.set_execution_mode(context.ASYNC)
     with self.assertRaises(errors.InvalidArgumentError):
         execute(b'MatMul',
                 num_outputs=1,
                 inputs=[three, five],
                 attrs=('transpose_a', False, 'transpose_b', False, 'T',
                        three.dtype.as_datatype_enum))
         context.async_wait()
     context.async_clear_error()
     context.context().execution_mode = context.SYNC
Example #31
    def _benchmark_eager_train(self,
                               label,
                               make_iterator,
                               device_and_format,
                               defun=False,
                               execution_mode=None):
        with context.execution_mode(execution_mode):
            device, data_format = device_and_format
            for batch_size in self._train_batch_sizes():
                (images, labels) = resnet50_test_util.random_batch(
                    batch_size, data_format)
                model = resnet50.ResNet50(data_format)
                optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.1)
                apply_grads = apply_gradients
                if defun:
                    model.call = tf.function(model.call)
                    apply_grads = tf.function(apply_gradients)

                num_burn = 3
                num_iters = 10
                with tf.device(device):
                    iterator = make_iterator((images, labels))
                    for _ in xrange(num_burn):
                        (images, labels) = iterator.next()
                        apply_grads(model, optimizer,
                                    compute_gradients(model, images, labels))
                    if execution_mode:
                        context.async_wait()
                    self._force_device_sync()
                    gc.collect()

                    start = time.time()
                    for _ in xrange(num_iters):
                        (images, labels) = iterator.next()
                        apply_grads(model, optimizer,
                                    compute_gradients(model, images, labels))
                    if execution_mode:
                        context.async_wait()
                    self._force_device_sync()
                    self._report(label, start, num_iters, device, batch_size,
                                 data_format)
Example #32
 def testExecuteBasicAsync(self):
   with context.execution_mode(context.ASYNC):
     three = constant_op.constant(3)
     five = constant_op.constant(5)
     product = execute(
         b'Mul',
         num_outputs=1,
         inputs=[three, five],
         attrs=('T', three.dtype.as_datatype_enum))[0]
     self.assertAllEqual(15, product)
   # Error: Invalid arguments
   context.set_execution_mode(context.ASYNC)
   with self.assertRaises(errors.InvalidArgumentError):
     execute(
         b'MatMul',
         num_outputs=1,
         inputs=[three, five],
         attrs=('transpose_a', False, 'transpose_b', False, 'T',
                three.dtype.as_datatype_enum))
     context.async_wait()
   context.async_clear_error()
   context.set_execution_mode(context.SYNC)
Example #33
  def _next_internal(self):
    """Returns a nested structure of `tf.Tensor`s containing the next element.
    """
    # This runs in sync mode as iterators use an error status to communicate
    # that there is no more data to iterate over.
    # TODO(b/77291417): Fix
    with context.execution_mode(context.SYNC):
      with ops.device(self._device):
        # TODO(ashankar): Consider removing this ops.device() contextmanager
        # and instead mimic ops placement in graphs: Operations on resource
        # handles execute on the same device as where the resource is placed.
        # NOTE(mrry): Here we use the "_sync" variant of `iterator_get_next`
        # because in eager mode this code will run synchronously on the calling
        # thread. Therefore we do not need to make a defensive context switch
        # to a background thread, and can achieve a small constant performance
        # boost by invoking the iterator synchronously.
        ret = gen_dataset_ops.iterator_get_next_sync(
            self._resource,
            output_types=self._flat_output_types,
            output_shapes=self._flat_output_shapes)

      return sparse.deserialize_sparse_tensors(
          nest.pack_sequence_as(self._output_types, ret), self._output_types,
          self._output_shapes, self._output_classes)
Example #34
  def decorator(self, *args, **kwargs):
    with context.execution_mode(context.ASYNC):
      f(self, *args, **kwargs)

    with context.execution_mode(context.SYNC):
      f(self, *args, **kwargs)