Example #1
    def testOverrideThreadPool(self):
        def get_thread_id(_):
            # Python creates a dummy thread object to represent the current
            # thread when called from an "alien" thread (such as a
            # `PrivateThreadPool` thread in this case). It does not include
            # the TensorFlow-given display name, but it has a unique
            # identifier that maps one-to-one with the underlying OS thread.
            return np.array(threading.current_thread().ident).astype(np.int64)

        for num_threads in [1, 2, 4, 8, 16]:

            dataset = (Dataset.range(1000).map(
                lambda x: script_ops.py_func(get_thread_id, [x], dtypes.int64),
                num_parallel_calls=32).apply(unique.unique()))

            dataset = threadpool.override_threadpool(
                dataset,
                threadpool.PrivateThreadPool(
                    num_threads,
                    display_name='private_thread_pool_%d' % num_threads))

            thread_ids = []
            for next_element in datasets.Iterator(dataset):
                thread_ids.append(next_element)
            self.assertEqual(len(thread_ids), len(set(thread_ids)))
            self.assertGreater(len(thread_ids), 0)
            # NOTE(mrry): We don't control the thread pool scheduling, and
            # so cannot guarantee that all of the threads in the pool will
            # perform work.
            self.assertLessEqual(len(thread_ids), num_threads)
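The test above leans on a plain-Python guarantee: while two threads are alive at the same time, their `ident` values are distinct. A minimal standard-library sketch of that guarantee (the `Barrier` keeps all four threads alive at once, since idents may be recycled after a thread exits):

import threading

barrier = threading.Barrier(4)
idents = []

def record_ident():
    barrier.wait()  # hold here until all four threads are alive
    idents.append(threading.current_thread().ident)

threads = [threading.Thread(target=record_ident) for _ in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()
assert len(set(idents)) == 4  # simultaneously-live threads never share an ident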
Example #2
    def benchmarkSliceBatchCacheRepeatCallable(self):
        input_size = 10000
        batch_size = 100
        num_epochs = 100

        input_data = np.random.randn(input_size)

        dataset = (Dataset.from_tensor_slices(input_data).batch(
            batch_size).cache().repeat(num_epochs))
        iterator = datasets.Iterator(dataset)

        ends = [time.time()]
        for _ in iterator:
            ends.append(time.time())

        deltas = np.ediff1d(ends)
        median_wall_time = np.median(deltas)
        print(
            'Slice/batch/cache/repeat eager input size: %d batch size: %d Median '
            'wall time per element: %f' %
            (input_size, batch_size, median_wall_time))
        self.report_benchmark(
            iters=len(deltas),
            wall_time=median_wall_time,
            name='benchmark_slice_batch_cache_repeat_eager_input_%d_batch_%d' %
            (input_size, batch_size))
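The timing idiom in this benchmark (one timestamp before iteration, one after each element, then the median of consecutive differences) works for any iterable. A small sketch of just that idiom, assuming only NumPy (the helper name is illustrative):

import time
import numpy as np

def median_wall_time_per_element(iterable):
    # ends[0] is taken before the first element; each later entry is taken
    # after one element, so np.ediff1d(ends) holds per-element latencies.
    ends = [time.time()]
    for _ in iterable:
        ends.append(time.time())
    return np.median(np.ediff1d(ends))

print(median_wall_time_per_element(range(1000)))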
Example #3
    def testSparseTensorElements(self):
        components = (sparse_tensor.SparseTensorValue(
            indices=np.array([[0, 0], [1, 0], [2, 0]]),
            values=np.array([0, 0, 0]),
            dense_shape=np.array([3, 1])),
                      sparse_tensor.SparseTensorValue(
                          indices=np.array([[0, 0], [1, 1], [2, 2]]),
                          values=np.array([1, 2, 3]),
                          dense_shape=np.array([3, 3])))

        expected = [
            (sparse_tensor.SparseTensorValue(indices=np.array([[0]]),
                                             values=np.array([0]),
                                             dense_shape=np.array([1])),
             sparse_tensor.SparseTensorValue(indices=np.array([[0]]),
                                             values=np.array([1]),
                                             dense_shape=np.array([3]))),
            (sparse_tensor.SparseTensorValue(indices=np.array([[0]]),
                                             values=np.array([0]),
                                             dense_shape=np.array([1])),
             sparse_tensor.SparseTensorValue(indices=np.array([[1]]),
                                             values=np.array([2]),
                                             dense_shape=np.array([3]))),
            (sparse_tensor.SparseTensorValue(indices=np.array([[0]]),
                                             values=np.array([0]),
                                             dense_shape=np.array([1])),
             sparse_tensor.SparseTensorValue(indices=np.array([[2]]),
                                             values=np.array([3]),
                                             dense_shape=np.array([3]))),
        ]

        for i, result in enumerate(
                datasets.Iterator(Dataset.from_tensor_slices(components))):
            self.assertSparseValuesEqual(expected[i][0], result[0])
            self.assertSparseValuesEqual(expected[i][1], result[1])
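The expected values above follow from how a COO-format sparse tensor is sliced along dimension 0: each slice keeps the entries whose leading index equals the row, drops that leading coordinate, and keeps the trailing dense shape. A plain-NumPy sketch of that rule (slice_coo_rows is a made-up helper for illustration):

import numpy as np

def slice_coo_rows(indices, values, dense_shape):
    # Yield one (indices, values, dense_shape) triple per row of the sparse
    # tensor, mirroring Dataset.from_tensor_slices on a sparse component.
    for row in range(dense_shape[0]):
        mask = indices[:, 0] == row
        yield indices[mask][:, 1:], values[mask], dense_shape[1:]

indices = np.array([[0, 0], [1, 1], [2, 2]])
values = np.array([1, 2, 3])
for idx, vals, shape in slice_coo_rows(indices, values, np.array([3, 3])):
    print(idx, vals, shape)  # [[0]] [1] [3], then [[1]] [2] [3], then [[2]] [3] [3]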
Example #4
  def testMapAndFilter(self):
    def even(x):
      return math_ops.equal(math_ops.mod(x, 2), 0)

    it = datasets.Iterator(Dataset.range(8).map(math_ops.square).filter(even))
    got = [x.numpy() for x in it]
    self.assertAllEqual([0, 4, 16, 36], got)
Example #5
    def testPyFunc(self):
        def my_map(inp):
            return [[x + 1 for x in inp]]

        ds = Dataset.range(4).map(
            lambda x: script_ops.py_func(my_map, [[x]], dtypes.int64))
        got = [x.numpy() for x in datasets.Iterator(ds)]
        self.assertAllEqual([[1], [2], [3], [4]], got)
Example #6
  def testGetNext(self):
    iterator = datasets.Iterator(Dataset.range(4))
    self.assertEqual(0, iterator.get_next().numpy())
    self.assertEqual(1, iterator.get_next().numpy())
    self.assertEqual(2, iterator.get_next().numpy())
    self.assertEqual(3, iterator.get_next().numpy())
    with self.assertRaises(errors.OutOfRangeError):
      iterator.get_next()
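The same contract (call get_next() until OutOfRangeError) supports a generic drain helper; a short sketch assuming the errors module these tests already import:

def drain(iterator):
    # Collect elements until the iterator signals exhaustion.
    results = []
    while True:
        try:
            results.append(iterator.get_next().numpy())
        except errors.OutOfRangeError:
            return results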
Example #7
  def testMapAndFilter(self):
    # TODO(ashankar): Address this
    self.skipTest('Not working yet, requires function attribute support')

    def even(x):
      return math_ops.equal(math_ops.mod(x, 2), 0)

    it = datasets.Iterator(Dataset.range(8).map(math_ops.square).filter(even))
    got = [x.numpy() for x in it]
    self.assertAllEqual([0, 4, 16, 36], got)
Example #8
  def testRestoreInReconstructedIterator(self):
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
    dataset = Dataset.range(10)
    for i in range(5):
      iterator = datasets.Iterator(dataset)
      checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
      checkpoint.restore(saver.latest_checkpoint(checkpoint_directory))
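      # On the first pass there is no checkpoint yet, so latest_checkpoint()
      # returns None and restore() is a no-op; each later pass resumes from
      # the position saved at the end of the previous pass.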
      for j in range(2):
        self.assertEqual(i * 2 + j, iterator.get_next().numpy())
      checkpoint.save(file_prefix=checkpoint_prefix)
Example #9
  def testTensorsExplicitPrefetchToDevice(self):
    ds = Dataset.from_tensor_slices([0., 1.])
    ds = ds.apply(prefetching_ops.prefetch_to_device(test.gpu_device_name()))
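    # The eager Iterator cannot consume a dataset transformed by
    # prefetch_to_device (asserted below), but the dataset object itself can
    # still be iterated directly with a for loop.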

    with self.assertRaisesRegexp(TypeError, 'prefetch_to_device'):
      datasets.Iterator(ds)

    for i, x in enumerate(ds):
      with ops.device(test.gpu_device_name()):
        x = math_ops.add(x, x)
        self.assertEqual(float(i) + float(i), x.numpy())
Example #10
  def testMapCaptureLookupTable(self):
    default_val = -1
    keys = constant_op.constant(['brain', 'salad', 'surgery'])
    values = constant_op.constant([0, 1, 2], dtypes.int64)
    table = lookup.HashTable(
        lookup.KeyValueTensorInitializer(keys, values), default_val)
    dataset = Dataset.from_tensor_slices(['brain', 'salad', 'surgery'])
    dataset = dataset.map(table.lookup)
    it = datasets.Iterator(dataset)
    got = [x.numpy() for x in it]
    self.assertAllEqual([0, 1, 2], got)
Example #11
  def testNestedOutputs(self):
    ds = Dataset.zip((Dataset.range(4), Dataset.zip((Dataset.range(4),
                                                     Dataset.range(4)))))
    total = 0
    # The Iterator will return a nested structure of Tensor objects; unpack
    # the Tensors so they can be compared against plain integers.
    for (i, x) in enumerate(datasets.Iterator(ds)):
      want = (i, (i, i))
      got = (x[0].numpy(), (x[1][0].numpy(), x[1][1].numpy()))
      self.assertEqual(got, want)
      total += 1
    self.assertEqual(4, total)
Example #12
    def testRestoreExhaustedIterator(self):
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
        dataset = Dataset.range(3)
        iterator = datasets.Iterator(dataset)

        checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
        self.assertEqual(0, iterator.get_next().numpy())
        self.assertEqual(1, iterator.get_next().numpy())
        save_path = checkpoint.save(checkpoint_prefix)
        self.assertEqual(2, iterator.get_next().numpy())
        checkpoint.restore(save_path)
        self.assertEqual(2, iterator.get_next().numpy())
Example #13
  def make_one_shot_iterator(self):
    """Get a one time use iterator for the distributed PerDeviceDataset."""
    if self._prefetch_on_device:
      on_device_dataset = self._dataset.apply(
          prefetching_ops_v2.prefetch_to_devices(self._devices))
      dataset_iterator = on_device_dataset.make_one_shot_iterator()
    elif context.executing_eagerly():
      dataset_iterator = datasets.Iterator(self._dataset)
    else:
      dataset_iterator = self._dataset.make_one_shot_iterator()

    return PerDeviceDataIterator(
        dataset_iterator, self._devices, self._prefetch_on_device)
Example #14
  def testSaveRestore(self):
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
    dataset = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
    dataset = dataset.map(math_ops.square).batch(2)
    iterator = datasets.Iterator(dataset)
    checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
    self.assertAllEqual([1, 4], iterator.get_next().numpy())
    save_path = checkpoint.save(checkpoint_prefix)
    self.assertAllEqual([9, 16], iterator.get_next().numpy())
    self.assertAllEqual([25, 36], iterator.get_next().numpy())
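    # Restoring rewinds the iterator to the saved position, so the same two
    # batches come back again.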
    checkpoint.restore(save_path)
    self.assertAllEqual([9, 16], iterator.get_next().numpy())
    self.assertAllEqual([25, 36], iterator.get_next().numpy())
Example #15
    def evaluate_on_dataset(self, dataset, *args, **kwargs):
        """Convenience method for performing an eval on a Dataset.

        Args:
          dataset: Dataset object with the input data to evaluate on.
          *args: Optional positional arguments to __call__().
          **kwargs: Optional additional arguments to __call__(), except
            `summary_logdir`: if specified, metrics will be written as summaries
            to this directory.

        Returns:
          @compatibility(eager)
          When eager execution is enabled, this returns the result of performing
          an evaluation as a dictionary. With graph execution, this returns a
          tuple (init_op, call_op, results_op) which may be executed using this
          code:
          ```python
            sess.run(init_op)
            try:
              while True:
                sess.run(call_op)
            except tf.errors.OutOfRangeError:
              pass
            return sess.run(results_op)  # A dictionary

            # equivalently:
            return evaluator.run_evaluation(init_op, call_op, results_op, sess=sess)
          ```
          @end_compatibility
        """
        summary_logdir = kwargs.pop("summary_logdir", None)
        if context.executing_eagerly():
            for example in datasets.Iterator(dataset):
                self.__call__(example, *args, **kwargs)
            return self.all_metric_results(summary_logdir)
        # Graph construction
        next_value = dataset_ops.make_one_shot_iterator(dataset).get_next()
        # Function inlining destroys strict input semantics (the function body
        # might start executing before all of its inputs are ready). When the
        # iterator is exhausted and raises an out-of-range error, the function
        # body might be partially executed. To prevent this, we add an explicit
        # control dependency on 'get_next'.
        with ops.control_dependencies([next_value]):
            has_next_value = control_flow_ops.no_op(name="iterator_has_next")
        with ops.control_dependencies([has_next_value]):
            call_op = self.__call__(next_value, *args, **kwargs)
        init_op = self.init_variables()
        results_op = self.all_metric_results(summary_logdir)
        return (init_op, call_op, results_op)
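The control-dependency trick above generalizes: any op constructed under ops.control_dependencies([t]) cannot run before t has been produced. A minimal sketch of the same gating pattern using the public TF 1.x-style API (tf.compat.v1; the names here are illustrative):

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()
ds = tf.data.Dataset.range(3)
next_value = tf.data.make_one_shot_iterator(ds).get_next()
with tf.control_dependencies([next_value]):
    # This no-op cannot run until next_value is ready, so ops gated on it
    # never start on a partially-ready input.
    gate = tf.no_op(name='iterator_has_next')
with tf.control_dependencies([gate]):
    doubled = next_value * 2

with tf.Session() as sess:
    print(sess.run(doubled))  # 0; successive runs yield 2, then 4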
Example #16
  def testSaveRestore(self):
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
    dataset = dataset_ops.Dataset.from_tensor_slices(
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
    dataset = dataset.map(math_ops.square).batch(2)
    # TODO(b/138399725): Re-enable default optimizations.
    options = dataset_ops.Options()
    options.experimental_optimization.apply_default_optimizations = False
    dataset = dataset.with_options(options)
    iterator = datasets.Iterator(dataset)
    checkpoint = trackable_utils.Checkpoint(iterator=iterator)
    self.assertAllEqual([1, 4], iterator.get_next().numpy())
    save_path = checkpoint.save(checkpoint_prefix)
    self.assertAllEqual([9, 16], iterator.get_next().numpy())
    self.assertAllEqual([25, 36], iterator.get_next().numpy())
    checkpoint.restore(save_path)
    self.assertAllEqual([9, 16], iterator.get_next().numpy())
    self.assertAllEqual([25, 36], iterator.get_next().numpy())
Example #17
    def evaluate_on_dataset(self, dataset, *args, **kwargs):
        """Convenience method for performing an eval on a Dataset.

        Args:
          dataset: Dataset object with the input data to evaluate on.
          *args: Optional positional arguments to __call__().
          **kwargs: Optional additional arguments to __call__(), except
            `summary_logdir`: if specified, metrics will be written as summaries
            to this directory.

        Returns:
          @compatibility(eager)
          When eager execution is enabled, this returns the result of performing
          an evaluation as a dictionary. With graph execution, this returns a
          tuple (init_op, call_op, results_op) which may be executed using this
          code:
          ```python
            sess.run(init_op)
            try:
              while True:
                sess.run(call_op)
            except tf.errors.OutOfRangeError:
              pass
            return sess.run(results_op)  # A dictionary

            # equivalently:
            return evaluator.run_evaluation(init_op, call_op, results_op, sess=sess)
          ```
          @end_compatibility
        """
        summary_logdir = kwargs.pop("summary_logdir", None)
        if context.executing_eagerly():
            for example in datasets.Iterator(dataset):
                self.__call__(example, *args, **kwargs)
            return self.all_metric_results(summary_logdir)
        # Graph construction
        call_op = self.__call__(
            dataset_ops.make_one_shot_iterator(dataset).get_next(), *args,
            **kwargs)
        init_op = self.init_variables()
        results_op = self.all_metric_results(summary_logdir)
        return (init_op, call_op, results_op)
Example #18
    def evaluate_on_dataset(self, dataset, *args, **kwargs):
        """Convenience method for performing an eval on a Dataset.

        Args:
          dataset: Dataset object with the input data to evaluate on.
          *args: Optional positional arguments to __call__().
          **kwargs: Optional additional arguments to __call__().

        Returns:
          @compatibility(eager)
          When eager execution is enabled, this returns the result of performing
          an evaluation as a dictionary. With graph execution, this returns a
          tuple (init_op, call_op, results_op) which may be executed using this
          code:
          ```python
            sess.run(init_op)
            try:
              while True:
                sess.run(call_op)
            except tf.errors.OutOfRangeError:
              pass
            return sess.run(results_op)  # A dictionary

            # equivalently:
            return evaluator.run_evaluation(init_op, call_op, results_op, sess=sess)
          ```
          @end_compatibility
        """
        # TODO(josh11b): Add optional summary_writer.
        if context.in_graph_mode():
            call_op = self.__call__(
                dataset.make_one_shot_iterator().get_next(), *args, **kwargs)
            init_op = self.init_variables()
            results_op = self.all_metric_results()
            return (init_op, call_op, results_op)
        # Eager case
        for example in datasets.Iterator(dataset):
            self.__call__(example, *args, **kwargs)
        return self.all_metric_results()
Example #19
  def testTensorsPlacedOnDevice(self):
    ds = Dataset.from_tensors([0., 1.])
    with ops.device(test.gpu_device_name()):
      x = datasets.Iterator(ds).next()
      x = math_ops.add(x, x)
    self.assertAllEqual([0., 2.], x.numpy())
Example #20
  def testBasic(self):
    got = []
    for t in datasets.Iterator(Dataset.range(4)):
      got.append(t.numpy())
    self.assertAllEqual([0, 1, 2, 3], got)
Example #21
  def distribute_dataset(self, dataset):
    if context.executing_eagerly():
      return datasets.Iterator(dataset)
    else:
      return dataset.make_one_shot_iterator()
Example #22
  def evaluate_on_dataset(self, dataset, *args, **kwargs):
    """Convenience method for performing an eval on a Dataset."""
    for example in datasets.Iterator(dataset):
      self.__call__(example, *args, **kwargs)
    # TODO(josh11b): Add optional summary_writer.
    return self.all_metric_results()