def testOverrideThreadPool(self):

  def get_thread_id(_):
    # Python creates a dummy thread object to represent the current
    # thread when called from an "alien" thread (such as a
    # `PrivateThreadPool` thread in this case). It does not include
    # the TensorFlow-given display name, but it has a unique
    # identifier that maps one-to-one with the underlying OS thread.
    return np.array(threading.current_thread().ident).astype(np.int64)

  for num_threads in [1, 2, 4, 8, 16]:
    dataset = (
        Dataset.range(1000).map(
            lambda x: script_ops.py_func(get_thread_id, [x], dtypes.int64),
            num_parallel_calls=32).apply(unique.unique()))

    dataset = threadpool.override_threadpool(
        dataset,
        threadpool.PrivateThreadPool(
            num_threads, display_name='private_thread_pool_%d' % num_threads))

    thread_ids = []
    for next_element in datasets.Iterator(dataset):
      thread_ids.append(next_element)
    self.assertEqual(len(thread_ids), len(set(thread_ids)))
    self.assertGreater(len(thread_ids), 0)
    # NOTE(mrry): We don't control the thread pool scheduling, and
    # so cannot guarantee that all of the threads in the pool will
    # perform work.
    self.assertLessEqual(len(thread_ids), num_threads)
def benchmarkSliceBatchCacheRepeatCallable(self):
  input_size = 10000
  batch_size = 100
  num_epochs = 100

  input_data = np.random.randn(input_size)

  dataset = (
      Dataset.from_tensor_slices(input_data).batch(batch_size).cache().repeat(
          num_epochs))
  iterator = datasets.Iterator(dataset)

  ends = [time.time()]
  for _ in iterator:
    ends.append(time.time())

  deltas = np.ediff1d(ends)
  median_wall_time = np.median(deltas)
  print(
      'Slice/batch/cache/repeat eager input size: %d batch size: %d Median '
      'wall time per element: %f' % (input_size, batch_size,
                                     median_wall_time))
  self.report_benchmark(
      iters=len(deltas),
      wall_time=median_wall_time,
      name='benchmark_slice_batch_cache_repeat_eager_input_%d_batch_%d' %
      (input_size, batch_size))
def testSparseTensorElements(self):
  components = (sparse_tensor.SparseTensorValue(
      indices=np.array([[0, 0], [1, 0], [2, 0]]),
      values=np.array([0, 0, 0]),
      dense_shape=np.array([3, 1])),
                sparse_tensor.SparseTensorValue(
                    indices=np.array([[0, 0], [1, 1], [2, 2]]),
                    values=np.array([1, 2, 3]),
                    dense_shape=np.array([3, 3])))

  expected = [
      (sparse_tensor.SparseTensorValue(
          indices=np.array([[0]]),
          values=np.array([0]),
          dense_shape=np.array([1])),
       sparse_tensor.SparseTensorValue(
           indices=np.array([[0]]),
           values=np.array([1]),
           dense_shape=np.array([3]))),
      (sparse_tensor.SparseTensorValue(
          indices=np.array([[0]]),
          values=np.array([0]),
          dense_shape=np.array([1])),
       sparse_tensor.SparseTensorValue(
           indices=np.array([[1]]),
           values=np.array([2]),
           dense_shape=np.array([3]))),
      (sparse_tensor.SparseTensorValue(
          indices=np.array([[0]]),
          values=np.array([0]),
          dense_shape=np.array([1])),
       sparse_tensor.SparseTensorValue(
           indices=np.array([[2]]),
           values=np.array([3]),
           dense_shape=np.array([3]))),
  ]

  for i, result in enumerate(
      datasets.Iterator(Dataset.from_tensor_slices(components))):
    self.assertSparseValuesEqual(expected[i][0], result[0])
    self.assertSparseValuesEqual(expected[i][1], result[1])
def testMapAndFilter(self):

  def even(x):
    return math_ops.equal(math_ops.mod(x, 2), 0)

  it = datasets.Iterator(Dataset.range(8).map(math_ops.square).filter(even))
  got = [x.numpy() for x in it]
  self.assertAllEqual([0, 4, 16, 36], got)
def testPyFunc(self):

  def my_map(inp):
    return [[x + 1 for x in inp]]

  ds = Dataset.range(4).map(
      lambda x: script_ops.py_func(my_map, [[x]], dtypes.int64))
  got = [x.numpy() for x in datasets.Iterator(ds)]
  self.assertAllEqual([[1], [2], [3], [4]], got)
def testGetNext(self):
  iterator = datasets.Iterator(Dataset.range(4))
  self.assertEqual(0, iterator.get_next().numpy())
  self.assertEqual(1, iterator.get_next().numpy())
  self.assertEqual(2, iterator.get_next().numpy())
  self.assertEqual(3, iterator.get_next().numpy())
  with self.assertRaises(errors.OutOfRangeError):
    iterator.get_next()
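# For context, a minimal sketch (not part of the test suite) of the two ways
# these tests consume a `datasets.Iterator`: explicit `get_next()` calls,
# which raise OutOfRangeError when the dataset is exhausted, versus plain
# Python iteration, which handles exhaustion internally. It assumes the same
# module aliases as the tests above (`datasets`, `Dataset`, `errors`).
iterator = datasets.Iterator(Dataset.range(2))

# Style 1: explicit get_next() with manual exhaustion handling.
try:
  while True:
    value = iterator.get_next().numpy()
except errors.OutOfRangeError:
  pass

# Style 2: Python iteration; the loop ends when the dataset is exhausted.
for t in datasets.Iterator(Dataset.range(2)):
  value = t.numpy()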
def testMapAndFilter(self):
  # TODO(ashankar): Address this
  self.skipTest('Not working yet, requires function attribute support')

  def even(x):
    return math_ops.equal(math_ops.mod(x, 2), 0)

  it = datasets.Iterator(Dataset.range(8).map(math_ops.square).filter(even))
  got = [x.numpy() for x in it]
  self.assertAllEqual([0, 4, 16, 36], got)
def testRestoreInReconstructedIterator(self):
  checkpoint_directory = self.get_temp_dir()
  checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
  dataset = Dataset.range(10)
  for i in range(5):
    iterator = datasets.Iterator(dataset)
    checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
    checkpoint.restore(saver.latest_checkpoint(checkpoint_directory))
    for j in range(2):
      self.assertEqual(i * 2 + j, iterator.get_next().numpy())
    checkpoint.save(file_prefix=checkpoint_prefix)
def testTensorsExplicitPrefetchToDevice(self):
  ds = Dataset.from_tensor_slices([0., 1.])
  ds = ds.apply(prefetching_ops.prefetch_to_device(test.gpu_device_name()))

  with self.assertRaisesRegexp(TypeError, 'prefetch_to_device'):
    datasets.Iterator(ds)

  for i, x in enumerate(ds):
    with ops.device(test.gpu_device_name()):
      x = math_ops.add(x, x)
      self.assertEqual(float(i) + float(i), x.numpy())
def testMapCaptureLookupTable(self):
  default_val = -1
  keys = constant_op.constant(['brain', 'salad', 'surgery'])
  values = constant_op.constant([0, 1, 2], dtypes.int64)
  table = lookup.HashTable(
      lookup.KeyValueTensorInitializer(keys, values), default_val)
  dataset = Dataset.from_tensor_slices(['brain', 'salad', 'surgery'])
  dataset = dataset.map(table.lookup)
  it = datasets.Iterator(dataset)
  got = [x.numpy() for x in it]
  self.assertAllEqual([0, 1, 2], got)
def testNestedOutputs(self):
  ds = Dataset.zip((Dataset.range(4), Dataset.zip((Dataset.range(4),
                                                   Dataset.range(4)))))
  total = 0
  # The Iterator will return a nested structure of Tensor objects.
  # Some funkiness to compare against simple integers.
  for (i, x) in enumerate(datasets.Iterator(ds)):
    want = (i, (i, i))
    got = (x[0].numpy(), (x[1][0].numpy(), x[1][1].numpy()))
    self.assertEqual(got, want)
    total += 1
  self.assertEqual(4, total)
def testRestoreExhaustedIterator(self):
  checkpoint_directory = self.get_temp_dir()
  checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
  dataset = Dataset.range(3)
  iterator = datasets.Iterator(dataset)
  checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
  self.assertEqual(0, iterator.get_next().numpy())
  self.assertEqual(1, iterator.get_next().numpy())
  save_path = checkpoint.save(checkpoint_prefix)
  self.assertEqual(2, iterator.get_next().numpy())
  checkpoint.restore(save_path)
  self.assertEqual(2, iterator.get_next().numpy())
def make_one_shot_iterator(self):
  """Get a one time use iterator for the distributed PerDeviceDataset."""
  if self._prefetch_on_device:
    on_device_dataset = self._dataset.apply(
        prefetching_ops_v2.prefetch_to_devices(self._devices))
    dataset_iterator = on_device_dataset.make_one_shot_iterator()
  elif context.executing_eagerly():
    dataset_iterator = datasets.Iterator(self._dataset)
  else:
    dataset_iterator = self._dataset.make_one_shot_iterator()
  return PerDeviceDataIterator(
      dataset_iterator, self._devices, self._prefetch_on_device)
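# A hedged usage sketch of the method above. The PerDeviceDataset constructor
# arguments shown here are hypothetical (not taken from the original source);
# the point is that the returned PerDeviceDataIterator wraps whichever
# underlying iterator the branch selected: a prefetching iterator when
# prefetch-on-device is enabled, a datasets.Iterator in eager mode, or the
# dataset's own one-shot iterator in graph mode.
per_device_dataset = PerDeviceDataset(  # Hypothetical constructor signature.
    dataset, devices=['/gpu:0', '/gpu:1'], prefetch_on_device=False)
iterator = per_device_dataset.make_one_shot_iterator()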
def testSaveRestore(self):
  checkpoint_directory = self.get_temp_dir()
  checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
  dataset = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
  dataset = dataset.map(math_ops.square).batch(2)
  iterator = datasets.Iterator(dataset)
  checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
  self.assertAllEqual([1, 4], iterator.get_next().numpy())
  save_path = checkpoint.save(checkpoint_prefix)
  self.assertAllEqual([9, 16], iterator.get_next().numpy())
  self.assertAllEqual([25, 36], iterator.get_next().numpy())
  checkpoint.restore(save_path)
  self.assertAllEqual([9, 16], iterator.get_next().numpy())
  self.assertAllEqual([25, 36], iterator.get_next().numpy())
def evaluate_on_dataset(self, dataset, *args, **kwargs):
  """Convenience method for performing an eval on a Dataset.

  Args:
    dataset: Dataset object with the input data to evaluate on.
    *args:
    **kwargs: Optional additional arguments to __call__(), except
      `summary_logdir`: if specified, metrics will be written as summaries
      to this directory.

  Returns:
    @compatibility(eager)
    When eager execution is enabled, this returns the result of performing
    an evaluation as a dictionary. With graph execution, this returns a tuple
    (init_op, call_op, results_op) which may be executed using this code:
    ```python
      sess.run(init_op)
      try:
        while True:
          sess.run(call_op)
      except tf.errors.OutOfRangeError:
        pass
      return sess.run(results_op)  # A dictionary

      # equivalently:
      return evaluator.run_evaluation(init_op, call_op, results_op, sess=sess)
    ```
    @end_compatibility
  """
  summary_logdir = kwargs.pop("summary_logdir", None)
  if context.executing_eagerly():
    for example in datasets.Iterator(dataset):
      self.__call__(example, *args, **kwargs)
    return self.all_metric_results(summary_logdir)
  # Graph construction
  next_value = dataset_ops.make_one_shot_iterator(dataset).get_next()
  # Function inlining destroys strict inputs semantics (function body might
  # start execution before all inputs are ready). When iterator is exhausted
  # and throws out of range error, function body might be partially executed.
  # To prevent this we add an explicit control dependency from the 'get_next'.
  with ops.control_dependencies([next_value]):
    has_next_value = control_flow_ops.no_op(name="iterator_has_next")
  with ops.control_dependencies([has_next_value]):
    call_op = self.__call__(next_value, *args, **kwargs)
  init_op = self.init_variables()
  results_op = self.all_metric_results(summary_logdir)
  return (init_op, call_op, results_op)
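# To make the docstring's two modes concrete, a hedged usage sketch follows.
# The `evaluator` object stands in for whatever metric class defines
# evaluate_on_dataset, and the `sess` setup is assumed; both names are
# illustrative, not taken from the original source.
if context.executing_eagerly():
  results = evaluator.evaluate_on_dataset(dataset)  # A dictionary of metrics.
else:
  init_op, call_op, results_op = evaluator.evaluate_on_dataset(dataset)
  sess.run(init_op)
  try:
    while True:
      sess.run(call_op)
  except errors.OutOfRangeError:
    pass
  results = sess.run(results_op)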
def testSaveRestore(self):
  checkpoint_directory = self.get_temp_dir()
  checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
  dataset = dataset_ops.Dataset.from_tensor_slices(
      [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
  dataset = dataset.map(math_ops.square).batch(2)
  # TODO(b/138399725): Re-enable default optimizations.
  options = dataset_ops.Options()
  options.experimental_optimization.apply_default_optimizations = False
  dataset = dataset.with_options(options)
  iterator = datasets.Iterator(dataset)
  checkpoint = trackable_utils.Checkpoint(iterator=iterator)
  self.assertAllEqual([1, 4], iterator.get_next().numpy())
  save_path = checkpoint.save(checkpoint_prefix)
  self.assertAllEqual([9, 16], iterator.get_next().numpy())
  self.assertAllEqual([25, 36], iterator.get_next().numpy())
  checkpoint.restore(save_path)
  self.assertAllEqual([9, 16], iterator.get_next().numpy())
  self.assertAllEqual([25, 36], iterator.get_next().numpy())
def evaluate_on_dataset(self, dataset, *args, **kwargs):
  """Convenience method for performing an eval on a Dataset.

  Args:
    dataset: Dataset object with the input data to evaluate on.
    *args:
    **kwargs: Optional additional arguments to __call__(), except
      `summary_logdir`: if specified, metrics will be written as summaries
      to this directory.

  Returns:
    @compatibility(eager)
    When eager execution is enabled, this returns the result of performing
    an evaluation as a dictionary. With graph execution, this returns a tuple
    (init_op, call_op, results_op) which may be executed using this code:
    ```python
      sess.run(init_op)
      try:
        while True:
          sess.run(call_op)
      except tf.errors.OutOfRangeError:
        pass
      return sess.run(results_op)  # A dictionary

      # equivalently:
      return evaluator.run_evaluation(init_op, call_op, results_op, sess=sess)
    ```
    @end_compatibility
  """
  summary_logdir = kwargs.pop("summary_logdir", None)
  if context.executing_eagerly():
    for example in datasets.Iterator(dataset):
      self.__call__(example, *args, **kwargs)
    return self.all_metric_results(summary_logdir)
  # Graph construction
  call_op = self.__call__(
      dataset_ops.make_one_shot_iterator(dataset).get_next(), *args, **kwargs)
  init_op = self.init_variables()
  results_op = self.all_metric_results(summary_logdir)
  return (init_op, call_op, results_op)
def evaluate_on_dataset(self, dataset, *args, **kwargs):
  """Convenience method for performing an eval on a Dataset.

  Args:
    dataset: Dataset object with the input data to evaluate on.
    *args:
    **kwargs: Optional additional arguments to __call__().

  Returns:
    @compatibility(eager)
    When eager execution is enabled, this returns the result of performing
    an evaluation as a dictionary. With graph execution, this returns a tuple
    (init_op, call_op, results_op) which may be executed using this code:
    ```python
      sess.run(init_op)
      try:
        while True:
          sess.run(call_op)
      except tf.errors.OutOfRangeError:
        pass
      return sess.run(results_op)  # A dictionary

      # equivalently:
      return evaluator.run_evaluation(init_op, call_op, results_op, sess=sess)
    ```
    @end_compatibility
  """
  # TODO(josh11b): Add optional summary_writer.
  if context.in_graph_mode():
    call_op = self.__call__(
        dataset.make_one_shot_iterator().get_next(), *args, **kwargs)
    init_op = self.init_variables()
    results_op = self.all_metric_results()
    return (init_op, call_op, results_op)
  # Eager case
  for example in datasets.Iterator(dataset):
    self.__call__(example, *args, **kwargs)
  return self.all_metric_results()
def testTensorsPlacedOnDevice(self):
  ds = Dataset.from_tensors([0., 1.])
  with ops.device(test.gpu_device_name()):
    x = datasets.Iterator(ds).next()
    x = math_ops.add(x, x)
  self.assertAllEqual([0., 2.], x.numpy())
def testBasic(self):
  got = []
  for t in datasets.Iterator(Dataset.range(4)):
    got.append(t.numpy())
  self.assertAllEqual([0, 1, 2, 3], got)
def distribute_dataset(self, dataset):
  if context.executing_eagerly():
    return datasets.Iterator(dataset)
  else:
    return dataset.make_one_shot_iterator()
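# A brief sketch of how distribute_dataset might be consumed. The `strategy`
# receiver name is hypothetical; the point is that both branches return an
# iterator exposing get_next() (an eager datasets.Iterator or a graph-mode
# one-shot iterator), so callers can treat the result uniformly.
iterator = strategy.distribute_dataset(dataset)  # Hypothetical caller.
batch = iterator.get_next()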
def evaluate_on_dataset(self, dataset, *args, **kwargs):
  """Convenience method for performing an eval on a Dataset."""
  for example in datasets.Iterator(dataset):
    self.__call__(example, *args, **kwargs)
  # TODO(josh11b): Add optional summary_writer.
  return self.all_metric_results()