def _nested_to_numpy_graph(ds_nested: Tree[TensorflowElem]) -> Tree[NumpyElem]:
    """Map a nested structure of TF elements to NumPy (graph mode).

    Dataset leaves are wrapped in `_IterableDataset`; all remaining leaves
    are evaluated together in one `sess.run` call. The result has the same
    nesting as `ds_nested`.
    """
    leaves = tf.nest.flatten(ds_nested)
    wrapped_datasets = []
    tensor_values = []

    for leaf in leaves:
        if not tf_compat.is_dataset(leaf):
            tensor_values.append(leaf)
            continue
        # Remember the graph that is current at this point so that iterating
        # the resulting dataset several times keeps using the graph in which
        # `as_numpy` was called.
        current_graph = tf.compat.v1.get_default_graph()
        initializable = tf.compat.v1.data.make_initializable_iterator(leaf)
        wrapped_datasets.append(
            _IterableDataset(
                _graph_dataset_iterator, initializable, current_graph))

    # Evaluate every non-dataset leaf in a single shared session; skipped
    # entirely when there is nothing to evaluate.
    if tensor_values:
        with utils.nogpu_session() as sess:
            tensor_values = sess.run(tensor_values)

    # Interleave the two result streams back into the original leaf order.
    datasets_it = iter(wrapped_datasets)
    values_it = iter(tensor_values)
    merged = []
    for leaf in leaves:
        source = datasets_it if tf_compat.is_dataset(leaf) else values_it
        merged.append(next(source))
    return tf.nest.pack_sequence_as(ds_nested, merged)
def _assert_ds_types(nested_ds: Tree[TensorflowElem]) -> None:
    """Check that every leaf is a tensor, ragged tensor or dataset.

    Raises:
      TypeError: if any leaf of `nested_ds` is of another type. The message
        lists the types of the whole nested structure.
    """
    for leaf in tf.nest.flatten(nested_ds):
        if isinstance(leaf, (tf.Tensor, tf.RaggedTensor)):
            continue
        if tf_compat.is_dataset(leaf):
            continue
        # First offending leaf found: report the types of the full structure.
        nested_types = tf.nest.map_structure(type, nested_ds)
        raise TypeError(
            'Arguments to as_numpy must be tf.Tensors or tf.data.Datasets. '
            f'Got: {nested_types}.')
def _elem_to_numpy_eager(tf_el: TensorflowElem) -> NumpyElem:
    """Convert one TF element to its NumPy counterpart (eager mode).

    `tf.Tensor` is converted with `.numpy()`, `tf.RaggedTensor` is returned
    unchanged, and a dataset is wrapped in `_IterableDataset`.

    Raises:
      AssertionError: if `tf_el` is none of the supported kinds.
    """
    if isinstance(tf_el, tf.Tensor):
        return tf_el.numpy()
    if isinstance(tf_el, tf.RaggedTensor):
        # Ragged tensors are handed back as-is.
        return tf_el
    if tf_compat.is_dataset(tf_el):
        return _IterableDataset(_eager_dataset_iterator, tf_el)
    raise AssertionError(f'Unexpected element: {type(tf_el)}: {tf_el}')
def _elem_to_numpy_eager(
    tf_el: TensorflowElem,
) -> Union[NumpyElem, Iterable[NumpyElem]]:
    """Convert one TF element to its NumPy counterpart (eager mode).

    `tf.Tensor` is converted through its `_numpy()` method,
    `tf.RaggedTensor` and `None` are returned unchanged, and a dataset is
    wrapped in `_IterableDataset`.

    Raises:
      AssertionError: if `tf_el` is none of the supported kinds.
    """
    if isinstance(tf_el, tf.Tensor):
        # pytype: disable=attribute-error
        # pylint: disable=protected-access
        return tf_el._numpy()
        # pylint: enable=protected-access
        # pytype: enable=attribute-error
    if isinstance(tf_el, tf.RaggedTensor):
        # Ragged tensors are handed back as-is.
        return tf_el
    if tf_compat.is_dataset(tf_el):
        return _IterableDataset(_eager_dataset_iterator, tf_el)
    if tf_el is None:
        return None
    raise AssertionError(f'Unexpected element: {type(tf_el)}: {tf_el}')
def as_numpy(dataset, *, graph=None):
    """Converts a `tf.data.Dataset` to an iterable of NumPy arrays.

    `as_numpy` converts a possibly nested structure of `tf.data.Dataset`s
    and `tf.Tensor`s to iterables of NumPy arrays and NumPy arrays,
    respectively.

    Note that because TensorFlow has support for ragged tensors and NumPy
    has no equivalent representation,
    [`tf.RaggedTensor`s](https://www.tensorflow.org/api_docs/python/tf/RaggedTensor)
    are left as-is for the user to deal with them (e.g. using `to_list()`).
    In TF 1 (i.e. graph mode), `tf.RaggedTensor`s are returned as
    `tf.ragged.RaggedTensorValue`s.

    Example:

    ```
    ds = tfds.load(name="mnist", split="train")
    ds_numpy = tfds.as_numpy(ds)  # Convert `tf.data.Dataset` to Python generator
    for ex in ds_numpy:
      # `{'image': np.array(shape=(28, 28, 1)), 'labels': np.array(shape=())}`
      print(ex)
    ```

    Args:
      dataset: a possibly nested structure of `tf.data.Dataset`s and/or
        `tf.Tensor`s.
      graph: `tf.Graph`, optional, explicitly set the graph to use.

    Returns:
      A structure matching `dataset` where `tf.data.Dataset`s are converted
      to generators of NumPy arrays and `tf.Tensor`s are converted to NumPy
      arrays.

    Raises:
      ValueError: if any leaf of `dataset` is not a `tf.Tensor`,
        `tf.RaggedTensor` or `tf.data.Dataset`.
    """
    nested_ds = dataset
    del dataset

    # Flatten
    flat_ds = tf.nest.flatten(nested_ds)

    # Type check for Tensors and Datasets
    for ds_el in flat_ds:
        if not (isinstance(ds_el, (tf.Tensor, tf.RaggedTensor))
                or tf_compat.is_dataset(ds_el)):
            # The nested type summary is only needed for the error message,
            # so build it on failure rather than once per element.
            types = tf.nest.pack_sequence_as(
                nested_ds, [type(el) for el in flat_ds])
            # Wrap in a 1-tuple: when `nested_ds` is itself a tuple, `types`
            # is a tuple too and a bare `% types` would be unpacked as the
            # argument list, raising TypeError instead of this ValueError.
            raise ValueError(
                "Arguments to as_numpy must be tf.Tensors or "
                "tf.data.Datasets. Got: %s" % (types,))

    if tf.executing_eagerly():
        # Eager mode
        flat_np = []
        for ds_el in flat_ds:
            if isinstance(ds_el, tf.Tensor):
                np_el = ds_el.numpy()
            elif isinstance(ds_el, tf.RaggedTensor):
                np_el = ds_el
            elif tf_compat.is_dataset(ds_el):
                np_el = _eager_dataset_iterator(ds_el)
            else:
                # Unreachable after the type check above. An explicit raise
                # is used because `assert` is stripped under `python -O`.
                raise AssertionError(
                    "Unexpected element type: %s" % (type(ds_el),))
            flat_np.append(np_el)
    else:
        # Graph mode

        # First create iterators for datasets
        with utils.maybe_with_graph(graph, create_if_none=False):
            ds_iters = [
                tf.compat.v1.data.make_initializable_iterator(ds_el)
                for ds_el in flat_ds
                if tf_compat.is_dataset(ds_el)
            ]
        ds_iters = [
            _graph_dataset_iterator(ds_iter, graph) for ds_iter in ds_iters
        ]

        # Then create numpy arrays for tensors
        with utils.nogpu_session(graph) as sess:  # Shared session for tf.Tensor
            # Calling sess.run once so that randomness is shared.
            np_arrays = sess.run([
                tensor for tensor in flat_ds
                if not tf_compat.is_dataset(tensor)
            ])

        # Merge the dataset iterators and np arrays, restoring leaf order.
        iter_ds = iter(ds_iters)
        iter_array = iter(np_arrays)
        flat_np = [
            next(iter_ds) if tf_compat.is_dataset(ds_el) else next(iter_array)
            for ds_el in flat_ds
        ]

    # Nest
    return tf.nest.pack_sequence_as(nested_ds, flat_np)
def as_numpy(dataset, graph=None):
    """Converts a `tf.data.Dataset` to an iterable of NumPy arrays.

    `as_numpy` converts a possibly nested structure of `tf.data.Dataset`s
    and `tf.Tensor`s to iterables of NumPy arrays and NumPy arrays,
    respectively.

    Args:
      dataset: a possibly nested structure of `tf.data.Dataset`s and/or
        `tf.Tensor`s.
      graph: `tf.Graph`, optional, explicitly set the graph to use.

    Returns:
      A structure matching `dataset` where `tf.data.Dataset`s are converted
      to generators of NumPy arrays and `tf.Tensor`s are converted to NumPy
      arrays.

    Raises:
      ValueError: if any leaf of `dataset` is neither a `tf.Tensor` nor a
        `tf.data.Dataset`.
    """
    nested_ds = dataset
    del dataset

    # Flatten
    flat_ds = tf.nest.flatten(nested_ds)

    # Type check for Tensors and Datasets
    for ds_el in flat_ds:
        if not (isinstance(ds_el, tf.Tensor) or tf_compat.is_dataset(ds_el)):
            # The nested type summary is only needed for the error message,
            # so build it on failure rather than once per element.
            types = tf.nest.pack_sequence_as(
                nested_ds, [type(el) for el in flat_ds])
            # Wrap in a 1-tuple: when `nested_ds` is itself a tuple, `types`
            # is a tuple too and a bare `% types` would be unpacked as the
            # argument list, raising TypeError instead of this ValueError.
            raise ValueError(
                "Arguments to as_numpy must be tf.Tensors or "
                "tf.data.Datasets. Got: %s" % (types,))

    if tf.executing_eagerly():
        # Eager mode
        flat_np = []
        for ds_el in flat_ds:
            if isinstance(ds_el, tf.Tensor):
                np_el = ds_el.numpy()
            elif tf_compat.is_dataset(ds_el):
                np_el = _eager_dataset_iterator(ds_el)
            else:
                # Unreachable after the type check above. An explicit raise
                # is used because `assert` is stripped under `python -O`.
                raise AssertionError(
                    "Unexpected element type: %s" % (type(ds_el),))
            flat_np.append(np_el)
    else:
        # Graph mode

        # First create iterators for datasets
        with utils.maybe_with_graph(graph, create_if_none=False):
            ds_iters = [
                tf.compat.v1.data.make_one_shot_iterator(ds_el).get_next()
                for ds_el in flat_ds
                if tf_compat.is_dataset(ds_el)
            ]
        ds_iters = [
            _graph_dataset_iterator(ds_iter, graph) for ds_iter in ds_iters
        ]

        # Then create numpy arrays for tensors
        with utils.nogpu_session(graph) as sess:  # Shared session for tf.Tensor
            # Calling sess.run once so that randomness is shared.
            np_arrays = sess.run([
                tensor for tensor in flat_ds
                if not tf_compat.is_dataset(tensor)
            ])

        # Merge the dataset iterators and np arrays, restoring leaf order.
        iter_ds = iter(ds_iters)
        iter_array = iter(np_arrays)
        flat_np = [
            next(iter_ds) if tf_compat.is_dataset(ds_el) else next(iter_array)
            for ds_el in flat_ds
        ]

    # Nest
    return tf.nest.pack_sequence_as(nested_ds, flat_np)