Example #1
0
def _nested_to_numpy_graph(ds_nested: Tree[TensorflowElem]) -> Tree[NumpyElem]:
    """Turn a nested structure of graph-mode TF elements into numpy values.

    Dataset leaves are wrapped in `_IterableDataset` objects built around an
    initializable iterator and the graph that is the default when this function
    runs. All remaining leaves are evaluated together in a single `sess.run`
    call inside `utils.nogpu_session()`.

    Args:
      ds_nested: Nested structure whose leaves are datasets or tensors.

    Returns:
      A structure shaped like `ds_nested` with every leaf converted.
    """
    flat_elems = tf.nest.flatten(ds_nested)
    # Remember which leaves are datasets so the results can be re-interleaved
    # in the original order after the tensors are evaluated as a batch.
    dataset_mask = [tf_compat.is_dataset(leaf) for leaf in flat_elems]

    iterables = []
    tensors = []
    for leaf, leaf_is_ds in zip(flat_elems, dataset_mask):
        if not leaf_is_ds:
            tensors.append(leaf)
            continue
        # Capture the current graph, so that iterating the result several
        # times reuses the graph in which the iterator was created.
        current_graph = tf.compat.v1.get_default_graph()
        initializable = tf.compat.v1.data.make_initializable_iterator(leaf)
        iterables.append(
            _IterableDataset(
                _graph_dataset_iterator, initializable, current_graph))

    # Evaluate every plain tensor in one shared session.
    if tensors:
        with utils.nogpu_session() as sess:
            tensors = sess.run(tensors)

    # Interleave dataset wrappers and numpy arrays back into leaf order.
    pending_ds = iter(iterables)
    pending_np = iter(tensors)
    merged = []
    for leaf_is_ds in dataset_mask:
        if leaf_is_ds:
            merged.append(next(pending_ds))
        else:
            merged.append(next(pending_np))
    return tf.nest.pack_sequence_as(ds_nested, merged)
Example #2
0
def _assert_ds_types(nested_ds: Tree[TensorflowElem]) -> None:
    """Validate that every leaf of `nested_ds` has a supported type.

    A leaf is accepted when it is a `tf.Tensor`, a `tf.RaggedTensor`, or
    something `tf_compat.is_dataset` recognizes.

    Args:
      nested_ds: Nested structure to validate.

    Raises:
      TypeError: On the first unsupported leaf; the message shows the type of
        every leaf, arranged like the input structure.
    """
    for leaf in tf.nest.flatten(nested_ds):
        if isinstance(leaf, (tf.Tensor, tf.RaggedTensor)):
            continue
        if tf_compat.is_dataset(leaf):
            continue
        # Report the types of the whole structure, not just the bad leaf.
        nested_types = tf.nest.map_structure(type, nested_ds)
        raise TypeError(
            'Arguments to as_numpy must be tf.Tensors or tf.data.Datasets. '
            f'Got: {nested_types}.')
Example #3
0
def _elem_to_numpy_eager(tf_el: TensorflowElem) -> NumpyElem:
    """Map one eager-mode TF element to its numpy-side counterpart.

    Args:
      tf_el: A `tf.Tensor`, a `tf.RaggedTensor`, or a dataset.

    Returns:
      The result of `.numpy()` for a tensor, the ragged tensor itself
      (unchanged), or an `_IterableDataset` wrapping the dataset.

    Raises:
      AssertionError: If `tf_el` is none of the supported kinds.
    """
    if isinstance(tf_el, tf.Tensor):
        return tf_el.numpy()
    if isinstance(tf_el, tf.RaggedTensor):
        # Ragged tensors are handed back as-is.
        return tf_el
    if not tf_compat.is_dataset(tf_el):
        raise AssertionError(f'Unexpected element: {type(tf_el)}: {tf_el}')
    return _IterableDataset(_eager_dataset_iterator, tf_el)
Example #4
0
def _elem_to_numpy_eager(
        tf_el: TensorflowElem) -> Union[NumpyElem, Iterable[NumpyElem]]:
    """Map one eager-mode TF element to its numpy-side counterpart.

    Args:
      tf_el: A `tf.Tensor`, a `tf.RaggedTensor`, a dataset, or `None`.

    Returns:
      The result of `_numpy()` for a tensor, the ragged tensor itself
      (unchanged), an `_IterableDataset` wrapping the dataset, or `None` when
      the input is `None`.

    Raises:
      AssertionError: If `tf_el` is none of the supported kinds.
    """
    if isinstance(tf_el, tf.Tensor):
        return tf_el._numpy()  # pytype: disable=attribute-error  # pylint: disable=protected-access
    if isinstance(tf_el, tf.RaggedTensor):
        # Ragged tensors are handed back as-is.
        return tf_el
    if tf_compat.is_dataset(tf_el):
        return _IterableDataset(_eager_dataset_iterator, tf_el)
    if tf_el is not None:
        raise AssertionError(f'Unexpected element: {type(tf_el)}: {tf_el}')
    # `None` leaves pass through untouched.
    return None
Example #5
0
def as_numpy(dataset, *, graph=None):
    """Converts a `tf.data.Dataset` to an iterable of NumPy arrays.

    `as_numpy` converts a possibly nested structure of `tf.data.Dataset`s
    and `tf.Tensor`s to iterables of NumPy arrays and NumPy arrays,
    respectively.

    Note that because TensorFlow has support for ragged tensors and NumPy has
    no equivalent representation,
    [`tf.RaggedTensor`s](https://www.tensorflow.org/api_docs/python/tf/RaggedTensor)
    are left as-is for the user to deal with them (e.g. using `to_list()`).
    In TF 1 (i.e. graph mode), `tf.RaggedTensor`s are returned as
    `tf.ragged.RaggedTensorValue`s.

    Example:

    ```
    ds = tfds.load(name="mnist", split="train")
    ds_numpy = tfds.as_numpy(ds)  # Convert `tf.data.Dataset` to Python generator
    for ex in ds_numpy:
      # `{'image': np.array(shape=(28, 28, 1)), 'labels': np.array(shape=())}`
      print(ex)
    ```

    Args:
      dataset: a possibly nested structure of `tf.data.Dataset`s and/or
        `tf.Tensor`s.
      graph: `tf.Graph`, optional, explicitly set the graph to use.

    Returns:
      A structure matching `dataset` where `tf.data.Dataset`s are converted to
      generators of NumPy arrays and `tf.Tensor`s are converted to NumPy arrays.

    Raises:
      ValueError: if a leaf of `dataset` is not a `tf.Tensor`, a
        `tf.RaggedTensor` or a `tf.data.Dataset`.
    """
    nested_ds = dataset
    del dataset

    # Flatten
    flat_ds = tf.nest.flatten(nested_ds)

    # Type check for Tensors and Datasets
    for ds_el in flat_ds:
        if not (isinstance(ds_el, (tf.Tensor, tf.RaggedTensor))
                or tf_compat.is_dataset(ds_el)):
            # Build the structure of types only on the failure path; it is
            # the same for every leaf, so there is no reason to recompute it
            # on each iteration.
            types = tf.nest.pack_sequence_as(
                nested_ds, [type(el) for el in flat_ds])
            # Wrap in a 1-tuple: when `nested_ds` is itself a tuple, `types`
            # is a tuple and a bare `% types` would be unpacked as multiple
            # format arguments (raising TypeError instead of ValueError).
            raise ValueError("Arguments to as_numpy must be tf.Tensors or "
                             "tf.data.Datasets. Got: %s" % (types,))

    if tf.executing_eagerly():
        # Eager mode
        flat_np = []
        for ds_el in flat_ds:
            if isinstance(ds_el, tf.Tensor):
                np_el = ds_el.numpy()
            elif isinstance(ds_el, tf.RaggedTensor):
                np_el = ds_el
            elif tf_compat.is_dataset(ds_el):
                np_el = _eager_dataset_iterator(ds_el)
            else:
                # Unreachable after the type check above. An explicit raise
                # (rather than `assert False`) survives `python -O`.
                raise AssertionError(
                    f'Unexpected element: {type(ds_el)}: {ds_el}')
            flat_np.append(np_el)
    else:
        # Graph mode

        # First create iterators for datasets
        with utils.maybe_with_graph(graph, create_if_none=False):
            ds_iters = [
                tf.compat.v1.data.make_initializable_iterator(ds_el)
                for ds_el in flat_ds if tf_compat.is_dataset(ds_el)
            ]
        ds_iters = [
            _graph_dataset_iterator(ds_iter, graph) for ds_iter in ds_iters
        ]

        # Then create numpy arrays for tensors
        with utils.nogpu_session(
                graph) as sess:  # Shared session for tf.Tensor
            # Calling sess.run once so that randomness is shared.
            np_arrays = sess.run([
                tensor for tensor in flat_ds
                if not tf_compat.is_dataset(tensor)
            ])

        # Merge the dataset iterators and np arrays, restoring leaf order
        iter_ds = iter(ds_iters)
        iter_array = iter(np_arrays)
        flat_np = [
            next(iter_ds) if tf_compat.is_dataset(ds_el) else next(iter_array)
            for ds_el in flat_ds
        ]

    # Nest
    return tf.nest.pack_sequence_as(nested_ds, flat_np)
Example #6
0
def as_numpy(dataset, graph=None):
    """Converts a `tf.data.Dataset` to an iterable of NumPy arrays.

    `as_numpy` converts a possibly nested structure of `tf.data.Dataset`s
    and `tf.Tensor`s to iterables of NumPy arrays and NumPy arrays,
    respectively.

    Args:
      dataset: a possibly nested structure of `tf.data.Dataset`s and/or
        `tf.Tensor`s.
      graph: `tf.Graph`, optional, explicitly set the graph to use.

    Returns:
      A structure matching `dataset` where `tf.data.Dataset`s are converted to
      generators of NumPy arrays and `tf.Tensor`s are converted to NumPy arrays.

    Raises:
      ValueError: if a leaf of `dataset` is neither a `tf.Tensor` nor a
        `tf.data.Dataset`.
    """
    nested_ds = dataset
    del dataset

    # Flatten
    flat_ds = tf.nest.flatten(nested_ds)

    # Type check for Tensors and Datasets
    for ds_el in flat_ds:
        if not (isinstance(ds_el, tf.Tensor) or tf_compat.is_dataset(ds_el)):
            # Build the structure of types only on the failure path; it is
            # the same for every leaf, so there is no reason to recompute it
            # on each iteration.
            types = tf.nest.pack_sequence_as(
                nested_ds, [type(el) for el in flat_ds])
            # Wrap in a 1-tuple: when `nested_ds` is itself a tuple, `types`
            # is a tuple and a bare `% types` would be unpacked as multiple
            # format arguments (raising TypeError instead of ValueError).
            raise ValueError("Arguments to as_numpy must be tf.Tensors or "
                             "tf.data.Datasets. Got: %s" % (types,))

    if tf.executing_eagerly():
        # Eager mode
        flat_np = []
        for ds_el in flat_ds:
            if isinstance(ds_el, tf.Tensor):
                np_el = ds_el.numpy()
            elif tf_compat.is_dataset(ds_el):
                np_el = _eager_dataset_iterator(ds_el)
            else:
                # Unreachable after the type check above. An explicit raise
                # (rather than `assert False`) survives `python -O`.
                raise AssertionError(
                    f'Unexpected element: {type(ds_el)}: {ds_el}')
            flat_np.append(np_el)
    else:
        # Graph mode

        # First create the `get_next` ops for datasets
        with utils.maybe_with_graph(graph, create_if_none=False):
            ds_iters = [
                tf.compat.v1.data.make_one_shot_iterator(ds_el).get_next()
                for ds_el in flat_ds if tf_compat.is_dataset(ds_el)
            ]
        ds_iters = [
            _graph_dataset_iterator(ds_iter, graph) for ds_iter in ds_iters
        ]

        # Then create numpy arrays for tensors
        with utils.nogpu_session(
                graph) as sess:  # Shared session for tf.Tensor
            # Calling sess.run once so that randomness is shared.
            np_arrays = sess.run([
                tensor for tensor in flat_ds
                if not tf_compat.is_dataset(tensor)
            ])

        # Merge the dataset iterators and np arrays, restoring leaf order
        iter_ds = iter(ds_iters)
        iter_array = iter(np_arrays)
        flat_np = [
            next(iter_ds) if tf_compat.is_dataset(ds_el) else next(iter_array)
            for ds_el in flat_ds
        ]

    # Nest
    return tf.nest.pack_sequence_as(nested_ds, flat_np)