# Example 1
def _graph_dataset_iterator(ds_iter, graph=None):
    """Constructs a Python generator from a tf.data.Iterator."""
    # Build the graph ops (initializer + next-element op) under the target
    # graph, then drive them from a CPU-only session.
    with utils.maybe_with_graph(graph, create_if_none=False):
        initializer = ds_iter.initializer
        next_element = ds_iter.get_next()
    with utils.nogpu_session(graph) as sess:
        sess.run(initializer)
        try:
            while True:
                yield sess.run(next_element)
        except tf.errors.OutOfRangeError:
            # Dataset exhausted: end the generator.
            return
# Example 2
def as_numpy(dataset, *, graph=None):
    """Converts a `tf.data.Dataset` to an iterable of NumPy arrays.

    `as_numpy` converts a possibly nested structure of `tf.data.Dataset`s
    and `tf.Tensor`s to iterables of NumPy arrays and NumPy arrays,
    respectively.

    Note that because TensorFlow has support for ragged tensors and NumPy has
    no equivalent representation,
    [`tf.RaggedTensor`s](https://www.tensorflow.org/api_docs/python/tf/RaggedTensor)
    are left as-is for the user to deal with them (e.g. using `to_list()`).
    In TF 1 (i.e. graph mode), `tf.RaggedTensor`s are returned as
    `tf.ragged.RaggedTensorValue`s.

    Example:

    ```
    ds = tfds.load(name="mnist", split="train")
    ds_numpy = tfds.as_numpy(ds)  # Convert `tf.data.Dataset` to Python generator
    for ex in ds_numpy:
      # `{'image': np.array(shape=(28, 28, 1)), 'labels': np.array(shape=())}`
      print(ex)
    ```

    Args:
      dataset: a possibly nested structure of `tf.data.Dataset`s and/or
        `tf.Tensor`s.
      graph: `tf.Graph`, optional, explicitly set the graph to use.

    Returns:
      A structure matching `dataset` where `tf.data.Dataset`s are converted to
      generators of NumPy arrays and `tf.Tensor`s are converted to NumPy arrays.

    Raises:
      ValueError: if a leaf of `dataset` is neither a `tf.Tensor`, a
        `tf.RaggedTensor`, nor a `tf.data.Dataset`.
    """
    nested_ds = dataset
    del dataset

    # Flatten
    flat_ds = tf.nest.flatten(nested_ds)
    flat_np = []

    # Type check for Tensors and Datasets. The `types` structure used in the
    # error message is only built on failure (it was previously recomputed on
    # every iteration even though it is loop-invariant).
    for ds_el in flat_ds:
        if not (isinstance(ds_el, (tf.Tensor, tf.RaggedTensor))
                or tf_compat.is_dataset(ds_el)):
            types = tf.nest.pack_sequence_as(
                nested_ds, [type(el) for el in flat_ds])
            raise ValueError("Arguments to as_numpy must be tf.Tensors or "
                             "tf.data.Datasets. Got: %s" % types)

    if tf.executing_eagerly():
        # Eager mode
        for ds_el in flat_ds:
            if isinstance(ds_el, tf.Tensor):
                np_el = ds_el.numpy()
            elif isinstance(ds_el, tf.RaggedTensor):
                # Ragged tensors have no NumPy equivalent; pass them through.
                np_el = ds_el
            elif tf_compat.is_dataset(ds_el):
                np_el = _eager_dataset_iterator(ds_el)
            else:
                assert False
            flat_np.append(np_el)
    else:
        # Graph mode

        # First create iterators for datasets
        with utils.maybe_with_graph(graph, create_if_none=False):
            ds_iters = [
                tf.compat.v1.data.make_initializable_iterator(ds_el)
                for ds_el in flat_ds if tf_compat.is_dataset(ds_el)
            ]
        ds_iters = [
            _graph_dataset_iterator(ds_iter, graph) for ds_iter in ds_iters
        ]

        # Then create numpy arrays for tensors
        with utils.nogpu_session(
                graph) as sess:  # Shared session for tf.Tensor
            # Calling sess.run once so that randomness is shared.
            np_arrays = sess.run([
                tensor for tensor in flat_ds
                if not tf_compat.is_dataset(tensor)
            ])

        # Merge the dataset iterators and np arrays back in flattened order.
        iter_ds = iter(ds_iters)
        iter_array = iter(np_arrays)
        flat_np = [
            next(iter_ds) if tf_compat.is_dataset(ds_el) else next(iter_array)
            for ds_el in flat_ds
        ]

    # Nest
    return tf.nest.pack_sequence_as(nested_ds, flat_np)
# Example 3
def dataset_as_numpy(dataset, graph=None):
  """Converts a `tf.data.Dataset` to an iterable of NumPy arrays.

  `dataset_as_numpy` converts a possibly nested structure of `tf.data.Dataset`s
  and `tf.Tensor`s to iterables of NumPy arrays and NumPy arrays, respectively.

  Args:
    dataset: a possibly nested structure of `tf.data.Dataset`s and/or
      `tf.Tensor`s.
    graph: `tf.Graph`, optional, explicitly set the graph to use.

  Returns:
    A structure matching `dataset` where `tf.data.Dataset`s are converted to
    generators of NumPy arrays and `tf.Tensor`s are converted to NumPy arrays.

  Raises:
    ValueError: if a leaf of `dataset` is neither a `tf.Tensor` nor a
      `tf.data.Dataset`.
  """
  nested_ds = dataset
  del dataset

  # Flatten
  flat_ds = tf.contrib.framework.nest.flatten(nested_ds)
  flat_np = []

  # Type check for Tensors and Datasets. The `types` structure for the error
  # message is only built on failure (it was previously recomputed on every
  # iteration even though it is loop-invariant).
  for ds_el in flat_ds:
    if not isinstance(ds_el, (tf.Tensor, tf.data.Dataset)):
      types = tf.contrib.framework.nest.pack_sequence_as(
          nested_ds, [type(el) for el in flat_ds])
      raise ValueError("Arguments to dataset_as_numpy must be tf.Tensors or "
                       "tf.data.Datasets. Got: %s" % types)

  if tf.executing_eagerly():
    # Eager mode
    for ds_el in flat_ds:
      if isinstance(ds_el, tf.Tensor):
        np_el = ds_el.numpy()
      elif isinstance(ds_el, tf.data.Dataset):
        np_el = _eager_dataset_iterator(ds_el)
      else:
        assert False
      flat_np.append(np_el)
  else:
    # Graph mode

    # First create necessary graph ops
    ds_iters = [None] * len(flat_ds)
    with utils.maybe_with_graph(graph, create_if_none=False):
      for i, ds_el in enumerate(flat_ds):
        if isinstance(ds_el, tf.data.Dataset):
          ds_iters[i] = tf.compat.v1.data.make_one_shot_iterator(
              ds_el).get_next()

    # Then create NumPy items
    # Shared session for tf.Tensor runs
    with utils.nogpu_session(graph) as sess:
      for ds_iter, ds_el in zip(ds_iters, flat_ds):
        if ds_iter is None:
          # Tensor
          np_el = sess.run(ds_el)
        else:
          # Dataset
          np_el = _graph_dataset_iterator(ds_iter, graph)
        flat_np.append(np_el)

  # Nest
  return tf.contrib.framework.nest.pack_sequence_as(nested_ds, flat_np)
# Example 4
def as_numpy(dataset, graph=None):
    """Converts a `tf.data.Dataset` to an iterable of NumPy arrays.

    `as_numpy` converts a possibly nested structure of `tf.data.Dataset`s
    and `tf.Tensor`s to iterables of NumPy arrays and NumPy arrays,
    respectively.

    Args:
      dataset: a possibly nested structure of `tf.data.Dataset`s and/or
        `tf.Tensor`s.
      graph: `tf.Graph`, optional, explicitly set the graph to use.

    Returns:
      A structure matching `dataset` where `tf.data.Dataset`s are converted to
      generators of NumPy arrays and `tf.Tensor`s are converted to NumPy arrays.

    Raises:
      ValueError: if a leaf of `dataset` is neither a `tf.Tensor` nor a
        `tf.data.Dataset`.
    """
    nested_ds = dataset
    del dataset

    # Flatten
    flat_ds = tf.nest.flatten(nested_ds)
    flat_np = []

    # Type check for Tensors and Datasets. The `types` structure for the error
    # message is only built on failure (it was previously recomputed on every
    # iteration even though it is loop-invariant).
    for ds_el in flat_ds:
        if not isinstance(ds_el, (tf.Tensor, tf.data.Dataset)):
            types = tf.nest.pack_sequence_as(
                nested_ds, [type(el) for el in flat_ds])
            raise ValueError("Arguments to as_numpy must be tf.Tensors or "
                             "tf.data.Datasets. Got: %s" % types)

    if tf.executing_eagerly():
        # Eager mode
        for ds_el in flat_ds:
            if isinstance(ds_el, tf.Tensor):
                np_el = ds_el.numpy()
            elif isinstance(ds_el, tf.data.Dataset):
                np_el = _eager_dataset_iterator(ds_el)
            else:
                assert False
            flat_np.append(np_el)
    else:
        # Graph mode

        # First create iterators for datasets
        with utils.maybe_with_graph(graph, create_if_none=False):
            ds_iters = [
                tf.compat.v1.data.make_one_shot_iterator(ds_el).get_next()
                for ds_el in flat_ds if _is_ds(ds_el)
            ]
        ds_iters = [
            _graph_dataset_iterator(ds_iter, graph) for ds_iter in ds_iters
        ]

        # Then create numpy arrays for tensors
        with utils.nogpu_session(
                graph) as sess:  # Shared session for tf.Tensor
            # Calling sess.run once so that randomness is shared.
            np_arrays = sess.run(
                [tensor for tensor in flat_ds if not _is_ds(tensor)])

        # Merge the dataset iterators and np arrays back in flattened order.
        iter_ds = iter(ds_iters)
        iter_array = iter(np_arrays)
        flat_np = [
            next(iter_ds) if _is_ds(ds_el) else next(iter_array)
            for ds_el in flat_ds
        ]

    # Nest
    return tf.nest.pack_sequence_as(nested_ds, flat_np)