Ejemplo n.º 1
0
def _get_png():
    """Build a random picture and return it encoded as PNG.

    The picture comes from `fake_data_utils.get_random_picture`, sized by the
    module-level `HEIGHT` and `WIDTH`. The encoding op is evaluated inside a
    session obtained from `utils.nogpu_session()`.

    Returns:
      The value produced by running the `tf.image.encode_png` op.
    """
    encoded = tf.image.encode_png(
        fake_data_utils.get_random_picture(HEIGHT, WIDTH))
    with utils.nogpu_session() as session:
        return session.run(encoded)
Ejemplo n.º 2
0
def _graph_dataset_iterator(ds_item, graph=None):
  """Yield successive evaluations of `ds_item` until the data runs out.

  Args:
    ds_item: fetch passed to `sess.run` on every step.
    graph: forwarded to `utils.nogpu_session`; may be None.

  Yields:
    The result of each `sess.run(ds_item)` call.
  """
  with utils.nogpu_session(graph) as session:
    try:
      while True:
        yield session.run(ds_item)
    except tf.errors.OutOfRangeError:
      # Raised once the underlying data is exhausted: end the generator.
      return
Ejemplo n.º 3
0
def _nested_to_numpy_graph(ds_nested: Tree[TensorflowElem]) -> Tree[NumpyElem]:
    """Convert a nested structure of TF elements into its numpy counterpart.

    Datasets found in the structure are wrapped in `_IterableDataset`
    objects; every other leaf is evaluated in a single shared session.

    Args:
      ds_nested: nested structure whose leaves are datasets or other
        TF elements that `sess.run` can fetch.

    Returns:
      A structure shaped like `ds_nested` holding the converted leaves.
    """
    leaves = tf.nest.flatten(ds_nested)
    dataset_iterables = []
    tensors = []
    for leaf in leaves:
        if not tf_compat.is_dataset(leaf):
            tensors.append(leaf)
            continue
        # Remember the graph that is current right now, so that iterating
        # the resulting object several times keeps using the graph in which
        # the conversion was requested.
        current_graph = tf.compat.v1.get_default_graph()
        initializable = tf.compat.v1.data.make_initializable_iterator(leaf)
        dataset_iterables.append(
            _IterableDataset(
                _graph_dataset_iterator, initializable, current_graph))

    # Evaluate all non-dataset leaves together in one session.
    if tensors:
        with utils.nogpu_session() as session:
            tensors = session.run(tensors)

    # Re-interleave converted datasets and arrays in the original leaf order.
    remaining_datasets = iter(dataset_iterables)
    remaining_arrays = iter(tensors)
    merged = []
    for leaf in leaves:
        if tf_compat.is_dataset(leaf):
            merged.append(next(remaining_datasets))
        else:
            merged.append(next(remaining_arrays))
    return tf.nest.pack_sequence_as(ds_nested, merged)
Ejemplo n.º 4
0
def _get_jpeg(height, width):
    """Build a random picture and return it encoded as JPEG.

    Args:
      height: forwarded to `fake_data_utils.get_random_picture`.
      width: forwarded to `fake_data_utils.get_random_picture`.

    Returns:
      The value produced by running the `tf.image.encode_jpeg` op.
    """
    encoded = tf.image.encode_jpeg(
        fake_data_utils.get_random_picture(height, width))
    with utils.nogpu_session() as session:
        return session.run(encoded)
Ejemplo n.º 5
0
 def _generate_examples(self, archive, subset_images):
     """Yield `(filename, record)` pairs for the archive members to keep.

     Args:
       archive: iterable of `(filepath, fobj)` pairs.
       subset_images: container of filenames; only members whose normalized
         filename is in it are processed.

     Yields:
       Tuples `(filename, record)` where `record` has the keys
       `"file_name"`, `"image"` and `"label"`.
     """
     skip = len("SUN397")
     with tf.Graph().as_default(), utils.nogpu_session() as sess:
         for filepath, fobj in archive:
             # Every path in the tar.gz starts with SUN397/...; drop that
             # prefix and normalize Windows separators.
             filename = filepath[skip:].replace("\\", "/")
             if filename not in subset_images:
                 continue
             # The label is the directory part of the filename, e.g.
             #   /c/car_interior/backseat/sun_aenygxwhhmjtisnf.jpg
             #   -> /c/car_interior/backseat
             parts = filename.split("/")
             label = "/".join(parts[:-1])
             image = _process_image_file(
                 fobj,
                 sess,
                 filename,
                 quality=self.builder_config.quality,
                 target_pixels=self.builder_config.target_pixels)
             yield filename, {
                 "file_name": filename,
                 "image": image,
                 "label": label,
             }
Ejemplo n.º 6
0
def _get_random_jpeg():
  """Write a random JPEG picture to a temporary file.

  Returns:
    A tuple `(path, dim0, dim1)`: the path of the written file, followed by
    the first two entries of the generated picture's `shape`.
  """
  image = _get_random_picture()
  jpeg = tf.image.encode_jpeg(image)
  with utils.nogpu_session() as sess:
    res = sess.run(jpeg)
  # `delete=False` keeps the file on disk after the handle is closed. The
  # context manager guarantees the handle is closed even if the write fails.
  with tempfile.NamedTemporaryFile(
      delete=False, mode='wb', suffix='.JPEG') as fobj:
    fobj.write(res)
  return fobj.name, image.shape[0], image.shape[1]
Ejemplo n.º 7
0
def get_random_jpeg(height=None, width=None):
    """Write a random JPEG picture to a temporary file and return its path.

    Args:
      height: forwarded to `get_random_picture`.
      width: forwarded to `get_random_picture`.

    Returns:
      Path of the temporary `.JPEG` file. The file is not deleted
      automatically.
    """
    image = get_random_picture(height, width)
    jpeg = tf.image.encode_jpeg(image)
    with utils.nogpu_session() as sess:
        res = sess.run(jpeg)
    # `delete=False` keeps the file on disk after the handle is closed. The
    # context manager guarantees the handle is closed even if the write fails.
    with tempfile.NamedTemporaryFile(
            delete=False, mode='wb', suffix='.JPEG') as fobj:
        fobj.write(res)
    return fobj.name
Ejemplo n.º 8
0
def get_random_png(height=None, width=None, channels=CHANNELS_NB):
    """Write a random PNG picture to a temporary file and return its path.

    Args:
      height: forwarded to `get_random_picture`.
      width: forwarded to `get_random_picture`.
      channels: forwarded to `get_random_picture`.

    Returns:
      Path of the temporary `.PNG` file. The file is not deleted
      automatically.
    """
    image = get_random_picture(height, width, channels)
    png = tf.image.encode_png(image)
    with utils.nogpu_session() as sess:
        res = sess.run(png)
    # `delete=False` keeps the file on disk after the handle is closed. The
    # context manager guarantees the handle is closed even if the write fails.
    with tempfile.NamedTemporaryFile(
            delete=False, mode='wb', suffix='.PNG') as fobj:
        fobj.write(res)
    return fobj.name
Ejemplo n.º 9
0
def get_random_wav_c2(channels=2,
                      duration=_AUDIO_DURATION,
                      sample=_SAMPLE_RATE):
  """Write random audio to a temporary WAV file and return its path.

  Args:
    channels: number of columns the random audio is reshaped into
      (2 by default).
    duration: forwarded to `get_random_audio`.
    sample: forwarded to `get_random_audio` and used as the sample rate
      given to `tf.audio.encode_wav`.

  Returns:
    Path of the temporary `.wav` file. The file is not deleted automatically.
  """
  samples = get_random_audio(duration, sample)
  encoded = tf.audio.encode_wav(samples.reshape(-1, channels), sample)
  with utils.nogpu_session() as session:
    payload = session.run(encoded)
  tmp = tempfile.NamedTemporaryFile(delete=False, mode='wb', suffix='.wav')
  with tmp:
    tmp.write(payload)
  return tmp.name
Ejemplo n.º 10
0
def _graph_dataset_iterator(ds_iter, graph: tf.Graph) -> Iterator[NumpyElem]:
    """Turn a `tf.data` iterator into a Python generator.

    Args:
      ds_iter: object exposing `initializer` and `get_next()`.
      graph: graph made default while the ops are created and run.

    Yields:
      The result of each `sess.run` on the iterator's next element.
    """
    with graph.as_default():
        initializer = ds_iter.initializer
        next_element = ds_iter.get_next()
        with utils.nogpu_session() as session:
            session.run(initializer)
            try:
                while True:
                    yield session.run(next_element)
            except tf.errors.OutOfRangeError:
                # Raised when the iterator is exhausted: end the generator.
                return
Ejemplo n.º 11
0
def _graph_dataset_iterator(ds_iter, graph=None):
    """Turn a `tf.data` iterator into a Python generator.

    Args:
      ds_iter: object exposing `initializer` and `get_next()`.
      graph: forwarded to `utils.maybe_with_graph` and
        `utils.nogpu_session`; may be None.

    Yields:
      The result of each `sess.run` on the iterator's next element.
    """
    with utils.maybe_with_graph(graph, create_if_none=False):
        initializer = ds_iter.initializer
        next_element = ds_iter.get_next()
    with utils.nogpu_session(graph) as session:
        session.run(initializer)
        try:
            while True:
                yield session.run(next_element)
        except tf.errors.OutOfRangeError:
            # Raised when the iterator is exhausted: end the generator.
            return
Ejemplo n.º 12
0
def get_random_png(height=None, width=None, channels=CHANNELS_NB):
    """Write a random PNG picture to a temporary file and return its path.

    Args:
      height: target height given to the nearest-neighbor resize.
      width: target width given to the nearest-neighbor resize.
      channels: forwarded to `get_random_picture`.

    Returns:
      Path of the temporary `.PNG` file. The file is not deleted
      automatically.
    """
    # Big randomly generated pngs take large amounts of diskspace.
    # Instead, we resize a 4x4 random image to the png size.
    image = get_random_picture(4, 4, channels)
    # NOTE(review): `height` and `width` default to None and are passed
    # straight to the resize op; presumably callers always supply both.
    # Confirm.
    image = tf.image.resize_nearest_neighbor(tf.expand_dims(image, 0),
                                             (height, width))[0]
    png = tf.image.encode_png(image)
    with utils.nogpu_session() as sess:
        res = sess.run(png)
    # `delete=False` keeps the file on disk after the handle is closed. The
    # context manager guarantees the handle is closed even if the write fails.
    with tempfile.NamedTemporaryFile(
            delete=False, mode='wb', suffix='.PNG') as fobj:
        fobj.write(res)
    return fobj.name
Ejemplo n.º 13
0
def iterate_over_dataset(dataset):
    """Yield the elements of a `tf.data.Dataset` as numpy values.

    In eager mode each element is flattened, every leaf is converted with
    `.numpy()`, and the element's structure is restored. Otherwise a
    one-shot iterator is evaluated in a session until it is exhausted.

    Args:
      dataset: the dataset to iterate over.

    Yields:
      One converted element per dataset item.
    """
    if not tf.executing_eagerly():
        next_item = dataset.make_one_shot_iterator().get_next()
        with utils.nogpu_session() as session:
            try:
                while True:
                    yield session.run(next_item)
            except tf.errors.OutOfRangeError:
                # Raised when the iterator is exhausted: end the generator.
                return
        return

    for element in dataset:
        leaves = tf.contrib.framework.nest.flatten(element)
        arrays = [leaf.numpy() for leaf in leaves]
        yield tf.contrib.framework.nest.pack_sequence_as(element, arrays)
Ejemplo n.º 14
0
 def _generate_examples(self, archive):
   """Yield one record per usable `.jpg` member of the archive.

   Args:
     archive: iterable of `(filepath, fobj)` pairs.

   Yields:
     Dicts with the keys `"file_name"`, `"image"` and `"label"`.
   """
   skip = len("SUN397")
   with tf.Graph().as_default(), utils.nogpu_session() as sess:
     for filepath, fobj in archive:
       if not filepath.endswith(".jpg"):
         continue
       if filepath in _SUN397_IGNORE_IMAGES:
         continue
       # Every path in the tar.gz starts with SUN397/...; drop that prefix.
       filename = filepath[skip:]
       # The label is the directory part of the filename, e.g.
       #   /c/car_interior/backseat/sun_aenygxwhhmjtisnf.jpg
       #   -> /c/car_interior/backseat
       parts = filename.split("/")
       label = "/".join(parts[:-1])
       image = _process_image_file(fobj, sess, filepath)
       yield {
           "file_name": filename,
           "image": image,
           "label": label,
       }
Ejemplo n.º 15
0
def as_numpy(dataset, graph=None):
    """Converts a `tf.data.Dataset` to an iterable of NumPy arrays.

    `as_numpy` converts a possibly nested structure of `tf.data.Dataset`s
    and `tf.Tensor`s to iterables of NumPy arrays and NumPy arrays,
    respectively.

    Args:
      dataset: a possibly nested structure of `tf.data.Dataset`s and/or
        `tf.Tensor`s.
      graph: `tf.Graph`, optional, explicitly set the graph to use.

    Returns:
      A structure matching `dataset` where `tf.data.Dataset`s are converted to
      generators of NumPy arrays and `tf.Tensor`s are converted to NumPy
      arrays.

    Raises:
      ValueError: if any leaf of `dataset` is neither a `tf.Tensor` nor a
        `tf.data.Dataset`.
    """
    nested_ds = dataset
    del dataset

    # Flatten
    flat_ds = tf.nest.flatten(nested_ds)
    flat_np = []

    # Type check for Tensors and Datasets
    for ds_el in flat_ds:
        if not isinstance(ds_el, (tf.Tensor, tf.data.Dataset)):
            # Build the type summary only when it is needed for the message.
            types = tf.nest.pack_sequence_as(
                nested_ds, [type(el) for el in flat_ds])
            # Wrap in a 1-tuple: `types` may itself be a tuple, which `%`
            # would otherwise unpack as several format arguments.
            raise ValueError("Arguments to as_numpy must be tf.Tensors or "
                             "tf.data.Datasets. Got: %s" % (types,))

    if tf.executing_eagerly():
        # Eager mode
        for ds_el in flat_ds:
            if isinstance(ds_el, tf.Tensor):
                np_el = ds_el.numpy()
            elif isinstance(ds_el, tf.data.Dataset):
                np_el = _eager_dataset_iterator(ds_el)
            else:
                # Unreachable: the type check above rejects anything else.
                assert False
            flat_np.append(np_el)
    else:
        # Graph mode

        # First create iterators for datasets. At this point each entry is
        # the `get_next()` op of a one-shot iterator.
        with utils.maybe_with_graph(graph, create_if_none=False):
            ds_iters = [
                tf.compat.v1.data.make_one_shot_iterator(ds_el).get_next()
                for ds_el in flat_ds if _is_ds(ds_el)
            ]
        ds_iters = [
            _graph_dataset_iterator(ds_iter, graph) for ds_iter in ds_iters
        ]

        # Then create numpy arrays for tensors
        with utils.nogpu_session(
                graph) as sess:  # Shared session for tf.Tensor
            # Calling sess.run once so that randomness is shared.
            np_arrays = sess.run(
                [tensor for tensor in flat_ds if not _is_ds(tensor)])

        # Merge the dataset iterators and np arrays
        iter_ds = iter(ds_iters)
        iter_array = iter(np_arrays)
        flat_np = [
            next(iter_ds) if _is_ds(ds_el) else next(iter_array)
            for ds_el in flat_ds
        ]

    # Nest
    return tf.nest.pack_sequence_as(nested_ds, flat_np)
Ejemplo n.º 16
0
def as_numpy(dataset, *, graph=None):
    """Converts a `tf.data.Dataset` to an iterable of NumPy arrays.

    `as_numpy` converts a possibly nested structure of `tf.data.Dataset`s
    and `tf.Tensor`s to iterables of NumPy arrays and NumPy arrays,
    respectively.

    Note that because TensorFlow has support for ragged tensors and NumPy has
    no equivalent representation,
    [`tf.RaggedTensor`s](https://www.tensorflow.org/api_docs/python/tf/RaggedTensor)
    are left as-is for the user to deal with them (e.g. using `to_list()`).
    In TF 1 (i.e. graph mode), `tf.RaggedTensor`s are returned as
    `tf.ragged.RaggedTensorValue`s.

    Example:

    ```
    ds = tfds.load(name="mnist", split="train")
    ds_numpy = tfds.as_numpy(ds)  # Convert `tf.data.Dataset` to Python generator
    for ex in ds_numpy:
      # `{'image': np.array(shape=(28, 28, 1)), 'labels': np.array(shape=())}`
      print(ex)
    ```

    Args:
      dataset: a possibly nested structure of `tf.data.Dataset`s and/or
        `tf.Tensor`s.
      graph: `tf.Graph`, optional, explicitly set the graph to use.

    Returns:
      A structure matching `dataset` where `tf.data.Dataset`s are converted to
      generators of NumPy arrays and `tf.Tensor`s are converted to NumPy
      arrays.

    Raises:
      ValueError: if any leaf of `dataset` is not a `tf.Tensor`, a
        `tf.RaggedTensor` or a dataset.
    """
    nested_ds = dataset
    del dataset

    # Flatten
    flat_ds = tf.nest.flatten(nested_ds)
    flat_np = []

    # Type check for Tensors and Datasets
    for ds_el in flat_ds:
        if not (isinstance(ds_el, (tf.Tensor, tf.RaggedTensor))
                or tf_compat.is_dataset(ds_el)):
            # Build the type summary only when it is needed for the message.
            types = tf.nest.pack_sequence_as(
                nested_ds, [type(el) for el in flat_ds])
            # Wrap in a 1-tuple: `types` may itself be a tuple, which `%`
            # would otherwise unpack as several format arguments.
            raise ValueError("Arguments to as_numpy must be tf.Tensors or "
                             "tf.data.Datasets. Got: %s" % (types,))

    if tf.executing_eagerly():
        # Eager mode
        for ds_el in flat_ds:
            if isinstance(ds_el, tf.Tensor):
                np_el = ds_el.numpy()
            elif isinstance(ds_el, tf.RaggedTensor):
                # No NumPy equivalent: hand the ragged tensor back untouched.
                np_el = ds_el
            elif tf_compat.is_dataset(ds_el):
                np_el = _eager_dataset_iterator(ds_el)
            else:
                # Unreachable: the type check above rejects anything else.
                assert False
            flat_np.append(np_el)
    else:
        # Graph mode

        # First create iterators for datasets
        with utils.maybe_with_graph(graph, create_if_none=False):
            ds_iters = [
                tf.compat.v1.data.make_initializable_iterator(ds_el)
                for ds_el in flat_ds if tf_compat.is_dataset(ds_el)
            ]
        ds_iters = [
            _graph_dataset_iterator(ds_iter, graph) for ds_iter in ds_iters
        ]

        # Then create numpy arrays for tensors
        with utils.nogpu_session(
                graph) as sess:  # Shared session for tf.Tensor
            # Calling sess.run once so that randomness is shared.
            np_arrays = sess.run([
                tensor for tensor in flat_ds
                if not tf_compat.is_dataset(tensor)
            ])

        # Merge the dataset iterators and np arrays
        iter_ds = iter(ds_iters)
        iter_array = iter(np_arrays)
        flat_np = [
            next(iter_ds) if tf_compat.is_dataset(ds_el) else next(iter_array)
            for ds_el in flat_ds
        ]

    # Nest
    return tf.nest.pack_sequence_as(nested_ds, flat_np)
Ejemplo n.º 17
0
def dataset_as_numpy(dataset, graph=None):
  """Converts a `tf.data.Dataset` to an iterable of NumPy arrays.

  `dataset_as_numpy` converts a possibly nested structure of `tf.data.Dataset`s
  and `tf.Tensor`s to iterables of NumPy arrays and NumPy arrays, respectively.

  Args:
    dataset: a possibly nested structure of `tf.data.Dataset`s and/or
      `tf.Tensor`s.
    graph: `tf.Graph`, optional, explicitly set the graph to use.

  Returns:
    A structure matching `dataset` where `tf.data.Dataset`s are converted to
    generators of NumPy arrays and `tf.Tensor`s are converted to NumPy arrays.

  Raises:
    ValueError: if any leaf of `dataset` is neither a `tf.Tensor` nor a
      `tf.data.Dataset`.
  """
  nested_ds = dataset
  del dataset

  # Flatten
  flat_ds = tf.contrib.framework.nest.flatten(nested_ds)
  flat_np = []

  # Type check for Tensors and Datasets
  for ds_el in flat_ds:
    if not isinstance(ds_el, (tf.Tensor, tf.data.Dataset)):
      # Build the type summary only when it is needed for the message.
      types = tf.contrib.framework.nest.pack_sequence_as(
          nested_ds, [type(el) for el in flat_ds])
      # Wrap in a 1-tuple: `types` may itself be a tuple, which `%` would
      # otherwise unpack as several format arguments.
      raise ValueError("Arguments to dataset_as_numpy must be tf.Tensors or "
                       "tf.data.Datasets. Got: %s" % (types,))

  if tf.executing_eagerly():
    # Eager mode
    for ds_el in flat_ds:
      if isinstance(ds_el, tf.Tensor):
        np_el = ds_el.numpy()
      elif isinstance(ds_el, tf.data.Dataset):
        np_el = _eager_dataset_iterator(ds_el)
      else:
        # Unreachable: the type check above rejects anything else.
        assert False
      flat_np.append(np_el)
  else:
    # Graph mode

    # First create necessary graph ops. Entries stay None for tensors and
    # hold the one-shot iterator's `get_next()` op for datasets.
    ds_iters = [None] * len(flat_ds)
    with utils.maybe_with_graph(graph, create_if_none=False):
      for i, ds_el in enumerate(flat_ds):
        if isinstance(ds_el, tf.data.Dataset):
          ds_iters[i] = tf.compat.v1.data.make_one_shot_iterator(
              ds_el).get_next()

    # Then create NumPy items
    # Shared session for tf.Tensor runs
    with utils.nogpu_session(graph) as sess:
      for ds_iter, ds_el in zip(ds_iters, flat_ds):
        if ds_iter is None:
          # Tensor
          np_el = sess.run(ds_el)
        else:
          # Dataset
          np_el = _graph_dataset_iterator(ds_iter, graph)
        flat_np.append(np_el)

  # Nest
  return tf.contrib.framework.nest.pack_sequence_as(nested_ds, flat_np)