def _get_png():
    """Builds a random picture and returns it PNG-encoded.

    The picture comes from `fake_data_utils.get_random_picture(HEIGHT, WIDTH)`
    and the encoding op is evaluated in a `utils.nogpu_session()`.

    Returns:
      The value produced by running the `tf.image.encode_png` op.
    """
    picture = fake_data_utils.get_random_picture(HEIGHT, WIDTH)
    encode_op = tf.image.encode_png(picture)
    with utils.nogpu_session() as session:
        return session.run(encode_op)
def _graph_dataset_iterator(ds_item, graph=None):
    """Yields evaluated values of `ds_item` until the dataset is exhausted.

    Args:
      ds_item: the graph element to evaluate repeatedly (presumably the
        `get_next()` op of a dataset iterator -- confirm against callers).
      graph: optional graph forwarded to `utils.nogpu_session`.

    Yields:
      The result of each `sess.run(ds_item)` call.
    """
    with utils.nogpu_session(graph) as session:
        while True:
            try:
                yield session.run(ds_item)
            except tf.errors.OutOfRangeError:
                # Raised once there is nothing left to read: stop iterating.
                return
def _nested_to_numpy_graph(ds_nested: Tree[TensorflowElem]) -> Tree[NumpyElem]:
    """Converts a nested structure of TF elements to its NumPy counterpart.

    Datasets are wrapped in `_IterableDataset` objects; every other element is
    evaluated in a single `sess.run` call.

    Args:
      ds_nested: nested structure whose leaves are datasets or tensors.

    Returns:
      A structure shaped like `ds_nested` holding the converted leaves.
    """
    leaves = tf.nest.flatten(ds_nested)

    dataset_iterables = []
    tensors = []
    for leaf in leaves:
        if not tf_compat.is_dataset(leaf):
            tensors.append(leaf)
            continue
        # Remember the graph that is current right now, so that calling
        # `iter(ds)` several times reuses the graph in which the conversion
        # was requested.
        current_graph = tf.compat.v1.get_default_graph()
        tf_iterator = tf.compat.v1.data.make_initializable_iterator(leaf)
        dataset_iterables.append(
            _IterableDataset(_graph_dataset_iterator, tf_iterator,
                             current_graph))

    # Evaluate all plain tensors together in one shared session.
    if tensors:
        with utils.nogpu_session() as session:
            tensors = session.run(tensors)

    # Interleave converted datasets and arrays back in the original order.
    pending_datasets = iter(dataset_iterables)
    pending_arrays = iter(tensors)
    converted = []
    for leaf in leaves:
        if tf_compat.is_dataset(leaf):
            converted.append(next(pending_datasets))
        else:
            converted.append(next(pending_arrays))
    return tf.nest.pack_sequence_as(ds_nested, converted)
def _get_jpeg(height, width):
    """Builds a random picture and returns it JPEG-encoded.

    Args:
      height: forwarded to `fake_data_utils.get_random_picture`.
      width: forwarded to `fake_data_utils.get_random_picture`.

    Returns:
      The value produced by running the `tf.image.encode_jpeg` op.
    """
    picture = fake_data_utils.get_random_picture(height, width)
    encode_op = tf.image.encode_jpeg(picture)
    with utils.nogpu_session() as session:
        return session.run(encode_op)
def _generate_examples(self, archive, subset_images):
    """Yields `(file_name, record)` pairs for the requested archive members.

    Args:
      archive: iterable of `(filepath, fobj)` pairs.
      subset_images: container of file names (prefix-stripped, with `/`
        separators) that should be emitted.

    Yields:
      Tuples `(file_name, record)` where `record` has the keys `"file_name"`,
      `"image"` and `"label"`.
    """
    root_len = len("SUN397")
    with tf.Graph().as_default(), utils.nogpu_session() as session:
        for member_path, member_file in archive:
            # Every entry of the tar.gz lives under SUN397/...; drop that
            # prefix and normalise Windows path separators.
            name = member_path[root_len:].replace("\\", "/")
            if name not in subset_images:
                continue
            # The class is the directory part of the file name, e.g.
            #   /c/car_interior/backseat/sun_aenygxwhhmjtisnf.jpg
            #   -> /c/car_interior/backseat
            class_name = name.rpartition("/")[0]
            decoded = _process_image_file(
                member_file,
                session,
                name,
                quality=self.builder_config.quality,
                target_pixels=self.builder_config.target_pixels)
            yield name, {
                "file_name": name,
                "image": decoded,
                "label": class_name,
            }
def _get_random_jpeg():
    """Writes a random JPEG picture to a temporary file.

    The file is created with `delete=False`, so it stays on disk after this
    function returns; removing it is up to the caller.

    Returns:
      A tuple `(path, image.shape[0], image.shape[1])` where `path` is the
      name of the written `.JPEG` file (the two shape entries are presumably
      height and width -- confirm against `_get_random_picture`).
    """
    image = _get_random_picture()
    jpeg = tf.image.encode_jpeg(image)
    with utils.nogpu_session() as sess:
        res = sess.run(jpeg)
    # The context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(
        delete=False, mode='wb', suffix='.JPEG') as fobj:
        fobj.write(res)
    return fobj.name, image.shape[0], image.shape[1]
def get_random_jpeg(height=None, width=None):
    """Writes a random JPEG picture to a temporary file and returns its path.

    The file is created with `delete=False`, so it stays on disk after this
    function returns; removing it is up to the caller.

    Args:
      height: forwarded to `get_random_picture`.
      width: forwarded to `get_random_picture`.

    Returns:
      Path of the written `.JPEG` file.
    """
    image = get_random_picture(height, width)
    jpeg = tf.image.encode_jpeg(image)
    with utils.nogpu_session() as sess:
        res = sess.run(jpeg)
    # The context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(
        delete=False, mode='wb', suffix='.JPEG') as fobj:
        fobj.write(res)
    return fobj.name
def get_random_png(height=None, width=None, channels=CHANNELS_NB):
    """Writes a random PNG picture to a temporary file and returns its path.

    The file is created with `delete=False`, so it stays on disk after this
    function returns; removing it is up to the caller.

    Args:
      height: forwarded to `get_random_picture`.
      width: forwarded to `get_random_picture`.
      channels: forwarded to `get_random_picture`.

    Returns:
      Path of the written `.PNG` file.
    """
    image = get_random_picture(height, width, channels)
    png = tf.image.encode_png(image)
    with utils.nogpu_session() as sess:
        res = sess.run(png)
    # The context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(
        delete=False, mode='wb', suffix='.PNG') as fobj:
        fobj.write(res)
    return fobj.name
def get_random_wav_c2(channels=2,
                      duration=_AUDIO_DURATION,
                      sample=_SAMPLE_RATE):
    """Writes random multi-channel WAV audio to a temp file; returns its path.

    Args:
      channels: number of columns the random audio is reshaped to (2 by
        default).
      duration: forwarded to `get_random_audio`.
      sample: forwarded to `get_random_audio` and used as the sample rate for
        `tf.audio.encode_wav`.

    Returns:
      Path of the written `.wav` file (created with `delete=False`).
    """
    samples = get_random_audio(duration, sample)
    multi_channel = samples.reshape(-1, channels)
    encode_op = tf.audio.encode_wav(multi_channel, sample)
    with utils.nogpu_session() as session:
        wav_bytes = session.run(encode_op)
    with tempfile.NamedTemporaryFile(
        delete=False, mode='wb', suffix='.wav') as wav_file:
        wav_file.write(wav_bytes)
    return wav_file.name
def _graph_dataset_iterator(ds_iter, graph: tf.Graph) -> Iterator[NumpyElem]:
    """Turns a `tf.data` iterator into a Python generator.

    Args:
      ds_iter: iterator exposing `initializer` and `get_next()`.
      graph: graph made default while the ops are fetched and the session
        runs.

    Yields:
      The result of each `sess.run` on the iterator's next-element op.
    """
    with graph.as_default():
        init_op = ds_iter.initializer
        next_op = ds_iter.get_next()
        with utils.nogpu_session() as session:
            # (Re)start the iterator before reading from it.
            session.run(init_op)
            while True:
                try:
                    yield session.run(next_op)
                except tf.errors.OutOfRangeError:
                    # Dataset exhausted.
                    return
def _graph_dataset_iterator(ds_iter, graph=None):
    """Turns a `tf.data` iterator into a Python generator.

    Args:
      ds_iter: iterator exposing `initializer` and `get_next()`.
      graph: optional graph, forwarded to `utils.maybe_with_graph` and
        `utils.nogpu_session`.

    Yields:
      The result of each `sess.run` on the iterator's next-element op.
    """
    with utils.maybe_with_graph(graph, create_if_none=False):
        init_op = ds_iter.initializer
        next_op = ds_iter.get_next()

    with utils.nogpu_session(graph) as session:
        # (Re)start the iterator before reading from it.
        session.run(init_op)
        while True:
            try:
                yield session.run(next_op)
            except tf.errors.OutOfRangeError:
                # Dataset exhausted.
                return
def get_random_png(height=None, width=None, channels=CHANNELS_NB):
    """Writes a random PNG picture to a temporary file and returns its path.

    The file is created with `delete=False`, so it stays on disk after this
    function returns; removing it is up to the caller.

    Args:
      height: target height passed to the nearest-neighbor resize.
      width: target width passed to the nearest-neighbor resize.
      channels: forwarded to `get_random_picture`.

    Returns:
      Path of the written `.PNG` file.
    """
    # Big randomly generated pngs take large amounts of diskspace.
    # Instead, we resize a 4x4 random image to the png size.
    image = get_random_picture(4, 4, channels)
    # The resize op is fed a batch: add a leading axis, then take it off.
    image = tf.image.resize_nearest_neighbor(
        tf.expand_dims(image, 0), (height, width))[0]
    png = tf.image.encode_png(image)
    with utils.nogpu_session() as sess:
        res = sess.run(png)
    # The context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(
        delete=False, mode='wb', suffix='.PNG') as fobj:
        fobj.write(res)
    return fobj.name
def iterate_over_dataset(dataset):
    """Yields the elements of a `tf.data.Dataset` as NumPy values.

    In eager mode every leaf of each element is converted with `.numpy()`;
    in graph mode elements are read through a one-shot iterator evaluated in
    a `utils.nogpu_session()`.

    Args:
      dataset: the dataset to iterate over.

    Yields:
      One converted element per dataset element.
    """
    if not tf.executing_eagerly():
        next_op = dataset.make_one_shot_iterator().get_next()
        with utils.nogpu_session() as session:
            while True:
                try:
                    yield session.run(next_op)
                except tf.errors.OutOfRangeError:
                    # Dataset exhausted.
                    break
    else:
        for element in dataset:
            leaves = tf.contrib.framework.nest.flatten(element)
            arrays = [leaf.numpy() for leaf in leaves]
            yield tf.contrib.framework.nest.pack_sequence_as(element, arrays)
def _generate_examples(self, archive):
    """Yields one record per usable `.jpg` member of the archive.

    Members that do not end in `.jpg` or that are listed in
    `_SUN397_IGNORE_IMAGES` are skipped.

    Args:
      archive: iterable of `(filepath, fobj)` pairs.

    Yields:
      Dicts with the keys `"file_name"`, `"image"` and `"label"`.
    """
    root_len = len("SUN397")
    with tf.Graph().as_default(), utils.nogpu_session() as session:
        for member_path, member_file in archive:
            if not member_path.endswith(".jpg"):
                continue
            if member_path in _SUN397_IGNORE_IMAGES:
                continue
            # Every entry of the tar.gz lives under SUN397/...
            name = member_path[root_len:]
            # The class is the directory part of the file name, e.g.
            #   /c/car_interior/backseat/sun_aenygxwhhmjtisnf.jpg
            #   -> /c/car_interior/backseat
            class_name = name.rpartition("/")[0]
            decoded = _process_image_file(member_file, session, member_path)
            yield {
                "file_name": name,
                "image": decoded,
                "label": class_name,
            }
def as_numpy(dataset, graph=None):
    """Converts a `tf.data.Dataset` to an iterable of NumPy arrays.

    `as_numpy` converts a possibly nested structure of `tf.data.Dataset`s
    and `tf.Tensor`s to iterables of NumPy arrays and NumPy arrays,
    respectively.

    Args:
      dataset: a possibly nested structure of `tf.data.Dataset`s and/or
        `tf.Tensor`s.
      graph: `tf.Graph`, optional, explicitly set the graph to use.

    Returns:
      A structure matching `dataset` where `tf.data.Dataset`s are converted
      to generators of NumPy arrays and `tf.Tensor`s are converted to NumPy
      arrays.

    Raises:
      ValueError: if a leaf of `dataset` is neither a `tf.Tensor` nor a
        `tf.data.Dataset`.
    """
    nested_ds = dataset
    del dataset

    # Flatten
    flat_ds = tf.nest.flatten(nested_ds)
    flat_np = []

    # Type check for Tensors and Datasets
    for ds_el in flat_ds:
        if not isinstance(ds_el, (tf.Tensor, tf.data.Dataset)):
            # The type summary is only needed for the error message, so it
            # is built here instead of once per element.
            types = tf.nest.pack_sequence_as(
                nested_ds, [type(el) for el in flat_ds])
            # Wrap in a 1-tuple: `types` may itself be a tuple, which `%`
            # would otherwise unpack as several format arguments.
            raise ValueError("Arguments to as_numpy must be tf.Tensors or "
                             "tf.data.Datasets. Got: %s" % (types,))

    if tf.executing_eagerly():
        # Eager mode
        for ds_el in flat_ds:
            if isinstance(ds_el, tf.Tensor):
                np_el = ds_el.numpy()
            elif isinstance(ds_el, tf.data.Dataset):
                np_el = _eager_dataset_iterator(ds_el)
            else:
                # Unreachable: the type check above rejected everything else.
                assert False
            flat_np.append(np_el)
    else:
        # Graph mode

        # First create iterators for datasets
        with utils.maybe_with_graph(graph, create_if_none=False):
            ds_iters = [
                tf.compat.v1.data.make_one_shot_iterator(ds_el).get_next()
                for ds_el in flat_ds
                if _is_ds(ds_el)
            ]
        ds_iters = [
            _graph_dataset_iterator(ds_iter, graph) for ds_iter in ds_iters
        ]

        # Then create numpy arrays for tensors
        with utils.nogpu_session(graph) as sess:  # Shared session for tf.Tensor
            # Calling sess.run once so that randomness is shared.
            np_arrays = sess.run(
                [tensor for tensor in flat_ds if not _is_ds(tensor)])

        # Merge the dataset iterators and np arrays
        iter_ds = iter(ds_iters)
        iter_array = iter(np_arrays)
        flat_np = [
            next(iter_ds) if _is_ds(ds_el) else next(iter_array)
            for ds_el in flat_ds
        ]

    # Nest
    return tf.nest.pack_sequence_as(nested_ds, flat_np)
def as_numpy(dataset, *, graph=None):
    """Converts a `tf.data.Dataset` to an iterable of NumPy arrays.

    `as_numpy` converts a possibly nested structure of `tf.data.Dataset`s
    and `tf.Tensor`s to iterables of NumPy arrays and NumPy arrays,
    respectively.

    Note that because TensorFlow has support for ragged tensors and NumPy has
    no equivalent representation,
    [`tf.RaggedTensor`s](https://www.tensorflow.org/api_docs/python/tf/RaggedTensor)
    are left as-is for the user to deal with them (e.g. using `to_list()`).
    In TF 1 (i.e. graph mode), `tf.RaggedTensor`s are returned as
    `tf.ragged.RaggedTensorValue`s.

    Example:

    ```
    ds = tfds.load(name="mnist", split="train")
    ds_numpy = tfds.as_numpy(ds)  # Convert `tf.data.Dataset` to Python generator
    for ex in ds_numpy:
      # `{'image': np.array(shape=(28, 28, 1)), 'labels': np.array(shape=())}`
      print(ex)
    ```

    Args:
      dataset: a possibly nested structure of `tf.data.Dataset`s and/or
        `tf.Tensor`s.
      graph: `tf.Graph`, optional, explicitly set the graph to use.

    Returns:
      A structure matching `dataset` where `tf.data.Dataset`s are converted
      to generators of NumPy arrays and `tf.Tensor`s are converted to NumPy
      arrays.

    Raises:
      ValueError: if a leaf of `dataset` is not a `tf.Tensor`, a
        `tf.RaggedTensor` or a dataset.
    """
    nested_ds = dataset
    del dataset

    # Flatten
    flat_ds = tf.nest.flatten(nested_ds)
    flat_np = []

    # Type check for Tensors and Datasets
    for ds_el in flat_ds:
        if not (isinstance(ds_el, (tf.Tensor, tf.RaggedTensor)) or
                tf_compat.is_dataset(ds_el)):
            # The type summary is only needed for the error message, so it
            # is built here instead of once per element.
            types = tf.nest.pack_sequence_as(
                nested_ds, [type(el) for el in flat_ds])
            # Wrap in a 1-tuple: `types` may itself be a tuple, which `%`
            # would otherwise unpack as several format arguments.
            raise ValueError("Arguments to as_numpy must be tf.Tensors or "
                             "tf.data.Datasets. Got: %s" % (types,))

    if tf.executing_eagerly():
        # Eager mode
        for ds_el in flat_ds:
            if isinstance(ds_el, tf.Tensor):
                np_el = ds_el.numpy()
            elif isinstance(ds_el, tf.RaggedTensor):
                # No NumPy equivalent: handed back unchanged.
                np_el = ds_el
            elif tf_compat.is_dataset(ds_el):
                np_el = _eager_dataset_iterator(ds_el)
            else:
                # Unreachable: the type check above rejected everything else.
                assert False
            flat_np.append(np_el)
    else:
        # Graph mode

        # First create iterators for datasets
        with utils.maybe_with_graph(graph, create_if_none=False):
            ds_iters = [
                tf.compat.v1.data.make_initializable_iterator(ds_el)
                for ds_el in flat_ds
                if tf_compat.is_dataset(ds_el)
            ]
        ds_iters = [
            _graph_dataset_iterator(ds_iter, graph) for ds_iter in ds_iters
        ]

        # Then create numpy arrays for tensors
        with utils.nogpu_session(graph) as sess:  # Shared session for tf.Tensor
            # Calling sess.run once so that randomness is shared.
            np_arrays = sess.run([
                tensor for tensor in flat_ds
                if not tf_compat.is_dataset(tensor)
            ])

        # Merge the dataset iterators and np arrays
        iter_ds = iter(ds_iters)
        iter_array = iter(np_arrays)
        flat_np = [
            next(iter_ds) if tf_compat.is_dataset(ds_el) else next(iter_array)
            for ds_el in flat_ds
        ]

    # Nest
    return tf.nest.pack_sequence_as(nested_ds, flat_np)
def dataset_as_numpy(dataset, graph=None):
    """Converts a `tf.data.Dataset` to an iterable of NumPy arrays.

    `dataset_as_numpy` converts a possibly nested structure of
    `tf.data.Dataset`s and `tf.Tensor`s to iterables of NumPy arrays and
    NumPy arrays, respectively.

    Args:
      dataset: a possibly nested structure of `tf.data.Dataset`s and/or
        `tf.Tensor`s.
      graph: `tf.Graph`, optional, explicitly set the graph to use.

    Returns:
      A structure matching `dataset` where `tf.data.Dataset`s are converted
      to generators of NumPy arrays and `tf.Tensor`s are converted to NumPy
      arrays.

    Raises:
      ValueError: if a leaf of `dataset` is neither a `tf.Tensor` nor a
        `tf.data.Dataset`.
    """
    nested_ds = dataset
    del dataset

    # Flatten
    flat_ds = tf.contrib.framework.nest.flatten(nested_ds)
    flat_np = []

    # Type check for Tensors and Datasets
    for ds_el in flat_ds:
        if not isinstance(ds_el, (tf.Tensor, tf.data.Dataset)):
            # The type summary is only needed for the error message, so it
            # is built here instead of once per element.
            types = tf.contrib.framework.nest.pack_sequence_as(
                nested_ds, [type(el) for el in flat_ds])
            # Wrap in a 1-tuple: `types` may itself be a tuple, which `%`
            # would otherwise unpack as several format arguments.
            raise ValueError(
                "Arguments to dataset_as_numpy must be tf.Tensors or "
                "tf.data.Datasets. Got: %s" % (types,))

    if tf.executing_eagerly():
        # Eager mode
        for ds_el in flat_ds:
            if isinstance(ds_el, tf.Tensor):
                np_el = ds_el.numpy()
            elif isinstance(ds_el, tf.data.Dataset):
                np_el = _eager_dataset_iterator(ds_el)
            else:
                # Unreachable: the type check above rejected everything else.
                assert False
            flat_np.append(np_el)
    else:
        # Graph mode

        # First create necessary graph ops; entries stay `None` for tensors.
        ds_iters = [None] * len(flat_ds)
        with utils.maybe_with_graph(graph, create_if_none=False):
            for i, ds_el in enumerate(flat_ds):
                if isinstance(ds_el, tf.data.Dataset):
                    ds_iters[i] = tf.compat.v1.data.make_one_shot_iterator(
                        ds_el).get_next()

        # Then create NumPy items
        # Shared session for tf.Tensor runs
        with utils.nogpu_session(graph) as sess:
            for ds_iter, ds_el in zip(ds_iters, flat_ds):
                if ds_iter is None:
                    # Tensor
                    np_el = sess.run(ds_el)
                else:
                    # Dataset
                    np_el = _graph_dataset_iterator(ds_iter, graph)
                flat_np.append(np_el)

    # Nest
    return tf.contrib.framework.nest.pack_sequence_as(nested_ds, flat_np)