def tf_shapes(self):
    """
    :return: a dictionary of sampler output tensor shapes
    """
    output_shapes = nest.map_structure_up_to(
        self.tf_dtypes, tf.TensorShape, self.shapes)
    return output_shapes
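The reason `tf_shapes` maps "up to" the dtypes structure rather than over the shapes directly: the per-name shape tuples are leaves of the shallow dtypes dict, so each whole tuple becomes a single `tf.TensorShape`. A minimal, self-contained sketch of that behavior, using the public `tf.nest` API (an assumption on my part; the snippets in this section use TensorFlow's internal `nest` module, which behaves the same way for these inputs):

```python
import tensorflow as tf

# Shallow structure: traversal stops at its leaves (the dtypes), so each
# whole shape tuple below is passed to tf.TensorShape intact instead of
# being recursed into element by element.
dtypes = {'image': tf.float32, 'label': tf.int32}
shapes = {'image': (10, 10, 2), 'label': (10, 10, 1)}

output_shapes = tf.nest.map_structure_up_to(dtypes, tf.TensorShape, shapes)
print(output_shapes['image'])  # a TensorShape of (10, 10, 2)
```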
def testMapStructureUpTo(self):
    ab_tuple = collections.namedtuple("ab_tuple", "a, b")
    op_tuple = collections.namedtuple("op_tuple", "add, mul")
    inp_val = ab_tuple(a=2, b=3)
    inp_ops = ab_tuple(a=op_tuple(add=1, mul=2), b=op_tuple(add=2, mul=3))
    out = nest.map_structure_up_to(
        inp_val, lambda val, ops: (val + ops.add) * ops.mul, inp_val, inp_ops)
    self.assertEqual(out.a, 6)
    self.assertEqual(out.b, 15)

    data_list = ((2, 4, 6, 8), ((1, 3, 5, 7, 9), (3, 5, 7)))
    name_list = ("evens", ("odds", "primes"))
    out = nest.map_structure_up_to(
        name_list, lambda name, sec: "first_{}_{}".format(len(sec), name),
        name_list, data_list)
    self.assertEqual(out,
                     ("first_4_evens", ("first_5_odds", "first_3_primes")))
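For reference, the namedtuple case from this test runs unchanged against the public `tf.nest.map_structure_up_to` (a sketch assuming TensorFlow 2.x; the internal `nest` module exercised above has the same semantics):

```python
import collections
import tensorflow as tf

ab_tuple = collections.namedtuple("ab_tuple", "a, b")
op_tuple = collections.namedtuple("op_tuple", "add, mul")

inp_val = ab_tuple(a=2, b=3)
inp_ops = ab_tuple(a=op_tuple(add=1, mul=2), b=op_tuple(add=2, mul=3))

# `inp_val` is the shallow structure: each whole op_tuple leaf of `inp_ops`
# is handed to the lambda, rather than its `add`/`mul` fields separately.
out = tf.nest.map_structure_up_to(
    inp_val, lambda val, ops: (val + ops.add) * ops.mul, inp_val, inp_ops)
assert out == ab_tuple(a=6, b=15)  # (2+1)*2 and (3+2)*3
```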
def from_string_handle(string_handle, output_types, output_shapes=None):
  """Creates a new, uninitialized `Iterator` based on the given handle.

  This method allows you to define a "feedable" iterator where you can choose
  between concrete iterators by feeding a value in a @{tf.Session.run} call.
  In that case, `string_handle` would be a @{tf.placeholder}, and you would
  feed it with the value of @{tf.data.Iterator.string_handle} in each step.

  For example, if you had two iterators that marked the current position in
  a training dataset and a test dataset, you could choose which to use in
  each step as follows:

  ```python
  train_iterator = tf.data.Dataset(...).make_one_shot_iterator()
  train_iterator_handle = sess.run(train_iterator.string_handle())

  test_iterator = tf.data.Dataset(...).make_one_shot_iterator()
  test_iterator_handle = sess.run(test_iterator.string_handle())

  handle = tf.placeholder(tf.string, shape=[])
  iterator = tf.data.Iterator.from_string_handle(
      handle, train_iterator.output_types)

  next_element = iterator.get_next()
  loss = f(next_element)

  train_loss = sess.run(loss, feed_dict={handle: train_iterator_handle})
  test_loss = sess.run(loss, feed_dict={handle: test_iterator_handle})
  ```

  Args:
    string_handle: A scalar `tf.Tensor` of type `tf.string` that evaluates
      to a handle produced by the `Iterator.string_handle()` method.
    output_types: A nested structure of `tf.DType` (or `tf.data.SparseType`)
      objects corresponding to each `tf.Tensor` (or `tf.SparseTensor`)
      component of an element of this dataset.
    output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects
      corresponding to each component of an element of this dataset. If
      omitted, each component will have an unconstrained shape.

  Returns:
    An `Iterator`.
  """
  output_types = nest.map_structure(dtypes.as_dtype, output_types)
  if output_shapes is None:
    output_shapes = nest.map_structure(
        lambda _: tensor_shape.TensorShape(None), output_types)
  else:
    output_shapes = nest.map_structure_up_to(
        output_types, tensor_shape.as_shape, output_shapes)
  nest.assert_same_structure(output_types, output_shapes)
  string_handle = ops.convert_to_tensor(string_handle, dtype=dtypes.string)
  iterator_resource = gen_dataset_ops.iterator_from_string_handle(
      string_handle,
      output_types=nest.flatten(sparse.unwrap_sparse_types(output_types)),
      output_shapes=nest.flatten(output_shapes))
  return Iterator(iterator_resource, None, output_types, output_shapes)
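The shape-handling prologue above recurs in most of the constructors in this section, so here it is isolated as a small standalone sketch (using public TF 2.x names such as `tf.as_dtype` and `tf.TensorShape` in place of the internal `dtypes`/`tensor_shape` modules, an assumption on my part):

```python
import tensorflow as tf

def normalize_types_and_shapes(output_types, output_shapes=None):
    """Mirror of the recurring idiom: default to fully unknown shapes,
    else coerce each user-supplied entry to a TensorShape without
    recursing into the shape tuples themselves."""
    output_types = tf.nest.map_structure(tf.as_dtype, output_types)
    if output_shapes is None:
        output_shapes = tf.nest.map_structure(
            lambda _: tf.TensorShape(None), output_types)
    else:
        output_shapes = tf.nest.map_structure_up_to(
            output_types, tf.TensorShape, output_shapes)
    tf.nest.assert_same_structure(output_types, output_shapes)
    return output_types, output_shapes

# A scalar and a rank-1 component with unknown length.
types, shapes = normalize_types_and_shapes(
    (tf.int64, tf.int64), ((), (None,)))
```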
def set_spatial_shape(self, spatial_window, source_names=None):
    """
    Set the spatial shape of the output image windows.

    ``spatial_window`` should be a dictionary of window size tuples,
    or a single window size tuple; in the latter case the same size
    is used for all output image windows.

    :param spatial_window: tuple of integers specifying the new shape,
        or a dictionary of such tuples keyed by output name
    :param source_names: list/dictionary of input source names
    :return:
    """
    win_sizes = copy.deepcopy(spatial_window)
    if isinstance(spatial_window, dict):
        for name in list(spatial_window):
            window_size = spatial_window[name]
            if isinstance(window_size,
                          (ParserNamespace, argparse.Namespace)):
                window_size = vars(window_size)
            if not isinstance(window_size, dict):
                win_sizes[name] = tuple(window_size)
            elif 'spatial_window_size' in window_size:
                win_sizes[name] = tuple(
                    window_size['spatial_window_size'])
            else:
                raise ValueError(
                    'window_sizes should be a nested dictionary')
    elif isinstance(spatial_window, (list, tuple)):
        # list or tuple of single window sizes
        win_sizes = {name: spatial_window for name in list(self._dtypes)}

    # complete window shapes based on user input and input_image sizes
    if source_names:
        spatial_shapes = _read_window_sizes(source_names, win_sizes)
    else:
        try:
            spatial_shapes = {}
            for name in list(self._dtypes):
                spatial_shapes[name] = \
                    tuple(int(win_size) for win_size in win_sizes[name])
        except ValueError:
            tf.logging.fatal("spatial window should be an array of int")
            raise
    spatial_shapes = nest.map_structure_up_to(
        self._dtypes, tuple, spatial_shapes)
    self._shapes = {
        name: _complete_partial_window_sizes(
            spatial_shapes[name], self._shapes[name])
        for name in list(self._shapes)}

    # update based on the latest spatial shapes
    self.has_dynamic_shapes = self._check_dynamic_shapes()
    if self._placeholders_dict is not None:
        self._update_placeholders_dict(n_samples=self.n_samples)
def __init__(self, dataset, output_types, output_shapes=None):
  """Creates a new dataset with the given output types and shapes.

  The given `dataset` must have a structure that is convertible:
  * `dataset.output_types` must be the same as `output_types`, modulo
    nesting.
  * Each shape in `dataset.output_shapes` must be compatible with each shape
    in `output_shapes` (if given).

  Note: This helper permits "unsafe casts" for shapes, equivalent to using
  `tf.Tensor.set_shape()` where domain-specific knowledge is available.

  Args:
    dataset: A `Dataset` object.
    output_types: A nested structure of `tf.DType` objects.
    output_shapes: (Optional.) A nested structure of `tf.TensorShape`
      objects. If omitted, the shapes will be inherited from `dataset`.

  Raises:
    ValueError: If either `output_types` or `output_shapes` is not compatible
      with the structure of `dataset`.
  """
  super(_RestructuredDataset, self).__init__()
  self._dataset = dataset

  # Validate that the types are compatible.
  output_types = nest.map_structure(dtypes.as_dtype, output_types)
  flat_original_types = nest.flatten(dataset.output_types)
  flat_new_types = nest.flatten(output_types)
  if flat_original_types != flat_new_types:
    raise ValueError(
        "Dataset with output types %r cannot be restructured to have output "
        "types %r" % (dataset.output_types, output_types))

  self._output_types = output_types

  if output_shapes is None:
    # Inherit shapes from the original `dataset`.
    self._output_shapes = nest.pack_sequence_as(
        output_types, nest.flatten(dataset.output_shapes))
  else:
    # Validate that the shapes are compatible.
    nest.assert_same_structure(output_types, output_shapes)
    flat_original_shapes = nest.flatten(dataset.output_shapes)
    flat_new_shapes = nest.flatten_up_to(output_types, output_shapes)

    for original_shape, new_shape in zip(flat_original_shapes,
                                         flat_new_shapes):
      if not original_shape.is_compatible_with(new_shape):
        raise ValueError(
            "Dataset with output shapes %r cannot be restructured to have "
            "incompatible output shapes %r" % (dataset.output_shapes,
                                               output_shapes))
    self._output_shapes = nest.map_structure_up_to(
        output_types, tensor_shape.as_shape, output_shapes)
def __init__(
        self,
        pipeline,
        output_dtypes=None,
        output_shapes=None,
        *,
        batch_size=1,
        num_threads=4,
        device_id=0,
        exec_separated=False,
        prefetch_queue_depth=2,
        cpu_prefetch_queue_depth=2,
        gpu_prefetch_queue_depth=2,
        dtypes=None,
        shapes=None):

    output_shapes = self._handle_deprecation(output_shapes, shapes, "shapes")
    output_dtypes = self._handle_deprecation(output_dtypes, dtypes, "dtypes")

    if not self._check_output_dtypes(output_dtypes):
        raise TypeError(
            ("`output_dtypes` should be provided as single tf.DType value "
             "or a tuple of tf.DType values. Got value `{}` of type `{}`.")
            .format(output_dtypes, type(output_dtypes)))

    if output_shapes is None:
        output_shapes = nest.map_structure(
            lambda _: tensor_shape.TensorShape(None), output_dtypes)
    else:
        output_shapes = nest.map_structure_up_to(
            output_dtypes, tensor_shape.as_shape, output_shapes)

    if not isinstance(output_dtypes, tuple):
        output_dtypes = (output_dtypes,)
        output_shapes = (output_shapes,)

    output_classes = nest.map_structure(lambda _: ops.Tensor, output_dtypes)

    self._pipeline = serialize_pipeline(pipeline)
    self._batch_size = batch_size
    self._num_threads = num_threads
    self._device_id = device_id
    self._exec_separated = exec_separated
    self._prefetch_queue_depth = prefetch_queue_depth
    self._cpu_prefetch_queue_depth = cpu_prefetch_queue_depth
    self._gpu_prefetch_queue_depth = gpu_prefetch_queue_depth
    self._output_shapes = output_shapes
    self._output_dtypes = output_dtypes

    self._structure = structure.convert_legacy_structure(
        self._output_dtypes, self._output_shapes, output_classes)

    super(_DALIDatasetV2, self).__init__(self._as_variant_tensor())
def __init__(self, input_dataset, batch_size, padded_shapes, padding_values):
  """Initialize `PrependFromQueueAndPaddedBatchDataset`."""
  super(_PrependFromQueueAndPaddedBatchDataset, self).__init__()
  if sparse.any_sparse(input_dataset.output_classes):
    raise TypeError(
        "Batching of padded sparse tensors is not currently supported")
  self._input_dataset = input_dataset
  self._batch_size = ops.convert_to_tensor(
      batch_size, dtype=dtypes.int64, name="batch_size")
  # pylint: disable=protected-access
  if padded_shapes is None:
    self._padded_shapes = nest.map_structure(
        dataset_ops._partial_shape_to_tensor, input_dataset.output_shapes)
  else:
    self._padded_shapes = nest.map_structure_up_to(
        input_dataset.output_shapes, dataset_ops._partial_shape_to_tensor,
        padded_shapes)
  padding_values = (
      padding_values
      if padding_values is not None else
      dataset_ops._default_padding(input_dataset))
  self._padding_values = nest.map_structure_up_to(
      input_dataset.output_shapes, dataset_ops._padding_value_to_tensor,
      padding_values, input_dataset.output_types)
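Note the two-input form used for `self._padding_values` above: `map_structure_up_to` can zip several structures against one shallow structure, passing the corresponding leaves of each to the function together. A hedged standalone sketch with the public `tf.nest` API (the names here are illustrative, not taken from the snippet):

```python
import tensorflow as tf

shapes = {'tokens': tf.TensorShape([None]), 'length': tf.TensorShape([])}
values = {'tokens': 0, 'length': -1}
types = {'tokens': tf.int64, 'length': tf.int32}

# For each leaf of `shapes`, the matching leaves of `values` and `types`
# are passed together, mirroring how padding values are paired with
# output types above.
padding = tf.nest.map_structure_up_to(
    shapes, lambda value, dtype: tf.constant(value, dtype=dtype),
    values, types)
```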
def __init__(self, variant_tensor, output_shapes, output_types,
             output_classes):
  # TODO(b/110122868): Consolidate the structure validation logic with the
  # similar logic in `Iterator.from_structure()` and
  # `Dataset.from_generator()`.
  output_types = nest.map_structure(dtypes.as_dtype, output_types)
  output_shapes = nest.map_structure_up_to(
      output_types, tensor_shape.as_shape, output_shapes)
  nest.assert_same_structure(output_types, output_shapes)
  nest.assert_same_structure(output_types, output_classes)
  self._variant_tensor = variant_tensor
  self._output_shapes = output_shapes
  self._output_types = output_types
  self._output_classes = output_classes
@classmethod
def from_data_reader_properties(cls, source_names, image_shapes,
                                image_dtypes, window_sizes=None,
                                allow_dynamic=False):
    """
    Create a window instance from input data properties.

    Each property is grouped into a dict, with pairs of
    image_name: data_value. Some input images are concatenated data
    arrays from multiple data sources.

    Example of input (see also the usage sketch after this function)::

        source_names={
            'image': (u'modality1', u'modality2'),
            'label': (u'modality3',)},
        image_shapes={
            'image': (192, 160, 192, 1, 2),
            'label': (192, 160, 192, 1, 1)},
        image_dtypes={
            'image': tf.float32,
            'label': tf.float32},
        window_sizes={
            'image': (10, 10, 2),
            'label': (10, 10, 2)}

    the ``window_sizes`` can also be::

        window_sizes={
            'modality1': (10, 10, 2),
            'modality3': (10, 10, 2)}

    or using a nested dictionary with 'spatial_window_size' (deprecated)::

        window_sizes={
            'modality1': {'spatial_window_size': (10, 10, 2)},
            'modality2': {'spatial_window_size': (10, 10, 2)},
            'modality3': {'spatial_window_size': (5, 5, 1)}}

    See ``niftynet.io.ImageReader`` for more details.

    :param source_names: input image names
    :param image_shapes: tuple of image window shapes
    :param image_dtypes: tuple of image window data types
    :param window_sizes: window sizes for the image windows
    :param allow_dynamic: if True, negative or zero entries in
        ``window_sizes`` indicate dynamic window sizes; otherwise the
        dynamic sizes will be fixed as the image shapes; this assumes
        the same image size across the dataset.
    :return: an ImageWindow instance
    """
    try:
        image_shapes = nest.map_structure_up_to(
            image_dtypes, tuple, image_shapes)
    except KeyError:
        tf.logging.fatal('window_sizes wrong format %s', window_sizes)
        raise
    # create ImageWindow instance
    window_instance = cls(shapes=image_shapes, dtypes=image_dtypes)

    if not window_sizes:
        # image window sizes not specified, defaulting to image sizes.
        return window_instance

    window_instance.set_spatial_shape(window_sizes, source_names)
    if not allow_dynamic:
        full_shape = window_instance.match_image_shapes(image_shapes)
        window_instance.set_spatial_shape(full_shape)
    return window_instance
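A hedged usage sketch assembled from the docstring's own example values; the import path for `ImageWindow` is assumed (NiftyNet ships it in `niftynet.engine.image_window`):

```python
import tensorflow as tf
from niftynet.engine.image_window import ImageWindow  # assumed path

window = ImageWindow.from_data_reader_properties(
    source_names={'image': (u'modality1', u'modality2'),
                  'label': (u'modality3',)},
    image_shapes={'image': (192, 160, 192, 1, 2),
                  'label': (192, 160, 192, 1, 1)},
    image_dtypes={'image': tf.float32, 'label': tf.float32},
    window_sizes={'image': (10, 10, 2), 'label': (10, 10, 2)})
```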
def _from_components(self, value):
  return nest.map_structure_up_to(
      self._nested_structure,
      lambda t, v: t._from_components(v),
      self._nested_structure, value)
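Hedged illustration of the `_from_components` pattern: the nested structure of structure objects is zipped against an identically nested structure of component values, and each leaf pair is recombined. Here `tf.TensorSpec` stands in for the (unspecified) structure objects and the rebuild step is reduced to a shape check; both are assumptions for illustration only.

```python
import tensorflow as tf

nested_structure = {'a': tf.TensorSpec([2], tf.float32),
                    'b': (tf.TensorSpec([], tf.int32),)}
components = {'a': tf.zeros([2]), 'b': (tf.constant(7),)}

# Each spec leaf is paired with its component value; a real
# `_from_components` would reconstruct composite tensors here.
rebuilt = tf.nest.map_structure_up_to(
    nested_structure,
    lambda spec, value: tf.ensure_shape(value, spec.shape),
    nested_structure, components)
```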
def from_generator(generator, output_types, output_shapes=None):
  """Creates a `Dataset` whose elements are generated by `generator`.

  The `generator` argument must be a callable object that returns an object
  that supports the `iter()` protocol (e.g. a generator function). The
  elements generated by `generator` must be compatible with the given
  `output_types` and (optional) `output_shapes` arguments.

  For example:

  ```python
  import itertools

  def gen():
    for i in itertools.count(1):
      yield (i, [1] * i)

  ds = Dataset.from_generator(
      gen, (tf.int64, tf.int64), (tf.TensorShape([]), tf.TensorShape([None])))
  value = ds.make_one_shot_iterator().get_next()

  sess.run(value)  # (1, array([1]))
  sess.run(value)  # (2, array([1, 1]))
  ```

  Args:
    generator: A callable object that takes no arguments and returns an
      object that supports the `iter()` protocol.
    output_types: A nested structure of `tf.DType` objects corresponding to
      each component of an element yielded by `generator`.
    output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects
      corresponding to each component of an element yielded by `generator`.

  Returns:
    A `Dataset`.
  """
  if not callable(generator):
    raise TypeError("`generator` must be callable.")
  if output_shapes is None:
    output_shapes = nest.map_structure(
        lambda _: tensor_shape.TensorShape(None), output_types)
  else:
    output_shapes = nest.map_structure_up_to(
        output_types, tensor_shape.as_shape, output_shapes)

  flattened_types = nest.flatten(output_types)
  flattened_shapes = nest.flatten(output_shapes)

  generator_state = dataset_ops.Dataset._GeneratorState(generator)

  def get_iterator_id_map_fn(unused_dummy):
    """Creates a unique `iterator_id` for each pass over the dataset.

    The "iterator_id" disambiguates between multiple concurrently
    existing iterators.

    Args:
      unused_dummy: Ignored value.

    Returns:
      A `tf.int64` tensor whose value uniquely identifies an iterator in
      `generator_state`.
    """
    return script_ops.py_func(
        generator_state.get_next_id, [], dtypes.int64, stateful=True)

  def generator_map_fn(iterator_id_t):
    """Generates the next element from iterator with ID `iterator_id_t`.

    We map this function across an infinite repetition of the
    `iterator_id_t`, and raise `StopIteration` to terminate the iteration.

    Args:
      iterator_id_t: A `tf.int64` tensor whose value uniquely identifies
        the iterator in `generator_state` from which to generate an element.

    Returns:
      A nested structure of tensors representing an element from the
      iterator.
    """

    def generator_py_func(iterator_id):
      """A `py_func` that will be called to invoke the iterator."""
      try:
        values = next(generator_state.get_iterator(iterator_id))
      except StopIteration:
        generator_state.iterator_completed(iterator_id)
        raise StopIteration("Iteration finished.")

      # Use the same _convert function from the py_func() implementation to
      # convert the returned values to arrays early, so that we can inspect
      # their values.
      # pylint: disable=protected-access
      ret_arrays = [
          script_ops.FuncRegistry._convert(ret, dtype=dtype.as_numpy_dtype)
          for ret, dtype in zip(
              nest.flatten_up_to(output_types, values), flattened_types)
      ]
      # pylint: enable=protected-access

      # Additional type and shape checking to ensure that the components
      # of the generated element match the `output_types` and
      # `output_shapes` arguments.
      for (ret_array, expected_dtype, expected_shape) in zip(
          ret_arrays, flattened_types, flattened_shapes):
        if ret_array.dtype != expected_dtype.as_numpy_dtype:
          raise TypeError(
              "`generator` yielded an element of type %s where an element "
              "of type %s was expected."
              % (ret_array.dtype, expected_dtype.as_numpy_dtype))
        if not expected_shape.is_compatible_with(ret_array.shape):
          raise ValueError(
              "`generator` yielded an element of shape %s where an element "
              "of shape %s was expected."
              % (ret_array.shape, expected_shape))

      return ret_arrays

    flat_values = script_ops.py_func(
        generator_py_func, [iterator_id_t], flattened_types, stateful=True)

    # The `py_func()` op drops the inferred shapes, so we add them back in
    # here.
    if output_shapes is not None:
      for ret_t, shape in zip(flat_values, flattened_shapes):
        ret_t.set_shape(shape)

    return nest.pack_sequence_as(output_types, flat_values)

  # This function associates each traversal of `generator` with a unique
  # iterator ID.
  def flat_map_fn(iterator_id_t):
    # First, generate an infinite dataset containing the iterator ID
    # repeated forever.
    repeated_id = Dataset.from_tensors(iterator_id_t).repeat(None)

    # The `generator_map_fn` gets the next element from the iterator with
    # the relevant ID, and raises StopIteration when that iterator contains
    # no more elements.
    return repeated_id.map(generator_map_fn)

  # A single-element dataset that, each time it is evaluated, contains a
  # freshly-generated and unique (for the returned dataset) int64
  # ID that will be used to identify the appropriate Python state, which
  # is encapsulated in `generator_state`, and captured in
  # `get_iterator_id_map_fn`.
  dummy = 0
  id_dataset = Dataset.from_tensors(dummy).map(get_iterator_id_map_fn)

  # A dataset that contains all of the elements generated by a
  # single iterator created from `generator`, identified by the
  # iterator ID contained in `id_dataset`. Lifting the iteration
  # into a flat_map here enables multiple repetitions and/or nested
  # versions of the returned dataset to be created, because it forces
  # the generation of a new ID for each version.
  return id_dataset.flat_map(flat_map_fn)
def from_structure(output_types,
                   output_shapes=None,
                   shared_name=None,
                   output_classes=None):
  """Creates a new, uninitialized `Iterator` with the given structure.

  This iterator-constructing method can be used to create an iterator that
  is reusable with many different datasets.

  The returned iterator is not bound to a particular dataset, and it has
  no `initializer`. To initialize the iterator, run the operation returned
  by `Iterator.make_initializer(dataset)`.

  The following is an example:

  ```python
  iterator = Iterator.from_structure(tf.int64, tf.TensorShape([]))

  dataset_range = Dataset.range(10)
  range_initializer = iterator.make_initializer(dataset_range)

  dataset_evens = dataset_range.filter(lambda x: x % 2 == 0)
  evens_initializer = iterator.make_initializer(dataset_evens)

  # Define a model based on the iterator; in this example, the model_fn
  # is expected to take scalar tf.int64 Tensors as input (see
  # the definition of 'iterator' above).
  prediction, loss = model_fn(iterator.get_next())

  # Train for `num_epochs`, where for each epoch, we first iterate over
  # dataset_range, and then iterate over dataset_evens.
  for _ in range(num_epochs):
    # Initialize the iterator to `dataset_range`
    sess.run(range_initializer)
    while True:
      try:
        pred, loss_val = sess.run([prediction, loss])
      except tf.errors.OutOfRangeError:
        break

    # Initialize the iterator to `dataset_evens`
    sess.run(evens_initializer)
    while True:
      try:
        pred, loss_val = sess.run([prediction, loss])
      except tf.errors.OutOfRangeError:
        break
  ```

  Args:
    output_types: A nested structure of `tf.DType` objects corresponding to
      each component of an element of this dataset.
    output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects
      corresponding to each component of an element of this dataset. If
      omitted, each component will have an unconstrained shape.
    shared_name: (Optional.) If non-empty, this iterator will be shared under
      the given name across multiple sessions that share the same devices
      (e.g. when using a remote server).
    output_classes: (Optional.) A nested structure of Python `type` objects
      corresponding to each component of an element of this iterator. If
      omitted, each component is assumed to be of type `tf.Tensor`.

  Returns:
    An `Iterator`.

  Raises:
    TypeError: If the structures of `output_shapes` and `output_types` are
      not the same.
  """
  output_types = nest.map_structure(dtypes.as_dtype, output_types)
  if output_shapes is None:
    output_shapes = nest.map_structure(
        lambda _: tensor_shape.TensorShape(None), output_types)
  else:
    output_shapes = nest.map_structure_up_to(
        output_types, tensor_shape.as_shape, output_shapes)
  if output_classes is None:
    output_classes = nest.map_structure(lambda _: ops.Tensor, output_types)
  nest.assert_same_structure(output_types, output_shapes)
  if shared_name is None:
    shared_name = ""
  iterator_resource = gen_dataset_ops.iterator(
      container="",
      shared_name=shared_name,
      output_types=nest.flatten(
          sparse.as_dense_types(output_types, output_classes)),
      output_shapes=nest.flatten(
          sparse.as_dense_shapes(output_shapes, output_classes)))
  return Iterator(iterator_resource, None, output_types, output_shapes,
                  output_classes)
def __init__(
        self,
        pipeline,
        output_dtypes=None,
        output_shapes=None,
        fail_on_device_mismatch=True,
        *,
        input_datasets=None,
        batch_size=1,
        num_threads=4,
        device_id=0,
        exec_separated=False,
        prefetch_queue_depth=2,
        cpu_prefetch_queue_depth=2,
        gpu_prefetch_queue_depth=2,
        dtypes=None,
        shapes=None):

    output_shapes = self._handle_deprecation(output_shapes, shapes, "shapes")
    output_dtypes = self._handle_deprecation(output_dtypes, dtypes, "dtypes")

    if not self._check_dtypes(output_dtypes, tf.DType):
        raise TypeError(
            "`output_dtypes` should be provided as single tf.DType value "
            f"or a tuple of tf.DType values. Got value `{output_dtypes}` "
            f"of the type `{type(output_dtypes)}`.")

    if output_shapes is None:
        output_shapes = nest.map_structure(
            lambda _: tensor_shape.TensorShape(None), output_dtypes)
    else:
        output_shapes = nest.map_structure_up_to(
            output_dtypes, tensor_shape.as_shape, output_shapes)

    if not isinstance(output_dtypes, tuple):
        output_dtypes = (output_dtypes,)
        output_shapes = (output_shapes,)

    output_classes = nest.map_structure(lambda _: ops.Tensor, output_dtypes)

    self._pipeline_instance = pipeline  # keep the live Pipeline object
    self._pipeline_serialized = serialize_pipeline(pipeline)
    self._batch_size = batch_size
    self._num_threads = num_threads
    if device_id is None:
        device_id = types.CPU_ONLY_DEVICE_ID
    self._device_id = device_id
    self._exec_separated = exec_separated
    self._prefetch_queue_depth = prefetch_queue_depth
    self._cpu_prefetch_queue_depth = cpu_prefetch_queue_depth
    self._gpu_prefetch_queue_depth = gpu_prefetch_queue_depth
    self._output_shapes = output_shapes
    self._output_dtypes = output_dtypes
    self._fail_on_device_mismatch = fail_on_device_mismatch

    self._setup_inputs(input_datasets)

    self._structure = structure.convert_legacy_structure(
        self._output_dtypes, self._output_shapes, output_classes)

    super(_DALIDatasetV2, self).__init__(self._as_variant_tensor())
def from_structure(output_types,
                   output_shapes=None,
                   shared_name=None,
                   output_classes=None):
  """Creates a new, uninitialized `Iterator` with the given structure.

  This iterator-constructing method can be used to create an iterator that
  is reusable with many different datasets.

  The returned iterator is not bound to a particular dataset, and it has
  no `initializer`. To initialize the iterator, run the operation returned
  by `Iterator.make_initializer(dataset)`.

  The following is an example:

  ```python
  iterator = Iterator.from_structure(tf.int64, tf.TensorShape([]))

  dataset_range = Dataset.range(10)
  range_initializer = iterator.make_initializer(dataset_range)

  dataset_evens = dataset_range.filter(lambda x: x % 2 == 0)
  evens_initializer = iterator.make_initializer(dataset_evens)

  # Define a model based on the iterator; in this example, the model_fn
  # is expected to take scalar tf.int64 Tensors as input (see
  # the definition of 'iterator' above).
  prediction, loss = model_fn(iterator.get_next())

  # Train for `num_epochs`, where for each epoch, we first iterate over
  # dataset_range, and then iterate over dataset_evens.
  for _ in range(num_epochs):
    # Initialize the iterator to `dataset_range`
    sess.run(range_initializer)
    while True:
      try:
        pred, loss_val = sess.run([prediction, loss])
      except tf.errors.OutOfRangeError:
        break

    # Initialize the iterator to `dataset_evens`
    sess.run(evens_initializer)
    while True:
      try:
        pred, loss_val = sess.run([prediction, loss])
      except tf.errors.OutOfRangeError:
        break
  ```

  Args:
    output_types: A nested structure of `tf.DType` objects corresponding to
      each component of an element of this dataset.
    output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects
      corresponding to each component of an element of this dataset. If
      omitted, each component will have an unconstrained shape.
    shared_name: (Optional.) If non-empty, this iterator will be shared under
      the given name across multiple sessions that share the same devices
      (e.g. when using a remote server).
    output_classes: (Optional.) A nested structure of Python `type` objects
      corresponding to each component of an element of this iterator. If
      omitted, each component is assumed to be of type `tf.Tensor`.

  Returns:
    An `Iterator`.

  Raises:
    TypeError: If the structures of `output_shapes` and `output_types` are
      not the same.
  """
  output_types = nest.map_structure(dtypes.as_dtype, output_types)
  if output_shapes is None:
    output_shapes = nest.map_structure(
        lambda _: tensor_shape.TensorShape(None), output_types)
  else:
    output_shapes = nest.map_structure_up_to(
        output_types, tensor_shape.as_shape, output_shapes)
  if output_classes is None:
    output_classes = nest.map_structure(lambda _: ops.Tensor, output_types)
  nest.assert_same_structure(output_types, output_shapes)
  output_structure = structure.convert_legacy_structure(
      output_types, output_shapes, output_classes)
  if shared_name is None:
    shared_name = ""
  if _device_stack_is_empty():
    with ops.device("/cpu:0"):
      iterator_resource = gen_dataset_ops.iterator_v2(
          container="",
          shared_name=shared_name,
          output_types=structure.get_flat_tensor_types(output_structure),
          output_shapes=structure.get_flat_tensor_shapes(output_structure))
  else:
    iterator_resource = gen_dataset_ops.iterator_v2(
        container="",
        shared_name=shared_name,
        output_types=structure.get_flat_tensor_types(output_structure),
        output_shapes=structure.get_flat_tensor_shapes(output_structure))
  return Iterator(iterator_resource, None, output_types, output_shapes,
                  output_classes)
def from_string_handle(string_handle,
                       output_types,
                       output_shapes=None,
                       output_classes=None):
  """Creates a new, uninitialized `Iterator` based on the given handle.

  This method allows you to define a "feedable" iterator where you can choose
  between concrete iterators by feeding a value in a `tf.Session.run` call.
  In that case, `string_handle` would be a `tf.compat.v1.placeholder`, and
  you would feed it with the value of `tf.data.Iterator.string_handle` in
  each step.

  For example, if you had two iterators that marked the current position in
  a training dataset and a test dataset, you could choose which to use in
  each step as follows:

  ```python
  train_iterator = tf.data.Dataset(...).make_one_shot_iterator()
  train_iterator_handle = sess.run(train_iterator.string_handle())

  test_iterator = tf.data.Dataset(...).make_one_shot_iterator()
  test_iterator_handle = sess.run(test_iterator.string_handle())

  handle = tf.compat.v1.placeholder(tf.string, shape=[])
  iterator = tf.data.Iterator.from_string_handle(
      handle, train_iterator.output_types)

  next_element = iterator.get_next()
  loss = f(next_element)

  train_loss = sess.run(loss, feed_dict={handle: train_iterator_handle})
  test_loss = sess.run(loss, feed_dict={handle: test_iterator_handle})
  ```

  Args:
    string_handle: A scalar `tf.Tensor` of type `tf.string` that evaluates
      to a handle produced by the `Iterator.string_handle()` method.
    output_types: A nested structure of `tf.DType` objects corresponding to
      each component of an element of this dataset.
    output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects
      corresponding to each component of an element of this dataset. If
      omitted, each component will have an unconstrained shape.
    output_classes: (Optional.) A nested structure of Python `type` objects
      corresponding to each component of an element of this iterator. If
      omitted, each component is assumed to be of type `tf.Tensor`.

  Returns:
    An `Iterator`.
  """
  output_types = nest.map_structure(dtypes.as_dtype, output_types)
  if output_shapes is None:
    output_shapes = nest.map_structure(
        lambda _: tensor_shape.TensorShape(None), output_types)
  else:
    output_shapes = nest.map_structure_up_to(
        output_types, tensor_shape.as_shape, output_shapes)
  if output_classes is None:
    output_classes = nest.map_structure(lambda _: ops.Tensor, output_types)
  nest.assert_same_structure(output_types, output_shapes)
  output_structure = structure.convert_legacy_structure(
      output_types, output_shapes, output_classes)
  string_handle = ops.convert_to_tensor(string_handle, dtype=dtypes.string)
  if _device_stack_is_empty():
    with ops.device("/cpu:0"):
      iterator_resource = gen_dataset_ops.iterator_from_string_handle_v2(
          string_handle,
          output_types=structure.get_flat_tensor_types(output_structure),
          output_shapes=structure.get_flat_tensor_shapes(output_structure))
  else:
    iterator_resource = gen_dataset_ops.iterator_from_string_handle_v2(
        string_handle,
        output_types=structure.get_flat_tensor_types(output_structure),
        output_shapes=structure.get_flat_tensor_shapes(output_structure))
  return Iterator(iterator_resource, None, output_types, output_shapes,
                  output_classes)
def from_string_handle(string_handle,
                       output_types,
                       output_shapes=None,
                       output_classes=None):
  """Creates a new, uninitialized `Iterator` based on the given handle.

  This method allows you to define a "feedable" iterator where you can choose
  between concrete iterators by feeding a value in a `tf.Session.run` call.
  In that case, `string_handle` would be a `tf.placeholder`, and you would
  feed it with the value of `tf.data.Iterator.string_handle` in each step.

  For example, if you had two iterators that marked the current position in
  a training dataset and a test dataset, you could choose which to use in
  each step as follows:

  ```python
  train_iterator = tf.data.Dataset(...).make_one_shot_iterator()
  train_iterator_handle = sess.run(train_iterator.string_handle())

  test_iterator = tf.data.Dataset(...).make_one_shot_iterator()
  test_iterator_handle = sess.run(test_iterator.string_handle())

  handle = tf.placeholder(tf.string, shape=[])
  iterator = tf.data.Iterator.from_string_handle(
      handle, train_iterator.output_types)

  next_element = iterator.get_next()
  loss = f(next_element)

  train_loss = sess.run(loss, feed_dict={handle: train_iterator_handle})
  test_loss = sess.run(loss, feed_dict={handle: test_iterator_handle})
  ```

  Args:
    string_handle: A scalar `tf.Tensor` of type `tf.string` that evaluates
      to a handle produced by the `Iterator.string_handle()` method.
    output_types: A nested structure of `tf.DType` objects corresponding to
      each component of an element of this dataset.
    output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects
      corresponding to each component of an element of this dataset. If
      omitted, each component will have an unconstrained shape.
    output_classes: (Optional.) A nested structure of Python `type` objects
      corresponding to each component of an element of this iterator. If
      omitted, each component is assumed to be of type `tf.Tensor`.

  Returns:
    An `Iterator`.
  """
  output_types = nest.map_structure(dtypes.as_dtype, output_types)
  if output_shapes is None:
    output_shapes = nest.map_structure(
        lambda _: tensor_shape.TensorShape(None), output_types)
  else:
    output_shapes = nest.map_structure_up_to(
        output_types, tensor_shape.as_shape, output_shapes)
  if output_classes is None:
    output_classes = nest.map_structure(lambda _: ops.Tensor, output_types)
  nest.assert_same_structure(output_types, output_shapes)
  output_structure = structure_lib.convert_legacy_structure(
      output_types, output_shapes, output_classes)
  string_handle = ops.convert_to_tensor(string_handle, dtype=dtypes.string)
  # pylint: disable=protected-access
  if compat.forward_compatible(2018, 8, 3):
    if _device_stack_is_empty():
      with ops.device("/cpu:0"):
        iterator_resource = gen_dataset_ops.iterator_from_string_handle_v2(
            string_handle,
            output_types=output_structure._flat_types,
            output_shapes=output_structure._flat_shapes)
    else:
      iterator_resource = gen_dataset_ops.iterator_from_string_handle_v2(
          string_handle,
          output_types=output_structure._flat_types,
          output_shapes=output_structure._flat_shapes)
  else:
    iterator_resource = gen_dataset_ops.iterator_from_string_handle(
        string_handle,
        output_types=output_structure._flat_types,
        output_shapes=output_structure._flat_shapes)
  # pylint: enable=protected-access
  return Iterator(iterator_resource, None, output_types, output_shapes,
                  output_classes)
def __init__(self,
             dataset,
             output_types,
             output_shapes=None,
             output_classes=None,
             allow_unsafe_cast=False):
  """Creates a new dataset with the given output types and shapes.

  The given `dataset` must have a structure that is convertible:
  * `dataset.output_types` must be the same as `output_types`, modulo
    nesting.
  * Each shape in `dataset.output_shapes` must be compatible with each shape
    in `output_shapes` (if given).

  Note: This helper permits "unsafe casts" for shapes, equivalent to using
  `tf.Tensor.set_shape()` where domain-specific knowledge is available.

  Args:
    dataset: A `Dataset` object.
    output_types: A nested structure of `tf.DType` objects.
    output_shapes: (Optional.) A nested structure of `tf.TensorShape`
      objects. If omitted, the shapes will be inherited from `dataset`.
    output_classes: (Optional.) A nested structure of class types. If
      omitted, the class types will be inherited from `dataset`.
    allow_unsafe_cast: (Optional.) If `True`, the caller may switch the
      reported output types and shapes of the restructured dataset, e.g. to
      switch a sparse tensor represented as `tf.variant` to its user-visible
      type and shape.

  Raises:
    ValueError: If either `output_types` or `output_shapes` is not compatible
      with the structure of `dataset`.
  """
  self._input_dataset = dataset

  input_types = dataset_ops.get_legacy_output_types(dataset)
  if not allow_unsafe_cast:
    # Validate that the types are compatible.
    output_types = nest.map_structure(dtypes.as_dtype, output_types)
    flat_original_types = nest.flatten(input_types)
    flat_new_types = nest.flatten(output_types)
    if flat_original_types != flat_new_types:
      raise ValueError(
          "Dataset with output types %r cannot be restructured to have "
          "output types %r" %
          (dataset_ops.get_legacy_output_types(dataset), output_types))

  input_shapes = dataset_ops.get_legacy_output_shapes(dataset)
  if output_shapes is None:
    # Inherit shapes from the original `dataset`.
    output_shapes = nest.pack_sequence_as(output_types,
                                          nest.flatten(input_shapes))
  else:
    if not allow_unsafe_cast:
      # Validate that the shapes are compatible.
      nest.assert_same_structure(output_types, output_shapes)
      flat_original_shapes = nest.flatten(input_shapes)
      flat_new_shapes = nest.flatten_up_to(output_types, output_shapes)

      for original_shape, new_shape in zip(flat_original_shapes,
                                           flat_new_shapes):
        if not original_shape.is_compatible_with(new_shape):
          raise ValueError(
              "Dataset with output shapes %r cannot be restructured to have "
              "incompatible output shapes %r" % (input_shapes,
                                                 output_shapes))
    output_shapes = nest.map_structure_up_to(output_types,
                                             tensor_shape.as_shape,
                                             output_shapes)

  input_classes = dataset_ops.get_legacy_output_classes(dataset)
  if output_classes is None:
    # Inherit class types from the original `dataset`.
    output_classes = nest.pack_sequence_as(output_types,
                                           nest.flatten(input_classes))

  self._structure = structure.convert_legacy_structure(
      output_types, output_shapes, output_classes)
  variant_tensor = self._input_dataset._variant_tensor  # pylint: disable=protected-access
  super(_RestructuredDataset, self).__init__(dataset, variant_tensor)