Beispiel #1
0
 def tf_shapes(self):
     """
     Convert every entry of ``self.shapes`` into a ``tf.TensorShape``.

     The traversal follows the structure of ``self.tf_dtypes``, so each
     shape entry is passed to ``tf.TensorShape`` as a whole rather than
     being descended into.

     :return: the sampler output tensor shapes as ``tf.TensorShape``
         objects (presumably a dictionary keyed like ``self.tf_dtypes``)
     """
     return nest.map_structure_up_to(
         self.tf_dtypes, tf.TensorShape, self.shapes)
Beispiel #2
0
  def testMapStructureUpTo(self):
    """Checks `nest.map_structure_up_to` on namedtuples and nested tuples."""
    # Case 1: the shallow structure is a namedtuple of scalars, while the
    # second input carries a deeper namedtuple for each field.
    value_type = collections.namedtuple("ab_tuple", "a, b")
    operand_type = collections.namedtuple("op_tuple", "add, mul")
    values = value_type(a=2, b=3)
    operands = value_type(
        a=operand_type(add=1, mul=2), b=operand_type(add=2, mul=3))

    def apply_ops(val, ops):
      return (val + ops.add) * ops.mul

    result = nest.map_structure_up_to(values, apply_ops, values, operands)
    self.assertEqual(result.a, 6)
    self.assertEqual(result.b, 15)

    # Case 2: the shallow structure is a nested tuple of names; the matching
    # data tuples are passed to the function as whole sequences.
    names = ("evens", ("odds", "primes"))
    numbers = ((2, 4, 6, 8), ((1, 3, 5, 7, 9), (3, 5, 7)))

    def describe(name, sec):
      return "first_{}_{}".format(len(sec), name)

    result = nest.map_structure_up_to(names, describe, names, numbers)
    self.assertEqual(
        result, ("first_4_evens", ("first_5_odds", "first_3_primes")))
Beispiel #3
0
  def from_string_handle(string_handle, output_types, output_shapes=None):
    """Creates a new, uninitialized `Iterator` based on the given handle.

    This method allows you to define a "feedable" iterator where you can choose
    between concrete iterators by feeding a value in a @{tf.Session.run} call.
    In that case, `string_handle` would be a @{tf.placeholder}, and you would
    feed it with the value of @{tf.data.Iterator.string_handle} in each step.

    For example, if you had two iterators that marked the current position in
    a training dataset and a test dataset, you could choose which to use in
    each step as follows:

    ```python
    train_iterator = tf.data.Dataset(...).make_one_shot_iterator()
    train_iterator_handle = sess.run(train_iterator.string_handle())

    test_iterator = tf.data.Dataset(...).make_one_shot_iterator()
    test_iterator_handle = sess.run(test_iterator.string_handle())

    handle = tf.placeholder(tf.string, shape=[])
    iterator = tf.data.Iterator.from_string_handle(
        handle, train_iterator.output_types)

    next_element = iterator.get_next()
    loss = f(next_element)

    train_loss = sess.run(loss, feed_dict={handle: train_iterator_handle})
    test_loss = sess.run(loss, feed_dict={handle: test_iterator_handle})
    ```

    Args:
      string_handle: A scalar `tf.Tensor` of type `tf.string` that evaluates
        to a handle produced by the `Iterator.string_handle()` method.
      output_types: A nested structure of `tf.DType` (or `tf.data.SparseType`)
        objects corresponding to each `tf.Tensor` (or `tf.SparseTensor`)
        component of an element of this dataset.
      output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects
        corresponding to each component of an element of this dataset. If
        omitted, each component will have an unconstrained shape.

    Returns:
      An `Iterator`.
    """
    output_types = nest.map_structure(dtypes.as_dtype, output_types)
    if output_shapes is not None:
      # Convert each user-supplied shape, treating every entry that lines up
      # with a type leaf as one shape.
      output_shapes = nest.map_structure_up_to(
          output_types, tensor_shape.as_shape, output_shapes)
    else:
      # No shapes given: one fully unknown shape per type leaf.
      output_shapes = nest.map_structure(
          lambda _: tensor_shape.TensorShape(None), output_types)
    nest.assert_same_structure(output_types, output_shapes)

    string_handle = ops.convert_to_tensor(string_handle, dtype=dtypes.string)
    flat_types = nest.flatten(sparse.unwrap_sparse_types(output_types))
    flat_shapes = nest.flatten(output_shapes)
    iterator_resource = gen_dataset_ops.iterator_from_string_handle(
        string_handle, output_types=flat_types, output_shapes=flat_shapes)
    return Iterator(iterator_resource, None, output_types, output_shapes)
Beispiel #4
0
    def set_spatial_shape(self, spatial_window, source_names=None):
        """
        Update the spatial part of every window shape held by this instance.

        ``spatial_window`` is either a dictionary mapping names to window
        sizes (a size may itself be a namespace or dictionary exposing
        ``'spatial_window_size'``), or a single list/tuple that is then
        used for every name in ``self._dtypes``.

        :param spatial_window: dictionary of window sizes, or a single
            list/tuple of integers
        :param source_names: optional list/dictionary of input source names;
            when given, the sizes are resolved via ``_read_window_sizes``
        :return: None; ``self._shapes`` and ``self.has_dynamic_shapes`` are
            updated in place
        :raises ValueError: if a dictionary entry is a dictionary without
            ``'spatial_window_size'``, or a size cannot be converted to int
        """
        resolved_sizes = copy.deepcopy(spatial_window)
        if isinstance(spatial_window, dict):
            for name in list(spatial_window):
                entry = spatial_window[name]
                if isinstance(entry, (ParserNamespace, argparse.Namespace)):
                    # namespaces are handled through their attribute dict
                    entry = vars(entry)
                if isinstance(entry, dict):
                    if 'spatial_window_size' not in entry:
                        raise ValueError(
                            'window_sizes should be a nested dictionary')
                    entry = entry['spatial_window_size']
                resolved_sizes[name] = tuple(entry)
        elif isinstance(spatial_window, (list, tuple)):
            # one window size shared by every name in self._dtypes
            resolved_sizes = {}
            for name in list(self._dtypes):
                resolved_sizes[name] = spatial_window

        # complete window shapes based on user input and input_image sizes
        if source_names:
            spatial_shapes = _read_window_sizes(source_names, resolved_sizes)
        else:
            try:
                spatial_shapes = {
                    name: tuple(int(dim) for dim in resolved_sizes[name])
                    for name in list(self._dtypes)}
            except ValueError:
                tf.logging.fatal("spatial window should be an array of int")
                raise

        spatial_shapes = nest.map_structure_up_to(
            self._dtypes, tuple, spatial_shapes)

        # build the new shapes first, then replace self._shapes in one step
        completed_shapes = {}
        for name in list(self._shapes):
            completed_shapes[name] = _complete_partial_window_sizes(
                spatial_shapes[name], self._shapes[name])
        self._shapes = completed_shapes

        # update based on the latest spatial shapes
        self.has_dynamic_shapes = self._check_dynamic_shapes()
        if self._placeholders_dict is not None:
            self._update_placeholders_dict(n_samples=self.n_samples)
Beispiel #5
0
  def __init__(self, dataset, output_types, output_shapes=None):
    """Creates a new dataset with the given output types and shapes.

    The given `dataset` must have a structure that is convertible:
    * `dataset.output_types` must be the same as `output_types` modulo nesting.
    * Each shape in `dataset.output_shapes` must be compatible with each shape
      in `output_shapes` (if given).

    Note: This helper permits "unsafe casts" for shapes, equivalent to using
    `tf.Tensor.set_shape()` where domain-specific knowledge is available.

    Args:
      dataset: A `Dataset` object.
      output_types: A nested structure of `tf.DType` objects.
      output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects.
        If omitted, the shapes will be inherited from `dataset`.

    Raises:
      ValueError: If either `output_types` or `output_shapes` is not compatible
        with the structure of `dataset`.
    """
    super(_RestructuredDataset, self).__init__()
    self._dataset = dataset

    # The flattened types must match exactly; only the nesting may differ.
    output_types = nest.map_structure(dtypes.as_dtype, output_types)
    existing_types = nest.flatten(dataset.output_types)
    requested_types = nest.flatten(output_types)
    if existing_types != requested_types:
      raise ValueError(
          "Dataset with output types %r cannot be restructured to have output "
          "types %r" % (dataset.output_types, output_types))
    self._output_types = output_types

    if output_shapes is None:
      # Inherit shapes from the original `dataset`, repacked into the new
      # nesting.
      inherited_shapes = nest.flatten(dataset.output_shapes)
      self._output_shapes = nest.pack_sequence_as(output_types,
                                                  inherited_shapes)
      return

    # Explicit shapes: check the structure, then compatibility leaf by leaf.
    nest.assert_same_structure(output_types, output_shapes)
    existing_shapes = nest.flatten(dataset.output_shapes)
    requested_shapes = nest.flatten_up_to(output_types, output_shapes)
    if not all(
        existing.is_compatible_with(requested)
        for existing, requested in zip(existing_shapes, requested_shapes)):
      raise ValueError(
          "Dataset with output shapes %r cannot be restructured to have "
          "incompatible output shapes %r" % (dataset.output_shapes,
                                             output_shapes))
    self._output_shapes = nest.map_structure_up_to(
        output_types, tensor_shape.as_shape, output_shapes)
Beispiel #6
0
        def __init__(self,
                     pipeline,
                     output_dtypes=None,
                     output_shapes=None,
                     *,
                     batch_size=1,
                     num_threads=4,
                     device_id=0,
                     exec_separated=False,
                     prefetch_queue_depth=2,
                     cpu_prefetch_queue_depth=2,
                     gpu_prefetch_queue_depth=2,
                     dtypes=None,
                     shapes=None):
            """Builds the dataset from a pipeline and its output description.

            `output_shapes`/`shapes` and `output_dtypes`/`dtypes` are each
            reconciled through `self._handle_deprecation` (presumably
            `shapes` and `dtypes` are the deprecated spellings; see that
            helper). A non-tuple `output_dtypes` is wrapped, together with
            its shape, into a 1-tuple.

            Args:
              pipeline: Pipeline object, stored in serialized form.
              output_dtypes: A single tf.DType value or a tuple of tf.DType
                values.
              output_shapes: (Optional.) Shapes matching `output_dtypes`. If
                omitted, every output gets an unknown shape.
              batch_size, num_threads, device_id, exec_separated,
              prefetch_queue_depth, cpu_prefetch_queue_depth,
              gpu_prefetch_queue_depth: Stored unchanged on the instance.
              dtypes: Alternative spelling of `output_dtypes`.
              shapes: Alternative spelling of `output_shapes`.

            Raises:
              TypeError: If `self._check_output_dtypes` rejects
                `output_dtypes`.
            """
            output_shapes = self._handle_deprecation(
                output_shapes, shapes, "shapes")
            output_dtypes = self._handle_deprecation(
                output_dtypes, dtypes, "dtypes")

            if not self._check_output_dtypes(output_dtypes):
                message = (
                    "`output_dtypes` should be provided as single tf.DType value "
                    "or a tuple of tf.DType values. Got value `{}` of type `{}`.")
                raise TypeError(
                    message.format(output_dtypes, type(output_dtypes)))

            if output_shapes is not None:
                output_shapes = nest.map_structure_up_to(
                    output_dtypes, tensor_shape.as_shape, output_shapes)
            else:
                # No shapes given: one fully unknown shape per dtype.
                output_shapes = nest.map_structure(
                    lambda _: tensor_shape.TensorShape(None), output_dtypes)

            if not isinstance(output_dtypes, tuple):
                # A lone dtype is treated as a one-element tuple of outputs.
                output_dtypes, output_shapes = (
                    (output_dtypes, ), (output_shapes, ))

            output_classes = nest.map_structure(
                lambda _: ops.Tensor, output_dtypes)

            self._pipeline = serialize_pipeline(pipeline)
            self._batch_size = batch_size
            self._num_threads = num_threads
            self._device_id = device_id
            self._exec_separated = exec_separated
            self._prefetch_queue_depth = prefetch_queue_depth
            self._cpu_prefetch_queue_depth = cpu_prefetch_queue_depth
            self._gpu_prefetch_queue_depth = gpu_prefetch_queue_depth
            self._output_shapes = output_shapes
            self._output_dtypes = output_dtypes

            self._structure = structure.convert_legacy_structure(
                self._output_dtypes, self._output_shapes, output_classes)

            # The variant tensor is built only after all attributes are set.
            super(_DALIDatasetV2, self).__init__(self._as_variant_tensor())
Beispiel #7
0
    def __init__(self, dataset, output_types, output_shapes=None):
        """Creates a new dataset with the given output types and shapes.

        The given `dataset` must have a structure that is convertible:
        * `dataset.output_types` must be the same as `output_types` modulo
          nesting.
        * Each shape in `dataset.output_shapes` must be compatible with each
          shape in `output_shapes` (if given).

        Note: This helper permits "unsafe casts" for shapes, equivalent to
        using `tf.Tensor.set_shape()` where domain-specific knowledge is
        available.

        Args:
          dataset: A `Dataset` object.
          output_types: A nested structure of `tf.DType` objects.
          output_shapes: (Optional.) A nested structure of `tf.TensorShape`
            objects. If omitted, the shapes will be inherited from `dataset`.

        Raises:
          ValueError: If either `output_types` or `output_shapes` is not
            compatible with the structure of `dataset`.
        """
        super(_RestructuredDataset, self).__init__()
        self._dataset = dataset

        # Normalise the requested types, then compare them leaf by leaf with
        # the types of the wrapped dataset.
        output_types = nest.map_structure(dtypes.as_dtype, output_types)
        original_leaf_types = nest.flatten(dataset.output_types)
        requested_leaf_types = nest.flatten(output_types)
        if original_leaf_types != requested_leaf_types:
            raise ValueError(
                "Dataset with output types %r cannot be restructured to have output "
                "types %r" % (dataset.output_types, output_types))

        self._output_types = output_types

        if output_shapes is not None:
            # Validate that the shapes are compatible.
            nest.assert_same_structure(output_types, output_shapes)
            original_leaf_shapes = nest.flatten(dataset.output_shapes)
            requested_leaf_shapes = nest.flatten_up_to(
                output_types, output_shapes)

            shape_pairs = zip(original_leaf_shapes, requested_leaf_shapes)
            for current_shape, requested_shape in shape_pairs:
                if current_shape.is_compatible_with(requested_shape):
                    continue
                raise ValueError(
                    "Dataset with output shapes %r cannot be restructured to have "
                    "incompatible output shapes %r" %
                    (dataset.output_shapes, output_shapes))
            self._output_shapes = nest.map_structure_up_to(
                output_types, tensor_shape.as_shape, output_shapes)
        else:
            # Inherit shapes from the original `dataset`.
            self._output_shapes = nest.pack_sequence_as(
                output_types, nest.flatten(dataset.output_shapes))
Beispiel #8
0
 def __init__(self, input_dataset, batch_size, padded_shapes, padding_values):
   """Initialize `PrependFromQueueAndPaddedBatchDataset`.

   Args:
     input_dataset: The dataset whose elements are batched.
     batch_size: Value converted to an int64 tensor named "batch_size".
     padded_shapes: Shapes to pad to, or `None` to derive them from
       `input_dataset.output_shapes`.
     padding_values: Values to pad with, or `None` to use
       `dataset_ops._default_padding(input_dataset)`.

   Raises:
     TypeError: If `input_dataset` has any sparse output class.
   """
   super(_PrependFromQueueAndPaddedBatchDataset, self).__init__()
   if sparse.any_sparse(input_dataset.output_classes):
     raise TypeError(
         "Batching of padded sparse tensors is not currently supported")
   self._input_dataset = input_dataset
   self._batch_size = ops.convert_to_tensor(
       batch_size, dtype=dtypes.int64, name="batch_size")
   # pylint: disable=protected-access
   if padded_shapes is not None:
     # Explicit shapes are converted entry by entry, following the structure
     # of the input dataset's shapes.
     self._padded_shapes = nest.map_structure_up_to(
         input_dataset.output_shapes, dataset_ops._partial_shape_to_tensor,
         padded_shapes)
   else:
     self._padded_shapes = nest.map_structure(
         dataset_ops._partial_shape_to_tensor, input_dataset.output_shapes)
   if padding_values is None:
     padding_values = dataset_ops._default_padding(input_dataset)
   self._padding_values = nest.map_structure_up_to(
       input_dataset.output_shapes, dataset_ops._padding_value_to_tensor,
       padding_values, input_dataset.output_types)
Beispiel #9
0
 def __init__(self, variant_tensor, output_shapes, output_types,
              output_classes):
   """Stores a variant tensor together with its validated structure.

   `output_types` is normalised with `dtypes.as_dtype` and `output_shapes`
   with `tensor_shape.as_shape`; both `output_shapes` and `output_classes`
   must then have the same nested structure as `output_types`.
   """
   # TODO(b/110122868): Consolidate the structure validation logic with the
   # similar logic in `Iterator.from_structure()` and
   # `Dataset.from_generator()`.
   normalized_types = nest.map_structure(dtypes.as_dtype, output_types)
   normalized_shapes = nest.map_structure_up_to(
       normalized_types, tensor_shape.as_shape, output_shapes)
   # Shapes are checked before classes, so a shape mismatch is reported first.
   for candidate in (normalized_shapes, output_classes):
     nest.assert_same_structure(normalized_types, candidate)
   self._variant_tensor = variant_tensor
   self._output_shapes = normalized_shapes
   self._output_types = normalized_types
   self._output_classes = output_classes
Beispiel #10
0
 def __init__(self, variant_tensor, output_shapes, output_types,
              output_classes):
   """Records `variant_tensor` and the structure describing its elements.

   The types are converted with `dtypes.as_dtype`, the shapes with
   `tensor_shape.as_shape` (following the structure of the types), and the
   shapes and classes are both required to match the structure of the types.
   """
   # TODO(b/110122868): Consolidate the structure validation logic with the
   # similar logic in `Iterator.from_structure()` and
   # `Dataset.from_generator()`.
   checked_types = nest.map_structure(dtypes.as_dtype, output_types)
   checked_shapes = nest.map_structure_up_to(
       checked_types, tensor_shape.as_shape, output_shapes)
   nest.assert_same_structure(checked_types, checked_shapes)
   nest.assert_same_structure(checked_types, output_classes)

   self._variant_tensor = variant_tensor
   self._output_classes = output_classes
   self._output_types = checked_types
   self._output_shapes = checked_shapes
 def __init__(self, input_dataset, batch_size, padded_shapes, padding_values):
   """Initialize `PrependFromQueueAndPaddedBatchDataset`.

   Args:
     input_dataset: The dataset whose elements are batched.
     batch_size: Value converted to an int64 tensor named "batch_size".
     padded_shapes: Shapes to pad to; when `None` they are taken from
       `input_dataset.output_shapes`.
     padding_values: Values to pad with; when `None` the result of
       `dataset_ops._default_padding(input_dataset)` is used.

   Raises:
     TypeError: If any output class of `input_dataset` is sparse.
   """
   super(_PrependFromQueueAndPaddedBatchDataset, self).__init__()
   if sparse.any_sparse(input_dataset.output_classes):
     raise TypeError(
         "Batching of padded sparse tensors is not currently supported")

   self._input_dataset = input_dataset
   self._batch_size = ops.convert_to_tensor(
       batch_size, dtype=dtypes.int64, name="batch_size")

   # pylint: disable=protected-access
   to_shape_tensor = dataset_ops._partial_shape_to_tensor
   if padded_shapes is None:
     self._padded_shapes = nest.map_structure(
         to_shape_tensor, input_dataset.output_shapes)
   else:
     self._padded_shapes = nest.map_structure_up_to(
         input_dataset.output_shapes, to_shape_tensor, padded_shapes)

   if padding_values is None:
     padding_values = dataset_ops._default_padding(input_dataset)
   self._padding_values = nest.map_structure_up_to(
       input_dataset.output_shapes, dataset_ops._padding_value_to_tensor,
       padding_values, input_dataset.output_types)
Beispiel #12
0
    def from_data_reader_properties(cls,
                                    source_names,
                                    image_shapes,
                                    image_dtypes,
                                    window_sizes=None,
                                    allow_dynamic=False):
        """
        Create a window instance from input data properties.

        Each property is grouped into a dict of ``image_name: data_value``
        pairs. Some input images are concatenated data arrays built from
        multiple data sources. Example of input::

            source_names={
                'image': (u'modality1', u'modality2'),
                'label': (u'modality3',)},
            image_shapes={
                'image': (192, 160, 192, 1, 2),
                'label': (192, 160, 192, 1, 1)},
            image_dtypes={
                'image': tf.float32,
                'label': tf.float32},
            window_sizes={
                'image': (10, 10, 2),
                'label': (10, 10, 2)}

        the ``window_sizes`` can also be::

            window_sizes={
                'modality1': (10, 10, 2),
                'modality3': (10, 10, 2)}

        or a nested dictionary with 'spatial_window_size' (deprecating)::

            window_sizes={
                'modality1': {'spatial_window_size': (10, 10, 2)},
                'modality2': {'spatial_window_size': (10, 10, 2)},
                'modality3': {'spatial_window_size': (5, 5, 1)}}

        see ``niftynet.io.ImageReader`` for more details.

        :param source_names: input image names
        :param image_shapes: image window shapes, structured like
            ``image_dtypes``
        :param image_dtypes: image window data types
        :param window_sizes: window sizes for the images; when empty or
            ``None`` the window keeps the image shapes
        :param allow_dynamic: if True, the window sizes are used as set
            from ``window_sizes`` (negative or 0 indicating dynamic sizes).
            Otherwise the spatial shape is set a second time from
            ``match_image_shapes(image_shapes)``, fixing dynamic sizes to
            the image shapes; this assumes the same image size across the
            dataset.
        :return: an ImageWindow instance
        """
        try:
            shapes_as_tuples = nest.map_structure_up_to(
                image_dtypes, tuple, image_shapes)
        except KeyError:
            # NOTE(review): the message reports ``window_sizes`` although the
            # failing call only involves image_shapes/image_dtypes -- confirm
            # whether that is intended.
            tf.logging.fatal('window_sizes wrong format %s', window_sizes)
            raise

        # create ImageWindow instance
        window = cls(shapes=shapes_as_tuples, dtypes=image_dtypes)

        # without window sizes the instance keeps the image sizes
        if window_sizes:
            window.set_spatial_shape(window_sizes, source_names)
            if not allow_dynamic:
                matched_shape = window.match_image_shapes(shapes_as_tuples)
                window.set_spatial_shape(matched_shape)
        return window
Beispiel #13
0
 def _from_components(self, value):
     """Rebuilds a value by delegating to each entry of the nested structure.

     Every leaf of ``self._nested_structure`` receives the matching part of
     ``value`` through its own ``_from_components`` method.
     """
     def rebuild(spec, components):
         return spec._from_components(components)

     return nest.map_structure_up_to(
         self._nested_structure, rebuild, self._nested_structure, value)
Beispiel #14
0
  def from_generator(generator, output_types, output_shapes=None):
    """Creates a `Dataset` whose elements are generated by `generator`.

    The `generator` argument must be a callable object that returns
    an object that supports the `iter()` protocol (e.g. a generator function).
    The elements generated by `generator` must be compatible with the given
    `output_types` and (optional) `output_shapes` arguments.

    For example:

    ```python
    import itertools

    def gen():
      for i in itertools.count(1):
        yield (i, [1] * i)

    ds = Dataset.from_generator(
        gen, (tf.int64, tf.int64), (tf.TensorShape([]), tf.TensorShape([None])))
    value = ds.make_one_shot_iterator().get_next()

    sess.run(value)  # (1, array([1]))
    sess.run(value)  # (2, array([1, 1]))
    ```

    Args:
      generator: A callable object that takes no arguments and returns an
        object that supports the `iter()` protocol.
      output_types: A nested structure of `tf.DType` objects corresponding to
        each component of an element yielded by `generator`.
      output_shapes: (Optional.) A nested structure of `tf.TensorShape`
        objects corresponding to each component of an element yielded by
        `generator`. If omitted, every component gets an unknown shape.

    Returns:
      A `Dataset`.

    Raises:
      TypeError: If `generator` is not callable.
    """
    if not callable(generator):
      raise TypeError("`generator` must be callable.")
    if output_shapes is None:
      # No shapes given: one fully unknown shape per type leaf.
      output_shapes = nest.map_structure(
          lambda _: tensor_shape.TensorShape(None), output_types)
    else:
      # Convert each supplied shape, treating every entry that lines up with
      # a type leaf as one shape.
      output_shapes = nest.map_structure_up_to(
          output_types, tensor_shape.as_shape, output_shapes)

    # Flat views used by the py_func calls below, which take flat lists.
    flattened_types = nest.flatten(output_types)
    flattened_shapes = nest.flatten(output_shapes)

    generator_state = dataset_ops.Dataset._GeneratorState(generator)

    def get_iterator_id_map_fn(unused_dummy):
      """Creates a unique `iterator_id` for each pass over the dataset.

      The "iterator_id" disambiguates between multiple concurrently
      existing iterators.

      Args:
        unused_dummy: Ignored value.

      Returns:
        A `tf.int64` tensor whose value uniquely identifies an iterator in
        `generator_state`.
      """
      return script_ops.py_func(
          generator_state.get_next_id, [], dtypes.int64, stateful=True)

    def generator_map_fn(iterator_id_t):
      """Generates the next element from iterator with ID `iterator_id_t`.

      We map this function across an infinite repetition of the
      `iterator_id_t`, and raise `StopIteration` to terminate the iteration.

      Args:
        iterator_id_t: A `tf.int64` tensor whose value uniquely identifies
          the iterator in `generator_state` from which to generate an element.

      Returns:
        A nested structure of tensors representing an element from the iterator.
      """

      def generator_py_func(iterator_id):
        """A `py_func` that will be called to invoke the iterator."""
        try:
          values = next(generator_state.get_iterator(iterator_id))
        except StopIteration:
          # Tell the shared state this iterator is exhausted, then re-raise
          # with an explicit message.
          generator_state.iterator_completed(iterator_id)
          raise StopIteration("Iteration finished.")

        # Use the same _convert function from the py_func() implementation to
        # convert the returned values to arrays early, so that we can inspect
        # their values.
        # pylint: disable=protected-access
        ret_arrays = [
            script_ops.FuncRegistry._convert(ret, dtype=dtype.as_numpy_dtype)
            for ret, dtype in zip(nest.flatten_up_to(output_types, values),
                                  flattened_types)
        ]
        # pylint: enable=protected-access

        # Additional type and shape checking to ensure that the components
        # of the generated element match the `output_types` and `output_shapes`
        # arguments.
        for (ret_array, expected_dtype, expected_shape) in zip(
            ret_arrays, flattened_types, flattened_shapes):
          if ret_array.dtype != expected_dtype.as_numpy_dtype:
            raise TypeError(
                "`generator` yielded an element of type %s where an element "
                "of type %s was expected." % (ret_array.dtype,
                                              expected_dtype.as_numpy_dtype))
          if not expected_shape.is_compatible_with(ret_array.shape):
            raise ValueError(
                "`generator` yielded an element of shape %s where an element "
                "of shape %s was expected." % (ret_array.shape, expected_shape))

        return ret_arrays

      flat_values = script_ops.py_func(
          generator_py_func, [iterator_id_t], flattened_types, stateful=True)

      # The `py_func()` op drops the inferred shapes, so we add them back in
      # here.
      # NOTE(review): `output_shapes` is always assigned a non-None value at
      # the top of `from_generator`, so this guard looks like it always passes.
      if output_shapes is not None:
        for ret_t, shape in zip(flat_values, flattened_shapes):
          ret_t.set_shape(shape)

      return nest.pack_sequence_as(output_types, flat_values)

    # This function associates each traversal of `generator` with a unique
    # iterator ID.
    def flat_map_fn(iterator_id_t):
      # First, generate an infinite dataset containing the iterator ID repeated
      # forever.
      repeated_id = Dataset.from_tensors(iterator_id_t).repeat(None)

      # The `generator_map_fn` gets the next element from the iterator with the
      # relevant ID, and raises StopIteration when that iterator contains no
      # more elements.
      return repeated_id.map(generator_map_fn)

    # A single-element dataset that, each time it is evaluated, contains a
    # freshly-generated and unique (for the returned dataset) int64
    # ID that will be used to identify the appropriate Python state, which
    # is encapsulated in `generator_state`, and captured in
    # `get_iterator_id_map_fn`.
    dummy = 0
    id_dataset = Dataset.from_tensors(dummy).map(get_iterator_id_map_fn)

    # A dataset that contains all of the elements generated by a
    # single iterator created from `generator`, identified by the
    # iterator ID contained in `id_dataset`. Lifting the iteration
    # into a flat_map here enables multiple repetitions and/or nested
    # versions of the returned dataset to be created, because it forces
    # the generation of a new ID for each version.
    return id_dataset.flat_map(flat_map_fn)
Beispiel #15
0
  def from_structure(output_types,
                     output_shapes=None,
                     shared_name=None,
                     output_classes=None):
    """Creates a new, uninitialized `Iterator` with the given structure.

    This iterator-constructing method can be used to create an iterator that
    is reusable with many different datasets.

    The returned iterator is not bound to a particular dataset, and it has
    no `initializer`. To initialize the iterator, run the operation returned by
    `Iterator.make_initializer(dataset)`.

    The following is an example

    ```python
    iterator = Iterator.from_structure(tf.int64, tf.TensorShape([]))

    dataset_range = Dataset.range(10)
    range_initializer = iterator.make_initializer(dataset_range)

    dataset_evens = dataset_range.filter(lambda x: x % 2 == 0)
    evens_initializer = iterator.make_initializer(dataset_evens)

    # Define a model based on the iterator; in this example, the model_fn
    # is expected to take scalar tf.int64 Tensors as input (see
    # the definition of 'iterator' above).
    prediction, loss = model_fn(iterator.get_next())

    # Train for `num_epochs`, where for each epoch, we first iterate over
    # dataset_range, and then iterate over dataset_evens.
    for _ in range(num_epochs):
      # Initialize the iterator to `dataset_range`
      sess.run(range_initializer)
      while True:
        try:
          pred, loss_val = sess.run([prediction, loss])
        except tf.errors.OutOfRangeError:
          break

      # Initialize the iterator to `dataset_evens`
      sess.run(evens_initializer)
      while True:
        try:
          pred, loss_val = sess.run([prediction, loss])
        except tf.errors.OutOfRangeError:
          break
    ```

    Args:
      output_types: A nested structure of `tf.DType` objects corresponding to
        each component of an element of this dataset.
      output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects
        corresponding to each component of an element of this dataset. If
        omitted, each component will have an unconstrained shape.
      shared_name: (Optional.) If non-empty, this iterator will be shared under
        the given name across multiple sessions that share the same devices
        (e.g. when using a remote server).
      output_classes: (Optional.) A nested structure of Python `type` objects
        corresponding to each component of an element of this iterator. If
        omitted, each component is assumed to be of type `tf.Tensor`.

    Returns:
      An `Iterator`.

    Raises:
      TypeError: If the structures of `output_shapes` and `output_types` are
        not the same.
    """
    output_types = nest.map_structure(dtypes.as_dtype, output_types)
    if output_shapes is not None:
      output_shapes = nest.map_structure_up_to(
          output_types, tensor_shape.as_shape, output_shapes)
    else:
      # No shapes given: one fully unknown shape per type leaf.
      output_shapes = nest.map_structure(
          lambda _: tensor_shape.TensorShape(None), output_types)
    if output_classes is None:
      # Default every component to a dense tensor.
      output_classes = nest.map_structure(lambda _: ops.Tensor, output_types)
    nest.assert_same_structure(output_types, output_shapes)

    shared_name = "" if shared_name is None else shared_name
    dense_types = nest.flatten(
        sparse.as_dense_types(output_types, output_classes))
    dense_shapes = nest.flatten(
        sparse.as_dense_shapes(output_shapes, output_classes))
    iterator_resource = gen_dataset_ops.iterator(
        container="",
        shared_name=shared_name,
        output_types=dense_types,
        output_shapes=dense_shapes)
    return Iterator(iterator_resource, None, output_types, output_shapes,
                    output_classes)
Beispiel #16
0
    def from_generator(generator, output_types, output_shapes=None):
        """Creates a `Dataset` whose elements are generated by `generator`.

        The `generator` argument must be a callable object that returns
        an object that supports the `iter()` protocol (e.g. a generator
        function). The elements generated by `generator` must be compatible
        with the given `output_types` and (optional) `output_shapes`
        arguments.

        For example:

        ```python
        import itertools

        def gen():
          for i in itertools.count(1):
            yield (i, [1] * i)

        ds = Dataset.from_generator(
            gen, (tf.int64, tf.int64), (tf.TensorShape([]), tf.TensorShape([None])))
        value = ds.make_one_shot_iterator().get_next()

        sess.run(value)  # (1, array([1]))
        sess.run(value)  # (2, array([1, 1]))
        ```

        Args:
          generator: A callable object that takes no arguments and returns an
            object that supports the `iter()` protocol.
          output_types: A nested structure of `tf.DType` objects corresponding
            to each component of an element yielded by `generator`.
          output_shapes: (Optional.) A nested structure of `tf.TensorShape`
            objects corresponding to each component of an element yielded by
            `generator`. When omitted, every component is given
            `TensorShape(None)`.

        Returns:
          A `Dataset`.

        Raises:
          TypeError: If `generator` is not callable.
        """
        if not callable(generator):
            raise TypeError("`generator` must be callable.")
        # Normalize `output_shapes` into a structure of `TensorShape` objects
        # that mirrors `output_types`.
        if output_shapes is None:
            output_shapes = nest.map_structure(
                lambda _: tensor_shape.TensorShape(None), output_types)
        else:
            output_shapes = nest.map_structure_up_to(output_types,
                                                     tensor_shape.as_shape,
                                                     output_shapes)

        # Flat views of the declared signature; they are zipped against the
        # flattened generator output inside `generator_py_func`.
        flattened_types = nest.flatten(output_types)
        flattened_shapes = nest.flatten(output_shapes)

        # Python-side state shared by the closures below; it hands out
        # iterator IDs and looks iterators up by ID.
        generator_state = dataset_ops.Dataset._GeneratorState(generator)

        def get_iterator_id_map_fn(unused_dummy):
            """Creates a unique `iterator_id` for each pass over the dataset.

            The "iterator_id" disambiguates between multiple concurrently
            existing iterators.

            Args:
              unused_dummy: Ignored value.

            Returns:
              A `tf.int64` tensor whose value uniquely identifies an iterator
              in `generator_state`.
            """
            return script_ops.py_func(generator_state.get_next_id, [],
                                      dtypes.int64,
                                      stateful=True)

        def generator_map_fn(iterator_id_t):
            """Generates the next element from iterator with ID `iterator_id_t`.

            We map this function across an infinite repetition of the
            `iterator_id_t`, and raise `StopIteration` to terminate the
            iteration.

            Args:
              iterator_id_t: A `tf.int64` tensor whose value uniquely
                identifies the iterator in `generator_state` from which to
                generate an element.

            Returns:
              A nested structure of tensors representing an element from the
              iterator.
            """
            def generator_py_func(iterator_id):
                """A `py_func` that will be called to invoke the iterator."""
                try:
                    values = next(generator_state.get_iterator(iterator_id))
                except StopIteration:
                    # Tell `generator_state` this iterator is finished, then
                    # raise again so the iteration terminates.
                    generator_state.iterator_completed(iterator_id)
                    raise StopIteration("Iteration finished.")

                # Use the same _convert function from the py_func() implementation to
                # convert the returned values to arrays early, so that we can inspect
                # their values.
                # pylint: disable=protected-access
                ret_arrays = [
                    script_ops.FuncRegistry._convert(
                        ret, dtype=dtype.as_numpy_dtype) for ret, dtype in zip(
                            nest.flatten_up_to(output_types, values),
                            flattened_types)
                ]
                # pylint: enable=protected-access

                # Additional type and shape checking to ensure that the components
                # of the generated element match the `output_types` and `output_shapes`
                # arguments.
                for (ret_array, expected_dtype,
                     expected_shape) in zip(ret_arrays, flattened_types,
                                            flattened_shapes):
                    if ret_array.dtype != expected_dtype.as_numpy_dtype:
                        raise TypeError(
                            "`generator` yielded an element of type %s where an element "
                            "of type %s was expected." %
                            (ret_array.dtype, expected_dtype.as_numpy_dtype))
                    if not expected_shape.is_compatible_with(ret_array.shape):
                        raise ValueError(
                            "`generator` yielded an element of shape %s where an element "
                            "of shape %s was expected." %
                            (ret_array.shape, expected_shape))

                return ret_arrays

            flat_values = script_ops.py_func(generator_py_func,
                                             [iterator_id_t],
                                             flattened_types,
                                             stateful=True)

            # The `py_func()` op drops the inferred shapes, so we add them back in
            # here.
            # NOTE(review): `output_shapes` was normalized to a non-None
            # structure at the top of `from_generator`, so this guard looks
            # always true.
            if output_shapes is not None:
                for ret_t, shape in zip(flat_values, flattened_shapes):
                    ret_t.set_shape(shape)

            # Restore the caller's nesting from the flat list of tensors.
            return nest.pack_sequence_as(output_types, flat_values)

        # This function associates each traversal of `generator` with a unique
        # iterator ID.
        def flat_map_fn(iterator_id_t):
            # First, generate an infinite dataset containing the iterator ID repeated
            # forever.
            repeated_id = Dataset.from_tensors(iterator_id_t).repeat(None)

            # The `generator_map_fn` gets the next element from the iterator with the
            # relevant ID, and raises StopIteration when that iterator contains no
            # more elements.
            return repeated_id.map(generator_map_fn)

        # A single-element dataset that, each time it is evaluated, contains a
        # freshly-generated and unique (for the returned dataset) int64
        # ID that will be used to identify the appropriate Python state, which
        # is encapsulated in `generator_state`, and captured in
        # `get_iterator_id_map_fn`.
        dummy = 0
        id_dataset = Dataset.from_tensors(dummy).map(get_iterator_id_map_fn)

        # A dataset that contains all of the elements generated by a
        # single iterator created from `generator`, identified by the
        # iterator ID contained in `id_dataset`. Lifting the iteration
        # into a flat_map here enables multiple repetitions and/or nested
        # versions of the returned dataset to be created, because it forces
        # the generation of a new ID for each version.
        return id_dataset.flat_map(flat_map_fn)
Beispiel #17
0
        def __init__(self,
                     pipeline,
                     output_dtypes=None,
                     output_shapes=None,
                     fail_on_device_mismatch=True,
                     *,
                     input_datasets=None,
                     batch_size=1,
                     num_threads=4,
                     device_id=0,
                     exec_separated=False,
                     prefetch_queue_depth=2,
                     cpu_prefetch_queue_depth=2,
                     gpu_prefetch_queue_depth=2,
                     dtypes=None,
                     shapes=None):
            """Builds the dataset from a pipeline and its declared outputs.

            Args:
              pipeline: The pipeline object. It is kept on the instance and is
                also stored in the form returned by `serialize_pipeline`.
              output_dtypes: A single `tf.DType` or a tuple of `tf.DType`
                values. A single value is wrapped into a 1-tuple.
              output_shapes: (Optional.) Shapes matching `output_dtypes`. When
                omitted, every output is given `TensorShape(None)`.
              fail_on_device_mismatch: Stored on the instance unchanged.
              input_datasets: Forwarded to `_setup_inputs`.
              batch_size: Stored on the instance unchanged.
              num_threads: Stored on the instance unchanged.
              device_id: Stored on the instance; `None` is replaced by
                `types.CPU_ONLY_DEVICE_ID`.
              exec_separated: Stored on the instance unchanged.
              prefetch_queue_depth: Stored on the instance unchanged.
              cpu_prefetch_queue_depth: Stored on the instance unchanged.
              gpu_prefetch_queue_depth: Stored on the instance unchanged.
              dtypes: Reconciled with `output_dtypes` by `_handle_deprecation`
                (presumably the deprecated spelling -- confirm against that
                helper).
              shapes: Reconciled with `output_shapes` by `_handle_deprecation`
                (presumably the deprecated spelling -- confirm against that
                helper).

            Raises:
              TypeError: If `_check_dtypes` rejects `output_dtypes`.
            """
            # Resolve the old/new argument pairs first (shapes, then dtypes).
            output_shapes = self._handle_deprecation(
                output_shapes, shapes, "shapes")
            output_dtypes = self._handle_deprecation(
                output_dtypes, dtypes, "dtypes")

            dtypes_accepted = self._check_dtypes(output_dtypes, tf.DType)
            if not dtypes_accepted:
                raise TypeError(
                    "`output_dtypes` should be provided as single tf.DType value "
                    f"or a tuple of tf.DType values. Got value `{output_dtypes}` "
                    f"of the type `{type(output_dtypes)}`.")

            # Turn the shapes into `TensorShape` objects laid out like the
            # dtypes; without user input every output has an unknown shape.
            if output_shapes is not None:
                output_shapes = nest.map_structure_up_to(
                    output_dtypes, tensor_shape.as_shape, output_shapes)
            else:
                output_shapes = nest.map_structure(
                    lambda _unused: tensor_shape.TensorShape(None),
                    output_dtypes)

            # From here on the signature is always carried as tuples.
            single_output = not isinstance(output_dtypes, tuple)
            if single_output:
                output_dtypes, output_shapes = (output_dtypes, ), (output_shapes, )

            tensor_classes = nest.map_structure(lambda _unused: ops.Tensor,
                                                output_dtypes)

            # Hold on to the live Pipeline object as well as its serialized form.
            self._pipeline_instance = pipeline
            self._pipeline_serialized = serialize_pipeline(pipeline)
            self._batch_size = batch_size
            self._num_threads = num_threads
            self._device_id = (types.CPU_ONLY_DEVICE_ID
                               if device_id is None else device_id)
            self._exec_separated = exec_separated
            self._prefetch_queue_depth = prefetch_queue_depth
            self._cpu_prefetch_queue_depth = cpu_prefetch_queue_depth
            self._gpu_prefetch_queue_depth = gpu_prefetch_queue_depth
            self._output_shapes = output_shapes
            self._output_dtypes = output_dtypes
            self._fail_on_device_mismatch = fail_on_device_mismatch

            self._setup_inputs(input_datasets)

            self._structure = structure.convert_legacy_structure(
                self._output_dtypes, self._output_shapes, tensor_classes)

            variant = self._as_variant_tensor()
            super(_DALIDatasetV2, self).__init__(variant)
Beispiel #18
0
    def from_structure(output_types,
                       output_shapes=None,
                       shared_name=None,
                       output_classes=None):
        """Builds an uninitialized `Iterator` described only by its structure.

        Because the iterator is defined by structure alone, it is not tied to
        any dataset and has no `initializer`; it can be reused with many
        different datasets. Run the operation returned by
        `Iterator.make_initializer(dataset)` to initialize it.

        Example:

        ```python
        iterator = Iterator.from_structure(tf.int64, tf.TensorShape([]))

        dataset_range = Dataset.range(10)
        range_initializer = iterator.make_initializer(dataset_range)

        dataset_evens = dataset_range.filter(lambda x: x % 2 == 0)
        evens_initializer = iterator.make_initializer(dataset_evens)

        # Define a model based on the iterator; in this example, the model_fn
        # is expected to take scalar tf.int64 Tensors as input (see
        # the definition of 'iterator' above).
        prediction, loss = model_fn(iterator.get_next())

        # Train for `num_epochs`, where for each epoch, we first iterate over
        # dataset_range, and then iterate over dataset_evens.
        for _ in range(num_epochs):
          # Initialize the iterator to `dataset_range`
          sess.run(range_initializer)
          while True:
            try:
              pred, loss_val = sess.run([prediction, loss])
            except tf.errors.OutOfRangeError:
              break

          # Initialize the iterator to `dataset_evens`
          sess.run(evens_initializer)
          while True:
            try:
              pred, loss_val = sess.run([prediction, loss])
            except tf.errors.OutOfRangeError:
              break
        ```

        Args:
          output_types: A nested structure of `tf.DType` objects, one per
            component of an element of this dataset.
          output_shapes: (Optional.) A nested structure of `tf.TensorShape`
            objects, one per component of an element of this dataset. If
            omitted, each component will have an unconstrained shape.
          shared_name: (Optional.) If non-empty, this iterator will be shared
            under the given name across multiple sessions that share the same
            devices (e.g. when using a remote server).
          output_classes: (Optional.) A nested structure of Python `type`
            objects, one per component of an element of this iterator. If
            omitted, each component is assumed to be of type `tf.Tensor`.

        Returns:
          An `Iterator`.

        Raises:
          TypeError: If the structures of `output_shapes` and `output_types`
            are not the same.
        """
        output_types = nest.map_structure(dtypes.as_dtype, output_types)

        # Shapes: convert what the caller gave, or default every component to
        # an unknown shape.
        if output_shapes is not None:
            output_shapes = nest.map_structure_up_to(
                output_types, tensor_shape.as_shape, output_shapes)
        else:
            output_shapes = nest.map_structure(
                lambda _unused: tensor_shape.TensorShape(None), output_types)

        if output_classes is None:
            output_classes = nest.map_structure(
                lambda _unused: ops.Tensor, output_types)

        nest.assert_same_structure(output_types, output_shapes)
        legacy_structure = structure.convert_legacy_structure(
            output_types, output_shapes, output_classes)
        shared_name = "" if shared_name is None else shared_name

        def create_resource():
            # Deferred so that the op is created inside whichever device
            # scope is active at the call site below.
            return gen_dataset_ops.iterator_v2(
                container="",
                shared_name=shared_name,
                output_types=structure.get_flat_tensor_types(
                    legacy_structure),
                output_shapes=structure.get_flat_tensor_shapes(
                    legacy_structure))

        if _device_stack_is_empty():
            # No device scope is active: place the resource on the CPU.
            with ops.device("/cpu:0"):
                resource = create_resource()
        else:
            resource = create_resource()

        return Iterator(resource, None, output_types, output_shapes,
                        output_classes)
Beispiel #19
0
    def from_string_handle(string_handle,
                           output_types,
                           output_shapes=None,
                           output_classes=None):
        """Builds an uninitialized `Iterator` that is backed by a string handle.

        The result is a "feedable" iterator: the concrete iterator to read
        from is selected by feeding a value in a `tf.Session.run` call.
        Typically `string_handle` is a `tf.compat.v1.placeholder` that is fed
        the value of `tf.data.Iterator.string_handle` in each step.

        For instance, with one iterator over a training dataset and another
        over a test dataset, the one to use can be picked per step:

        ```python
        train_iterator = tf.data.Dataset(...).make_one_shot_iterator()
        train_iterator_handle = sess.run(train_iterator.string_handle())

        test_iterator = tf.data.Dataset(...).make_one_shot_iterator()
        test_iterator_handle = sess.run(test_iterator.string_handle())

        handle = tf.compat.v1.placeholder(tf.string, shape=[])
        iterator = tf.data.Iterator.from_string_handle(
            handle, train_iterator.output_types)

        next_element = iterator.get_next()
        loss = f(next_element)

        train_loss = sess.run(loss, feed_dict={handle: train_iterator_handle})
        test_loss = sess.run(loss, feed_dict={handle: test_iterator_handle})
        ```

        Args:
          string_handle: A scalar `tf.Tensor` of type `tf.string` that
            evaluates to a handle produced by the `Iterator.string_handle()`
            method.
          output_types: A nested structure of `tf.DType` objects, one per
            component of an element of this dataset.
          output_shapes: (Optional.) A nested structure of `tf.TensorShape`
            objects, one per component of an element of this dataset. If
            omitted, each component will have an unconstrained shape.
          output_classes: (Optional.) A nested structure of Python `type`
            objects, one per component of an element of this iterator. If
            omitted, each component is assumed to be of type `tf.Tensor`.

        Returns:
          An `Iterator`.
        """
        output_types = nest.map_structure(dtypes.as_dtype, output_types)

        # Shapes: convert what the caller gave, or default every component to
        # an unknown shape.
        if output_shapes is not None:
            output_shapes = nest.map_structure_up_to(
                output_types, tensor_shape.as_shape, output_shapes)
        else:
            output_shapes = nest.map_structure(
                lambda _unused: tensor_shape.TensorShape(None), output_types)

        if output_classes is None:
            output_classes = nest.map_structure(
                lambda _unused: ops.Tensor, output_types)

        nest.assert_same_structure(output_types, output_shapes)
        legacy_structure = structure.convert_legacy_structure(
            output_types, output_shapes, output_classes)
        string_handle = ops.convert_to_tensor(
            string_handle, dtype=dtypes.string)

        def create_resource():
            # Deferred so that the op is created inside whichever device
            # scope is active at the call site below.
            return gen_dataset_ops.iterator_from_string_handle_v2(
                string_handle,
                output_types=structure.get_flat_tensor_types(
                    legacy_structure),
                output_shapes=structure.get_flat_tensor_shapes(
                    legacy_structure))

        if _device_stack_is_empty():
            # No device scope is active: place the resource on the CPU.
            with ops.device("/cpu:0"):
                resource = create_resource()
        else:
            resource = create_resource()

        return Iterator(resource, None, output_types, output_shapes,
                        output_classes)
Beispiel #20
0
  def from_string_handle(string_handle,
                         output_types,
                         output_shapes=None,
                         output_classes=None):
    """Builds an uninitialized `Iterator` that is backed by a string handle.

    The result is a "feedable" iterator: the concrete iterator to read from is
    selected by feeding a value in a `tf.Session.run` call. Typically
    `string_handle` is a `tf.placeholder` that is fed the value of
    `tf.data.Iterator.string_handle` in each step.

    For instance, with one iterator over a training dataset and another over a
    test dataset, the one to use can be picked per step:

    ```python
    train_iterator = tf.data.Dataset(...).make_one_shot_iterator()
    train_iterator_handle = sess.run(train_iterator.string_handle())

    test_iterator = tf.data.Dataset(...).make_one_shot_iterator()
    test_iterator_handle = sess.run(test_iterator.string_handle())

    handle = tf.placeholder(tf.string, shape=[])
    iterator = tf.data.Iterator.from_string_handle(
        handle, train_iterator.output_types)

    next_element = iterator.get_next()
    loss = f(next_element)

    train_loss = sess.run(loss, feed_dict={handle: train_iterator_handle})
    test_loss = sess.run(loss, feed_dict={handle: test_iterator_handle})
    ```

    Args:
      string_handle: A scalar `tf.Tensor` of type `tf.string` that evaluates
        to a handle produced by the `Iterator.string_handle()` method.
      output_types: A nested structure of `tf.DType` objects, one per
        component of an element of this dataset.
      output_shapes: (Optional.) A nested structure of `tf.TensorShape`
        objects, one per component of an element of this dataset. If omitted,
        each component will have an unconstrained shape.
      output_classes: (Optional.) A nested structure of Python `type` objects,
        one per component of an element of this iterator. If omitted, each
        component is assumed to be of type `tf.Tensor`.

    Returns:
      An `Iterator`.
    """
    output_types = nest.map_structure(dtypes.as_dtype, output_types)

    # Shapes: convert what the caller gave, or default every component to an
    # unknown shape.
    if output_shapes is not None:
      output_shapes = nest.map_structure_up_to(
          output_types, tensor_shape.as_shape, output_shapes)
    else:
      output_shapes = nest.map_structure(
          lambda _unused: tensor_shape.TensorShape(None), output_types)

    if output_classes is None:
      output_classes = nest.map_structure(
          lambda _unused: ops.Tensor, output_types)

    nest.assert_same_structure(output_types, output_shapes)
    legacy_structure = structure_lib.convert_legacy_structure(
        output_types, output_shapes, output_classes)
    string_handle = ops.convert_to_tensor(string_handle, dtype=dtypes.string)

    # pylint: disable=protected-access
    if not compat.forward_compatible(2018, 8, 3):
      # Outside the forward-compatibility window: use the original op.
      resource = gen_dataset_ops.iterator_from_string_handle(
          string_handle,
          output_types=legacy_structure._flat_types,
          output_shapes=legacy_structure._flat_shapes)
    elif _device_stack_is_empty():
      # V2 op with no device scope active: place the resource on the CPU.
      with ops.device("/cpu:0"):
        resource = gen_dataset_ops.iterator_from_string_handle_v2(
            string_handle,
            output_types=legacy_structure._flat_types,
            output_shapes=legacy_structure._flat_shapes)
    else:
      resource = gen_dataset_ops.iterator_from_string_handle_v2(
          string_handle,
          output_types=legacy_structure._flat_types,
          output_shapes=legacy_structure._flat_shapes)
    # pylint: enable=protected-access

    return Iterator(resource, None, output_types, output_shapes,
                    output_classes)
Beispiel #21
0
    def __init__(self,
                 dataset,
                 output_types,
                 output_shapes=None,
                 output_classes=None,
                 allow_unsafe_cast=False):
        """Wraps `dataset` so that it reports the given types and shapes.

        `dataset` must have a structure that can be converted:
        * `dataset.output_types` must equal `output_types` modulo nesting.
        * Each shape in `dataset.output_shapes` must be compatible with the
          matching shape in `output_shapes` (if given).

        Note: This helper permits "unsafe casts" for shapes, equivalent to
        using `tf.Tensor.set_shape()` where domain-specific knowledge is
        available.

        Args:
          dataset: A `Dataset` object.
          output_types: A nested structure of `tf.DType` objects.
          output_shapes: (Optional.) A nested structure of `tf.TensorShape`
            objects. If omitted, the shapes are inherited from `dataset`.
          output_classes: (Optional.) A nested structure of class types. If
            omitted, the class types are inherited from `dataset`.
          allow_unsafe_cast: (Optional.) If `True`, the type and shape
            validation is skipped, so the caller may switch the reported
            output types and shapes of the restructured dataset, e.g. to
            switch a sparse tensor represented as `tf.variant` to its
            user-visible type and shape.

        Raises:
          ValueError: If either `output_types` or `output_shapes` is not
            compatible with the structure of `dataset`.
        """
        self._input_dataset = dataset

        source_types = dataset_ops.get_legacy_output_types(dataset)
        if not allow_unsafe_cast:
            # The flattened dtypes must match exactly; only the nesting may
            # differ.
            output_types = nest.map_structure(dtypes.as_dtype, output_types)
            if nest.flatten(source_types) != nest.flatten(output_types):
                raise ValueError(
                    "Dataset with output types %r cannot be restructured to have "
                    "output types %r" %
                    (dataset_ops.get_legacy_output_types(dataset),
                     output_types))

        source_shapes = dataset_ops.get_legacy_output_shapes(dataset)
        if output_shapes is not None:
            if not allow_unsafe_cast:
                # Every requested shape has to be compatible with the shape
                # the input dataset already reports.
                nest.assert_same_structure(output_types, output_shapes)
                shape_pairs = zip(
                    nest.flatten(source_shapes),
                    nest.flatten_up_to(output_types, output_shapes))
                if not all(
                        old.is_compatible_with(new)
                        for old, new in shape_pairs):
                    raise ValueError(
                        "Dataset with output shapes %r cannot be restructured to have "
                        "incompatible output shapes %r" %
                        (source_shapes, output_shapes))
            output_shapes = nest.map_structure_up_to(
                output_types, tensor_shape.as_shape, output_shapes)
        else:
            # No shapes requested: reuse the input's shapes, repacked into
            # the new nesting.
            output_shapes = nest.pack_sequence_as(
                output_types, nest.flatten(source_shapes))

        source_classes = dataset_ops.get_legacy_output_classes(dataset)
        if output_classes is None:
            # No classes requested: reuse the input's classes, repacked into
            # the new nesting.
            output_classes = nest.pack_sequence_as(
                output_types, nest.flatten(source_classes))

        self._structure = structure.convert_legacy_structure(
            output_types, output_shapes, output_classes)
        # The restructured dataset reuses the input's variant tensor.
        input_variant = self._input_dataset._variant_tensor  # pylint: disable=protected-access
        super(_RestructuredDataset, self).__init__(dataset, input_variant)
Beispiel #22
0
  def __init__(self,
               dataset,
               output_types,
               output_shapes=None,
               output_classes=None,
               allow_unsafe_cast=False):
    """Wraps `dataset` so that it reports the given types and shapes.

    `dataset` must have a structure that can be converted:
    * `dataset.output_types` must equal `output_types` modulo nesting.
    * Each shape in `dataset.output_shapes` must be compatible with the
      matching shape in `output_shapes` (if given).

    Note: This helper permits "unsafe casts" for shapes, equivalent to using
    `tf.Tensor.set_shape()` where domain-specific knowledge is available.

    Args:
      dataset: A `Dataset` object.
      output_types: A nested structure of `tf.DType` objects.
      output_shapes: (Optional.) A nested structure of `tf.TensorShape`
        objects. If omitted, the shapes are inherited from `dataset`.
      output_classes: (Optional.) A nested structure of class types. If
        omitted, the class types are inherited from `dataset`.
      allow_unsafe_cast: (Optional.) If `True`, the type and shape validation
        is skipped, so the caller may switch the reported output types and
        shapes of the restructured dataset, e.g. to switch a sparse tensor
        represented as `tf.variant` to its user-visible type and shape.

    Raises:
      ValueError: If either `output_types` or `output_shapes` is not
        compatible with the structure of `dataset`.
    """
    self._input_dataset = dataset
    validate = not allow_unsafe_cast

    original_types = dataset_ops.get_legacy_output_types(dataset)
    if validate:
      # The flattened dtypes must match exactly; only the nesting may differ.
      output_types = nest.map_structure(dtypes.as_dtype, output_types)
      types_before = nest.flatten(original_types)
      types_after = nest.flatten(output_types)
      if types_before != types_after:
        raise ValueError(
            "Dataset with output types %r cannot be restructured to have "
            "output types %r" %
            (dataset_ops.get_legacy_output_types(dataset), output_types))

    original_shapes = dataset_ops.get_legacy_output_shapes(dataset)
    if output_shapes is not None:
      if validate:
        # Every requested shape has to be compatible with the shape the
        # input dataset already reports.
        nest.assert_same_structure(output_types, output_shapes)
        shapes_before = nest.flatten(original_shapes)
        shapes_after = nest.flatten_up_to(output_types, output_shapes)
        if any(not before.is_compatible_with(after)
               for before, after in zip(shapes_before, shapes_after)):
          raise ValueError(
              "Dataset with output shapes %r cannot be restructured to have "
              "incompatible output shapes %r" % (original_shapes,
                                                 output_shapes))
      output_shapes = nest.map_structure_up_to(
          output_types, tensor_shape.as_shape, output_shapes)
    else:
      # No shapes requested: reuse the input's shapes, repacked into the new
      # nesting.
      output_shapes = nest.pack_sequence_as(
          output_types, nest.flatten(original_shapes))

    original_classes = dataset_ops.get_legacy_output_classes(dataset)
    if output_classes is None:
      # No classes requested: reuse the input's classes, repacked into the
      # new nesting.
      output_classes = nest.pack_sequence_as(
          output_types, nest.flatten(original_classes))

    self._structure = structure.convert_legacy_structure(
        output_types, output_shapes, output_classes)
    # The restructured dataset reuses the input's variant tensor.
    input_variant = self._input_dataset._variant_tensor  # pylint: disable=protected-access
    super(_RestructuredDataset, self).__init__(dataset, input_variant)