Example #1
  def testSetBatchSizeSingleTensor1d(self):
    dataset = tf.data.Dataset.range(4).batch(2)
    self.assertFalse(dataset.output_shapes.is_fully_defined())

    dataset = dataset_ops.set_batch_size(dataset, 2)
    self.assertEqual([2], dataset.output_shapes)

    next_batch = dataset.make_one_shot_iterator().get_next()
    with self.test_session() as sess:
      batch_value = sess.run(next_batch)
      self.assertAllEqual([0, 1], batch_value)

      batch_value = sess.run(next_batch)
      self.assertAllEqual([2, 3], batch_value)

      with self.assertRaises(tf.errors.OutOfRangeError):
        sess.run(next_batch)
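
The test above exercises dataset_ops.set_batch_size without showing its body. Below is a minimal sketch of such a helper, assuming it only annotates the static leading (batch) dimension of each element with set_shape; the nest.map_structure call is from tf.contrib.framework, and the sketch is an assumption, not the actual dataset_ops implementation.

  def set_batch_size(dataset, batch_size):
    """Statically sets the leading (batch) dimension of every element.

    Sketch only: assumes every batch already contains exactly `batch_size`
    elements and merely records that fact in the static shape, which is what
    the output_shapes assertions above check.
    """
    def _set_static_batch_dim(tensor):
      shape = tensor.shape.as_list()
      shape[0] = batch_size  # Replace the unknown leading dimension.
      tensor.set_shape(shape)
      return tensor

    def _map_fn(*element):
      # Datasets of single tensors pass one argument; tuples pass several.
      element = element[0] if len(element) == 1 else element
      return tf.contrib.framework.nest.map_structure(
          _set_static_batch_dim, element)

    return dataset.map(_map_fn)
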
Example #2
  def testSetBatchSizeSingleTensor2d(self):
    values = np.arange(12, dtype=np.int32).reshape([4, 3])
    dataset = tf.data.Dataset.from_tensor_slices(values).batch(2)
    self.assertFalse(dataset.output_shapes.is_fully_defined())

    dataset = dataset_ops.set_batch_size(dataset, 2)
    self.assertEqual([2, 3], dataset.output_shapes)

    next_batch = dataset.make_one_shot_iterator().get_next()
    with self.test_session() as sess:
      batch_value = sess.run(next_batch)
      self.assertAllEqual([[0, 1, 2], [3, 4, 5]], batch_value)

      batch_value = sess.run(next_batch)
      self.assertAllEqual([[6, 7, 8], [9, 10, 11]], batch_value)

      with self.assertRaises(tf.errors.OutOfRangeError):
        sess.run(next_batch)
Example #3
  def testSetBatchSizeNested(self):
    values = {
        "a": 100 + np.arange(4, dtype=np.int32),
        "nest": {
            "b": np.arange(12, dtype=np.int32).reshape([4, 3]),
            "c": np.arange(4, dtype=np.int32)
        }
    }
    dataset = tf.data.Dataset.from_tensor_slices(values).batch(2)
    self.assertItemsEqual(["a", "nest"], dataset.output_shapes.keys())
    self.assertItemsEqual(["b", "c"], dataset.output_shapes["nest"].keys())
    self.assertFalse(dataset.output_shapes["a"].is_fully_defined())
    self.assertFalse(dataset.output_shapes["nest"]["b"].is_fully_defined())
    self.assertFalse(dataset.output_shapes["nest"]["c"].is_fully_defined())

    dataset = dataset_ops.set_batch_size(dataset, 2)
    self.assertItemsEqual(["a", "nest"], dataset.output_shapes.keys())
    self.assertItemsEqual(["b", "c"], dataset.output_shapes["nest"].keys())
    self.assertEqual([2], dataset.output_shapes["a"])
    self.assertEqual([2, 3], dataset.output_shapes["nest"]["b"])
    self.assertEqual([2], dataset.output_shapes["nest"]["c"])

    next_batch = dataset.make_one_shot_iterator().get_next()
    next_a = next_batch["a"]
    next_b = next_batch["nest"]["b"]
    next_c = next_batch["nest"]["c"]

    with self.test_session() as sess:
      a, b, c = sess.run([next_a, next_b, next_c])
      self.assertAllEqual([100, 101], a)
      self.assertAllEqual([[0, 1, 2], [3, 4, 5]], b)
      self.assertAllEqual([0, 1], c)

      a, b, c = sess.run([next_a, next_b, next_c])
      self.assertAllEqual([102, 103], a)
      self.assertAllEqual([[6, 7, 8], [9, 10, 11]], b)
      self.assertAllEqual([2, 3], c)

      with self.assertRaises(tf.errors.OutOfRangeError):
        sess.run(next_batch)
Example #4
  def build(self, batch_size):
    """Builds the dataset input pipeline.

    Args:
      batch_size: The number of elements in each batch of the returned dataset.

    Returns:
      A tf.data.Dataset.

    Raises:
      ValueError: If no files match self.file_pattern.
    """
    file_patterns = self.file_pattern.split(",")
    filenames = []
    for p in file_patterns:
      matches = tf.gfile.Glob(p)
      if not matches:
        raise ValueError("Found no input files matching {}".format(p))
      filenames.extend(matches)
    tf.logging.info(
        "Building input pipeline from %d files matching patterns: %s",
        len(filenames), file_patterns)

    is_training = self.mode == tf.estimator.ModeKeys.TRAIN

    # Create a string dataset of filenames, and possibly shuffle.
    filename_dataset = tf.data.Dataset.from_tensor_slices(filenames)
    if is_training and len(filenames) > 1:
      filename_dataset = filename_dataset.shuffle(len(filenames))

    # Read serialized Example protos.
    dataset = filename_dataset.apply(
        tf.contrib.data.parallel_interleave(
            self.file_reader(), cycle_length=8, block_length=8, sloppy=True))

    if is_training:
      # Shuffle and repeat. Note that shuffle() comes before repeat(), so
      # elements are shuffled within each epoch of data, not across epochs.
      if self.config.shuffle_values_buffer > 0:
        dataset = dataset.shuffle(self.config.shuffle_values_buffer)
      dataset = dataset.repeat()

    # Map the parser over the dataset.
    dataset = dataset.map(
        self.create_example_parser(),
        num_parallel_calls=self.config.num_parallel_parser_calls)

    def _prepare_wavenet_inputs(features):
      """Validates features, and clips lengths and adds weights if needed."""
      # Validate feature names.
      required_features = {"autoregressive_input", "conditioning_stack"}
      allowed_features = required_features | {"weights"}
      feature_names = features.keys()
      if not required_features.issubset(feature_names):
        raise ValueError("Features must contain all of: {}. Got: {}".format(
            required_features, feature_names))
      if not allowed_features.issuperset(feature_names):
        raise ValueError("Features can only contain: {}. Got: {}".format(
            allowed_features, feature_names))

      output = {}
      for name, value in features.items():
        # Validate shapes. The output dimension is [num_samples, dim].
        ndims = len(value.shape)
        if ndims == 1:
          # Add an extra dimension: [num_samples] -> [num_samples, 1].
          value = tf.expand_dims(value, -1)
        elif ndims != 2:
          raise ValueError(
              "Features should be 1D or 2D sequences. Got '{}' = {}".format(
                  name, value))
        if self.config.max_length:
          value = value[:self.config.max_length]
        output[name] = value

      if "weights" not in output:
        output["weights"] = tf.ones_like(output["autoregressive_input"])

      return output

    dataset = dataset.map(_prepare_wavenet_inputs)

    # Batch elements into batches of up to batch_size.
    dataset = self._batch_and_pad(dataset, batch_size)

    if is_training:
      # The dataset repeats infinitely before batching, so each batch has the
      # maximum number of elements.
      dataset = dataset_ops.set_batch_size(dataset, batch_size)
    elif self.use_tpu and self.mode == tf.estimator.ModeKeys.EVAL:
      # Pad to ensure that each batch has the same number of elements.
      dataset = dataset_ops.pad_dataset_to_batch_size(dataset, batch_size)

    # Prefetch batches.
    buffer_size = (
        self.config.batches_buffer_size or max(1, int(256 / batch_size)))
    dataset = dataset.prefetch(buffer_size)

    return dataset
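
The TPU evaluation branch above calls dataset_ops.pad_dataset_to_batch_size, which is also not shown. Below is a hedged sketch under the assumption that it zero-pads each (possibly partial) batch along the leading dimension up to batch_size; because the "weights" feature gets padded with zeros as well, padded rows would carry zero weight. This is an assumption about the helper, not its real implementation.

  def pad_dataset_to_batch_size(dataset, batch_size):
    """Sketch: zero-pads each (possibly partial) batch to batch_size rows."""
    def _pad_batch(features):
      def _pad(tensor):
        pad_rows = batch_size - tf.shape(tensor)[0]
        paddings = [[0, pad_rows]] + [[0, 0]] * (tensor.shape.ndims - 1)
        padded = tf.pad(tensor, paddings)
        # Record the now-known batch dimension in the static shape.
        padded.set_shape([batch_size] + tensor.shape.as_list()[1:])
        return padded
      return tf.contrib.framework.nest.map_structure(_pad, features)

    return dataset.map(_pad_batch)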