def testMultipleDatasetWithPrefixes(self, dataset_transformation):
    aggregator = stats_aggregator.StatsAggregator()
    dataset = dataset_ops.Dataset.range(100).apply(
        stats_ops.latency_stats("record_latency"))
    dataset = dataset_transformation(dataset, aggregator, prefix="dataset1")
    dataset2 = dataset_ops.Dataset.range(100).apply(
        stats_ops.latency_stats("record_latency"))
    dataset2 = dataset_transformation(dataset2, aggregator, prefix="dataset2")
    iterator_0 = dataset_ops.make_initializable_iterator(dataset)
    iterator_1 = dataset_ops.make_initializable_iterator(dataset2)
    next_element = iterator_0.get_next() + iterator_1.get_next()
    summary_t = aggregator.get_summary()

    with self.test_session() as sess:
      self.evaluate([iterator_0.initializer, iterator_1.initializer])
      for i in range(100):
        self.assertEqual(i * 2, self.evaluate(next_element))
        self._assertSummaryHasCount(
            self.evaluate(summary_t), "dataset1_record_latency", float(i + 1))
        self._assertSummaryHasCount(
            self.evaluate(summary_t), "dataset2_record_latency", float(i + 1))
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(next_element)
      self._assertSummaryHasCount(
          self.evaluate(summary_t), "dataset1_record_latency", 100.0)
      self._assertSummaryHasCount(
          self.evaluate(summary_t), "dataset2_record_latency", 100.0)
Example #2
  def testSkipEagerUnbatchDynamicShapeMismatch(self):
    ph1 = array_ops.placeholder(dtypes.int32, shape=[None])
    ph2 = array_ops.placeholder(dtypes.int32, shape=None)
    data = dataset_ops.Dataset.from_tensors((ph1, ph2))
    data = data.apply(batching.unbatch())
    iterator = dataset_ops.make_initializable_iterator(data)
    next_element = iterator.get_next()

    with self.cached_session() as sess:
      # Mismatch in the 0th dimension.
      sess.run(
          iterator.initializer,
          feed_dict={
              ph1: np.arange(7).astype(np.int32),
              ph2: np.arange(8).astype(np.int32)
          })
      with self.assertRaises(errors.InvalidArgumentError):
        self.evaluate(next_element)

      # No 0th dimension (i.e. scalar value) for one component.
      sess.run(
          iterator.initializer,
          feed_dict={
              ph1: np.arange(7).astype(np.int32),
              ph2: 7
          })
      with self.assertRaises(errors.InvalidArgumentError):
        self.evaluate(next_element)
Example #3
  def testArbitraryReaderFunc(self):

    def MakeRecord(i, j):
      return compat.as_bytes('%04d-%04d' % (i, j))

    record_bytes = len(MakeRecord(10, 200))

    all_contents = []
    for i in range(_NUM_FILES):
      filename = os.path.join(self.get_temp_dir(), 'fixed_length.%d' % i)
      with open(filename, 'wb') as f:
        for j in range(_NUM_ENTRIES):
          record = MakeRecord(i, j)
          f.write(record)
          all_contents.append(record)

    def FixedLengthFile(filename):
      return readers.FixedLengthRecordDataset(filename, record_bytes)

    dataset = datasets.StreamingFilesDataset(
        os.path.join(self.get_temp_dir(), 'fixed_length*'),
        filetype=FixedLengthFile)

    with ops.device(self._worker_device):
      iterator = dataset_ops.make_initializable_iterator(dataset)
    self._sess.run(iterator.initializer)
    get_next = iterator.get_next()

    retrieved_values = []
    for _ in range(4 * len(all_contents)):
      retrieved_values.append(compat.as_bytes(self._sess.run(get_next)))

    self.assertEqual(set(all_contents), set(retrieved_values))
  def testSlideSparse(self):

    def _sparse(i):
      return sparse_tensor.SparseTensorValue(
          indices=[[0]], values=(i * [1]), dense_shape=[1])

    iterator = dataset_ops.make_initializable_iterator(
        dataset_ops.Dataset.range(10).map(_sparse).apply(
            sliding.sliding_window_batch(window_size=5, window_shift=3)))
    init_op = iterator.initializer
    get_next = iterator.get_next()

    with self.cached_session() as sess:
      sess.run(init_op)
      num_batches = (10 - 5) // 3 + 1
      for i in range(num_batches):
        actual = sess.run(get_next)
        expected = sparse_tensor.SparseTensorValue(
            indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
            values=[i * 3, i * 3 + 1, i * 3 + 2, i * 3 + 3, i * 3 + 4],
            dense_shape=[5, 1])
        self.assertTrue(sparse_tensor.is_sparse(actual))
        self.assertSparseValuesEqual(actual, expected)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)
  def test_assert_element_shape_on_unknown_shape_dataset(self):

    def create_unknown_shape_dataset(x):
      return script_ops.py_func(
          lambda _: (  # pylint: disable=g-long-lambda
              np.ones(2, dtype=np.float32),
              np.zeros((3, 4), dtype=np.int32)),
          [x],
          [dtypes.float32, dtypes.int32])

    dataset = dataset_ops.Dataset.range(5).map(create_unknown_shape_dataset)
    unknown_shapes = (tensor_shape.TensorShape(None),
                      tensor_shape.TensorShape(None))
    self.assertEqual(unknown_shapes,
                     dataset_ops.get_legacy_output_shapes(dataset))

    expected_shapes = (tensor_shape.TensorShape(2),
                       tensor_shape.TensorShape((3, 4)))
    result = dataset.apply(batching.assert_element_shape(expected_shapes))
    self.assertEqual(expected_shapes,
                     dataset_ops.get_legacy_output_shapes(result))

    iterator = dataset_ops.make_initializable_iterator(result)
    init_op = iterator.initializer
    get_next = iterator.get_next()
    with self.cached_session() as sess:
      sess.run(init_op)
      for _ in range(5):
        sess.run(get_next)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)
Example #6
  def getNext(self, dataset, requires_initialization=False):
    """Returns a callable that returns the next element of the dataset.

    Example use:
    ```python
    # In both graph and eager modes
    dataset = ...
    get_next = self.getNext(dataset)
    result = self.evaluate(get_next())
    ```

    Args:
      dataset: A dataset whose elements will be returned.
      requires_initialization: Indicates that when the test is executed in graph
        mode, it should use an initializable iterator to iterate through the
        dataset (e.g. when it contains stateful nodes). Defaults to False.
    Returns:
      A callable that returns the next element of `dataset`.
    """
    if context.executing_eagerly():
      iterator = dataset.__iter__()
      return iterator._next_internal  # pylint: disable=protected-access
    else:
      if requires_initialization:
        iterator = dataset_ops.make_initializable_iterator(dataset)
        self.evaluate(iterator.initializer)
      else:
        iterator = dataset_ops.make_one_shot_iterator(dataset)
      get_next = iterator.get_next()
      return lambda: get_next
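For context, here is a minimal standalone sketch of the two graph-mode iterator kinds that the `getNext` helper above switches between, written against the TF 1.x-style `tf.compat.v1` API purely for illustration (it is not part of the test base class):

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

dataset = tf.data.Dataset.range(3)

# A one-shot iterator needs no explicit initialization step.
one_shot = tf.data.make_one_shot_iterator(dataset)
# An initializable iterator must be initialized before use, which is what
# `requires_initialization=True` asks for (e.g. for stateful pipelines).
initializable = tf.data.make_initializable_iterator(dataset)

with tf.Session() as sess:
  sess.run(initializable.initializer)
  print(sess.run(one_shot.get_next()))       # -> 0
  print(sess.run(initializable.get_next()))  # -> 0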
  def test_assert_wrong_partial_element_shape_on_unknown_shape_dataset(self):

    def create_unknown_shape_dataset(x):
      return script_ops.py_func(
          lambda _: (  # pylint: disable=g-long-lambda
              np.ones(2, dtype=np.float32),
              np.zeros((3, 4), dtype=np.int32)),
          [x],
          [dtypes.float32, dtypes.int32])

    dataset = dataset_ops.Dataset.range(3).map(create_unknown_shape_dataset)
    unknown_shapes = (tensor_shape.TensorShape(None),
                      tensor_shape.TensorShape(None))
    self.assertEqual(unknown_shapes, dataset.output_shapes)

    wrong_shapes = (tensor_shape.TensorShape(2),
                    tensor_shape.TensorShape((None, 10)))
    iterator = dataset_ops.make_initializable_iterator(
        dataset.apply(batching.assert_element_shape(wrong_shapes)))
    init_op = iterator.initializer
    get_next = iterator.get_next()
    with self.cached_session() as sess:
      sess.run(init_op)
      with self.assertRaises(errors.InvalidArgumentError):
        sess.run(get_next)
  def testCopyToDeviceWithReInitAndPrefetch(self):
    host_dataset = dataset_ops.Dataset.range(10)
    device_dataset = host_dataset.apply(
        prefetching_ops.copy_to_device("/cpu:1")).prefetch(1)

    with ops.device("/cpu:1"):
      iterator = dataset_ops.make_initializable_iterator(device_dataset)
      next_element = iterator.get_next()

    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
    self.assertEqual(host_dataset.output_types, iterator.output_types)
    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
    self.assertEqual(host_dataset.output_classes, iterator.output_classes)

    self.assertEqual(dtypes.int64, next_element.dtype)
    self.assertEqual([], next_element.shape)

    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
    with self.test_session(config=worker_config):
      self.evaluate(iterator.initializer)
      for i in range(5):
        self.assertEqual(i, self.evaluate(next_element))
      self.evaluate(iterator.initializer)
      for i in range(10):
        self.assertEqual(i, self.evaluate(next_element))
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(next_element)
Example #9
 def make_initializable_iterator(self):
   iterators = []
   for worker, dataset in self._datasets:
     with ops.device(worker):
       iterators.append(
           (worker, dataset_ops.make_initializable_iterator(dataset)))
   return MultiWorkerDataIterator(iterators, self._input_workers)
Example #10
  def test_sequence_file_dataset(self):
    """Test case for SequenceFileDataset.

    The file is generated with `org.apache.hadoop.io.Text` for key/value.
    There are 25 records in the file with the format:
    key = XXX
    value = VALUEXXX
    where XXX is the line number, starting at 001.
    """
    filename = os.path.join(resource_loader.get_data_files_path(),
                            "testdata", "string.seq")

    filenames = constant_op.constant([filename], dtypes.string)
    num_repeats = 2

    dataset = hadoop_dataset_ops.SequenceFileDataset(filenames).repeat(
        num_repeats)
    iterator = dataset_ops.make_initializable_iterator(dataset)
    init_op = iterator.initializer
    get_next = iterator.get_next()

    with self.cached_session() as sess:
      sess.run(init_op)
      for _ in range(num_repeats):  # Dataset is repeated.
        for i in range(25):  # 25 records.
          v0 = b"%03d" % (i + 1)
          v1 = b"VALUE%03d" % (i + 1)
          self.assertEqual((v0, v1), sess.run(get_next))
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)
Example #11
  def testSkipEagerSplitPipelineFailsWithPlacementError(self):
    with session.Session(
        target="",
        config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess:

      dataset = dataset_ops.Dataset.from_tensors(0)

      # Define a pipeline that attempts to use variables on two
      # different devices.
      #
      # Initialize the variables before creating the iterator, to avoid the
      # placement algorithm overriding the DT_RESOURCE colocation constraints.
      with ops.device("/cpu:0"):
        var_0 = resource_variable_ops.ResourceVariable(initial_value=0)
        dataset = dataset.map(lambda x: x + var_0.read_value())
      sess.run(var_0.initializer)

      with ops.device("/cpu:1"):
        var_1 = resource_variable_ops.ResourceVariable(initial_value=0)
        dataset = dataset.map(lambda x: x + var_1.read_value())
      sess.run(var_1.initializer)

      iterator = dataset_ops.make_initializable_iterator(dataset)
      sess.run(iterator.initializer)

      with self.assertRaisesRegexp(
          errors.FailedPreconditionError,
          "Error while reading resource variable Variable"):
        sess.run(iterator.get_next())
Example #12
  def _benchmarkRangeHelper(self, modeling_enabled):
    num_elements = 10000000 if modeling_enabled else 50000000

    # Use `Dataset.skip()` and `Dataset.take()` to perform the iteration in
    # C++, and focus on the minimal overheads (excluding Python invocation
    # costs).
    dataset = dataset_ops.Dataset.range(num_elements).skip(
        num_elements - 1).take(1)
    options = dataset_ops.Options()
    options.experimental_autotune = modeling_enabled
    options.experimental_optimization.apply_default_optimizations = False
    dataset = dataset.with_options(options)
    iterator = dataset_ops.make_initializable_iterator(dataset)
    next_element = iterator.get_next()

    with session.Session() as sess:
      # Run once to warm up the session caches.
      sess.run(iterator.initializer)
      sess.run(next_element)

      # Run once for timing.
      sess.run(iterator.initializer)
      start = time.time()
      sess.run(next_element)
      end = time.time()

      time_per_element = (end - start) / num_elements
      self.report_benchmark(
          iters=num_elements,
          wall_time=time_per_element,
          name="modeling_%s" % ("on" if modeling_enabled else "off"))
Example #13
  def testIteratorStringHandleReuseTensorObject(self):
    dataset = dataset_ops.Dataset.from_tensor_slices([1, 2, 3])
    one_shot_iterator = dataset_ops.make_one_shot_iterator(dataset)
    initializable_iterator = dataset_ops.make_initializable_iterator(dataset)
    structure_iterator = iterator_ops.Iterator.from_structure(
        dataset.output_types)

    created_ops = len(ops.get_default_graph().get_operations())

    self.assertIs(one_shot_iterator.string_handle(),
                  one_shot_iterator.string_handle())
    self.assertIs(initializable_iterator.string_handle(),
                  initializable_iterator.string_handle())
    self.assertIs(structure_iterator.string_handle(),
                  structure_iterator.string_handle())

    # Assert that getting the (default) string handle creates no ops.
    self.assertEqual(created_ops, len(ops.get_default_graph().get_operations()))

    # Specifying an explicit name will create a new op.
    handle_with_name = one_shot_iterator.string_handle(name="foo")
    self.assertEqual("foo", handle_with_name.op.name)
    self.assertIsNot(one_shot_iterator.string_handle(), handle_with_name)

    handle_with_same_name = one_shot_iterator.string_handle(name="foo")
    self.assertEqual("foo_1", handle_with_same_name.op.name)
    self.assertIsNot(handle_with_name, handle_with_same_name)
  def test_assert_element_shape(self):

    def create_dataset(_):
      return (array_ops.ones(2, dtype=dtypes.float32),
              array_ops.zeros((3, 4), dtype=dtypes.int32))

    dataset = dataset_ops.Dataset.range(5).map(create_dataset)
    expected_shapes = (tensor_shape.TensorShape(2),
                       tensor_shape.TensorShape((3, 4)))
    self.assertEqual(expected_shapes,
                     dataset_ops.get_legacy_output_shapes(dataset))

    result = dataset.apply(batching.assert_element_shape(expected_shapes))
    self.assertEqual(expected_shapes,
                     dataset_ops.get_legacy_output_shapes(result))

    iterator = dataset_ops.make_initializable_iterator(result)
    init_op = iterator.initializer
    get_next = iterator.get_next()
    with self.cached_session() as sess:
      sess.run(init_op)
      for _ in range(5):
        sess.run(get_next)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)
Example #15
  def testSkipEagerSharedResourceNestedFlatMapDataset(self):
    repeats = [[1, 2], [3, 4], [5, 0], [1, 7]]
    components = np.array(repeats, dtype=np.int64)
    iterator = (
        dataset_ops.make_initializable_iterator(
            dataset_ops.Dataset.from_tensor_slices(components).flat_map(
                lambda x: dataset_ops.Dataset.from_tensor_slices(x).flat_map(
                    lambda y: dataset_ops.Dataset.from_tensors(y).repeat(y))),
            shared_name="shared_flat_map_iterator"))
    init_op = iterator.initializer
    get_next = iterator.get_next()

    # Create two concurrent sessions that share the same iterator
    # resource on the same server, and verify that a random
    # interleaving of `Session.run(get_next)` calls on the two
    # sessions yields the expected result.
    server = server_lib.Server.create_local_server()
    with session.Session(server.target) as sess1:
      with session.Session(server.target) as sess2:
        for _ in range(3):
          sess = random.choice([sess1, sess2])
          sess.run(init_op)
          for row in repeats:
            for i in row:
              for _ in range(i):
                sess = random.choice([sess1, sess2])
                self.assertEqual(i, sess.run(get_next))

        with self.assertRaises(errors.OutOfRangeError):
          sess = random.choice([sess1, sess2])
          sess.run(get_next)
  def benchmarkSliceRepeatBatch(self):
    input_size = 10000
    batch_size = 100
    num_epochs = 100

    input_data = np.random.randn(input_size)

    dataset = (
        dataset_ops.Dataset.from_tensor_slices(input_data)
        .repeat(num_epochs + 1).batch(batch_size))
    iterator = dataset_ops.make_initializable_iterator(dataset)
    next_element = iterator.get_next()

    with session.Session() as sess:
      sess.run(iterator.initializer)
      # Run one whole epoch to burn in the computation.
      for _ in range(input_size // batch_size):
        sess.run(next_element)
      deltas = []
      try:
        while True:
          start = time.time()
          sess.run(next_element)
          deltas.append(time.time() - start)
      except errors.OutOfRangeError:
        pass

    median_wall_time = np.median(deltas)
    print("Slice/repeat/batch with sess.run() input size: %d batch size: %d "
          "Median wall time per element: %f" % (input_size, batch_size,
                                                median_wall_time))
    self.report_benchmark(
        iters=len(deltas),
        wall_time=median_wall_time,
        name="slice_repeat_batch_input_%d_batch_%d" % (input_size, batch_size))
  def test_assert_partial_element_shape(self):

    def create_dataset(_):
      return (array_ops.ones(2, dtype=dtypes.float32),
              array_ops.zeros((3, 4), dtype=dtypes.int32))

    dataset = dataset_ops.Dataset.range(5).map(create_dataset)
    partial_expected_shape = (
        tensor_shape.TensorShape(None),  # Unknown shape
        tensor_shape.TensorShape((None, 4)))  # Partial shape
    result = dataset.apply(
        batching.assert_element_shape(partial_expected_shape))
    # Partial shapes are merged with actual shapes:
    actual_shapes = (tensor_shape.TensorShape(2),
                     tensor_shape.TensorShape((3, 4)))
    self.assertEqual(actual_shapes, result.output_shapes)

    iterator = dataset_ops.make_initializable_iterator(result)
    init_op = iterator.initializer
    get_next = iterator.get_next()
    with self.cached_session() as sess:
      sess.run(init_op)
      for _ in range(5):
        sess.run(get_next)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)
  def testPrefetchToDeviceWithReInit(self):
    host_dataset = dataset_ops.Dataset.range(10)
    device_dataset = host_dataset.apply(
        prefetching_ops.prefetch_to_device("/cpu:1"))

    with ops.device("/cpu:1"):
      iterator = dataset_ops.make_initializable_iterator(device_dataset)
      next_element = iterator.get_next()

    self.assertTrue(dataset_ops.get_structure(host_dataset).is_compatible_with(
        dataset_ops.get_structure(device_dataset)))
    self.assertTrue(dataset_ops.get_structure(host_dataset).is_compatible_with(
        dataset_ops.get_structure(iterator)))

    self.assertEqual(dtypes.int64, next_element.dtype)
    self.assertEqual([], next_element.shape)

    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
    with self.test_session(config=worker_config):
      self.evaluate(iterator.initializer)
      for i in range(5):
        self.assertEqual(i, self.evaluate(next_element))
      self.evaluate(iterator.initializer)
      for i in range(10):
        self.assertEqual(i, self.evaluate(next_element))
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(next_element)
  def testPrefetchBufferUtilization(self, dataset_transformation):
    aggregator = stats_aggregator.StatsAggregator()
    dataset = dataset_ops.Dataset.range(100).map(
        lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).prefetch(-1)
    dataset = dataset_transformation(dataset, aggregator)
    iterator = dataset_ops.make_initializable_iterator(dataset)
    next_element = iterator.get_next()
    summary_t = aggregator.get_summary()

    with self.cached_session() as sess:
      self.evaluate(iterator.initializer)
      for i in range(100):
        self.assertAllEqual(
            np.array([i] * i, dtype=np.int64), self.evaluate(next_element))
        summary_str = self.evaluate(summary_t)
        self._assertSummaryHasCount(summary_str, "Prefetch::buffer_utilization",
                                    float(i + 1))
        self._assertSummaryContains(summary_str, "Prefetch::buffer_capacity")
        self._assertSummaryContains(summary_str, "Prefetch::buffer_size")
        self._assertSummaryHasRange(summary_str, "Prefetch::buffer_utilization",
                                    0, 1)
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(next_element)
      summary_str = self.evaluate(summary_t)
      self._assertSummaryHasCount(summary_str, "Prefetch::buffer_utilization",
                                  100)
Example #20
  def benchmarkOldUnbatchImplementation(self):
    batch_sizes = [1, 2, 5, 10, 20, 50]
    elems_per_trial = 10000
    with ops.Graph().as_default():
      dataset = dataset_ops.Dataset.from_tensors("element").repeat(None)
      batch_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[])
      dataset = dataset.batch(batch_size_placeholder)
      dataset = dataset.flat_map(dataset_ops.Dataset.from_tensor_slices)
      dataset = dataset.skip(elems_per_trial)
      options = dataset_ops.Options()
      options.experimental_optimization.apply_default_optimizations = False
      dataset = dataset.with_options(options)
      iterator = dataset_ops.make_initializable_iterator(dataset)
      next_element = iterator.get_next()

      with session.Session() as sess:
        for batch_size in batch_sizes:
          deltas = []
          for _ in range(5):
            sess.run(
                iterator.initializer,
                feed_dict={batch_size_placeholder: batch_size})
            start = time.time()
            sess.run(next_element.op)
            end = time.time()
            deltas.append((end - start) / elems_per_trial)

          median_wall_time = np.median(deltas)
          self.report_benchmark(
              iters=10000,
              wall_time=median_wall_time,
              name="unfused_batch_size_%d" %
              batch_size)
  def testBytesProduced(self, dataset_transformation):
    aggregator = stats_aggregator.StatsAggregator()
    dataset = dataset_ops.Dataset.range(100).map(
        lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply(
            stats_ops.bytes_produced_stats("bytes_produced"))
    dataset = dataset_transformation(dataset, aggregator)
    iterator = dataset_ops.make_initializable_iterator(dataset)
    next_element = iterator.get_next()
    summary_t = aggregator.get_summary()

    with self.cached_session() as sess:
      self.evaluate(iterator.initializer)
      expected_sum = 0.0
      for i in range(100):
        self.assertAllEqual(
            np.array([i] * i, dtype=np.int64), self.evaluate(next_element))
        summary_str = self.evaluate(summary_t)
        self._assertSummaryHasCount(summary_str, "bytes_produced", float(i + 1))
        expected_sum += i * 8.0
        self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum)
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(next_element)
      summary_str = self.evaluate(summary_t)
      self._assertSummaryHasCount(summary_str, "bytes_produced", 100.0)
      self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum)
Example #22
  def testMapAndBatchShapeMismatch(self, numa_aware):
    """Test a dataset that maps a TF function across its input elements."""

    def generator():
      yield [1]
      yield [2]
      yield [3]
      yield [[4, 5, 6]]

    dataset = dataset_ops.Dataset.from_generator(
        generator, output_types=dtypes.int32)
    batch_size = 4
    dataset = dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
    if numa_aware:
      options = dataset_ops.Options()
      options.experimental_numa_aware = True
      dataset = dataset.with_options(options)
    iterator = dataset_ops.make_initializable_iterator(dataset)

    init_op = iterator.initializer
    get_next = iterator.get_next()
    with self.cached_session() as sess:
      self.evaluate(init_op)
      with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                   "number of elements does not match"):
        self.evaluate(get_next)
  def testSlideSparseWithDifferentDenseShapes(self):

    def _sparse(i):
      return sparse_tensor.SparseTensorValue(
          indices=array_ops.expand_dims(
              math_ops.range(i, dtype=dtypes.int64), 1),
          values=array_ops.fill([math_ops.to_int32(i)], i),
          dense_shape=[i])

    iterator = dataset_ops.make_initializable_iterator(
        dataset_ops.Dataset.range(10).map(_sparse).apply(
            sliding.sliding_window_batch(window_size=5, window_shift=3)))
    init_op = iterator.initializer
    get_next = iterator.get_next()

    with self.cached_session() as sess:
      sess.run(init_op)
      num_batches = (10 - 5) // 3 + 1
      for i in range(num_batches):
        actual = sess.run(get_next)
        expected_indices = []
        expected_values = []
        for j in range(5):
          for k in range(i * 3 + j):
            expected_indices.append([j, k])
            expected_values.append(i * 3 + j)
        expected = sparse_tensor.SparseTensorValue(
            indices=expected_indices,
            values=expected_values,
            dense_shape=[5, i * 3 + 5 - 1])
        self.assertTrue(sparse_tensor.is_sparse(actual))
        self.assertSparseValuesEqual(actual, expected)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)
  def testSlideDatasetInvalid(self, count, window_size, window_shift,
                              window_stride):
    count_t = array_ops.placeholder(dtypes.int64, shape=[])
    window_size_t = array_ops.placeholder(dtypes.int64, shape=[])
    window_shift_t = array_ops.placeholder(dtypes.int64, shape=[])
    window_stride_t = array_ops.placeholder(dtypes.int64, shape=[])

    iterator = dataset_ops.make_initializable_iterator(
        dataset_ops.Dataset.range(10).map(lambda x: x).repeat(count_t).apply(
            sliding.sliding_window_batch(
                window_size=window_size_t,
                window_shift=window_shift_t,
                window_stride=window_stride_t)))
    init_op = iterator.initializer

    with self.cached_session() as sess:
      with self.assertRaises(errors.InvalidArgumentError):
        sess.run(
            init_op,
            feed_dict={
                count_t: count,
                window_size_t: window_size,
                window_shift_t: window_shift,
                window_stride_t: window_stride
            })
Example #25
  def testMapAndBatchSparse(self, numa_aware):

    def _sparse(i):
      return sparse_tensor.SparseTensorValue(
          indices=[[0]], values=(i * [1]), dense_shape=[1])

    dataset = dataset_ops.Dataset.range(10).apply(
        batching.map_and_batch(_sparse, 5))
    if numa_aware:
      options = dataset_ops.Options()
      options.experimental_numa_aware = True
      dataset = dataset.with_options(options)
    iterator = dataset_ops.make_initializable_iterator(dataset)

    init_op = iterator.initializer
    get_next = iterator.get_next()

    with self.cached_session() as sess:
      self.evaluate(init_op)
      for i in range(2):
        actual = self.evaluate(get_next)
        expected = sparse_tensor.SparseTensorValue(
            indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
            values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
            dense_shape=[5, 1])
        self.assertTrue(sparse_tensor.is_sparse(actual))
        self.assertSparseValuesEqual(actual, expected)
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(get_next)
  def _testNumThreadsHelper(self, num_threads, override_threadpool_fn):

    def get_thread_id(_):
      # Python creates a dummy thread object to represent the current
      # thread when called from an "alien" thread (such as a
      # `PrivateThreadPool` thread in this case). It does not include
      # the TensorFlow-given display name, but it has a unique
      # identifier that maps one-to-one with the underlying OS thread.
      return np.array(threading.current_thread().ident).astype(np.int64)

    dataset = (
        dataset_ops.Dataset.range(1000).map(
            lambda x: script_ops.py_func(get_thread_id, [x], dtypes.int64),
            num_parallel_calls=32).apply(unique.unique()))
    dataset = override_threadpool_fn(dataset)
    iterator = dataset_ops.make_initializable_iterator(dataset)
    next_element = iterator.get_next()

    self.evaluate(iterator.initializer)
    thread_ids = []
    try:
      while True:
        thread_ids.append(self.evaluate(next_element))
    except errors.OutOfRangeError:
      pass
    self.assertLen(thread_ids, len(set(thread_ids)))
    self.assertNotEmpty(thread_ids)
    if num_threads:
      # NOTE(mrry): We don't control the thread pool scheduling, and
      # so cannot guarantee that all of the threads in the pool will
      # perform work.
      self.assertLessEqual(len(thread_ids), num_threads)
Example #27
  def testSkipEagerIteratorGetNextAsOptional(self, np_value, tf_value_fn,
                                             works_on_gpu):
    if not works_on_gpu and test.is_gpu_available():
      self.skipTest("Test case not yet supported on GPU.")
    ds = dataset_ops.Dataset.from_tensors(np_value).repeat(3)
    iterator = dataset_ops.make_initializable_iterator(ds)
    next_elem = iterator_ops.get_next_as_optional(iterator)
    self.assertIsInstance(next_elem, optional_ops.Optional)
    self.assertTrue(
        next_elem.value_structure.is_compatible_with(
            structure.Structure.from_value(tf_value_fn())))
    elem_has_value_t = next_elem.has_value()
    elem_value_t = next_elem.get_value()
    with self.cached_session() as sess:
      # Before initializing the iterator, evaluating the optional fails with
      # a FailedPreconditionError.
      with self.assertRaises(errors.FailedPreconditionError):
        sess.run(elem_has_value_t)
      with self.assertRaises(errors.FailedPreconditionError):
        sess.run(elem_value_t)

      # For each element of the dataset, assert that the optional evaluates to
      # the expected value.
      sess.run(iterator.initializer)
      for _ in range(3):
        elem_has_value, elem_value = sess.run([elem_has_value_t, elem_value_t])
        self.assertTrue(elem_has_value)
        self._assertElementValueEqual(np_value, elem_value)

      # After exhausting the iterator, `next_elem.has_value()` will evaluate to
      # false, and attempting to get the value will fail.
      for _ in range(2):
        self.assertFalse(sess.run(elem_has_value_t))
        with self.assertRaises(errors.InvalidArgumentError):
          sess.run(elem_value_t)
Example #28
  def testTFRecordDatasetFromDataset(self):
    filenames = []
    all_contents = []
    for i in range(_NUM_FILES):
      filename = os.path.join(self.get_temp_dir(), 'tf_record.%d' % i)
      filenames.append(filename)
      writer = python_io.TFRecordWriter(filename)
      for j in range(_NUM_ENTRIES):
        record = compat.as_bytes('Record %d of file %d' % (j, i))
        writer.write(record)
        all_contents.append(record)
      writer.close()

    filenames = dataset_ops.Dataset.from_tensor_slices(filenames)

    dataset = datasets.StreamingFilesDataset(filenames, filetype='tfrecord')

    with ops.device(self._worker_device):
      iterator = dataset_ops.make_initializable_iterator(dataset)
    self._sess.run(iterator.initializer)
    get_next = iterator.get_next()

    retrieved_values = []
    for _ in range(4 * len(all_contents)):
      retrieved_values.append(compat.as_bytes(self._sess.run(get_next)))

    self.assertEqual(set(all_contents), set(retrieved_values))
  def testFilteredElementsStats(self, dataset_transformation):
    aggregator = stats_aggregator.StatsAggregator()
    dataset = dataset_ops.Dataset.range(101).filter(
        lambda x: math_ops.equal(math_ops.mod(x, 3), 0))
    dataset = dataset_transformation(dataset, aggregator)
    iterator = dataset_ops.make_initializable_iterator(dataset)
    next_element = iterator.get_next()
    summary_t = aggregator.get_summary()

    with self.test_session() as sess:
      self.evaluate(iterator.initializer)
      for i in range(34):
        self.assertEqual(i * 3, self.evaluate(next_element))
        if i != 0:
          self._assertSummaryHasScalarValue(
              self.evaluate(summary_t), "Filter::dropped_elements",
              float(i * 2))
        self._assertSummaryHasScalarValue(
            self.evaluate(summary_t), "Filter::filtered_elements", float(i + 1))
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(next_element)
      self._assertSummaryHasScalarValue(
          self.evaluate(summary_t), "Filter::dropped_elements", 67.0)
      self._assertSummaryHasScalarValue(
          self.evaluate(summary_t), "Filter::filtered_elements", 34.0)
Example #30
  def testSkipEagerMultipleIterators(self, reshuffle, initializable):
    with ops.Graph().as_default() as g:
      dataset = dataset_ops.Dataset.range(100).shuffle(
          10, reshuffle_each_iteration=reshuffle).repeat(3)

      if initializable:
        iterators = [dataset_ops.make_initializable_iterator(dataset)
                     for _ in range(2)]
      else:
        iterators = [dataset_ops.make_one_shot_iterator(dataset)
                     for _ in range(2)]

      results = []
      with self.session(graph=g) as sess:
        for iterator in iterators:
          if initializable:
            sess.run(iterator.initializer)
          next_element = iterator.get_next()
          run_results = []
          for _ in range(300):
            run_results.append(sess.run(next_element))
          with self.assertRaises(errors.OutOfRangeError):
            sess.run(next_element)

          results.append(run_results)

        self.assertNotEqual(results[0], results[1])
Example #31
 def testSameGraphErrorInitializable(self):
     dataset = dataset_ops.Dataset.range(10)
     with ops.Graph().as_default():
         with self.assertRaisesRegex(
                 ValueError,
                 "Please ensure that all datasets in the pipeline are "
                 "created in the same graph as the iterator."):
             _ = dataset_ops.make_initializable_iterator(dataset)
Example #32
 def _build_graph():
     captured_iterator = dataset_ops.make_initializable_iterator(
         dataset_ops.Dataset.range(10))
     ds = _build_ds(captured_iterator)
     iterator = ds.make_initializable_iterator()
     init_op = iterator.initializer
     get_next = iterator.get_next()
     return captured_iterator.initializer, init_op, get_next
 def _createSqlDataset(self, output_types, num_repeats=1):
     dataset = readers.SqlDataset(self.driver_name, self.data_source_name,
                                  self.query,
                                  output_types).repeat(num_repeats)
     iterator = dataset_ops.make_initializable_iterator(dataset)
     init_op = iterator.initializer
     get_next = iterator.get_next()
     return init_op, get_next
Example #34
    def testSkipEagerOptimizationWithCapturedRefVar(self, dataset_fn):
        """Tests that default optimizations are disabled with ref variables."""
        variable = variable_scope.get_variable("v",
                                               initializer=0,
                                               use_resource=False)
        assign_op = variable.assign_add(1)

        # Check that warning is logged.
        warnings.simplefilter("always")
        with warnings.catch_warnings(record=True) as w:
            unoptimized_dataset = dataset_fn(variable)

            options = dataset_ops.Options()
            options.experimental_optimization.apply_default_optimizations = False
            options.experimental_optimization.noop_elimination = True
            options.experimental_optimization.map_and_batch_fusion = True
            optimized_dataset = unoptimized_dataset.with_options(options)
            optimized_it = dataset_ops.make_initializable_iterator(
                optimized_dataset)

        self.assertGreaterEqual(len(w), 1)
        expected = (
            "tf.data graph rewrites are not compatible with "
            "tf.Variable. The following rewrites will be disabled: %s."
            " To enable rewrites, use resource variables instead by "
            "calling `tf.enable_resource_variables()` at the start of the "
            "program." % (", ".join(options._graph_rewrites())))
        self.assertTrue(any(expected in str(warning) for warning in w))

        # Check that outputs are the same in the optimized and unoptimized cases,
        # when the variable value is changing.
        unoptimized_it = dataset_ops.make_initializable_iterator(
            unoptimized_dataset)
        with ops.control_dependencies([assign_op]):
            unoptimized_output = unoptimized_it.get_next()
            optimized_output = optimized_it.get_next()

        self.evaluate(variable.initializer)
        self.evaluate((unoptimized_it.initializer, optimized_it.initializer))
        while True:
            try:
                unoptimized, optimized = self.evaluate(
                    (unoptimized_output, optimized_output))
                self.assertEqual(unoptimized, optimized)
            except errors.OutOfRangeError:
                break
 def _build_graph(start, stop):
     iterator = dataset_ops.make_initializable_iterator(
         dataset_ops.Dataset.range(start, stop))
     init_op = iterator.initializer
     get_next = iterator.get_next()
     save_op = self._save_op(iterator._iterator_resource)
     restore_op = self._restore_op(iterator._iterator_resource)
     return init_op, get_next, save_op, restore_op
Example #36
    def _assert_datasets_equal(self, ds1, ds2):
        # First, let's assert that the structure is the same.
        self.assertTrue(
            structure.are_compatible(ds1._element_structure,
                                     ds2._element_structure))

        # Now create iterators on both and assert they produce the same values.
        it1 = dataset_ops.make_initializable_iterator(ds1)
        it2 = dataset_ops.make_initializable_iterator(ds2)

        get_next1 = it1.get_next()
        get_next2 = it2.get_next()

        with self.cached_session():
            self.evaluate([it1.initializer, it2.initializer])
            val1, val2 = self.evaluate([get_next1, get_next2])
            self.assertEqual(val1, val2)
Example #37
 def _build_graph():
     if context.executing_eagerly():
         captured_iterator = iter(dataset_ops.Dataset.range(10))
     else:
         captured_iterator = dataset_ops.make_initializable_iterator(
             dataset_ops.Dataset.range(10))
     ds = _build_ds(captured_iterator)
     return captured_iterator, ds
Example #38
  def run_benchmark(self,
                    dataset,
                    num_elements,
                    iters=1,
                    warmup=True,
                    apply_default_optimizations=False,
                    session_config=None):
    """Benchmarks the dataset.

    Runs the dataset `iters` times. In each iteration, the benchmark measures
    the time it takes to go through `num_elements` elements of the dataset.

    Args:
      dataset: Dataset to benchmark.
      num_elements: Number of dataset elements to iterate through in each
        benchmark iteration.
      iters: Number of times to repeat the timing.
      warmup: If true, warms up the session caches by running an untimed run.
      apply_default_optimizations: Determines whether default optimizations
        should be applied.
      session_config: A ConfigProto protocol buffer with configuration options
        for the session. Applicable only for benchmarking in graph mode.

    Returns:
      A float, representing the per-element wall time of the dataset in seconds.
      This is the median time (with respect to `iters`) it takes for the dataset
      to go through `num_elements` elements, divided by `num_elements`.
    """

    # The options that have been applied to the dataset are preserved so that
    # they are not overwritten while benchmarking.
    options = dataset.options()
    options.experimental_optimization.apply_default_optimizations = (
        apply_default_optimizations)
    dataset = dataset.with_options(options)

    # NOTE: We use `dataset.skip()` to perform the iterations in C++, avoiding
    # the overhead of having to execute a TensorFlow op for each step of the
    # input pipeline. Note that this relies on the underlying implementation of
    # `skip` to execute upstream computation. If it is optimized in the future,
    # we will have to change this code.
    dataset = dataset.skip(num_elements - 1)

    if context.executing_eagerly():
      median_duration = self._run_eager_benchmark(
          iterable=dataset, iters=iters, warmup=warmup)
      return median_duration / float(num_elements)

    iterator = dataset_ops.make_initializable_iterator(dataset)
    next_element = iterator.get_next()
    op = nest.flatten(next_element)[0].op
    median_duration = self._run_graph_benchmark(
        iterable=op,
        iters=iters,
        warmup=warmup,
        session_config=session_config,
        initializer=iterator.initializer)
    return median_duration / float(num_elements)
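For reference, a self-contained sketch of the `skip()`-based timing pattern that the graph-mode branch above relies on, again using the TF 1.x-style `tf.compat.v1` API with illustrative numbers (this mirrors the approach, not the benchmark class itself):

import time

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

num_elements = 100000
# skip(num_elements - 1) makes the C++ runtime produce all but the last
# element, so a single get_next() call pays for the whole traversal.
dataset = tf.data.Dataset.range(num_elements).skip(num_elements - 1)
iterator = tf.data.make_initializable_iterator(dataset)
next_element = iterator.get_next()

with tf.Session() as sess:
  sess.run(iterator.initializer)  # warm-up pass
  sess.run(next_element)
  sess.run(iterator.initializer)  # timed pass
  start = time.time()
  sess.run(next_element)
  print("per-element wall time: %g s" % ((time.time() - start) / num_elements))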
Example #39
    def testIteratorGetNextAsOptional(self, np_value, tf_value_fn,
                                      gpu_compatible):
        if not gpu_compatible and test.is_gpu_available():
            self.skipTest("Test case not yet supported on GPU.")
        ds = dataset_ops.Dataset.from_tensors(np_value).repeat(3)

        if context.executing_eagerly():
            iterator = dataset_ops.make_one_shot_iterator(ds)
            # For each element of the dataset, assert that the optional evaluates to
            # the expected value.
            for _ in range(3):
                next_elem = iterator_ops.get_next_as_optional(iterator)
                self.assertIsInstance(next_elem, optional_ops.Optional)
                self.assertTrue(
                    structure.are_compatible(
                        next_elem.element_spec,
                        structure.type_spec_from_value(tf_value_fn())))
                self.assertTrue(next_elem.has_value())
                self.assertValuesEqual(np_value, next_elem.get_value())
            # After exhausting the iterator, `next_elem.has_value()` will evaluate to
            # false, and attempting to get the value will fail.
            for _ in range(2):
                next_elem = iterator_ops.get_next_as_optional(iterator)
                self.assertFalse(self.evaluate(next_elem.has_value()))
                with self.assertRaises(errors.InvalidArgumentError):
                    self.evaluate(next_elem.get_value())
        else:
            iterator = dataset_ops.make_initializable_iterator(ds)
            next_elem = iterator_ops.get_next_as_optional(iterator)
            self.assertIsInstance(next_elem, optional_ops.Optional)
            self.assertTrue(
                structure.are_compatible(
                    next_elem.element_spec,
                    structure.type_spec_from_value(tf_value_fn())))
            # Before initializing the iterator, evaluating the optional fails with
            # a FailedPreconditionError. This is only relevant in graph mode.
            elem_has_value_t = next_elem.has_value()
            elem_value_t = next_elem.get_value()
            with self.assertRaises(errors.FailedPreconditionError):
                self.evaluate(elem_has_value_t)
            with self.assertRaises(errors.FailedPreconditionError):
                self.evaluate(elem_value_t)
            # Now we initialize the iterator.
            self.evaluate(iterator.initializer)
            # For each element of the dataset, assert that the optional evaluates to
            # the expected value.
            for _ in range(3):
                elem_has_value, elem_value = self.evaluate(
                    [elem_has_value_t, elem_value_t])
                self.assertTrue(elem_has_value)
                self.assertValuesEqual(np_value, elem_value)

            # After exhausting the iterator, `next_elem.has_value()` will evaluate to
            # false, and attempting to get the value will fail.
            for _ in range(2):
                self.assertFalse(self.evaluate(elem_has_value_t))
                with self.assertRaises(errors.InvalidArgumentError):
                    self.evaluate(elem_value_t)
Example #40
 def parse_input_fn_result(result):
     input_hooks = []
     if isinstance(result, dataset_ops.DatasetV2):
         device = "/device:HPU:0"
         with tf.device(device):
             iterator = dataset_ops.make_initializable_iterator(result)
             result = iterator.get_next()
         input_hooks.append(_DatasetInitializerHook(iterator))
     return parse_iterator_result(result) + (input_hooks, )
Example #41
    def testIteratorOnDeviceGraphModeInitializableIterator(self):
        if not test_util.is_gpu_available():
            self.skipTest("No GPU available")

        host_dataset = dataset_ops.Dataset.range(10)
        device_dataset = host_dataset.apply(
            prefetching_ops.prefetch_to_device("/gpu:0"))

        host_iterator = dataset_ops.make_initializable_iterator(host_dataset)
        device_iterator = dataset_ops.make_initializable_iterator(
            device_dataset)

        host_tensor = host_iterator.get_next()
        device_tensor = device_iterator.get_next()

        self.assert_dataset_placement(host_dataset, host_iterator, host_tensor,
                                      device_dataset, device_iterator,
                                      device_tensor)
Example #42
    def testMapDataset(self):
        """Test an dataset that maps a TF function across its input elements."""
        # The pipeline is TensorSliceDataset -> MapDataset(square_3) ->
        # RepeatDataset(count).
        components = (np.arange(7),
                      np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
                      np.array(37.0) * np.arange(7))
        count = array_ops.placeholder(dtypes.int64, shape=[])

        dataset = self._buildMapDataset(components, count)
        iterator = dataset_ops.make_initializable_iterator(dataset)
        init_op = iterator.initializer
        get_next = iterator.get_next()

        self.assertEqual([c.shape[1:] for c in components],
                         [t.shape for t in get_next])

        with self.cached_session() as sess:
            # Test single-threaded access to the iterator.
            sess.run(init_op, feed_dict={count: 14})
            for _ in range(14):
                for i in range(7):
                    result = sess.run(get_next)
                    for component, result_component in zip(components, result):
                        self.assertAllEqual(component[i]**2, result_component)
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(get_next)

            # Test multi-threaded access to the same iterator.
            sess.run(init_op, feed_dict={count: 18})
            results = []

            def iterator_thread():
                while True:
                    try:
                        results.append(sess.run(get_next))
                    except errors.OutOfRangeError:
                        return

            threads = [
                self.checkedThread(target=iterator_thread) for _ in range(8)
            ]
            for t in threads:
                t.start()
            for t in threads:
                t.join()

            # `results` will contain the elements of components**2, each
            # repeated 18 times, but in a non-deterministic order. Sort the
            # results, and assert that each element of components**2 is
            # produced 18 times.
            results.sort(key=lambda x: x[0])
            for i in range(7):
                for j in range(18):
                    for component, result_component in zip(
                            components, results[i * 18 + j]):
                        self.assertAllEqual(component[i]**2, result_component)
Example #43
 def build_dataset(row, num):
   # pylint: disable=g-long-lambda
   iterator = dataset_ops.make_initializable_iterator(
       dataset_ops.Dataset.from_tensors(row).map(
           lambda elems: functional_ops.map_fn(
               lambda x: control_map_fn(x, num), elems)))
   init_op = iterator.initializer
   get_next = iterator.get_next()
   return init_op, get_next
Example #44
 def _build_reader_dataset_graph():
   filenames = ["test"]  # Does not exist but we don't care in this test.
   iterator = dataset_ops.make_initializable_iterator(
       readers.FixedLengthRecordDataset(filenames, 1, 0, 0))
   init_op = iterator.initializer
   get_next_op = iterator.get_next()
   save_op = _save_op(iterator._iterator_resource)
   restore_op = _restore_op(iterator._iterator_resource)
   return init_op, get_next_op, save_op, restore_op
 def testSampleKeyPairsPrefixAndEndKey(self):
     ds = bigtable_api._BigtableSampleKeyPairsDataset(self._table,
                                                      prefix="r",
                                                      start="",
                                                      end="r3")
     itr = dataset_ops.make_initializable_iterator(ds)
     with self.cached_session() as sess:
         with self.assertRaises(errors.InvalidArgumentError):
             sess.run(itr.initializer)
Example #46
    def testNotInitializedError(self):
        components = (np.array(1), np.array([1, 2, 3]), np.array(37.0))
        iterator = dataset_ops.make_initializable_iterator(
            dataset_ops.Dataset.from_tensors(components))
        get_next = iterator.get_next()

        with self.cached_session() as sess:
            with self.assertRaisesRegexp(errors.FailedPreconditionError,
                                         "iterator has not been initialized"):
                sess.run(get_next)
Example #47
  def testShortCircuitCapturedInput(self, num_parallel_calls):
    captured_t = array_ops.placeholder(dtypes.int64, shape=[])
    dataset = self.structuredDataset(None).repeat().map(
        lambda x: captured_t, num_parallel_calls=num_parallel_calls)
    iterator = dataset_ops.make_initializable_iterator(dataset)
    get_next = iterator.get_next()

    with self.cached_session() as sess:
      sess.run(iterator.initializer, feed_dict={captured_t: 42})
      self.assertEqual(42, sess.run(get_next))
Example #48
    def testShortCircuitCapturedInput(self):
        captured_t = array_ops.placeholder(dtypes.int64, shape=[])
        dataset = self.structuredDataset(None).repeat().apply(
            batching.map_and_batch(lambda x: captured_t, batch_size=10))
        iterator = dataset_ops.make_initializable_iterator(dataset)
        get_next = iterator.get_next()

        with self.cached_session() as sess:
            sess.run(iterator.initializer, feed_dict={captured_t: 42})
            self.assertAllEqual([42] * 10, self.evaluate(get_next))
Example #49
    def testPrefetch(self):
        # We will use this event to test that `_map_py_func()` has been
        # invoked a certain number of times (6 times, to be exact) after
        # consuming fewer elements from the iterator.
        ev = threading.Event()

        set_event_during_invocation = 5

        def _map_py_func(x):
            if x == set_event_during_invocation:
                ev.set()
            return x * x

        def _map_fn(x):
            return script_ops.py_func(_map_py_func, [x], x.dtype)

        buffer_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[])
        iterator = dataset_ops.make_initializable_iterator(
            dataset_ops.Dataset.range(100).map(_map_fn).prefetch(
                buffer_size_placeholder))
        init_op = iterator.initializer
        get_next = iterator.get_next()

        with self.cached_session() as sess:
            # Simple test that prefetch yields the expected values in the
            # expected order.
            for buffer_size in [1, 10, 100, 1000]:
                sess.run(init_op,
                         feed_dict={buffer_size_placeholder: buffer_size})
                for i in range(100):
                    self.assertEqual(i * i, sess.run(get_next))
                with self.assertRaises(errors.OutOfRangeError):
                    sess.run(get_next)

            # We can indirectly observe that varying the buffer size has the
            # intended effect by observing when `ev` is set (on the 6th
            # invocation of `_map_py_func()`).
            # NOTE(mrry): We do not test with `buffer_size ==
            # set_event_during_invocation`, because we must consume at least
            # one element to start the prefetching.
            for buffer_size in range(1, set_event_during_invocation):
                event_will_be_set_after_consuming = (
                    set_event_during_invocation - buffer_size + 1)

                ev.clear()
                sess.run(init_op,
                         feed_dict={buffer_size_placeholder: buffer_size})
                for i in range(event_will_be_set_after_consuming):
                    self.assertFalse(ev.is_set())
                    self.assertEqual(i * i, sess.run(get_next))
                ev.wait()
                for i in range(event_will_be_set_after_consuming, 100):
                    self.assertEqual(i * i, sess.run(get_next))
                with self.assertRaises(errors.OutOfRangeError):
                    sess.run(get_next)
Example #50
    def testRemoteIteratorUsingRemoteCallOpMultiWorkers(self):
        s1 = server_lib.Server.create_local_server()
        s2 = server_lib.Server.create_local_server()
        s3 = server_lib.Server.create_local_server()

        cluster_def = cluster_pb2.ClusterDef()
        workers = cluster_def.job.add()
        workers.name = "worker"
        workers.tasks[0] = s1.target[len("grpc://"):]
        workers.tasks[1] = s2.target[len("grpc://"):]
        client = cluster_def.job.add()
        client.name = "client"
        client.tasks[0] = s3.target[len("grpc://"):]
        config = config_pb2.ConfigProto(cluster_def=cluster_def)

        worker_devices = [
            "/job:worker/replica:0/task:%d/cpu:0" % i for i in range(2)
        ]
        itr_handles = []
        for device in worker_devices:
            with ops.device(device):
                src = dataset_ops.Dataset.from_tensor_slices([device])
                itr = dataset_ops.make_one_shot_iterator(src)
                itr_handles.append(itr.string_handle())

        targets = dataset_ops.Dataset.from_tensor_slices(worker_devices)
        handles = dataset_ops.Dataset.from_tensor_slices(itr_handles)

        @function.Defun(dtypes.string)
        def loading_func(h):
            remote_itr = iterator_ops.Iterator.from_string_handle(
                h, dataset_ops.get_legacy_output_types(itr),
                dataset_ops.get_legacy_output_shapes(itr))
            return remote_itr.get_next()

        def map_fn(target, handle):
            return functional_ops.remote_call(args=[handle],
                                              Tout=[dtypes.string],
                                              f=loading_func,
                                              target=target)

        with ops.device("/job:client"):
            client_dataset = dataset_ops.Dataset.zip(
                (targets, handles)).map(map_fn)
            itr = dataset_ops.make_initializable_iterator(client_dataset)
            n = itr.get_next()

        with session.Session(s3.target, config=config) as sess:
            sess.run(itr.initializer)
            expected_values = worker_devices
            for expected in expected_values:
                self.assertEqual((compat.as_bytes(expected), ), sess.run(n))

            with self.assertRaises(errors.OutOfRangeError):
                sess.run(n)
Example #51
    def testEvenOddBuckets(self):
        def _map_fn(v):
            return (v, array_ops.fill([v], v),
                    array_ops.fill([3], string_ops.as_string(v)))

        input_dataset = (dataset_ops.Dataset.from_tensor_slices(
            math_ops.range(64)).map(_map_fn))

        bucketed_dataset = input_dataset.apply(
            grouping.group_by_window(
                lambda x, y, z: math_ops.cast(x % 2, dtypes.int64),
                lambda k, bucket: self._dynamicPad(k, bucket, 32), 32))

        iterator = dataset_ops.make_initializable_iterator(bucketed_dataset)
        init_op = iterator.initializer
        get_next = iterator.get_next()

        with self.cached_session() as sess:
            self.evaluate(init_op)

            # Get two minibatches (one containing even values, one containing odds)
            which_bucket_even, bucketed_values_even = self.evaluate(get_next)
            which_bucket_odd, bucketed_values_odd = self.evaluate(get_next)

            # Count number of bucket_tensors.
            self.assertEqual(3, len(bucketed_values_even))
            self.assertEqual(3, len(bucketed_values_odd))

            # Ensure the even values all went to bucket 0 and the odd values
            # all went to bucket 1.
            self.assertAllEqual(0, which_bucket_even)
            self.assertAllEqual(1, which_bucket_odd)

            # Test the first bucket outputted, the evens starting at 0
            expected_scalar_int = np.arange(0, 32 * 2, 2, dtype=np.int64)
            expected_unk_int64 = np.zeros((32, 31 * 2)).astype(np.int64)
            for i in range(0, 32):
                expected_unk_int64[i, :2 * i] = 2 * i
            expected_vec3_str = np.vstack(
                3 * [np.arange(0, 32 * 2, 2).astype(bytes)]).T

            self.assertAllEqual(expected_scalar_int, bucketed_values_even[0])
            self.assertAllEqual(expected_unk_int64, bucketed_values_even[1])
            self.assertAllEqual(expected_vec3_str, bucketed_values_even[2])

            # Test the second bucket outputted, the odds starting at 1
            expected_scalar_int = np.arange(1, 32 * 2 + 1, 2, dtype=np.int64)
            expected_unk_int64 = np.zeros((32, 31 * 2 + 1)).astype(np.int64)
            for i in range(0, 32):
                expected_unk_int64[i, :2 * i + 1] = 2 * i + 1
            expected_vec3_str = np.vstack(
                3 * [np.arange(1, 32 * 2 + 1, 2).astype(bytes)]).T

            self.assertAllEqual(expected_scalar_int, bucketed_values_odd[0])
            self.assertAllEqual(expected_unk_int64, bucketed_values_odd[1])
            self.assertAllEqual(expected_vec3_str, bucketed_values_odd[2])
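The `self._dynamicPad` helper used by this example is not shown in this listing. A plausible sketch, assuming it zips the scalar bucket key with a padded batch of the window (padded shapes inferred from the three outputs of `_map_fn`; a `tensor_shape` import is assumed):

    def _dynamicPad(self, bucket, window, window_size):
        # Pair the scalar bucket key with a padded batch of the window:
        # each element is a scalar, a variable-length int vector, and a
        # length-3 string vector.
        return dataset_ops.Dataset.zip(
            (dataset_ops.Dataset.from_tensors(bucket),
             window.padded_batch(
                 window_size,
                 (tensor_shape.TensorShape([]),
                  tensor_shape.TensorShape([None]),
                  tensor_shape.TensorShape([3])))))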
Example #52
  def benchmarkMapAndBatchDense(self):
    """Measures the performance of parallelized batching."""
    shapes = [(), (10,), (10, 10), (10, 10, 10), (224, 224, 3)]
    batch_size_values = [1, 32, 64, 128, 1024]

    shape_placeholder = array_ops.placeholder(dtypes.int64, shape=[None])
    batch_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[])

    dataset = dataset_ops.Dataset.range(1000000000)

    dense_value = random_ops.random_normal(shape=shape_placeholder)

    dataset = dataset.apply(batching.map_and_batch(
        lambda _: dense_value, batch_size_placeholder))
    iterator = dataset_ops.make_initializable_iterator(dataset)
    next_element = iterator.get_next()

    for shape in shapes:
      for batch_size in batch_size_values:

        with session.Session() as sess:
          sess.run(iterator.initializer, feed_dict={
              shape_placeholder: shape, batch_size_placeholder: batch_size})

          # Use a C++ callable to minimize the Python overhead in the benchmark.
          callable_opts = config_pb2.CallableOptions()
          callable_opts.target.append(next_element.op.name)
          op_callable = sess._make_callable_from_options(callable_opts)  # pylint: disable=protected-access

          # Run five steps to warm up the session caches before taking the
          # first measurement.
          for _ in range(5):
            op_callable()
          deltas = []
          overall_start = time.time()
          # Run at least five repetitions and for at least five seconds.
          while len(deltas) < 5 or time.time() - overall_start < 5.0:
            start = time.time()
            for _ in range(100):
              op_callable()
            end = time.time()
            deltas.append(end - start)
          del op_callable

        median_wall_time = np.median(deltas) / 100.0
        iters = len(deltas) * 100

        print("Map and batch dense dataset shape: %r batch_size: %d "
              "wall time: %f (%d iters)"
              % (shape, batch_size, median_wall_time, iters))
        self.report_benchmark(
            iters=iters, wall_time=median_wall_time,
            name="benchmark_batch_dense_dataset_nnz_%d_batch_size_%d" % (
                np.prod(shape), batch_size))
Example #53
    def testSkipEagerOptimizationLargeInputFromTensor(self):
        input_t = array_ops.placeholder(dtypes.int32, (None, None, None))
        dataset = dataset_ops.Dataset.from_tensors(input_t)
        dataset = dataset_ops._OptimizeDataset(dataset, [])
        iterator = dataset_ops.make_initializable_iterator(dataset)
        init_op = iterator.initializer
        get_next = iterator.get_next()

        with self.cached_session() as sess:
            sess.run(init_op, {input_t: np.ones([512, 1024, 1025], np.int32)})
            self.evaluate(get_next)
Example #54
  def _testFromGeneratorsRunningInParallel(self):
    num_parallel_iterators = 3

    # Define shared state that multiple iterator instances will access to
    # demonstrate their concurrent activity.
    lock = threading.Lock()
    condition = threading.Condition(lock)
    next_ticket = [0]  # GUARDED_BY(lock)

    def generator():
      # NOTE(mrry): We yield one element before the barrier, because
      # the current implementation of `Dataset.interleave()` must
      # fetch one element from each incoming dataset to start the
      # prefetching.
      yield 0

      # Define a barrier that `num_parallel_iterators` iterators must enter
      # before any can proceed. Demonstrates that multiple iterators may be
      # active at the same time.
      condition.acquire()
      ticket = next_ticket[0]
      next_ticket[0] += 1
      if ticket == num_parallel_iterators - 1:
        # The last iterator to join the barrier notifies the others.
        condition.notify_all()
      else:
        # Wait until the last iterator enters the barrier.
        while next_ticket[0] < num_parallel_iterators:
          condition.wait()
      condition.release()

      yield 1

    # As in `testFromMultipleConcurrentGenerators()`, we use a combination of
    # `Dataset.interleave()` and `Dataset.prefetch()` to cause multiple
    # iterators to be active concurrently.
    def interleave_fn(_):
      return dataset_ops.Dataset.from_generator(
          generator, output_types=dtypes.int64, output_shapes=[]).prefetch(2)

    iterator = dataset_ops.make_initializable_iterator(
        dataset_ops.Dataset.range(num_parallel_iterators)
        .interleave(
            interleave_fn, cycle_length=num_parallel_iterators, block_length=1))
    init_op = iterator.initializer
    get_next = iterator.get_next()

    with self.cached_session() as sess:
      sess.run(init_op)
      for elem in [0, 1]:
        for _ in range(num_parallel_iterators):
          self.assertAllEqual(elem, sess.run(get_next))
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)
Example #55
  def testIteratorOnDeviceGraphModeInitializableIterator(self):
    if not test_util.is_gpu_available():
      self.skipTest("No GPU available")

    dataset = dataset_ops.Dataset.range(10)
    dataset = dataset.apply(prefetching_ops.prefetch_to_device("/gpu:0"))
    iterator = dataset_ops.make_initializable_iterator(dataset)
    data = iterator.get_next()

    self.assertIn("gpu:0", dataset._variant_tensor.device.lower())
    self.assertIn("gpu:0", iterator._iterator_resource.device.lower())
    self.assertIn("gpu:0", data.device.lower())
Example #56
  def testConstantOutput(self):
    iterator = dataset_ops.make_initializable_iterator(
        dataset_ops.Dataset.range(10).map(lambda x: [x, "hello", 10]))
    init_op = iterator.initializer
    get_next = iterator.get_next()

    with self.cached_session() as sess:
      sess.run(init_op)
      for i in range(10):
        self.assertEqual((i, b"hello", 10), sess.run(get_next))
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)
Example #57
    def testNoAggregatorRegistered(self, dataset_transformation):
        dataset = dataset_ops.Dataset.range(100).apply(
            stats_ops.latency_stats("record_latency"))
        iterator = dataset_ops.make_initializable_iterator(dataset)
        next_element = iterator.get_next()

        with self.cached_session() as sess:
            self.evaluate(iterator.initializer)
            for i in range(100):
                self.assertEqual(i, self.evaluate(next_element))
            with self.assertRaises(errors.OutOfRangeError):
                self.evaluate(next_element)
Example #58
    def testMultipleIteratorsSameAggregator(self, dataset_transformation):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.range(100).apply(
            stats_ops.latency_stats("record_latency"))
        dataset = dataset_transformation(dataset, aggregator)
        iterator_0 = dataset_ops.make_initializable_iterator(dataset)
        iterator_1 = dataset_ops.make_initializable_iterator(dataset)
        next_element = iterator_0.get_next() + iterator_1.get_next()
        summary_t = aggregator.get_summary()

        with self.cached_session() as sess:
            self.evaluate([iterator_0.initializer, iterator_1.initializer])
            for i in range(100):
                self.assertEqual(i * 2, self.evaluate(next_element))
                self._assertSummaryHasCount(self.evaluate(summary_t),
                                            "record_latency",
                                            float(2 * (i + 1)))
            with self.assertRaises(errors.OutOfRangeError):
                self.evaluate(next_element)
            self._assertSummaryHasCount(self.evaluate(summary_t),
                                        "record_latency", 200.0)
Example #59
    def benchmarkBatchSparse(self):
        non_zeros_per_row_values = [0, 1, 5, 10, 100]
        batch_size_values = [1, 32, 64, 128, 1024]

        sparse_placeholder = array_ops.sparse_placeholder(dtype=dtypes.int64)
        batch_size_placeholder = array_ops.placeholder(dtype=dtypes.int64,
                                                       shape=[])

        dataset = dataset_ops.Dataset.from_tensors(
            sparse_placeholder).repeat().batch(batch_size_placeholder)
        iterator = dataset_ops.make_initializable_iterator(dataset)
        next_element = iterator.get_next()

        for non_zeros_per_row in non_zeros_per_row_values:

            sparse_value = sparse_tensor.SparseTensorValue(
                indices=np.arange(non_zeros_per_row,
                                  dtype=np.int64)[:, np.newaxis],
                values=np.arange(non_zeros_per_row, dtype=np.int64),
                dense_shape=[1000])

            for batch_size in batch_size_values:

                with session.Session() as sess:
                    sess.run(iterator.initializer,
                             feed_dict={
                                 sparse_placeholder: sparse_value,
                                 batch_size_placeholder: batch_size
                             })
                    # Run five steps to warm up the session caches before taking the
                    # first measurement.
                    for _ in range(5):
                        sess.run(next_element.indices.op)
                    deltas = []
                    for _ in range(100):
                        start = time.time()
                        for _ in range(100):
                            sess.run(next_element.indices.op)
                        end = time.time()
                        deltas.append(end - start)

                median_wall_time = np.median(deltas) / 100.0

                print(
                    "Batch sparse dataset non-zeros per row: %d batch_size: %d "
                    "wall time: %f" %
                    (non_zeros_per_row, batch_size, median_wall_time))
                self.report_benchmark(
                    iters=10000,
                    wall_time=median_wall_time,
                    name="batch_sparse_dataset_nnz_%d_batch_size_%d" %
                    (non_zeros_per_row, batch_size))
Example #60
    def run_benchmark(self,
                      dataset,
                      num_elements,
                      iters=1,
                      warmup=True,
                      apply_default_optimizations=False):
        """Benchmarks the dataset.

    Runs the dataset `iters` times. In each iteration, the benchmark measures
    the time it takes to go through `num_elements` elements of the dataset.

    Args:
      dataset: Dataset to benchmark.
      num_elements: Number of dataset elements to iterate through each benchmark
        iteration.
      iters: Number of times to repeat the timing.
      warmup: If true, warms up the session caches by running an untimed run.
      apply_default_optimizations: Determines whether default optimizations
        should be applied.

    Returns:
      A float, representing the per-element wall time of the dataset in seconds.
      This is the median time (with respect to `iters`) it takes for the dataset
      to go through `num_elements` elements, divided by `num_elements.`
    """
        options = dataset_ops.Options()
        options.experimental_optimization.apply_default_optimizations = (
            apply_default_optimizations)
        dataset = dataset.with_options(options)
        # NOTE: We use `dataset.skip()` to perform the iterations in C++, avoiding
        # the overhead of multiple `session.run()` calls. Note that this relies on
        # the underlying implementation of `skip`: if it is optimized in the future,
        # we will have to change this code.
        dataset = dataset.skip(num_elements - 1)
        iterator = dataset_ops.make_initializable_iterator(dataset)
        next_element = iterator.get_next()
        next_element = nest.flatten(next_element)[0]

        deltas = []
        for _ in range(iters):
            with session.Session() as sess:
                if warmup:
                    # Run once to warm up the session caches.
                    sess.run(iterator.initializer)
                    sess.run(next_element)

                sess.run(iterator.initializer)
                start = time.time()
                sess.run(next_element.op)
                end = time.time()
            deltas.append(end - start)
        return np.median(deltas) / float(num_elements)
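A minimal usage sketch for `run_benchmark` (the benchmark method name, the toy dataset, and the `report_benchmark` call are illustrative assumptions, not part of the original class):

    def benchmarkRangeToyExample(self):
        # Hypothetical benchmark: time iterating over a simple range dataset
        # and report the median per-element wall time computed above.
        num_elements = 10000
        dataset = dataset_ops.Dataset.range(num_elements)
        wall_time = self.run_benchmark(dataset, num_elements, iters=5)
        self.report_benchmark(
            iters=num_elements, wall_time=wall_time, name="range_toy_example")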