def testPrefetchToDeviceWithReInit(self):
    host_dataset = dataset_ops.Dataset.range(10)
    device_dataset = host_dataset.apply(
        prefetching_ops.prefetch_to_device("/cpu:1"))

    # NOTE(mrry): This device block creates the "host" dataset and iterator on
    # /cpu:0, and ensures that the prefetching is across devices. In typical use
    # this would not be necessary, because the GPU device would not support any
    # of the dataset-related ops.
    with ops.device("/cpu:0"):
      iterator = device_dataset.make_initializable_iterator()

    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
    self.assertEqual(host_dataset.output_types, iterator.output_types)
    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
    self.assertEqual(host_dataset.output_classes, iterator.output_classes)

    next_element = iterator.get_next()
    self.assertEqual(dtypes.int64, next_element.dtype)
    self.assertEqual([], next_element.shape)

    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
    with self.test_session(config=worker_config) as sess:
      sess.run(iterator.initializer)
      for i in range(5):
        self.assertEqual(i, sess.run(next_element))
      sess.run(iterator.initializer)
      for i in range(10):
        self.assertEqual(i, sess.run(next_element))
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(next_element)

  def testPrefetchSparseTensorsToDevice(self):
    def make_tensor(i):
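      # Inside `Dataset.map()`, `i` is a scalar tf.Tensor, so `i*[1]`
      # broadcasts to the rank-1 tensor [i] rather than repeating a
      # Python list; each element thus has values == [i].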
      return sparse_tensor.SparseTensorValue(
          indices=[[0, 0]], values=(i*[1]), dense_shape=[2, 2])
    host_dataset = dataset_ops.Dataset.range(10).map(make_tensor)

    device_dataset = host_dataset.apply(
        prefetching_ops.prefetch_to_device("/cpu:1"))

    # NOTE(mrry): This device block creates the "host" dataset and iterator on
    # /cpu:0, and ensures that the prefetching is across devices. In typical use
    # this would not be necessary, because the GPU device would not support any
    # of the dataset-related ops.
    with ops.device("/cpu:0"):
      iterator = device_dataset.make_one_shot_iterator()

    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
    self.assertEqual(host_dataset.output_types, iterator.output_types)
    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
    self.assertEqual(host_dataset.output_classes, iterator.output_classes)

    next_element = iterator.get_next()
    self.assertEqual(dtypes.int64, next_element.dtype)

    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
    with self.test_session(config=worker_config) as sess:
      for i in range(10):
        actual = sess.run(next_element)
        self.assertAllEqual([i], actual.values)
        self.assertAllEqual([[0, 0]], actual.indices)
        self.assertAllEqual([2, 2], actual.dense_shape)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(next_element)

  def testPrefetchDictToDevice(self):
    host_dataset = dataset_ops.Dataset.range(10).map(lambda x: {"a": x})
    device_dataset = host_dataset.apply(
        prefetching_ops.prefetch_to_device("/cpu:1"))

    # NOTE(mrry): This device block creates the "host" dataset and iterator on
    # /cpu:0, and ensures that the prefetching is across devices. In typical use
    # this would not be necessary, because the GPU device would not support any
    # of the dataset-related ops.
    with ops.device("/cpu:0"):
      iterator = device_dataset.make_one_shot_iterator()

    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
    self.assertEqual(host_dataset.output_types, iterator.output_types)
    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
    self.assertEqual(host_dataset.output_classes, iterator.output_classes)

    next_element = iterator.get_next()
    self.assertEqual(dtypes.int64, next_element["a"].dtype)
    self.assertEqual([], next_element["a"].shape)

    worker_config = config_pb2.ConfigProto()
    worker_config.device_count["CPU"] = 2
    with self.test_session(config=worker_config) as sess:
      for i in range(10):
        self.assertEqual({"a": i}, sess.run(next_element))
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(next_element)

  def testPrefetchToSameDevice(self):
    host_dataset = dataset_ops.Dataset.range(10)
    device_dataset = host_dataset.apply(
        prefetching_ops.prefetch_to_device(
            "/job:localhost/replica:0/task:0/device:CPU:0"))

    # NOTE(mrry): This device block creates the "host" dataset and iterator on
    # /cpu:0, and ensures that the prefetching is across devices. In typical use
    # this would not be necessary, because the GPU device would not support any
    # of the dataset-related ops.
    with ops.device("/cpu:0"):
      iterator = device_dataset.make_one_shot_iterator()

    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
    self.assertEqual(host_dataset.output_types, iterator.output_types)
    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
    self.assertEqual(host_dataset.output_classes, iterator.output_classes)

    next_element = iterator.get_next()
    self.assertEqual(dtypes.int64, next_element.dtype)
    self.assertEqual([], next_element.shape)

    with self.cached_session() as sess:
      for i in range(10):
        self.assertEqual(i, sess.run(next_element))
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(next_element)

  def testTensorsExplicitPrefetchToDevice(self):
    ds = Dataset.from_tensor_slices([0., 1.])
    ds = ds.apply(prefetching_ops.prefetch_to_device(test.gpu_device_name()))

    with self.assertRaisesRegexp(TypeError, 'prefetch_to_device'):
      datasets.Iterator(ds)

    for i, x in enumerate(ds):
      with ops.device(test.gpu_device_name()):
        x = math_ops.add(x, x)
        self.assertEqual(float(i) + float(i), x.numpy())

  def testPrefetchToDeviceGpu(self):
    if not test_util.is_gpu_available():
      self.skipTest("No GPU available")

    host_dataset = dataset_ops.Dataset.range(10)
    device_dataset = host_dataset.apply(
        prefetching_ops.prefetch_to_device("/gpu:0"))

    iterator = device_dataset.make_one_shot_iterator()
    next_element = iterator.get_next()

    with self.cached_session() as sess:
      for i in range(10):
        self.assertEqual(i, sess.run(next_element))
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(next_element)

  def testPrefetchToDeviceGpuWithReInit(self):
    if not test_util.is_gpu_available():
      self.skipTest("No GPU available")

    host_dataset = dataset_ops.Dataset.range(10)
    gpu_name = test_util.gpu_device_name()
    device_dataset = host_dataset.apply(
        prefetching_ops.prefetch_to_device(gpu_name))

    iterator = device_dataset.make_initializable_iterator()
    next_element = iterator.get_next()

    with self.test_session() as sess:
      sess.run(iterator.initializer)
      for i in range(5):
        self.assertEqual(i, sess.run(next_element))
      sess.run(iterator.initializer)
      for i in range(10):
        self.assertEqual(i, sess.run(next_element))
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(next_element)


def wrap_as_tfdataset(x_train,
                      y_train,
                      data_augmentation,
                      batch_size,
                      gpu_local_rank=None,
                      prefetch_to_device=False,
                      comm=DummyComm()):
    '''Wrap numpy training data in a tf.data.Dataset pipeline.

    :param x_train: Numpy array of training inputs.
    :param y_train: Numpy array of training labels.
    :param data_augmentation: If True, run ``aug_fn`` on each batch via
        map_and_batch.
    :param batch_size: Batch size of the pipeline.
    :param gpu_local_rank: Local GPU rank of this process (used for logging).
    :param prefetch_to_device: If True and TF >= 1.8.0, prefetch batches
        directly to the GPU.
    :param comm: Communicator providing ``rank()`` for rank-aware printing.
    '''
    # ref: https://www.tensorflow.org/versions/master/performance/datasets_performance @IgnorePep8
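    # NOTE: this assumes TFVER compares version-aware (e.g. a
    # distutils.version.LooseVersion); a plain string comparison would
    # misorder releases such as '1.10.0' relative to '1.8.0'.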
    buffer_size = tf.contrib.data.AUTOTUNE if TFVER >= '1.8.0' \
        else 1000
    shuffle_buffer = 1000
    # Create the dataset and its associated one-shot iterator.
    dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))

    if TFVER >= '1.5.0':
        dataset = dataset.apply(
            tf.contrib.data.shuffle_and_repeat(
                shuffle_buffer))  # , seed=1234 + hvdrank))
    else:
        dataset = dataset.shuffle(shuffle_buffer)
        dataset = dataset.repeat()

    if data_augmentation:
        print_rank0('USING IMAGE AUGMENTATION IN DATASET PIPELINE.', comm)

        def proc_dataset(images, labels):
            '''Aug/proc function for map_and_batch.'''
            images = aug_fn(images)
            # The per_image_standardization could be part of the model
            # layers (incorporate via Lambda layer), but preprocessing
            # via dataset pipeline seems to be faster.
            # images = tf.image.per_image_standardization(images)

            # NOTE: If using per_image_standardization then during
            # eval/inference the images have to be standardized as well.
            # Code for that would be:
            #   xtest_dset = tf.data.Dataset.from_tensor_slices(x_test)
            #   xtest_dset = xtest_dset.map(tf.image.per_image_standardization)
            #   test_samples = x_test.shape[0]
            #   xtest_dset = xtest_dset.batch(test_samples)
            #   xtest_gen = xtest_dset.make_one_shot_iterator().get_next()
            #   x_test = KB.get_session().run([xtest_gen])

            return images, labels

        dataset = dataset.apply(
            tf.contrib.data.map_and_batch(
                map_func=proc_dataset,
                batch_size=batch_size))  # ,num_parallel_batches=4))
    else:
        dataset = dataset.batch(batch_size)

    if TFVER >= '1.8.0' and prefetch_to_device:
        print_in_order(
            'RANK {} PREFETCHING TO GPU: {}'.format(comm.rank(),
                                                    gpu_local_rank), comm)
        # Note: In Horovod, once the visible device list is set, you prefetch
        #     to device 0 even when that device is not physically device 0.
        # gdev = '/gpu:{}'.format(gpu_local_rank)  # incorrect per Note ^.
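        # For illustration only (hypothetical setup, not part of this module):
        # Horovod processes typically pin one GPU each via
        #   config = tf.ConfigProto()
        #   config.gpu_options.visible_device_list = str(gpu_local_rank)
        # after which the single visible GPU is always addressed as '/gpu:0'.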
        gdev = '/gpu:0'
        # Prefetch to GPU doesn't seem to help much
        dataset = dataset.apply(prefetching_ops.prefetch_to_device(gdev))
        # The right buffer_size here is unclear; prefetch_to_device picks a
        # default automatically (an explicit value such as buffer_size=10000
        # is possible). Using buffer_size=tf.contrib.data.AUTOTUNE hangs with
        # "failed to query event: CUDA_ERROR_DEINITIALIZED".
    else:
        # Dataset transformations return new datasets; reassign the result
        # so the prefetch actually takes effect.
        dataset = dataset.prefetch(buffer_size=buffer_size)

    return dataset
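

# A minimal usage sketch, not part of the original file: it assumes the
# module-level TFVER, DummyComm and the other helpers above are in scope,
# and the array shapes are illustrative only.
if __name__ == '__main__':
    import numpy as np

    x_train = np.random.rand(256, 32, 32, 3).astype('float32')
    y_train = np.random.randint(0, 10, size=(256, 1)).astype('int64')
    dataset = wrap_as_tfdataset(
        x_train, y_train, data_augmentation=False, batch_size=32)
    images, labels = dataset.make_one_shot_iterator().get_next()
    with tf.Session() as sess:
        # The pipeline repeats indefinitely, so a single run() always
        # yields a full batch.
        x_batch, y_batch = sess.run([images, labels])
        print(x_batch.shape, y_batch.shape)  # (32, 32, 32, 3) (32, 1)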