Example #1
  def testMirroredStratParaAsync(self):
    """Tests RNG/MirrorStrategy interaction #3.

    The user can create n independent RNGs outside strategy.scope(), where n
    is the number of replicas, and give one to each replica. The replicas can
    thus get different random-number streams.
    """
    shape = [3, 4]
    dtype = dtypes.int32
    gens = random.get_global_generator().split(count=2)
    devices = ["/cpu:0", test_util.gpu_device_name()]
    strat = MirroredStrategy(devices=devices)
    # Use `PerReplica` to specify which `gen` is sent to which replica
    gens = dist_values.PerReplica(
        device_map=dist_values.ReplicaDeviceMap(devices),
        values=[[g] for g in gens])
    with strat.scope():
      def f(gen):
        t1 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t2 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t = array_ops.stack([t1, t2])
        return t
      results = strat.extended.call_for_each_replica(
          fn=f, args=gens)
      values = results.values
      self.assertAllEqual(2, len(values))
      self.assertAllDifferent(values)
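For comparison, a minimal sketch of the same idea on the public TF2 API; the helper name, device tuple, and shapes below are illustrative assumptions rather than part of the test above:

import tensorflow as tf

def independent_streams_sketch(devices=("/cpu:0",)):
    # Split one seeded generator into len(devices) independent child generators.
    parent = tf.random.Generator.from_seed(1234)
    children = parent.split(count=len(devices))
    draws = []
    for dev, gen in zip(devices, children):
        with tf.device(dev):
            # Each child has its own key/counter, so the streams are independent.
            draws.append(gen.uniform_full_int(shape=[3, 4], dtype=tf.int32))
    return draws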
Example #2
    def testMirroredVarAsFunctionArg(self):
        """Tests that RNG with MirroredVariable can be used as tf.function's arg."""
        shape = [3, 4]
        dtype = dtypes.int32
        strat = MirroredStrategy(
            devices=["/cpu:0", test_util.gpu_device_name()])
        with strat.scope():
            gen = random.Generator.from_seed(1234)

            @def_function.function
            def f(gen):
                t1 = gen.uniform_full_int(shape=shape, dtype=dtype)
                t2 = gen.uniform_full_int(shape=shape, dtype=dtype)
                t = array_ops.stack([t1, t2])
                return t

            def g():
                return f(gen)

            for _ in range(2):
                results = strat.extended.call_for_each_replica(fn=g)
                values = results.values
                self.assertAllEqual(2, len(values))
                self.assertAllEqual(values[0], values[1])
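A standalone sketch of passing a generator into a tf.function outside any distribution strategy (the names draw_pair and g are illustrative):

import tensorflow as tf

@tf.function
def draw_pair(gen):
    # The generator arrives as a Python-level argument; the variable holding
    # its state is captured by the traced function.
    a = gen.uniform_full_int(shape=[3, 4], dtype=tf.int32)
    b = gen.uniform_full_int(shape=[3, 4], dtype=tf.int32)
    return tf.stack([a, b])

g = tf.random.Generator.from_seed(1234)
print(draw_pair(g))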
Example #3
  def testMirroredStratUnseedSync(self):
    """Tests RNG/MirrorStrategy interaction #2c.

    If the RNG created in situation #2 is unseeded, the replicas' random-number
    streams are still the same.

    If the RNG created in situation #2b is unseeded, the replicas' random-number
    streams will be different. We can't test this for now because the op
    'NonDeterministicInts' is not implemented on GPU yet.
    """
    shape = [3, 4]
    dtype = dtypes.int32
    strat = MirroredStrategy(devices=["/cpu:0", test_util.gpu_device_name()])
    # TODO(wangpeng): support calling `random.Generator()` inside `f` (i.e.
    #   inside `call_for_each_replica` so that each replica can get a
    #   different random-number stream. The only obstacle is that op
    #   'NonDeterministicInts' is not implemented on GPU.)
    with strat.scope():
      gen = random.Generator()
      def f():
        t1 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t2 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t = array_ops.stack([t1, t2])
        return t
      results = strat.extended.call_for_each_replica(fn=f)
      values = results.values
      self.assertAllEqual(2, len(values))
      self.assertAllEqual(values[0], values[1])
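On the public API, an unseeded generator corresponds to Generator.from_non_deterministic_state; a minimal sketch (shape and dtype follow the test above):

import tensorflow as tf

# Seeded from OS entropy, so each process run gets a different stream,
# unlike Generator.from_seed.
g = tf.random.Generator.from_non_deterministic_state()
print(g.uniform_full_int(shape=[3, 4], dtype=tf.int32))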
Example #4
    def testTrain(self):
        if "sycl" in test_util.gpu_device_name().lower():
            return

        batch_size = 20
        sequence_length = 35
        with tf.Graph().as_default(), tf.device(tf.test.gpu_device_name()):
            inputs_ph = tf.placeholder(tf.int64, [sequence_length, batch_size],
                                       "inputs")
            labels_ph = tf.placeholder(tf.int64, [sequence_length, batch_size],
                                       "labels")

            inputs = np.ones(inputs_ph.shape.as_list(), dtype=np.int64)
            labels = np.ones(labels_ph.shape.as_list(), dtype=np.int64)

            model = rnn_ptb.test_model(tf.test.is_gpu_available())
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
            loss = rnn_ptb.loss_fn(model, inputs_ph, labels_ph, training=True)
            grads = rnn_ptb.clip_gradients(optimizer.compute_gradients(loss),
                                           0.25)
            train_op = optimizer.apply_gradients(grads)

            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                sess.run(train_op,
                         feed_dict={
                             inputs_ph: inputs,
                             labels_ph: labels
                         })
                sess.run([train_op, loss],
                         feed_dict={
                             inputs_ph: inputs,
                             labels_ph: labels
                         })
Example #5
    def testGPUSameAsOldRandomOps(self):
        """Tests that the generated numbers are the same as the old random_ops.py.

        The GPU version.
        """
        seed1, seed2 = 79, 25
        with ops.device(test_util.gpu_device_name()):
            random.reset_global_generator([0, seed2, seed1])
        shape = constant_op.constant([4, 7])
        dtype = dtypes.float64

        @def_function.function
        def old():
            with ops.device(test_util.gpu_device_name()):
                return gen_random_ops.random_standard_normal(shape,
                                                             dtype=dtype,
                                                             seed=seed1,
                                                             seed2=seed2)

        def new():
            with ops.device(test_util.gpu_device_name()):
                return random.get_global_generator().standard_normal(
                    shape, dtype=dtype)

        for _ in range(100):
            self.assertAllEqual(old(), new())
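In current TF2 the global generator can be replaced through the public API instead of the internal random.reset_global_generator helper; a small sketch (the seed and shape are just examples):

import tensorflow as tf

# Swap in a deterministically seeded global generator, then draw from it.
tf.random.set_global_generator(tf.random.Generator.from_seed(79))
print(tf.random.get_global_generator().normal(shape=[4, 7], dtype=tf.float64))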
Example #6
    def testMirroredStratParaSyncWithinFun(self):
        """Tests RNG/MirrorStrategy interaction #2b.

    If the RNG creation is within `f` in situation #2, the replicas'
    random-number streams are still the same. Note that whether the RNG creation
    is within strategy.scope() or not doesn't affect the result in this case
    (putting in inside strategy.scope() will cause unnecessary mirror creation
    and waste memory though).
    """
        shape = [3, 4]
        dtype = dtypes.int32
        strat = MirroredStrategy(
            devices=["/cpu:0", test_util.gpu_device_name()])

        def f():
            gen = random.Generator.from_seed(1234)
            t1 = gen.uniform_full_int(shape=shape, dtype=dtype)
            t2 = gen.uniform_full_int(shape=shape, dtype=dtype)
            t = array_ops.stack([t1, t2])
            return t

        results = strat.extended.call_for_each_replica(fn=f)
        values = results.values
        self.assertAllEqual(2, len(values))
        self.assertAllEqual(values[0], values[1])
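The reason the replicas match is simply that identically seeded generators produce identical streams; a tiny self-contained check:

import tensorflow as tf

# Two generators built from the same seed yield the same draws, which is why
# per-replica generators seeded identically inside `f` produce matching values.
g1 = tf.random.Generator.from_seed(1234)
g2 = tf.random.Generator.from_seed(1234)
a = g1.uniform_full_int(shape=[3, 4], dtype=tf.int32)
b = g2.uniform_full_int(shape=[3, 4], dtype=tf.int32)
assert bool(tf.reduce_all(a == b))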
Example #7
  def testDeviceWrapperDynamicExecutionNodesAreAllProperlyLocated(self):
    if not test.is_gpu_available():
      # Can't perform this test w/o a GPU
      return

    with self.test_session(use_gpu=True) as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        x = array_ops.zeros([1, 1, 3])
        cell = rnn_cell_impl.DeviceWrapper(
            rnn_cell_impl.GRUCell(3), test_util.gpu_device_name())
        with ops.device("/cpu:0"):
          outputs, _ = rnn.dynamic_rnn(
              cell=cell, inputs=x, dtype=dtypes.float32)
        run_metadata = config_pb2.RunMetadata()
        opts = config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE)

        sess.run([variables_lib.global_variables_initializer()])
        _ = sess.run(outputs, options=opts, run_metadata=run_metadata)

      step_stats = run_metadata.step_stats
      ix = 0 if (("gpu"  in step_stats.dev_stats[0].device) or
                 ("sycl" in step_stats.dev_stats[0].device)) else 1
      gpu_stats = step_stats.dev_stats[ix].node_stats
      cpu_stats = step_stats.dev_stats[1 - ix].node_stats
      self.assertFalse([s for s in cpu_stats if "gru_cell" in s.node_name])
      self.assertTrue([s for s in gpu_stats if "gru_cell" in s.node_name])
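In eager TF2, placement can be checked directly from a tensor's device attribute rather than by parsing RunMetadata step stats; a tiny sketch:

import tensorflow as tf

with tf.device("/cpu:0"):
    y = tf.matmul(tf.ones([2, 3]), tf.ones([3, 2]))
# Eager tensors report the device that produced them.
assert "CPU" in y.device.upper()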
Example #8
    def testRemoteFunctionGPUCPU(self):
        if not test_util.is_gpu_available():
            self.skipTest("No GPU available")

        gpu_target = "/job:localhost/replica:0/task:0" + test_util.gpu_device_name(
        )

        @function.Defun(dtypes.float32, dtypes.float32)
        def _remote_fn(a, b):
            return math_ops.multiply(a, b)

        with ops.device(gpu_target):
            a = variables.Variable(2, dtype=dtypes.float32)
            b = variables.Variable(3, dtype=dtypes.float32)

        with ops.device(gpu_target):
            remote_op = functional_ops.remote_call(
                args=[a, b],
                Tout=[dtypes.float32],
                f=_remote_fn,
                target="/job:localhost/replica:0/task:0/cpu:0")[0] + 3.0

        with self.test_session() as sess:
            sess.run(variables.global_variables_initializer())
            mul = sess.run(remote_op)
            self.assertEqual(mul, 9.0)
Example #9
    def _benchmark_apply(self, label, model):
        if "sycl" in test_util.gpu_device_name().lower():
            return

        num_iters = 100
        num_warmup = 10
        dataset = tf.data.Dataset.from_tensors(
            tf.ones([PTBBenchmark.SEQ_LEN, PTBBenchmark.BATCH_SIZE],
                    dtype=tf.int64)).repeat(num_iters + num_warmup)
        inputs = dataset.make_one_shot_iterator().get_next()

        with tf.device(tf.test.gpu_device_name()):
            outputs = model(inputs, training=True)

            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                for _ in range(num_warmup):
                    sess.run(outputs)
                gc.collect()

                start = time.time()
                for _ in range(num_iters):
                    sess.run(outputs)
                self._report(label, start, num_iters,
                             tf.test.gpu_device_name(),
                             PTBBenchmark.BATCH_SIZE)
Example #10
    def _benchmark_train(self, label, model):
        if "sycl" in test_util.gpu_device_name().lower():
            return

        num_iters = 100
        num_warmup = 10
        dataset = tf.data.Dataset.from_tensors(
            tf.ones([PTBBenchmark.SEQ_LEN, PTBBenchmark.BATCH_SIZE],
                    dtype=tf.int64)).repeat(num_iters + num_warmup)
        # inputs and labels have the same shape
        dataset = tf.data.Dataset.zip((dataset, dataset))
        (inputs, labels) = dataset.make_one_shot_iterator().get_next()

        with tf.device(tf.test.gpu_device_name()):
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
            loss = rnn_ptb.loss_fn(model, inputs, labels, training=True)
            grads = rnn_ptb.clip_gradients(optimizer.compute_gradients(loss),
                                           0.25)
            train_op = optimizer.apply_gradients(grads)

            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                for _ in range(num_warmup):
                    sess.run(train_op)
                gc.collect()
                start = time.time()
                for _ in range(num_iters):
                    sess.run(train_op)
                self._report(label, start, num_iters,
                             tf.test.gpu_device_name(),
                             PTBBenchmark.BATCH_SIZE)
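The warm-up / gc.collect / timing pattern used in these benchmarks generalizes to any callable; a small sketch (time_fn and the matmul workload are illustrative, not part of the benchmark above):

import gc
import time

import tensorflow as tf

def time_fn(fn, num_warmup=10, num_iters=100):
    # Warm up first so one-time costs (tracing, kernel setup) are not timed.
    for _ in range(num_warmup):
        fn()
    gc.collect()
    start = time.time()
    for _ in range(num_iters):
        fn()
    return (time.time() - start) / num_iters

print(time_fn(lambda: tf.matmul(tf.ones([256, 256]), tf.ones([256, 256]))))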
Example #11
    def testDeviceWrapperDynamicExecutionNodesAreAllProperlyLocated(self):
        if not test.is_gpu_available():
            # Can't perform this test w/o a GPU
            return

        with self.test_session(use_gpu=True) as sess:
            with variable_scope.variable_scope(
                    "root", initializer=init_ops.constant_initializer(0.5)):
                x = array_ops.zeros([1, 1, 3])
                cell = rnn_cell_impl.DeviceWrapper(rnn_cell_impl.GRUCell(3),
                                                   test_util.gpu_device_name())
                with ops.device("/cpu:0"):
                    outputs, _ = rnn.dynamic_rnn(cell=cell,
                                                 inputs=x,
                                                 dtype=dtypes.float32)
                run_metadata = config_pb2.RunMetadata()
                opts = config_pb2.RunOptions(
                    trace_level=config_pb2.RunOptions.FULL_TRACE)

                sess.run([variables_lib.global_variables_initializer()])
                _ = sess.run(outputs, options=opts, run_metadata=run_metadata)

            step_stats = run_metadata.step_stats
            ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or
                       ("sycl" in step_stats.dev_stats[0].device)) else 1
            gpu_stats = step_stats.dev_stats[ix].node_stats
            cpu_stats = step_stats.dev_stats[1 - ix].node_stats
            self.assertFalse(
                [s for s in cpu_stats if "gru_cell" in s.node_name])
            self.assertTrue(
                [s for s in gpu_stats if "gru_cell" in s.node_name])
Example #12
    def testDifferentDeviceCPUGPU(self):
        if not test_util.is_gpu_available():
            self.skipTest("No GPU available")

        gpu_name = test_util.gpu_device_name()
        self._prefetch_fn_helper_one_shot(
            "cpu_gpu", "/job:localhost/replica:0/task:0/cpu:0",
            "/job:localhost/replica:0/task:0" + gpu_name)
Example #13
 def testColocateGradients(self):
     with ops.Graph().as_default() as g:
         w = constant(1.0, shape=[1, 1])
         x = constant(1.0, shape=[1, 2])
         with g.device(test_util.gpu_device_name()):
             wx = math_ops.matmul(w, x)
         gw = gradients.gradients(wx, [w],
                                  colocate_gradients_with_ops=True)[0]
     self.assertEqual(gw.op.colocation_groups(), wx.op.colocation_groups())
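An eager counterpart that computes the same gradient with tf.GradientTape (it does not exercise colocate_gradients_with_ops, which is a graph-mode option):

import tensorflow as tf

w = tf.Variable(tf.ones([1, 1]))
x = tf.ones([1, 2])
with tf.GradientTape() as tape:
    with tf.device("/cpu:0"):
        wx = tf.matmul(w, x)
    loss = tf.reduce_sum(wx)
print(tape.gradient(loss, w))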
Example #14
def _available_devices():
  devices = ["cpu"]
  if test_util.gpu_device_name():  # non-empty only when a GPU is present
    devices.append("gpu")

  if has_tpu():
    devices.append("tpu")

  return tuple(devices)
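A TF2 variant of the same helper using tf.config to detect GPUs (the function name is illustrative, and the TPU branch is omitted because has_tpu is not defined here):

import tensorflow as tf

def available_device_kinds():
    kinds = ["cpu"]
    # list_physical_devices returns a non-empty list only when a GPU is visible.
    if tf.config.list_physical_devices("GPU"):
        kinds.append("gpu")
    return tuple(kinds)

print(available_device_kinds())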
Example #16
 def testCrossDeviceSplit(self):
   """Tests that a CPU RNG can split into RNGs on GPU.
   """
   with ops.device("/device:CPU:0"):
     gen = random.Generator(seed=1234)  # gen is on CPU
     self.assertRegex("CPU", gen.state.device)
   with ops.device(test_util.gpu_device_name()):
     gens = gen.split(count=10)  # gens are on GPU
     self.assertRegex("GPU", gens[0].state.device)
Example #17
 def _testGpu(self, x):
   device = test_util.gpu_device_name()
   if device:
     np_ans = np.array(x)
     with context.device(device):
       tf_ans = ops.convert_to_tensor(x).numpy()
     if np_ans.dtype in [np.float32, np.float64, np.complex64, np.complex128]:
       self.assertAllClose(np_ans, tf_ans)
     else:
       self.assertAllEqual(np_ans, tf_ans)
Example #18
 def testGPUEqualsCPU(self, dtype):
   """Tests that GPU and CPU generate the same integer outputs."""
   seed = 1234
   shape = [315, 49]
   with ops.device("/device:CPU:0"):
     cpu = random.Generator.from_seed(seed).uniform_full_int(
         shape=shape, dtype=dtype)
   with ops.device(test_util.gpu_device_name()):
     gpu = random.Generator.from_seed(seed).uniform_full_int(
         shape=shape, dtype=dtype)
   self.assertAllEqual(cpu, gpu)
Example #19
 def testGPUEqualsCPU(self, dtype):
     """Tests that GPU and CPU generate the same integer outputs."""
     seed = 1234
     shape = [315, 49]
     with ops.device("/device:CPU:0"):
         cpu = random.Generator.from_seed(seed).uniform_full_int(
             shape=shape, dtype=dtype)
     with ops.device(test_util.gpu_device_name()):
         gpu = random.Generator.from_seed(seed).uniform_full_int(
             shape=shape, dtype=dtype)
     self.assertAllEqual(cpu, gpu)
Example #20
    def testSupportDevices(self):
        gpu_type = test_util.gpu_device_type()
        gpu_name = test_util.gpu_device_name()
        with ops.Graph().as_default() as g:
            a = random_ops.random_uniform(shape=(2, 3))
            b = random_ops.random_uniform(shape=(2, 3))
            c = a + b
            dims = math_ops.range(0, array_ops.rank(c), 1)
            d = math_ops.reduce_sum(a, axis=dims)
            train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
            train_op.append(d)
            mg = meta_graph.create_meta_graph_def(graph=g)
            grappler_item = item.Item(mg)

            device_properties = device_properties_pb2.DeviceProperties(
                type=gpu_type, frequency=1000, num_cores=60)
            named_gpu = device_properties_pb2.NamedDevice(
                properties=device_properties, name=gpu_name)
            device_properties = device_properties_pb2.DeviceProperties(
                type='CPU', frequency=3000, num_cores=6)
            named_cpu = device_properties_pb2.NamedDevice(
                properties=device_properties, name='/CPU:0')
            virtual_cluster = cluster.Cluster(devices=[named_cpu, named_gpu])
            supported_dev = virtual_cluster.GetSupportedDevices(grappler_item)
            self.assertEqual(supported_dev['add'], ['/CPU:0', gpu_name])
            self.assertEqual(supported_dev['Sum'], ['/CPU:0', gpu_name])
            self.assertEqual(supported_dev['range'], ['/CPU:0', gpu_name])

            real_cluster = cluster.Cluster()
            supported_dev = real_cluster.GetSupportedDevices(grappler_item)
            if test.is_gpu_available():
                self.assertEqual(supported_dev['add'], [
                    '/job:localhost/replica:0/task:0/device:CPU:0',
                    '/job:localhost/replica:0/task:0' + gpu_name
                ])
                self.assertEqual(supported_dev['Sum'], [
                    '/job:localhost/replica:0/task:0/device:CPU:0',
                    '/job:localhost/replica:0/task:0' + gpu_name
                ])
                # The axis tensor must reside on the host
                self.assertEqual(
                    supported_dev['range'],
                    ['/job:localhost/replica:0/task:0/device:CPU:0'])
            else:
                self.assertEqual(
                    supported_dev['add'],
                    ['/job:localhost/replica:0/task:0/device:CPU:0'])
Example #21
    def testPrefetchToDeviceGpu(self):
        if not test_util.is_gpu_available():
            self.skipTest("No GPU available")

        host_dataset = dataset_ops.Dataset.range(10)
        gpu_name = test_util.gpu_device_name()
        device_dataset = host_dataset.apply(
            prefetching_ops.prefetch_to_device(gpu_name))

        iterator = device_dataset.make_one_shot_iterator()
        next_element = iterator.get_next()

        with self.test_session() as sess:
            for i in range(10):
                self.assertEqual(i, sess.run(next_element))
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(next_element)
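The modern equivalent lives under tf.data.experimental.prefetch_to_device; the sketch below targets the CPU so it also runs on GPU-less machines, whereas in practice the device string returned by gpu_device_name would be used:

import tensorflow as tf

ds = tf.data.Dataset.range(10)
# prefetch_to_device must be the final transformation in the pipeline.
ds = ds.apply(tf.data.experimental.prefetch_to_device("/cpu:0"))
for element in ds:
    print(element.numpy())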
Example #22
 def testCPUGPUCopy(self):
   if not context.num_gpus():
     return
   t = constant_op.constant([1.0, 2.0])
   l = list_ops.tensor_list_from_tensor(t, element_shape=scalar_shape())
   gpu_name = test_util.gpu_device_name()
   with context.device(gpu_name):
     l_gpu = array_ops.identity(l)
     self.assertAllEqual(
         self.evaluate(
             list_ops.tensor_list_pop_back(
                 l_gpu, element_dtype=dtypes.float32)[1]), 2.0)
   l_cpu = array_ops.identity(l_gpu)
   self.assertAllEqual(
       self.evaluate(
           list_ops.tensor_list_pop_back(
               l_cpu, element_dtype=dtypes.float32)[1]), 2.0)
Example #23
    def testColocateGradientsWithGateGradients(self):
        if not test_util.is_gpu_available():
            self.skipTest("No GPU available")
        with ops.Graph().as_default() as g:
            with g.device("/device:CPU:0"):
                x = constant(1.0, shape=[1, 1])
                y = constant(1.0, shape=[1, 1])
                s = x + y
            with g.device(test_util.gpu_device_name()):
                z = math_ops.reduce_sum(s)

            gz_x = gradients.gradients(z, [x],
                                       colocate_gradients_with_ops=True,
                                       gate_gradients=True)[0]
            with session.Session():
                # Make sure the placer doesn't complain.
                gz_x.eval()
Example #24
  def testMirroredStratParaSync(self):
    """Tests RNG/MirrorStrategy interaction #2.

    If an RNG is created inside strategy.scope(), each replica gets a
    mirror of this RNG. If they access their RNGs in the same
    manner, their random-number streams are the same.
    """
    shape = [3, 4]
    dtype = dtypes.int32
    strat = MirroredStrategy(devices=["/cpu:0", test_util.gpu_device_name()])
    with strat.scope():
      gen = random.Generator(seed=1234)
      def f():
        t1 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t2 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t = array_ops.stack([t1, t2])
        return t
      results = strat.extended.call_for_each_replica(fn=f)
      values = results.values
      self.assertAllEqual(2, len(values))
      self.assertAllEqual(values[0], values[1])
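A sketch of the same setup on the public API in recent TF releases (the single-CPU device list is an assumption, and whether replicas see identical or distinct streams depends on the TensorFlow version's Generator-under-strategy behavior):

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy(devices=["/cpu:0"])
with strategy.scope():
    gen = tf.random.Generator.from_seed(1234)

def draw():
    # Each replica draws from the generator created under the strategy scope.
    return gen.uniform_full_int(shape=[3, 4], dtype=tf.int32)

print(strategy.run(draw))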
Example #25
  def testMirroredStratSeq(self):
    """Tests RNG/MirrorStrategy interaction #1.

    If an RNG is created outside strategy.scope(), all replicas will access the
    same RNG object, and accesses are serialized.
    """
    shape = [3, 4]
    dtype = dtypes.int32
    gen = random.Generator(seed=1234)
    strat = MirroredStrategy(devices=["/cpu:0", test_util.gpu_device_name()])
    with strat.scope():
      def f():
        t1 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t2 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t = array_ops.stack([t1, t2])
        return t
      results = strat.extended.call_for_each_replica(
          fn=f)
      values = results.values
      self.assertAllEqual(2, len(values))
      self.assertAllDifferent(values)
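The serialized accesses differ because a single generator advances its state on every draw; a tiny self-contained check:

import tensorflow as tf

g = tf.random.Generator.from_seed(1234)
a = g.uniform_full_int(shape=[3, 4], dtype=tf.int32)
b = g.uniform_full_int(shape=[3, 4], dtype=tf.int32)
# Consecutive draws from the same generator come from different points in
# the stream, so they are (with overwhelming probability) not all equal.
assert not bool(tf.reduce_all(a == b))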
Example #26
  def testMirroredStratParaSyncWithinFun(self):
    """Tests RNG/MirrorStrategy interaction #2b.

    If the RNG creation is within `f` in situation #2, the replicas'
    random-number streams are still the same. Note that whether the RNG creation
    is within strategy.scope() or not doesn't affect the result in this case
    (putting it inside strategy.scope() will cause unnecessary mirror creation
    and waste memory, though).
    """
    shape = [3, 4]
    dtype = dtypes.int32
    strat = MirroredStrategy(devices=["/cpu:0", test_util.gpu_device_name()])
    def f():
      gen = random.Generator(seed=1234)
      t1 = gen.uniform_full_int(shape=shape, dtype=dtype)
      t2 = gen.uniform_full_int(shape=shape, dtype=dtype)
      t = array_ops.stack([t1, t2])
      return t
    results = strat.extended.call_for_each_replica(fn=f)
    values = results.values
    self.assertAllEqual(2, len(values))
    self.assertAllEqual(values[0], values[1])
Example #27
 def testGetSetGPU(self):
   if not context.num_gpus():
     return
   gpu_name = test_util.gpu_device_name()
   with context.device(gpu_name):
     self.testGetSetItem()
Example #28
  def testSameAsOldRandomOpsGPU(self):
    """Tests that the generated numbers are the same as the old random_ops.py.

    The GPU version.
    """
    self._sameAsOldRandomOps(test_util.gpu_device_name(), GPU_FLOATS)
Example #29
 def testStackGPU(self):
   if not context.num_gpus():
     return
   gpu_name = test_util.gpu_device_name()
   with context.device(gpu_name):
     self.testStack()
Example #30
 def testFromTensorGPU(self):
   if not context.num_gpus():
     return
   gpu_name = test_util.gpu_device_name()
   with context.device(gpu_name):
     self.testTensorListFromTensor()