def _VerifyBuildGraph(self, n, m, k, transpose_a, transpose_b, dtype):
   """Builds the matmul benchmark graph and compares it to an expected proto.

   The graph is built inside a fresh `ops.Graph` on the device named by
   `googletest.gpu_device_name()`. Its GraphDef, after `self._StripGraph`,
   is compared against a hand-written node list in which every node carries
   that same device string.

   Args:
     n, m, k, transpose_a, transpose_b, dtype: forwarded unchanged to
       `matmul_benchmark.build_graph`.
   """
   graph = ops.Graph()
   with graph.as_default():
     matmul_benchmark.build_graph(googletest.gpu_device_name(), n, m, k, transpose_a, transpose_b,
                                   dtype)
     gd = graph.as_graph_def()
     # Device string spliced into the `device:` field of every expected node.
     dev=googletest.gpu_device_name()
     # Expected graph in text-proto form. Each line closes the triple-quoted
     # literal after an escaped quote, appends `dev`, and reopens the literal
     # with the matching escaped closing quote.
     proto_expected = """
     node { name: "random_uniform/shape" op: "Const" device: \""""+ dev +"""\" }
     node { name: "random_uniform/min" op: "Const" device: \""""+ dev +"""\" }
     node { name: "random_uniform/max" op: "Const" device: \""""+ dev +"""\" }
     node { name: "random_uniform/RandomUniform" op: "RandomUniform" input: "random_uniform/shape" device: \""""+ dev +"""\" }
     node { name: "random_uniform/sub" op: "Sub" input: "random_uniform/max" input: "random_uniform/min" device: \""""+ dev +"""\" }
     node { name: "random_uniform/mul" op: "Mul" input: "random_uniform/RandomUniform" input: "random_uniform/sub" device: \""""+ dev +"""\" }
     node { name: "random_uniform" op: "Add" input: "random_uniform/mul" input: "random_uniform/min" device: \""""+ dev +"""\" }
     node { name: "Variable" op: "VariableV2" device: \""""+ dev +"""\" }
     node { name: "Variable/Assign" op: "Assign" input: "Variable" input: "random_uniform" device: \""""+ dev +"""\" }
     node { name: "Variable/read" op: "Identity" input: "Variable" device: \""""+ dev +"""\" }
     node { name: "random_uniform_1/shape" op: "Const" device: \""""+ dev +"""\" }
     node { name: "random_uniform_1/min" op: "Const" device: \""""+ dev +"""\" }
     node { name: "random_uniform_1/max" op: "Const" device: \""""+ dev +"""\" }
     node { name: "random_uniform_1/RandomUniform" op: "RandomUniform" input: "random_uniform_1/shape" device: \""""+ dev +"""\" }
     node { name: "random_uniform_1/sub" op: "Sub" input: "random_uniform_1/max" input: "random_uniform_1/min" device: \""""+ dev +"""\" }
     node { name: "random_uniform_1/mul" op: "Mul" input: "random_uniform_1/RandomUniform" input: "random_uniform_1/sub" device: \""""+ dev +"""\" }
     node { name: "random_uniform_1" op: "Add" input: "random_uniform_1/mul" input: "random_uniform_1/min" device: \""""+ dev +"""\" }
     node { name: "Variable_1" op: "VariableV2" device: \""""+ dev +"""\" }
     node { name: "Variable_1/Assign" op: "Assign" input: "Variable_1" input: "random_uniform_1" device: \""""+ dev +"""\" }
     node { name: "Variable_1/read" op: "Identity" input: "Variable_1" device: \""""+ dev +"""\" }
     node { name: "MatMul" op: "MatMul" input: "Variable/read" input: "Variable_1/read" device: \""""+ dev +"""\" }
     node { name: "group_deps" op: "NoOp" input: "^MatMul" device: \""""+ dev +"""\" }
                      """
     # Compare the expected text against the stripped GraphDef of the built graph.
     self.assertProtoEquals(str(proto_expected), self._StripGraph(gd))
  def testDictionary(self):
    """Round-trips dict-structured entries through a keyed MapStagingArea.

    Every loop step stages key `step + 1` while fetching key `step`; the
    expected value is computed from the `x` fed one step earlier.
    """
    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32)
        pi = array_ops.placeholder(dtypes.int64)
        gi = array_ops.placeholder(dtypes.int64)
        v = 2. * (array_ops.zeros([128, 128]) + x)
      with ops.device(test.gpu_device_name()):
        area = data_flow_ops.MapStagingArea(
            [dtypes.float32, dtypes.float32],
            shapes=[[], [128, 128]],
            names=['x', 'v'])
        put_op = area.put(pi, {'x': x, 'v': v})
        _, fetched = area.get(gi)
        scalar = fetched['x']
        matrix = fetched['v']
        result = math_ops.reduce_max(scalar * math_ops.matmul(matrix, matrix))

    G.finalize()

    with self.session(use_gpu=True, graph=G) as sess:
      # Stage key 0 up front so the first get in the loop has an entry.
      sess.run(put_op, feed_dict={x: -1, pi: 0})
      for step in range(10):
        feeds = {x: step, pi: step + 1, gi: step}
        _, yval = sess.run([put_op, result], feed_dict=feeds)
        prev = step - 1
        self.assertAllClose(4 * prev * prev * prev * 128, yval, rtol=1e-4)
Beispiel #3
0
  def testDeviceWrapperDynamicExecutionNodesAreAllProperlyLocated(self):
    """Checks where the nodes of a DeviceWrapper-wrapped GRU cell execute.

    `dynamic_rnn` is built under a "/cpu:0" device scope while the cell is
    wrapped with the GPU device name. One run traced with FULL_TRACE is then
    inspected: nodes whose name contains "gru_cell" must appear in the stats
    of the device matching the GPU name and must be absent from the other
    device's stats.

    The test is reported as skipped when no GPU is available.
    """
    if not test.is_gpu_available():
      # Report an explicit skip instead of a silent pass when no GPU exists.
      self.skipTest("Can't perform this test w/o a GPU")

    gpu_dev = test.gpu_device_name()
    with self.test_session(use_gpu=True) as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        x = array_ops.zeros([1, 1, 3])
        cell = rnn_cell_impl.DeviceWrapper(rnn_cell_impl.GRUCell(3), gpu_dev)
        with ops.device("/cpu:0"):
          outputs, _ = rnn.dynamic_rnn(
              cell=cell, inputs=x, dtype=dtypes.float32)
        run_metadata = config_pb2.RunMetadata()
        opts = config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE)

        sess.run([variables_lib.global_variables_initializer()])
        _ = sess.run(outputs, options=opts, run_metadata=run_metadata)

      step_stats = run_metadata.step_stats
      # Choose between the first two dev_stats entries by GPU name.
      # NOTE(review): assumes exactly one GPU and one CPU entry -- confirm.
      ix = 0 if gpu_dev in step_stats.dev_stats[0].device else 1
      gpu_stats = step_stats.dev_stats[ix].node_stats
      cpu_stats = step_stats.dev_stats[1 - ix].node_stats
      self.assertFalse([s for s in cpu_stats if "gru_cell" in s.node_name])
      self.assertTrue([s for s in gpu_stats if "gru_cell" in s.node_name])
  def testPeek(self):
    """Peeks every staged key and checks the entries are still counted."""
    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.int32, name='x')
        pi = array_ops.placeholder(dtypes.int64)
        gi = array_ops.placeholder(dtypes.int64)
        p = array_ops.placeholder(dtypes.int32, name='p')
      with ops.device(test.gpu_device_name()):
        area = data_flow_ops.MapStagingArea([dtypes.int32], shapes=[[]])
        put_op = area.put(pi, [x], [0])
        peek_op = area.peek(gi)
        size_op = area.size()

    G.finalize()

    n = 10

    with self.session(use_gpu=True, graph=G) as sess:
      # Stage value k under key k.
      for k in range(n):
        sess.run(put_op, feed_dict={x: k, pi: k})

      # Each key peeks back the value that was staged under it.
      for k in range(n):
        peeked = sess.run(peek_op, feed_dict={gi: k})
        self.assertTrue(peeked[0] == k)

      # The size is unchanged after all the peeks.
      self.assertTrue(sess.run(size_op) == 10)
  def testSizeAndClear(self):
    """Checks size() after each put and after clear() on a MapStagingArea."""
    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32, name='x')
        pi = array_ops.placeholder(dtypes.int64)
        gi = array_ops.placeholder(dtypes.int64)
        v = 2. * (array_ops.zeros([128, 128]) + x)
      with ops.device(test.gpu_device_name()):
        area = data_flow_ops.MapStagingArea(
            [dtypes.float32, dtypes.float32],
            shapes=[[], [128, 128]],
            names=['x', 'v'])
        put_op = area.put(pi, {'x': x, 'v': v})
        size_op = area.size()
        clear_op = area.clear()

    G.finalize()

    with self.session(use_gpu=True, graph=G) as sess:
      # Stage under key 3, then key 1; the size grows by one each time.
      for expected_size, staged_key in enumerate((3, 1), start=1):
        sess.run(put_op, feed_dict={x: -1, pi: staged_key})
        self.assertEqual(sess.run(size_op), expected_size)
      sess.run(clear_op)
      self.assertEqual(sess.run(size_op), 0)
  def testAllocationHistory(self):
    """Checks the allocation records reported for MatMul and its random input.

    The model from `_run_model()` is run under the GPU device scope. The
    'gpu:0' entries extracted from the run metadata for 'MatMul' and
    'random_normal/RandomStandardNormal' are then checked for the ordering
    and sign of their allocation records.

    The test is reported as skipped when no CUDA GPU is available.
    """
    if not test.is_gpu_available(cuda_only=True):
      # Report an explicit skip instead of a silent pass without a CUDA GPU.
      self.skipTest('Requires a CUDA GPU.')

    gpu_dev = test.gpu_device_name()
    ops.reset_default_graph()
    with ops.device(gpu_dev):
      _, run_meta = _run_model()

    mm = _extract_node(run_meta, 'MatMul')['gpu:0'][0]
    mm_allocs = mm.memory[0].allocation_records
    # has allocation and deallocation.
    self.assertEqual(len(mm_allocs), 2)
    # first allocated.
    self.assertGreater(mm_allocs[1].alloc_micros, mm_allocs[0].alloc_micros)
    self.assertGreater(mm_allocs[0].alloc_bytes, 0)
    # Then deallocated.
    self.assertLess(mm_allocs[1].alloc_bytes, 0)
    # All memory deallocated.
    self.assertEqual(mm_allocs[0].alloc_bytes + mm_allocs[1].alloc_bytes, 0)

    rand = _extract_node(
        run_meta, 'random_normal/RandomStandardNormal')['gpu:0'][0]
    random_allocs = rand.memory[0].allocation_records
    # random normal must be allocated first since matmul depends on it.
    self.assertLess(random_allocs[0].alloc_micros, mm.all_start_micros)
    # deallocates the memory after matmul started.
    self.assertGreater(random_allocs[1].alloc_micros, mm.all_start_micros)
  def testCapacity(self):
    """Checks that puts into a capacity-limited StagingArea stall when full.

    A daemon thread stages `n` values and posts one token per finished put.
    The main thread collects tokens until `queue.get` times out and expects
    that to happen on iteration `capacity`.
    """
    capacity = 3

    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.int32, name='x')
      with ops.device(test.gpu_device_name()):
        area = data_flow_ops.StagingArea(
            [dtypes.int32], capacity=capacity, shapes=[[]])
        put_op = area.put([x])
        get_op = area.get()
        size_op = area.size()

    G.finalize()

    from six.moves import queue as Queue
    import threading

    tokens = Queue.Queue()
    n = 8

    with self.test_session(use_gpu=True, graph=G) as sess:

      def producer():
        # One token per completed put; the token queue therefore stops
        # growing as soon as a put stalls.
        for value in range(n):
          sess.run(put_op, feed_dict={x: value})
          tokens.put(0)

      worker = threading.Thread(target=producer)
      worker.daemon = True
      worker.start()

      # Drain tokens until one fails to arrive within TIMEOUT; `i` keeps the
      # index of the iteration that timed out.
      try:
        for i in range(n):
          tokens.get(timeout=TIMEOUT)
      except Queue.Empty:
        pass

      # The timeout is expected on iteration 'capacity'.
      if i != capacity:
        self.fail("Expected to timeout on iteration '{}' "
                  "but instead timed out on iteration '{}' "
                  "Staging Area size is '{}' and configured "
                  "capacity is '{}'.".format(capacity, i, sess.run(size_op),
                                             capacity))

      # The staging area holds exactly `capacity` elements at this point.
      self.assertTrue(sess.run(size_op) == capacity)

      # Fetch all n values; they come back as [0], [1], ... in order.
      for expected in range(n):
        self.assertTrue(sess.run(get_op) == [expected])

      # Nothing is left afterwards.
      self.assertTrue(sess.run(size_op) == 0)
  def testOrdering(self):
    """Stages keys in descending order and expects them back ascending."""
    import six
    import random

    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.int32, name='x')
        pi = array_ops.placeholder(dtypes.int64, name='pi')
        gi = array_ops.placeholder(dtypes.int64, name='gi')
      with ops.device(test.gpu_device_name()):
        area = data_flow_ops.MapStagingArea(
            [dtypes.int32], shapes=[[]], ordered=True)
        put_op = area.put(pi, [x], [0])
        pop_op = area.get()
        size_op = area.size()

    G.finalize()

    n = 10

    with self.test_session(use_gpu=True, graph=G) as sess:
      # Insert keys n-1 .. 0, each carrying its own key as the value.
      descending = list(reversed(six.moves.range(n)))
      for staged_key in descending:
        sess.run(put_op, feed_dict={pi: staged_key, x: staged_key})

      self.assertTrue(sess.run(size_op) == n)

      # Keyless gets must yield key and value 0, 1, ..., n-1 in that order.
      for position, expected_key in enumerate(reversed(descending)):
        popped_key, popped_values = sess.run(pop_op)
        self.assertTrue(
            position == expected_key == popped_key == popped_values)

      self.assertTrue(sess.run(size_op) == 0)
  def testMemoryLimit(self):
    """Checks that puts stall while the staging area is at its memory limit.

    A producer thread stages `n` chunks of 200*1024 uint8 elements into a
    MapStagingArea created with `memory_limit` of 512*1024, posting one token
    per finished put. The main thread polls for `n` tokens and counts the
    polls that time out; it expects `n - capacity` misses, where `capacity`
    is the number of whole chunks that fit under the limit.

    NOTE: the miss count depends on the 0.05 s poll timeout, so this check
    is timing-sensitive.
    """
    memory_limit = 512*1024  # 512K
    chunk = 200*1024  # 200K
    capacity = memory_limit // chunk

    with ops.device('/cpu:0'):
      x = array_ops.placeholder(dtypes.uint8, name='x')
      pi = array_ops.placeholder(dtypes.int64, name='pi')
      gi = array_ops.placeholder(dtypes.int64, name='gi')
    with ops.device(test.gpu_device_name()):
      stager = data_flow_ops.MapStagingArea([dtypes.uint8],
        memory_limit=memory_limit, shapes=[[]])
      stage = stager.put(pi, [x], [0])
      get = stager.get()
      size = stager.size()

    from six.moves import queue as Queue
    import threading
    import numpy as np

    queue = Queue.Queue()
    n = 5
    missed = 0

    with self.test_session(use_gpu=True) as sess:
      # Stage data in a separate thread which will block
      # when it hits the staging area's capacity and thus
      # not fill the queue with n tokens
      def thread_run():
        for i in range(n):
          sess.run(stage, feed_dict={x: np.full(chunk, i, dtype=np.uint8),
                                    pi: i})
          queue.put(0)

      t = threading.Thread(target=thread_run)
      # Daemonize the producer: if an assertion below fails before join(),
      # a thread still blocked in put must not keep the process alive.
      t.daemon = True
      t.start()

      # Get tokens from the queue, making notes of when we timeout
      for i in range(n):
        try:
          queue.get(timeout=0.05)
        except Queue.Empty:
          missed += 1

      # We timed out n - capacity times waiting for queue puts
      self.assertEqual(missed, n - capacity)

      # Clear the staging area out a bit
      for i in range(n - capacity):
        sess.run(get)

      # This should now succeed
      t.join()

      self.assertEqual(sess.run(size), capacity)

      # Clear out the staging area completely
      for i in range(capacity):
        sess.run(get)
  def testCapacity(self):
    """Checks that puts into a capacity-limited MapStagingArea stall when full.

    A producer thread stages `n` keyed values into a MapStagingArea created
    with `capacity=3`, posting one token per finished put. The main thread
    polls for `n` tokens and counts the polls that time out; it expects
    `n - capacity` misses.

    NOTE: the miss count depends on the 0.05 s poll timeout, so this check
    is timing-sensitive.
    """
    capacity = 3

    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.int32, name='x')
        pi = array_ops.placeholder(dtypes.int64, name='pi')
        gi = array_ops.placeholder(dtypes.int64, name='gi')
      with ops.device(test.gpu_device_name()):
        stager = data_flow_ops.MapStagingArea([dtypes.int32, ],
          capacity=capacity, shapes=[[]])

      stage = stager.put(pi, [x], [0])
      get = stager.get()
      size = stager.size()

    G.finalize()

    from six.moves import queue as Queue
    import threading

    queue = Queue.Queue()
    n = 5
    missed = 0

    with self.test_session(use_gpu=True, graph=G) as sess:
      # Stage data in a separate thread which will block
      # when it hits the staging area's capacity and thus
      # not fill the queue with n tokens
      def thread_run():
        for i in range(n):
          sess.run(stage, feed_dict={x: i, pi: i})
          queue.put(0)

      t = threading.Thread(target=thread_run)
      # Daemonize the producer: if an assertion below fails before join(),
      # a thread still blocked in put must not keep the process alive.
      t.daemon = True
      t.start()

      # Get tokens from the queue, making notes of when we timeout
      for i in range(n):
        try:
          queue.get(timeout=0.05)
        except Queue.Empty:
          missed += 1

      # We timed out n - capacity times waiting for queue puts
      self.assertEqual(missed, n - capacity)

      # Clear the staging area out a bit
      for i in range(n - capacity):
        sess.run(get)

      # This should now succeed
      t.join()

      self.assertEqual(sess.run(size), capacity)

      # Clear out the staging area completely
      for i in range(capacity):
        sess.run(get)
  def testPartialDictInsert(self):
    """Builds dict entries from two partial puts and reads them back by key.

    'x' and 'f' are staged first and 'v' later under the same key; size()
    and incomplete_size() are checked after each step.
    """
    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32)
        f = array_ops.placeholder(dtypes.float32)
        v = array_ops.placeholder(dtypes.float32)
        pi = array_ops.placeholder(dtypes.int64)
        gi = array_ops.placeholder(dtypes.int64)
      with ops.device(test.gpu_device_name()):
        # Test barrier with dictionary
        area = data_flow_ops.MapStagingArea(
            [dtypes.float32, dtypes.float32, dtypes.float32],
            names=['x', 'v', 'f'])
        stage_xf = area.put(pi, {'x': x, 'f': f})
        stage_v = area.put(pi, {'v': v})
        key, ret = area.get(gi)
        size = area.size()
        isize = area.incomplete_size()

    G.finalize()

    with self.session(use_gpu=True, graph=G) as sess:

      def counts():
        # [size(), incomplete_size()] of the staging area right now.
        return sess.run([size, isize])

      def fetch(wanted_key):
        # Get the full entry stored under wanted_key as [key, dict].
        return sess.run([key, ret], feed_dict={gi: wanted_key})

      # Nothing staged yet.
      self.assertTrue(counts() == [0, 0])

      # Stage 'x' and 'f' for key 0, then key 1: both entries are incomplete.
      for staged_key, expected in ((0, [0, 1]), (1, [0, 2])):
        sess.run(stage_xf, feed_dict={pi: staged_key, x: 1, f: 2})
        self.assertTrue(counts() == expected)

      # Adding 'v' completes key 0: 1 complete, 1 incomplete.
      sess.run(stage_v, feed_dict={pi: 0, v: 1})
      self.assertTrue(counts() == [1, 1])
      # The whole entry for key 0 can now be fetched.
      self.assertTrue(fetch(0) == [0, {'x': 1, 'f': 2, 'v': 1}])

      # 0 complete and 1 incomplete entry remain.
      self.assertTrue(counts() == [0, 1])
      # Adding 'v' completes key 1 as well.
      sess.run(stage_v, feed_dict={pi: 1, v: 3})
      # The whole entry for key 1 can now be fetched.
      self.assertTrue(fetch(1) == [1, {'x': 1, 'f': 2, 'v': 3}])
  def testGPU(self):
    """Checks the profile and run metadata of the model run on the GPU.

    The model from `_run_model()` is run under the GPU device scope. The
    first child of the returned profile node must be 'MatMul' with more than
    10 exec_micros, and the run metadata must contain exactly one 'MatMul'
    entry under both 'gpu:0' and 'gpu:0/stream:all'.

    The test is reported as skipped when no CUDA GPU is available.
    """
    if not test.is_gpu_available(cuda_only=True):
      # Report an explicit skip instead of a silent pass without a CUDA GPU.
      self.skipTest('Requires a CUDA GPU.')

    gpu_dev = test.gpu_device_name()
    ops.reset_default_graph()
    with ops.device(gpu_dev):
      tfprof_node, run_meta = _run_model()
      self.assertEqual(tfprof_node.children[0].name, 'MatMul')
      self.assertGreater(tfprof_node.children[0].exec_micros, 10)

    ret = _extract_node(run_meta, 'MatMul')
    self.assertEqual(len(ret['gpu:0']), 1)
    # Include the run metadata in the failure message to aid debugging.
    self.assertEqual(len(ret['gpu:0/stream:all']), 1, '%s' % run_meta)
Beispiel #13
0
  def testColocation(self):
    """Checks the devices reported for StagingArea put() and get() results.

    put() is called under the GPU device scope and get() under "/cpu:0";
    each result must report the device of the scope it was created in.
    """
    gpu_dev = test.gpu_device_name()

    with ops.device('/cpu:0'):
      x = array_ops.placeholder(dtypes.float32)
      v = 2. * (array_ops.zeros([128, 128]) + x)
    with ops.device(gpu_dev):
      area = data_flow_ops.StagingArea([dtypes.float32])
      put_op = area.put([v])
      # With an empty device name the expected device is that empty name.
      expected_device = '/device:GPU:0' if gpu_dev else gpu_dev
      self.assertEqual(put_op.device, expected_device)
    with ops.device('/cpu:0'):
      fetched = area.get()
      self.assertEqual(fetched.device, '/device:CPU:0')
 def testSimple(self):
   """Stages a matrix through a StagingArea and checks the fetched result.

   Each loop step runs put and get together; the expected value is computed
   from the `x` fed one step earlier.
   """
   with self.test_session(use_gpu=True) as sess:
     with ops.device('/cpu:0'):
       x = array_ops.placeholder(dtypes.float32)
       v = 2. * (array_ops.zeros([128, 128]) + x)
     with ops.device(test.gpu_device_name()):
       area = data_flow_ops.StagingArea([dtypes.float32])
       put_op = area.put([v])
       staged = area.get()
       peak = math_ops.reduce_max(math_ops.matmul(staged, staged))
     # Stage one entry up front so the first get in the loop has data.
     sess.run(put_op, feed_dict={x: -1})
     for step in range(10):
       _, yval = sess.run([put_op, peak], feed_dict={x: step})
       prev = step - 1
       self.assertAllClose(4 * prev * prev * 128, yval, rtol=1e-4)
Beispiel #15
0
  def testPeek(self):
    """Stages ten values and peeks each one back by its index."""
    with ops.device('/cpu:0'):
      x = array_ops.placeholder(dtypes.int32, name='x')
      p = array_ops.placeholder(dtypes.int32, name='p')
    with ops.device(test.gpu_device_name()):
      area = data_flow_ops.StagingArea([dtypes.int32], shapes=[[]])
      put_op = area.put([x])
      peek_op = area.peek(p)
      # The get op is created but never run in this test.
      unused_get = area.get()

    with self.test_session(use_gpu=True) as sess:
      # Stage the values 0..9 in order.
      for value in range(10):
        sess.run(put_op, feed_dict={x: value})

      # Peeking at index k must return the k-th staged value.
      for index in range(10):
        peeked = sess.run(peek_op, feed_dict={p: index})
        self.assertTrue(peeked == index)
Beispiel #16
0
  def testMultiDevices(self):
    """Adds two session-handle tensors created under different device scopes."""
    with self.test_session() as sess:
      # One constant under the GPU device scope, one under the CPU scope;
      # each is turned into a session handle right away.
      with ops.device(test.gpu_device_name()):
        gpu_const = constant_op.constant(1.0)
        gpu_handle = sess.run(session_ops.get_session_handle(gpu_const))
      with ops.device("/cpu:0"):
        cpu_const = constant_op.constant(2.0)
        cpu_handle = sess.run(session_ops.get_session_handle(cpu_const))

      gpu_feed, gpu_tensor = session_ops.get_session_tensor(
          gpu_handle.handle, dtypes.float32)
      cpu_feed, cpu_tensor = session_ops.get_session_tensor(
          cpu_handle.handle, dtypes.float32)
      total = math_ops.add(gpu_tensor, cpu_tensor)
      total_handle_op = session_ops.get_session_handle(total)
      feeds = {gpu_feed: gpu_handle.handle, cpu_feed: cpu_handle.handle}
      total_handle = sess.run(total_handle_op, feed_dict=feeds)
      self.assertEqual(3.0, total_handle.eval())
Beispiel #17
0
  def testColocation(self):
    """Checks the devices reported for StagingArea put() and get() results.

    put() is called under the GPU device scope and get() under "/cpu:0",
    all inside a dedicated graph that is finalized at the end.
    """
    gpu_dev = test.gpu_device_name()

    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32)
        v = 2. * (array_ops.zeros([128, 128]) + x)
      with ops.device(gpu_dev):
        area = data_flow_ops.StagingArea([dtypes.float32])
        put_op = area.put([v])
        # A name containing lowercase 'gpu' is expected to be reported as
        # '/device:GPU:0'; any other name is expected back unchanged.
        if 'gpu' in gpu_dev:
          expected_device = '/device:GPU:0'
        else:
          expected_device = gpu_dev
        self.assertEqual(put_op.device, expected_device)
      with ops.device('/cpu:0'):
        fetched = area.get()[0]
        self.assertEqual(fetched.device, '/device:CPU:0')

    G.finalize()
Beispiel #18
0
 def testMultiple(self):
   """Stages a (scalar, matrix) pair by key in a MapStagingArea.

   Every loop step stages key `step + 1` while fetching key `step`; the
   expected value is computed from the `x` fed one step earlier.
   """
   with self.test_session(use_gpu=True) as sess:
     with ops.device('/cpu:0'):
       x = array_ops.placeholder(dtypes.float32)
       pi = array_ops.placeholder(dtypes.int64)
       gi = array_ops.placeholder(dtypes.int64)
       v = 2. * (array_ops.zeros([128, 128]) + x)
     with ops.device(test.gpu_device_name()):
       area = data_flow_ops.MapStagingArea([dtypes.float32, dtypes.float32])
       put_op = area.put(pi, [x, v], [0, 1])
       _, (scalar, matrix) = area.get(gi)
       result = math_ops.reduce_max(scalar * math_ops.matmul(matrix, matrix))
     # Stage key 0 up front so the first get in the loop has an entry.
     sess.run(put_op, feed_dict={x: -1, pi: 0})
     for step in range(10):
       feeds = {x: step, pi: step + 1, gi: step}
       _, yval = sess.run([put_op, result], feed_dict=feeds)
       prev = step - 1
       self.assertAllClose(4 * prev * prev * prev * 128, yval, rtol=1e-4)
  def testMultiple(self):
    """Stages a (scalar, matrix) pair through a StagingArea.

    Each loop step runs put and get together; the expected value is computed
    from the `x` fed one step earlier.
    """
    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32)
        v = 2. * (array_ops.zeros([128, 128]) + x)
      with ops.device(test.gpu_device_name()):
        area = data_flow_ops.StagingArea([dtypes.float32, dtypes.float32])
        put_op = area.put([x, v])
        scalar, matrix = area.get()
        result = math_ops.reduce_max(scalar * math_ops.matmul(matrix, matrix))

    G.finalize()

    with self.session(use_gpu=True, graph=G) as sess:
      # Stage one entry up front so the first get in the loop has data.
      sess.run(put_op, feed_dict={x: -1})
      for step in range(10):
        _, yval = sess.run([put_op, result], feed_dict={x: step})
        prev = step - 1
        self.assertAllClose(4 * prev * prev * prev * 128, yval, rtol=1e-4)
Beispiel #20
0
  def testHandleMover(self):
    """Feeds session handles, including one created under the GPU scope."""
    with self.test_session() as sess:
      # Produce a handle for the product 10 * 5.
      ten = constant_op.constant(10)
      five = constant_op.constant(5)
      product = math_ops.multiply(ten, five)
      handle = sess.run(session_ops.get_session_handle(product))

      # Feed that handle into a graph that multiplies by 10.
      feed, restored = session_ops.get_session_tensor(
          handle.handle, dtypes.int32)
      scaled = math_ops.multiply(restored, 10)
      self.assertEqual(500, sess.run(scaled, feed_dict={feed: handle.handle}))

      # Feed a second handle, this one created under the GPU device scope,
      # through the same placeholder.
      with ops.device(test.gpu_device_name()):
        gpu_ten = constant_op.constant(10)
        handle = sess.run(session_ops.get_session_handle(gpu_ten))
        self.assertEqual(100, sess.run(scaled, feed_dict={feed: handle.handle}))
 def testDictionary(self):
   """Round-trips dict-structured entries through a StagingArea.

   Each loop step runs put and get together; the expected value is computed
   from the `x` fed one step earlier.
   """
   with self.test_session(use_gpu=True) as sess:
     with ops.device('/cpu:0'):
       x = array_ops.placeholder(dtypes.float32)
       v = 2. * (array_ops.zeros([128, 128]) + x)
     with ops.device(test.gpu_device_name()):
       area = data_flow_ops.StagingArea(
           [dtypes.float32, dtypes.float32],
           shapes=[[], [128, 128]],
           names=['x', 'v'])
       put_op = area.put({'x': x, 'v': v})
       fetched = area.get()
       scalar = fetched['x']
       matrix = fetched['v']
       result = math_ops.reduce_max(scalar * math_ops.matmul(matrix, matrix))
     # Stage one entry up front so the first get in the loop has data.
     sess.run(put_op, feed_dict={x: -1})
     for step in range(10):
       _, yval = sess.run([put_op, result], feed_dict={x: step})
       prev = step - 1
       self.assertAllClose(4 * prev * prev * prev * 128, yval, rtol=1e-4)
  def testPartialIndexGets(self):
    """Removes one staged tuple in two partial gets selected by index.

    A complete (x, v, f) tuple is staged under key 0. Indices [0, 2] are
    fetched first and index [1] afterwards; the entry keeps counting as
    present until the last index has been fetched.
    """
    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32)
        f = array_ops.placeholder(dtypes.float32)
        v = array_ops.placeholder(dtypes.float32)
        pi = array_ops.placeholder(dtypes.int64)
        pei = array_ops.placeholder(dtypes.int64)
        gi = array_ops.placeholder(dtypes.int64)
      with ops.device(test.gpu_device_name()):
        # Test again with partial index gets
        area = data_flow_ops.MapStagingArea(
            [dtypes.float32, dtypes.float32, dtypes.float32])
        stage_xvf = area.put(pi, [x, v, f], [0, 1, 2])
        key_xf, get_xf = area.get(gi, [0, 2])
        key_v, get_v = area.get(gi, [1])
        size = area.size()
        isize = area.incomplete_size()

    G.finalize()

    with self.session(use_gpu=True, graph=G) as sess:

      def counts():
        # [size(), incomplete_size()] of the staging area right now.
        return sess.run([size, isize])

      # Stage the complete tuple under key 0.
      sess.run(stage_xvf, feed_dict={pi: 0, x: 1, f: 2, v: 3})
      self.assertTrue(counts() == [1, 0])

      # Fetch indices 0 and 2 only.
      fetched_xf = sess.run([key_xf, get_xf], feed_dict={gi: 0})
      self.assertTrue(fetched_xf == [0, [1, 2]])

      # Key 0 still counts as a complete entry.
      self.assertTrue(counts() == [1, 0])

      # Fetch the remaining index 1.
      fetched_v = sess.run([key_v, get_v], feed_dict={gi: 0})
      self.assertTrue(fetched_v == [0, [3]])

      # Nothing is left.
      self.assertTrue(counts() == [0, 0])
Beispiel #23
0
  def testColocation(self):
    """Checks the devices reported for MapStagingArea put/get/peek results.

    put() is called under the GPU device scope; keyed get(), peek() and
    keyless get() are called under "/cpu:0".
    """
    gpu_dev = test.gpu_device_name()

    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32)
        v = 2. * (array_ops.zeros([128, 128]) + x)
      with ops.device(gpu_dev):
        area = data_flow_ops.MapStagingArea([dtypes.float32])
        put_op = area.put(1, [v], [0])
        # With an empty device name the expected device is that empty name.
        expected_device = '/device:GPU:0' if gpu_dev else gpu_dev
        self.assertEqual(put_op.device, expected_device)
      with ops.device('/cpu:0'):
        _, got = area.get(1)
        peeked = area.peek(1)
        _, popped = area.get()
        for cpu_result in (got, peeked, popped):
          self.assertEqual(cpu_result.device, '/device:CPU:0')

    G.finalize()
Beispiel #24
0
  def testSizeAndClear(self):
    """Checks size() after each put and after clear() on a StagingArea."""
    with ops.device('/cpu:0'):
      x = array_ops.placeholder(dtypes.float32, name='x')
      v = 2. * (array_ops.zeros([128, 128]) + x)
    with ops.device(test.gpu_device_name()):
      area = data_flow_ops.StagingArea(
          [dtypes.float32, dtypes.float32],
          shapes=[[], [128, 128]],
          names=['x', 'v'])
      put_op = area.put({'x': x, 'v': v})
      # The get op is created but never run in this test.
      unused_get = area.get()
      size_op = area.size()
      clear_op = area.clear()

    with self.test_session(use_gpu=True) as sess:
      # Two puts in a row: the size must read 1, then 2.
      for expected_size in (1, 2):
        sess.run(put_op, feed_dict={x: -1})
        self.assertEqual(sess.run(size_op), expected_size)
      sess.run(clear_op)
      self.assertEqual(sess.run(size_op), 0)
Beispiel #25
0
  def testHandleGC(self):
    """Repeatedly replaces a session handle with the result of an add.

    Two handles to the constant 1 are created under the CPU scope. An add
    built under the GPU scope is then run 20 times, each time feeding the
    previous result handle back in.
    """
    with self.test_session() as sess:
      # Initial handles are created under the CPU device scope.
      with ops.device("/cpu:0"):
        one = constant_op.constant(1, dtype=dtypes.float32)
        one_handle = sess.run(session_ops.get_session_handle(one))
        running_handle = sess.run(session_ops.get_session_handle(one))

      # The addition is built under the GPU device scope.
      with ops.device(test.gpu_device_name()):
        feed_one, tensor_one = session_ops.get_session_tensor(
            one_handle.handle, dtypes.float32)
        feed_running, tensor_running = session_ops.get_session_tensor(
            running_handle.handle, dtypes.float32)
        total = math_ops.add(tensor_one, tensor_running)
        total_handle_op = session_ops.get_session_handle(total)

      # Add 1 to the running tensor 20 times.
      for _ in range(20):
        feeds = {feed_one: one_handle.handle,
                 feed_running: running_handle.handle}
        running_handle = sess.run(total_handle_op, feed_dict=feeds)
 def _VerifyRunGraph(self, n, m, k, transpose_a, transpose_b, dtype):
   """Runs the matmul benchmark on the GPU device and checks its result.

   A new `MatmulBenchmark` runs `run_graph` with the given shape and
   transpose arguments, the literal 1 as seventh argument, and `dtype`.
   The returned value must exceed 1e-6.
   """
   bench = matmul_benchmark.MatmulBenchmark()
   device = googletest.gpu_device_name()
   elapsed = bench.run_graph(
       device, n, m, k, transpose_a, transpose_b, 1, dtype)
   self.assertTrue(elapsed > 1e-6)
 def benchmark_adjust_saturation_in_yiq_gpu_all(self):
   """Runs `_benchmark_adjust_saturation_in_yiq` with the GPU device name.

   `None` is passed as the helper's second argument.
   """
   device = test.gpu_device_name()
   self._benchmark_adjust_saturation_in_yiq(device, None)
  def testMemoryLimit(self):
    """Checks that puts stall while the staging area is at its memory limit.

    A daemon thread stages `n` chunks of 200*1024 uint8 elements into a
    MapStagingArea created with `memory_limit` of 512*1024, posting one token
    per finished put. The main thread collects tokens until `queue.get` times
    out and expects that to happen on iteration `capacity`.
    """
    memory_limit = 512 * 1024  # 512K
    chunk = 200 * 1024  # 200K
    capacity = memory_limit // chunk

    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.uint8, name='x')
        pi = array_ops.placeholder(dtypes.int64, name='pi')
        gi = array_ops.placeholder(dtypes.int64, name='gi')
      with ops.device(test.gpu_device_name()):
        area = data_flow_ops.MapStagingArea(
            [dtypes.uint8], memory_limit=memory_limit, shapes=[[]])
        put_op = area.put(pi, [x], [0])
        get_op = area.get()
        size_op = area.size()

    G.finalize()

    from six.moves import queue as Queue
    import threading
    import numpy as np

    tokens = Queue.Queue()
    n = 8

    with self.session(use_gpu=True, graph=G) as sess:

      def producer():
        # One token per completed put; the token queue therefore stops
        # growing as soon as a put stalls.
        for key in range(n):
          payload = np.full(chunk, key, dtype=np.uint8)
          sess.run(put_op, feed_dict={x: payload, pi: key})
          tokens.put(0)

      worker = threading.Thread(target=producer)
      worker.daemon = True
      worker.start()

      # Drain tokens until one fails to arrive within TIMEOUT; `i` keeps the
      # index of the iteration that timed out.
      try:
        for i in range(n):
          tokens.get(timeout=TIMEOUT)
      except Queue.Empty:
        pass

      # The timeout is expected on iteration 'capacity'.
      if i != capacity:
        self.fail("Expected to timeout on iteration '{}' "
                  "but instead timed out on iteration '{}' "
                  "Staging Area size is '{}' and configured "
                  "capacity is '{}'.".format(capacity, i, sess.run(size_op),
                                             capacity))

      # The staging area holds exactly `capacity` elements at this point.
      self.assertTrue(sess.run(size_op) == capacity)

      # Fetch all n entries so the staging area is drained.
      for _ in range(n):
        sess.run(get_op)

      self.assertTrue(sess.run(size_op) == 0)
  def testPartialDictGetsAndPeeks(self):
    """Exercises per-name peek, keyed get and keyless get on dict entries.

    Entries are assembled from partial puts ('x' and 'f' first, 'v' later),
    then read back in pieces; size() and incomplete_size() are checked after
    every step, and a second get of already-removed names must raise
    InvalidArgumentError.
    """
    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32)
        f = array_ops.placeholder(dtypes.float32)
        v = array_ops.placeholder(dtypes.float32)
        pi = array_ops.placeholder(dtypes.int64)
        pei = array_ops.placeholder(dtypes.int64)
        gi = array_ops.placeholder(dtypes.int64)
      with ops.device(test.gpu_device_name()):
        # Test barrier with dictionary
        area = data_flow_ops.MapStagingArea(
            [dtypes.float32, dtypes.float32, dtypes.float32],
            names=['x', 'v', 'f'])
        stage_xf = area.put(pi, {'x': x, 'f': f})
        stage_v = area.put(pi, {'v': v})
        peek_xf = area.peek(pei, ['x', 'f'])
        peek_v = area.peek(pei, ['v'])
        key_xf, get_xf = area.get(gi, ['x', 'f'])
        key_v, get_v = area.get(gi, ['v'])
        pop_key_xf, pop_xf = area.get(indices=['x', 'f'])
        pop_key_v, pop_v = area.get(pi, ['v'])
        size = area.size()
        isize = area.incomplete_size()

    G.finalize()

    with self.session(use_gpu=True, graph=G) as sess:

      def counts():
        # [size(), incomplete_size()] of the staging area right now.
        return sess.run([size, isize])

      # Nothing staged yet.
      self.assertTrue(counts() == [0, 0])

      # Stage 'x' and 'f' for key 0, then key 1: both entries are incomplete.
      for staged_key, expected in ((0, [0, 1]), (1, [0, 2])):
        sess.run(stage_xf, feed_dict={pi: staged_key, x: 1, f: 2})
        self.assertTrue(counts() == expected)

      # Adding 'v' completes key 0: 1 complete, 1 incomplete.
      sess.run(stage_v, feed_dict={pi: 0, v: 1})
      self.assertTrue(counts() == [1, 1])

      # Peek at 'x' and 'f', then at 'v', for key 0.
      peeked_xf = sess.run(peek_xf, feed_dict={pei: 0})
      self.assertTrue(peeked_xf == {'x': 1, 'f': 2})
      peeked_v = sess.run(peek_v, feed_dict={pei: 0})
      self.assertTrue(peeked_v == {'v': 1})
      # Peeking leaves the counts unchanged.
      self.assertTrue(counts() == [1, 1])

      # Get 'x' and 'f' for key 0.
      got_xf = sess.run([key_xf, get_xf], feed_dict={gi: 0})
      self.assertTrue(got_xf == [0, {'x': 1, 'f': 2}])
      # Still 1 complete and 1 incomplete entry.
      self.assertTrue(counts() == [1, 1])

      # A second get of 'x' and 'f' for key 0 must fail.
      with self.assertRaises(errors.InvalidArgumentError) as cm:
        sess.run([key_xf, get_xf], feed_dict={gi: 0})

      expected_fragment = (
          "Tensor at index '0' for key '0' has already been removed.")
      self.assertTrue(expected_fragment in cm.exception.message)

      # Get the remaining 'v' for key 0.
      got_v = sess.run([key_v, get_v], feed_dict={gi: 0})
      self.assertTrue(got_v == [0, {'v': 1}])
      # 0 complete and 1 incomplete entry.
      self.assertTrue(counts() == [0, 1])

      # Adding 'v' completes key 1: 1 complete, 0 incomplete.
      sess.run(stage_v, feed_dict={pi: 1, v: 1})
      self.assertTrue(counts() == [1, 0])

      # A keyless get of 'x' and 'f' returns them for key 1.
      popped_xf = sess.run([pop_key_xf, pop_xf])
      self.assertTrue(popped_xf == [1, {'x': 1, 'f': 2}])
      # Still 1 complete and 0 incomplete entries.
      self.assertTrue(counts() == [1, 0])
      # Get the remaining 'v' for key 1.
      popped_v = sess.run([pop_key_v, pop_v], feed_dict={pi: 1})
      self.assertTrue(popped_v == [1, {'v': 1}])
      # Nothing is left.
      self.assertTrue(counts() == [0, 0])
 def run_test_gpu(self, n, m, k, transpose_a, transpose_b, dtype, num_iters):
   """Calls `self.run_graph` with the GPU device name.

   Note the argument order change: `run_graph` receives `num_iters` before
   `dtype`, while this method takes `dtype` before `num_iters`.
   """
   device = test.gpu_device_name()
   self.run_graph(
       device, n, m, k, transpose_a, transpose_b, num_iters, dtype)