コード例 #1
0
 def _VerifyBuildGraph(self, n, m, k, transpose_a, transpose_b, dtype):
   """Builds the matmul benchmark graph and compares it to an expected proto.

   All arguments are forwarded unchanged to `matmul_benchmark.build_graph`
   together with the device name from `googletest.gpu_device_name()`.
   The resulting GraphDef is passed through `self._StripGraph` and compared
   with a hand-written list of nodes, each pinned to that same device.

   Args:
     n: Forwarded to `build_graph`.
     m: Forwarded to `build_graph`.
     k: Forwarded to `build_graph`.
     transpose_a: Forwarded to `build_graph`.
     transpose_b: Forwarded to `build_graph`.
     dtype: Forwarded to `build_graph`.
   """
   graph = ops.Graph()
   with graph.as_default():
     # Populate `graph` with the benchmark ops on the GPU device.
     matmul_benchmark.build_graph(googletest.gpu_device_name(), n, m, k, transpose_a, transpose_b,
                                  dtype)
     gd = graph.as_graph_def()
     dev=googletest.gpu_device_name()
     # Expected nodes in text-proto form; the device name is spliced into
     # every node by string concatenation.
     proto_expected = """
     node { name: "random_uniform/shape" op: "Const" device: \""""+ dev +"""\" }
     node { name: "random_uniform/min" op: "Const" device: \""""+ dev +"""\" }
     node { name: "random_uniform/max" op: "Const" device: \""""+ dev +"""\" }
     node { name: "random_uniform/RandomUniform" op: "RandomUniform" input: "random_uniform/shape" device: \""""+ dev +"""\" }
     node { name: "random_uniform/sub" op: "Sub" input: "random_uniform/max" input: "random_uniform/min" device: \""""+ dev +"""\" }
     node { name: "random_uniform/mul" op: "Mul" input: "random_uniform/RandomUniform" input: "random_uniform/sub" device: \""""+ dev +"""\" }
     node { name: "random_uniform" op: "Add" input: "random_uniform/mul" input: "random_uniform/min" device: \""""+ dev +"""\" }
     node { name: "Variable" op: "VariableV2" device: \""""+ dev +"""\" }
     node { name: "Variable/Assign" op: "Assign" input: "Variable" input: "random_uniform" device: \""""+ dev +"""\" }
     node { name: "Variable/read" op: "Identity" input: "Variable" device: \""""+ dev +"""\" }
     node { name: "random_uniform_1/shape" op: "Const" device: \""""+ dev +"""\" }
     node { name: "random_uniform_1/min" op: "Const" device: \""""+ dev +"""\" }
     node { name: "random_uniform_1/max" op: "Const" device: \""""+ dev +"""\" }
     node { name: "random_uniform_1/RandomUniform" op: "RandomUniform" input: "random_uniform_1/shape" device: \""""+ dev +"""\" }
     node { name: "random_uniform_1/sub" op: "Sub" input: "random_uniform_1/max" input: "random_uniform_1/min" device: \""""+ dev +"""\" }
     node { name: "random_uniform_1/mul" op: "Mul" input: "random_uniform_1/RandomUniform" input: "random_uniform_1/sub" device: \""""+ dev +"""\" }
     node { name: "random_uniform_1" op: "Add" input: "random_uniform_1/mul" input: "random_uniform_1/min" device: \""""+ dev +"""\" }
     node { name: "Variable_1" op: "VariableV2" device: \""""+ dev +"""\" }
     node { name: "Variable_1/Assign" op: "Assign" input: "Variable_1" input: "random_uniform_1" device: \""""+ dev +"""\" }
     node { name: "Variable_1/read" op: "Identity" input: "Variable_1" device: \""""+ dev +"""\" }
     node { name: "MatMul" op: "MatMul" input: "Variable/read" input: "Variable_1/read" device: \""""+ dev +"""\" }
     node { name: "group_deps" op: "NoOp" input: "^MatMul" device: \""""+ dev +"""\" }
                      """
     # NOTE(review): `_StripGraph` is defined elsewhere; presumably it drops
     # the fields not listed in the expected proto above - confirm.
     self.assertProtoEquals(str(proto_expected), self._StripGraph(gd))
コード例 #2
0
 def _VerifyBuildGraph(self, n, m, k, transpose_a, transpose_b, dtype):
     """Builds the matmul benchmark graph and compares it to an expected proto.

     All arguments are forwarded unchanged to `matmul_benchmark.build_graph`
     together with the device name from `googletest.gpu_device_name()`.
     The resulting GraphDef is passed through `self._StripGraph` and compared
     with a hand-written list of nodes, each pinned to that same device.

     Args:
       n: Forwarded to `build_graph`.
       m: Forwarded to `build_graph`.
       k: Forwarded to `build_graph`.
       transpose_a: Forwarded to `build_graph`.
       transpose_b: Forwarded to `build_graph`.
       dtype: Forwarded to `build_graph`.
     """
     graph = ops.Graph()
     with graph.as_default():
         # Populate `graph` with the benchmark ops on the GPU device.
         matmul_benchmark.build_graph(googletest.gpu_device_name(), n, m, k,
                                      transpose_a, transpose_b, dtype)
         gd = graph.as_graph_def()
         dev = googletest.gpu_device_name()
         # Expected nodes in text-proto form; the device name is spliced into
         # every node by string concatenation.
         proto_expected = """
   node { name: "random_uniform/shape" op: "Const" device: \"""" + dev + """\" }
   node { name: "random_uniform/min" op: "Const" device: \"""" + dev + """\" }
   node { name: "random_uniform/max" op: "Const" device: \"""" + dev + """\" }
   node { name: "random_uniform/RandomUniform" op: "RandomUniform" input: "random_uniform/shape" device: \"""" + dev + """\" }
   node { name: "random_uniform/sub" op: "Sub" input: "random_uniform/max" input: "random_uniform/min" device: \"""" + dev + """\" }
   node { name: "random_uniform/mul" op: "Mul" input: "random_uniform/RandomUniform" input: "random_uniform/sub" device: \"""" + dev + """\" }
   node { name: "random_uniform" op: "Add" input: "random_uniform/mul" input: "random_uniform/min" device: \"""" + dev + """\" }
   node { name: "Variable" op: "VariableV2" device: \"""" + dev + """\" }
   node { name: "Variable/Assign" op: "Assign" input: "Variable" input: "random_uniform" device: \"""" + dev + """\" }
   node { name: "Variable/read" op: "Identity" input: "Variable" device: \"""" + dev + """\" }
   node { name: "random_uniform_1/shape" op: "Const" device: \"""" + dev + """\" }
   node { name: "random_uniform_1/min" op: "Const" device: \"""" + dev + """\" }
   node { name: "random_uniform_1/max" op: "Const" device: \"""" + dev + """\" }
   node { name: "random_uniform_1/RandomUniform" op: "RandomUniform" input: "random_uniform_1/shape" device: \"""" + dev + """\" }
   node { name: "random_uniform_1/sub" op: "Sub" input: "random_uniform_1/max" input: "random_uniform_1/min" device: \"""" + dev + """\" }
   node { name: "random_uniform_1/mul" op: "Mul" input: "random_uniform_1/RandomUniform" input: "random_uniform_1/sub" device: \"""" + dev + """\" }
   node { name: "random_uniform_1" op: "Add" input: "random_uniform_1/mul" input: "random_uniform_1/min" device: \"""" + dev + """\" }
   node { name: "Variable_1" op: "VariableV2" device: \"""" + dev + """\" }
   node { name: "Variable_1/Assign" op: "Assign" input: "Variable_1" input: "random_uniform_1" device: \"""" + dev + """\" }
   node { name: "Variable_1/read" op: "Identity" input: "Variable_1" device: \"""" + dev + """\" }
   node { name: "MatMul" op: "MatMul" input: "Variable/read" input: "Variable_1/read" device: \"""" + dev + """\" }
   node { name: "group_deps" op: "NoOp" input: "^MatMul" device: \"""" + dev + """\" }
                    """
         # NOTE(review): `_StripGraph` is defined elsewhere; presumably it
         # drops the fields not listed in the expected proto above - confirm.
         self.assertProtoEquals(str(proto_expected), self._StripGraph(gd))
コード例 #3
0
  def testOrdering(self):
    """Checks that an ordered MapStagingArea returns keys in ascending order.

    Keys n-1..0 are staged in descending order; each key-less `get()` is then
    expected to return the smallest remaining key together with its value.
    """
    import six

    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.int32, name='x')
        pi = array_ops.placeholder(dtypes.int64, name='pi')
        # 'gi' is never fed or fetched here; it is kept so the graph built by
        # this test is unchanged.
        gi = array_ops.placeholder(dtypes.int64, name='gi')
      with ops.device(test.gpu_device_name()):
        stager = data_flow_ops.MapStagingArea(
            [dtypes.int32], shapes=[[]], ordered=True)
        stage = stager.put(pi, [x], [0])
        get = stager.get()
        size = stager.size()

    G.finalize()

    n = 10

    with self.test_session(use_gpu=True, graph=G) as sess:
      # Keys n-1..0
      keys = list(reversed(six.moves.range(n)))

      for i in keys:
        sess.run(stage, feed_dict={pi: i, x: i})

      # assertEqual reports both values on failure, unlike assertTrue(a == b).
      self.assertEqual(sess.run(size), n)

      # Check that key, values come out in ascending order
      for i, k in enumerate(reversed(keys)):
        get_key, values = sess.run(get)
        # Kept as a chained comparison: `values` is whatever structure the
        # session returns for the staged tuple, so it is compared via `==`.
        self.assertTrue(i == k == get_key == values)

      self.assertEqual(sess.run(size), 0)
コード例 #4
0
    def testAllocationHistory(self):
        """Checks the allocation records reported for GPU ops of the model.

        The MatMul node must show exactly one allocation followed by one
        matching deallocation, and the random-normal node feeding it must be
        allocated before MatMul starts and released after it starts.
        Skipped when no CUDA GPU is available.
        """
        if not test.is_gpu_available(cuda_only=True):
            # Report a skip instead of a silent pass when nothing was checked.
            self.skipTest('Requires a CUDA GPU.')

        gpu_dev = test.gpu_device_name()
        ops.reset_default_graph()
        with ops.device(gpu_dev):
            _, run_meta = _run_model()

        mm = _extract_node(run_meta, 'MatMul')['gpu:0'][0]
        mm_allocs = mm.memory[0].allocation_records
        # Has allocation and deallocation.
        self.assertEqual(len(mm_allocs), 2)
        # First allocated.
        self.assertGreater(mm_allocs[1].alloc_micros,
                           mm_allocs[0].alloc_micros)
        self.assertGreater(mm_allocs[0].alloc_bytes, 0)
        # Then deallocated.
        self.assertLess(mm_allocs[1].alloc_bytes, 0)
        # All memory deallocated.
        self.assertEqual(mm_allocs[0].alloc_bytes + mm_allocs[1].alloc_bytes,
                         0)

        rand = _extract_node(run_meta,
                             'random_normal/RandomStandardNormal')['gpu:0'][0]
        random_allocs = rand.memory[0].allocation_records
        # Random normal must be allocated first since matmul depends on it.
        self.assertLess(random_allocs[0].alloc_micros, mm.all_start_micros)
        # Deallocates the memory after matmul started.
        self.assertGreater(random_allocs[1].alloc_micros, mm.all_start_micros)
コード例 #5
0
  def testAllocationHistory(self):
    """Checks the allocation records reported for GPU ops of the model.

    The MatMul node must show exactly one allocation followed by one matching
    deallocation, and the random-normal node feeding it must be allocated
    before MatMul starts and released after it starts.
    Skipped when no CUDA GPU is available.
    """
    if not test.is_gpu_available(cuda_only=True):
      # Report a skip instead of a silent pass when nothing was checked.
      self.skipTest('Requires a CUDA GPU.')

    gpu_dev = test.gpu_device_name()
    ops.reset_default_graph()
    with ops.device(gpu_dev):
      _, run_meta = _run_model()

    mm = _extract_node(run_meta, 'MatMul')['gpu:0'][0]
    mm_allocs = mm.memory[0].allocation_records
    # Has allocation and deallocation.
    self.assertEqual(len(mm_allocs), 2)
    # First allocated.
    self.assertGreater(mm_allocs[1].alloc_micros, mm_allocs[0].alloc_micros)
    self.assertGreater(mm_allocs[0].alloc_bytes, 0)
    # Then deallocated.
    self.assertLess(mm_allocs[1].alloc_bytes, 0)
    # All memory deallocated.
    self.assertEqual(mm_allocs[0].alloc_bytes + mm_allocs[1].alloc_bytes, 0)

    rand = _extract_node(
        run_meta, 'random_normal/RandomStandardNormal')['gpu:0'][0]
    random_allocs = rand.memory[0].allocation_records
    # Random normal must be allocated first since matmul depends on it.
    self.assertLess(random_allocs[0].alloc_micros, mm.all_start_micros)
    # Deallocates the memory after matmul started.
    self.assertGreater(random_allocs[1].alloc_micros, mm.all_start_micros)
コード例 #6
0
  def testOrdering(self):
    """Checks that an ordered MapStagingArea returns keys in ascending order.

    Keys n-1..0 are staged in descending order; each key-less `get()` is then
    expected to return the smallest remaining key together with its value.
    """
    import six

    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.int32, name='x')
        pi = array_ops.placeholder(dtypes.int64, name='pi')
        # 'gi' is never fed or fetched here; it is kept so the graph built by
        # this test is unchanged.
        gi = array_ops.placeholder(dtypes.int64, name='gi')
      with ops.device(test.gpu_device_name()):
        stager = data_flow_ops.MapStagingArea(
            [dtypes.int32], shapes=[[]], ordered=True)
        stage = stager.put(pi, [x], [0])
        get = stager.get()
        size = stager.size()

    G.finalize()

    n = 10

    with self.test_session(use_gpu=True, graph=G) as sess:
      # Keys n-1..0
      keys = list(reversed(six.moves.range(n)))

      for i in keys:
        sess.run(stage, feed_dict={pi: i, x: i})

      # assertEqual reports both values on failure, unlike assertTrue(a == b).
      self.assertEqual(sess.run(size), n)

      # Check that key, values come out in ascending order
      for i, k in enumerate(reversed(keys)):
        get_key, values = sess.run(get)
        # Kept as a chained comparison: `values` is whatever structure the
        # session returns for the staged tuple, so it is compared via `==`.
        self.assertTrue(i == k == get_key == values)

      self.assertEqual(sess.run(size), 0)
コード例 #7
0
  def testCapacity(self):
    """Checks that StagingArea puts block once `capacity` items are staged.

    A background thread stages n items and posts a token after each put. The
    main thread counts tokens until a timeout; it must time out on iteration
    `capacity`, with exactly `capacity` items held in the staging area.
    """
    capacity = 3

    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.int32, name='x')
      with ops.device(test.gpu_device_name()):
        stager = data_flow_ops.StagingArea(
            [
                dtypes.int32,
            ], capacity=capacity, shapes=[[]])
        stage = stager.put([x])
        ret = stager.get()
        size = stager.size()

    G.finalize()

    from six.moves import queue as Queue
    import threading

    queue = Queue.Queue()
    n = 8

    with self.test_session(use_gpu=True, graph=G) as sess:
      # Stage data in a separate thread which will block
      # when it hits the staging area's capacity and thus
      # not fill the queue with n tokens
      def thread_run():
        for i in range(n):
          sess.run(stage, feed_dict={x: i})
          queue.put(0)

      t = threading.Thread(target=thread_run)
      # Daemon thread: a producer still blocked in `stage` must not keep the
      # interpreter alive.
      t.daemon = True
      t.start()

      # Get tokens from the queue until a timeout occurs
      try:
        for i in range(n):
          queue.get(timeout=TIMEOUT)
      except Queue.Empty:
        pass

      # Should've timed out on the iteration 'capacity'
      if i != capacity:
        self.fail("Expected to timeout on iteration '{}' "
                  "but instead timed out on iteration '{}' "
                  "Staging Area size is '{}' and configured "
                  "capacity is '{}'.".format(capacity, i, sess.run(size),
                                             capacity))

      # Should have capacity elements in the staging area.
      # assertEqual reports both values on failure, unlike assertTrue(a == b).
      self.assertEqual(sess.run(size), capacity)

      # Clear the staging area completely; draining also unblocks the
      # producer so it can stage the remaining items.
      for i in range(n):
        self.assertTrue(sess.run(ret) == [i])

      # It should now be empty
      self.assertEqual(sess.run(size), 0)
コード例 #8
0
    def testDeviceWrapperDynamicExecutionNodesAreAllProperlyLocated(self):
        """Checks that a DeviceWrapper-wrapped GRU cell executes on the GPU.

        The dynamic RNN is built under a CPU device scope while the cell is
        wrapped for the GPU device. A fully traced run must then show
        "gru_cell" nodes in the GPU stats and none in the CPU stats.
        Skipped when no GPU is available.
        """
        if not test.is_gpu_available():
            # Report a skip instead of a silent pass when nothing was checked.
            self.skipTest("Can't perform this test w/o a GPU")

        gpu_dev = test.gpu_device_name()
        with self.test_session(use_gpu=True) as sess:
            with variable_scope.variable_scope(
                    "root", initializer=init_ops.constant_initializer(0.5)):
                x = array_ops.zeros([1, 1, 3])
                cell = rnn_cell_impl.DeviceWrapper(rnn_cell_impl.GRUCell(3),
                                                   gpu_dev)
                with ops.device("/cpu:0"):
                    outputs, _ = rnn.dynamic_rnn(cell=cell,
                                                 inputs=x,
                                                 dtype=dtypes.float32)
                run_metadata = config_pb2.RunMetadata()
                # Full tracing so that per-device node stats are recorded.
                opts = config_pb2.RunOptions(
                    trace_level=config_pb2.RunOptions.FULL_TRACE)

                sess.run([variables_lib.global_variables_initializer()])
                _ = sess.run(outputs, options=opts, run_metadata=run_metadata)

            cpu_stats, gpu_stats = self._retrieve_cpu_gpu_stats(run_metadata)
            # A non-empty list is truthy, so these assert "no gru_cell node on
            # the CPU" and "at least one gru_cell node on the GPU".
            self.assertFalse(
                [s for s in cpu_stats if "gru_cell" in s.node_name])
            self.assertTrue(
                [s for s in gpu_stats if "gru_cell" in s.node_name])
コード例 #9
0
  def testDictionary(self):
    """Stages named tensors in a MapStagingArea and reads them back by key.

    Key i + 1 is staged with x = i while key i (staged with x = i - 1) is
    fetched, so the reduced result must equal 4 * (i - 1)**3 * 128.
    """
    with ops.Graph().as_default() as graph:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32)
        pi = array_ops.placeholder(dtypes.int64)
        gi = array_ops.placeholder(dtypes.int64)
        v = 2. * (array_ops.zeros([128, 128]) + x)
      with ops.device(test.gpu_device_name()):
        stager = data_flow_ops.MapStagingArea(
            [dtypes.float32, dtypes.float32],
            shapes=[[], [128, 128]],
            names=['x', 'v'])
        stage = stager.put(pi, {'x': x, 'v': v})
        # The returned key is not needed; only the value dictionary is used.
        _, staged = stager.get(gi)
        scalar = staged['x']
        matrix = staged['v']
        y = math_ops.reduce_max(scalar * math_ops.matmul(matrix, matrix))

    graph.finalize()

    with self.session(use_gpu=True, graph=graph) as sess:
      # Prime key 0 so that the first fetch in the loop has data.
      sess.run(stage, feed_dict={x: -1, pi: 0})
      for step in range(10):
        feeds = {x: step, pi: step + 1, gi: step}
        _, yval = sess.run([stage, y], feed_dict=feeds)
        prev = step - 1
        self.assertAllClose(4 * prev * prev * prev * 128, yval, rtol=1e-4)
コード例 #10
0
  def testPeek(self):
    """Checks that peeking a MapStagingArea returns values without removal.

    n keys are staged, each key is peeked and must yield its own value, and
    the size must still be n afterwards.
    """
    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.int32, name='x')
        pi = array_ops.placeholder(dtypes.int64)
        gi = array_ops.placeholder(dtypes.int64)
        # 'p' is never fed or fetched here; it is kept so the graph built by
        # this test is unchanged.
        p = array_ops.placeholder(dtypes.int32, name='p')
      with ops.device(test.gpu_device_name()):
        stager = data_flow_ops.MapStagingArea(
            [
                dtypes.int32,
            ], shapes=[[]])
        stage = stager.put(pi, [x], [0])
        peek = stager.peek(gi)
        size = stager.size()

    G.finalize()

    n = 10

    with self.session(use_gpu=True, graph=G) as sess:
      for i in range(n):
        sess.run(stage, feed_dict={x: i, pi: i})

      for i in range(n):
        self.assertTrue(sess.run(peek, feed_dict={gi: i})[0] == i)

      # Compare against n rather than a repeated literal so the check stays
      # correct if n changes; assertEqual reports both values on failure.
      self.assertEqual(sess.run(size), n)
コード例 #11
0
  def testDeviceWrapperDynamicExecutionNodesAreAllProperlyLocated(self):
    """Checks that a DeviceWrapper-wrapped GRU cell executes on the GPU.

    The dynamic RNN is built under a CPU device scope while the cell is
    wrapped for the GPU device. A fully traced run must then show "gru_cell"
    nodes in the GPU stats and none in the CPU stats.
    Skipped when no GPU is available.
    """
    if not test.is_gpu_available():
      # Report a skip instead of a silent pass when nothing was checked.
      self.skipTest("Can't perform this test w/o a GPU")

    gpu_dev = test.gpu_device_name()
    with self.test_session(use_gpu=True) as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        x = array_ops.zeros([1, 1, 3])
        cell = rnn_cell_impl.DeviceWrapper(rnn_cell_impl.GRUCell(3), gpu_dev)
        with ops.device("/cpu:0"):
          outputs, _ = rnn.dynamic_rnn(
              cell=cell, inputs=x, dtype=dtypes.float32)
        run_metadata = config_pb2.RunMetadata()
        # Full tracing so that per-device node stats are recorded.
        opts = config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE)

        sess.run([variables_lib.global_variables_initializer()])
        _ = sess.run(outputs, options=opts, run_metadata=run_metadata)

      step_stats = run_metadata.step_stats
      # Pick whichever of the first two device entries names the GPU; the
      # other one is treated as the CPU entry.
      ix = 0 if gpu_dev in step_stats.dev_stats[0].device else 1
      gpu_stats = step_stats.dev_stats[ix].node_stats
      cpu_stats = step_stats.dev_stats[1 - ix].node_stats
      # A non-empty list is truthy, so these assert "no gru_cell node on the
      # CPU" and "at least one gru_cell node on the GPU".
      self.assertFalse([s for s in cpu_stats if "gru_cell" in s.node_name])
      self.assertTrue([s for s in gpu_stats if "gru_cell" in s.node_name])
コード例 #12
0
    def testSimple(self):
        """Stages a matrix in a MapStagingArea and reads it back by key.

        Key i + 1 is staged with x = i while key i (staged with x = i - 1)
        is fetched, so max(matmul(v, v)) must equal 4 * (i - 1)**2 * 128.
        """
        with ops.Graph().as_default() as graph:
            with ops.device('/cpu:0'):
                x = array_ops.placeholder(dtypes.float32)
                pi = array_ops.placeholder(dtypes.int64)
                gi = array_ops.placeholder(dtypes.int64)
                v = 2. * (array_ops.zeros([128, 128]) + x)
            with ops.device(test.gpu_device_name()):
                stager = data_flow_ops.MapStagingArea([dtypes.float32])
                stage = stager.put(pi, [v], [0])
                # The returned key is not needed by this test.
                _, staged = stager.get(gi)
                y = math_ops.reduce_max(math_ops.matmul(staged, staged))

        graph.finalize()

        with self.session(use_gpu=True, graph=graph) as sess:
            # Prime key 0 so that the first fetch in the loop has data.
            sess.run(stage, feed_dict={x: -1, pi: 0})
            for step in range(10):
                feeds = {x: step, pi: step + 1, gi: step}
                _, yval = sess.run([stage, y], feed_dict=feeds)
                prev = step - 1
                self.assertAllClose(4 * prev * prev * 128, yval, rtol=1e-4)
コード例 #13
0
  def testSizeAndClear(self):
    """Checks MapStagingArea size after two puts and after a clear."""
    with ops.Graph().as_default() as graph:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32, name='x')
        pi = array_ops.placeholder(dtypes.int64)
        gi = array_ops.placeholder(dtypes.int64)
        v = 2. * (array_ops.zeros([128, 128]) + x)
      with ops.device(test.gpu_device_name()):
        stager = data_flow_ops.MapStagingArea(
            [dtypes.float32, dtypes.float32],
            shapes=[[], [128, 128]],
            names=['x', 'v'])
        stage = stager.put(pi, {'x': x, 'v': v})
        size = stager.size()
        clear = stager.clear()

    graph.finalize()

    with self.session(use_gpu=True, graph=graph) as sess:
      # Each put under a new key must grow the size by one.
      for staged_key, expected_size in ((3, 1), (1, 2)):
        sess.run(stage, feed_dict={x: -1, pi: staged_key})
        self.assertEqual(sess.run(size), expected_size)
      # Clearing must drop every entry.
      sess.run(clear)
      self.assertEqual(sess.run(size), 0)
コード例 #14
0
  def testCaseLowering(self):
    """Runs a three-branch Case op with the lowering attribute set.

    For both the GPU and the CPU device, a defun-wrapped function builds a
    Case op over three branch functions that multiply the input by 2, 3 and
    4, marks the op with `_lower_using_switch_merge`, and returns the float
    output. The assertions expect branch indices 0, 1, 2 to select those
    branches and out-of-range indices (7 and -1) to give the x * 4 result.
    """
    for use_gpu in (True, False):
      @eager_function.defun
      def Run(branch, x):
        # Each branch returns an (int, float) pair matching `Tout` below.
        @function.Defun(dtypes.float32)
        def two(x):
          return -1, x * 2

        @function.Defun(dtypes.float32)
        def three(x):
          return 0, x * 3

        @function.Defun(dtypes.float32)
        def four(x):
          return 1, x * 4

        outputs = gen_functional_ops.case(branch, input=[x],
                                          Tout=[dtypes.int32, dtypes.float32],
                                          branches=[two, three, four])

        # `outputs` is the list of output tensors of the Case op. We
        # arbitrarily choose the 0th tensor to get the Case op and set the
        # lowering attribute on it.
        outputs[0].op._set_attr("_lower_using_switch_merge",
                                attr_value_pb2.AttrValue(b=True))
        outputs = array_ops.identity_n(outputs)
        # Only the float output (index 1) is returned to the caller.
        return outputs[1]

      with ops.device(test.gpu_device_name() if use_gpu else "CPU:0"):
        self.assertAllEqual(2 * 1., self.evaluate(Run(0, 1.)))
        self.assertAllEqual(3 * 7., self.evaluate(Run(1, 7.)))
        self.assertAllEqual(4 * -3., self.evaluate(Run(2, -3.)))
        self.assertAllEqual(4 * -4., self.evaluate(Run(7, -4.)))  # >=N default
        self.assertAllEqual(4 * -5., self.evaluate(Run(-1, -5.)))  # <0 default
コード例 #15
0
  def testMemoryLimit(self):
    """Checks that MapStagingArea puts block once `memory_limit` is reached.

    A background thread stages n chunks and posts a token after each put.
    Only `capacity` chunks fit below the memory limit, so the main thread is
    expected to time out n - capacity times while waiting for tokens.
    """
    memory_limit = 512 * 1024  # 512K
    chunk = 200 * 1024  # 200K
    capacity = memory_limit // chunk

    with ops.device('/cpu:0'):
      x = array_ops.placeholder(dtypes.uint8, name='x')
      pi = array_ops.placeholder(dtypes.int64, name='pi')
      # 'gi' is never fed or fetched here; it is kept so the graph built by
      # this test is unchanged.
      gi = array_ops.placeholder(dtypes.int64, name='gi')
    with ops.device(test.gpu_device_name()):
      stager = data_flow_ops.MapStagingArea(
          [dtypes.uint8], memory_limit=memory_limit, shapes=[[]])
      stage = stager.put(pi, [x], [0])
      get = stager.get()
      size = stager.size()

    from six.moves import queue as Queue
    import threading
    import numpy as np

    queue = Queue.Queue()
    n = 5
    missed = 0

    with self.test_session(use_gpu=True) as sess:
      # Stage data in a separate thread which will block
      # when it hits the staging area's capacity and thus
      # not fill the queue with n tokens
      def thread_run():
        for i in range(n):
          sess.run(stage, feed_dict={x: np.full(chunk, i, dtype=np.uint8),
                                     pi: i})
          queue.put(0)

      t = threading.Thread(target=thread_run)
      # Daemon thread: if an assertion below fails while the producer is
      # still blocked in `stage`, it must not keep the interpreter alive.
      t.daemon = True
      t.start()

      # Get tokens from the queue, making notes of when we timeout
      for _ in range(n):
        try:
          queue.get(timeout=0.05)
        except Queue.Empty:
          missed += 1

      # We timed out n - capacity times waiting for queue puts.
      # assertEqual reports both values on failure, unlike assertTrue(a == b).
      self.assertEqual(missed, n - capacity)

      # Clear the staging area out a bit
      for _ in range(n - capacity):
        sess.run(get)

      # This should now succeed
      t.join()

      self.assertEqual(sess.run(size), capacity)

      # Clear out the staging area completely
      for _ in range(capacity):
        sess.run(get)
コード例 #16
0
  def testCapacity(self):
    """Checks that MapStagingArea puts block once `capacity` is reached.

    A background thread stages n items and posts a token after each put.
    Only `capacity` items fit, so the main thread is expected to time out
    n - capacity times while waiting for tokens.
    """
    capacity = 3

    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.int32, name='x')
        pi = array_ops.placeholder(dtypes.int64, name='pi')
        # 'gi' is never fed or fetched here; it is kept so the graph built by
        # this test is unchanged.
        gi = array_ops.placeholder(dtypes.int64, name='gi')
      with ops.device(test.gpu_device_name()):
        stager = data_flow_ops.MapStagingArea(
            [dtypes.int32], capacity=capacity, shapes=[[]])

      # As in the original, these ops are created outside the GPU device
      # scope but inside the graph scope.
      stage = stager.put(pi, [x], [0])
      get = stager.get()
      size = stager.size()

    G.finalize()

    from six.moves import queue as Queue
    import threading

    queue = Queue.Queue()
    n = 5
    missed = 0

    with self.test_session(use_gpu=True, graph=G) as sess:
      # Stage data in a separate thread which will block
      # when it hits the staging area's capacity and thus
      # not fill the queue with n tokens
      def thread_run():
        for i in range(n):
          sess.run(stage, feed_dict={x: i, pi: i})
          queue.put(0)

      t = threading.Thread(target=thread_run)
      # Daemon thread: if an assertion below fails while the producer is
      # still blocked in `stage`, it must not keep the interpreter alive.
      t.daemon = True
      t.start()

      # Get tokens from the queue, making notes of when we timeout
      for _ in range(n):
        try:
          queue.get(timeout=0.05)
        except Queue.Empty:
          missed += 1

      # We timed out n - capacity times waiting for queue puts.
      # assertEqual reports both values on failure, unlike assertTrue(a == b).
      self.assertEqual(missed, n - capacity)

      # Clear the staging area out a bit
      for _ in range(n - capacity):
        sess.run(get)

      # This should now succeed
      t.join()

      self.assertEqual(sess.run(size), capacity)

      # Clear out the staging area completely
      for _ in range(capacity):
        sess.run(get)
コード例 #17
0
  def testMemoryLimit(self):
    """Checks that MapStagingArea puts block once `memory_limit` is reached.

    A daemon thread stages n chunks and posts a token after each put. The
    main thread counts tokens until a timeout; it must time out on iteration
    `capacity`, with exactly `capacity` chunks held in the staging area.
    """
    memory_limit = 512 * 1024  # 512K
    chunk = 200 * 1024  # 200K
    capacity = memory_limit // chunk

    with ops.Graph().as_default() as graph:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.uint8, name='x')
        pi = array_ops.placeholder(dtypes.int64, name='pi')
        gi = array_ops.placeholder(dtypes.int64, name='gi')
      with ops.device(test.gpu_device_name()):
        stager = data_flow_ops.MapStagingArea(
            [dtypes.uint8], memory_limit=memory_limit, shapes=[[]])
        stage = stager.put(pi, [x], [0])
        get = stager.get()
        size = stager.size()

    graph.finalize()

    tokens = queue.Queue()
    n = 8

    with self.session(graph=graph) as sess:

      def producer():
        # Blocks inside `stage` once the staging area is full, so fewer than
        # n tokens are posted until the consumer drains it.
        for key in range(n):
          payload = np.full(chunk, key, dtype=np.uint8)
          sess.run(stage, feed_dict={x: payload, pi: key})
          tokens.put(0)

      worker = threading.Thread(target=producer)
      worker.daemon = True
      worker.start()

      # Consume tokens until waiting for one times out; `step` then holds the
      # iteration at which the producer stalled.
      try:
        for step in range(n):
          tokens.get(timeout=TIMEOUT)
      except queue.Empty:
        pass

      # The stall must happen exactly at iteration `capacity`.
      if step != capacity:
        self.fail("Expected to timeout on iteration '{}' "
                  "but instead timed out on iteration '{}' "
                  "Staging Area size is '{}' and configured "
                  "capacity is '{}'.".format(capacity, step, sess.run(size),
                                             capacity))

      # The staging area must hold exactly `capacity` elements.
      self.assertEqual(sess.run(size), capacity)

      # Drain every element, which also lets the producer finish.
      for _ in range(n):
        sess.run(get)

      self.assertEqual(sess.run(size), 0)
コード例 #18
0
  def testPartialDictInsert(self):
    """Checks complete/incomplete bookkeeping for partial dictionary puts.

    Entries staged with only 'x' and 'f' count as incomplete until 'v' is
    staged under the same key; only then can the entry be fetched.
    """
    with ops.Graph().as_default() as graph:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32)
        f = array_ops.placeholder(dtypes.float32)
        v = array_ops.placeholder(dtypes.float32)
        pi = array_ops.placeholder(dtypes.int64)
        gi = array_ops.placeholder(dtypes.int64)
      with ops.device(test.gpu_device_name()):
        # Test barrier with dictionary
        stager = data_flow_ops.MapStagingArea(
            [dtypes.float32] * 3, names=['x', 'v', 'f'])
        stage_xf = stager.put(pi, {'x': x, 'f': f})
        stage_v = stager.put(pi, {'v': v})
        key, ret = stager.get(gi)
        size = stager.size()
        isize = stager.incomplete_size()

    graph.finalize()

    # Fetch lists reused by the checks below.
    counts = [size, isize]
    entry = [key, ret]

    with self.session(use_gpu=True, graph=graph) as sess:
      # Nothing staged yet: 0 complete, 0 incomplete.
      self.assertTrue(sess.run(counts) == [0, 0])
      # Key 0 gets x and f only, so it is incomplete.
      sess.run(stage_xf, feed_dict={pi: 0, x: 1, f: 2})
      self.assertTrue(sess.run(counts) == [0, 1])
      # Key 1 gets x and f only, so there are two incomplete entries.
      sess.run(stage_xf, feed_dict={pi: 1, x: 1, f: 2})
      self.assertTrue(sess.run(counts) == [0, 2])

      # Supplying v completes key 0.
      sess.run(stage_v, feed_dict={pi: 0, v: 1})
      # 1 complete and 1 incomplete entry.
      self.assertTrue(sess.run(counts) == [1, 1])
      # The completed tuple for key 0 can now be fetched.
      self.assertTrue(
          sess.run(entry, feed_dict={gi: 0}) ==
          [0, {'x': 1, 'f': 2, 'v': 1}])

      # 0 complete and 1 incomplete entry remain.
      self.assertTrue(sess.run(counts) == [0, 1])
      # Supplying v completes key 1.
      sess.run(stage_v, feed_dict={pi: 1, v: 3})
      # The completed tuple for key 1 can now be fetched.
      self.assertTrue(
          sess.run(entry, feed_dict={gi: 1}) ==
          [1, {'x': 1, 'f': 2, 'v': 3}])
コード例 #19
0
  def testMemoryLimit(self):
    """Checks that StagingArea puts block once `memory_limit` is reached.

    A background thread stages n chunks and posts a token after each put.
    Only `capacity` chunks fit below the memory limit, so the main thread is
    expected to time out n - capacity times while waiting for tokens.
    """
    memory_limit = 512 * 1024  # 512K
    chunk = 200 * 1024  # 200K
    capacity = memory_limit // chunk

    with ops.device('/cpu:0'):
      x = array_ops.placeholder(dtypes.uint8, name='x')
    with ops.device(test.gpu_device_name()):
      stager = data_flow_ops.StagingArea(
          [dtypes.uint8], memory_limit=memory_limit, shapes=[[]])
      stage = stager.put([x])
      ret = stager.get()
      size = stager.size()

    from six.moves import queue as Queue
    import threading
    import numpy as np

    queue = Queue.Queue()
    n = 5
    missed = 0

    with self.test_session(use_gpu=True) as sess:
      # Stage data in a separate thread which will block
      # when it hits the staging area's capacity and thus
      # not fill the queue with n tokens
      def thread_run():
        for i in range(n):
          sess.run(stage, feed_dict={x: np.full(chunk, i, dtype=np.uint8)})
          queue.put(0)

      t = threading.Thread(target=thread_run)
      # Daemon thread: if an assertion below fails while the producer is
      # still blocked in `stage`, it must not keep the interpreter alive.
      t.daemon = True
      t.start()

      # Get tokens from the queue, making notes of when we timeout
      for _ in range(n):
        try:
          queue.get(timeout=0.05)
        except Queue.Empty:
          missed += 1

      # We timed out n - capacity times waiting for queue puts.
      # assertEqual reports both values on failure, unlike assertTrue(a == b).
      self.assertEqual(missed, n - capacity)

      # Clear the staging area out a bit
      for i in range(n - capacity):
        self.assertTrue(sess.run(ret)[0] == i)

      # Thread should be able to join now
      t.join()

      self.assertEqual(sess.run(size), capacity)

      # Clear the staging area completely
      for i in range(capacity):
        self.assertTrue(sess.run(ret)[0] == i+(n-capacity))

      self.assertEqual(sess.run(size), 0)
コード例 #20
0
    def testGPU(self):
        """Checks that the profiler reports the MatMul op on the GPU.

        Skipped when no CUDA GPU is available.
        """
        if not test.is_gpu_available(cuda_only=True):
            # Report a skip instead of a silent pass when nothing was checked.
            self.skipTest('Requires a CUDA GPU.')

        gpu_dev = test.gpu_device_name()
        ops.reset_default_graph()
        with ops.device(gpu_dev):
            tfprof_node, run_meta = _run_model()
            self.assertEqual(tfprof_node.children[0].name, 'MatMul')
            self.assertGreater(tfprof_node.children[0].exec_micros, 10)

        # Exactly one MatMul entry is expected under the 'gpu:0' key.
        ret = _extract_node(run_meta, 'MatMul')
        self.assertEqual(len(ret['gpu:0']), 1)
コード例 #21
0
  def testGPU(self):
    """Checks that the profiler reports the MatMul op on the GPU.

    Skipped when no CUDA GPU is available.
    """
    if not test.is_gpu_available(cuda_only=True):
      # Report a skip instead of a silent pass when nothing was checked.
      self.skipTest('Requires a CUDA GPU.')

    gpu_dev = test.gpu_device_name()
    ops.reset_default_graph()
    with ops.device(gpu_dev):
      tfprof_node, run_meta = _run_model()
      self.assertEqual(tfprof_node.children[0].name, 'MatMul')
      self.assertGreater(tfprof_node.children[0].exec_micros, 10)

    # Exactly one MatMul entry is expected under each of the two keys.
    ret = _extract_node(run_meta, 'MatMul')
    self.assertEqual(len(ret['gpu:0']), 1)
    # The run metadata is attached as the failure message to aid debugging.
    self.assertEqual(len(ret['gpu:0/stream:all']), 1, '%s' % run_meta)
コード例 #22
0
 def testSimple(self):
   """Stages a matrix in a StagingArea and reads back the previous one.

   The expected value 4 * (i - 1)**2 * 128 corresponds to the matrix staged
   on the preceding run (x = i - 1).
   """
   with self.test_session(use_gpu=True) as session:
     with ops.device('/cpu:0'):
       x = array_ops.placeholder(dtypes.float32)
       v = 2. * (array_ops.zeros([128, 128]) + x)
     with ops.device(test.gpu_device_name()):
       stager = data_flow_ops.StagingArea([dtypes.float32])
       stage = stager.put([v])
       staged = stager.get()
       y_max = math_ops.reduce_max(math_ops.matmul(staged, staged))
     # Prime the staging area so that the first fetch in the loop has data.
     session.run(stage, feed_dict={x: -1})
     for step in range(10):
       _, yval = session.run([stage, y_max], feed_dict={x: step})
       prev = step - 1
       self.assertAllClose(4 * prev * prev * 128, yval, rtol=1e-4)
コード例 #23
0
  def testColocation(self):
    """Checks the devices assigned to StagingArea put and get results."""
    gpu_dev = test.gpu_device_name()

    with ops.device('/cpu:0'):
      x = array_ops.placeholder(dtypes.float32)
      v = 2. * (array_ops.zeros([128, 128]) + x)
    with ops.device(gpu_dev):
      stager = data_flow_ops.StagingArea([dtypes.float32])
      put_op = stager.put([v])
      # With a non-empty GPU device name the canonical GPU name is expected;
      # otherwise the (empty) name itself is expected.
      if gpu_dev:
        expected_device = '/device:GPU:0'
      else:
        expected_device = gpu_dev
      self.assertEqual(put_op.device, expected_device)
    with ops.device('/cpu:0'):
      fetched = stager.get()
      self.assertEqual(fetched.device, '/device:CPU:0')
コード例 #24
0
    def testGPU(self):
        """Checks the 'MatMul' profile produced by `_run_model()` on the GPU.

        Returns without asserting anything when
        `test.is_gpu_available(cuda_only=True)` is false. The stream-level
        check is skipped when `test.is_built_with_rocm()` is true.
        """
        cuda_gpu_present = test.is_gpu_available(cuda_only=True)
        if not cuda_gpu_present:
            return

        gpu_dev = test.gpu_device_name()
        ops.reset_default_graph()
        with ops.device(gpu_dev):
            tfprof_node, run_meta = _run_model()
            first_child = tfprof_node.children[0]
            self.assertEqual(first_child.name, 'MatMul')
            self.assertGreater(first_child.exec_micros, 10)

        matmul_entries = _extract_node(run_meta, 'MatMul')
        self.assertEqual(len(matmul_entries['gpu:0']), 1)
        # Stream tracing is currently not available in TensorFlow with ROCm.
        rocm_build = test.is_built_with_rocm()
        if not rocm_build:
            self.assertEqual(
                len(matmul_entries['gpu:0/stream:all']), 1, '%s' % run_meta)
コード例 #25
0
  def testPeek(self):
    """Stages the integers 0..9 and then peeks at indices 0..9."""
    with ops.device('/cpu:0'):
      x = array_ops.placeholder(dtypes.int32, name='x')
      p = array_ops.placeholder(dtypes.int32, name='p')
    with ops.device(test.gpu_device_name()):
      stager = data_flow_ops.StagingArea([dtypes.int32, ], shapes=[[]])
      stage = stager.put([x])
      peek = stager.peek(p)
      # The get op is only created here; this test never runs it.
      stager.get()

    with self.test_session(use_gpu=True) as sess:
      for value in range(10):
        sess.run(stage, feed_dict={x: value})

      for index in range(10):
        peeked = sess.run(peek, feed_dict={p: index})
        self.assertTrue(peeked == index)
コード例 #26
0
    def testColocation(self):
        """Checks the device of StagingArea ops built in a fresh graph."""
        gpu_dev = test.gpu_device_name()

        with ops.Graph().as_default() as graph:
            with ops.device('/cpu:0'):
                x = array_ops.placeholder(dtypes.float32)
                zeros = array_ops.zeros([128, 128])
                v = 2. * (zeros + x)
            with ops.device(gpu_dev):
                stager = data_flow_ops.StagingArea([dtypes.float32])
                put_op = stager.put([v])
                # Names containing 'gpu' are expected to be reported as
                # '/device:GPU:0'; any other name is compared unchanged.
                if 'gpu' in gpu_dev:
                    expected_name = '/device:GPU:0'
                else:
                    expected_name = gpu_dev
                self.assertEqual(put_op.device, expected_name)
            with ops.device('/cpu:0'):
                staged = stager.get()[0]
                self.assertEqual(staged.device, '/device:CPU:0')

        graph.finalize()
コード例 #27
0
  def testMultiDevices(self):
    """Adds two session-handle tensors created under different device scopes."""
    with self.cached_session() as sess:
      with ops.device(test.gpu_device_name()):
        gpu_const = constant_op.constant(1.0)
        gpu_handle = self.evaluate(session_ops.get_session_handle(gpu_const))
      with ops.device("/cpu:0"):
        cpu_const = constant_op.constant(2.0)
        cpu_handle = self.evaluate(session_ops.get_session_handle(cpu_const))

      # Rebuild tensors from the raw handle strings, then add them.
      a_p, a_t = session_ops.get_session_tensor(gpu_handle.handle,
                                                dtypes.float32)
      b_p, b_t = session_ops.get_session_tensor(cpu_handle.handle,
                                                dtypes.float32)
      total = math_ops.add(a_t, b_t)
      total_handle_op = session_ops.get_session_handle(total)
      feeds = {a_p: gpu_handle.handle, b_p: cpu_handle.handle}
      total_handle = sess.run(total_handle_op, feed_dict=feeds)
      self.assertEqual(3.0, total_handle.eval())
コード例 #28
0
 def testMultiple(self):
   """Stages (x, v) pairs by key in a MapStagingArea and checks the result."""
   with self.test_session(use_gpu=True) as sess:
     with ops.device('/cpu:0'):
       x = array_ops.placeholder(dtypes.float32)
       pi = array_ops.placeholder(dtypes.int64)
       gi = array_ops.placeholder(dtypes.int64)
       zeros = array_ops.zeros([128, 128])
       v = 2. * (zeros + x)
     with ops.device(test.gpu_device_name()):
       stager = data_flow_ops.MapStagingArea([dtypes.float32, dtypes.float32])
       stage = stager.put(pi, [x, v], [0, 1])
       # The key returned by get is not used by this test.
       _, (z, staged_v) = stager.get(gi)
       product = math_ops.matmul(staged_v, staged_v)
       y = math_ops.reduce_max(z * product)

     # Key 0 is staged up front; iteration i then stages key i + 1 while
     # fetching key i.
     sess.run(stage, feed_dict={x: -1, pi: 0})
     for i in range(10):
       feeds = {x: i, pi: i + 1, gi: i}
       _, yval = sess.run([stage, y], feed_dict=feeds)
       expected = 4 * (i - 1) ** 3 * 128
       self.assertAllClose(expected, yval, rtol=1e-4)
コード例 #29
0
  def testMultiDevices(self):
    """Adds two session-handle tensors created under different device scopes."""
    with self.test_session() as sess:
      with ops.device(test.gpu_device_name()):
        gpu_const = constant_op.constant(1.0)
        gpu_handle = sess.run(session_ops.get_session_handle(gpu_const))
      with ops.device("/cpu:0"):
        cpu_const = constant_op.constant(2.0)
        cpu_handle = sess.run(session_ops.get_session_handle(cpu_const))

      # Rebuild tensors from the raw handle strings, then add them.
      a_p, a_t = session_ops.get_session_tensor(gpu_handle.handle,
                                                dtypes.float32)
      b_p, b_t = session_ops.get_session_tensor(cpu_handle.handle,
                                                dtypes.float32)
      total = math_ops.add(a_t, b_t)
      total_handle_op = session_ops.get_session_handle(total)
      feeds = {a_p: gpu_handle.handle, b_p: cpu_handle.handle}
      total_handle = sess.run(total_handle_op, feed_dict=feeds)
      self.assertEqual(3.0, total_handle.eval())
コード例 #30
0
    def testGPU(self):
        """Checks the 'MatMul' profile produced by `_run_model()` on the GPU.

        Returns without asserting anything when
        `test.is_gpu_available(cuda_only=True)` is false. The stream-level
        check is skipped when `test.is_built_with_rocm()` is true.
        """
        cuda_gpu_present = test.is_gpu_available(cuda_only=True)
        if not cuda_gpu_present:
            return

        gpu_dev = test.gpu_device_name()
        ops.reset_default_graph()
        with ops.device(gpu_dev):
            tfprof_node, run_meta = _run_model()
            first_child = tfprof_node.children[0]
            self.assertEqual(first_child.name, 'MatMul')
            self.assertGreater(first_child.exec_micros, 10)

        matmul_entries = _extract_node(run_meta, 'MatMul')
        self.assertEqual(len(matmul_entries['gpu:0']), 1)
        # Skip the next check on the ROCm platform: stream level tracing is
        # not yet supported there.
        rocm_build = test.is_built_with_rocm()
        if not rocm_build:
            self.assertEqual(
                len(matmul_entries['gpu:0/stream:all']), 1, '%s' % run_meta)
コード例 #31
0
  def testColocation(self):
    """Checks the device of StagingArea ops built in a fresh graph."""
    gpu_dev = test.gpu_device_name()

    with ops.Graph().as_default() as graph:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32)
        zeros = array_ops.zeros([128, 128])
        v = 2. * (zeros + x)
      with ops.device(gpu_dev):
        stager = data_flow_ops.StagingArea([dtypes.float32])
        put_op = stager.put([v])
        # Names containing 'gpu' are expected to be reported as
        # '/device:GPU:0'; any other name is compared unchanged.
        if 'gpu' in gpu_dev:
          expected_name = '/device:GPU:0'
        else:
          expected_name = gpu_dev
        self.assertEqual(put_op.device, expected_name)
      with ops.device('/cpu:0'):
        staged = stager.get()[0]
        self.assertEqual(staged.device, '/device:CPU:0')

    graph.finalize()
コード例 #32
0
    def testPartialIndexInsert(self):
        """Fills MapStagingArea entries with two partial puts per key.

        Indices 0 and 2 are staged first and index 1 later; the size and
        incomplete_size ops are checked after each step.
        """
        with ops.Graph().as_default() as graph:
            with ops.device('/cpu:0'):
                x = array_ops.placeholder(dtypes.float32)
                f = array_ops.placeholder(dtypes.float32)
                v = array_ops.placeholder(dtypes.float32)
                pi = array_ops.placeholder(dtypes.int64)
                gi = array_ops.placeholder(dtypes.int64)
            with ops.device(test.gpu_device_name()):
                element_dtypes = [dtypes.float32, dtypes.float32,
                                  dtypes.float32]
                stager = data_flow_ops.MapStagingArea(element_dtypes)
                stage_xf = stager.put(pi, [x, f], [0, 2])
                stage_v = stager.put(pi, [v], [1])
                key, ret = stager.get(gi)
                size = stager.size()
                isize = stager.incomplete_size()

        graph.finalize()

        with self.session(graph=graph) as sess:

            def check_counts(expected):
                # `expected` is compared against the fetched [size, isize].
                self.assertTrue(sess.run([size, isize]) == expected)

            def check_get(lookup_key, expected):
                # Fetches [key, ret] for `lookup_key` and compares the pair.
                fetched = sess.run([key, ret], feed_dict={gi: lookup_key})
                self.assertTrue(fetched == expected)

            # 0 complete and incomplete entries
            check_counts([0, 0])
            # Stage key 0, x and f tuple entries
            sess.run(stage_xf, feed_dict={pi: 0, x: 1, f: 2})
            check_counts([0, 1])
            # Stage key 1, x and f tuple entries
            sess.run(stage_xf, feed_dict={pi: 1, x: 1, f: 2})
            check_counts([0, 2])

            # Now complete key 0 with tuple entry v
            sess.run(stage_v, feed_dict={pi: 0, v: 1})
            # 1 complete and 1 incomplete entry
            check_counts([1, 1])
            # We can now obtain tuple associated with key 0
            check_get(0, [0, [1, 1, 2]])

            # 0 complete and 1 incomplete entry
            check_counts([0, 1])
            # Now complete key 1 with tuple entry v
            sess.run(stage_v, feed_dict={pi: 1, v: 3})
            # We can now obtain tuple associated with key 1
            check_get(1, [1, [1, 3, 2]])
コード例 #33
0
ファイル: stage_op_test.py プロジェクト: whoozle/tensorflow
  def testMultiple(self):
    """Stages (x, v) pairs in a StagingArea and checks the reduced product."""
    with ops.Graph().as_default() as graph:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32)
        zeros = array_ops.zeros([128, 128])
        v = 2. * (zeros + x)
      with ops.device(test.gpu_device_name()):
        stager = data_flow_ops.StagingArea([dtypes.float32, dtypes.float32])
        stage = stager.put([x, v])
        z, staged_v = stager.get()
        product = math_ops.matmul(staged_v, staged_v)
        y = math_ops.reduce_max(z * product)

    graph.finalize()

    with self.session(graph=graph) as sess:
      # Prime the staging area once before the combined put-and-get loop.
      sess.run(stage, feed_dict={x: -1})
      for i in range(10):
        feeds = {x: i}
        _, yval = sess.run([stage, y], feed_dict=feeds)
        # The expectation is written in terms of i - 1, not i.
        expected = 4 * (i - 1) ** 3 * 128
        self.assertAllClose(expected, yval, rtol=1e-4)
コード例 #34
0
  def testMultiple(self):
    """Stages (x, v) pairs in a StagingArea and checks the reduced product."""
    with ops.Graph().as_default() as graph:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32)
        zeros = array_ops.zeros([128, 128])
        v = 2. * (zeros + x)
      with ops.device(test.gpu_device_name()):
        stager = data_flow_ops.StagingArea([dtypes.float32, dtypes.float32])
        stage = stager.put([x, v])
        z, staged_v = stager.get()
        product = math_ops.matmul(staged_v, staged_v)
        y = math_ops.reduce_max(z * product)

    graph.finalize()

    with self.session(use_gpu=True, graph=graph) as sess:
      # Prime the staging area once before the combined put-and-get loop.
      sess.run(stage, feed_dict={x: -1})
      for i in range(10):
        feeds = {x: i}
        _, yval = sess.run([stage, y], feed_dict=feeds)
        # The expectation is written in terms of i - 1, not i.
        expected = 4 * (i - 1) ** 3 * 128
        self.assertAllClose(expected, yval, rtol=1e-4)
コード例 #35
0
 def testDictionary(self):
   """Stages a dict keyed by 'x' and 'v' and checks the reduced product."""
   with self.test_session(use_gpu=True) as sess:
     with ops.device('/cpu:0'):
       x = array_ops.placeholder(dtypes.float32)
       zeros = array_ops.zeros([128, 128])
       v = 2. * (zeros + x)
     with ops.device(test.gpu_device_name()):
       stager = data_flow_ops.StagingArea(
           [dtypes.float32, dtypes.float32],
           shapes=[[], [128, 128]],
           names=['x', 'v'])
       stage = stager.put({'x': x, 'v': v})
       # get() is indexed by the same names that were given to put().
       staged = stager.get()
       z = staged['x']
       staged_v = staged['v']
       product = math_ops.matmul(staged_v, staged_v)
       y = math_ops.reduce_max(z * product)

     # Prime the staging area once before the combined put-and-get loop.
     sess.run(stage, feed_dict={x: -1})
     for i in range(10):
       feeds = {x: i}
       _, yval = sess.run([stage, y], feed_dict=feeds)
       expected = 4 * (i - 1) ** 3 * 128
       self.assertAllClose(expected, yval, rtol=1e-4)
コード例 #36
0
 def testDictionary(self):
   """Stages a dict keyed by 'x' and 'v' and checks the reduced product."""
   with self.test_session(use_gpu=True) as sess:
     with ops.device('/cpu:0'):
       x = array_ops.placeholder(dtypes.float32)
       zeros = array_ops.zeros([128, 128])
       v = 2. * (zeros + x)
     with ops.device(test.gpu_device_name()):
       stager = data_flow_ops.StagingArea(
           [dtypes.float32, dtypes.float32],
           shapes=[[], [128, 128]],
           names=['x', 'v'])
       stage = stager.put({'x': x, 'v': v})
       # get() is indexed by the same names that were given to put().
       staged = stager.get()
       z = staged['x']
       staged_v = staged['v']
       product = math_ops.matmul(staged_v, staged_v)
       y = math_ops.reduce_max(z * product)

     # Prime the staging area once before the combined put-and-get loop.
     sess.run(stage, feed_dict={x: -1})
     for i in range(10):
       feeds = {x: i}
       _, yval = sess.run([stage, y], feed_dict=feeds)
       expected = 4 * (i - 1) ** 3 * 128
       self.assertAllClose(expected, yval, rtol=1e-4)
コード例 #37
0
  def testHandleMover(self):
    """Feeds session handles into a graph that multiplies the tensor by 10."""
    with self.test_session() as sess:
      # Return a handle.
      ten = constant_op.constant(10)
      five = constant_op.constant(5)
      product = math_ops.multiply(ten, five)
      handle_op = session_ops.get_session_handle(product)
      h = sess.run(handle_op)

      # Feed a tensor handle.
      f, x = session_ops.get_session_tensor(h.handle, dtypes.int32)
      y = math_ops.multiply(x, 10)
      first_result = sess.run(y, feed_dict={f: h.handle})
      self.assertEqual(500, first_result)

      # Feed another tensor handle.
      with ops.device(test.gpu_device_name()):
        gpu_ten = constant_op.constant(10)
        gpu_handle_op = session_ops.get_session_handle(gpu_ten)
        h = sess.run(gpu_handle_op)
        second_result = sess.run(y, feed_dict={f: h.handle})
        self.assertEqual(100, second_result)
コード例 #38
0
ファイル: session_ops_test.py プロジェクト: qwerzou1/shibie
    def testHandleMover(self):
        """Feeds session handles into a graph that multiplies the tensor by 10."""
        with self.cached_session() as sess:
            # Return a handle.
            ten = constant_op.constant(10)
            five = constant_op.constant(5)
            product = math_ops.multiply(ten, five)
            handle_op = session_ops.get_session_handle(product)
            h = self.evaluate(handle_op)

            # Feed a tensor handle.
            f, x = session_ops.get_session_tensor(h.handle, dtypes.int32)
            y = math_ops.multiply(x, 10)
            first_result = sess.run(y, feed_dict={f: h.handle})
            self.assertEqual(500, first_result)

            # Feed another tensor handle.
            with ops.device(test.gpu_device_name()):
                gpu_ten = constant_op.constant(10)
                gpu_handle_op = session_ops.get_session_handle(gpu_ten)
                h = self.evaluate(gpu_handle_op)
                second_result = sess.run(y, feed_dict={f: h.handle})
                self.assertEqual(100, second_result)
コード例 #39
0
  def testPartialIndexGets(self):
    """Stages a complete three-element tuple and retrieves it in two gets.

    Indices 0 and 2 are fetched first and index 1 afterwards; the size and
    incomplete_size ops are checked around each get.
    """
    with ops.Graph().as_default() as graph:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32)
        f = array_ops.placeholder(dtypes.float32)
        v = array_ops.placeholder(dtypes.float32)
        pi = array_ops.placeholder(dtypes.int64)
        # Created but neither fed nor fetched anywhere in this test.
        pei = array_ops.placeholder(dtypes.int64)
        gi = array_ops.placeholder(dtypes.int64)
      with ops.device(test.gpu_device_name()):
        # Test again with partial index gets
        element_dtypes = [dtypes.float32, dtypes.float32, dtypes.float32]
        stager = data_flow_ops.MapStagingArea(element_dtypes)
        stage_xvf = stager.put(pi, [x, v, f], [0, 1, 2])
        key_xf, get_xf = stager.get(gi, [0, 2])
        key_v, get_v = stager.get(gi, [1])
        size = stager.size()
        isize = stager.incomplete_size()

    graph.finalize()

    with self.session(use_gpu=True, graph=graph) as sess:

      def check_counts(expected):
        # `expected` is compared against the fetched [size, isize] pair.
        self.assertTrue(sess.run([size, isize]) == expected)

      # Stage complete tuple
      sess.run(stage_xvf, feed_dict={pi: 0, x: 1, f: 2, v: 3})
      check_counts([1, 0])

      # Partial get using indices
      xf_result = sess.run([key_xf, get_xf], feed_dict={gi: 0})
      self.assertTrue(xf_result == [0, [1, 2]])

      # Still some of key 0 left
      check_counts([1, 0])

      # Partial get of remaining index
      v_result = sess.run([key_v, get_v], feed_dict={gi: 0})
      self.assertTrue(v_result == [0, [3]])

      # All gone
      check_counts([0, 0])
コード例 #40
0
ファイル: map_stage_op_test.py プロジェクト: Joetz/tensorflow
  def testColocation(self):
    """Checks the device of MapStagingArea put, get and peek results."""
    gpu_dev = test.gpu_device_name()

    with ops.Graph().as_default() as graph:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32)
        zeros = array_ops.zeros([128, 128])
        v = 2. * (zeros + x)
      with ops.device(gpu_dev):
        stager = data_flow_ops.MapStagingArea([dtypes.float32])
        put_op = stager.put(1, [v], [0])
        # A non-empty device name is expected to be reported as
        # '/device:GPU:0'; an empty one is compared as-is.
        if gpu_dev:
          expected_device = '/device:GPU:0'
        else:
          expected_device = gpu_dev
        self.assertEqual(put_op.device, expected_device)
      with ops.device('/cpu:0'):
        _, keyed_get = stager.get(1)
        peeked = stager.peek(1)
        _, unkeyed_get = stager.get()
        for result in (keyed_get, peeked, unkeyed_get):
          self.assertEqual(result.device, '/device:CPU:0')

    graph.finalize()
コード例 #41
0
  def testColocation(self):
    """Checks the device of MapStagingArea put, get and peek results."""
    gpu_dev = test.gpu_device_name()

    with ops.Graph().as_default() as graph:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32)
        zeros = array_ops.zeros([128, 128])
        v = 2. * (zeros + x)
      with ops.device(gpu_dev):
        stager = data_flow_ops.MapStagingArea([dtypes.float32])
        put_op = stager.put(1, [v], [0])
        # A non-empty device name is expected to be reported as
        # '/device:GPU:0'; an empty one is compared as-is.
        if gpu_dev:
          expected_device = '/device:GPU:0'
        else:
          expected_device = gpu_dev
        self.assertEqual(put_op.device, expected_device)
      with ops.device('/cpu:0'):
        _, keyed_get = stager.get(1)
        peeked = stager.peek(1)
        _, unkeyed_get = stager.get()
        for result in (keyed_get, peeked, unkeyed_get):
          self.assertEqual(result.device, '/device:CPU:0')

    graph.finalize()
コード例 #42
0
  def testSizeAndClear(self):
    """Checks StagingArea size after two puts and again after clear."""
    with ops.device('/cpu:0'):
      x = array_ops.placeholder(dtypes.float32, name='x')
      zeros = array_ops.zeros([128, 128])
      v = 2. * (zeros + x)
    with ops.device(test.gpu_device_name()):
      stager = data_flow_ops.StagingArea(
          [dtypes.float32, dtypes.float32],
          shapes=[[], [128, 128]],
          names=['x', 'v'])
      stage = stager.put({'x': x, 'v': v})
      # The get op is only created here; this test never runs it.
      stager.get()
      size = stager.size()
      clear = stager.clear()

    with self.test_session(use_gpu=True) as sess:
      # Each put is followed by a size check: 1 after the first, 2 after
      # the second.
      for expected_size in (1, 2):
        sess.run(stage, feed_dict={x: -1})
        self.assertEqual(sess.run(size), expected_size)
      sess.run(clear)
      self.assertEqual(sess.run(size), 0)
コード例 #43
0
    def testGPU(self):
        """Checks the 'MatMul' profile and the device/stream keys it is under.

        Returns without asserting anything when
        `test.is_gpu_available(cuda_only=True)` is false.
        """
        cuda_gpu_present = test.is_gpu_available(cuda_only=True)
        if not cuda_gpu_present:
            return

        gpu_dev = test.gpu_device_name()
        ops.reset_default_graph()
        with ops.device(gpu_dev):
            tfprof_node, run_meta = _run_model()
            first_child = tfprof_node.children[0]
            self.assertEqual(first_child.name, 'MatMul')
            self.assertGreater(first_child.exec_micros, 10)

        ret = _extract_node(run_meta, ['MatMul', 'MatMul:MatMul'])
        self.assertEqual(len(ret), 3)
        # Drop the plain device entry so that only stream entries remain.
        device_key = '/job:localhost/replica:0/task:0' + gpu_dev
        self.assertTrue(device_key in ret)
        del ret[device_key]

        # Every remaining key must mention a stream of this device, and at
        # least one of them must be the 'stream:all' key.
        stream_marker = gpu_dev + '/stream'
        all_stream_marker = gpu_dev + '/stream:all'
        has_all_stream = False
        for key in ret:
            self.assertTrue(stream_marker in key)
            has_all_stream = has_all_stream or all_stream_marker in key
        self.assertTrue(has_all_stream)
コード例 #44
0
  def testHandleGC(self):
    """Repeatedly adds 1 to a session-handle tensor, rebinding the handle.

    The test contains no assertions; it only has to run without raising.
    """
    with self.test_session() as sess:
      # initial values live on CPU
      with ops.device("/cpu:0"):
        one = constant_op.constant(1, dtype=dtypes.float32)
        one_handle = sess.run(session_ops.get_session_handle(one))
        x_handle = sess.run(session_ops.get_session_handle(one))

      # addition lives on GPU
      with ops.device(test.gpu_device_name()):
        add_h1, add_t1 = session_ops.get_session_tensor(
            one_handle.handle, dtypes.float32)
        add_h2, add_t2 = session_ops.get_session_tensor(
            x_handle.handle, dtypes.float32)
        total = math_ops.add(add_t1, add_t2)
        add_output = session_ops.get_session_handle(total)

      # add 1 to tensor 20 times; each run replaces the previous x_handle
      for _ in range(20):
        feeds = {add_h1: one_handle.handle, add_h2: x_handle.handle}
        x_handle = sess.run(add_output, feed_dict=feeds)
コード例 #45
0
 def _VerifyRunGraph(self, n, m, k, transpose_a, transpose_b, dtype):
   """Runs the matmul benchmark graph once and checks the returned duration.

   Args:
     n, m, k: forwarded unchanged to `MatmulBenchmark.run_graph`.
     transpose_a, transpose_b: forwarded unchanged to `run_graph`.
     dtype: forwarded unchanged to `run_graph`.
   """
   bench = matmul_benchmark.MatmulBenchmark()
   device = googletest.gpu_device_name()
   # The literal 1 is passed in the slot between transpose_b and dtype.
   elapsed = bench.run_graph(device, n, m, k, transpose_a, transpose_b, 1,
                             dtype)
   self.assertGreater(elapsed, 1e-6)
コード例 #46
0
 def benchmark_adjust_saturation_in_yiq_gpu_all(self):
   """Runs `_benchmark_adjust_saturation_in_yiq` on the GPU device with None."""
   device = test.gpu_device_name()
   self._benchmark_adjust_saturation_in_yiq(device, None)
コード例 #47
0
  def testMemoryLimit(self):
    """Checks that staging stalls once `capacity` chunks are staged.

    A background thread stages `n` chunks into a MapStagingArea created with
    `memory_limit` and puts one token on a queue after each successful put.
    The main thread counts tokens until `queue.get` times out and expects
    that to happen on iteration `capacity`.
    """
    memory_limit = 512 * 1024  # 512K
    chunk = 200 * 1024  # 200K
    capacity = memory_limit // chunk  # 2 whole chunks fit under the limit

    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.uint8, name='x')
        pi = array_ops.placeholder(dtypes.int64, name='pi')
        gi = array_ops.placeholder(dtypes.int64, name='gi')
      with ops.device(test.gpu_device_name()):
        stager = data_flow_ops.MapStagingArea(
            [dtypes.uint8], memory_limit=memory_limit, shapes=[[]])
        stage = stager.put(pi, [x], [0])
        get = stager.get()
        size = stager.size()

    G.finalize()

    from six.moves import queue as Queue
    import threading
    import numpy as np

    queue = Queue.Queue()
    n = 8

    with self.session(use_gpu=True, graph=G) as sess:
      # Stage data in a separate thread which will block
      # when it hits the staging area's capacity and thus
      # not fill the queue with n tokens
      def thread_run():
        for i in range(n):
          data = np.full(chunk, i, dtype=np.uint8)
          sess.run(stage, feed_dict={x: data, pi: i})
          queue.put(0)

      t = threading.Thread(target=thread_run)
      # Daemon thread: it does not keep the process alive if it is still
      # blocked when the test ends.
      t.daemon = True
      t.start()

      # Get tokens from the queue until a timeout occurs
      try:
        for i in range(n):
          queue.get(timeout=TIMEOUT)
      except Queue.Empty:
        pass

      # `i` still holds the index of the iteration whose queue.get timed out
      # (or n - 1 if no timeout occurred).
      # Should've timed out on the iteration 'capacity'
      if not i == capacity:
        self.fail("Expected to timeout on iteration '{}' "
                  "but instead timed out on iteration '{}' "
                  "Staging Area size is '{}' and configured "
                  "capacity is '{}'.".format(capacity, i, sess.run(size),
                                             capacity))

      # Should have capacity elements in the staging area
      self.assertTrue(sess.run(size) == capacity)

      # Clear the staging area completely
      # (one get per chunk the background thread stages in total)
      for i in range(n):
        sess.run(get)

      self.assertTrue(sess.run(size) == 0)
コード例 #48
0
  def testPartialDictGetsAndPeeks(self):
    """Exercises partial puts, peeks, gets and pops on a named MapStagingArea.

    The staging area holds three float32 elements named 'x', 'v' and 'f'.
    Entries are staged as {'x', 'f'} first and completed with {'v'} later;
    the size and incomplete_size ops are checked after each step.
    """
    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32)
        f = array_ops.placeholder(dtypes.float32)
        v = array_ops.placeholder(dtypes.float32)
        pi = array_ops.placeholder(dtypes.int64)
        pei = array_ops.placeholder(dtypes.int64)
        gi = array_ops.placeholder(dtypes.int64)
      with ops.device(test.gpu_device_name()):
        # Test barrier with dictionary
        stager = data_flow_ops.MapStagingArea(
            [dtypes.float32, dtypes.float32, dtypes.float32],
            names=['x', 'v', 'f'])
        stage_xf = stager.put(pi, {'x': x, 'f': f})
        stage_v = stager.put(pi, {'v': v})
        peek_xf = stager.peek(pei, ['x', 'f'])
        peek_v = stager.peek(pei, ['v'])
        key_xf, get_xf = stager.get(gi, ['x', 'f'])
        key_v, get_v = stager.get(gi, ['v'])
        # The pop of 'x' and 'f' takes no key; the pop of 'v' is keyed by the
        # `pi` placeholder.
        pop_key_xf, pop_xf = stager.get(indices=['x', 'f'])
        pop_key_v, pop_v = stager.get(pi, ['v'])
        size = stager.size()
        isize = stager.incomplete_size()

    G.finalize()

    with self.session(use_gpu=True, graph=G) as sess:
      # 0 complete and incomplete entries
      self.assertTrue(sess.run([size, isize]) == [0, 0])
      # Stage key 0, x and f tuple entries
      sess.run(stage_xf, feed_dict={pi: 0, x: 1, f: 2})
      self.assertTrue(sess.run([size, isize]) == [0, 1])
      # Stage key 1, x and f tuple entries
      sess.run(stage_xf, feed_dict={pi: 1, x: 1, f: 2})
      self.assertTrue(sess.run([size, isize]) == [0, 2])

      # Now complete key 0 with tuple entry v
      sess.run(stage_v, feed_dict={pi: 0, v: 1})
      # 1 complete and 1 incomplete entry
      self.assertTrue(sess.run([size, isize]) == [1, 1])

      # We can now peek at 'x' and 'f' values associated with key 0
      self.assertTrue(sess.run(peek_xf, feed_dict={pei: 0}) == {'x': 1, 'f': 2})
      # Peek at 'v' value associated with key 0
      self.assertTrue(sess.run(peek_v, feed_dict={pei: 0}) == {'v': 1})
      # 1 complete and 1 incomplete entry
      self.assertTrue(sess.run([size, isize]) == [1, 1])

      # We can now obtain 'x' and 'f' values associated with key 0
      self.assertTrue(
          sess.run([key_xf, get_xf], feed_dict={
              gi: 0
          }) == [0, {
              'x': 1,
              'f': 2
          }])
      # Still have 1 complete and 1 incomplete entry
      self.assertTrue(sess.run([size, isize]) == [1, 1])

      # We can no longer get 'x' and 'f' from key 0
      with self.assertRaises(errors.InvalidArgumentError) as cm:
        sess.run([key_xf, get_xf], feed_dict={gi: 0})

      exc_str = ("Tensor at index '0' for key '0' " 'has already been removed.')

      self.assertTrue(exc_str in cm.exception.message)

      # Obtain 'v' value associated with key 0
      self.assertTrue(
          sess.run([key_v, get_v], feed_dict={
              gi: 0
          }) == [0, {
              'v': 1
          }])
      # 0 complete and 1 incomplete entry
      self.assertTrue(sess.run([size, isize]) == [0, 1])

      # Now complete key 1 with tuple entry v
      sess.run(stage_v, feed_dict={pi: 1, v: 1})
      # 1 complete and 0 incomplete entries
      self.assertTrue(sess.run([size, isize]) == [1, 0])

      # Pop without key to obtain 'x' and 'f' values associated with key 1
      self.assertTrue(sess.run([pop_key_xf, pop_xf]) == [1, {'x': 1, 'f': 2}])
      # Still 1 complete and 0 incomplete entries
      self.assertTrue(sess.run([size, isize]) == [1, 0])
      # We can now obtain the 'v' value associated with key 1
      self.assertTrue(
          sess.run([pop_key_v, pop_v], feed_dict={
              pi: 1
          }) == [1, {
              'v': 1
          }])
      # Nothing is left
      self.assertTrue(sess.run([size, isize]) == [0, 0])
コード例 #49
0
 def run_test_gpu(self, n, m, k, transpose_a, transpose_b, dtype, num_iters):
   """Calls `self.run_graph` with the GPU device name as its first argument.

   Note that `num_iters` is passed to `run_graph` before `dtype`, the reverse
   of their order in this method's signature.
   """
   device = test.gpu_device_name()
   self.run_graph(device, n, m, k, transpose_a, transpose_b, num_iters, dtype)