def testLoopCPU(self):
        """Check profiler totals for the looped MatMul under a CPU device scope.

        Runs the model returned by `_run_loop_model()` inside an
        `ops.device('/cpu:0')` scope and verifies that the tfprof node for
        the MatMul reports a run count of 4, and that both its
        `cpu_exec_micros` and `exec_micros` equal the sum of
        `op_end_rel_micros` over the entries extracted from the run metadata
        for the CPU device.
        """
        matmul_name = 'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul'
        cpu_device = '/job:localhost/replica:0/task:0/cpu:0'

        ops.reset_default_graph()
        with ops.device('/cpu:0'):
            tfprof_node, run_meta = _run_loop_model()
            # The while-loop caused a node to appear 4 times in scheduling.
            cpu_stats = _extract_node(run_meta, matmul_name)[cpu_device]
            self.assertEqual(len(cpu_stats), 4)

            # Expected time is the sum over every scheduled execution.
            expected_micros = sum(
                stat.op_end_rel_micros for stat in cpu_stats)

            matmul = lib.SearchTFProfNode(tfprof_node, matmul_name)

            self.assertEqual(matmul.run_count, 4)
            self.assertEqual(matmul.cpu_exec_micros, expected_micros)
            self.assertEqual(matmul.exec_micros, expected_micros)
예제 #2
0
    def testLoopGPU(self):
        """Check profiler totals for the looped MatMul under a GPU device scope.

        Returns immediately when `test.is_gpu_available()` is false.
        Otherwise runs the model returned by `_run_loop_model()` inside an
        `ops.device('/gpu:0')` scope and verifies that the tfprof MatMul
        node's run count is 4, that `cpu_exec_micros` matches the summed
        `op_end_rel_micros` of the op-level entries on the GPU device, that
        `accelerator_exec_micros` matches the summed `op_end_rel_micros` of
        the '/gpu:0/stream:all' entries, and that `exec_micros` is the sum
        of the two.
        """
        if not test.is_gpu_available():
            return

        matmul_name = 'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul'
        gpu_device = '/job:localhost/replica:0/task:0/gpu:0'
        gpu_stream = '/gpu:0/stream:all'

        ops.reset_default_graph()
        with ops.device('/gpu:0'):
            tfprof_node, run_meta = _run_loop_model()
            # The while-loop caused a node to appear 4 times in scheduling.
            op_stats = _extract_node(run_meta, matmul_name)[gpu_device]
            self.assertEqual(len(op_stats), 4)

            # Op-level entries on the GPU device are compared against the
            # profiler's cpu_exec_micros below.
            total_cpu_execs = sum(
                stat.op_end_rel_micros for stat in op_stats)

            # Stream entries are looked up under the '<op>:MatMul' name.
            stream_stats = _extract_node(
                run_meta, matmul_name + ':MatMul')[gpu_stream]
            self.assertGreaterEqual(len(stream_stats), 4)

            total_accelerator_execs = sum(
                stat.op_end_rel_micros for stat in stream_stats)

            matmul = lib.SearchTFProfNode(tfprof_node, matmul_name)

            self.assertEqual(matmul.run_count, 4)
            self.assertEqual(matmul.accelerator_exec_micros,
                             total_accelerator_execs)
            self.assertEqual(matmul.cpu_exec_micros, total_cpu_execs)
            self.assertEqual(matmul.exec_micros,
                             total_cpu_execs + total_accelerator_execs)
예제 #3
0
  def testMultiStepProfile(self):
    """Profile three steps one at a time and check nodes show up incrementally.

    Builds the three fetches from `lib.BuildSplitableModel()`, runs each with
    full tracing, adds each run as a new profiler step, and takes a
    name-scope profile after every step. The assertions check that:

    * after step 1 only 'DW' is found ('DW2' and 'add' are absent);
    * after step 2 'DW' and 'DW2' are found ('add' is absent);
    * after step 3 'DW', 'DW2' and 'add' are all found;
    * 'Conv2D', 'Conv2D_1' and 'add' have positive `exec_micros` only from
      the step in which they first appear.

    Finally runs the advisor with `ALL_ADVICE` and checks which checkers
    are present and whether they produced reports.
    """
    ops.reset_default_graph()
    opts = model_analyzer.PRINT_ALL_TIMING_MEMORY.copy()
    opts['account_type_regexes'] = ['.*']

    # Every traced run uses the same options; build them once.
    full_trace = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)

    with session.Session() as sess:
      r1, r2, r3 = lib.BuildSplitableModel()
      sess.run(variables.global_variables_initializer())

      profiler = model_analyzer.Profiler(sess.graph)
      # Profile taken before any step has been added.
      pb0 = profiler.profile_name_scope(opts)

      run_meta = config_pb2.RunMetadata()
      _ = sess.run(r1, options=full_trace, run_metadata=run_meta)
      profiler.add_step(1, run_meta)
      pb1 = profiler.profile_name_scope(opts)

      self.assertIsNotNone(lib.SearchTFProfNode(pb1, 'DW'))
      self.assertIsNone(lib.SearchTFProfNode(pb1, 'DW2'))
      self.assertIsNone(lib.SearchTFProfNode(pb1, 'add'))

      run_meta2 = config_pb2.RunMetadata()
      _ = sess.run(r2, options=full_trace, run_metadata=run_meta2)
      profiler.add_step(2, run_meta2)
      pb2 = profiler.profile_name_scope(opts)

      self.assertIsNotNone(lib.SearchTFProfNode(pb2, 'DW'))
      self.assertIsNotNone(lib.SearchTFProfNode(pb2, 'DW2'))
      self.assertIsNone(lib.SearchTFProfNode(pb2, 'add'))

      run_meta3 = config_pb2.RunMetadata()
      _ = sess.run(r3, options=full_trace, run_metadata=run_meta3)
      profiler.add_step(3, run_meta3)
      pb3 = profiler.profile_name_scope(opts)

      self.assertIsNotNone(lib.SearchTFProfNode(pb3, 'DW'))
      self.assertIsNotNone(lib.SearchTFProfNode(pb3, 'DW2'))
      self.assertIsNotNone(lib.SearchTFProfNode(pb3, 'add'))

      # Each op gains timing only from the step whose fetch executed it.
      self.assertIsNone(lib.SearchTFProfNode(pb0, 'Conv2D'))
      self.assertGreater(lib.SearchTFProfNode(pb1, 'Conv2D').exec_micros, 0)
      self.assertIsNone(lib.SearchTFProfNode(pb1, 'Conv2D_1'))
      self.assertGreater(lib.SearchTFProfNode(pb2, 'Conv2D_1').exec_micros, 0)
      self.assertIsNone(lib.SearchTFProfNode(pb2, 'add'))
      self.assertGreater(lib.SearchTFProfNode(pb3, 'add').exec_micros, 0)

      advice_pb = profiler.advise(model_analyzer.ALL_ADVICE)
      self.assertIn('AcceleratorUtilizationChecker', advice_pb.checkers)
      self.assertIn('ExpensiveOperationChecker', advice_pb.checkers)
      self.assertIn('OperationChecker', advice_pb.checkers)

      # Accelerator reports are expected only when a GPU is available.
      checker = advice_pb.checkers['AcceleratorUtilizationChecker']
      if test.is_gpu_available():
        self.assertGreater(len(checker.reports), 0)
      else:
        self.assertEqual(len(checker.reports), 0)
      checker = advice_pb.checkers['ExpensiveOperationChecker']
      self.assertGreater(len(checker.reports), 0)
예제 #4
0
    def testMultiStepProfile(self):
        """Profile three steps one at a time under a CPU device scope.

        Builds the three fetches from `lib.BuildSplitableModel()` inside an
        `ops.device('/cpu:0')` scope, runs each with full tracing, adds each
        run as a new profiler step, and takes a name-scope profile after
        every step. The assertions check that:

        * after step 1 only 'DW' is found ('DW2' and 'add' are absent);
        * after step 2 'DW' and 'DW2' are found ('add' is absent);
        * after step 3 'DW', 'DW2' and 'add' are all found;
        * 'Conv2D', 'Conv2D_1' and 'add' have positive `exec_micros` only
          from the step in which they first appear.

        Ends by calling `profiler.advise()` without inspecting its result.
        """
        ops.reset_default_graph()
        opts = model_analyzer.PRINT_ALL_TIMING_MEMORY.copy()
        opts['account_type_regexes'] = ['.*']

        # Every traced run uses the same options; build them once.
        full_trace = config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE)

        with session.Session() as sess, ops.device('/cpu:0'):
            r1, r2, r3 = lib.BuildSplitableModel()
            sess.run(variables.global_variables_initializer())

            profiler = model_analyzer.Profiler(sess.graph)
            # Profile taken before any step has been added.
            pb0 = profiler.profile_name_scope(opts)

            run_meta = config_pb2.RunMetadata()
            _ = sess.run(r1, options=full_trace, run_metadata=run_meta)
            profiler.add_step(1, run_meta)
            pb1 = profiler.profile_name_scope(opts)

            self.assertIsNotNone(lib.SearchTFProfNode(pb1, 'DW'))
            self.assertIsNone(lib.SearchTFProfNode(pb1, 'DW2'))
            self.assertIsNone(lib.SearchTFProfNode(pb1, 'add'))

            run_meta2 = config_pb2.RunMetadata()
            _ = sess.run(r2, options=full_trace, run_metadata=run_meta2)
            profiler.add_step(2, run_meta2)
            pb2 = profiler.profile_name_scope(opts)

            self.assertIsNotNone(lib.SearchTFProfNode(pb2, 'DW'))
            self.assertIsNotNone(lib.SearchTFProfNode(pb2, 'DW2'))
            self.assertIsNone(lib.SearchTFProfNode(pb2, 'add'))

            run_meta3 = config_pb2.RunMetadata()
            _ = sess.run(r3, options=full_trace, run_metadata=run_meta3)
            profiler.add_step(3, run_meta3)
            pb3 = profiler.profile_name_scope(opts)

            self.assertIsNotNone(lib.SearchTFProfNode(pb3, 'DW'))
            self.assertIsNotNone(lib.SearchTFProfNode(pb3, 'DW2'))
            self.assertIsNotNone(lib.SearchTFProfNode(pb3, 'add'))

            # Each op gains timing only from the step whose fetch executed it.
            self.assertIsNone(lib.SearchTFProfNode(pb0, 'Conv2D'))
            self.assertGreater(
                lib.SearchTFProfNode(pb1, 'Conv2D').exec_micros, 0)
            self.assertIsNone(lib.SearchTFProfNode(pb1, 'Conv2D_1'))
            self.assertGreater(
                lib.SearchTFProfNode(pb2, 'Conv2D_1').exec_micros, 0)
            self.assertIsNone(lib.SearchTFProfNode(pb2, 'add'))
            self.assertGreater(lib.SearchTFProfNode(pb3, 'add').exec_micros, 0)

            # TODO(xpan): Better test of advisor.
            profiler.advise()