def testTrackPersistentBytes(self):
    """Memory stats reported for the 'mul' node are positive and repeatable.

    The same element-wise product of two 100x100 constants is executed
    twice with full tracing. Each run is profiled separately with the
    'scope' command, and the peak/output/residual byte counts of the node
    named 'mul' must be greater than zero and equal between the two runs.
    """
    ops.reset_default_graph()
    lhs = array_ops.constant(np.ones((100, 100)))
    rhs = array_ops.constant(np.ones((100, 100)))
    product = lhs * rhs

    # Fields compared between the two profiles, in assertion order.
    checked_fields = ('peak_bytes', 'output_bytes', 'residual_bytes')

    with session.Session() as sess:
      trace_opts = config_pb2.RunOptions(
          trace_level=config_pb2.RunOptions.FULL_TRACE)

      profile_opts = option_builder.ProfileOptionBuilder.time_and_memory()
      # Report every node regardless of how few bytes it uses.
      profile_opts['min_bytes'] = 0
      profile_opts['select'] = ('bytes', 'peak_bytes', 'output_bytes',
                                'residual_bytes')

      # Two independent rounds: fresh RunMetadata, one run, one profile.
      profiles = []
      for _ in range(2):
        step_meta = config_pb2.RunMetadata()
        sess.run(product, options=trace_opts, run_metadata=step_meta)
        profiles.append(
            model_analyzer.profile(
                sess.graph, run_meta=step_meta, cmd='scope',
                options=profile_opts))

      first, second = (lib.SearchTFProfNode(p, 'mul') for p in profiles)

      for field in checked_fields:
        self.assertGreater(getattr(first, field), 0)
      for field in checked_fields:
        self.assertEqual(getattr(first, field), getattr(second, field))
  def testMultipleProfilePerStep(self):
    """Adding a second RunMetadata to the same step extends the profile.

    A training run and a variable-initialization run are traced separately.
    After only the training RunMetadata is added to step 0, the initializer
    node reports zero time and memory. After the initialization RunMetadata
    is added to the same step 0, the node reports positive values.
    """
    ops.reset_default_graph()
    opts = (builder(builder.trainable_variables_parameter())
            .with_empty_output()
            .with_accounted_types(['.*'])
            .select(['micros', 'bytes', 'peak_bytes',
                     'residual_bytes', 'output_bytes']).build())

    r = lib.BuildSmallModel()
    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    init_node_name = 'DW/Initializer/random_normal/RandomStandardNormal'

    # Use the session as a context manager so it is closed even when an
    # assertion fails; the other tests in this file do the same.
    with session.Session() as sess:
      profiler = model_analyzer.Profiler(sess.graph)

      init_var_run_meta = config_pb2.RunMetadata()
      sess.run(variables.global_variables_initializer(),
               options=trace_options,
               run_metadata=init_var_run_meta)

      train_run_meta = config_pb2.RunMetadata()
      sess.run(r, options=trace_options, run_metadata=train_run_meta)

      profiler.add_step(0, train_run_meta)
      ret1 = profiler.profile_name_scope(opts)
      n1 = lib.SearchTFProfNode(ret1, init_node_name)
      # Without the var initialization run_meta, the profiler has no
      # information about variable initialization.
      self.assertEqual(n1.exec_micros, 0)
      self.assertEqual(n1.requested_bytes, 0)
      self.assertEqual(n1.peak_bytes, 0)
      self.assertEqual(n1.residual_bytes, 0)

      # Same step id (0) on purpose: a second RunMetadata for that step.
      profiler.add_step(0, init_var_run_meta)
      ret2 = profiler.profile_name_scope(opts)
      n2 = lib.SearchTFProfNode(ret2, init_node_name)
      # After adding the var initialization run_meta.
      self.assertGreater(n2.exec_micros, 0)
      self.assertGreater(n2.requested_bytes, 0)
      self.assertGreater(n2.peak_bytes, 0)
      self.assertGreater(n2.residual_bytes, 0)
Esempio n. 3
0
    def testLoopCPU(self):
        """The profiler aggregates repeated executions of a while-loop node.

        The loop model is run on '/cpu:0'. The MatMul node inside the loop
        must appear four times in the 'cpu:0' entries extracted from the run
        metadata, and the profiler node for it must report a run count of 4
        with CPU and total exec micros equal to the sum of those entries'
        op_end_rel_micros.
        """
        ops.reset_default_graph()
        matmul_name = 'rnn/while/basic_rnn_cell/MatMul'

        with ops.device('/cpu:0'):
            tfprof_node, run_meta = _run_loop_model()

            # The while-loop caused a node to appear 4 times in scheduling.
            scheduled = _extract_node(run_meta, matmul_name)
            self.assertEqual(len(scheduled['cpu:0']), 4)

            expected_micros = sum(
                entry.op_end_rel_micros for entry in scheduled['cpu:0'])

            profiled = lib.SearchTFProfNode(tfprof_node, matmul_name)

            self.assertEqual(profiled.run_count, 4)
            self.assertEqual(profiled.cpu_exec_micros, expected_micros)
            self.assertEqual(profiled.exec_micros, expected_micros)
  def testMultiStepProfile(self):
    """Profiles grow as traced steps are added; advice checkers are present.

    A name-scope profile is taken before any step is added (pb0) and again
    after each of three traced runs of r1, r2 and r3 (pb1, pb2, pb3). The
    assertions check which nodes ('DW', 'DW2', 'add', 'Conv2D', 'Conv2D_1')
    are present in each profile, and that nodes with timing report a
    positive exec_micros. Finally the advice output is checked for the
    expected checker names and report counts.
    """
    ops.reset_default_graph()
    opts = builder.time_and_memory(min_bytes=0)
    # The same tracing options are used for every profiled run.
    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)

    with session.Session() as sess:
      r1, r2, r3 = lib.BuildSplittableModel()
      sess.run(variables.global_variables_initializer())

      profiler = model_analyzer.Profiler(sess.graph)
      # Baseline profile with no step added yet.
      pb0 = profiler.profile_name_scope(opts)

      run_meta = config_pb2.RunMetadata()
      sess.run(r1, options=trace_options, run_metadata=run_meta)
      profiler.add_step(1, run_meta)
      pb1 = profiler.profile_name_scope(opts)

      self.assertIsNotNone(lib.SearchTFProfNode(pb1, 'DW'))
      self.assertIsNone(lib.SearchTFProfNode(pb1, 'DW2'))
      self.assertIsNone(lib.SearchTFProfNode(pb1, 'add'))

      run_meta2 = config_pb2.RunMetadata()
      sess.run(r2, options=trace_options, run_metadata=run_meta2)
      profiler.add_step(2, run_meta2)
      pb2 = profiler.profile_name_scope(opts)

      self.assertIsNotNone(lib.SearchTFProfNode(pb2, 'DW'))
      self.assertIsNotNone(lib.SearchTFProfNode(pb2, 'DW2'))
      self.assertIsNone(lib.SearchTFProfNode(pb2, 'add'))

      run_meta3 = config_pb2.RunMetadata()
      sess.run(r3, options=trace_options, run_metadata=run_meta3)
      profiler.add_step(3, run_meta3)
      pb3 = profiler.profile_name_scope(opts)

      self.assertIsNotNone(lib.SearchTFProfNode(pb3, 'DW'))
      self.assertIsNotNone(lib.SearchTFProfNode(pb3, 'DW2'))
      self.assertIsNotNone(lib.SearchTFProfNode(pb3, 'add'))

      # Each node is absent in the profile taken just before the step that
      # runs it, and has a positive exec time in the profile taken after.
      self.assertIsNone(lib.SearchTFProfNode(pb0, 'Conv2D'))
      self.assertGreater(lib.SearchTFProfNode(pb1, 'Conv2D').exec_micros, 0)
      self.assertIsNone(lib.SearchTFProfNode(pb1, 'Conv2D_1'))
      self.assertGreater(lib.SearchTFProfNode(pb2, 'Conv2D_1').exec_micros, 0)
      self.assertIsNone(lib.SearchTFProfNode(pb2, 'add'))
      self.assertGreater(lib.SearchTFProfNode(pb3, 'add').exec_micros, 0)

      advice_pb = profiler.advise(model_analyzer.ALL_ADVICE)
      self.assertIn('AcceleratorUtilizationChecker', advice_pb.checkers)
      self.assertIn('ExpensiveOperationChecker', advice_pb.checkers)
      self.assertIn('OperationChecker', advice_pb.checkers)

      checker = advice_pb.checkers['AcceleratorUtilizationChecker']
      # This checker is expected to report only when a GPU is available.
      if test.is_gpu_available():
        self.assertGreater(len(checker.reports), 0)
      else:
        self.assertEqual(len(checker.reports), 0)
      checker = advice_pb.checkers['ExpensiveOperationChecker']
      self.assertGreater(len(checker.reports), 0)