def testTrackPersistentBytes(self): ops.reset_default_graph() a = array_ops.constant(np.ones((100, 100))) b = array_ops.constant(np.ones((100, 100))) c = a * b with session.Session() as sess: run_options = config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE) run_metadata = config_pb2.RunMetadata() sess.run(c, options=run_options, run_metadata=run_metadata) options = option_builder.ProfileOptionBuilder.time_and_memory() options['min_bytes'] = 0 options['select'] = ('bytes', 'peak_bytes', 'output_bytes', 'residual_bytes') ret = model_analyzer.profile( sess.graph, run_meta=run_metadata, cmd='scope', options=options) run_metadata = config_pb2.RunMetadata() sess.run(c, options=run_options, run_metadata=run_metadata) ret2 = model_analyzer.profile( sess.graph, run_meta=run_metadata, cmd='scope', options=options) n = lib.SearchTFProfNode(ret, 'mul') n2 = lib.SearchTFProfNode(ret2, 'mul') self.assertGreater(n.peak_bytes, 0) self.assertGreater(n.output_bytes, 0) self.assertGreater(n.residual_bytes, 0) self.assertEqual(n.peak_bytes, n2.peak_bytes) self.assertEqual(n.output_bytes, n2.output_bytes) self.assertEqual(n.residual_bytes, n2.residual_bytes)
def testMultipleProfilePerStep(self): ops.reset_default_graph() opts = (builder(builder.trainable_variables_parameter()) .with_empty_output() .with_accounted_types(['.*']) .select(['micros', 'bytes', 'peak_bytes', 'residual_bytes', 'output_bytes']).build()) r = lib.BuildSmallModel() sess = session.Session() profiler = model_analyzer.Profiler(sess.graph) init_var_run_meta = config_pb2.RunMetadata() sess.run(variables.global_variables_initializer(), options=config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE), run_metadata=init_var_run_meta) train_run_meta = config_pb2.RunMetadata() sess.run(r, options=config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE), run_metadata=train_run_meta) profiler.add_step(0, train_run_meta) ret1 = profiler.profile_name_scope(opts) n1 = lib.SearchTFProfNode( ret1, 'DW/Initializer/random_normal/RandomStandardNormal') # Without the var initialization run_meta, it doesn't have the # information of var_initialization. self.assertEqual(n1.exec_micros, 0) self.assertEqual(n1.requested_bytes, 0) self.assertEqual(n1.peak_bytes, 0) self.assertEqual(n1.residual_bytes, 0) profiler.add_step(0, init_var_run_meta) ret2 = profiler.profile_name_scope(opts) n2 = lib.SearchTFProfNode( ret2, 'DW/Initializer/random_normal/RandomStandardNormal') # After adding the var initialization run_meta. self.assertGreater(n2.exec_micros, 0) self.assertGreater(n2.requested_bytes, 0) self.assertGreater(n2.peak_bytes, 0) self.assertGreater(n2.residual_bytes, 0)
def testLoopCPU(self): ops.reset_default_graph() with ops.device('/cpu:0'): tfprof_node, run_meta = _run_loop_model() # The while-loop caused a node to appear 4 times in scheduling. ret = _extract_node(run_meta, 'rnn/while/basic_rnn_cell/MatMul') self.assertEqual(len(ret['cpu:0']), 4) total_cpu_execs = 0 for node in ret['cpu:0']: total_cpu_execs += node.op_end_rel_micros mm_node = lib.SearchTFProfNode(tfprof_node, 'rnn/while/basic_rnn_cell/MatMul') self.assertEqual(mm_node.run_count, 4) self.assertEqual(mm_node.cpu_exec_micros, total_cpu_execs) self.assertEqual(mm_node.exec_micros, total_cpu_execs)
def testMultiStepProfile(self): ops.reset_default_graph() opts = builder.time_and_memory(min_bytes=0) with session.Session() as sess: r1, r2, r3 = lib.BuildSplittableModel() sess.run(variables.global_variables_initializer()) profiler = model_analyzer.Profiler(sess.graph) pb0 = profiler.profile_name_scope(opts) run_meta = config_pb2.RunMetadata() _ = sess.run(r1, options=config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE), run_metadata=run_meta) profiler.add_step(1, run_meta) pb1 = profiler.profile_name_scope(opts) self.assertNotEqual(lib.SearchTFProfNode(pb1, 'DW'), None) self.assertEqual(lib.SearchTFProfNode(pb1, 'DW2'), None) self.assertEqual(lib.SearchTFProfNode(pb1, 'add'), None) run_meta2 = config_pb2.RunMetadata() _ = sess.run(r2, options=config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE), run_metadata=run_meta2) profiler.add_step(2, run_meta2) pb2 = profiler.profile_name_scope(opts) self.assertNotEqual(lib.SearchTFProfNode(pb2, 'DW'), None) self.assertNotEqual(lib.SearchTFProfNode(pb2, 'DW2'), None) self.assertEqual(lib.SearchTFProfNode(pb2, 'add'), None) run_meta3 = config_pb2.RunMetadata() _ = sess.run(r3, options=config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE), run_metadata=run_meta3) profiler.add_step(3, run_meta3) pb3 = profiler.profile_name_scope(opts) self.assertNotEqual(lib.SearchTFProfNode(pb3, 'DW'), None) self.assertNotEqual(lib.SearchTFProfNode(pb3, 'DW2'), None) self.assertNotEqual(lib.SearchTFProfNode(pb3, 'add'), None) self.assertEqual(lib.SearchTFProfNode(pb0, 'Conv2D'), None) self.assertGreater(lib.SearchTFProfNode(pb1, 'Conv2D').exec_micros, 0) self.assertEqual(lib.SearchTFProfNode(pb1, 'Conv2D_1'), None) self.assertGreater(lib.SearchTFProfNode(pb2, 'Conv2D_1').exec_micros, 0) self.assertEqual(lib.SearchTFProfNode(pb2, 'add'), None) self.assertGreater(lib.SearchTFProfNode(pb3, 'add').exec_micros, 0) advice_pb = profiler.advise(model_analyzer.ALL_ADVICE) self.assertTrue('AcceleratorUtilizationChecker' in advice_pb.checkers) self.assertTrue('ExpensiveOperationChecker' in advice_pb.checkers) self.assertTrue('OperationChecker' in advice_pb.checkers) checker = advice_pb.checkers['AcceleratorUtilizationChecker'] if test.is_gpu_available(): self.assertGreater(len(checker.reports), 0) else: self.assertEqual(len(checker.reports), 0) checker = advice_pb.checkers['ExpensiveOperationChecker'] self.assertGreater(len(checker.reports), 0)