def testAutoProfiling(self):
  """Checks ProfileContext auto-profiling for time and memory options.

  Two 'scope' auto-profilers are registered on a ProfileContext: one that
  selects 'micros' and writes under <temp dir>/time, and one that selects
  'bytes' and writes under <temp dir>/memory. The output locations and the
  step lists are then handed to self._trainLoop (defined elsewhere in this
  file) together with the model.
  """
  ops.reset_default_graph()
  tmp_root = test.get_temp_dir()
  time_dir = os.path.join(tmp_root, 'time')
  memory_dir = os.path.join(tmp_root, 'memory')
  profile_dir = os.path.join(tmp_root, 'dir/dir2/profile')
  # TODO(xpan): Should we create parent directory for them?
  gfile.MkDir(time_dir)
  gfile.MkDir(memory_dir)

  time_opts = (builder(builder.time_and_memory())
               .with_file_output(os.path.join(time_dir, 'profile'))
               .select(['micros']).build())
  memory_opts = (builder(builder.time_and_memory())
                 .with_file_output(os.path.join(memory_dir, 'profile'))
                 .select(['bytes']).build())

  time_steps = [2, 3]
  memory_steps = [1, 3]
  dump_steps = [3, 4]

  x = lib.BuildSmallModel()
  # Configure the context from the same dump_steps that _trainLoop verifies,
  # so the two cannot drift apart. A copy is passed so the context and the
  # verification loop never share one list object.
  with profile_context.ProfileContext(profile_dir,
                                      trace_steps=[1, 2, 3],
                                      dump_steps=list(dump_steps)) as pctx:
    pctx.add_auto_profiling('scope', time_opts, time_steps)
    pctx.add_auto_profiling('scope', memory_opts, memory_steps)

    self._trainLoop(x, 10, time_dir, time_steps,
                    memory_dir, memory_steps, profile_dir, dump_steps)
def testCodeViewLeafGraphNode(self):
  """Verifies where graph nodes are attached in a 'code' view profile.

  Starting at the node returned by model_analyzer.profile, the test follows
  the first child repeatedly. Every node that still has children must hold
  zero graph_nodes; the node without children must hold exactly one.
  """
  ops.reset_default_graph()
  selected_columns = ['bytes', 'params', 'float_ops', 'device']
  opts = (builder(builder.trainable_variables_parameter())
          .with_empty_output()
          .with_accounted_types(['.*'])
          .account_displayed_op_only(False)
          .select(selected_columns)
          .build())
  full_trace = config_pb2.RunOptions(
      trace_level=config_pb2.RunOptions.FULL_TRACE)

  with session.Session(config=self._no_rewrite_session_config()) as sess:
    model = lib.BuildSmallModel()
    self.evaluate(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    sess.run(model, options=full_trace, run_metadata=run_meta)

    root = model_analyzer.profile(
        sess.graph, run_meta, cmd='code', options=opts)

    # Walk down the left-most path of the tree until a childless node.
    node = root
    while node.children:
      self.assertEqual(0, len(node.graph_nodes))
      node = node.children[0]
    self.assertEqual(1, len(node.graph_nodes))
def testSimpleCodeView(self):
  """Checks the beginning of a file-dumped 'code' view profile.

  The profile is written to <temp dir>/dump; after lib.CheckAndRemoveDoc the
  first 80 characters of the file are compared with the expected legend text.
  """
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')
  # TODO(xpan): Test 'micros'. Since the execution time changes each run,
  # it's a bit difficult to test it now.
  selected_columns = ['bytes', 'params', 'float_ops', 'num_hidden_ops',
                      'device', 'input_shapes']
  opts = (builder(builder.trainable_variables_parameter())
          .with_file_output(dump_path)
          .with_accounted_types(['.*'])
          .with_node_names(show_name_regexes=['.*model_analyzer_testlib.*'])
          .account_displayed_op_only(False)
          .select(selected_columns)
          .build())
  full_trace = config_pb2.RunOptions(
      trace_level=config_pb2.RunOptions.FULL_TRACE)

  with session.Session(config=self._no_rewrite_session_config()) as sess:
    model = lib.BuildSmallModel()
    self.evaluate(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    sess.run(model, options=full_trace, run_metadata=run_meta)

    model_analyzer.profile(sess.graph, run_meta, cmd='code', options=opts)

    with gfile.Open(dump_path, 'r') as f:
      dumped = f.read()

    # pylint: disable=line-too-long
    expected_prefix = 'node name | requested bytes | # parameters | # float_ops | assigned devices | in'
    # pylint: enable=line-too-long
    self.assertEqual(expected_prefix, lib.CheckAndRemoveDoc(dumped)[0:80])
# Golden-output test for model_analyzer.profile called without an explicit cmd
# and with the columns 'params', 'float_ops', 'occurrence', 'device',
# 'op_types' and 'input_shapes' selected.
#
# Steps, as written below:
#   1. Build a ConfigProto whose RewriterConfig has disable_model_pruning=True.
#   2. Build lib.BuildSmallModel() under ops.device('/device:CPU:0'), run the
#      variable initializer, then run the model once with FULL_TRACE so that
#      run_meta is populated.
#   3. Profile with file output to <temp dir>/dump and compare the ENTIRE file
#      content with the literal passed to assertEqual.
#
# NOTE(review): the expected literal pins exact parameter/flop counts, op
# counts, device strings and node names, so any change to the small model or
# to the profiler's text format requires regenerating it.
# NOTE(review): the literal continues across the following physical lines;
# do not re-wrap or re-indent it, every character is part of the comparison.
def testSelectEverything(self): ops.reset_default_graph() outfile = os.path.join(test.get_temp_dir(), 'dump') opts = (builder(builder.trainable_variables_parameter()) .with_file_output(outfile) .with_accounted_types(['.*']) .select(['params', 'float_ops', 'occurrence', 'device', 'op_types', 'input_shapes']).build()) rewriter_config = rewriter_config_pb2.RewriterConfig( disable_model_pruning=True) graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config) config = config_pb2.ConfigProto(graph_options=graph_options) with session.Session(config=config) as sess, ops.device('/device:CPU:0'): x = lib.BuildSmallModel() sess.run(variables.global_variables_initializer()) run_meta = config_pb2.RunMetadata() _ = sess.run(x, options=config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE), run_metadata=run_meta) model_analyzer.profile( sess.graph, run_meta, options=opts) with gfile.Open(outfile, 'r') as f: # pylint: disable=line-too-long self.assertEqual( 'node name | # parameters | # float_ops | assigned devices | op types | op count (run|defined) | input shapes\n_TFProfRoot (--/451 params, --/11.34k flops, _kTFScopeParent, --/8|--/36, )\n Conv2D (0/0 params, 5.83k/5.83k flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|Conv2D, 1/1|1/1, 0:2x6x6x3|1:3x3x3x6)\n Conv2D_1 (0/0 params, 4.61k/4.61k flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|Conv2D, 1/1|1/1, 0:2x3x3x6|1:2x2x6x12)\n DW (3x3x3x6, 162/162 params, 0/324 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n DW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:3x3x3x6|1:3x3x3x6)\n DW/Initializer (0/0 params, 0/324 flops, _kTFScopeParent, 0/0|1/7, )\n DW/Initializer/random_normal (0/0 params, 162/324 flops, Add, 0/0|1/6, 0:3x3x3x6|1:1)\n DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 
flops, RandomStandardNormal, 0/0|1/1, 0:4)\n DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/Initializer/random_normal/mul (0/0 params, 162/162 flops, Mul, 0/0|1/1, 0:3x3x3x6|1:1)\n DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|Identity, 1/1|1/1, 0:3x3x3x6)\n DW2 (2x2x6x12, 288/288 params, 0/576 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n DW2/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:2x2x6x12|1:2x2x6x12)\n DW2/Initializer (0/0 params, 0/576 flops, _kTFScopeParent, 0/0|1/7, )\n DW2/Initializer/random_normal (0/0 params, 288/576 flops, Add, 0/0|1/6, 0:2x2x6x12|1:1)\n DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/Initializer/random_normal/mul (0/0 params, 288/288 flops, Mul, 0/0|1/1, 0:2x2x6x12|1:1)\n DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|Identity, 1/1|1/1, 0:2x2x6x12)\n ScalarW (1, 1/1 params, 0/2 flops, VariableV2|_trainable_variables, 0/0|1/10, )\n ScalarW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:1|1:1)\n ScalarW/Initializer (0/0 params, 0/2 flops, _kTFScopeParent, 0/0|1/7, )\n ScalarW/Initializer/random_normal (0/0 params, 1/2 flops, Add, 0/0|1/6, 0:1|1:1)\n ScalarW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:0)\n 
ScalarW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/Initializer/random_normal/mul (0/0 params, 1/1 flops, Mul, 0/0|1/1, 0:1|1:1)\n ScalarW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/read (0/0 params, 0/0 flops, Identity, 0/0|1/1, 0:1)\n _retval_Conv2D_1_0_0 (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|_retval_Conv2D_1_0_0, 1/1|1/1, )\n init (0/0 params, 0/0 flops, NoOp, 0/0|1/1, 0:1|1:3x3x3x6|2:2x2x6x12)\n zeros (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|Const, 1/1|1/1, )\n', f.read())
def testSelectOption(self):
  """Checks that select() controls which columns appear in the dump file.

  The small model is run once with FULL_TRACE; the resulting run metadata is
  then profiled three times with different select() lists. After each
  profile the dump file must contain every expected column title and none of
  the excluded ones.
  """
  ops.reset_default_graph()
  outfile = os.path.join(test.get_temp_dir(), 'dump')

  def check_selection(selected, not_selected):
    """Asserts presence/absence of column titles in the current dump file."""
    with gfile.Open(outfile, 'r') as f:
      s = f.read()
    # Use membership assertions: `s.find(attr) > 0` would treat a match at
    # the very start of the file (index 0) as "not found".
    for attr in selected:
      self.assertIn(attr, s)
    for attr in not_selected:
      self.assertNotIn(attr, s)

  # (columns passed to select(), titles that must appear, titles that must
  # not appear), in the order they are profiled.
  cases = (
      (['micros'],
       ['total execution time', 'accelerator execution time'],
       ['bytes']),
      (['bytes'],
       ['requested bytes'],
       ['peak bytes', 'residual bytes', 'output bytes']),
      # NOTE(review): 'requested_bytes' (underscore) differs from the
      # 'requested bytes' title checked in the previous case, so this
      # negative check looks vacuous - confirm the intended spelling.
      (['peak_bytes', 'residual_bytes', 'output_bytes'],
       ['peak bytes', 'residual bytes', 'output bytes'],
       ['requested_bytes']),
  )

  with session.Session() as sess:
    x = lib.BuildSmallModel()

    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(x,
                 options=config_pb2.RunOptions(
                     trace_level=config_pb2.RunOptions.FULL_TRACE),
                 run_metadata=run_meta)

    for columns, expected, unexpected in cases:
      opts = builder(
          builder.time_and_memory()).with_file_output(outfile).select(
              columns).build()
      _ = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts)
      check_selection(expected, unexpected)
def testDumpToFile(self):
  """Dumps the trainable-variable parameter profile to a file and checks it.

  The profile is taken from the session graph alone (no run metadata is
  passed) and the complete file content is compared with the expected text.
  """
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')
  option_builder = builder(builder.trainable_variables_parameter())
  opts = option_builder.with_file_output(dump_path).build()

  with session.Session() as sess:
    lib.BuildSmallModel()
    model_analyzer.profile(sess.graph, options=opts)

    with gfile.Open(dump_path, 'r') as f:
      dumped = f.read()

    expected = (u'node name | # parameters\n'
                '_TFProfRoot (--/451 params)\n'
                ' DW (3x3x3x6, 162/162 params)\n'
                ' DW2 (2x2x6x12, 288/288 params)\n'
                ' ScalarW (1, 1/1 params)\n')
    self.assertEqual(expected, dumped)
def testDumpToFile(self):
  """Dumps the parameter profile to a file using dict-style options.

  A copy of model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS is redirected
  to <temp dir>/dump through its 'output' entry; the complete file content is
  then compared with the expected text.
  """
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')
  # Work on a copy so the module-level default options stay untouched.
  opts = model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS.copy()
  opts['output'] = 'file:outfile=' + dump_path

  with session.Session() as sess:
    lib.BuildSmallModel()
    model_analyzer.profile(sess.graph, options=opts)

    with gfile.Open(dump_path, 'r') as f:
      dumped = f.read()

    expected = (u'node name | # parameters\n'
                '_TFProfRoot (--/451 params)\n'
                ' DW (3x3x3x6, 162/162 params)\n'
                ' DW2 (2x2x6x12, 288/288 params)\n'
                ' ScalarW (1, 1/1 params)\n')
    self.assertEqual(expected, dumped)
def testMultipleProfilePerStep(self):
  """Checks that several RunMetadata can be added to the same profiler step.

  Two traced runs are recorded: the variable initializer and the model
  itself. With only the model's RunMetadata added to step 0, the node
  'DW/Initializer/random_normal/RandomStandardNormal' must report zero time
  and memory. After the initializer's RunMetadata is added to the same step,
  the node must report values greater than zero.
  """
  ops.reset_default_graph()
  opts = (builder(builder.trainable_variables_parameter())
          .with_empty_output()
          .with_accounted_types(['.*'])
          .select(['micros', 'bytes', 'peak_bytes',
                   'residual_bytes', 'output_bytes']).build())
  initializer_node = 'DW/Initializer/random_normal/RandomStandardNormal'

  r = lib.BuildSmallModel()
  # Run inside a `with` block so the session is closed even when one of the
  # assertions below fails.
  with session.Session() as sess:
    profiler = model_analyzer.Profiler(sess.graph)

    init_var_run_meta = config_pb2.RunMetadata()
    sess.run(variables.global_variables_initializer(),
             options=config_pb2.RunOptions(
                 trace_level=config_pb2.RunOptions.FULL_TRACE),
             run_metadata=init_var_run_meta)

    train_run_meta = config_pb2.RunMetadata()
    sess.run(r,
             options=config_pb2.RunOptions(
                 trace_level=config_pb2.RunOptions.FULL_TRACE),
             run_metadata=train_run_meta)

    profiler.add_step(0, train_run_meta)
    ret1 = profiler.profile_name_scope(opts)
    n1 = lib.SearchTFProfNode(ret1, initializer_node)
    # Without the var initialization run_meta, it doesn't have the
    # information of var_initialization.
    self.assertEqual(n1.exec_micros, 0)
    self.assertEqual(n1.requested_bytes, 0)
    self.assertEqual(n1.peak_bytes, 0)
    self.assertEqual(n1.residual_bytes, 0)

    profiler.add_step(0, init_var_run_meta)
    ret2 = profiler.profile_name_scope(opts)
    n2 = lib.SearchTFProfNode(ret2, initializer_node)
    # After adding the var initialization run_meta.
    self.assertGreater(n2.exec_micros, 0)
    self.assertGreater(n2.requested_bytes, 0)
    self.assertGreater(n2.peak_bytes, 0)
    self.assertGreater(n2.residual_bytes, 0)
def testSelectEverything(self):
  """Smoke test: profiling with many columns selected must not raise.

  The profile is written to <temp dir>/dump, but this test makes no
  assertion about the file's content.
  """
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')
  selected_columns = [
      'params', 'float_ops', 'occurrence', 'device', 'op_types',
      'input_shapes'
  ]
  opts = (builder(builder.trainable_variables_parameter())
          .with_file_output(dump_path)
          .with_accounted_types(['.*'])
          .select(selected_columns)
          .build())
  full_trace = config_pb2.RunOptions(
      trace_level=config_pb2.RunOptions.FULL_TRACE)

  session_config = self._no_rewrite_session_config()
  with session.Session(config=session_config) as sess, ops.device(
      '/device:CPU:0'):
    model = lib.BuildSmallModel()

    self.evaluate(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    sess.run(model, options=full_trace, run_metadata=run_meta)

    model_analyzer.profile(sess.graph, run_meta, options=opts)
def testEager(self):
  """Profiles the small model in eager mode from exported run metadata.

  Run-metadata collection is enabled while lib.BuildSmallModel() executes,
  the exported metadata is added to a Profiler as step 0, and the
  per-operation profile is written to <temp dir>/dump. Both the dump file
  and the text form of the serialized ProfileProto must mention 'Conv2D'
  and 'VarHandleOp'.
  """
  ops.reset_default_graph()
  with context.eager_mode():
    outfile = os.path.join(test.get_temp_dir(), 'dump')
    opts = builder(
        builder.time_and_memory()).with_file_output(outfile).build()

    context.enable_run_metadata()
    try:
      lib.BuildSmallModel()

      profiler = model_analyzer.Profiler()
      # Export before disabling: the metadata is read while collection is
      # still switched on, exactly as in the original statement order.
      profiler.add_step(0, context.export_run_metadata())
    finally:
      # Always switch collection back off so a failure while building the
      # model cannot leave it enabled for later tests.
      context.disable_run_metadata()

    profiler.profile_operations(opts)
    with gfile.Open(outfile, 'r') as f:
      out_str = f.read()
    self.assertIn('Conv2D', out_str)
    self.assertIn('VarHandleOp', out_str)

    # Round-trip the profiler state through its serialized form. No file is
    # needed for this; the previously opened '/tmp/eager_profile' handle was
    # never written to or read from, so it has been dropped.
    profile_pb = tfprof_log_pb2.ProfileProto()
    profile_pb.ParseFromString(profiler.serialize_to_string())
    profile_pb_str = '%s' % profile_pb
    self.assertIn('Conv2D', profile_pb_str)
    self.assertIn('VarHandleOp', profile_pb_str)
# Checks individual columns of a name-scope profile obtained through a
# ProfileContext (trace_steps=[] and dump_steps=[], with trace_next_step() and
# dump_next_step() called explicitly before the model run).
#
# The model is placed on '/device:GPU:0' when test.is_gpu_available() and on
# '/device:CPU:0' otherwise. After lib.CheckAndRemoveDoc, the first output line
# must equal the expected legend; then, for the row containing 'Conv2D ':
#   * metrics[3..5] (every column on GPU, every second one otherwise) must
#     match '<x>us/<y>us' or '<x>ms/<y>ms' with both numbers > 0,
#   * metrics[6] must mention 'gpu' xor 'cpu' according to availability,
#   * metrics[1] must match '<x>k/<y>k flops' with both numbers > 0,
#   * metrics[8] must be '1/1|1/1' and metrics[9] '0:2x6x6x3|1:3x3x3x6'.
# For the row containing 'DW (3x3x3x6', metrics[1] must match
# '<x>/<y> params' with both numbers > 0.
#
# Finally the dump file is removed, a profiler is restored from
# <temp dir>/profile_1 via lib.ProfilerFromFile, and the regenerated dump must
# equal the first one.
#
# NOTE(review): the metrics[...] indices presumably follow the column order of
# the legend asserted on outputs[0] - confirm before reordering select().
# NOTE(review): rows are matched with `find(...) > 0`, so a row that STARTS
# with the searched text (index 0) would be skipped - verify that rows are
# always indented.
def testSelectEverythingDetail(self): ops.reset_default_graph() dev = '/device:GPU:0' if test.is_gpu_available() else '/device:CPU:0' outfile = os.path.join(test.get_temp_dir(), 'dump') opts = (builder( builder.trainable_variables_parameter()).with_file_output( outfile).with_accounted_types(['.*']).select([ 'micros', 'bytes', 'params', 'float_ops', 'occurrence', 'device', 'op_types', 'input_shapes' ]).build()) with profile_context.ProfileContext(test.get_temp_dir(), trace_steps=[], dump_steps=[]) as pctx: with session.Session() as sess, ops.device(dev): x = lib.BuildSmallModel() sess.run(variables.global_variables_initializer()) pctx.trace_next_step() pctx.dump_next_step() _ = sess.run(x) pctx.profiler.profile_name_scope(options=opts) with gfile.Open(outfile, 'r') as f: # pylint: disable=line-too-long dump_str = lib.CheckAndRemoveDoc(f.read()) outputs = dump_str.split('\n') self.assertEqual( outputs[0], 'node name | # parameters | # float_ops | requested bytes | total execution time | accelerator execution time | cpu execution time | assigned devices | op types | op count (run|defined) | input shapes' ) for o in outputs[1:]: if o.find('Conv2D ') > 0: metrics = o[o.find('(') + 1:o.find(')')].split(',') # Make sure time is profiled. gap = 1 if test.is_gpu_available() else 2 for i in range(3, 6, gap): mat = re.search('(.*)[um]s/(.*)[um]s', metrics[i]) self.assertGreater(float(mat.group(1)), 0.0) self.assertGreater(float(mat.group(2)), 0.0) # Make sure device is profiled. if test.is_gpu_available(): self.assertTrue(metrics[6].find('gpu') > 0) self.assertFalse(metrics[6].find('cpu') > 0) else: self.assertFalse(metrics[6].find('gpu') > 0) self.assertTrue(metrics[6].find('cpu') > 0) # Make sure float_ops is profiled. mat = re.search('(.*)k/(.*)k flops', metrics[1].strip()) self.assertGreater(float(mat.group(1)), 0.0) self.assertGreater(float(mat.group(2)), 0.0) # Make sure op_count is profiled. 
self.assertEqual(metrics[8].strip(), '1/1|1/1') # Make sure input_shapes is profiled. self.assertEqual(metrics[9].strip(), '0:2x6x6x3|1:3x3x3x6') if o.find('DW (3x3x3x6') > 0: metrics = o[o.find('(') + 1:o.find(')')].split(',') mat = re.search('(.*)/(.*) params', metrics[1].strip()) self.assertGreater(float(mat.group(1)), 0.0) self.assertGreater(float(mat.group(2)), 0.0) # pylint: enable=line-too-long # Test that profiler restored from profile file gives the same result. gfile.Remove(outfile) profile_file = os.path.join(test.get_temp_dir(), 'profile_1') with lib.ProfilerFromFile(profile_file) as profiler: profiler.profile_name_scope(options=opts) with gfile.Open(outfile, 'r') as f: self.assertEqual(dump_str, lib.CheckAndRemoveDoc(f.read()))
def testMinOption(self):
  """Exercises each min_* threshold accepted by builder.time_and_memory().

  One threshold value is drawn with random.randint(0, 10000) and applied to a
  single min_* option at a time. After each profile, every node below the
  returned root (recursively) must meet the threshold on the matching field.
  A threshold of 0 disables the corresponding check.
  """
  ops.reset_default_graph()

  def check_min(nodes, mm=0, mam=0, mcm=0, mb=0, mpb=0, mrb=0, mob=0):
    """Recursively asserts every positive threshold against each node."""
    floors = (
        ('exec_micros', mm),
        ('accelerator_exec_micros', mam),
        ('cpu_exec_micros', mcm),
        ('requested_bytes', mb),
        ('peak_bytes', mpb),
        ('residual_bytes', mrb),
        ('output_bytes', mob),
    )
    for node in nodes:
      for field, floor in floors:
        if floor > 0:
          self.assertGreaterEqual(getattr(node, field), floor)
      check_min(node.children, mm, mam, mcm, mb, mpb, mrb, mob)

  # (keyword of builder.time_and_memory, keyword of check_min), listed in the
  # order in which the options are profiled.
  threshold_cases = (
      ('min_micros', 'mm'),
      ('min_accelerator_micros', 'mam'),
      ('min_cpu_micros', 'mcm'),
      ('min_bytes', 'mb'),
      ('min_peak_bytes', 'mpb'),
      ('min_residual_bytes', 'mrb'),
      ('min_output_bytes', 'mob'),
  )

  with session.Session() as sess:
    model = lib.BuildSmallModel()
    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    sess.run(model,
             options=config_pb2.RunOptions(
                 trace_level=config_pb2.RunOptions.FULL_TRACE),
             run_metadata=run_meta)

    min_val = random.randint(0, 10000)

    for option_name, check_name in threshold_cases:
      base_options = builder.time_and_memory(**{option_name: min_val})
      opts = builder(base_options).with_empty_output().build()
      root = model_analyzer.profile(
          sess.graph, run_meta=run_meta, options=opts)
      check_min(root.children, **{check_name: min_val})
def testSelectEverthingDetail(self):
  """Checks individual columns of a fully selected profile dump.

  The small model runs once with FULL_TRACE on '/gpu:0' when
  test.is_gpu_available() and on '/cpu:0' otherwise. The first line of the
  dump must equal the expected legend. For the row containing 'Conv2D ' the
  timing columns, device column, float_ops, op count and input shapes are
  checked; for the row containing 'DW (3x3x3x6' the parameter counts are
  checked.
  """
  ops.reset_default_graph()
  # Query GPU availability once; it selects the device and decides which
  # timing/device expectations apply below.
  has_gpu = test.is_gpu_available()
  dev = '/gpu:0' if has_gpu else '/cpu:0'
  outfile = os.path.join(test.get_temp_dir(), 'dump')
  opts = (builder(
      builder.trainable_variables_parameter()).with_file_output(
          outfile).with_accounted_types(['.*']).select([
              'micros', 'bytes', 'params', 'float_ops', 'occurrence',
              'device', 'op_types', 'input_shapes'
          ]).build())

  config = config_pb2.ConfigProto()
  with session.Session(config=config) as sess, ops.device(dev):
    x = lib.BuildSmallModel()

    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(x,
                 options=config_pb2.RunOptions(
                     trace_level=config_pb2.RunOptions.FULL_TRACE),
                 run_metadata=run_meta)

    model_analyzer.profile(sess.graph, run_meta, options=opts)

    with gfile.Open(outfile, 'r') as f:
      outputs = f.read().split('\n')

    # pylint: disable=line-too-long
    self.assertEqual(
        outputs[0],
        'node name | # parameters | # float_ops | requested bytes | total execution time | accelerator execution time | cpu execution time | assigned devices | op types | op count (run|defined) | input shapes'
    )
    # pylint: enable=line-too-long

    for o in outputs[1:]:
      if o.find('Conv2D ') > 0:
        metrics = o[o.find('(') + 1:o.find(')')].split(',')
        # Make sure time is profiled. Without a GPU the accelerator column
        # (index 4) is skipped by stepping over every second column.
        gap = 1 if has_gpu else 2
        for i in range(3, 6, gap):
          # Accept both 'us' and 'ms' units; matching 'us' only makes
          # re.search return None whenever a time is printed in ms.
          mat = re.search(r'(.*)[um]s/(.*)[um]s', metrics[i])
          self.assertIsNotNone(mat, metrics[i])
          self.assertGreater(float(mat.group(1)), 0.0)
          self.assertGreater(float(mat.group(2)), 0.0)
        # Make sure device is profiled.
        if has_gpu:
          self.assertTrue(metrics[6].find('gpu') > 0)
          self.assertFalse(metrics[6].find('cpu') > 0)
        else:
          self.assertFalse(metrics[6].find('gpu') > 0)
          self.assertTrue(metrics[6].find('cpu') > 0)
        # Make sure float_ops is profiled.
        mat = re.search(r'(.*)k/(.*)k flops', metrics[1].strip())
        self.assertIsNotNone(mat, metrics[1])
        self.assertGreater(float(mat.group(1)), 0.0)
        self.assertGreater(float(mat.group(2)), 0.0)
        # Make sure op_count is profiled.
        self.assertEqual(metrics[8].strip(), '1/1|1/1')
        # Make sure input_shapes is profiled.
        self.assertEqual(metrics[9].strip(), '0:2x6x6x3|1:3x3x3x6')

      if o.find('DW (3x3x3x6') > 0:
        metrics = o[o.find('(') + 1:o.find(')')].split(',')
        mat = re.search(r'(.*)/(.*) params', metrics[1].strip())
        self.assertIsNotNone(mat, metrics[1])
        self.assertGreater(float(mat.group(1)), 0.0)
        self.assertGreater(float(mat.group(2)), 0.0)