def testSelectEverything(self):
  """Profiles the small model and compares the file dump to a golden string.

  The profile selects 'params', 'float_ops', 'occurrence', 'device',
  'op_types' and 'input_shapes', accounts every op type ('.*'), and is
  written to a file under the test temp directory. The session is created
  with `disable_model_pruning=True` in the rewriter config and the model is
  built under '/cpu:0'.
  """
  ops.reset_default_graph()
  outfile = os.path.join(test.get_temp_dir(), 'dump')
  opts = (builder(builder.trainable_variables_parameter())
          .with_file_output(outfile)
          .with_accounted_types(['.*'])
          .select(['params', 'float_ops', 'occurrence',
                   'device', 'op_types', 'input_shapes']).build())

  rewriter_config = rewriter_config_pb2.RewriterConfig(
      disable_model_pruning=True)
  graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
  config = config_pb2.ConfigProto(graph_options=graph_options)

  with session.Session(config=config) as sess, ops.device('/cpu:0'):
    x = lib.BuildSmallModel()

    sess.run(variables.global_variables_initializer())
    # Run once with full tracing so run_meta carries the step statistics
    # passed to the profiler below.
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(x,
                 options=config_pb2.RunOptions(
                     trace_level=config_pb2.RunOptions.FULL_TRACE),
                 run_metadata=run_meta)

    model_analyzer.profile(
        sess.graph, run_meta, options=opts)

    # The expected text must match the dump exactly, hence one long literal.
    with gfile.Open(outfile, 'r') as f:
      # pylint: disable=line-too-long
      self.assertEqual(
          'node name | # parameters | # float_ops | assigned devices | op types | op count (run|defined) | input shapes\n_TFProfRoot (--/451 params, --/10.44k flops, _kTFScopeParent, --/7|--/35, )\n Conv2D (0/0 params, 5.83k/5.83k flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D, 1/1|1/1, 0:2x6x6x3|1:3x3x3x6)\n Conv2D_1 (0/0 params, 4.61k/4.61k flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D, 1/1|1/1, 0:2x3x3x6|1:2x2x6x12)\n DW (3x3x3x6, 162/162 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n DW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:3x3x3x6|1:3x3x3x6)\n DW/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n DW/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:3x3x3x6|1:1)\n DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:3x3x3x6|1:1)\n DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity, 1/1|1/1, 0:3x3x3x6)\n DW2 (2x2x6x12, 288/288 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n DW2/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:2x2x6x12|1:2x2x6x12)\n DW2/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n DW2/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:2x2x6x12|1:1)\n DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:2x2x6x12|1:1)\n DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity, 1/1|1/1, 0:2x2x6x12)\n ScalarW (1, 1/1 params, 0/0 flops, VariableV2|_trainable_variables, 0/0|1/10, )\n ScalarW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:1|1:1)\n ScalarW/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n ScalarW/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:1|1:1)\n ScalarW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:0)\n ScalarW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:1|1:1)\n ScalarW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/read (0/0 params, 0/0 flops, Identity, 0/0|1/1, 0:1)\n init (0/0 params, 0/0 flops, NoOp, 0/0|1/1, 0:1|1:3x3x3x6|2:2x2x6x12)\n zeros (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const, 1/1|1/1, )\n',
          f.read())
def testTrackPersistentBytes(self):
  """Checks that the memory stats of 'mul' are positive and stable.

  The same multiplication is traced twice; the peak, output and residual
  bytes reported for the 'mul' node must be greater than zero in the first
  profile and equal between the two profiles.
  """
  ops.reset_default_graph()
  lhs = array_ops.constant(np.ones((100, 100)))
  rhs = array_ops.constant(np.ones((100, 100)))
  product = lhs * rhs

  with session.Session() as sess:
    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)

    # First traced run and its profile.
    first_meta = config_pb2.RunMetadata()
    sess.run(product, options=trace_options, run_metadata=first_meta)

    options = option_builder.ProfileOptionBuilder.time_and_memory()
    options['min_bytes'] = 0
    options['select'] = ('bytes', 'peak_bytes', 'output_bytes',
                         'residual_bytes')
    first_profile = model_analyzer.profile(
        sess.graph, run_meta=first_meta, cmd='scope', options=options)

    # Second traced run, profiled with the same options.
    second_meta = config_pb2.RunMetadata()
    sess.run(product, options=trace_options, run_metadata=second_meta)
    second_profile = model_analyzer.profile(
        sess.graph, run_meta=second_meta, cmd='scope', options=options)

    first_mul = lib.SearchTFProfNode(first_profile, 'mul')
    second_mul = lib.SearchTFProfNode(second_profile, 'mul')

    tracked_fields = ('peak_bytes', 'output_bytes', 'residual_bytes')
    for field in tracked_fields:
      self.assertGreater(getattr(first_mul, field), 0)
    for field in tracked_fields:
      self.assertEqual(getattr(first_mul, field),
                       getattr(second_mul, field))
def testTrackPersistentBytes(self):
  """Checks that the memory stats of 'mul' are positive and stable.

  The same multiplication is traced twice; the peak, output and residual
  bytes reported for the 'mul' node must be greater than zero in the first
  profile and equal between the two profiles.
  """
  ops.reset_default_graph()
  lhs = array_ops.constant(np.ones((100, 100)))
  rhs = array_ops.constant(np.ones((100, 100)))
  product = lhs * rhs

  with session.Session() as sess:
    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)

    # First traced run and its profile.
    first_meta = config_pb2.RunMetadata()
    sess.run(product, options=trace_options, run_metadata=first_meta)

    options = option_builder.ProfileOptionBuilder.time_and_memory()
    options['min_bytes'] = 0
    options['select'] = ('bytes', 'peak_bytes', 'output_bytes',
                         'residual_bytes')
    first_profile = model_analyzer.profile(
        sess.graph, run_meta=first_meta, cmd='scope', options=options)

    # Second traced run, profiled with the same options.
    second_meta = config_pb2.RunMetadata()
    sess.run(product, options=trace_options, run_metadata=second_meta)
    second_profile = model_analyzer.profile(
        sess.graph, run_meta=second_meta, cmd='scope', options=options)

    first_mul = lib.SearchTFProfNode(first_profile, 'mul')
    second_mul = lib.SearchTFProfNode(second_profile, 'mul')

    tracked_fields = ('peak_bytes', 'output_bytes', 'residual_bytes')
    for field in tracked_fields:
      self.assertGreater(getattr(first_mul, field), 0)
    for field in tracked_fields:
      self.assertEqual(getattr(first_mul, field),
                       getattr(second_mul, field))
def testSimpleCodeView(self):
  """Checks the first 80 characters of the 'code' view dump.

  The small model is traced once and profiled with cmd='code' into a file;
  the beginning of that file must equal the expected column header.
  """
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')

  profile_opts = model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS.copy()
  # TODO(xpan): Test 'micros'. Since the execution time changes each run,
  # it's a bit difficult to test it now.
  overrides = (
      ('output', 'file:outfile=' + dump_path),
      ('account_type_regexes', ['.*']),
      ('show_name_regexes', ['.*model_analyzer_testlib.*']),
      ('account_displayed_op_only', False),
      ('select', ['bytes', 'params', 'float_ops', 'num_hidden_ops',
                  'device', 'input_shapes']),
  )
  for key, value in overrides:
    profile_opts[key] = value

  with session.Session() as sess:
    model_out = lib.BuildSmallModel()
    sess.run(variables.global_variables_initializer())

    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(model_out, options=trace_options, run_metadata=run_meta)

    model_analyzer.profile(
        sess.graph, run_meta, cmd='code', options=profile_opts)

    with gfile.Open(dump_path, 'r') as dump_file:
      # pylint: disable=line-too-long
      expected_header = 'node name | output bytes | # parameters | # float_ops | assigned devices | input'
      self.assertEqual(expected_header, dump_file.read()[0:80])
def testSelectEverything(self):
  """Profiles the small model and compares the file dump to a golden string.

  The profile selects 'params', 'float_ops', 'occurrence', 'device',
  'op_types' and 'input_shapes', accounts every op type ('.*'), and is
  written to a file under the test temp directory. The session is created
  with `disable_model_pruning=True` in the rewriter config and the model is
  built under '/cpu:0'.
  """
  ops.reset_default_graph()
  outfile = os.path.join(test.get_temp_dir(), 'dump')
  opts = (builder(
      builder.trainable_variables_parameter()).with_file_output(
          outfile).with_accounted_types(['.*']).select([
              'params', 'float_ops', 'occurrence', 'device', 'op_types',
              'input_shapes'
          ]).build())

  rewriter_config = rewriter_config_pb2.RewriterConfig(
      disable_model_pruning=True)
  graph_options = config_pb2.GraphOptions(
      rewrite_options=rewriter_config)
  config = config_pb2.ConfigProto(graph_options=graph_options)

  with session.Session(config=config) as sess, ops.device('/cpu:0'):
    x = lib.BuildSmallModel()

    sess.run(variables.global_variables_initializer())
    # Run once with full tracing so run_meta carries the step statistics
    # passed to the profiler below.
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(x,
                 options=config_pb2.RunOptions(
                     trace_level=config_pb2.RunOptions.FULL_TRACE),
                 run_metadata=run_meta)

    model_analyzer.profile(sess.graph, run_meta, options=opts)

    # The expected text must match the dump exactly, hence one long literal.
    with gfile.Open(outfile, 'r') as f:
      # pylint: disable=line-too-long
      self.assertEqual(
          'node name | # parameters | # float_ops | assigned devices | op types | op count (run|defined) | input shapes\n_TFProfRoot (--/451 params, --/10.44k flops, _kTFScopeParent, --/8|--/36, )\n Conv2D (0/0 params, 5.83k/5.83k flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D, 1/1|1/1, 0:2x6x6x3|1:3x3x3x6)\n Conv2D_1 (0/0 params, 4.61k/4.61k flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D, 1/1|1/1, 0:2x3x3x6|1:2x2x6x12)\n DW (3x3x3x6, 162/162 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n DW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:3x3x3x6|1:3x3x3x6)\n DW/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n DW/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:3x3x3x6|1:1)\n DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:3x3x3x6|1:1)\n DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity, 1/1|1/1, 0:3x3x3x6)\n DW2 (2x2x6x12, 288/288 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n DW2/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:2x2x6x12|1:2x2x6x12)\n DW2/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n DW2/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:2x2x6x12|1:1)\n DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:2x2x6x12|1:1)\n DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity, 1/1|1/1, 0:2x2x6x12)\n ScalarW (1, 1/1 params, 0/0 flops, VariableV2|_trainable_variables, 0/0|1/10, )\n ScalarW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:1|1:1)\n ScalarW/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n ScalarW/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:1|1:1)\n ScalarW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:0)\n ScalarW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:1|1:1)\n ScalarW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/read (0/0 params, 0/0 flops, Identity, 0/0|1/1, 0:1)\n _retval_Conv2D_1_0_0 (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|RunTimeOp, 1/1|1/1, )\n init (0/0 params, 0/0 flops, NoOp, 0/0|1/1, 0:1|1:3x3x3x6|2:2x2x6x12)\n zeros (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const, 1/1|1/1, )\n',
          f.read())
def testSimpleCodeView(self):
  """Checks the first 80 characters of the 'code' view dump.

  The small model is traced once and profiled with cmd='code' into a file;
  the beginning of that file must equal the expected column header.
  """
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')

  # TODO(xpan): Test 'micros'. Since the execution time changes each run,
  # it's a bit difficult to test it now.
  selected_columns = ['bytes', 'params', 'float_ops', 'num_hidden_ops',
                      'device', 'input_shapes']
  opt_builder = builder(builder.trainable_variables_parameter())
  opt_builder = opt_builder.with_file_output(dump_path)
  opt_builder = opt_builder.with_accounted_types(['.*'])
  opt_builder = opt_builder.with_node_names(
      show_name_regexes=['.*model_analyzer_testlib.*'])
  opt_builder = opt_builder.account_displayed_op_only(False)
  opts = opt_builder.select(selected_columns).build()

  with session.Session() as sess:
    model_out = lib.BuildSmallModel()
    sess.run(variables.global_variables_initializer())

    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(model_out, options=trace_options, run_metadata=run_meta)

    model_analyzer.profile(sess.graph, run_meta, cmd='code', options=opts)

    with gfile.Open(dump_path, 'r') as dump_file:
      # pylint: disable=line-too-long
      expected_header = 'node name | output bytes | # parameters | # float_ops | assigned devices | input'
      self.assertEqual(expected_header, dump_file.read()[0:80])
def testSelectEverything(self):
  """Profiles the small model with all listed columns into a dump file.

  This version only exercises the profiling path; it makes no assertion on
  the file content. The session disables model pruning in the rewriter
  config and the model is built under '/device:CPU:0'.
  """
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')

  selected_columns = ['params', 'float_ops', 'occurrence', 'device',
                      'op_types', 'input_shapes']
  opt_builder = builder(builder.trainable_variables_parameter())
  opt_builder = opt_builder.with_file_output(dump_path)
  opt_builder = opt_builder.with_accounted_types(['.*'])
  opts = opt_builder.select(selected_columns).build()

  session_config = config_pb2.ConfigProto(
      graph_options=config_pb2.GraphOptions(
          rewrite_options=rewriter_config_pb2.RewriterConfig(
              disable_model_pruning=True)))

  with session.Session(
      config=session_config) as sess, ops.device('/device:CPU:0'):
    model_out = lib.BuildSmallModel()
    sess.run(variables.global_variables_initializer())

    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(model_out, options=trace_options, run_metadata=run_meta)

    model_analyzer.profile(sess.graph, run_meta, options=opts)
def testSimpleCodeView(self):
  """Checks the first 80 characters of the 'code' view dump.

  The dump is passed through `lib.CheckAndRemoveDoc` before its first 80
  characters are compared with the expected column header.
  """
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')

  # TODO(xpan): Test 'micros'. Since the execution time changes each run,
  # it's a bit difficult to test it now.
  selected_columns = [
      'bytes', 'params', 'float_ops', 'num_hidden_ops', 'device',
      'input_shapes'
  ]
  opt_builder = builder(builder.trainable_variables_parameter())
  opt_builder = opt_builder.with_file_output(dump_path)
  opt_builder = opt_builder.with_accounted_types(['.*'])
  opt_builder = opt_builder.with_node_names(
      show_name_regexes=['.*model_analyzer_testlib.*'])
  opt_builder = opt_builder.account_displayed_op_only(False)
  opts = opt_builder.select(selected_columns).build()

  with session.Session() as sess:
    model_out = lib.BuildSmallModel()
    sess.run(variables.global_variables_initializer())

    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(model_out, options=trace_options, run_metadata=run_meta)

    model_analyzer.profile(sess.graph, run_meta, cmd='code', options=opts)

    with gfile.Open(dump_path, 'r') as dump_file:
      # pylint: disable=line-too-long
      expected_header = 'node name | requested bytes | # parameters | # float_ops | assigned devices | in'
      self.assertEqual(expected_header,
                       lib.CheckAndRemoveDoc(dump_file.read())[0:80])
def testSelectEverthingDetail(self):
  """Checks individual columns of the dump for the Conv2D and DW rows.

  The small model is built on '/gpu:0' when a GPU is available and on
  '/cpu:0' otherwise, traced once, and profiled into a file. The header line
  is compared exactly. For every row containing 'Conv2D ' the timing,
  device, float_ops, op count and input shape columns are checked; for the
  'DW (3x3x3x6' row the parameter counts are checked.
  """
  ops.reset_default_graph()
  # Query GPU availability once; every later branch relies on the same answer.
  gpu_available = test.is_gpu_available()
  dev = '/gpu:0' if gpu_available else '/cpu:0'
  outfile = os.path.join(test.get_temp_dir(), 'dump')
  opts = (builder(builder.trainable_variables_parameter())
          .with_file_output(outfile)
          .with_accounted_types(['.*'])
          .select(['micros', 'bytes', 'params', 'float_ops', 'occurrence',
                   'device', 'op_types', 'input_shapes']).build())

  config = config_pb2.ConfigProto()
  with session.Session(config=config) as sess, ops.device(dev):
    x = lib.BuildSmallModel()

    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(x,
                 options=config_pb2.RunOptions(
                     trace_level=config_pb2.RunOptions.FULL_TRACE),
                 run_metadata=run_meta)

    model_analyzer.profile(
        sess.graph, run_meta, options=opts)

    with gfile.Open(outfile, 'r') as f:
      # pylint: disable=line-too-long
      outputs = f.read().split('\n')

      self.assertEqual(outputs[0],
                       'node name | # parameters | # float_ops | requested bytes | total execution time | accelerator execution time | cpu execution time | assigned devices | op types | op count (run|defined) | input shapes')
      for o in outputs[1:]:
        # Use containment rather than `find(...) > 0`, which would silently
        # skip a match located at offset 0.
        if 'Conv2D ' in o:
          metrics = o[o.find('(') + 1:o.find(')')].split(',')
          # Make sure time is profiled.
          # With a GPU all three time columns (3, 4, 5) are checked; without
          # one the stride of 2 skips column 4.
          gap = 1 if gpu_available else 2
          for i in range(3, 6, gap):
            mat = re.search('(.*)[um]s/(.*)[um]s', metrics[i])
            # Fail with a readable message instead of an AttributeError.
            self.assertIsNotNone(mat, metrics[i])
            self.assertGreater(float(mat.group(1)), 0.0)
            self.assertGreater(float(mat.group(2)), 0.0)
          # Make sure device is profiled.
          if gpu_available:
            self.assertIn('gpu', metrics[6])
            self.assertNotIn('cpu', metrics[6])
          else:
            self.assertNotIn('gpu', metrics[6])
            self.assertIn('cpu', metrics[6])
          # Make sure float_ops is profiled.
          mat = re.search('(.*)k/(.*)k flops', metrics[1].strip())
          self.assertIsNotNone(mat, metrics[1])
          self.assertGreater(float(mat.group(1)), 0.0)
          self.assertGreater(float(mat.group(2)), 0.0)
          # Make sure op_count is profiled.
          self.assertEqual(metrics[8].strip(), '1/1|1/1')
          # Make sure input_shapes is profiled.
          self.assertEqual(metrics[9].strip(), '0:2x6x6x3|1:3x3x3x6')

        if 'DW (3x3x3x6' in o:
          metrics = o[o.find('(') + 1:o.find(')')].split(',')
          mat = re.search('(.*)/(.*) params', metrics[1].strip())
          self.assertIsNotNone(mat, metrics[1])
          self.assertGreater(float(mat.group(1)), 0.0)
          self.assertGreater(float(mat.group(2)), 0.0)
def testSelectEverything(self):
  """Profiles the small model with all listed columns into a dump file.

  This version only exercises the profiling path; it makes no assertion on
  the file content. The session disables model pruning in the rewriter
  config and the model is built under '/device:CPU:0'.
  """
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')

  selected_columns = ['params', 'float_ops', 'occurrence', 'device',
                      'op_types', 'input_shapes']
  opt_builder = builder(builder.trainable_variables_parameter())
  opt_builder = opt_builder.with_file_output(dump_path)
  opt_builder = opt_builder.with_accounted_types(['.*'])
  opts = opt_builder.select(selected_columns).build()

  session_config = config_pb2.ConfigProto(
      graph_options=config_pb2.GraphOptions(
          rewrite_options=rewriter_config_pb2.RewriterConfig(
              disable_model_pruning=True)))

  with session.Session(
      config=session_config) as sess, ops.device('/device:CPU:0'):
    model_out = lib.BuildSmallModel()
    sess.run(variables.global_variables_initializer())

    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(model_out, options=trace_options, run_metadata=run_meta)

    model_analyzer.profile(sess.graph, run_meta, options=opts)
def _profiled_run(self,
                  fetches,
                  feed_dict=None,
                  options=None,
                  run_metadata=None):
  """Overwrites the session.run().

  Registers a new step on `self.profile_context`. If the context reports no
  profile candidates, no dump candidates and no enforced capture, the call
  is forwarded unchanged to `self._profiler_run_internal`. Otherwise the run
  is executed with `trace_level` set to FULL_TRACE, and the collected run
  metadata is used for the pending dumps and profiles.

  Args:
    fetches: Forwarded to `self._profiler_run_internal`.
    feed_dict: Forwarded to `self._profiler_run_internal`.
    options: Optional `RunOptions`. When given, its `trace_level` is
      temporarily overridden and restored before returning, even if the run
      or the profiling step raises.
    run_metadata: Optional `RunMetadata`; a new one is created when falsy.

  Returns:
    Whatever `self._profiler_run_internal` returns.
  """
  # pylint: disable=protected-access
  # Count the session steps.
  self.profile_context._new_step()

  # Fast path if no need for profiling.
  to_profiles = self.profile_context._profile_candidates()
  to_dumps = self.profile_context._dump_candidates()
  if (not to_profiles and not to_dumps and
      not self.profile_context._is_capture_enforced()):
    return self._profiler_run_internal(
        fetches, feed_dict, options, run_metadata)

  # Enable tracing, perform auto profiling or auto dump.
  if not run_metadata:
    run_metadata = config_pb2.RunMetadata()

  if not options:
    options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    old_trace_level = options.trace_level
  else:
    old_trace_level = options.trace_level
    options.trace_level = config_pb2.RunOptions.FULL_TRACE

  try:
    ret = self._profiler_run_internal(
        fetches, feed_dict, options, run_metadata)
    if self.profile_context._capture_next_step:
      self.profile_context._add_run_meta(run_metadata)

    # Each dump candidate's first element is the output directory; the graph,
    # the serialized run metadata and the op log are written into it.
    for to_dump in to_dumps:
      outdir, _ = to_dump
      if not gfile.Exists(outdir):
        gfile.MakeDirs(outdir)
      with gfile.Open(os.path.join(outdir, 'graph.pbtxt'), 'w') as f:
        f.write('%s' % self.graph.as_graph_def(add_shapes=True))
      with gfile.Open(os.path.join(outdir, 'run_metadata'), 'w') as f:
        f.write(run_metadata.SerializeToString())
      tfprof_logger.write_op_log(
          self.graph, outdir, run_meta=run_metadata, add_trace=True)

    for to_prof in to_profiles:
      cmd, opts, _ = to_prof
      model_analyzer.profile(
          self.graph, run_meta=run_metadata, cmd=cmd, options=opts)
  finally:
    # Restore to default. Done in `finally` so that a caller-owned `options`
    # object is not left at FULL_TRACE when the run raises.
    options.trace_level = old_trace_level
  return ret
def _profiled_run(self,
                  fetches,
                  feed_dict=None,
                  options=None,
                  run_metadata=None):
  """Overwrites the session.run().

  Registers a new step on `self.profile_context`. If the context reports no
  profile candidates, no dump candidates and no enforced capture, the call
  is forwarded unchanged to `self._profiler_run_internal`. Otherwise the run
  is executed with `trace_level` set to FULL_TRACE, and the collected run
  metadata is used for the pending dumps and profiles.

  Args:
    fetches: Forwarded to `self._profiler_run_internal`.
    feed_dict: Forwarded to `self._profiler_run_internal`.
    options: Optional `RunOptions`. When given, its `trace_level` is
      temporarily overridden and restored before returning, even if the run
      or the profiling step raises.
    run_metadata: Optional `RunMetadata`; a new one is created when falsy.

  Returns:
    Whatever `self._profiler_run_internal` returns.
  """
  # pylint: disable=protected-access
  # Count the session steps.
  self.profile_context._new_step()

  # Fast path if no need for profiling.
  to_profiles = self.profile_context._profile_candidates()
  to_dumps = self.profile_context._dump_candidates()
  if (not to_profiles and not to_dumps and
      not self.profile_context._is_capture_enforced()):
    return self._profiler_run_internal(
        fetches, feed_dict, options, run_metadata)

  # Enable tracing, perform auto profiling or auto dump.
  if not run_metadata:
    run_metadata = config_pb2.RunMetadata()

  if not options:
    options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    old_trace_level = options.trace_level
  else:
    old_trace_level = options.trace_level
    options.trace_level = config_pb2.RunOptions.FULL_TRACE

  try:
    ret = self._profiler_run_internal(
        fetches, feed_dict, options, run_metadata)
    if self.profile_context._capture_next_step:
      self.profile_context._add_run_meta(run_metadata)

    # Each dump candidate's first element is the output directory; the graph,
    # the serialized run metadata and the op log are written into it.
    for to_dump in to_dumps:
      outdir, _ = to_dump
      if not gfile.Exists(outdir):
        gfile.MakeDirs(outdir)
      with gfile.Open(os.path.join(outdir, 'graph.pbtxt'), 'w') as f:
        f.write('%s' % self.graph.as_graph_def(add_shapes=True))
      with gfile.Open(os.path.join(outdir, 'run_metadata'), 'w') as f:
        f.write(run_metadata.SerializeToString())
      tfprof_logger.write_op_log(
          self.graph, outdir, run_meta=run_metadata, add_trace=True)

    for to_prof in to_profiles:
      cmd, opts, _ = to_prof
      model_analyzer.profile(
          self.graph, run_meta=run_metadata, cmd=cmd, options=opts)
  finally:
    # Restore to default. Done in `finally` so that a caller-owned `options`
    # object is not left at FULL_TRACE when the run raises.
    options.trace_level = old_trace_level
  return ret
def testSelectOption(self):
  """Checks that `select` controls which columns appear in the dump.

  The small model is traced once. The same run metadata is then profiled
  three times with different `select` lists, and after each profile the
  dump file is checked for column names that must and must not be present.
  """
  ops.reset_default_graph()
  outfile = os.path.join(test.get_temp_dir(), 'dump')

  def check_selection(selected, not_selected):
    """Asserts which of the given strings occur in the current dump file."""
    with gfile.Open(outfile, 'r') as f:
      s = f.read()
      # Containment checks instead of `find(...) > 0`, which would treat a
      # match at offset 0 as "not found".
      for attr in selected:
        self.assertIn(attr, s)
      for attr in not_selected:
        self.assertNotIn(attr, s)

  with session.Session() as sess:
    x = lib.BuildSmallModel()

    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(x,
                 options=config_pb2.RunOptions(
                     trace_level=config_pb2.RunOptions.FULL_TRACE),
                 run_metadata=run_meta)

    opts = builder(
        builder.time_and_memory()).with_file_output(outfile).select(
            ['micros']).build()
    _ = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts)
    check_selection(
        ['total execution time', 'accelerator execution time'], ['bytes'])

    opts = builder(
        builder.time_and_memory()).with_file_output(outfile).select(
            ['bytes']).build()
    _ = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts)
    check_selection(['requested bytes'],
                    ['peak bytes', 'residual bytes', 'output bytes'])

    opts = builder(
        builder.time_and_memory()).with_file_output(outfile).select(
            ['peak_bytes', 'residual_bytes', 'output_bytes']).build()
    _ = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts)
    # NOTE(review): the column checked above is spelled 'requested bytes'
    # (with a space); 'requested_bytes' here may never match anything, which
    # would make this negative check vacuous -- confirm the intended text.
    check_selection(['peak bytes', 'residual bytes', 'output bytes'],
                    ['requested_bytes'])
def testComplexCodeView(self):
  """Checks the 'code' view of the full model against golden output.

  The full model is traced once and profiled with cmd='code', selecting
  'params' and 'float_ops'. Every line of the dump file is truncated to at
  most 80 characters and the result is compared with the expected text.
  The returned profile node is then checked for its totals, its name and
  the names of its eight children.
  """
  ops.reset_default_graph()
  outfile = os.path.join(test.get_temp_dir(), 'dump')
  opts = (builder(builder.trainable_variables_parameter())
          .with_file_output(outfile)
          .with_accounted_types(['.*'])
          .with_node_names(show_name_regexes=
                           ['.*model_analyzer_testlib.py.*'])
          .account_displayed_op_only(False)
          .select(['params', 'float_ops']).build())

  with session.Session() as sess:
    x = lib.BuildFullModel()

    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(x,
                 options=config_pb2.RunOptions(
                     trace_level=config_pb2.RunOptions.FULL_TRACE),
                 run_metadata=run_meta)

    tfprof_node = model_analyzer.profile(
        sess.graph, run_meta, cmd='code', options=opts)

    # pylint: disable=line-too-long
    with gfile.Open(outfile, 'r') as f:
      lines = f.read().split('\n')
      # Only the first 80 characters of each dumped line are compared.
      result = '\n'.join([l[:min(len(l), 80)] for l in lines])
      self.assertEqual('node name | # parameters | # float_ops\n_TFProfRoot (--/2.84k params, --/91.04k flops)\n model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_... (0/1.80k para\n model_analyzer_testlib.py:35:BuildSmallModel:image = array_ops... (0/0 param\n model_analyzer_testlib.py:39:BuildSmallModel:initializer=init_... (0/4 param\n model_analyzer_testlib.py:43:BuildSmallModel:initializer=init_... (0/648 par\n model_analyzer_testlib.py:44:BuildSmallModel:x = nn_ops.conv2d... (0/0 param\n model_analyzer_testlib.py:48:BuildSmallModel:initializer=init_... (0/1.15k p\n model_analyzer_testlib.py:49:BuildSmallModel:x = nn_ops.conv2d... (0/0 param\n model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_... (gradient) (0\n model_analyzer_testlib.py:44:BuildSmallModel:x = nn_ops.conv2d... (gradient)\n model_analyzer_testlib.py:49:BuildSmallModel:x = nn_ops.conv2d... (gradient)\n model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c... (0/1.04k para\n model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c... (gradient) (0\n model_analyzer_testlib.py:64:BuildFullModel:target = array_op... (0/0 params, \n model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_... (0/0 params, \n model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_... (gradient) (0\n model_analyzer_testlib.py:67:BuildFullModel:return sgd_op.min... (0/0 params, \n',
                       result)

    # Totals and structure of the profile node returned by profile().
    self.assertLess(0, tfprof_node.total_exec_micros)
    self.assertEqual(2844, tfprof_node.total_parameters)
    self.assertEqual(91040, tfprof_node.total_float_ops)
    self.assertEqual(8, len(tfprof_node.children))
    self.assertEqual('_TFProfRoot', tfprof_node.name)
    self.assertEqual(
        'model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_...',
        tfprof_node.children[0].name)
    self.assertEqual(
        'model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_... (gradient)',
        tfprof_node.children[1].name)
    self.assertEqual(
        'model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c...',
        tfprof_node.children[2].name)
    self.assertEqual(
        'model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c... (gradient)',
        tfprof_node.children[3].name)
    self.assertEqual(
        'model_analyzer_testlib.py:64:BuildFullModel:target = array_op...',
        tfprof_node.children[4].name)
    self.assertEqual(
        'model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_...',
        tfprof_node.children[5].name)
    self.assertEqual(
        'model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_... (gradient)',
        tfprof_node.children[6].name)
    self.assertEqual(
        'model_analyzer_testlib.py:67:BuildFullModel:return sgd_op.min...',
        tfprof_node.children[7].name)
def testTimeline(self):
  """Checks that the 'graph' view writes a non-trivial timeline file.

  The file must be longer than 1000 characters, except on Windows
  (os.name == 'nt') where it only has to be longer than one character.
  """
  ops.reset_default_graph()
  timeline_path = os.path.join(test.get_temp_dir(), 'timeline')

  profile_opts = model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS.copy()
  overrides = (
      ('output', 'timeline:outfile=' + timeline_path),
      ('account_type_regexes', ['.*']),
      ('max_depth', 100000),
      ('step', 0),
  )
  for key, value in overrides:
    profile_opts[key] = value

  with session.Session() as sess:
    model_out = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())

    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(model_out, options=trace_options, run_metadata=run_meta)

    _ = model_analyzer.profile(
        sess.graph, run_meta, cmd='graph', options=profile_opts)

    with gfile.Open(timeline_path, 'r') as timeline_file:
      # Test that a json file is created.
      # TODO(xpan): tfprof Timeline isn't quite correct on Windows.
      # Investigate why.
      min_length = 1000 if os.name != 'nt' else 1
      self.assertLess(min_length, len(timeline_file.read()))
def testCodeViewLeafGraphNode(self):
  """Checks where graph nodes are attached in the 'code' view tree.

  Following the first child from the root, every node that has children
  must carry no graph nodes, and the final node must carry exactly one.
  """
  ops.reset_default_graph()

  profile_opts = model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS.copy()
  overrides = (
      ('account_type_regexes', ['.*']),
      ('account_displayed_op_only', False),
      ('select', ['bytes', 'params', 'float_ops', 'device']),
      ('output', 'none'),
  )
  for key, value in overrides:
    profile_opts[key] = value

  with session.Session() as sess:
    model_out = lib.BuildSmallModel()
    sess.run(variables.global_variables_initializer())

    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(model_out, options=trace_options, run_metadata=run_meta)

    root = model_analyzer.profile(
        sess.graph, run_meta, cmd='code', options=profile_opts)

    # Descend through first children until a node without children is hit.
    node = root
    while node.children:
      self.assertEqual(0, len(node.graph_nodes))
      node = node.children[0]
    self.assertEqual(1, len(node.graph_nodes))
def _run_model():
  """Runs a random matmul with full tracing and profiles it.

  Returns:
    A tuple of the node returned by `model_analyzer.profile` and the
    `RunMetadata` collected during the traced run.
  """
  inputs = random_ops.random_normal(shape=[1, SIZE])
  weights = random_ops.random_normal(shape=[SIZE, 2 * SIZE])
  product = math_ops.matmul(inputs, weights)

  session_config = config_pb2.ConfigProto()
  session_config.graph_options.rewrite_options.arithmetic_optimization = (
      rewriter_config_pb2.RewriterConfig.OFF)

  with session.Session(config=session_config) as sess:
    run_metadata = config_pb2.RunMetadata()

    profile_opts = builder.time_and_memory()
    overrides = (
        ('min_micros', 0),
        ('min_bytes', 0),
        ('order_by', 'name'),
        ('output', 'none'),
    )
    for key, value in overrides:
      profile_opts[key] = value

    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    _ = sess.run(product, options=trace_options, run_metadata=run_metadata)

    profile_root = model_analyzer.profile(
        sess.graph, run_meta=run_metadata, options=profile_opts)

    return profile_root, run_metadata
def testCodeViewLeafGraphNode(self):
  """Checks where graph nodes are attached in the 'code' view tree.

  Following the first child from the root, every node that has children
  must carry no graph nodes, and the final node must carry exactly one.
  """
  ops.reset_default_graph()

  opt_builder = builder(builder.trainable_variables_parameter())
  opt_builder = opt_builder.with_empty_output()
  opt_builder = opt_builder.with_accounted_types(['.*'])
  opt_builder = opt_builder.account_displayed_op_only(False)
  opts = opt_builder.select(
      ['bytes', 'params', 'float_ops', 'device']).build()

  with session.Session() as sess:
    model_out = lib.BuildSmallModel()
    sess.run(variables.global_variables_initializer())

    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(model_out, options=trace_options, run_metadata=run_meta)

    root = model_analyzer.profile(
        sess.graph, run_meta, cmd='code', options=opts)

    # Descend through first children until a node without children is hit.
    node = root
    while node.children:
      self.assertEqual(0, len(node.graph_nodes))
      node = node.children[0]
    self.assertEqual(1, len(node.graph_nodes))
def testTimeline(self):
  """Checks that the 'graph' view writes a non-trivial timeline file.

  The file must be longer than 1000 characters, except on Windows
  (os.name == 'nt') where it only has to be longer than one character.
  """
  ops.reset_default_graph()
  timeline_path = os.path.join(test.get_temp_dir(), 'timeline')

  profile_opts = model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS.copy()
  overrides = (
      ('output', 'timeline:outfile=' + timeline_path),
      ('account_type_regexes', ['.*']),
      ('max_depth', 100000),
      ('step', 0),
  )
  for key, value in overrides:
    profile_opts[key] = value

  with session.Session() as sess:
    model_out = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())

    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(model_out, options=trace_options, run_metadata=run_meta)

    _ = model_analyzer.profile(
        sess.graph, run_meta, cmd='graph', options=profile_opts)

    with gfile.Open(timeline_path, 'r') as timeline_file:
      # Test that a json file is created.
      # TODO(xpan): tfprof Timeline isn't quite correct on Windows.
      # Investigate why.
      min_length = 1000 if os.name != 'nt' else 1
      self.assertLess(min_length, len(timeline_file.read()))
def testTimeline(self):
  """Checks that the 'graph' view writes a non-trivial timeline file.

  The file must be longer than 1000 characters, except on Windows
  (os.name == 'nt') where it only has to be longer than one character.
  """
  ops.reset_default_graph()
  timeline_path = os.path.join(test.get_temp_dir(), 'timeline')

  opt_builder = builder(builder.trainable_variables_parameter())
  opt_builder = opt_builder.with_max_depth(100000)
  opt_builder = opt_builder.with_step(0)
  opt_builder = opt_builder.with_timeline_output(timeline_path)
  opts = opt_builder.with_accounted_types(['.*']).build()

  with session.Session() as sess:
    model_out = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())

    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(model_out, options=trace_options, run_metadata=run_meta)

    _ = model_analyzer.profile(
        sess.graph, run_meta, cmd='graph', options=opts)

    with gfile.Open(timeline_path, 'r') as timeline_file:
      # Test that a json file is created.
      # TODO(xpan): tfprof Timeline isn't quite correct on Windows.
      # Investigate why.
      min_length = 1000 if os.name != 'nt' else 1
      self.assertLess(min_length, len(timeline_file.read()))
def testCodeViewLeafGraphNode(self):
  """Checks where graph nodes are attached in the 'code' view tree.

  Following the first child from the root, every node that has children
  must carry no graph nodes, and the final node must carry exactly one.
  """
  ops.reset_default_graph()

  opt_builder = builder(builder.trainable_variables_parameter())
  opt_builder = opt_builder.with_empty_output()
  opt_builder = opt_builder.with_accounted_types(['.*'])
  opt_builder = opt_builder.account_displayed_op_only(False)
  opts = opt_builder.select(
      ['bytes', 'params', 'float_ops', 'device']).build()

  with session.Session() as sess:
    model_out = lib.BuildSmallModel()
    sess.run(variables.global_variables_initializer())

    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(model_out, options=trace_options, run_metadata=run_meta)

    root = model_analyzer.profile(
        sess.graph, run_meta, cmd='code', options=opts)

    # Descend through first children until a node without children is hit.
    node = root
    while node.children:
      self.assertEqual(0, len(node.graph_nodes))
      node = node.children[0]
    self.assertEqual(1, len(node.graph_nodes))
def testTimeline(self):
  """Profiles the full model in 'graph' view and checks the timeline dump.

  In this variant the file that is read back is the requested output path
  with a '_0' suffix appended.
  """
  ops.reset_default_graph()
  timeline_path = os.path.join(test.get_temp_dir(), 'timeline')

  opts = (builder(builder.trainable_variables_parameter())
          .with_max_depth(100000)
          .with_step(0)
          .with_timeline_output(timeline_path)
          .with_accounted_types(['.*'])
          .build())

  full_trace = config_pb2.RunOptions(
      trace_level=config_pb2.RunOptions.FULL_TRACE)

  with session.Session() as sess:
    fetches = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    sess.run(fetches, options=full_trace, run_metadata=run_meta)

    model_analyzer.profile(sess.graph, run_meta, cmd='graph', options=opts)

    # Test that a json file is created.
    # TODO(xpan): tfprof Timeline isn't quite correct on Windows.
    # Investigate why.
    min_size = 1 if os.name == 'nt' else 1000
    with gfile.Open(timeline_path + '_0', 'r') as f:
      self.assertLess(min_size, len(f.read()))
def _run_model():
  """Runs a random [1, SIZE] x [SIZE, 2*SIZE] matmul and profiles it.

  The session is configured with arithmetic optimization turned off and the
  run is traced at SOFTWARE_TRACE level. Profiling uses the time-and-memory
  options with zero thresholds, ordering by name and no output.

  Returns:
    A (tfprof_node, run_metadata) tuple: the value returned by
    model_analyzer.profile and the RunMetadata filled in by the traced run.
  """
  lhs = random_ops.random_normal(shape=[1, SIZE])
  rhs = random_ops.random_normal(shape=[SIZE, 2 * SIZE])
  product = math_ops.matmul(lhs, rhs)

  session_config = config_pb2.ConfigProto()
  session_config.graph_options.rewrite_options.arithmetic_optimization = (
      rewriter_config_pb2.RewriterConfig.OFF)

  with session.Session(config=session_config) as sess:
    run_metadata = config_pb2.RunMetadata()

    opts = builder.time_and_memory()
    for key, value in (('min_micros', 0), ('min_bytes', 0),
                       ('order_by', 'name'), ('output', 'none')):
      opts[key] = value

    software_trace = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.SOFTWARE_TRACE)
    sess.run(product, options=software_trace, run_metadata=run_metadata)

    tfprof_node = model_analyzer.profile(
        sess.graph, run_meta=run_metadata, options=opts)

    return tfprof_node, run_metadata
def testCodeViewLeafGraphNode(self):
  """Checks that, in 'code' view, only the leaf node carries graph nodes.

  Options are a copy of TRAINABLE_VARS_PARAMS_STAT_OPTIONS with the
  accounting, selection and output entries overridden.
  """
  ops.reset_default_graph()

  opts = model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS.copy()
  for key, value in (
      ('account_type_regexes', ['.*']),
      ('account_displayed_op_only', False),
      ('select', ['bytes', 'params', 'float_ops', 'device']),
      ('output', 'none')):
    opts[key] = value

  full_trace = config_pb2.RunOptions(
      trace_level=config_pb2.RunOptions.FULL_TRACE)

  with session.Session() as sess:
    fetches = lib.BuildSmallModel()
    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    sess.run(fetches, options=full_trace, run_metadata=run_meta)

    root = model_analyzer.profile(
        sess.graph, run_meta, cmd='code', options=opts)

    # Descend along the first child until a node without children is reached.
    node = root
    while node.children:
      self.assertEqual(0, len(node.graph_nodes))
      node = node.children[0]
    self.assertEqual(1, len(node.graph_nodes))
# Sentinel meaning "caller did not pass write_path". None and '' cannot be
# used for this because they already mean "do not write a report file".
_DEFAULT_WRITE_PATH = object()


def get_flops(model, write_path=_DEFAULT_WRITE_PATH):
  """Returns the total float-op count of one forward pass of `model`.

  `model.call` is wrapped in a tf.function whose input signature has batch
  size 1 followed by `model.input_shape[1:]`, and the resulting concrete
  function's graph is profiled with the float-operation options.

  Args:
    model: Object exposing `call` and `input_shape`.
    write_path: Where the profiler report is written. When omitted, a fresh
      temporary file path is generated for this call. Pass a falsy value
      (None or '') to leave the profiler's output option untouched.

  Returns:
    `total_float_ops` of the profile result.
  """
  if write_path is _DEFAULT_WRITE_PATH:
    # The previous default, `tempfile.NamedTemporaryFile().name`, was
    # evaluated once at definition time: it created (and immediately
    # deleted) a file on import and made every call share one path.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
      write_path = tmp.name

  forward_pass = tf.function(
      model.call,
      input_signature=[tf.TensorSpec(shape=(1,) + model.input_shape[1:])])

  opts = ProfileOptionBuilder.float_operation()
  if write_path:
    opts['output'] = 'file:outfile={}'.format(write_path)

  graph_info = profile(
      forward_pass.get_concrete_function().graph, options=opts)
  return graph_info.total_float_ops
def testDumpToFile(self):
  """Profiles the small model's trainable parameters into a file.

  The file content is compared against the exact expected parameter table.
  """
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')
  opts = (builder(builder.trainable_variables_parameter())
          .with_file_output(dump_path)
          .build())

  with session.Session() as sess:
    # Only the graph is needed; the model is never run.
    lib.BuildSmallModel()
    model_analyzer.profile(sess.graph, options=opts)

    expected = (u'node name | # parameters\n'
                '_TFProfRoot (--/451 params)\n'
                ' DW (3x3x3x6, 162/162 params)\n'
                ' DW2 (2x2x6x12, 288/288 params)\n'
                ' ScalarW (1, 1/1 params)\n')
    with gfile.Open(dump_path, 'r') as f:
      self.assertEqual(expected, f.read())
def testDumpToFile(self):
  """Profiles the small model's trainable parameters into a file.

  Options are a copy of TRAINABLE_VARS_PARAMS_STAT_OPTIONS with only the
  'output' entry redirected to a file; the file content is compared against
  the exact expected parameter table.
  """
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')
  opts = model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS.copy()
  opts['output'] = 'file:outfile=' + dump_path

  with session.Session() as sess:
    # Only the graph is needed; the model is never run.
    lib.BuildSmallModel()
    model_analyzer.profile(sess.graph, options=opts)

    expected = (u'node name | # parameters\n'
                '_TFProfRoot (--/451 params)\n'
                ' DW (3x3x3x6, 162/162 params)\n'
                ' DW2 (2x2x6x12, 288/288 params)\n'
                ' ScalarW (1, 1/1 params)\n')
    with gfile.Open(dump_path, 'r') as f:
      self.assertEqual(expected, f.read())
def testDumpToFile(self):
  """Writes the small model's parameter profile to a file and verifies it.

  Uses a copy of TRAINABLE_VARS_PARAMS_STAT_OPTIONS whose 'output' entry
  points at a file in the test temp directory.
  """
  ops.reset_default_graph()
  target = os.path.join(test.get_temp_dir(), 'dump')
  profile_opts = model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS.copy()
  profile_opts['output'] = 'file:outfile=' + target

  with session.Session() as sess:
    # The graph alone is profiled; nothing is executed.
    lib.BuildSmallModel()
    model_analyzer.profile(sess.graph, options=profile_opts)

    with gfile.Open(target, 'r') as f:
      dumped = f.read()
    self.assertEqual(
        u'node name | # parameters\n'
        '_TFProfRoot (--/451 params)\n'
        ' DW (3x3x3x6, 162/162 params)\n'
        ' DW2 (2x2x6x12, 288/288 params)\n'
        ' ScalarW (1, 1/1 params)\n',
        dumped)
def testDumpToFile(self):
  """Writes the small model's parameter profile to a file and verifies it.

  Options come from the option builder seeded with the trainable-variables
  preset and redirected to a file in the test temp directory.
  """
  ops.reset_default_graph()
  target = os.path.join(test.get_temp_dir(), 'dump')
  preset = builder.trainable_variables_parameter()
  profile_opts = builder(preset).with_file_output(target).build()

  with session.Session() as sess:
    # The graph alone is profiled; nothing is executed.
    lib.BuildSmallModel()
    model_analyzer.profile(sess.graph, options=profile_opts)

    with gfile.Open(target, 'r') as f:
      dumped = f.read()
    self.assertEqual(
        u'node name | # parameters\n'
        '_TFProfRoot (--/451 params)\n'
        ' DW (3x3x3x6, 162/162 params)\n'
        ' DW2 (2x2x6x12, 288/288 params)\n'
        ' ScalarW (1, 1/1 params)\n',
        dumped)
def testComplexCodeView(self):
  """Profiles the full model in 'code' view and checks dump and result tree.

  The dump file is compared line by line after truncating each line to 80
  characters; the returned root node is checked for totals and for the names
  of its eight direct children.
  """
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')
  opts = (builder(builder.trainable_variables_parameter())
          .with_file_output(dump_path)
          .with_accounted_types(['.*'])
          .with_node_names(show_name_regexes=['.*model_analyzer_testlib.py.*'])
          .account_displayed_op_only(False)
          .select(['params', 'float_ops'])
          .build())

  full_trace = config_pb2.RunOptions(
      trace_level=config_pb2.RunOptions.FULL_TRACE)

  with session.Session() as sess:
    fetches = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    sess.run(fetches, options=full_trace, run_metadata=run_meta)

    tfprof_node = model_analyzer.profile(
        sess.graph, run_meta, cmd='code', options=opts)

    # pylint: disable=line-too-long
    with gfile.Open(dump_path, 'r') as f:
      # Keep at most the first 80 characters of every dumped line.
      result = '\n'.join(line[:80] for line in f.read().split('\n'))
      self.assertEqual(
          'node name | # parameters | # float_ops\n_TFProfRoot (--/2.84k params, --/91.04k flops)\n model_analyzer_testlib.py:58:BuildFullModel (0/1.80k params, 0/41.76k flops)\n model_analyzer_testlib.py:35:BuildSmallModel (0/0 params, 0/0 flops)\n model_analyzer_testlib.py:39:BuildSmallModel (0/4 params, 0/0 flops)\n model_analyzer_testlib.py:43:BuildSmallModel (0/648 params, 0/0 flops)\n model_analyzer_testlib.py:44:BuildSmallModel (0/0 params, 0/23.33k flops)\n model_analyzer_testlib.py:48:BuildSmallModel (0/1.15k params, 0/0 flops)\n model_analyzer_testlib.py:49:BuildSmallModel (0/0 params, 0/18.43k flops)\n model_analyzer_testlib.py:58:BuildFullModel (gradient) (0/0 params, 0/0 flops)\n model_analyzer_testlib.py:44:BuildSmallModel (gradient) (0/0 params, 0/0 flo\n model_analyzer_testlib.py:49:BuildSmallModel (gradient) (0/0 params, 0/0 flo\n model_analyzer_testlib.py:62:BuildFullModel (0/1.04k params, 0/16.51k flops)\n model_analyzer_testlib.py:62:BuildFullModel (gradient) (0/0 params, 0/32.77k f\n model_analyzer_testlib.py:64:BuildFullModel (0/0 params, 0/0 flops)\n model_analyzer_testlib.py:65:BuildFullModel (0/0 params, 0/0 flops)\n model_analyzer_testlib.py:65:BuildFullModel (gradient) (0/0 params, 0/0 flops)\n model_analyzer_testlib.py:67:BuildFullModel (0/0 params, 0/0 flops)\n',
          result)
    # pylint: enable=line-too-long

    self.assertLess(0, tfprof_node.total_exec_micros)
    self.assertEqual(2844, tfprof_node.total_parameters)
    self.assertEqual(91040, tfprof_node.total_float_ops)
    self.assertEqual(8, len(tfprof_node.children))
    self.assertEqual('_TFProfRoot', tfprof_node.name)

    expected_children = (
        'model_analyzer_testlib.py:58:BuildFullModel',
        'model_analyzer_testlib.py:58:BuildFullModel (gradient)',
        'model_analyzer_testlib.py:62:BuildFullModel',
        'model_analyzer_testlib.py:62:BuildFullModel (gradient)',
        'model_analyzer_testlib.py:64:BuildFullModel',
        'model_analyzer_testlib.py:65:BuildFullModel',
        'model_analyzer_testlib.py:65:BuildFullModel (gradient)',
        'model_analyzer_testlib.py:67:BuildFullModel',
    )
    for position, child_name in enumerate(expected_children):
      self.assertEqual(child_name, tfprof_node.children[position].name)
def testOpView(self):
  """Profiles the full model in 'op' view and checks header and node chain.

  The result is expected to be a single chain of nodes (one child each),
  ordered by non-increasing occurrence, whose exec-time totals are
  consistent from parent to child. The chain must be 15 nodes long.
  """
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')
  opts = (builder(builder.trainable_variables_parameter())
          .with_file_output(dump_path)
          .with_accounted_types(['.*'])
          .with_min_occurrence(10)
          .order_by('occurrence')
          .select(['params', 'micros', 'occurrence', 'input_shapes'])
          .build())

  full_trace = config_pb2.RunOptions(
      trace_level=config_pb2.RunOptions.FULL_TRACE)

  with session.Session() as sess:
    fetches = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    sess.run(fetches, options=full_trace, run_metadata=run_meta)

    node = model_analyzer.profile(
        sess.graph, run_meta, cmd='op', options=opts)

    with gfile.Open(dump_path, 'r') as f:
      # Compare only the header, with tabs and spaces stripped.
      header = f.read().replace('\t', '').replace(' ', '')[0:120]
      # pylint: disable=line-too-long
      self.assertEqual(
          'nodename|totalexecutiontime|acceleratorexecutiontime|cpuexecutiontime|#parameters|opoccurrence(run|defined)|inputshapes\n',
          header)
      # pylint: enable=line-too-long

    total_children = 0
    last_occurrence = 1e32
    input_shapes = 0
    last_total_micros = node.total_exec_micros
    last_micros = node.exec_micros
    while node.children:
      input_shapes += sum(len(g.input_shapes) for g in node.graph_nodes)
      self.assertEqual(len(node.children), 1)
      node = node.children[0]
      # The parent's total is the child's total plus the parent's own time.
      self.assertEqual(last_total_micros,
                       node.total_exec_micros + last_micros)
      last_total_micros = node.total_exec_micros
      last_micros = node.exec_micros

      total_children += 1
      occurrence = len(node.graph_nodes)
      self.assertLessEqual(occurrence, last_occurrence)
      last_occurrence = len(node.graph_nodes)

    self.assertEqual(total_children, 15)
    self.assertGreater(input_shapes, 0)
def testOpView(self):
  """Profiles the full model in 'op' view with time and memory columns.

  Checks the dump header (after lib.CheckAndRemoveDoc and whitespace
  stripping) and that the result is a single chain of nodes ordered by
  non-increasing occurrence with consistent exec-time totals.
  """
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')
  selected_columns = ['params', 'micros', 'bytes', 'peak_bytes',
                      'residual_bytes', 'output_bytes', 'occurrence',
                      'input_shapes']
  opts = (builder(builder.trainable_variables_parameter())
          .with_file_output(dump_path)
          .with_accounted_types(['.*'])
          .with_min_occurrence(10)
          .order_by('occurrence')
          .select(selected_columns)
          .build())

  full_trace = config_pb2.RunOptions(
      trace_level=config_pb2.RunOptions.FULL_TRACE)

  with session.Session() as sess:
    fetches = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    sess.run(fetches, options=full_trace, run_metadata=run_meta)

    node = model_analyzer.profile(
        sess.graph, run_meta, cmd='op', options=opts)

    with gfile.Open(dump_path, 'r') as f:
      # Compare only the header, with tabs and spaces stripped.
      stripped = lib.CheckAndRemoveDoc(f.read()).replace('\t', '')
      header = stripped.replace(' ', '')[0:170]
      # pylint: disable=line-too-long
      self.assertEqual(
          'nodename|requestedbytes|peakbytes|residualbytes|outputbytes|totalexecutiontime|acceleratorexecutiontime|cpuexecutiontime|#parameters|opoccurrence(run|defined)|inputshapes',
          header)
      # pylint: enable=line-too-long

    total_children = 0
    last_occurrence = 1e32
    input_shapes = 0
    last_total_micros = node.total_exec_micros
    last_micros = node.exec_micros
    while node.children:
      input_shapes += sum(len(g.input_shapes) for g in node.graph_nodes)
      self.assertEqual(len(node.children), 1)
      node = node.children[0]
      # The parent's total is the child's total plus the parent's own time.
      self.assertEqual(last_total_micros,
                       node.total_exec_micros + last_micros)
      last_total_micros = node.total_exec_micros
      last_micros = node.exec_micros

      total_children += 1
      occurrence = len(node.graph_nodes)
      self.assertLessEqual(occurrence, last_occurrence)
      last_occurrence = len(node.graph_nodes)

    self.assertGreater(input_shapes, 0)
def testSelectOption(self):
  """Checks that `select` controls which columns appear in the dump file.

  The small model is run once with full tracing. It is then profiled three
  times with different `select` lists, and the dump file is searched for
  column labels that must and must not be present.
  """
  ops.reset_default_graph()
  outfile = os.path.join(test.get_temp_dir(), 'dump')

  def check_selection(selected, not_selected):
    """Asserts presence/absence of each label anywhere in the dump file."""
    with gfile.Open(outfile, 'r') as f:
      s = f.read()
    # Substring membership is used instead of `s.find(attr) > 0`, which
    # wrongly treated a match at offset 0 as "not found".
    for attr in selected:
      self.assertIn(attr, s)
    for attr in not_selected:
      self.assertNotIn(attr, s)

  def profile_selecting(sess, run_meta, columns):
    """Profiles with time-and-memory options restricted to `columns`."""
    opts = (builder(builder.time_and_memory())
            .with_file_output(outfile)
            .select(columns)
            .build())
    _ = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts)

  with session.Session() as sess:
    x = lib.BuildSmallModel()
    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(x,
                 options=config_pb2.RunOptions(
                     trace_level=config_pb2.RunOptions.FULL_TRACE),
                 run_metadata=run_meta)

    profile_selecting(sess, run_meta, ['micros'])
    check_selection(['total execution time', 'accelerator execution time'],
                    ['bytes'])

    profile_selecting(sess, run_meta, ['bytes'])
    check_selection(['requested bytes'],
                    ['peak bytes', 'residual bytes', 'output bytes'])

    profile_selecting(
        sess, run_meta, ['peak_bytes', 'residual_bytes', 'output_bytes'])
    # NOTE(review): the other labels use spaces ('requested bytes'); the
    # underscore form below may never match the dump - confirm intent.
    check_selection(['peak bytes', 'residual bytes', 'output bytes'],
                    ['requested_bytes'])
def testComplexCodeView(self):
  """Profiles the full model in 'code' view and checks dump and result tree.

  Options are a copy of TRAINABLE_VARS_PARAMS_STAT_OPTIONS with overrides.
  The dump file is compared after truncating each line to 80 characters; the
  returned root node is checked for totals and for the names of its five
  direct children.
  """
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')

  opts = model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS.copy()
  for key, value in (
      ('output', 'file:outfile=' + dump_path),
      ('account_type_regexes', ['.*']),
      ('show_name_regexes', ['.*model_analyzer_testlib.py.*']),
      ('account_displayed_op_only', False),
      ('select', ['params', 'float_ops'])):
    opts[key] = value

  full_trace = config_pb2.RunOptions(
      trace_level=config_pb2.RunOptions.FULL_TRACE)

  with session.Session() as sess:
    fetches = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    sess.run(fetches, options=full_trace, run_metadata=run_meta)

    tfprof_node = model_analyzer.profile(
        sess.graph, run_meta, cmd='code', options=opts)

    # pylint: disable=line-too-long
    with gfile.Open(dump_path, 'r') as f:
      # Keep at most the first 80 characters of every dumped line.
      result = '\n'.join(line[:80] for line in f.read().split('\n'))
      self.assertEqual(
          'node name | # parameters | # float_ops\n_TFProfRoot (--/2.84k params, --/91.04k flops)\n model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_... (0/1.80k para\n model_analyzer_testlib.py:35:BuildSmallModel:image = array_ops... (0/0 param\n model_analyzer_testlib.py:39:BuildSmallModel:initializer=init_... (0/4 param\n model_analyzer_testlib.py:43:BuildSmallModel:initializer=init_... (0/648 par\n model_analyzer_testlib.py:44:BuildSmallModel:x = nn_ops.conv2d... (0/0 param\n model_analyzer_testlib.py:48:BuildSmallModel:initializer=init_... (0/1.15k p\n model_analyzer_testlib.py:49:BuildSmallModel:x = nn_ops.conv2d... (0/0 param\n model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c... (0/1.04k para\n model_analyzer_testlib.py:64:BuildFullModel:target = array_op... (0/0 params, \n model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_... (0/0 params, \n model_analyzer_testlib.py:67:BuildFullModel:return sgd_op.min... (0/0 params, \n',
          result)
    # pylint: enable=line-too-long

    self.assertLess(0, tfprof_node.total_exec_micros)
    self.assertEqual(2844, tfprof_node.total_parameters)
    self.assertEqual(91040, tfprof_node.total_float_ops)
    self.assertEqual(5, len(tfprof_node.children))
    self.assertEqual('_TFProfRoot', tfprof_node.name)

    expected_children = (
        'model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_...',
        'model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c...',
        'model_analyzer_testlib.py:64:BuildFullModel:target = array_op...',
        'model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_...',
        'model_analyzer_testlib.py:67:BuildFullModel:return sgd_op.min...',
    )
    for position, child_name in enumerate(expected_children):
      self.assertEqual(child_name, tfprof_node.children[position].name)
def testOpView(self):
  """Profiles the full model in 'op' view and checks header and node chain.

  Options are a copy of TRAINABLE_VARS_PARAMS_STAT_OPTIONS with overrides.
  The result is expected to be a single chain of 15 nodes ordered by
  non-increasing occurrence, with consistent exec-time totals.
  """
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')

  opts = model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS.copy()
  for key, value in (
      ('output', 'file:outfile=' + dump_path),
      ('account_type_regexes', ['.*']),
      ('min_occurrence', 10),
      ('select', ['params', 'micros', 'occurrence', 'input_shapes']),
      ('order_by', 'occurrence')):
    opts[key] = value

  full_trace = config_pb2.RunOptions(
      trace_level=config_pb2.RunOptions.FULL_TRACE)

  with session.Session() as sess:
    fetches = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    sess.run(fetches, options=full_trace, run_metadata=run_meta)

    node = model_analyzer.profile(
        sess.graph, run_meta, cmd='op', options=opts)

    with gfile.Open(dump_path, 'r') as f:
      # Compare only the header, with tabs and spaces stripped.
      header = f.read().replace('\t', '').replace(' ', '')[0:120]
      # pylint: disable=line-too-long
      self.assertEqual(
          'nodename|totalexecutiontime|acceleratorexecutiontime|cpuexecutiontime|#parameters|opoccurrence(run|defined)|inputshapes\n',
          header)
      # pylint: enable=line-too-long

    total_children = 0
    last_occurrence = 1e32
    input_shapes = 0
    last_total_micros = node.total_exec_micros
    last_micros = node.exec_micros
    while node.children:
      input_shapes += sum(len(g.input_shapes) for g in node.graph_nodes)
      self.assertEqual(len(node.children), 1)
      node = node.children[0]
      # The parent's total is the child's total plus the parent's own time.
      self.assertEqual(last_total_micros,
                       node.total_exec_micros + last_micros)
      last_total_micros = node.total_exec_micros
      last_micros = node.exec_micros

      total_children += 1
      occurrence = len(node.graph_nodes)
      self.assertLessEqual(occurrence, last_occurrence)
      last_occurrence = len(node.graph_nodes)

    self.assertEqual(total_children, 15)
    self.assertGreater(input_shapes, 0)
def get_flops(self):
  """Returns half of the profiled float-op count of `self._eval_model`.

  The eval model is traced in inference mode (training=False) for a single
  float32 input of batch size 1 and the resulting graph is profiled with the
  float-operation options.

  Returns:
    `total_float_ops` of the profile result, integer-divided by 2.
  """
  image_spec = tf.TensorSpec(
      shape=(1,) + self._eval_model.input_shape[1:],
      dtype=tf.float32,
      name='images')

  @tf.function(input_signature=[image_spec])
  def inference_function(images):
    return self._eval_model(images, training=False)

  traced_graph = inference_function.get_concrete_function().graph
  profile_opts = ProfileOptionBuilder.float_operation()
  graph_info = profile(traced_graph, options=profile_opts)
  return graph_info.total_float_ops // 2
def _run_loop_model():
  """Runs the full test model once with full tracing and profiles it.

  Returns:
    A (tfprof_node, run_meta) tuple: the value returned by
    model_analyzer.profile with the time-and-memory options, and the
    RunMetadata filled in by the traced run.
  """
  full_trace = config_pb2.RunOptions(
      trace_level=config_pb2.RunOptions.FULL_TRACE)

  with session.Session() as sess:
    fetches = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())

    run_meta = config_pb2.RunMetadata()
    sess.run(fetches, options=full_trace, run_metadata=run_meta)

    profile_opts = builder.time_and_memory()
    tfprof_node = model_analyzer.profile(
        sess.graph, run_meta, options=profile_opts)
    return tfprof_node, run_meta
def pprof_test_helper(self, attribute, should_fail=False):
  """Profiles the full model to a gzipped pprof file and inspects it.

  Args:
    attribute: Column passed to `select`; also used as the output file name
      prefix.
    should_fail: When true, only asserts that no output file was produced.
  """
  ops.reset_default_graph()
  pprof_path = os.path.join(test.get_temp_dir(), attribute + '_pprof.pb.gz')
  opts = (builder(builder.time_and_memory())
          .select([attribute])
          .with_max_depth(100000)
          .with_node_names(trim_name_regexes=['ops.py.*'])
          .with_pprof_output(pprof_path)
          .build())

  full_trace = config_pb2.RunOptions(
      trace_level=config_pb2.RunOptions.FULL_TRACE)

  with session.Session() as sess:
    fetches = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    sess.run(fetches, options=full_trace, run_metadata=run_meta)

    model_analyzer.profile(sess.graph, run_meta, cmd='code', options=opts)

    if should_fail:
      self.assertFalse(gfile.Exists(pprof_path))
      return

    # The file is a gzip-compressed serialized Profile proto.
    profile_pb = profile_pb2.Profile()
    with gfile.Open(pprof_path, 'rb') as f, \
        gzip.GzipFile(fileobj=io.BytesIO(f.read())) as gzipf:
      profile_pb.ParseFromString(gzipf.read())

    for repeated_field, lower_bound in ((profile_pb.sample, 10),
                                        (profile_pb.location, 10),
                                        (profile_pb.function, 10),
                                        (profile_pb.string_table, 30)):
      self.assertGreater(len(repeated_field), lower_bound)

    has_rnn = False
    has_loop = False
    for entry in profile_pb.string_table:
      # `> 0` deliberately kept: a match at offset 0 does not count.
      has_rnn = has_rnn or entry.find('rnn') > 0
      has_loop = has_loop or entry.find('while') > 0
      self.assertFalse(entry.startswith('ops.py'))
    self.assertTrue(has_rnn)
    self.assertTrue(has_loop)
def pprof_test_helper(self, attribute, should_fail=False):
  """Runs a 'code' view profile with pprof output and validates the file.

  Args:
    attribute: Column passed to `select`; also used as the output file name
      prefix.
    should_fail: When true, only asserts that no output file was produced.
  """
  ops.reset_default_graph()
  pprof_path = os.path.join(test.get_temp_dir(), attribute + '_pprof.pb.gz')

  option_builder_obj = builder(builder.time_and_memory())
  opts = (option_builder_obj
          .select([attribute])
          .with_max_depth(100000)
          .with_node_names(trim_name_regexes=['ops.py.*'])
          .with_pprof_output(pprof_path)
          .build())

  with session.Session() as sess:
    fetches = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    sess.run(fetches, options=trace_options, run_metadata=run_meta)

    model_analyzer.profile(sess.graph, run_meta, cmd='code', options=opts)

    if should_fail:
      self.assertFalse(gfile.Exists(pprof_path))
      return

    # The file is a gzip-compressed serialized Profile proto.
    profile_pb = profile_pb2.Profile()
    with gfile.Open(pprof_path, 'rb') as f, \
        gzip.GzipFile(fileobj=io.BytesIO(f.read())) as gzipf:
      profile_pb.ParseFromString(gzipf.read())

    self.assertGreater(len(profile_pb.sample), 10)
    self.assertGreater(len(profile_pb.location), 10)
    self.assertGreater(len(profile_pb.function), 10)
    self.assertGreater(len(profile_pb.string_table), 30)

    saw_rnn = False
    saw_while = False
    for text in profile_pb.string_table:
      # `> 0` deliberately kept: a match at offset 0 does not count.
      if text.find('rnn') > 0:
        saw_rnn = True
      if text.find('while') > 0:
        saw_while = True
      self.assertFalse(text.startswith('ops.py'))
    self.assertTrue(saw_rnn)
    self.assertTrue(saw_while)
def get_num_flops(model, input_shape, readable_format=True):
  """Returns the multiply-accumulate count of one forward pass of `model`.

  Args:
    model: Object exposing `call`; if it has a `model` attribute, that
      attribute is profiled instead.
    input_shape: Tuple appended to a batch dimension of 1 to form the input
      signature.
    readable_format: When true, the count is returned as a float scaled by
      1e-9; otherwise the raw integer count is returned.

  Returns:
    Half of the profiled `total_float_ops`, optionally scaled by 1e-9.
  """
  # Unwrap wrappers that keep the real model under `.model`.
  model = getattr(model, 'model', model)

  signature = [tf.TensorSpec(shape=(1,) + input_shape)]
  forward_pass = tf.function(model.call, input_signature=signature)
  concrete_graph = forward_pass.get_concrete_function().graph
  graph_info = profile(
      concrete_graph, options=ProfileOptionBuilder.float_operation())

  # `profile` counts a multiply and an accumulate as two flops; halving
  # reports the number of multiply-accumulate operations instead.
  mac_count = graph_info.total_float_ops // 2
  if not readable_format:
    return mac_count
  return float(mac_count) * 1e-9
def get_flops(self):
  """Returns the multiply-accumulate count of `self._eval_model`.

  The eval model is traced in inference mode (training=False) for a single
  float32 input of batch size 1 and the resulting graph is profiled with the
  float-operation options.

  Returns:
    `total_float_ops` of the profile result, integer-divided by 2.
  """
  image_spec = tf.TensorSpec(
      shape=(1,) + self._eval_model.input_shape[1:],
      dtype=tf.float32,
      name='images')

  @tf.function(input_signature=[image_spec])
  def inference_function(images):
    return self._eval_model(images, training=False)

  traced_graph = inference_function.get_concrete_function().graph
  graph_info = profile(
      traced_graph, options=ProfileOptionBuilder.float_operation())

  # `profile` counts a multiply and an accumulate as two flops. Halving
  # reports multiply-accumulate operations, commonly called MACs.
  return graph_info.total_float_ops // 2
def _run_model():
  """Runs a random [1, SIZE] x [SIZE, 2*SIZE] matmul and profiles it.

  The run is traced at FULL_TRACE level and profiled with the
  time-and-memory options with both minimum thresholds set to zero.

  Returns:
    A (tfprof_node, run_metadata) tuple: the value returned by
    model_analyzer.profile and the RunMetadata filled in by the traced run.
  """
  lhs = random_ops.random_normal(shape=[1, SIZE])
  rhs = random_ops.random_normal(shape=[SIZE, 2 * SIZE])
  product = math_ops.matmul(lhs, rhs)

  with session.Session() as sess:
    run_metadata = config_pb2.RunMetadata()

    opts = builder.time_and_memory()
    for threshold_key in ('min_micros', 'min_bytes'):
      opts[threshold_key] = 0

    full_trace = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    sess.run(product, options=full_trace, run_metadata=run_metadata)

    tfprof_node = model_analyzer.profile(
        sess.graph, run_meta=run_metadata, options=opts)

    return tfprof_node, run_metadata
def _run_model():
  """Builds, runs and profiles a single random matmul.

  Multiplies a [1, SIZE] tensor by a [SIZE, 2 * SIZE] tensor under
  FULL_TRACE and profiles the graph with zeroed time/byte thresholds.

  Returns:
    A (tfprof_node, run_metadata) tuple: the value returned by
    model_analyzer.profile and the RunMetadata filled in by the traced run.
  """
  inputs = random_ops.random_normal(shape=[1, SIZE])
  weights = random_ops.random_normal(shape=[SIZE, 2 * SIZE])
  outputs = math_ops.matmul(inputs, weights)

  with session.Session() as sess:
    run_metadata = config_pb2.RunMetadata()

    opts = builder.time_and_memory()
    opts['min_micros'] = 0
    opts['min_bytes'] = 0

    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    sess.run(outputs, options=trace_options, run_metadata=run_metadata)

    profile_root = model_analyzer.profile(
        sess.graph, run_meta=run_metadata, options=opts)

    return profile_root, run_metadata
def _run_loop_model():
  """Runs the full test model with software tracing and profiles it.

  The session disables the Grappler remapping pass. Profiling uses the
  time-and-memory options ordered by name with no output.

  Returns:
    A (tfprof_node, run_meta) tuple: the value returned by
    model_analyzer.profile and the RunMetadata filled in by the traced run.
  """
  session_config = config_pb2.ConfigProto()
  # Grappler might fuse MatMul with BiasAdd in remapper optimizer.
  session_config.graph_options.rewrite_options.remapping = (
      rewriter_config_pb2.RewriterConfig.OFF)

  software_trace = config_pb2.RunOptions(
      trace_level=config_pb2.RunOptions.SOFTWARE_TRACE)

  with session.Session(config=session_config) as sess:
    fetches = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())

    run_meta = config_pb2.RunMetadata()
    sess.run(fetches, options=software_trace, run_metadata=run_meta)

    opts = builder.time_and_memory()
    for key, value in (('order_by', 'name'), ('output', 'none')):
      opts[key] = value

    tfprof_node = model_analyzer.profile(sess.graph, run_meta, options=opts)
    return tfprof_node, run_meta
def testTraceLoopBytes(self):
  """Checks requested bytes for the gradient of a 100-step GPU while loop.

  Returns without asserting anything when no GPU is available. The session
  uses the config returned by `self._no_rewrite_session_config()`.
  """
  if not test.is_gpu_available():
    return

  ops.reset_default_graph()
  steps = 100

  with ops.device('/gpu:0'):
    x = array_ops.ones((100, 100), dtype=dtypes.float32)
    n = array_ops.constant(steps, dtype=dtypes.int32)
    x1 = array_ops.ones((100, 100))
    x *= x1

    def loop_cond(i, x):
      del x  # The loop only depends on the counter.
      return i < n

    def loop_body(i, x):
      x *= x
      return i + 1, x

    initial_state = [array_ops.constant(0), x]
    _, y = control_flow_ops.while_loop(loop_cond, loop_body, initial_state)

  grad = gradients.gradients(y, [x1])

  with session.Session(config=self._no_rewrite_session_config()) as sess:
    run_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_metadata = config_pb2.RunMetadata()
    sess.run(grad, options=run_options, run_metadata=run_metadata)

    options = option_builder.ProfileOptionBuilder.time_and_memory()
    for key, value in (
        ('min_bytes', 0),
        ('min_micros', 0),
        ('select', ('bytes', 'peak_bytes', 'output_bytes', 'residual_bytes')),
        ('output', 'none')):
      options[key] = value

    ret_pb = model_analyzer.profile(
        sess.graph, run_meta=run_metadata, cmd='scope', options=options)
    self.assertGreater(ret_pb.total_requested_bytes, 1000000)
def _run_loop_model():
  """Runs the full test model with full tracing and profiles it.

  The session disables the Grappler remapping pass. Profiling uses the
  time-and-memory options ordered by name with no output.

  Returns:
    A (tfprof_node, run_meta) tuple: the value returned by
    model_analyzer.profile and the RunMetadata filled in by the traced run.
  """
  session_config = config_pb2.ConfigProto()
  # Grappler might fuse MatMul with BiasAdd in remapper optimizer.
  session_config.graph_options.rewrite_options.remapping = (
      rewriter_config_pb2.RewriterConfig.OFF)

  full_trace = config_pb2.RunOptions(
      trace_level=config_pb2.RunOptions.FULL_TRACE)

  with session.Session(config=session_config) as sess:
    fetches = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())

    run_meta = config_pb2.RunMetadata()
    sess.run(fetches, options=full_trace, run_metadata=run_meta)

    opts = builder.time_and_memory()
    for key, value in (('order_by', 'name'), ('output', 'none')):
      opts[key] = value

    tfprof_node = model_analyzer.profile(sess.graph, run_meta, options=opts)
    return tfprof_node, run_meta
def testTraceLoopBytes(self):
  """Checks requested bytes for the gradient of a 100-step GPU while loop.

  Returns without asserting anything when no GPU is available.
  """
  if not test.is_gpu_available():
    return

  ops.reset_default_graph()
  steps = 100

  with ops.device('/gpu:0'):
    x = array_ops.ones((100, 100), dtype=dtypes.float32)
    n = array_ops.constant(steps, dtype=dtypes.int32)
    x1 = array_ops.ones((100, 100))
    x *= x1

    def loop_cond(i, x):
      del x  # The loop only depends on the counter.
      return i < n

    def loop_body(i, x):
      x *= x
      return i + 1, x

    initial_state = [array_ops.constant(0), x]
    _, y = control_flow_ops.while_loop(loop_cond, loop_body, initial_state)

  grad = gradients.gradients(y, [x1])

  with session.Session() as sess:
    run_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_metadata = config_pb2.RunMetadata()
    sess.run(grad, options=run_options, run_metadata=run_metadata)

    options = option_builder.ProfileOptionBuilder.time_and_memory()
    for key, value in (
        ('min_bytes', 0),
        ('min_micros', 0),
        ('select', ('bytes', 'peak_bytes', 'output_bytes', 'residual_bytes')),
        ('output', 'none')):
      options[key] = value

    ret_pb = model_analyzer.profile(
        sess.graph, run_meta=run_metadata, cmd='scope', options=options)
    self.assertGreater(ret_pb.total_requested_bytes, 1000000)
def testProfileBasic(self):
  """Checks that Profiler methods and model_analyzer.profile agree.

  For each view the same options are given to a `model_analyzer.Profiler`
  method and to `model_analyzer.profile`; both write to the same dump file,
  which is read back after each call and compared.
  """
  ops.reset_default_graph()
  opts = model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS.copy()
  opts['account_type_regexes'] = ['.*']
  opts['select'] = ['params', 'float_ops', 'micros', 'bytes',
                    'device', 'op_types', 'occurrence']
  outfile = os.path.join(test.get_temp_dir(), 'dump')
  opts['output'] = 'file:outfile=' + outfile

  def read_dump():
    """Returns the current content of the dump file."""
    with gfile.Open(outfile, 'r') as f:
      return f.read()

  # Test the output without run_meta.
  sess = session.Session()
  # The session used to be left open; close it even if an assertion fails.
  self.addCleanup(sess.close)
  r = lib.BuildFullModel()
  sess.run(variables.global_variables_initializer())

  profiler = model_analyzer.Profiler(sess.graph)
  profiler.profile_name_scope(opts)
  profiler_str = read_dump()
  model_analyzer.profile(sess.graph, cmd='scope', options=opts)
  pma_str = read_dump()
  self.assertEqual(pma_str, profiler_str)

  # Test the output with run_meta.
  run_meta = config_pb2.RunMetadata()
  _ = sess.run(r,
               options=config_pb2.RunOptions(
                   trace_level=config_pb2.RunOptions.FULL_TRACE),
               run_metadata=run_meta)

  profiler.add_step(1, run_meta)
  profiler.profile_graph(opts)
  profiler_str = read_dump()
  model_analyzer.profile(
      sess.graph, cmd='graph', run_meta=run_meta, options=opts)
  pma_str = read_dump()
  self.assertEqual(pma_str, profiler_str)

  profiler.profile_python(opts)
  profiler_str = read_dump()
  model_analyzer.profile(
      sess.graph, cmd='code', run_meta=run_meta, options=opts)
  pma_str = read_dump()
  self.assertEqual(pma_str, profiler_str)

  profiler.profile_operations(opts)
  profiler_str = read_dump()
  model_analyzer.profile(
      sess.graph, cmd='op', run_meta=run_meta, options=opts)
  pma_str = read_dump()
  self.assertEqual(pma_str, profiler_str)

  # A 'scope' dump must differ from the last Profiler dump (the 'op' view).
  model_analyzer.profile(
      sess.graph, cmd='scope', run_meta=run_meta, options=opts)
  pma_str = read_dump()
  self.assertNotEqual(pma_str, profiler_str)

  # With a reduced column selection the two 'scope' dumps must match again.
  opts2 = opts.copy()
  opts2['select'] = ['params', 'float_ops']
  profiler.profile_name_scope(opts2)
  profiler_str = read_dump()
  model_analyzer.profile(
      sess.graph, cmd='scope', run_meta=run_meta, options=opts2)
  pma_str = read_dump()
  self.assertEqual(pma_str, profiler_str)
def testProfileBasic(self):
  """Checks that Profiler methods and model_analyzer.profile agree.

  The full model is run once with full tracing. For each view the same
  options are given to a `model_analyzer.Profiler` method and to
  `model_analyzer.profile`; both write to the same dump file, which is read
  back after each call and compared.
  """
  ops.reset_default_graph()
  outfile = os.path.join(test.get_temp_dir(), 'dump')
  opts = (builder(builder.trainable_variables_parameter())
          .with_file_output(outfile)
          .with_accounted_types(['.*'])
          .select(['params', 'float_ops', 'micros', 'bytes',
                   'device', 'op_types', 'occurrence']).build())

  def read_dump():
    """Returns the current content of the dump file."""
    with gfile.Open(outfile, 'r') as f:
      return f.read()

  sess = session.Session()
  # The session used to be left open; close it even if an assertion fails.
  self.addCleanup(sess.close)
  r = lib.BuildFullModel()
  sess.run(variables.global_variables_initializer())

  # Test the output with run_meta.
  run_meta = config_pb2.RunMetadata()
  _ = sess.run(r,
               options=config_pb2.RunOptions(
                   trace_level=config_pb2.RunOptions.FULL_TRACE),
               run_metadata=run_meta)

  profiler = model_analyzer.Profiler(sess.graph)
  profiler.add_step(1, run_meta)
  profiler.profile_graph(opts)
  profiler_str = read_dump()
  model_analyzer.profile(
      sess.graph, cmd='graph', run_meta=run_meta, options=opts)
  pma_str = read_dump()
  self.assertEqual(pma_str, profiler_str)

  profiler.profile_name_scope(opts)
  profiler_str = read_dump()
  model_analyzer.profile(
      sess.graph, cmd='scope', run_meta=run_meta, options=opts)
  pma_str = read_dump()
  self.assertEqual(pma_str, profiler_str)

  profiler.profile_python(opts)
  profiler_str = read_dump()
  model_analyzer.profile(
      sess.graph, cmd='code', run_meta=run_meta, options=opts)
  pma_str = read_dump()
  self.assertEqual(pma_str, profiler_str)

  profiler.profile_operations(opts)
  profiler_str = read_dump()
  model_analyzer.profile(
      sess.graph, cmd='op', run_meta=run_meta, options=opts)
  pma_str = read_dump()
  self.assertEqual(pma_str, profiler_str)

  # A 'scope' dump must differ from the last Profiler dump (the 'op' view).
  model_analyzer.profile(
      sess.graph, cmd='scope', run_meta=run_meta, options=opts)
  pma_str = read_dump()
  self.assertNotEqual(pma_str, profiler_str)
def testMinOption(self):
  """Checks that nodes returned under each `min_*` option meet the minimum.

  A random threshold is drawn once. For every `min_*` keyword accepted by
  `builder.time_and_memory`, the small model's trace is profiled with that
  keyword set to the threshold, and every node in the returned tree is
  asserted to be at or above the threshold for the corresponding field.
  """
  ops.reset_default_graph()

  def check_min(nodes, mm=0, mam=0, mcm=0, mb=0, mpb=0, mrb=0, mob=0):
    """Recursively asserts each node meets every positive threshold."""
    floors = (
        ('exec_micros', mm),
        ('accelerator_exec_micros', mam),
        ('cpu_exec_micros', mcm),
        ('requested_bytes', mb),
        ('peak_bytes', mpb),
        ('residual_bytes', mrb),
        ('output_bytes', mob),
    )
    for node in nodes:
      for field, floor in floors:
        # A threshold of zero (the default) means "do not check this field".
        if floor > 0:
          self.assertGreaterEqual(getattr(node, field), floor)
      check_min(node.children, mm, mam, mcm, mb, mpb, mrb, mob)

  with session.Session() as sess:
    x = lib.BuildSmallModel()
    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(x,
                 options=config_pb2.RunOptions(
                     trace_level=config_pb2.RunOptions.FULL_TRACE),
                 run_metadata=run_meta)

    min_val = random.randint(0, 10000)

    # (time_and_memory keyword, check_min keyword) pairs, in the order the
    # options are exercised.
    option_to_check = (
        ('min_micros', 'mm'),
        ('min_accelerator_micros', 'mam'),
        ('min_cpu_micros', 'mcm'),
        ('min_bytes', 'mb'),
        ('min_peak_bytes', 'mpb'),
        ('min_residual_bytes', 'mrb'),
        ('min_output_bytes', 'mob'),
    )
    for option_name, check_name in option_to_check:
      opts = builder(builder.time_and_memory(
          **{option_name: min_val})).with_empty_output().build()
      tfprof_node = model_analyzer.profile(
          sess.graph, run_meta=run_meta, options=opts)
      check_min(tfprof_node.children, **{check_name: min_val})
def testMinOption(self):
  """Profiles with each `min_*` option and checks the returned nodes.

  The small model is run once with full tracing. A single random threshold is
  then applied, one option at a time, through `builder.time_and_memory`, and
  the nodes of each resulting profile are asserted to be greater than or
  equal to that threshold on the matching field.
  """
  ops.reset_default_graph()

  def check_min(nodes, mm=0, mam=0, mcm=0, mb=0, mpb=0, mrb=0, mob=0):
    """Walks `nodes` and their children, asserting the positive minimums."""
    # Only thresholds greater than zero are enforced; collect them once so
    # the per-node loop touches nothing else.
    enforced = [
        (field, minimum)
        for field, minimum in (
            ('exec_micros', mm),
            ('accelerator_exec_micros', mam),
            ('cpu_exec_micros', mcm),
            ('requested_bytes', mb),
            ('peak_bytes', mpb),
            ('residual_bytes', mrb),
            ('output_bytes', mob),
        )
        if minimum > 0
    ]
    for current in nodes:
      for field, minimum in enforced:
        self.assertGreaterEqual(getattr(current, field), minimum)
      check_min(current.children, mm, mam, mcm, mb, mpb, mrb, mob)

  with session.Session() as sess:
    x = lib.BuildSmallModel()
    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    _ = sess.run(x, options=trace_options, run_metadata=run_meta)

    min_val = random.randint(0, 10000)

    def profiled_children(**minimums):
      """Profiles the traced run with `minimums` and returns the top nodes."""
      opts = builder(
          builder.time_and_memory(**minimums)).with_empty_output().build()
      tfprof_node = model_analyzer.profile(
          sess.graph, run_meta=run_meta, options=opts)
      return tfprof_node.children

    check_min(profiled_children(min_micros=min_val), mm=min_val)
    check_min(profiled_children(min_accelerator_micros=min_val), mam=min_val)
    check_min(profiled_children(min_cpu_micros=min_val), mcm=min_val)
    check_min(profiled_children(min_bytes=min_val), mb=min_val)
    check_min(profiled_children(min_peak_bytes=min_val), mpb=min_val)
    check_min(profiled_children(min_residual_bytes=min_val), mrb=min_val)
    check_min(profiled_children(min_output_bytes=min_val), mob=min_val)
def testProfileBasic(self):
  """Compares `Profiler` reports with `model_analyzer.profile` reports.

  The full model is run for one fully traced step. The graph, scope, code and
  op views are then produced twice each - once through a `Profiler` holding
  that step and once through `model_analyzer.profile` with the matching
  `cmd` - and the two texts written to the dump file must be equal. The test
  ends by checking that a scope report is not equal to the op report.
  """
  ops.reset_default_graph()
  outfile = os.path.join(test.get_temp_dir(), 'dump')
  opts = (builder(builder.trainable_variables_parameter())
          .with_file_output(outfile)
          .with_accounted_types(['.*'])
          .select(['params', 'float_ops', 'micros', 'bytes',
                   'device', 'op_types', 'occurrence']).build())

  def dump_text():
    """Reads back whatever the last profiling call wrote to `outfile`."""
    with gfile.Open(outfile, 'r') as f:
      return f.read()

  # The session is a context manager here so that it is always closed,
  # instead of being left open when the test returns or an assertion fails.
  with session.Session() as sess:
    r = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())

    # Collect run metadata from a single fully traced step.
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(r,
                 options=config_pb2.RunOptions(
                     trace_level=config_pb2.RunOptions.FULL_TRACE),
                 run_metadata=run_meta)

    profiler = model_analyzer.Profiler(sess.graph)
    profiler.add_step(1, run_meta)

    def analyzer_text(cmd):
      """Runs the one-shot API for `cmd` and returns the report it wrote."""
      model_analyzer.profile(
          sess.graph, cmd=cmd, run_meta=run_meta, options=opts)
      return dump_text()

    profiler.profile_graph(opts)
    profiler_str = dump_text()
    self.assertEqual(analyzer_text('graph'), profiler_str)

    profiler.profile_name_scope(opts)
    profiler_str = dump_text()
    self.assertEqual(analyzer_text('scope'), profiler_str)

    profiler.profile_python(opts)
    profiler_str = dump_text()
    self.assertEqual(analyzer_text('code'), profiler_str)

    profiler.profile_operations(opts)
    profiler_str = dump_text()
    self.assertEqual(analyzer_text('op'), profiler_str)

    # `profiler_str` is the op view at this point, so the scope view is
    # expected to differ from it.
    self.assertNotEqual(analyzer_text('scope'), profiler_str)
def testSelectEverthingDetail(self):
  """Checks the text report when every column is selected with a real trace.

  The small model is placed on the GPU when one is available, otherwise on
  the CPU, and run once with full tracing. The report written to the dump
  file must start with the expected header. The row containing 'Conv2D ' must
  show positive timings, the expected device, positive float ops, the
  expected op count and input shapes. The row containing 'DW (3x3x3x6' must
  show a positive parameter count.
  """
  ops.reset_default_graph()
  # Query GPU availability once so that device placement and every later
  # expectation are based on the same answer.
  gpu_available = test.is_gpu_available()
  dev = '/gpu:0' if gpu_available else '/cpu:0'
  outfile = os.path.join(test.get_temp_dir(), 'dump')
  opts = (builder(builder.trainable_variables_parameter())
          .with_file_output(outfile)
          .with_accounted_types(['.*'])
          .select(['micros', 'bytes', 'params', 'float_ops', 'occurrence',
                   'device', 'op_types', 'input_shapes']).build())

  def assert_positive_pair(pattern, text):
    """Asserts `pattern` matches `text` and both captured numbers are > 0."""
    mat = re.search(pattern, text)
    # Fail with a readable message instead of an AttributeError on None when
    # the report format does not match.
    self.assertIsNotNone(
        mat, 'pattern %r not found in %r' % (pattern, text))
    self.assertGreater(float(mat.group(1)), 0.0)
    self.assertGreater(float(mat.group(2)), 0.0)

  def row_metrics(row):
    """Returns the comma-separated fields inside the row's first parens."""
    return row[row.find('(') + 1:row.find(')')].split(',')

  config = config_pb2.ConfigProto()
  with session.Session(config=config) as sess, ops.device(dev):
    x = lib.BuildSmallModel()

    sess.run(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(x,
                 options=config_pb2.RunOptions(
                     trace_level=config_pb2.RunOptions.FULL_TRACE),
                 run_metadata=run_meta)

    model_analyzer.profile(sess.graph, run_meta, options=opts)

    with gfile.Open(outfile, 'r') as f:
      outputs = f.read().split('\n')

    # pylint: disable=line-too-long
    self.assertEqual(
        outputs[0],
        'node name | # parameters | # float_ops | requested bytes | total execution time | accelerator execution time | cpu execution time | assigned devices | op types | op count (run|defined) | input shapes')
    # pylint: enable=line-too-long

    for o in outputs[1:]:
      if o.find('Conv2D ') > 0:
        metrics = row_metrics(o)
        # Make sure time is profiled. Fields 3..5 are checked; without a GPU
        # the step of 2 skips field 4 (presumably the accelerator time, going
        # by the header's column order).
        gap = 1 if gpu_available else 2
        for i in range(3, 6, gap):
          assert_positive_pair('(.*)us/(.*)us', metrics[i])
        # Make sure device is profiled.
        if gpu_available:
          self.assertTrue(metrics[6].find('gpu') > 0)
          self.assertFalse(metrics[6].find('cpu') > 0)
        else:
          self.assertFalse(metrics[6].find('gpu') > 0)
          self.assertTrue(metrics[6].find('cpu') > 0)
        # Make sure float_ops is profiled.
        assert_positive_pair('(.*)k/(.*)k flops', metrics[1].strip())
        # Make sure op_count is profiled.
        self.assertEqual(metrics[8].strip(), '1/1|1/1')
        # Make sure input_shapes is profiled.
        self.assertEqual(metrics[9].strip(), '0:2x6x6x3|1:3x3x3x6')

      if o.find('DW (3x3x3x6') > 0:
        # Make sure params is profiled.
        metrics = row_metrics(o)
        assert_positive_pair('(.*)/(.*) params', metrics[1].strip())