def testArithmeticOptimizationActive(self):
  """Tests that tfdbg can dump the tensor from nodes created by Grappler."""
  with session.Session(
      config=_grappler_enabled_session_config()) as sess:
    u = variables.VariableV1([[1, 2], [3, 4]],
                             name="u",
                             dtype=dtypes.float32)

    # The next two ops should be optimized by Grappler into a single op:
    # either an AddN op or a Mul op.
    x = math_ops.add(u, u)
    x = math_ops.add(x, u)
    y = math_ops.multiply(x, u)

    sess.run(variables.global_variables_initializer())

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity"],
        debug_urls=[self._debug_url])

    run_metadata = config_pb2.RunMetadata()
    run_result = sess.run(y, options=run_options, run_metadata=run_metadata)
    self.assertAllClose(run_result, [[3, 12], [27, 48]])

    dump_data = debug_data.DebugDumpDir(
        self._dump_root,
        partition_graphs=run_metadata.partition_graphs,
        validate=True)

    original_node_names = set(op.name for op in sess.graph.get_operations())
    dumped_node_names = set(dump_data.nodes())
    grappler_created_node_names = dumped_node_names - original_node_names
    grappler_removed_node_names = original_node_names - dumped_node_names

    # Assert that Grappler should have replaced some of the nodes from the
    # original graph with new nodes.
    self.assertTrue(grappler_created_node_names)
    self.assertTrue(grappler_removed_node_names)

    # Iterate through the nodes created by Grappler. One of them should be
    # the result of replacing the original add ops with an AddN op or a
    # Mul op.
    found_optimized_node = False
    for grappler_node_name in grappler_created_node_names:
      node_op_type = dump_data.node_op_type(grappler_node_name)
      # Look for the node created by Grappler's arithmetic optimization.
      if ((test_util.IsMklEnabled() and
           node_op_type in ("_MklAddN", "Mul")) or
          (node_op_type in ("AddN", "Mul"))):
        datum = dump_data.get_tensors(grappler_node_name, 0, "DebugIdentity")
        self.assertEqual(1, len(datum))
        self.assertAllClose(datum[0], [[3, 6], [9, 12]])
        found_optimized_node = True
        break
    self.assertTrue(
        found_optimized_node,
        "Failed to find optimized node created by Grappler's arithmetic "
        "optimization.")
def testDumpToFileOverlappingParentDir(self):
  with session.Session() as sess:
    u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
    v_init_val = np.array([[2.0], [-1.0]])

    # Use node names with overlapping namespace (i.e., parent directory) to
    # test concurrent, non-racing directory creation.
    u_name = "testDumpToFile/u"
    v_name = "testDumpToFile/v"

    u_init = constant_op.constant(u_init_val, shape=[2, 2])
    u = variables.Variable(u_init, name=u_name)
    v_init = constant_op.constant(v_init_val, shape=[2, 1])
    v = variables.Variable(v_init, name=v_name)

    w = math_ops.matmul(u, v, name="testDumpToFile/matmul")

    u.initializer.run()
    v.initializer.run()

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_urls = "file://%s" % self._dump_root

    # Add debug tensor watch for u.
    debug_utils.add_debug_tensor_watch(
        run_options, "%s/read" % u_name, 0, debug_urls=debug_urls)
    # Add debug tensor watch for v.
    debug_utils.add_debug_tensor_watch(
        run_options, "%s/read" % v_name, 0, debug_urls=debug_urls)

    run_metadata = config_pb2.RunMetadata()

    # Invoke Session.run().
    sess.run(w, options=run_options, run_metadata=run_metadata)

    self.assertEqual(self._expected_partition_graph_count,
                     len(run_metadata.partition_graphs))

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)
    self.assertTrue(dump.loaded_partition_graphs())

    # Verify the dumped tensor values for u and v.
    self.assertEqual(2, dump.size)

    self.assertAllClose([u_init_val],
                        dump.get_tensors("%s/read" % u_name, 0,
                                         "DebugIdentity"))
    self.assertAllClose([v_init_val],
                        dump.get_tensors("%s/read" % v_name, 0,
                                         "DebugIdentity"))

    self.assertGreaterEqual(
        dump.get_rel_timestamps("%s/read" % u_name, 0, "DebugIdentity")[0], 0)
    self.assertGreaterEqual(
        dump.get_rel_timestamps("%s/read" % v_name, 0, "DebugIdentity")[0], 0)
def testMinOption(self): ops.reset_default_graph() def check_min(nodes, mm=0, mam=0, mcm=0, mb=0, mpb=0, mrb=0, mob=0): for n in nodes: if mm > 0: self.assertGreaterEqual(n.exec_micros, mm) if mam > 0: self.assertGreaterEqual(n.accelerator_exec_micros, mam) if mcm > 0: self.assertGreaterEqual(n.cpu_exec_micros, mcm) if mb > 0: self.assertGreaterEqual(n.requested_bytes, mb) if mpb > 0: self.assertGreaterEqual(n.peak_bytes, mpb) if mrb > 0: self.assertGreaterEqual(n.residual_bytes, mrb) if mob > 0: self.assertGreaterEqual(n.output_bytes, mob) check_min(n.children, mm, mam, mcm, mb, mpb, mrb, mob) with session.Session() as sess: x = lib.BuildSmallModel() sess.run(variables.global_variables_initializer()) run_meta = config_pb2.RunMetadata() _ = sess.run(x, options=config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE), run_metadata=run_meta) min_val = random.randint(0, 10000) opts = builder(builder.time_and_memory( min_micros=min_val)).with_empty_output().build() tfprof_node = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts) check_min(tfprof_node.children, mm=min_val) opts = builder( builder.time_and_memory(min_accelerator_micros=min_val) ).with_empty_output().build() tfprof_node = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts) check_min(tfprof_node.children, mam=min_val) opts = builder(builder.time_and_memory( min_cpu_micros=min_val)).with_empty_output().build() tfprof_node = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts) check_min(tfprof_node.children, mcm=min_val) opts = builder(builder.time_and_memory( min_bytes=min_val)).with_empty_output().build() tfprof_node = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts) check_min(tfprof_node.children, mb=min_val) opts = builder(builder.time_and_memory( min_peak_bytes=min_val)).with_empty_output().build() tfprof_node = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts) check_min(tfprof_node.children, mpb=min_val) opts = builder(builder.time_and_memory( min_residual_bytes=min_val)).with_empty_output().build() tfprof_node = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts) check_min(tfprof_node.children, mrb=min_val) opts = builder(builder.time_and_memory( min_output_bytes=min_val)).with_empty_output().build() tfprof_node = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts) check_min(tfprof_node.children, mob=min_val)
def testFindNodesWithBadTensorValues(self): with session.Session() as sess: u_name = "testFindNodesWithBadTensorValues/u" v_name = "testFindNodesWithBadTensorValues/v" w_name = "testFindNodesWithBadTensorValues/w" x_name = "testFindNodesWithBadTensorValues/x" y_name = "testFindNodesWithBadTensorValues/y" z_name = "testFindNodesWithBadTensorValues/z" u_init = constant_op.constant([2.0, 4.0]) u = variables.Variable(u_init, name=u_name) v_init = constant_op.constant([2.0, 1.0]) v = variables.Variable(v_init, name=v_name) # Expected output: [0.0, 3.0] w = math_ops.sub(u, v, name=w_name) # Expected output: [inf, 1.3333] x = math_ops.div(u, w, name=x_name) # Expected output: [nan, 4.0] y = math_ops.mul(w, x, name=y_name) z = math_ops.mul(y, y, name=z_name) u.initializer.run() v.initializer.run() run_options = config_pb2.RunOptions(output_partition_graphs=True) debug_utils.watch_graph(run_options, sess.graph, debug_ops=["DebugIdentity"], debug_urls=self._debug_urls()) run_metadata = config_pb2.RunMetadata() sess.run(z, options=run_options, run_metadata=run_metadata) self.assertEqual(self._expected_partition_graph_count, len(run_metadata.partition_graphs)) dump = debug_data.DebugDumpDir( self._dump_root, partition_graphs=run_metadata.partition_graphs) def has_bad_value(_, tensor): return np.any(np.isnan(tensor)) or np.any(np.isinf(tensor)) # Find all "offending tensors". bad_data = dump.find(has_bad_value) # Verify that the nodes with bad values are caught through running find # on the debug dump. self.assertEqual(3, len(bad_data)) self.assertEqual(x_name, bad_data[0].node_name) self.assertEqual(y_name, bad_data[1].node_name) self.assertEqual(z_name, bad_data[2].node_name) # Test first_n kwarg of find(): Find the first offending tensor. first_bad_datum = dump.find(has_bad_value, first_n=1) self.assertEqual(1, len(first_bad_datum)) self.assertEqual(x_name, first_bad_datum[0].node_name)
def testDumpCausalityCheck(self): with session.Session() as sess: u_name = "testDumpCausalityCheck/u" v_name = "testDumpCausalityCheck/v" w_name = "testDumpCausalityCheck/w" u_init = constant_op.constant([2.0, 4.0]) u = variables.Variable(u_init, name=u_name) v = math_ops.add(u, u, name=v_name) w = math_ops.add(v, v, name=w_name) u.initializer.run() run_options = config_pb2.RunOptions(output_partition_graphs=True) debug_utils.watch_graph(run_options, sess.graph, debug_ops=["DebugIdentity"], debug_urls=self._debug_urls()) run_metadata = config_pb2.RunMetadata() sess.run(w, options=run_options, run_metadata=run_metadata) self.assertEqual(self._expected_partition_graph_count, len(run_metadata.partition_graphs)) # First, loading the original dump without supplying the # partition_graphs should not cause a RuntimeError, validation occurs # only with partition_graphs loaded. debug_data.DebugDumpDir(self._dump_root) # Now, loading the original dump with partition graphs supplied should # succeed. The validation should pass quietly. dump = debug_data.DebugDumpDir( self._dump_root, partition_graphs=run_metadata.partition_graphs) # Get the dump file names and compute their timestamps. self.assertEqual( 1, len(dump.get_tensor_file_paths(u_name, 0, "DebugIdentity"))) u_file_path = dump.get_tensor_file_paths(u_name, 0, "DebugIdentity")[0] self.assertEqual( 1, len(dump.get_tensor_file_paths(v_name, 0, "DebugIdentity"))) v_file_path = dump.get_tensor_file_paths(v_name, 0, "DebugIdentity")[0] u_timestamp = int(u_file_path[u_file_path.rindex("_") + 1:]) v_timestamp = int(v_file_path[v_file_path.rindex("_") + 1:]) # Swap the time stamps new_u_file_path = u_file_path[:u_file_path. rindex("_")] + "_%d" % v_timestamp new_v_file_path = v_file_path[:v_file_path. rindex("_")] + "_%d" % u_timestamp os.rename(u_file_path, new_u_file_path) os.rename(v_file_path, new_v_file_path) # Load the dump directory again. Now a ValueError is expected to be # raised due to the timestamp swap. with self.assertRaisesRegexp(ValueError, "Causality violated"): dump = debug_data.DebugDumpDir( self._dump_root, partition_graphs=run_metadata.partition_graphs) # Loading the dump directory with kwarg "validate" set explicitly to # False should get rid of the error. dump = debug_data.DebugDumpDir( self._dump_root, partition_graphs=run_metadata.partition_graphs, validate=False)
def run_op_benchmark(self, sess, op_or_tensor, feed_dict=None, burn_iters=2, min_iters=10, store_trace=False, name=None): """Run an op or tensor in the given session. Report the results. Args: sess: `Session` object to use for timing. op_or_tensor: `Operation` or `Tensor` to benchmark. feed_dict: A `dict` of values to feed for each op iteration (see the `feed_dict` parameter of `Session.run`). burn_iters: Number of burn-in iterations to run. min_iters: Minimum number of iterations to use for timing. store_trace: Boolean, whether to run an extra untimed iteration and store the trace of iteration in the benchmark report. The trace will be stored as a string in Google Chrome trace format in the extras field "full_trace_chrome_format". name: (optional) Override the BenchmarkEntry name with `name`. Otherwise it is inferred from the top-level method name. """ for _ in range(burn_iters): sess.run(op_or_tensor, feed_dict=feed_dict) deltas = [None] * min_iters for i in range(min_iters): start_time = time.time() sess.run(op_or_tensor, feed_dict=feed_dict) end_time = time.time() delta = end_time - start_time deltas[i] = delta extras = {} if store_trace: run_options = config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE) run_metadata = config_pb2.RunMetadata() sess.run(op_or_tensor, feed_dict=feed_dict, options=run_options, run_metadata=run_metadata) tl = timeline.Timeline(run_metadata.step_stats) extras[ "full_trace_chrome_format"] = tl.generate_chrome_trace_format( ) def _median(x): if not x: return -1 s = sorted(x) l = len(x) lm1 = l - 1 return (s[l // 2] + s[lm1 // 2]) / 2.0 median_delta = _median(deltas) self.report_benchmark(iters=min_iters, wall_time=median_delta, extras=extras, name=name)
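# Hedged usage sketch (the benchmark class and ops below are hypothetical and
# not part of this file): a tf.test.Benchmark subclass would normally call
# run_op_benchmark from one of its benchmark* methods, e.g.
#
#   class MatmulBenchmark(test.Benchmark):
#
#     def benchmarkMatmul256(self):
#       with session.Session() as sess:
#         a = random_ops.random_normal([256, 256])
#         prod = math_ops.matmul(a, a)
#         self.run_op_benchmark(sess, prod, min_iters=25, store_trace=True,
#                               name="matmul_256")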
def testDumpStringTensorsToFileSystem(self):
  with session.Session() as sess:
    str1_init_val = np.array(b"abc")
    str2_init_val = np.array(b"def")

    str1_init = constant_op.constant(str1_init_val)
    str2_init = constant_op.constant(str2_init_val)

    str1_name = "str1"
    str2_name = "str2"
    str1 = variables.Variable(str1_init, name=str1_name)
    str2 = variables.Variable(str2_init, name=str2_name)
    # Concatenate str1 and str2.
    str_concat = math_ops.add(str1, str2, name="str_concat")

    str1.initializer.run()
    str2.initializer.run()

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_urls = self._debug_urls()

    # Add debug tensor watch for str1.
    debug_utils.add_debug_tensor_watch(
        run_options, "%s/read" % str1_name, 0, debug_urls=debug_urls)
    # Add debug tensor watch for str2.
    debug_utils.add_debug_tensor_watch(
        run_options, "%s/read" % str2_name, 0, debug_urls=debug_urls)

    run_metadata = config_pb2.RunMetadata()
    sess.run(str_concat, options=run_options, run_metadata=run_metadata)

    # String ops are located on CPU.
    self.assertEqual(1, len(run_metadata.partition_graphs))

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

    self.assertIn(str1_name, dump.nodes())
    self.assertIn(str2_name, dump.nodes())

    self.assertEqual(2, dump.size)

    self.assertEqual([str1_init_val],
                     dump.get_tensors("%s/read" % str1_name, 0,
                                      "DebugIdentity"))
    self.assertEqual([str2_init_val],
                     dump.get_tensors("%s/read" % str2_name, 0,
                                      "DebugIdentity"))

    self.assertGreaterEqual(
        dump.get_rel_timestamps("%s/read" % str1_name, 0,
                                "DebugIdentity")[0], 0)
    self.assertGreaterEqual(
        dump.get_rel_timestamps("%s/read" % str2_name, 0,
                                "DebugIdentity")[0], 0)
def testAddingSummaryGraphAndRunMetadata(self): test_dir = self._CleanTestDir("basics") sw = writer.FileWriter(test_dir) sw.add_session_log(event_pb2.SessionLog(status=SessionLog.START), 1) sw.add_summary( summary_pb2.Summary( value=[summary_pb2.Summary.Value( tag="mee", simple_value=10.0)]), 10) sw.add_summary( summary_pb2.Summary( value=[summary_pb2.Summary.Value( tag="boo", simple_value=20.0)]), 20) with ops.Graph().as_default() as g: constant_op.constant([0], name="zero") sw.add_graph(g, global_step=30) run_metadata = config_pb2.RunMetadata() device_stats = run_metadata.step_stats.dev_stats.add() device_stats.device = "test" sw.add_run_metadata(run_metadata, "test run", global_step=40) sw.close() rr = self._EventsReader(test_dir) # The first event should list the file_version. ev = next(rr) self._assertRecent(ev.wall_time) self.assertEquals("brain.Event:2", ev.file_version) # The next event should be the START message. ev = next(rr) self._assertRecent(ev.wall_time) self.assertEquals(1, ev.step) self.assertEquals(SessionLog.START, ev.session_log.status) # The next event should have the value 'mee=10.0'. ev = next(rr) self._assertRecent(ev.wall_time) self.assertEquals(10, ev.step) self.assertProtoEquals(""" value { tag: 'mee' simple_value: 10.0 } """, ev.summary) # The next event should have the value 'boo=20.0'. ev = next(rr) self._assertRecent(ev.wall_time) self.assertEquals(20, ev.step) self.assertProtoEquals(""" value { tag: 'boo' simple_value: 20.0 } """, ev.summary) # The next event should have the graph_def. ev = next(rr) self._assertRecent(ev.wall_time) self.assertEquals(30, ev.step) ev_graph = graph_pb2.GraphDef() ev_graph.ParseFromString(ev.graph_def) self.assertProtoEquals(g.as_graph_def(add_shapes=True), ev_graph) # The next event should have metadata for the run. ev = next(rr) self._assertRecent(ev.wall_time) self.assertEquals(40, ev.step) self.assertEquals("test run", ev.tagged_run_metadata.tag) parsed_run_metadata = config_pb2.RunMetadata() parsed_run_metadata.ParseFromString(ev.tagged_run_metadata.run_metadata) self.assertProtoEquals(run_metadata, parsed_run_metadata) # We should be done. self.assertRaises(StopIteration, lambda: next(rr))
def test_matmul_biasadd_gelu_fusion(self, mode): """Test MatMul+BiasAdd+Gelu fusion.""" self._maybe_skip(mode) is_bf16_supported = _pywrap_utils.IsBF16SupportedByOneDNNOnThisCPU() run_options = config_pb2.RunOptions(output_partition_graphs=True) metadata = config_pb2.RunMetadata() m, n, k = (3, 3, 4) # Matrix dimensions for precision in ('float32', 'bfloat16'): for approximate in (False, True): # Gelu exact (approximate=False) is not supported with bfloat16 # precision since no support for Erf with bfloat16 data type. # TODO(intel-tf): Enable gelu exact with bfloat16, when Erf op is # supported with bfloat16. if precision == 'bfloat16': if not (approximate and is_bf16_supported): continue # Create MatMul + BiasAdd + Gelu graph ops.reset_default_graph() x = _input([m, k]) w = _weight([k, n]) b = _bias([n]) if precision == 'bfloat16': x = math_ops.cast(x, dtypes.bfloat16) w = math_ops.cast(w, dtypes.bfloat16) b = math_ops.cast(b, dtypes.bfloat16) y = math_ops.matmul(x, w) z = nn.bias_add(y, b) out = nn.gelu(z, approximate=approximate) # Compute reference value. config = _get_config(remapping_on=False) with session.Session(config=config) as sess: sess.run(variables.global_variables_initializer()) output_val_ref = sess.run(out, options=run_options, run_metadata=metadata) # Compute output with fusion. config = _get_config(remapping_on=True) with session.Session(config=config) as sess: sess.run(variables.global_variables_initializer()) output_val = sess.run(out, options=run_options, run_metadata=metadata) graph = metadata.partition_graphs[0] # Graph should contain fused op. found_fused_op = False gelu_type = b'GeluApproximate' if approximate else b'GeluExact' for node in graph.node: if node.op in ('_MklNativeFusedMatMul', '_MklFusedMatMul'): fused_ops = node.attr['fused_ops'].list.s found_fused_op = len(fused_ops) == 2 and \ fused_ops[0] == b'BiasAdd' and fused_ops[1] == gelu_type break self.assertTrue(found_fused_op) # Computed output value should be close to reference value. tol = 1e-5 if precision == 'float32' else 1e-2 self.assertAllClose(output_val_ref, output_val, atol=tol, rtol=tol)
def run(self, fetches, feed_dict=None, options=None, run_metadata=None): """Wrapper around Session.run() that inserts tensor watch options. Args: fetches: Same as the fetches arg to regular Session.run() feed_dict: Same as the feed_dict arg to regular Session.run() options: Same as the options arg to regular Session.run() run_metadata: Same as the run_metadata to regular Session.run() Returns: Simply forwards the output of the wrapped Session.run() call. Raises: ValueError: On invalid OnRunStartAction value. """ self._run_call_count += 1 # Invoke on-run-start callback and obtain response. run_start_resp = self.on_run_start( OnRunStartRequest(fetches, feed_dict, options, run_metadata, self._run_call_count)) _check_type(run_start_resp, OnRunStartResponse) if run_start_resp.action == OnRunStartAction.DEBUG_RUN: # Decorate RunOption to fill in debugger tensor watch specifications. decorated_run_options = options or config_pb2.RunOptions() run_metadata = run_metadata or config_pb2.RunMetadata() self._decorate_run_options(decorated_run_options, run_start_resp.debug_urls) # Invoke the run() method of the wrapped Session. retvals = self._sess.run(fetches, feed_dict=feed_dict, options=decorated_run_options, run_metadata=run_metadata) # Prepare arg for the on-run-end callback. run_end_req = OnRunEndRequest(run_start_resp.action, run_metadata=run_metadata) elif run_start_resp.action == OnRunStartAction.NON_DEBUG_RUN: # Invoke run() method of the wrapped session. retvals = self._sess.run(fetches, feed_dict=feed_dict, options=options, run_metadata=run_metadata) # Prepare arg for the on-run-end callback. run_end_req = OnRunEndRequest(run_start_resp.action) elif run_start_resp.action == OnRunStartAction.INVOKE_STEPPER: # TODO(cais): Implement stepper loop. raise NotImplementedError( "OnRunStartAction INVOKE_STEPPER has not been implemented.") else: raise ValueError("Invalid OnRunStartAction value: %s" % run_start_resp.action) # Invoke on-run-end callback and obtain response. run_end_resp = self.on_run_end(run_end_req) _check_type(run_end_resp, OnRunEndResponse) # Currently run_end_resp is only a placeholder. No action is taken on it. return retvals
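# Hedged usage note (the concrete wrapper class below comes from the tfdbg
# wrappers package and is not defined in this snippet): client code typically
# wraps an existing session and then calls run() exactly as it would on a
# plain Session, e.g.
#
#   sess = session.Session()
#   sess = local_cli_wrapper.LocalCLIDebugWrapperSession(sess)
#   sess.run(fetches, feed_dict=feed_dict)
#
# The wrapper decides per call (via on_run_start) whether to decorate
# RunOptions with debug tensor watches before delegating to the wrapped
# Session.run().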
def setUpClass(cls): cls._dump_root = tempfile.mkdtemp() cls._is_gpu_available = test.is_gpu_available() if cls._is_gpu_available: cls._main_device = "/job:localhost/replica:0/task:0/gpu:0" else: cls._main_device = "/job:localhost/replica:0/task:0/cpu:0" with session.Session() as sess: u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]]) v_init_val = np.array([[2.0], [-1.0]]) u_name = "simple_mul_add/u" v_name = "simple_mul_add/v" u_init = constant_op.constant(u_init_val, shape=[2, 2]) u = variables.Variable(u_init, name=u_name) v_init = constant_op.constant(v_init_val, shape=[2, 1]) v = variables.Variable(v_init, name=v_name) w = math_ops.matmul(u, v, name="simple_mul_add/matmul") x = math_ops.add(w, w, name="simple_mul_add/add") u.initializer.run() v.initializer.run() run_options = config_pb2.RunOptions(output_partition_graphs=True) debug_utils.watch_graph(run_options, sess.graph, debug_ops=["DebugIdentity"], debug_urls="file://%s" % cls._dump_root) # Invoke Session.run(). run_metadata = config_pb2.RunMetadata() sess.run(x, options=run_options, run_metadata=run_metadata) cls._debug_dump = debug_data.DebugDumpDir( cls._dump_root, partition_graphs=run_metadata.partition_graphs) # Construct the analyzer. cls._analyzer = analyzer_cli.DebugAnalyzer(cls._debug_dump) # Construct the handler registry. cls._registry = debugger_cli_common.CommandHandlerRegistry() # Register command handlers. cls._registry.register_command_handler( "list_tensors", cls._analyzer.list_tensors, cls._analyzer.get_help("list_tensors"), prefix_aliases=["lt"]) cls._registry.register_command_handler( "node_info", cls._analyzer.node_info, cls._analyzer.get_help("node_info"), prefix_aliases=["ni"]) cls._registry.register_command_handler( "print_tensor", cls._analyzer.print_tensor, cls._analyzer.get_help("print_tensor"), prefix_aliases=["pt"])
def testLazyCompilation(self): @function.Defun(compiled=True) def CompiledFunction(x): return math_ops.log(x) with session_lib.Session(config=NoRewriteSessionConfig()) as sess: x = array_ops.placeholder(dtypes.float32) y = CompiledFunction(x) # The very first run of the cluster is always compiled (non-lazily). run_metadata_for_first_run = config_pb2.RunMetadata() sess.run(y, feed_dict={x: [2., 10., 19., 77., 100.]}, run_metadata=run_metadata_for_first_run, options=config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE)) self.assertTrue( InLabels(RunMetadataLabels(run_metadata_for_first_run), "_XlaCompile")) self.assertTrue( InLabels(RunMetadataLabels(run_metadata_for_first_run), "_XlaRun")) run_metadata_before_warmup = config_pb2.RunMetadata() sess.run(y, feed_dict={x: [2., 10.]}, run_metadata=run_metadata_before_warmup, options=config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE)) self.assertTrue( InLabels(RunMetadataLabels(run_metadata_before_warmup), "_XlaCompile")) self.assertFalse( InLabels(RunMetadataLabels(run_metadata_before_warmup), "_XlaRun")) # We compile when we see the same shape a second time. run_metadata_after_warmup = config_pb2.RunMetadata() sess.run(y, feed_dict={x: [2., 10.]}, run_metadata=run_metadata_after_warmup, options=config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE)) self.assertTrue( InLabels(RunMetadataLabels(run_metadata_after_warmup), "_XlaCompile")) self.assertTrue( InLabels(RunMetadataLabels(run_metadata_after_warmup), "_XlaRun")) run_metadata_for_new_shape = config_pb2.RunMetadata() sess.run(y, feed_dict={x: [2., 10., 12.]}, run_metadata=run_metadata_for_new_shape, options=config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE)) self.assertTrue( InLabels(RunMetadataLabels(run_metadata_for_new_shape), "_XlaCompile")) self.assertFalse( InLabels(RunMetadataLabels(run_metadata_for_new_shape), "_XlaRun"))
def testDumpToFileWhileLoop(self):
  with session.Session() as sess:
    num_iter = 10

    # "u" is the Variable being updated in the loop.
    u_name = "testDumpToFileWhileLoop/u"
    u_namespace = u_name.split("/")[0]

    u_init_val = np.array(11.0)
    u_init = constant_op.constant(u_init_val)
    u = variables.Variable(u_init, name=u_name)

    # "v" is the increment.
    v_name = "testDumpToFileWhileLoop/v"
    v_namespace = v_name.split("/")[0]

    v_init_val = np.array(2.0)
    v_init = constant_op.constant(v_init_val)
    v = variables.Variable(v_init, name=v_name)

    u.initializer.run()
    v.initializer.run()

    i = constant_op.constant(0, name="testDumpToFileWhileLoop/i")

    def cond(i):
      return math_ops.less(i, num_iter)

    def body(i):
      new_u = state_ops.assign_add(u, v)
      new_i = math_ops.add(i, 1)
      op = control_flow_ops.group(new_u)
      new_i = control_flow_ops.with_dependencies([op], new_i)
      return [new_i]

    loop = control_flow_ops.while_loop(cond, body, [i],
                                       parallel_iterations=1)

    # Create RunOptions for debug-watching tensors.
    run_options = config_pb2.RunOptions()
    debug_url = "file://%s" % self.dump_root_

    # Add debug tensor watch for u.
    self._addDebugTensorWatch(run_options, u_name, 0, debug_urls=[debug_url])
    # Add debug tensor watch for v.
    self._addDebugTensorWatch(run_options, "%s/read" % v_name, 0,
                              debug_urls=[debug_url])
    # Add debug tensor watch for while/Identity.
    self._addDebugTensorWatch(run_options, "while/Identity", 0,
                              debug_urls=[debug_url])

    run_metadata = config_pb2.RunMetadata()
    r = sess.run(loop, options=run_options, run_metadata=run_metadata)

    self.assertEqual(num_iter, r)

    u_val_final = sess.run(u)
    self.assertAllClose(u_init_val + num_iter * v_init_val, u_val_final)

    # Verify dump files.
    self.assertTrue(os.path.isdir(self.dump_root_))
    self.assertTrue(
        os.path.isdir(os.path.join(self.dump_root_, u_namespace)))
    self.assertTrue(
        os.path.isdir(os.path.join(self.dump_root_, v_namespace, "v")))

    # Verify the dump file for tensor "u".
    dump_files = glob.glob(
        os.path.join(self.dump_root_, u_namespace, "u_0_*"))
    self.assertEqual(1, len(dump_files))
    dump_file = os.path.join(self.dump_root_, u_namespace, dump_files[0])
    self.assertTrue(os.path.isfile(dump_file))
    self._verifyTensorDumpFile(dump_file, "%s:0" % u_name, "DebugIdentity",
                               0, u_init_val)

    # Verify the dump file for tensor "v".
    dump_files = os.listdir(os.path.join(self.dump_root_, v_name))
    self.assertEqual(1, len(dump_files))
    self.assertTrue(dump_files[0].startswith("read_0_"))

    dump_file = os.path.join(self.dump_root_, v_name, dump_files[0])
    self._verifyTensorDumpFile(dump_file, "%s/read:0" % v_name,
                               "DebugIdentity", 0, v_init_val)

    # Verify the dump files for tensor "while/Identity".
    while_identity_dump_files = sorted(
        os.listdir(os.path.join(self.dump_root_, "while")))
    self.assertEqual(num_iter, len(while_identity_dump_files))

    # Verify the content of the individual dump files.
    for k in xrange(len(while_identity_dump_files)):
      dump_file_path = os.path.join(self.dump_root_, "while",
                                    while_identity_dump_files[k])
      self._verifyTensorDumpFile(dump_file_path, "while/Identity:0",
                                 "DebugIdentity", 0, np.array(k))
def _run_with_debugging(self, run_start_resp, fetches, feed_dict, options, run_metadata, callable_runner, callable_runner_args, callable_options): """Perform a session.run() or callable with debugging.""" # Decorate RunOption to fill in debugger tensor watch specifications. decorated_run_options = None if callable_options: callable_options_id = id(callable_options) if callable_options_id not in self._cached_callables_from_options: # Make a copy of callable_options to avoid mutating it. new_callable_options = config_pb2.CallableOptions() new_callable_options.CopyFrom(callable_options) decorated_run_options = new_callable_options.run_options else: decorated_run_options = options or config_pb2.RunOptions() run_metadata = run_metadata or config_pb2.RunMetadata() if decorated_run_options: self._decorate_run_options_for_debug( decorated_run_options, run_start_resp.debug_urls, debug_ops=run_start_resp.debug_ops, node_name_regex_allowlist=(run_start_resp.node_name_regex_allowlist), op_type_regex_allowlist=run_start_resp.op_type_regex_allowlist, tensor_dtype_regex_allowlist=( run_start_resp.tensor_dtype_regex_allowlist), tolerate_debug_op_creation_failures=( run_start_resp.tolerate_debug_op_creation_failures)) # Invoke the run() method of the wrapped Session. Catch any TensorFlow # runtime errors. tf_error = None try: if callable_runner: retvals = callable_runner(*callable_runner_args, options=decorated_run_options, run_metadata=run_metadata) elif callable_options: # pylint:disable=protected-access if callable_options_id in self._cached_callables_from_options: callable_object = self._cached_callables_from_options[ callable_options_id] else: callable_object = self._sess._make_callable_from_options( new_callable_options) self._cached_callables_from_options[ callable_options_id] = callable_object # pylint:enable=protected-access retvals = callable_object( *callable_runner_args, run_metadata=run_metadata) else: retvals = self._sess.run(fetches, feed_dict=feed_dict, options=decorated_run_options, run_metadata=run_metadata) except errors.OpError as op_error: if self._pass_through_operrors: raise op_error tf_error = op_error retvals = op_error return retvals, OnRunEndRequest( run_start_resp.action, run_metadata=run_metadata, client_graph_def=self._sess.graph.as_graph_def(), tf_error=tf_error)
def testMultiStepProfile(self): ops.reset_default_graph() opts = builder.time_and_memory() with session.Session() as sess: r1, r2, r3 = lib.BuildSplitableModel() sess.run(variables.global_variables_initializer()) profiler = model_analyzer.Profiler(sess.graph) pb0 = profiler.profile_name_scope(opts) run_meta = config_pb2.RunMetadata() _ = sess.run(r1, options=config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE), run_metadata=run_meta) profiler.add_step(1, run_meta) pb1 = profiler.profile_name_scope(opts) self.assertNotEqual(lib.SearchTFProfNode(pb1, 'DW'), None) self.assertEqual(lib.SearchTFProfNode(pb1, 'DW2'), None) self.assertEqual(lib.SearchTFProfNode(pb1, 'add'), None) run_meta2 = config_pb2.RunMetadata() _ = sess.run(r2, options=config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE), run_metadata=run_meta2) profiler.add_step(2, run_meta2) pb2 = profiler.profile_name_scope(opts) self.assertNotEqual(lib.SearchTFProfNode(pb2, 'DW'), None) self.assertNotEqual(lib.SearchTFProfNode(pb2, 'DW2'), None) self.assertEqual(lib.SearchTFProfNode(pb2, 'add'), None) run_meta3 = config_pb2.RunMetadata() _ = sess.run(r3, options=config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE), run_metadata=run_meta3) profiler.add_step(3, run_meta3) pb3 = profiler.profile_name_scope(opts) self.assertNotEqual(lib.SearchTFProfNode(pb3, 'DW'), None) self.assertNotEqual(lib.SearchTFProfNode(pb3, 'DW2'), None) self.assertNotEqual(lib.SearchTFProfNode(pb3, 'add'), None) self.assertEqual(lib.SearchTFProfNode(pb0, 'Conv2D'), None) self.assertGreater( lib.SearchTFProfNode(pb1, 'Conv2D').exec_micros, 0) self.assertEqual(lib.SearchTFProfNode(pb1, 'Conv2D_1'), None) self.assertGreater( lib.SearchTFProfNode(pb2, 'Conv2D_1').exec_micros, 0) self.assertEqual(lib.SearchTFProfNode(pb2, 'add'), None) self.assertGreater(lib.SearchTFProfNode(pb3, 'add').exec_micros, 0) advice_pb = profiler.advise(model_analyzer.ALL_ADVICE) self.assertTrue( 'AcceleratorUtilizationChecker' in advice_pb.checkers) self.assertTrue('ExpensiveOperationChecker' in advice_pb.checkers) self.assertTrue('OperationChecker' in advice_pb.checkers) checker = advice_pb.checkers['AcceleratorUtilizationChecker'] if test.is_gpu_available(): self.assertGreater(len(checker.reports), 0) else: self.assertEqual(len(checker.reports), 0) checker = advice_pb.checkers['ExpensiveOperationChecker'] self.assertGreater(len(checker.reports), 0)
def _profiled_run(self, fetches, feed_dict=None, options=None, run_metadata=None): """Overwrites the session.run().""" # pylint: disable=protected-access # Count the session steps. with self.profile_context._new_step() as state: step, locked = state # Fast path if no need for profiling. if locked and not self.profile_context._is_fast_path(step): # Maybe trace this step. if self.profile_context._should_trace(step, self.graph, fetches): if self.profile_context._debug: sys.stderr.write('debug: tracing step: %d\n' % step) # Enable tracing, perform auto profiling or auto dump. if not run_metadata: run_metadata = config_pb2.RunMetadata() if not options: options = config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE) old_trace_level = options.trace_level else: old_trace_level = options.trace_level options.trace_level = config_pb2.RunOptions.FULL_TRACE ret = self._profiler_run_internal(fetches, feed_dict, options, run_metadata) if self.profile_context._debug: self.profile_context._dump_file(run_metadata, 'run_meta_%d' % step) self.profile_context.profiler._graph = self.graph self.profile_context.profiler.add_step(step, run_metadata) options.trace_level = old_trace_level else: ret = self._profiler_run_internal(fetches, feed_dict, options) # Maybe dump profile. self.profile_context._maybe_dump(step) # Maybe profile: to_profiles = self.profile_context._profile_candidates() for to_prof in to_profiles: cmd, opts, _ = to_prof saved_views = self.profile_context._views.setdefault(cmd, {}) if self.profile_context._debug: sys.stderr.write('debug: profiling %s step: %d\n' % (cmd, step)) if cmd == 'graph': saved_views[ step] = self.profile_context.profiler.profile_graph( opts) elif cmd == 'scope': saved_views[ step] = self.profile_context.profiler.profile_name_scope( opts) elif cmd == 'op': saved_views[ step] = self.profile_context.profiler.profile_operations( opts) elif cmd == 'code': saved_views[ step] = self.profile_context.profiler.profile_python( opts) else: raise ValueError('Unknown cmd: %s\n' % cmd) return ret # Fast no lock path. return self._profiler_run_internal(fetches, feed_dict, options, run_metadata)
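# Hedged usage sketch (not part of this file; assumes the public
# ProfileContext wrapper that installs this _profiled_run hook, whose exact
# import path may differ between TF 1.x releases). Inside the context,
# ordinary Session.run() calls are traced and profiled automatically:
#
#   with tf.contrib.tfprof.ProfileContext('/tmp/profile_dir') as pctx:
#     with tf.Session() as sess:
#       sess.run(train_op)  # selected steps are traced and accumulated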
def testProfileBasic(self): ops.reset_default_graph() outfile = os.path.join(test.get_temp_dir(), 'dump') opts = (builder( builder.trainable_variables_parameter()).with_file_output( outfile).with_accounted_types(['.*']).select([ 'params', 'float_ops', 'micros', 'bytes', 'device', 'op_types', 'occurrence' ]).build()) # Test the output without run_meta. sess = session.Session() r = lib.BuildFullModel() sess.run(variables.global_variables_initializer()) profiler = model_analyzer.Profiler(sess.graph) profiler.profile_name_scope(opts) with gfile.Open(outfile, 'r') as f: profiler_str = f.read() model_analyzer.profile(sess.graph, cmd='scope', options=opts) with gfile.Open(outfile, 'r') as f: pma_str = f.read() self.assertEqual(pma_str, profiler_str) # Test the output with run_meta. run_meta = config_pb2.RunMetadata() _ = sess.run(r, options=config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE), run_metadata=run_meta) profiler.add_step(1, run_meta) profiler.profile_graph(opts) with gfile.Open(outfile, 'r') as f: profiler_str = f.read() model_analyzer.profile(sess.graph, cmd='graph', run_meta=run_meta, options=opts) with gfile.Open(outfile, 'r') as f: pma_str = f.read() self.assertEqual(pma_str, profiler_str) profiler.profile_python(opts) with gfile.Open(outfile, 'r') as f: profiler_str = f.read() model_analyzer.profile(sess.graph, cmd='code', run_meta=run_meta, options=opts) with gfile.Open(outfile, 'r') as f: pma_str = f.read() self.assertEqual(pma_str, profiler_str) profiler.profile_operations(opts) with gfile.Open(outfile, 'r') as f: profiler_str = f.read() model_analyzer.profile(sess.graph, cmd='op', run_meta=run_meta, options=opts) with gfile.Open(outfile, 'r') as f: pma_str = f.read() self.assertEqual(pma_str, profiler_str) model_analyzer.profile(sess.graph, cmd='scope', run_meta=run_meta, options=opts) with gfile.Open(outfile, 'r') as f: pma_str = f.read() self.assertNotEqual(pma_str, profiler_str) opts2 = opts.copy() opts2['select'] = ['params', 'float_ops'] profiler.profile_name_scope(opts2) with gfile.Open(outfile, 'r') as f: profiler_str = f.read() model_analyzer.profile(sess.graph, cmd='scope', run_meta=run_meta, options=opts2) with gfile.Open(outfile, 'r') as f: pma_str = f.read() self.assertEqual(pma_str, profiler_str)
def run(self,
        fetches,
        feed_dict=None,
        options=None,
        run_metadata=None,
        callable_runner=None,
        callable_runner_args=None,
        callable_options=None):
  """Wrapper around Session.run() that inserts tensor watch options.

  Args:
    fetches: Same as the `fetches` arg to regular `Session.run()`.
    feed_dict: Same as the `feed_dict` arg to regular `Session.run()`.
    options: Same as the `options` arg to regular `Session.run()`.
    run_metadata: Same as the `run_metadata` arg to regular `Session.run()`.
    callable_runner: A `callable` returned by `Session.make_callable()`.
      If not `None`, `fetches` and `feed_dict` must both be `None`.
      Mutually exclusive with `callable_options`.
    callable_runner_args: An optional list of arguments to `callable_runner`
      or for `callable_options`.
    callable_options: An instance of `config_pb2.CallableOptions`, to be
      used with `Session._make_callable_from_options()`. Mutually exclusive
      with `callable_runner`.

  Returns:
    Simply forwards the output of the wrapped `Session.run()` call.

  Raises:
    ValueError: On invalid `OnRunStartAction` value. Or if `callable_runner`
      is not `None` and either or both of `fetches` and `feed_dict` are also
      specified.
  """
  if callable_runner and callable_options:
    raise ValueError(
        "callable_runner and callable_options are mutually exclusive, but "
        "are both specified in this call to BaseDebugWrapperSession.run().")
  if callable_runner and (fetches or feed_dict):
    raise ValueError(
        "callable_runner and fetches/feed_dict are mutually exclusive, "
        "but are used simultaneously.")
  elif callable_options and (fetches or feed_dict):
    raise ValueError(
        "callable_options and fetches/feed_dict are mutually exclusive, "
        "but are used simultaneously.")

  self.increment_run_call_count()

  empty_fetches = not nest.flatten(fetches)
  if empty_fetches:
    tf_logging.info(
        "Due to empty fetches, tfdbg Session wrapper is letting a "
        "Session.run pass through without any debugging actions.")
  if self._is_disabled_thread() or empty_fetches:
    if callable_runner:
      return callable_runner(*callable_runner_args)
    elif callable_options:
      # pylint:disable=protected-access
      return self._sess._make_callable_from_options(
          callable_options)(*callable_runner_args)
      # pylint:enable=protected-access
    else:
      return self._sess.run(fetches,
                            feed_dict=feed_dict,
                            options=options,
                            run_metadata=run_metadata)

  # Invoke on-run-start callback and obtain response.
  run_start_resp = self.on_run_start(
      OnRunStartRequest(fetches, feed_dict, options, run_metadata,
                        self._run_call_count,
                        is_callable_runner=bool(callable_runner)))
  _check_type(run_start_resp, OnRunStartResponse)

  if run_start_resp.action == OnRunStartAction.DEBUG_RUN:
    # Decorate RunOption to fill in debugger tensor watch specifications.
    decorated_run_options = None
    if callable_options:
      callable_options_id = id(callable_options)
      if callable_options_id not in self._cached_callables_from_options:
        # Make a copy of callable_options to avoid mutating it.
        new_callable_options = config_pb2.CallableOptions()
        new_callable_options.CopyFrom(callable_options)
        decorated_run_options = new_callable_options.run_options
    else:
      decorated_run_options = options or config_pb2.RunOptions()

    run_metadata = run_metadata or config_pb2.RunMetadata()

    if decorated_run_options:
      self._decorate_run_options_for_debug(
          decorated_run_options,
          run_start_resp.debug_urls,
          debug_ops=run_start_resp.debug_ops,
          node_name_regex_whitelist=run_start_resp.node_name_regex_whitelist,
          op_type_regex_whitelist=run_start_resp.op_type_regex_whitelist,
          tensor_dtype_regex_whitelist=(
              run_start_resp.tensor_dtype_regex_whitelist),
          tolerate_debug_op_creation_failures=(
              run_start_resp.tolerate_debug_op_creation_failures))

    # Invoke the run() method of the wrapped Session. Catch any TensorFlow
    # runtime errors.
    tf_error = None
    try:
      if callable_runner:
        retvals = callable_runner(*callable_runner_args,
                                  options=decorated_run_options,
                                  run_metadata=run_metadata)
      elif callable_options:
        # pylint:disable=protected-access
        if callable_options_id in self._cached_callables_from_options:
          callable_object = self._cached_callables_from_options[
              callable_options_id]
        else:
          callable_object = self._sess._make_callable_from_options(
              new_callable_options)
          self._cached_callables_from_options[
              callable_options_id] = callable_object
        # pylint:enable=protected-access
        retvals = callable_object(*callable_runner_args,
                                  run_metadata=run_metadata)
      else:
        retvals = self._sess.run(fetches,
                                 feed_dict=feed_dict,
                                 options=decorated_run_options,
                                 run_metadata=run_metadata)
    except errors.OpError as op_error:
      if self._pass_through_operrors:
        raise op_error
      tf_error = op_error
      retvals = op_error

    run_end_req = OnRunEndRequest(
        run_start_resp.action,
        run_metadata=run_metadata,
        client_graph_def=self._sess.graph.as_graph_def(),
        tf_error=tf_error)

  elif run_start_resp.action == OnRunStartAction.PROFILE_RUN:
    decorated_run_options = options or config_pb2.RunOptions()
    run_metadata = run_metadata or config_pb2.RunMetadata()
    self._decorate_run_options_for_profile(decorated_run_options)
    if callable_runner:
      retvals = callable_runner(*callable_runner_args,
                                options=decorated_run_options,
                                run_metadata=run_metadata)
    else:
      retvals = self._sess.run(fetches,
                               feed_dict=feed_dict,
                               options=decorated_run_options,
                               run_metadata=run_metadata)
    run_end_req = OnRunEndRequest(
        run_start_resp.action,
        run_metadata=run_metadata,
        client_graph_def=self._sess.graph.as_graph_def())
  elif (run_start_resp.action == OnRunStartAction.NON_DEBUG_RUN or
        run_start_resp.action == OnRunStartAction.INVOKE_STEPPER):
    if callable_runner:
      raise NotImplementedError(
          "Stepper mode is not implemented for callables created by "
          "Session.make_callable().")

    if run_start_resp.action == OnRunStartAction.INVOKE_STEPPER:
      with stepper.NodeStepper(self._sess, fetches,
                               feed_dict) as node_stepper:
        retvals = self.invoke_node_stepper(
            node_stepper, restore_variable_values_on_exit=True)
    else:
      # Invoke run() method of the wrapped session.
      retvals = self._sess.run(fetches,
                               feed_dict=feed_dict,
                               options=options,
                               run_metadata=run_metadata)

    # Prepare arg for the on-run-end callback.
    run_end_req = OnRunEndRequest(run_start_resp.action)
  else:
    raise ValueError("Invalid OnRunStartAction value: %s" %
                     run_start_resp.action)

  # Invoke on-run-end callback and obtain response.
  run_end_resp = self.on_run_end(run_end_req)
  _check_type(run_end_resp, OnRunEndResponse)

  # Currently run_end_resp is only a placeholder. No action is taken on it.
  return retvals
def testAllowsDifferentWatchesOnDifferentRuns(self): """Test watching different tensors on different runs of the same graph.""" with session.Session(config=self._no_rewrite_session_config()) as sess: u_init_val = [[5.0, 3.0], [-1.0, 0.0]] v_init_val = [[2.0], [-1.0]] # Use node names with overlapping namespace (i.e., parent directory) to # test concurrent, non-racing directory creation. u_name = "diff_Watch/u" v_name = "diff_Watch/v" u_init = constant_op.constant(u_init_val, shape=[2, 2]) u = variables.Variable(u_init, name=u_name) v_init = constant_op.constant(v_init_val, shape=[2, 1]) v = variables.Variable(v_init, name=v_name) w = math_ops.matmul(u, v, name="diff_Watch/matmul") u.initializer.run() v.initializer.run() for i in range(2): run_options = config_pb2.RunOptions(output_partition_graphs=True) run_dump_root = self._debug_dump_dir(run_number=i) debug_urls = self._debug_urls(run_number=i) if i == 0: # First debug run: Add debug tensor watch for u. debug_utils.add_debug_tensor_watch( run_options, "%s/read" % u_name, 0, debug_urls=debug_urls) else: # Second debug run: Add debug tensor watch for v. debug_utils.add_debug_tensor_watch( run_options, "%s/read" % v_name, 0, debug_urls=debug_urls) run_metadata = config_pb2.RunMetadata() # Invoke Session.run(). sess.run(w, options=run_options, run_metadata=run_metadata) self.assertEqual(self._expected_partition_graph_count, len(run_metadata.partition_graphs)) dump = debug_data.DebugDumpDir( run_dump_root, partition_graphs=run_metadata.partition_graphs) self.assertTrue(dump.loaded_partition_graphs()) # Each run should have generated only one dumped tensor, not two. self.assertEqual(1, dump.size) if i == 0: self.assertAllClose([u_init_val], dump.get_tensors("%s/read" % u_name, 0, "DebugIdentity")) self.assertGreaterEqual( dump.get_rel_timestamps("%s/read" % u_name, 0, "DebugIdentity")[0], 0) else: self.assertAllClose([v_init_val], dump.get_tensors("%s/read" % v_name, 0, "DebugIdentity")) self.assertGreaterEqual( dump.get_rel_timestamps("%s/read" % v_name, 0, "DebugIdentity")[0], 0)
def run_op_benchmark(self, sess, op_or_tensor, feed_dict=None, burn_iters=2, min_iters=10, store_trace=False, store_memory_usage=True, name=None, extras=None, mbs=0): """Run an op or tensor in the given session. Report the results. Args: sess: `Session` object to use for timing. op_or_tensor: `Operation` or `Tensor` to benchmark. feed_dict: A `dict` of values to feed for each op iteration (see the `feed_dict` parameter of `Session.run`). burn_iters: Number of burn-in iterations to run. min_iters: Minimum number of iterations to use for timing. store_trace: Boolean, whether to run an extra untimed iteration and store the trace of iteration in the benchmark report. The trace will be stored as a string in Google Chrome trace format in the extras field "full_trace_chrome_format". store_memory_usage: Boolean, whether to run an extra untimed iteration, calculate memory usage, and store that in extras fields. name: (optional) Override the BenchmarkEntry name with `name`. Otherwise it is inferred from the top-level method name. extras: (optional) Dict mapping string keys to additional benchmark info. Values may be either floats or values that are convertible to strings. mbs: (optional) The number of megabytes moved by this op, used to calculate the ops throughput. Returns: A `dict` containing the key-value pairs that were passed to `report_benchmark`. """ store_memory_usage &= _benchmark_tests_can_log_memory() for _ in range(burn_iters): sess.run(op_or_tensor, feed_dict=feed_dict) deltas = [None] * min_iters for i in range(min_iters): start_time = time.time() sess.run(op_or_tensor, feed_dict=feed_dict) end_time = time.time() delta = end_time - start_time deltas[i] = delta extras = extras if extras is not None else {} if store_trace or store_memory_usage: run_options = config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE) run_metadata = config_pb2.RunMetadata() sess.run(op_or_tensor, feed_dict=feed_dict, options=run_options, run_metadata=run_metadata) tl = timeline.Timeline(run_metadata.step_stats) if store_trace: extras[ "full_trace_chrome_format"] = tl.generate_chrome_trace_format( ) if store_memory_usage: step_stats_analysis = tl.analyze_step_stats(show_memory=True) allocator_maximums = step_stats_analysis.allocator_maximums for k, v in allocator_maximums.items(): extras["allocator_maximum_num_bytes_%s" % k] = v.num_bytes def _median(x): if not x: return -1 s = sorted(x) l = len(x) lm1 = l - 1 return (s[l // 2] + s[lm1 // 2]) / 2.0 median_delta = _median(deltas) benchmark_values = { "iters": min_iters, "wall_time": median_delta, "extras": extras, "name": name, "throughput": mbs / median_delta } self.report_benchmark(**benchmark_values) return benchmark_values
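# Hedged usage note (the op and numbers below are hypothetical, not part of
# the original file): the returned dict mirrors what was passed to
# report_benchmark, and the optional `mbs` argument turns the median wall
# time into a throughput figure, e.g.
#
#   values = self.run_op_benchmark(sess, copy_op, min_iters=20, mbs=64,
#                                  name="copy_64mb")
#   # values["throughput"] == 64 / values["wall_time"]  (MB per second)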
def testDumpToFileWhileLoop(self): with session.Session() as sess: num_iter = 10 # "u" is the Variable being updated in the loop. u_name = "testDumpToFileWhileLoop/u" u_namespace = u_name.split("/")[0] u_init_val = np.array(11.0) u_init = constant_op.constant(u_init_val) u = variables.Variable(u_init, name=u_name) # "v" is the increment. v_name = "testDumpToFileWhileLoop/v" v_namespace = v_name.split("/")[0] v_init_val = np.array(2.0) v_init = constant_op.constant(v_init_val) v = variables.Variable(v_init, name=v_name) u.initializer.run() v.initializer.run() i = constant_op.constant(0, name="testDumpToFileWhileLoop/i") def cond(i): return math_ops.less(i, num_iter) def body(i): new_u = state_ops.assign_add(u, v) new_i = math_ops.add(i, 1) op = control_flow_ops.group(new_u) new_i = control_flow_ops.with_dependencies([op], new_i) return [new_i] loop = control_flow_ops.while_loop(cond, body, [i], parallel_iterations=1) # Create RunOptions for debug-watching tensors run_options = config_pb2.RunOptions(output_partition_graphs=True) debug_urls = self._debug_urls() # Add debug tensor watch for u. debug_utils.add_debug_tensor_watch(run_options, u_name, 0, debug_urls=debug_urls) # Add debug tensor watch for v. debug_utils.add_debug_tensor_watch(run_options, "%s/read" % v_name, 0, debug_urls=debug_urls) # Add debug tensor watch for while/Identity. debug_utils.add_debug_tensor_watch(run_options, "while/Identity", 0, debug_urls=debug_urls) # Add debug tensor watch for while/Add/y. debug_utils.add_debug_tensor_watch(run_options, "while/Add/y", 0, debug_urls=debug_urls) run_metadata = config_pb2.RunMetadata() r = sess.run(loop, options=run_options, run_metadata=run_metadata) self.assertEqual(self._expected_partition_graph_count, len(run_metadata.partition_graphs)) self.assertEqual(num_iter, r) u_val_final = sess.run(u) self.assertAllClose(u_init_val + num_iter * v_init_val, u_val_final) # Verify dump files self.assertTrue(os.path.isdir(self._dump_root)) self.assertTrue( os.path.isdir(os.path.join(self._dump_root, u_namespace))) self.assertTrue( os.path.isdir(os.path.join(self._dump_root, v_namespace, "v"))) dump = debug_data.DebugDumpDir( self._dump_root, partition_graphs=run_metadata.partition_graphs) # Expected dumped tensors: u, v/read, 10 iterations of while/Identity, # and 10 iterations of while/Add/y. self.assertEqual(1 + 1 + num_iter + num_iter, dump.size) # Verify tensor values. self.assertAllClose([u_init_val], dump.get_tensors(u_name, 0, "DebugIdentity")) self.assertAllClose([v_init_val], dump.get_tensors("%s/read" % v_name, 0, "DebugIdentity")) while_id_tensors = dump.get_tensors("while/Identity", 0, "DebugIdentity") self.assertEqual(10, len(while_id_tensors)) for k in xrange(len(while_id_tensors)): self.assertAllClose(np.array(k), while_id_tensors[k]) # Verify ascending timestamps from the while loops. while_id_rel_timestamps = dump.get_rel_timestamps( "while/Identity", 0, "DebugIdentity") self.assertEqual(10, len(while_id_rel_timestamps)) prev_rel_time = 0 for rel_time in while_id_rel_timestamps: self.assertGreaterEqual(rel_time, prev_rel_time) prev_rel_time = rel_time # Test querying debug watch keys from node name. watch_keys = dump.debug_watch_keys("while/Identity") self.assertEqual(["while/Identity:0:DebugIdentity"], watch_keys) # Test querying debug datum instances from debug watch key. self.assertEqual(10, len(dump.watch_key_to_data(watch_keys[0]))) self.assertEqual([], dump.watch_key_to_data("foo"))
def _GenerateTestData(self): """Generates the test data directory. The test data has a single run named run1 which contains: - a histogram - an image at timestamp and step 0 - scalar events containing the value i at step 10 * i and wall time 100 * i, for i in [1, _SCALAR_COUNT). - a graph definition Returns: temp_dir: The directory the test data is generated under. """ temp_dir = tempfile.mkdtemp(prefix=self.get_temp_dir()) self.addCleanup(shutil.rmtree, temp_dir) run1_path = os.path.join(temp_dir, 'run1') os.makedirs(run1_path) writer = writer_lib.FileWriter(run1_path) histogram_value = summary_pb2.HistogramProto(min=0, max=2, num=3, sum=6, sum_squares=5, bucket_limit=[0, 1, 2], bucket=[1, 1, 1]) # Add a simple graph event. graph_def = graph_pb2.GraphDef() node1 = graph_def.node.add() node1.name = 'a' node2 = graph_def.node.add() node2.name = 'b' node2.attr['very_large_attr'].s = b'a' * 2048 # 2 KB attribute meta_graph_def = meta_graph_pb2.MetaGraphDef(graph_def=graph_def) if self._only_use_meta_graph: writer.add_meta_graph(meta_graph_def) else: writer.add_graph(graph_def) # Add a simple run metadata event. run_metadata = config_pb2.RunMetadata() device_stats = run_metadata.step_stats.dev_stats.add() device_stats.device = 'test device' writer.add_run_metadata(run_metadata, 'test run') # 1x1 transparent GIF. encoded_image = base64.b64decode( 'R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7') image_value = summary_pb2.Summary.Image( height=1, width=1, colorspace=1, encoded_image_string=encoded_image) audio_value = summary_pb2.Summary.Audio(sample_rate=44100, length_frames=22050, num_channels=2, encoded_audio_string=b'', content_type='audio/wav') writer.add_event( event_pb2.Event( wall_time=0, step=0, summary=summary_pb2.Summary(value=[ summary_pb2.Summary.Value(tag='histogram', histo=histogram_value), summary_pb2.Summary.Value(tag='image', image=image_value), summary_pb2.Summary.Value(tag='audio', audio=audio_value) ]))) # Write 100 simple values. for i in xrange(1, self._SCALAR_COUNT + 1): writer.add_event( event_pb2.Event( # We use different values for wall time, step, and the value so we # can tell them apart. wall_time=100 * i, step=10 * i, summary=summary_pb2.Summary(value=[ summary_pb2.Summary.Value(tag='simple_values', simple_value=i) ]))) writer.flush() writer.close() return temp_dir
def testDumpGraphStructureLookup(self): # TODO(cais): Separate this test into multiple test methods. with session.Session() as sess: u_name = "testDumpGraphStructureLookup/u" v_name = "testDumpGraphStructureLookup/v" w_name = "testDumpGraphStructureLookup/w" u_init = constant_op.constant([2.0, 4.0]) u = variables.Variable(u_init, name=u_name) v = math_ops.add(u, u, name=v_name) w = math_ops.add(v, v, name=w_name) u.initializer.run() run_options = config_pb2.RunOptions(output_partition_graphs=True) debug_utils.watch_graph(run_options, sess.graph, debug_ops=["DebugIdentity"], debug_urls=self._debug_urls()) run_metadata = config_pb2.RunMetadata() sess.run(w, options=run_options, run_metadata=run_metadata) self.assertEqual(self._expected_partition_graph_count, len(run_metadata.partition_graphs)) dump = debug_data.DebugDumpDir( self._dump_root, partition_graphs=run_metadata.partition_graphs) u_read_name = u_name + "/read" # Test node name list lookup of the DebugDumpDir object. node_names = dump.nodes() self.assertTrue(u_name in node_names) self.assertTrue(u_read_name in node_names) # Test querying node attributes. u_attr = dump.node_attributes(u_name) self.assertEqual(dtypes.float32, u_attr["dtype"].type) self.assertEqual(1, len(u_attr["shape"].shape.dim)) self.assertEqual(2, u_attr["shape"].shape.dim[0].size) with self.assertRaisesRegexp(ValueError, "No node named \"foo\" exists"): dump.node_attributes("foo") # Test querying the debug watch keys with node names. self.assertEqual(["%s:0:DebugIdentity" % u_name], dump.debug_watch_keys(u_name)) self.assertEqual(["%s:0:DebugIdentity" % v_name], dump.debug_watch_keys(v_name)) self.assertEqual(["%s:0:DebugIdentity" % w_name], dump.debug_watch_keys(w_name)) self.assertEqual([], dump.debug_watch_keys("foo")) # Test querying debug datum instances from debug watch. u_data = dump.watch_key_to_data(dump.debug_watch_keys(u_name)[0]) self.assertEqual(1, len(u_data)) self.assertEqual(u_name, u_data[0].node_name) self.assertEqual(0, u_data[0].output_slot) self.assertEqual("DebugIdentity", u_data[0].debug_op) self.assertGreaterEqual(u_data[0].timestamp, 0) self.assertEqual([], dump.watch_key_to_data("foo")) # Test the inputs lookup of the DebugDumpDir object. self.assertEqual([], dump.node_inputs(u_name)) self.assertEqual([u_name], dump.node_inputs(u_read_name)) self.assertEqual([u_read_name] * 2, dump.node_inputs(v_name)) self.assertEqual([v_name] * 2, dump.node_inputs(w_name)) self.assertEqual([], dump.node_inputs(u_name, is_control=True)) self.assertEqual([], dump.node_inputs(u_read_name, is_control=True)) self.assertEqual([], dump.node_inputs(v_name, is_control=True)) self.assertEqual([], dump.node_inputs(w_name, is_control=True)) # Test the outputs recipient lookup of the DebugDumpDir object. self.assertTrue(u_read_name in dump.node_recipients(u_name)) self.assertEqual(2, dump.node_recipients(u_read_name).count(v_name)) self.assertEqual(2, dump.node_recipients(v_name).count(w_name)) self.assertEqual([], dump.node_recipients(u_name, is_control=True)) self.assertEqual([], dump.node_recipients(u_read_name, is_control=True)) self.assertEqual([], dump.node_recipients(v_name, is_control=True)) self.assertEqual([], dump.node_recipients(w_name, is_control=True)) # Test errors raised on invalid node names. with self.assertRaisesRegexp(ValueError, "does not exist in partition graphs"): dump.node_inputs(u_name + "foo") with self.assertRaisesRegexp(ValueError, "does not exist in partition graphs"): dump.node_recipients(u_name + "foo") # Test transitive_inputs(). 
self.assertEqual([], dump.transitive_inputs(u_name)) self.assertEqual([u_name], dump.transitive_inputs(u_read_name)) self.assertEqual(set([u_name, u_read_name]), set(dump.transitive_inputs(v_name))) self.assertEqual(set([u_name, u_read_name, v_name]), set(dump.transitive_inputs(w_name))) with self.assertRaisesRegexp(ValueError, "does not exist in partition graphs"): dump.transitive_inputs(u_name + "foo") # Test num_devices(). self.assertEqual(self._expected_num_devices, len(dump.devices())) # Test node_device(). self.assertEqual(self._main_device, dump.node_device(u_name)) with self.assertRaisesRegexp(ValueError, "does not exist in partition graphs"): dump.node_device(u_name + "foo") # Test node_exists(). self.assertTrue(dump.node_exists(u_name)) self.assertTrue(dump.node_exists(u_name + "/read")) self.assertFalse(dump.node_exists(u_name + "/read" + "/foo")) # Test node_op_type(). self.assertEqual("Variable", dump.node_op_type(u_name)) self.assertEqual("Identity", dump.node_op_type(u_name + "/read")) self.assertEqual("Add", dump.node_op_type(v_name)) self.assertEqual("Add", dump.node_op_type(w_name)) with self.assertRaisesRegexp(ValueError, "does not exist in partition graphs"): dump.node_op_type(u_name + "foo") # Now load the dump again, without the partition graphs, so we can check # the errors raised when no partition graphs have been loaded. dump = debug_data.DebugDumpDir(self._dump_root, validate=False) with self.assertRaisesRegexp( RuntimeError, "No partition graphs have been loaded"): dump.partition_graphs() self.assertFalse(dump.loaded_partition_graphs()) with self.assertRaisesRegexp( RuntimeError, "Node inputs are not loaded from partition graphs yet"): dump.node_inputs(u_name) with self.assertRaisesRegexp( RuntimeError, "No partition graphs have been loaded"): dump.nodes() with self.assertRaisesRegexp( RuntimeError, "Node recipients are not loaded from partition graphs yet" ): dump.node_recipients(u_name) with self.assertRaisesRegexp( RuntimeError, "Node inputs are not loaded from partition graphs yet"): dump.transitive_inputs(u_name) with self.assertRaisesRegexp( RuntimeError, "Devices are not loaded from partition graphs yet"): dump.devices() with self.assertRaisesRegexp( RuntimeError, "Node devices are not loaded from partition graphs yet"): dump.node_device(u_name) with self.assertRaisesRegexp( RuntimeError, "Node op types are not loaded from partition graphs yet"): dump.node_op_type(u_name)
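The second half of the test shows that the graph-structure lookups raise RuntimeError when the dump was loaded without partition graphs. A minimal sketch of the guard pattern a client could use, relying only on the DebugDumpDir methods exercised above (the helper name safe_node_inputs is hypothetical):

def safe_node_inputs(dump, node_name):
  """Returns the node's inputs if graph structure is available, else None."""
  # loaded_partition_graphs() is False when the dump was constructed without
  # partition graphs; the structure lookups then raise RuntimeError.
  if not dump.loaded_partition_graphs():
    return None
  if not dump.node_exists(node_name):
    return None
  return dump.node_inputs(node_name)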
def testValidProfile(self): output_dir = test.get_temp_dir() run_metadata = config_pb2.RunMetadata() node1 = step_stats_pb2.NodeExecStats(node_name='Add/123', op_start_rel_micros=3, op_end_rel_micros=5, all_end_rel_micros=4) run_metadata = config_pb2.RunMetadata() device1 = run_metadata.step_stats.dev_stats.add() device1.device = 'deviceA' device1.node_stats.extend([node1]) graph = test.mock.MagicMock() op1 = test.mock.MagicMock() op1.name = 'Add/123' op1.traceback = [('a/b/file1', 10, 'apply_op', 'abc'), ('a/c/file2', 12, 'my_op', 'def')] op1.type = 'add' graph.get_operations.return_value = [op1] expected_proto = """sample_type { type: 5 unit: 5 } sample_type { type: 6 unit: 7 } sample_type { type: 8 unit: 7 } sample { value: 1 value: 4 value: 2 label { key: 1 str: 2 } label { key: 3 str: 4 } } string_table: "" string_table: "node_name" string_table: "Add/123" string_table: "op_type" string_table: "add" string_table: "count" string_table: "all_time" string_table: "nanoseconds" string_table: "op_time" string_table: "Device 1 of 1: deviceA" comment: 9 """ # Test with protos profiles = pprof_profiler.get_profiles(graph, run_metadata) self.assertEquals(1, len(profiles)) self.assertTrue('deviceA' in profiles) self.assertEquals(expected_proto, str(profiles['deviceA'])) # Test with files profile_files = pprof_profiler.profile(graph, run_metadata, output_dir) self.assertEquals(1, len(profile_files)) with gzip.open(profile_files[0]) as profile_file: profile_contents = profile_file.read() profile = profile_pb2.Profile() profile.ParseFromString(profile_contents) self.assertEquals(expected_proto, str(profile))
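When a mismatch against expected_proto needs to be diagnosed, the string_table-indexed labels in the parsed Profile can be resolved back to text. A minimal sketch, assuming the standard pprof schema used by profile_pb2 (label.key and label.str are indices into string_table, as the expected_proto above reflects); the helper name describe_samples is hypothetical:

def describe_samples(profile):
  """Resolves string_table indices so each sample reads as (labels, values)."""
  table = profile.string_table
  described = []
  for sample in profile.sample:
    # e.g. {'node_name': 'Add/123', 'op_type': 'add'} with [count, all_time, op_time].
    labels = {table[label.key]: table[label.str] for label in sample.label}
    described.append((labels, list(sample.value)))
  return described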
def testWatchingUnconnectedOutputTensor(self): """Watch an output slot that emits no edges (not even control edges from the node).""" with session.Session() as sess: x_init = constant_op.constant([2, 2, 3, 5, 5]) x = variables.Variable(x_init, name="unconnected/x") # The Unique op (array_ops.unique) has two output slots. Use only slot 0 in # the graph. Let the debugger watch the unused slot 1. unique_x, _ = array_ops.unique(x, name="unconnected/unique_x") y = math_ops.add(unique_x, [0, 1, 2], name="unconnected/y") x.initializer.run() # Verify that only slot 0 of unique_x has recipients, while slot 1 of the # same node does not have recipients. unique_x_slot_0_recipients = [] unique_x_slot_1_recipients = [] for op in sess.graph.get_operations(): for inp in op.inputs: if inp.name == "unconnected/unique_x:0": unique_x_slot_0_recipients.append(op.name) elif inp.name == "unconnected/unique_x:1": unique_x_slot_1_recipients.append(op.name) self.assertEqual(["unconnected/y"], unique_x_slot_0_recipients) self.assertEqual([], unique_x_slot_1_recipients) run_options = config_pb2.RunOptions(output_partition_graphs=True) debug_utils.watch_graph(run_options, sess.graph, debug_ops=["DebugIdentity"], debug_urls=self._debug_urls()) run_metadata = config_pb2.RunMetadata() result = sess.run(y, options=run_options, run_metadata=run_metadata) self.assertAllClose([2, 4, 7], result) dump = debug_data.DebugDumpDir( self._dump_root, partition_graphs=run_metadata.partition_graphs) # Assert that the connected slot (slot 0) is dumped properly. unique_x_slot_0_dumps = dump.watch_key_to_data( "unconnected/unique_x:0:DebugIdentity") self.assertEqual(1, len(unique_x_slot_0_dumps)) self.assertEqual("unconnected/unique_x", unique_x_slot_0_dumps[0].node_name) self.assertEqual(0, unique_x_slot_0_dumps[0].output_slot) self.assertAllClose([2, 3, 5], unique_x_slot_0_dumps[0].get_tensor()) # Assert that the unconnected slot (slot 1) is dumped properly. unique_x_slot_1_dumps = dump.watch_key_to_data( "unconnected/unique_x:1:DebugIdentity") self.assertEqual(1, len(unique_x_slot_1_dumps)) self.assertEqual("unconnected/unique_x", unique_x_slot_1_dumps[0].node_name) self.assertEqual(1, unique_x_slot_1_dumps[0].output_slot) self.assertAllClose([0, 0, 1, 2, 2], unique_x_slot_1_dumps[0].get_tensor())
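The manual scan over sess.graph.get_operations() above generalizes to any graph. The sketch below is a hypothetical helper (not part of the test) that collects output tensors no op consumes, which is exactly the situation slot 1 of the Unique op is in; it only looks at data edges, matching the test's framing.

def find_unconnected_output_tensors(graph):
  """Returns names of output tensors that are not consumed by any op."""
  consumed = set()
  for op in graph.get_operations():
    for inp in op.inputs:
      consumed.add(inp.name)
  unconnected = []
  for op in graph.get_operations():
    for out in op.outputs:
      if out.name not in consumed:
        unconnected.append(out.name)
  return unconnected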
def testClusterSpecPropagationThreeServersOneCluster(self): """Boots 3 servers, ensures appropriate communication across workers. Additionally, in this cluster, we ensure the master is not the 0-th worker. Note: this test only uses one session. """ server1 = server_lib.Server.create_local_server() server2 = server_lib.Server.create_local_server() server3 = server_lib.Server.create_local_server() cluster_def = cluster_pb2.ClusterDef() job = cluster_def.job.add() job.name = 'worker' job.tasks[0] = server3.target[len('grpc://'):] job.tasks[1] = server2.target[len('grpc://'):] job.tasks[2] = server1.target[len('grpc://'):] config = config_pb2.ConfigProto(cluster_def=cluster_def) # Add ops to the devices in non-linear order. with ops.device('/job:worker/task:1'): feed1 = array_ops.placeholder(dtypes.float32, shape=(2)) const1 = constant_op.constant(2.0) mul1 = const1 * feed1 with ops.device('/job:worker/task:2'): feed2 = array_ops.placeholder(dtypes.float32, shape=(2)) const2 = constant_op.constant(2.0) mul2 = const2 * feed2 with ops.device('/job:worker/task:0'): feed0 = array_ops.placeholder(dtypes.float32, shape=(2)) const0 = constant_op.constant(2.0) mul0 = const0 * feed0 sum_op = mul0 + mul1 + mul2 ones = np.ones([2]) run_options = config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE) run_metadata = config_pb2.RunMetadata() # Run! with session.Session(server1.target, config=config) as sess: output = sess.run(sum_op, options=run_options, run_metadata=run_metadata, feed_dict={ feed1: ones, feed2: ones, feed0: ones }) self.assertAllEqual(6 * ones, output) self.assertEqual( 3, len([ dev_stats.device for dev_stats in run_metadata.step_stats.dev_stats for node_stats in dev_stats.node_stats if '/job:worker/replica:0/task:' in dev_stats.device and node_stats.node_name.startswith('Const') ]), run_metadata)
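The ClusterDef above is assembled field by field; an equivalent construction goes through a ClusterSpec, using the server_lib module the test already imports. A minimal sketch under the assumption that server1, server2 and server3 are the in-process servers created above:

cluster_spec = server_lib.ClusterSpec({
    'worker': [
        server3.target[len('grpc://'):],  # task 0
        server2.target[len('grpc://'):],  # task 1
        server1.target[len('grpc://'):],  # task 2
    ]
})
config = config_pb2.ConfigProto(cluster_def=cluster_spec.as_cluster_def())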
def train_step(sess, train_op, endpoint, batch, loggis, loss, global_step, number_of_steps, train_step_kwargs): """Runs a single gradient step and decides whether training should stop. Args: sess: The current session. train_op: An `Operation` that evaluates the gradients and returns the total loss. endpoint: The model endpoints; only fetched for ad-hoc debugging. batch: The input batch; only fetched for ad-hoc debugging. loggis: The model logits; only fetched for ad-hoc debugging. loss: The loss tensor; only fetched for ad-hoc debugging. global_step: A `Tensor` representing the global training step. number_of_steps: The total number of training steps, used for logging. train_step_kwargs: A dictionary of keyword arguments. Returns: The total loss and a boolean indicating whether or not to stop training. Raises: ValueError: if 'should_trace' is in `train_step_kwargs` but `logdir` is not. """ start_time = time.time() trace_run_options = None run_metadata = None if 'should_trace' in train_step_kwargs: if 'logdir' not in train_step_kwargs: raise ValueError( 'logdir must be present in train_step_kwargs when ' 'should_trace is present') if sess.run(train_step_kwargs['should_trace']): trace_run_options = config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE) run_metadata = config_pb2.RunMetadata() # When debugging, `loggis`, `loss`, `batch` and `endpoint` can be added to # the fetches below (e.g. sess.run([loggis, loss, batch, endpoint], ...)) # and printed; they are not needed for normal training. total_loss, np_global_step = sess.run([train_op, global_step], options=trace_run_options, run_metadata=run_metadata) time_elapsed = time.time() - start_time if run_metadata is not None: tl = timeline.Timeline(run_metadata.step_stats) trace = tl.generate_chrome_trace_format() trace_filename = os.path.join(train_step_kwargs['logdir'], 'tf_trace-%d.json' % np_global_step) logging.info('Writing trace to %s', trace_filename) file_io.write_string_to_file(trace_filename, trace) if 'summary_writer' in train_step_kwargs: train_step_kwargs['summary_writer'].add_run_metadata( run_metadata, 'run_metadata-%d' % np_global_step) if 'should_log' in train_step_kwargs: if sess.run(train_step_kwargs['should_log']): logging.info('global step %d/%d : loss = %.4f (%.2f sec/step)', np_global_step, number_of_steps, total_loss, time_elapsed) # TODO(nsilberman): figure out why we can't put this into sess.run. The # issue right now is that the stop check depends on the global step. The # increment of the global step often happens via the train op, which is # typically created using optimizer.apply_gradients. # # Since running `train_op` causes the global step to be incremented, one # would expect that using a control dependency would allow the # should_stop check to be run in the same session.run call: # # with ops.control_dependencies([train_op]): # should_stop_op = ... # # However, this actually seems not to work on certain platforms. if 'should_stop' in train_step_kwargs: should_stop = sess.run(train_step_kwargs['should_stop']) else: should_stop = False return total_loss, should_stop
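train_step only decides whether to stop; the caller owns the loop. A minimal sketch of a hypothetical driver loop, assuming train_op, endpoint, batch, loggis, loss, global_step, number_of_steps and train_step_kwargs have been built elsewhere in the usual slim-style setup:

should_stop = False
with session.Session() as sess:
  sess.run(variables.global_variables_initializer())
  while not should_stop:
    # Each call takes one gradient step and reports whether to stop.
    total_loss, should_stop = train_step(
        sess, train_op, endpoint, batch, loggis, loss,
        global_step, number_of_steps, train_step_kwargs)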
def run(self, fetches, feed_dict=None, options=None, run_metadata=None, callable_runner=None, callable_runner_args=None): """Wrapper around Session.run() that inserts tensor watch options. Args: fetches: Same as the `fetches` arg to regular `Session.run()`. feed_dict: Same as the `feed_dict` arg to regular `Session.run()`. options: Same as the `options` arg to regular `Session.run()`. run_metadata: Same as the `run_metadata` arg to regular `Session.run()`. callable_runner: A `callable` returned by `Session.make_callable()`. If not `None`, `fetches` and `feed_dict` must both be `None`. callable_runner_args: An optional list of arguments to `callable_runner`. Returns: Simply forwards the output of the wrapped `Session.run()` call. Raises: ValueError: On invalid `OnRunStartAction` value. Or if `callable_runner` is not `None` and either or both of `fetches` and `feed_dict` are not `None`. """ if not callable_runner: self.increment_run_call_count() else: if fetches or feed_dict: raise ValueError( "callable_runner and fetches/feed_dict are mutually exclusive, but " "are used simultaneously.") if self._is_disabled_thread(): if callable_runner: return callable_runner(*callable_runner_args) else: return self._sess.run(fetches, feed_dict=feed_dict, options=options, run_metadata=run_metadata) # Invoke on-run-start callback and obtain response. run_start_resp = self.on_run_start( OnRunStartRequest(fetches, feed_dict, options, run_metadata, self._run_call_count, is_callable_runner=bool(callable_runner))) _check_type(run_start_resp, OnRunStartResponse) if run_start_resp.action == OnRunStartAction.DEBUG_RUN: # Decorate RunOptions to fill in debugger tensor watch specifications. decorated_run_options = options or config_pb2.RunOptions() run_metadata = run_metadata or config_pb2.RunMetadata() self._decorate_run_options_for_debug( decorated_run_options, run_start_resp.debug_urls, debug_ops=run_start_resp.debug_ops, node_name_regex_whitelist=run_start_resp.node_name_regex_whitelist, op_type_regex_whitelist=run_start_resp.op_type_regex_whitelist, tensor_dtype_regex_whitelist=( run_start_resp.tensor_dtype_regex_whitelist), tolerate_debug_op_creation_failures=( run_start_resp.tolerate_debug_op_creation_failures)) # Invoke the run() method of the wrapped Session. Catch any TensorFlow # runtime errors.
tf_error = None try: if callable_runner: retvals = callable_runner(*callable_runner_args, options=decorated_run_options, run_metadata=run_metadata) else: retvals = self._sess.run(fetches, feed_dict=feed_dict, options=decorated_run_options, run_metadata=run_metadata) except errors.OpError as op_error: if self._pass_through_operrors: raise op_error tf_error = op_error retvals = op_error run_end_req = OnRunEndRequest( run_start_resp.action, run_metadata=run_metadata, client_graph_def=self._sess.graph.as_graph_def(), tf_error=tf_error) elif run_start_resp.action == OnRunStartAction.PROFILE_RUN: decorated_run_options = options or config_pb2.RunOptions() run_metadata = run_metadata or config_pb2.RunMetadata() self._decorate_run_options_for_profile(decorated_run_options) if callable_runner: retvals = callable_runner(*callable_runner_args, options=decorated_run_options, run_metadata=run_metadata) else: retvals = self._sess.run(fetches, feed_dict=feed_dict, options=decorated_run_options, run_metadata=run_metadata) run_end_req = OnRunEndRequest( run_start_resp.action, run_metadata=run_metadata, client_graph_def=self._sess.graph.as_graph_def()) elif (run_start_resp.action == OnRunStartAction.NON_DEBUG_RUN or run_start_resp.action == OnRunStartAction.INVOKE_STEPPER): if callable_runner: raise NotImplementedError( "Stepper mode is not implemented for callables created by " "Session.make_callable().") if run_start_resp.action == OnRunStartAction.INVOKE_STEPPER: with stepper.NodeStepper( self._sess, fetches, feed_dict) as node_stepper: retvals = self.invoke_node_stepper( node_stepper, restore_variable_values_on_exit=True) else: # Invoke run() method of the wrapped session. retvals = self._sess.run( fetches, feed_dict=feed_dict, options=options, run_metadata=run_metadata) # Prepare arg for the on-run-end callback. run_end_req = OnRunEndRequest(run_start_resp.action) else: raise ValueError( "Invalid OnRunStartAction value: %s" % run_start_resp.action) # Invoke on-run-end callback and obtain response. run_end_resp = self.on_run_end(run_end_req) _check_type(run_end_resp, OnRunEndResponse) # Currently run_end_resp is only a placeholder. No action is taken on it. return retvals
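In normal use this run() override is not called on the base wrapper directly; a concrete wrapper is dropped in wherever a plain Session is expected. A minimal sketch, assuming the tfdbg local-CLI wrapper (local_cli_wrapper.LocalCLIDebugWrapperSession) as the concrete subclass and some previously built fetches:

from tensorflow.python.debug.wrappers import local_cli_wrapper

sess = session.Session()
sess = local_cli_wrapper.LocalCLIDebugWrapperSession(sess)
# Every subsequent call goes through the wrapper's run(), which dispatches
# among DEBUG_RUN, PROFILE_RUN, NON_DEBUG_RUN and INVOKE_STEPPER per step.
result = sess.run(fetches)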
def testToggleEnableTwoDebugWatchesNoCrosstalkBetweenDebugNodes(self): with session.Session(config=no_rewrite_session_config()) as sess: v_1 = variables.Variable(50.0, name="v_1") v_2 = variables.Variable(-50.0, name="v_2") delta_1 = constant_op.constant(5.0, name="delta_1") delta_2 = constant_op.constant(-5.0, name="delta_2") inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1") inc_v_2 = state_ops.assign_add(v_2, delta_2, name="inc_v_2") sess.run([v_1.initializer, v_2.initializer]) run_metadata = config_pb2.RunMetadata() run_options = config_pb2.RunOptions(output_partition_graphs=True) debug_utils.watch_graph(run_options, sess.graph, debug_ops=[ "DebugIdentity(gated_grpc=true)", "DebugNumericSummary(gated_grpc=true)" ], debug_urls=[self._debug_server_url_1]) for i in xrange(4): self._server_1.clear_data() if i % 2 == 0: self._server_1.request_watch("delta_1", 0, "DebugIdentity") self._server_1.request_watch("delta_2", 0, "DebugIdentity") self._server_1.request_unwatch("delta_1", 0, "DebugNumericSummary") self._server_1.request_unwatch("delta_2", 0, "DebugNumericSummary") else: self._server_1.request_unwatch("delta_1", 0, "DebugIdentity") self._server_1.request_unwatch("delta_2", 0, "DebugIdentity") self._server_1.request_watch("delta_1", 0, "DebugNumericSummary") self._server_1.request_watch("delta_2", 0, "DebugNumericSummary") sess.run([inc_v_1, inc_v_2], options=run_options, run_metadata=run_metadata) # Watched debug tensors are: # Run 0: delta_[1,2]:0:DebugIdentity # Run 1: delta_[1,2]:0:DebugNumericSummary # Run 2: delta_[1,2]:0:DebugIdentity # Run 3: delta_[1,2]:0:DebugNumericSummary self.assertEqual(2, len(self._server_1.debug_tensor_values)) if i % 2 == 0: self.assertAllClose( [5.0], self._server_1. debug_tensor_values["delta_1:0:DebugIdentity"]) self.assertAllClose( [-5.0], self._server_1. debug_tensor_values["delta_2:0:DebugIdentity"]) else: self.assertAllClose( [[ 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 5.0, 5.0, 0.0, 1.0, 0.0 ]], self._server_1. debug_tensor_values["delta_1:0:DebugNumericSummary"]) self.assertAllClose( [[ 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, -5.0, -5.0, -5.0, 0.0, 1.0, 0.0 ]], self._server_1. debug_tensor_values["delta_2:0:DebugNumericSummary"])
def testToggleBreakpointsWorks(self): with session.Session(config=session_debug_testlib. no_rewrite_session_config()) as sess: v_1 = variables.VariableV1(50.0, name="v_1") v_2 = variables.VariableV1(-50.0, name="v_2") delta_1 = constant_op.constant(5.0, name="delta_1") delta_2 = constant_op.constant(-5.0, name="delta_2") inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1") inc_v_2 = state_ops.assign_add(v_2, delta_2, name="inc_v_2") sess.run([v_1.initializer, v_2.initializer]) run_metadata = config_pb2.RunMetadata() run_options = config_pb2.RunOptions(output_partition_graphs=True) debug_utils.watch_graph( run_options, sess.graph, debug_ops=["DebugIdentity(gated_grpc=true)"], debug_urls=[self._debug_server_url_1]) for i in xrange(4): self._server_1.clear_data() if i in (0, 2): # Enable breakpoint at delta_[1,2]:0:DebugIdentity in runs 0 and 2. self._server_1.request_watch("delta_1", 0, "DebugIdentity", breakpoint=True) self._server_1.request_watch("delta_2", 0, "DebugIdentity", breakpoint=True) else: # Disable the breakpoint in runs 1 and 3. self._server_1.request_unwatch("delta_1", 0, "DebugIdentity") self._server_1.request_unwatch("delta_2", 0, "DebugIdentity") output = sess.run([inc_v_1, inc_v_2], options=run_options, run_metadata=run_metadata) self.assertAllClose( [50.0 + 5.0 * (i + 1), -50 - 5.0 * (i + 1)], output) if i in (0, 2): # During runs 0 and 2, the server should have received the published # debug tensors delta_[1,2]:0:DebugIdentity. The breakpoints should # have been unblocked by EventReply responses from the server. self.assertAllClose( [5.0], self._server_1. debug_tensor_values["delta_1:0:DebugIdentity"]) self.assertAllClose( [-5.0], self._server_1. debug_tensor_values["delta_2:0:DebugIdentity"]) # After the runs, the server should have properly registered the # breakpoints due to the request_watch calls made with breakpoint=True. self.assertSetEqual( {("delta_1", 0, "DebugIdentity"), ("delta_2", 0, "DebugIdentity")}, self._server_1.breakpoints) else: # After the end of runs 1 and 3, the server has received the requests # to disable the breakpoints at delta_[1,2]:0:DebugIdentity. self.assertSetEqual(set(), self._server_1.breakpoints)