def testDebugDumpDir_invalidFileNamingPattern(self):
  """DebugDumpDir should reject dump files with malformed names.

  Creates an (empty) file whose name has too few underscores to match the
  dump-file naming pattern and verifies that constructing a DebugDumpDir
  over the directory raises a ValueError.
  """
  # File name with too few underscores should lead to an exception.
  # Use a context manager so the (empty) file handle is closed immediately;
  # the original code leaked the handle returned by open().
  with open(os.path.join(self._dump_root, "node1_DebugIdentity_1234"),
            "wb"):
    pass

  with self.assertRaisesRegexp(ValueError,
                               "does not conform to the naming pattern"):
    debug_data.DebugDumpDir(self._dump_root)
def testDebugNumericSummaryOnUninitializedTensorGivesCorrectResult(self):
  """DebugNumericSummary of an uninitialized Variable has sentinel values."""
  with session.Session() as sess:
    a = variables.Variable(
        [42], dtype=np.float32, name="numeric_summary_uninit/a")

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugNumericSummary"],
        debug_urls=self._debug_urls())

    # Running only the initializer means the watched variable tensor is
    # dumped in its pre-initialization state.
    sess.run(a.initializer, options=run_options, run_metadata=run_metadata)

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)
    self.assertTrue(dump.loaded_partition_graphs())

    # DebugNumericSummary output should reflect the uninitialized state of
    # the watched tensor.
    numeric_summary = dump.get_tensors("numeric_summary_uninit/a", 0,
                                       "DebugNumericSummary")[0]
    # The first eight summary elements are all zero for an uninitialized
    # tensor.
    self.assertAllClose([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                        numeric_summary[0:8])
    # Element 8 is +inf and element 9 is -inf (empty-range sentinels,
    # presumably min and max -- confirm against the DebugNumericSummary op
    # documentation).
    self.assertTrue(np.isinf(numeric_summary[8]))
    self.assertGreater(numeric_summary[8], 0.0)
    self.assertTrue(np.isinf(numeric_summary[9]))
    self.assertLess(numeric_summary[9], 0.0)
    # Elements 10 and 11 are NaN for an uninitialized tensor.
    self.assertTrue(np.isnan(numeric_summary[10]))
    self.assertTrue(np.isnan(numeric_summary[11]))
def testDebugNumericSummaryOnInitializedTensorGivesCorrectResult(self):
  """DebugNumericSummary on an initialized tensor yields the expected stats."""
  with session.Session() as sess:
    # 18 elements covering NaN, -inf, negative, zero, positive and +inf.
    a = variables.Variable(
        [
            np.nan, np.nan, 0.0, 0.0, 0.0, -1.0, -3.0, 3.0, 7.0, -np.inf,
            -np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.nan, np.nan
        ],
        dtype=np.float32,
        name="numeric_summary/a")
    b = variables.Variable(
        [0.0] * 18, dtype=np.float32, name="numeric_summary/b")
    c = math_ops.add(a, b, name="numeric_summary/c")

    sess.run(variables.global_variables_initializer())

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugNumericSummary"],
        debug_urls=self._debug_urls())

    sess.run(c, options=run_options, run_metadata=run_metadata)

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)
    self.assertTrue(dump.loaded_partition_graphs())

    # Expected summary of a/read: element counts by category, extrema over
    # the finite values (-3.0 / 7.0), and mean 6/7, variance 3080/343 of the
    # seven finite elements.
    self.assertAllClose([[
        1.0, 18.0, 2.0, 2.0, 3.0, 2.0, 5.0, 4.0, -3.0, 7.0, 0.85714286,
        8.97959184
    ]], dump.get_tensors("numeric_summary/a/read", 0, "DebugNumericSummary"))
def testWatchingOutputSlotWithoutOutgoingEdge(self):
  """Test watching output slots not attached to any outgoing edges."""
  with session.Session() as sess:
    u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
    u = constant_op.constant(u_init_val, shape=[2, 2], name="u")

    # Create a control edge from a node with an output: From u to z.
    # Node u will get executed only because of the control edge. The output
    # tensor u:0 is not attached to any outgoing edge in the graph. This test
    # checks that the debugger can watch such a tensor.
    with ops.control_dependencies([u]):
      z = control_flow_ops.no_op(name="z")

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity"],
        debug_urls=self._debug_urls())

    run_metadata = config_pb2.RunMetadata()
    sess.run(z, options=run_options, run_metadata=run_metadata)

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

    # Assert that the DebugIdentity watch on u works properly: exactly one
    # datum, from slot 0 of node u, with the original constant value.
    self.assertEqual(1, len(dump.dumped_tensor_data))
    datum = dump.dumped_tensor_data[0]
    self.assertEqual("u", datum.node_name)
    self.assertEqual(0, datum.output_slot)
    self.assertEqual("DebugIdentity", datum.debug_op)
    self.assertAllClose([[5.0, 3.0], [-1.0, 0.0]], datum.get_tensor())
def setUpClass(cls):
  """Builds a debug dump of a large tensor and an analyzer CLI over it."""
  cls._dump_root = tempfile.mkdtemp()

  with session.Session() as sess:
    # 2400 elements should exceed the default threshold (2000).
    x = constant_op.constant(np.zeros([300, 8]), name="large_tensors/x")

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity"],
        debug_urls="file://%s" % cls._dump_root)

    # Invoke Session.run().
    run_metadata = config_pb2.RunMetadata()
    sess.run(x, options=run_options, run_metadata=run_metadata)

    cls._debug_dump = debug_data.DebugDumpDir(
        cls._dump_root, partition_graphs=run_metadata.partition_graphs)

    # Construct the analyzer.
    cls._analyzer = analyzer_cli.DebugAnalyzer(cls._debug_dump)

    # Construct the handler registry.
    cls._registry = debugger_cli_common.CommandHandlerRegistry()

    # Register command handler.
    cls._registry.register_command_handler(
        "print_tensor",
        cls._analyzer.print_tensor,
        cls._analyzer.get_help("print_tensor"),
        prefix_aliases=["pt"])
def _session_run_for_graph_structure_lookup(self):
  """Runs a small u -> v -> w graph with debug watches.

  Returns:
    The three watched node names and the resulting DebugDumpDir.
  """
  with session.Session() as sess:
    u_name = "testDumpGraphStructureLookup/u"
    v_name = "testDumpGraphStructureLookup/v"
    w_name = "testDumpGraphStructureLookup/w"

    # w depends on v, which depends on the variable u.
    u_init = constant_op.constant([2.0, 4.0])
    u = variables.Variable(u_init, name=u_name)
    v = math_ops.add(u, u, name=v_name)
    w = math_ops.add(v, v, name=w_name)

    u.initializer.run()

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity"],
        debug_urls=self._debug_urls())

    run_metadata = config_pb2.RunMetadata()
    sess.run(w, options=run_options, run_metadata=run_metadata)

    self.assertEqual(self._expected_partition_graph_count,
                     len(run_metadata.partition_graphs))

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

  return u_name, v_name, w_name, dump
def testSessionRun(self): wrapper = TestDebugWrapperSession(self._sess, self._dump_root, self._observer) # Check initial state of the observer. self.assertEqual(0, self._observer["on_run_start_count"]) self.assertEqual(0, self._observer["on_run_end_count"]) s = wrapper.run(self._s) # Assert the run return value is correct. self.assertAllClose(np.array([[3.0], [4.0]]), s) # Assert the on-run-start method is invoked. self.assertEqual(1, self._observer["on_run_start_count"]) # Assert the on-run-start request reflects the correct fetch. self.assertEqual(self._s, self._observer["run_fetches"]) # Assert the on-run-start request reflects the correct feed_dict. self.assertIsNone(self._observer["run_feed_dict"]) # Assert the file debug URL has led to dump on the filesystem. dump = debug_data.DebugDumpDir(self._dump_root) self.assertEqual(7, len(dump.dumped_tensor_data)) # Assert the on-run-end method is invoked. self.assertEqual(1, self._observer["on_run_end_count"]) # Assert the performed action field in the on-run-end callback request is # correct. self.assertEqual(framework.OnRunStartAction.DEBUG_RUN, self._observer["performed_action"])
def testRunWithError(self):
  """Test the debug tensor dumping when error occurs in graph runtime."""
  with session.Session() as sess:
    # NOTE(review): this test uses tf.placeholder/tf.transpose while sibling
    # tests use array_ops/dtypes module imports -- consider unifying the
    # import style (confirm which modules this file imports).
    ph = tf.placeholder(tf.float32, name="mismatch/ph")
    x = tf.transpose(ph, name="mismatch/x")
    m = constant_op.constant(
        np.array([[1.0, 2.0]], dtype=np.float32), name="mismatch/m")
    y = math_ops.matmul(m, x, name="mismatch/y")

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity"],
        debug_urls=self._debug_urls())

    # The feed shape [2, 1] transposes to [1, 2], which is incompatible with
    # matmul against the [1, 2] constant m, so run() raises an OpError.
    with self.assertRaises(errors.OpError):
      sess.run(y,
               options=run_options,
               feed_dict={ph: np.array([[-3.0], [0.0]])})

    dump = debug_data.DebugDumpDir(self._dump_root)
    self.assertFalse(dump.loaded_partition_graphs())

    # Despite the error, tensors computed before the failure were dumped.
    m_dumps = dump.watch_key_to_data("mismatch/m:0:DebugIdentity")
    self.assertEqual(1, len(m_dumps))
    self.assertAllClose(np.array([[1.0, 2.0]]), m_dumps[0].get_tensor())

    x_dumps = dump.watch_key_to_data("mismatch/x:0:DebugIdentity")
    self.assertEqual(1, len(x_dumps))
    self.assertAllClose(np.array([[-3.0, 0.0]]), x_dumps[0].get_tensor())
def testDumpStringTensorsToFileSystem(self):
  """String (bytes) tensors should be dumpable and readable from disk."""
  with session.Session() as sess:
    str1_init_val = np.array(b"abc")
    str2_init_val = np.array(b"def")

    str1_init = constant_op.constant(str1_init_val)
    str2_init = constant_op.constant(str2_init_val)

    str1_name = "str1"
    str2_name = "str2"
    str1 = variables.Variable(str1_init, name=str1_name)
    str2 = variables.Variable(str2_init, name=str2_name)
    # Concatenate str1 and str2
    str_concat = math_ops.add(str1, str2, name="str_concat")

    str1.initializer.run()
    str2.initializer.run()

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_url = "file://%s" % self._dump_root

    # Add debug tensor watch for u.
    self._addDebugTensorWatch(
        run_options, "%s/read" % str1_name, 0, debug_urls=[debug_url])
    # Add debug tensor watch for v.
    self._addDebugTensorWatch(
        run_options, "%s/read" % str2_name, 0, debug_urls=[debug_url])

    run_metadata = config_pb2.RunMetadata()
    sess.run(str_concat, options=run_options, run_metadata=run_metadata)

    # String ops are located on CPU.
    self.assertEqual(1, len(run_metadata.partition_graphs))

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

    self.assertEqual(2, dump.size)

    # The dumped values should round-trip the original byte strings.
    self.assertEqual([str1_init_val],
                     dump.get_tensors("%s/read" % str1_name, 0,
                                      "DebugIdentity"))
    self.assertEqual([str2_init_val],
                     dump.get_tensors("%s/read" % str2_name, 0,
                                      "DebugIdentity"))

    # Relative timestamps are non-negative.
    self.assertGreaterEqual(
        dump.get_rel_timestamps("%s/read" % str1_name, 0,
                                "DebugIdentity")[0], 0)
    self.assertGreaterEqual(
        dump.get_rel_timestamps("%s/read" % str2_name, 0,
                                "DebugIdentity")[0], 0)
def setUpClass(cls):
  """Builds a dump of a graph with control dependencies and an analyzer CLI."""
  cls._dump_root = tempfile.mkdtemp()

  cls._is_gpu_available = test.is_gpu_available()
  if cls._is_gpu_available:
    cls._main_device = "/job:localhost/replica:0/task:0/gpu:0"
  else:
    cls._main_device = "/job:localhost/replica:0/task:0/cpu:0"

  with session.Session() as sess:
    x_init_val = np.array([5.0, 3.0])
    x_init = constant_op.constant(x_init_val, shape=[2])
    x = variables.Variable(x_init, name="control_deps/x")

    # y and z each carry explicit control dependencies in addition to their
    # data dependencies.
    y = math_ops.add(x, x, name="control_deps/y")
    y = control_flow_ops.with_dependencies(
        [x], y, name="control_deps/ctrl_dep_y")

    z = math_ops.mul(x, y, name="control_deps/z")
    z = control_flow_ops.with_dependencies(
        [x, y], z, name="control_deps/ctrl_dep_z")

    x.initializer.run()

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity"],
        debug_urls="file://%s" % cls._dump_root)

    # Invoke Session.run().
    run_metadata = config_pb2.RunMetadata()
    sess.run(z, options=run_options, run_metadata=run_metadata)

    debug_dump = debug_data.DebugDumpDir(
        cls._dump_root, partition_graphs=run_metadata.partition_graphs)

    # Construct the analyzer.
    analyzer = analyzer_cli.DebugAnalyzer(debug_dump)

    # Construct the handler registry.
    cls._registry = debugger_cli_common.CommandHandlerRegistry()

    # Register command handlers.
    cls._registry.register_command_handler(
        "node_info",
        analyzer.node_info,
        analyzer.get_help("node_info"),
        prefix_aliases=["ni"])
    cls._registry.register_command_handler(
        "list_inputs",
        analyzer.list_inputs,
        analyzer.get_help("list_inputs"),
        prefix_aliases=["li"])
    cls._registry.register_command_handler(
        "list_outputs",
        analyzer.list_outputs,
        analyzer.get_help("list_outputs"),
        prefix_aliases=["lo"])
def testGraphStructureLookupWithoutPartitionGraphsDoesNotErrorOut(self):
  """Reloading a dump without partition graphs should still succeed.

  The partition graphs should be recovered from the dump directory itself,
  so no error is raised even though none are passed in explicitly.
  """
  # Generate a dump on disk; the returned node names are not needed here.
  self._session_run_for_graph_structure_lookup()

  reloaded = debug_data.DebugDumpDir(self._dump_root, validate=False)
  self.assertTrue(reloaded.loaded_partition_graphs())
def _load_dumped_intermediate_tensors(self, dump_path, target_name):
  """Collects dumped intermediate tensors from the given dump directory.

  Tensors that are already tracked (reference tensors, tensor handles,
  override tensors) or that match the target name are skipped; every other
  dumped tensor is recorded in self._dumped_intermediate_tensors keyed by
  tensor name.
  """
  dump_dir = debug_data.DebugDumpDir(dump_path, validate=False)
  for datum in dump_dir.dumped_tensor_data:
    name = datum.tensor_name
    # Skip tensors we already track and the fetch target itself.
    if name == target_name:
      continue
    if (name in self._ref_tensor_names or name in self._tensor_handles or
        name in self._override_tensors):
      continue
    self._dumped_intermediate_tensors[name] = datum
def testFindNodesWithBadTensorValues(self):
  """dump.find() should locate tensors containing NaN or Inf values."""
  with session.Session() as sess:
    u_name = "testFindNodesWithBadTensorValues/u"
    v_name = "testFindNodesWithBadTensorValues/v"
    w_name = "testFindNodesWithBadTensorValues/w"
    x_name = "testFindNodesWithBadTensorValues/x"
    y_name = "testFindNodesWithBadTensorValues/y"
    z_name = "testFindNodesWithBadTensorValues/z"

    u_init = constant_op.constant([2.0, 4.0])
    u = variables.Variable(u_init, name=u_name)
    v_init = constant_op.constant([2.0, 1.0])
    v = variables.Variable(v_init, name=v_name)

    # Expected output: [0.0, 3.0]
    w = math_ops.sub(u, v, name=w_name)

    # Expected output: [inf, 1.3333]
    x = math_ops.div(u, w, name=x_name)

    # Expected output: [nan, 4.0]
    y = math_ops.mul(w, x, name=y_name)
    z = math_ops.mul(y, y, name=z_name)

    u.initializer.run()
    v.initializer.run()

    run_options = config_pb2.RunOptions()
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity"],
        debug_urls="file://%s" % self._dump_root)

    run_metadata = config_pb2.RunMetadata()
    sess.run(z, options=run_options, run_metadata=run_metadata)

    dump = debug_data.DebugDumpDir(self._dump_root)

    def has_bad_value(_, tensor):
      # Predicate: tensor contains any NaN or Inf element.
      return np.any(np.isnan(tensor)) or np.any(np.isinf(tensor))

    # Find all "offending tensors".
    bad_data = dump.find(has_bad_value)

    # Verify that the nodes with bad values are caught through running find
    # on the debug dump.
    self.assertEqual(3, len(bad_data))
    self.assertEqual(x_name, bad_data[0].node_name)
    self.assertEqual(y_name, bad_data[1].node_name)
    self.assertEqual(z_name, bad_data[2].node_name)

    # Test first_n kwarg of find(): Find the first offending tensor.
    first_bad_datum = dump.find(has_bad_value, first_n=1)

    self.assertEqual(1, len(first_bad_datum))
    self.assertEqual(x_name, first_bad_datum[0].node_name)
def setUpClass(cls):
  """Builds a dump of a simple matmul/add graph and an analyzer CLI."""
  cls._dump_root = tempfile.mkdtemp()

  cls._is_gpu_available = test.is_gpu_available()
  if cls._is_gpu_available:
    cls._main_device = "/job:localhost/replica:0/task:0/gpu:0"
  else:
    cls._main_device = "/job:localhost/replica:0/task:0/cpu:0"

  with session.Session() as sess:
    u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
    v_init_val = np.array([[2.0], [-1.0]])

    u_name = "simple_mul_add/u"
    v_name = "simple_mul_add/v"

    u_init = constant_op.constant(u_init_val, shape=[2, 2])
    u = variables.Variable(u_init, name=u_name)
    v_init = constant_op.constant(v_init_val, shape=[2, 1])
    v = variables.Variable(v_init, name=v_name)

    # x = (u @ v) + (u @ v)
    w = math_ops.matmul(u, v, name="simple_mul_add/matmul")
    x = math_ops.add(w, w, name="simple_mul_add/add")

    u.initializer.run()
    v.initializer.run()

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity"],
        debug_urls="file://%s" % cls._dump_root)

    # Invoke Session.run().
    run_metadata = config_pb2.RunMetadata()
    sess.run(x, options=run_options, run_metadata=run_metadata)

    debug_dump = debug_data.DebugDumpDir(
        cls._dump_root, partition_graphs=run_metadata.partition_graphs)

    # Construct the analyzer.
    analyzer = analyzer_cli.DebugAnalyzer(debug_dump)

    # Construct the handler registry.
    cls._registry = debugger_cli_common.CommandHandlerRegistry()

    # Register command handlers.
    cls._registry.register_command_handler(
        "list_tensors",
        analyzer.list_tensors,
        analyzer.get_help("list_tensors"),
        prefix_aliases=["lt"])
    cls._registry.register_command_handler(
        "node_info",
        analyzer.node_info,
        analyzer.get_help("node_info"),
        prefix_aliases=["ni"])
def testDumpToFileOverlappingParentDir(self):
  """Dumping nodes that share a parent directory should not race or clash."""
  with session.Session() as sess:
    u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
    v_init_val = np.array([[2.0], [-1.0]])

    # Use node names with overlapping namespace (i.e., parent directory) to
    # test concurrent, non-racing directory creation.
    u_name = "testDumpToFile/u"
    v_name = "testDumpToFile/v"

    u_init = constant_op.constant(u_init_val, shape=[2, 2])
    u = variables.Variable(u_init, name=u_name)
    v_init = constant_op.constant(v_init_val, shape=[2, 1])
    v = variables.Variable(v_init, name=v_name)

    w = math_ops.matmul(u, v, name="testDumpToFile/matmul")

    u.initializer.run()
    v.initializer.run()

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_urls = "file://%s" % self._dump_root

    # Add debug tensor watch for u.
    debug_utils.add_debug_tensor_watch(
        run_options, "%s/read" % u_name, 0, debug_urls=debug_urls)
    # Add debug tensor watch for v.
    debug_utils.add_debug_tensor_watch(
        run_options, "%s/read" % v_name, 0, debug_urls=debug_urls)

    run_metadata = config_pb2.RunMetadata()

    # Invoke Session.run().
    sess.run(w, options=run_options, run_metadata=run_metadata)

    self.assertEqual(self._expected_partition_graph_count,
                     len(run_metadata.partition_graphs))

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)
    self.assertTrue(dump.loaded_partition_graphs())

    # Verify the dumped tensor values for u and v.
    self.assertEqual(2, dump.size)

    self.assertAllClose([u_init_val],
                        dump.get_tensors("%s/read" % u_name, 0,
                                         "DebugIdentity"))
    self.assertAllClose([v_init_val],
                        dump.get_tensors("%s/read" % v_name, 0,
                                         "DebugIdentity"))

    # Relative timestamps are non-negative.
    self.assertGreaterEqual(
        dump.get_rel_timestamps("%s/read" % u_name, 0, "DebugIdentity")[0],
        0)
    self.assertGreaterEqual(
        dump.get_rel_timestamps("%s/read" % v_name, 0, "DebugIdentity")[0],
        0)
def testDumpUninitializedVariable(self):
  """Uninitialized variables should dump as None, then initialize normally."""
  op_namespace = "testDumpUninitializedVariable"
  with session.Session() as sess:
    u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
    s_init_val = b"str1"

    u_name = "%s/u" % op_namespace
    s_name = "%s/s" % op_namespace

    u_init = constant_op.constant(u_init_val, shape=[2, 2])
    u = variables.Variable(u_init, name=u_name)
    s_init = constant_op.constant(s_init_val)
    s = variables.Variable(s_init, name=s_name)

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_urls = self._debug_urls()

    # Add debug tensor watch for u.
    debug_utils.add_debug_tensor_watch(
        run_options, "%s" % u_name, 0, debug_urls=debug_urls)
    debug_utils.add_debug_tensor_watch(
        run_options, "%s" % s_name, 0, debug_urls=debug_urls)

    run_metadata = config_pb2.RunMetadata()

    # Initialize u and s.
    sess.run(variables.global_variables_initializer(),
             options=run_options,
             run_metadata=run_metadata)

    # Verify the dump file for the uninitialized value of u.
    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

    self.assertEqual(2, dump.size)
    self.assertEqual(self._expected_partition_graph_count,
                     len(run_metadata.partition_graphs))

    # The watched variables were uninitialized at dump time, so their dumped
    # values should be None.
    u_vals = dump.get_tensors(u_name, 0, "DebugIdentity")
    s_vals = dump.get_tensors(s_name, 0, "DebugIdentity")
    self.assertEqual(1, len(u_vals))
    self.assertIsNone(u_vals[0])
    self.assertEqual(1, len(s_vals))
    self.assertIsNone(s_vals[0])

    # Call run() again, to check that u is initialized properly.
    self.assertAllClose(u_init_val, sess.run(u))
    self.assertEqual(s_init_val, sess.run(s))
def _generate_dump_from_simple_addition_graph(self):
  """Runs w = u @ v with watches on u/read and v/read.

  Returns:
    A SimpleAddResults namedtuple bundling the initial values, the ops,
    their names, and the resulting DebugDumpDir.
  """
  with session.Session() as sess:
    u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
    v_init_val = np.array([[2.0], [-1.0]])

    # Use node names with overlapping namespace (i.e., parent directory) to
    # test concurrent, non-racing directory creation.
    u_name = "u"
    v_name = "v"
    w_name = "w"

    u_init = constant_op.constant(u_init_val, shape=[2, 2])
    u = variables.Variable(u_init, name=u_name)
    v_init = constant_op.constant(v_init_val, shape=[2, 1])
    v = variables.Variable(v_init, name=v_name)

    w = math_ops.matmul(u, v, name=w_name)

    u.initializer.run()
    v.initializer.run()

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_urls = "file://%s" % self._dump_root

    # Add debug tensor watch for u.
    debug_utils.add_debug_tensor_watch(
        run_options, "%s/read" % u_name, 0, debug_urls=debug_urls)
    # Add debug tensor watch for v.
    debug_utils.add_debug_tensor_watch(
        run_options, "%s/read" % v_name, 0, debug_urls=debug_urls)

    run_metadata = config_pb2.RunMetadata()

    # Invoke Session.run().
    sess.run(w, options=run_options, run_metadata=run_metadata)

    self.assertEqual(self._expected_partition_graph_count,
                     len(run_metadata.partition_graphs))

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

  simple_add_results = collections.namedtuple("SimpleAddResults", [
      "u_init_val", "v_init_val", "u", "v", "w", "u_name", "v_name",
      "w_name", "dump"
  ])
  return simple_add_results(u_init_val, v_init_val, u, v, w, u_name, v_name,
                            w_name, dump)
def on_run_end(self, request):
  """Overrides on-run-end callback.

  Actions taken:
    1) Load the debug dump.
    2) Bring up the Analyzer CLI.

  Args:
    request: An instance of OnRunEndRequest.

  Returns:
    An instance of OnRunEndResponse.
  """
  if request.performed_action == framework.OnRunStartAction.DEBUG_RUN:
    partition_graphs = None
    if request.run_metadata and request.run_metadata.partition_graphs:
      partition_graphs = request.run_metadata.partition_graphs
    elif request.client_graph_def:
      # Fall back on the client graph when no partition graphs are
      # available in the run metadata.
      partition_graphs = [request.client_graph_def]

    debug_dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=partition_graphs)
    debug_dump.set_python_graph(self._sess.graph)

    passed_filter = None
    if self._active_tensor_filter:
      if not debug_dump.find(
          self._tensor_filters[self._active_tensor_filter], first_n=1):
        # No dumped tensor passes the filter in this run. Clean up the dump
        # directory and move on.
        self._remove_dump_root()
        return framework.OnRunEndResponse()
      else:
        # Some dumped tensor(s) from this run passed the filter.
        passed_filter = self._active_tensor_filter
        self._active_tensor_filter = None

    self._prep_cli_for_run_end(debug_dump, request.tf_error, passed_filter)

    self._run_start_response = self._launch_cli()

    # Clean up the dump generated by this run.
    self._remove_dump_root()
  else:
    # No debug information to show following a non-debug run() call.
    self._run_start_response = None

  # Return placeholder response that currently holds no additional
  # information.
  return framework.OnRunEndResponse()
def testWatchingVariableUpdateOps(self):
  """Watch output slots on Variable-updating ops, with no emitted edges."""
  with session.Session() as sess:
    u_init = constant_op.constant(10.0)
    u = variables.Variable(u_init, name="gdo/u")
    v_init = constant_op.constant(20.0)
    v = variables.Variable(v_init, name="gdo/v")

    w = math_ops.mul(u, v, name="gdo/w")

    # gdo stands for GradientDescentOptimizer.
    train_op = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(
        w, name="gdo/train")

    u.initializer.run()
    v.initializer.run()

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity"],
        debug_urls=self._debug_urls())

    run_metadata = config_pb2.RunMetadata()
    sess.run(train_op, options=run_options, run_metadata=run_metadata)

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

    update_u_data = dump.watch_key_to_data(
        "gdo/train/update_gdo/u/ApplyGradientDescent:0:DebugIdentity")
    self.assertEqual(1, len(update_u_data))

    # Gradient descent on u: w = u * v, so dw / du = v.
    # Updated value of u should be:
    #   10.0 - learning_rate * v = 10.0 - 0.1 * 20.0 = 8.0
    self.assertAllClose(8.0, update_u_data[0].get_tensor())

    update_v_data = dump.watch_key_to_data(
        "gdo/train/update_gdo/v/ApplyGradientDescent:0:DebugIdentity")
    self.assertEqual(1, len(update_v_data))

    # Gradient descent on v: w = u * v, so dw / dv = u.
    # Updated value of v should be:
    #   20.0 - learning_rate * u = 20.0 - 0.1 * 10.0 = 19.0
    self.assertAllClose(19.0, update_v_data[0].get_tensor())

    # Verify that the Variables u and v are updated properly.
    self.assertAllClose(8.0, sess.run(u))
    self.assertAllClose(19.0, sess.run(v))
def testDumpingOnASingleRunWorks(self):
  """A single run through the dumping wrapper produces one valid dump dir."""
  wrapped_sess = dumping_wrapper.DumpingDebugWrapperSession(
      self.sess, session_root=self.session_root, log_usage=False)
  wrapped_sess.run(self.inc_v)

  run_dirs = glob.glob(os.path.join(self.session_root, "run_*"))
  self.assertEqual(1, len(run_dirs))
  run_dir = run_dirs[0]
  self._assert_correct_run_subdir_naming(os.path.basename(run_dir))

  dump = debug_data.DebugDumpDir(run_dir)
  # v starts at 10.0; the dump captures its pre-increment value.
  self.assertAllClose([10.0], dump.get_tensors("v", 0, "DebugIdentity"))
  # Fetch/feed metadata is recorded as reprs.
  self.assertEqual(repr(self.inc_v), dump.run_fetches_info)
  self.assertEqual(repr(None), dump.run_feed_keys_info)
def testLookUpNodePythonTracebackWorks(self):
  """node_traceback() requires a Python graph and accepts node/tensor names."""
  with session.Session() as sess:
    u_init = constant_op.constant(10.0)
    u = variables.Variable(u_init, name="traceback/u")
    v_init = constant_op.constant(20.0)
    v = variables.Variable(v_init, name="traceback/v")

    w = math_ops.multiply(u, v, name="traceback/w")

    sess.run(variables.global_variables_initializer())

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options, sess.graph, debug_urls=self._debug_urls())

    sess.run(w, options=run_options, run_metadata=run_metadata)

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

    # Prior to setting the Python graph, attempts to do traceback lookup
    # should lead to exceptions.
    with self.assertRaisesRegexp(
        LookupError, "Python graph is not available for traceback lookup"):
      dump.node_traceback("traceback/w")

    dump.set_python_graph(sess.graph)

    # After setting the Python graph, attempts to look up nonexistent nodes
    # should lead to exceptions.
    with self.assertRaisesRegexp(
        KeyError, r"Cannot find node \"foo\" in Python graph"):
      dump.node_traceback("foo")

    # Lookup should work with node name input.
    traceback = dump.node_traceback("traceback/w")
    self.assertIsInstance(traceback, list)
    self.assertGreater(len(traceback), 0)
    for trace in traceback:
      self.assertIsInstance(trace, tuple)

    # Lookup should also work with tensor name input.
    traceback = dump.node_traceback("traceback/w:0")
    self.assertIsInstance(traceback, list)
    self.assertGreater(len(traceback), 0)
    for trace in traceback:
      self.assertIsInstance(trace, tuple)
def testDumpingDebugHookWithoutWatchFnWorks(self):
  """The dumping hook with default watch settings produces one dump dir."""
  hook = hooks.DumpingDebugHook(self.session_root, log_usage=False)
  hooked = monitored_session._HookedSession(self.sess, [hook])
  hooked.run(self.inc_v)

  run_dirs = glob.glob(os.path.join(self.session_root, "run_*"))
  self.assertEqual(1, len(run_dirs))
  run_dir = run_dirs[0]
  self._assert_correct_run_subdir_naming(os.path.basename(run_dir))

  dump = debug_data.DebugDumpDir(run_dir)
  # v starts at 10.0; the dump captures its pre-increment value.
  self.assertAllClose([10.0], dump.get_tensors("v", 0, "DebugIdentity"))
  # Fetch/feed metadata is recorded as reprs.
  self.assertEqual(repr(self.inc_v), dump.run_fetches_info)
  self.assertEqual(repr(None), dump.run_feed_keys_info)
def main(_):
  """Offline tfdbg entry point: load a dump directory and launch the CLI."""
  if FLAGS.log_usage:
    pass  # No logging for open-source.

  # A dump directory is mandatory; bail out with an error otherwise.
  if not FLAGS.dump_dir:
    print("ERROR: dump_dir flag is empty.", file=sys.stderr)
    sys.exit(1)

  print("tfdbg offline: FLAGS.dump_dir = %s" % FLAGS.dump_dir)

  dump = debug_data.DebugDumpDir(FLAGS.dump_dir)
  analyzer_ui = analyzer_cli.create_analyzer_curses_cli(
      dump, tensor_filters={"has_inf_or_nan": debug_data.has_inf_or_nan})

  analyzer_ui.run_ui(
      title="tfdbg offline @ %s" % FLAGS.dump_dir, init_command="lt")
def testDumpingOnMultipleRunsWorks(self):
  """Each of several runs should produce its own, correctly named dump dir."""
  wrapped_sess = dumping_wrapper.DumpingDebugWrapperSession(
      self.sess, session_root=self.session_root, log_usage=False)
  num_runs = 3
  for _ in range(num_runs):
    wrapped_sess.run(self.inc_v)

  # Sort run_* subdirectories by their numeric run index.
  run_dirs = sorted(
      glob.glob(os.path.join(self.session_root, "run_*")),
      key=lambda d: int(os.path.basename(d).split("_")[1]))
  self.assertEqual(num_runs, len(run_dirs))

  for run_index, run_dir in enumerate(run_dirs):
    self._assert_correct_run_subdir_naming(os.path.basename(run_dir))
    dump = debug_data.DebugDumpDir(run_dir)
    # v starts at 10.0 and is incremented by 1.0 per run; the dump records
    # its value before the increment of that run.
    self.assertAllClose([10.0 + 1.0 * run_index],
                        dump.get_tensors("v", 0, "DebugIdentity"))
    self.assertEqual(repr(self.inc_v), dump.run_fetches_info)
    self.assertEqual(repr(None), dump.run_feed_keys_info)
def testWatchingOnlyOneOfTwoOutputSlotsDoesNotLeadToCausalityFailure(self):
  """Watching one of a node's two output slots must not break validation."""
  with session.Session() as sess:
    x_name = "oneOfTwoSlots/x"
    u_name = "oneOfTwoSlots/u"
    v_name = "oneOfTwoSlots/v"
    w_name = "oneOfTwoSlots/w"
    y_name = "oneOfTwoSlots/y"

    x = variables.Variable([1, 3, 3, 7], dtype=dtypes.int32, name=x_name)
    sess.run(x.initializer)

    # unique_with_counts emits multiple outputs; u gets the unique values
    # and indices separately consumed below.
    unique_x, indices, _ = array_ops.unique_with_counts(x, name=u_name)

    v = math_ops.add(unique_x, unique_x, name=v_name)
    w = math_ops.add(indices, indices, name=w_name)
    y = math_ops.add(w, w, name=y_name)

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    # Watch only the first output slot of u, even though it has two output
    # slots.
    debug_utils.add_debug_tensor_watch(
        run_options, u_name, 0, debug_urls=self._debug_urls())
    debug_utils.add_debug_tensor_watch(
        run_options, w_name, 0, debug_urls=self._debug_urls())
    debug_utils.add_debug_tensor_watch(
        run_options, y_name, 0, debug_urls=self._debug_urls())

    run_metadata = config_pb2.RunMetadata()
    sess.run([v, y], options=run_options, run_metadata=run_metadata)

    # validate=True exercises the causality check that this test guards.
    dump = debug_data.DebugDumpDir(
        self._dump_root,
        partition_graphs=run_metadata.partition_graphs,
        validate=True)

    self.assertAllClose([1, 3, 7],
                        dump.get_tensors(u_name, 0, "DebugIdentity")[0])
def setUpClass(cls):
  """Builds a dump from a tf.while_loop graph and an analyzer CLI over it."""
  cls._dump_root = tempfile.mkdtemp()

  with session.Session() as sess:
    # A simple while loop counting loop_var from 0 up to 10, one iteration
    # at a time (parallel_iterations=1).
    loop_var = constant_op.constant(0, name="while_loop_test/loop_var")
    cond = lambda loop_var: math_ops.less(loop_var, 10)
    body = lambda loop_var: math_ops.add(loop_var, 1)
    while_loop = control_flow_ops.while_loop(
        cond, body, [loop_var], parallel_iterations=1)

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_url = "file://%s" % cls._dump_root

    watch_opts = run_options.debug_options.debug_tensor_watch_opts

    # Add debug tensor watch for "while/Identity".
    watch = watch_opts.add()
    watch.node_name = "while/Identity"
    watch.output_slot = 0
    watch.debug_ops.append("DebugIdentity")
    watch.debug_urls.append(debug_url)

    # Invoke Session.run().
    run_metadata = config_pb2.RunMetadata()
    sess.run(while_loop, options=run_options, run_metadata=run_metadata)

    cls._debug_dump = debug_data.DebugDumpDir(
        cls._dump_root, partition_graphs=run_metadata.partition_graphs)

    cls._analyzer = analyzer_cli.DebugAnalyzer(cls._debug_dump)
    cls._registry = debugger_cli_common.CommandHandlerRegistry()
    cls._registry.register_command_handler(
        "list_tensors",
        cls._analyzer.list_tensors,
        cls._analyzer.get_help("list_tensors"),
        prefix_aliases=["lt"])
    cls._registry.register_command_handler(
        "print_tensor",
        cls._analyzer.print_tensor,
        cls._analyzer.get_help("print_tensor"),
        prefix_aliases=["pt"])
def testDumpingWithWatchFnWithNonDefaultDebugOpsWorks(self):
  """Use a watch_fn that specifies non-default debug ops."""

  def watch_fn(fetches, feeds):
    # Watch every node/op with both DebugIdentity and DebugNumericSummary.
    del fetches, feeds
    return ["DebugIdentity", "DebugNumericSummary"], r".*", r".*"

  sess = dumping_wrapper.DumpingDebugWrapperSession(
      self.sess,
      session_root=self.session_root,
      watch_fn=watch_fn,
      log_usage=False)

  sess.run(self.inc_v)

  dump_dirs = glob.glob(os.path.join(self.session_root, "run_*"))
  self.assertEqual(1, len(dump_dirs))
  dump = debug_data.DebugDumpDir(dump_dirs[0])

  self.assertAllClose([10.0], dump.get_tensors("v", 0, "DebugIdentity"))
  # The numeric summary of v has 12 elements.
  self.assertEqual(
      12, len(dump.get_tensors("v", 0, "DebugNumericSummary")[0]))
def testDumpingWithWatchFnOnFetchesWorks(self):
  """Use a watch_fn that returns different whitelists for different runs."""

  def watch_fn(fetches, feeds):
    del feeds
    # A watch_fn that picks fetch name.
    if fetches.name == "inc_v:0":
      # If inc_v, watch everything.
      return "DebugIdentity", r".*", r".*"
    else:
      # If dec_v, watch nothing ("$^" is a regex intended to match no real
      # node name).
      return "DebugIdentity", r"$^", r"$^"

  sess = dumping_wrapper.DumpingDebugWrapperSession(
      self.sess,
      session_root=self.session_root,
      watch_fn=watch_fn,
      log_usage=False)

  # Alternate inc_v (watched) and dec_v (unwatched) runs.
  for _ in range(3):
    sess.run(self.inc_v)
    sess.run(self.dec_v)

  dump_dirs = glob.glob(os.path.join(self.session_root, "run_*"))
  dump_dirs = sorted(
      dump_dirs, key=lambda x: int(os.path.basename(x).split("_")[1]))
  self.assertEqual(6, len(dump_dirs))

  for i, dump_dir in enumerate(dump_dirs):
    self._assert_correct_run_subdir_naming(os.path.basename(dump_dir))
    dump = debug_data.DebugDumpDir(dump_dir)
    if i % 2 == 0:
      # Even-index runs executed inc_v and were fully watched.
      self.assertGreater(dump.size, 0)
      self.assertAllClose([10.0 - 0.4 * (i / 2)],
                          dump.get_tensors("v", 0, "DebugIdentity"))
      self.assertEqual(repr(self.inc_v), dump.run_fetches_info)
      self.assertEqual(repr(None), dump.run_feed_keys_info)
    else:
      # Odd-index runs executed dec_v and were not watched.
      self.assertEqual(0, dump.size)
      self.assertEqual(repr(self.dec_v), dump.run_fetches_info)
      self.assertEqual(repr(None), dump.run_feed_keys_info)
def testDebuggingDuringOpError(self):
  """Test the debug tensor dumping when error occurs in graph runtime."""
  with session.Session() as sess:
    ph = array_ops.placeholder(dtypes.float32, name="mismatch/ph")
    x = array_ops.transpose(ph, name="mismatch/x")
    m = constant_op.constant(
        np.array([[1.0, 2.0]], dtype=np.float32), name="mismatch/m")
    y = math_ops.matmul(m, x, name="mismatch/y")

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity"],
        debug_urls=self._debug_urls())

    # The fed shape [2, 1] transposes to [1, 2], which is incompatible with
    # the matmul against m, so run() raises an OpError.
    with self.assertRaises(errors.OpError):
      sess.run(y,
               options=run_options,
               feed_dict={ph: np.array([[-3.0], [0.0]])})

    dump = debug_data.DebugDumpDir(self._dump_root)

    # Despite the fact that the run() call errored out and partition_graphs
    # are not available via run_metadata, the partition graphs should still
    # have been loaded from the dump directory.
    self.assertTrue(dump.loaded_partition_graphs())

    m_dumps = dump.watch_key_to_data("mismatch/m:0:DebugIdentity")
    self.assertEqual(1, len(m_dumps))
    self.assertAllClose(np.array([[1.0, 2.0]]), m_dumps[0].get_tensor())

    x_dumps = dump.watch_key_to_data("mismatch/x:0:DebugIdentity")
    self.assertEqual(1, len(x_dumps))
    self.assertAllClose(np.array([[-3.0, 0.0]]), x_dumps[0].get_tensor())
def testDumpingDebugHookWithStatefulWatchFnWorks(self):
  """A stateful watch_fn should alternate between full and empty watches."""
  watch_fn_state = {"run_counter": 0}

  def counting_watch_fn(fetches, feed_dict):
    # Stateful watch_fn: counts calls across runs via the enclosing dict.
    del fetches, feed_dict
    watch_fn_state["run_counter"] += 1
    if watch_fn_state["run_counter"] % 2 == 1:
      # If odd-index run (1-based), watch everything.
      return "DebugIdentity", r".*", r".*"
    else:
      # If even-index run, watch nothing.
      return "DebugIdentity", r"$^", r"$^"

  dumping_hook = hooks.DumpingDebugHook(
      self.session_root, watch_fn=counting_watch_fn, log_usage=False)
  mon_sess = monitored_session._HookedSession(self.sess, [dumping_hook])
  for _ in range(4):
    mon_sess.run(self.inc_v)

  dump_dirs = glob.glob(os.path.join(self.session_root, "run_*"))
  dump_dirs = sorted(
      dump_dirs, key=lambda x: int(os.path.basename(x).split("_")[1]))
  self.assertEqual(4, len(dump_dirs))

  for i, dump_dir in enumerate(dump_dirs):
    self._assert_correct_run_subdir_naming(os.path.basename(dump_dir))
    dump = debug_data.DebugDumpDir(dump_dir)
    if i % 2 == 0:
      # 0-based even index corresponds to a watched (1-based odd) run.
      self.assertAllClose([10.0 + 1.0 * i],
                          dump.get_tensors("v", 0, "DebugIdentity"))
    else:
      self.assertEqual(0, dump.size)
    self.assertEqual(repr(self.inc_v), dump.run_fetches_info)
    self.assertEqual(repr(None), dump.run_feed_keys_info)