def testConcurrentExecutionUpdateAndRandomRead(self):
  """Reads Execution data on the main thread while a sub-thread writes it.

  A background thread repeatedly writes an Execution proto, flushes the
  execution files and calls `reader.update()`. The main thread polls the
  reader until at least one execution digest is available and checks that
  the first execution read back has op_type "OpType0".
  """
  circular_buffer_size = -1
  writer = debug_events_writer.DebugEventsWriter(self.dump_root,
                                                 self.tfdbg_run_id,
                                                 circular_buffer_size)

  writer_state = {"counter": 0, "done": False}

  with debug_events_reader.DebugDataReader(self.dump_root) as reader:

    def write_and_update_job():
      # Runs until the main thread flips the "done" flag.
      while not writer_state["done"]:
        execution = debug_event_pb2.Execution()
        execution.op_type = "OpType%d" % writer_state["counter"]
        writer_state["counter"] += 1
        writer.WriteExecution(execution)
        writer.FlushExecutionFiles()
        reader.update()

    # On the sub-thread, keep writing and reading new Execution protos.
    write_and_update_thread = threading.Thread(target=write_and_update_job)
    write_and_update_thread.start()
    try:
      # On the main thread, do concurrent random read.
      while True:
        exec_digests = reader.executions(digest=True)
        if exec_digests:
          exec_0 = reader.read_execution(exec_digests[0])
          self.assertEqual(exec_0.op_type, "OpType0")
          break
        time.sleep(0.1)
    finally:
      # Stop and join the writer thread even when the assertion above fails;
      # otherwise the non-daemon thread would keep looping forever and keep
      # using the reader after the `with` block has exited.
      writer_state["done"] = True
      write_and_update_thread.join()
def testWriteGraphOpCreationAndDebuggedGraphs(self):
  """Writes GraphOpCreation and DebuggedGraph protos and reads them back.

  Ten GraphOpCreation events followed by one DebuggedGraph event are written
  and flushed; the single "*.graphs" file under the dump root must then
  contain those eleven events in the order they were written.
  """
  writer = debug_events_writer.DebugEventsWriter(self.dump_root)
  num_op_creations = 10
  for op_index in range(num_op_creations):
    op_creation = debug_event_pb2.GraphOpCreation()
    op_creation.op_type = "Conv2D"
    op_creation.op_name = "Conv2D_%d" % op_index
    writer.WriteGraphOpCreation(op_creation)
  graph_proto = debug_event_pb2.DebuggedGraph()
  graph_proto.graph_id = "deadbeaf"
  graph_proto.graph_name = "MyGraph1"
  writer.WriteDebuggedGraph(graph_proto)
  writer.FlushNonExecutionFiles()

  graphs_file_paths = glob.glob(os.path.join(self.dump_root, "*.graphs"))
  self.assertEqual(len(graphs_file_paths), 1)
  events = ReadDebugEvents(graphs_file_paths[0])
  self.assertEqual(len(events), num_op_creations + 1)
  for op_index in range(num_op_creations):
    created_op = events[op_index].graph_op_creation
    self.assertEqual(created_op.op_type, "Conv2D")
    self.assertEqual(created_op.op_name, "Conv2D_%d" % op_index)
  # The DebuggedGraph event is the last one written.
  last_event = events[num_op_creations]
  self.assertEqual(last_event.debugged_graph.graph_id, "deadbeaf")
def testConcurrentSourceFileRandomReads(self):
  """Reads source-file lines from two threads at once.

  100 single-line SourceFile protos are written and flushed. Two threads
  then read disjoint halves of them (each in reverse order) through one
  shared DebugDataReader, and the collected lines are checked afterwards.
  """
  writer = debug_events_writer.DebugEventsWriter(self.dump_root,
                                                 self.tfdbg_run_id)

  for i in range(100):
    source_file = debug_event_pb2.SourceFile(
        host_name="localhost", file_path="/tmp/file_%d.py" % i)
    source_file.lines.append("# File %d" % i)
    writer.WriteSourceFile(source_file)
  writer.FlushNonExecutionFiles()

  lines = [None] * 100
  # Use the reader as a context manager (as the sibling tests do) so that it
  # is released even if a read raises.
  with debug_events_reader.DebugDataReader(self.dump_root) as reader:
    reader.update()

    def read_job_1():
      # Read in the reverse order to enhance randomness of the read access.
      for i in range(49, -1, -1):
        lines[i] = reader.source_lines("localhost", "/tmp/file_%d.py" % i)

    def read_job_2():
      for i in range(99, 49, -1):
        lines[i] = reader.source_lines("localhost", "/tmp/file_%d.py" % i)

    thread_1 = threading.Thread(target=read_job_1)
    thread_2 = threading.Thread(target=read_job_2)
    thread_1.start()
    thread_2.start()
    thread_1.join()
    thread_2.join()

  for i in range(100):
    self.assertEqual(lines[i], ["# File %d" % i])
def __init__(self, dump_root, tensor_debug_mode, circular_buffer_size,
             op_regex, output_regex=None):
  """Stores the dump configuration and creates the DebugEventsWriter.

  Args:
    dump_root: Directory to write debug events to. When `horovod_enabled()`
      returns true, a `rank_<hvd_rank()>` subdirectory of it is used instead.
    tensor_debug_mode: Name of a `debug_event_pb2.TensorDebugMode` enum
      value; it is converted to the enum number via `.Value()`.
    circular_buffer_size: Forwarded to the writer as `circular_buffer_size`.
    op_regex: A pattern string (compiled here) or any other object, which is
      stored unchanged.
    output_regex: Same handling as `op_regex`; defaults to None.
  """
  self._dump_root = dump_root
  if horovod_enabled():
    self._dump_root = os.path.join(self._dump_root, f"rank_{hvd_rank()}")
  self._tensor_debug_mode = debug_event_pb2.TensorDebugMode.Value(
      tensor_debug_mode)
  self._circular_buffer_size = circular_buffer_size
  self._op_regex = (
      re.compile(op_regex) if isinstance(op_regex, str) else op_regex)
  self._output_regex = (
      re.compile(output_regex)
      if isinstance(output_regex, str) else output_regex)
  self._tfdbg_run_id = ''
  self._dump_op_counter = 0

  debug_writer_args = {
      "dump_root": self._dump_root,
      "circular_buffer_size": self._circular_buffer_size
  }
  # `tfdbg_run_id` is withheld only for the 2.2.x series. Compare the parsed
  # major/minor components: a plain `startswith("2.2")` would also match
  # 2.20, 2.21, ... and wrongly drop the argument there.
  if tf.__version__.split(".")[:2] != ["2", "2"]:
    debug_writer_args["tfdbg_run_id"] = self._tfdbg_run_id
  self._writer = debug_events_writer.DebugEventsWriter(**debug_writer_args)
def setUp(self):
  """Creates the shared DebugEventsWriter used by the tests."""
  super(DebugIdentityV2OpTest, self).setUp()
  self.tfdbg_run_id = "test_tfdbg_run"
  # Testing using a small circular-buffer size.
  self.circular_buffer_size = 4
  writer_args = (self.dump_root, self.tfdbg_run_id,
                 self.circular_buffer_size)
  self.writer = debug_events_writer.DebugEventsWriter(*writer_args)
def testWriteGraphExecutionTraceEventsWithCyclicBuffer(self):
  """Checks that only the newest buffer-full of traces reaches the file.

  Twice DEFAULT_CYCLIC_BUFFER_SIZE traces are written. Nothing is expected
  in the file before FlushExecutionFiles(); afterwards it must hold exactly
  the second half of the traces.
  """
  writer = debug_events_writer.DebugEventsWriter(self.dump_root)
  buffer_size = debug_events_writer.DEFAULT_CYCLIC_BUFFER_SIZE
  for trace_index in range(buffer_size * 2):
    trace = debug_event_pb2.GraphExecutionTrace()
    trace.op_name = "Op%d" % trace_index
    writer.WriteGraphExecutionTrace(trace)

  trace_paths = glob.glob(
      os.path.join(self.dump_root, "*.graph_execution_traces"))
  self.assertEqual(len(trace_paths), 1)
  trace_path = trace_paths[0]
  # Before FlushExecutionFiles() is called. No data should have been written
  # to the file.
  self.assertEqual(len(ReadDebugEvents(trace_path)), 0)

  writer.FlushExecutionFiles()
  flushed_events = ReadDebugEvents(trace_path)
  self.assertEqual(len(flushed_events), buffer_size)
  for offset in range(buffer_size):
    self.assertEqual(flushed_events[offset].graph_execution_trace.op_name,
                     "Op%d" % (offset + buffer_size))
def testRangeReadingGraphExecutionTraces(self, begin, end, expected_begin,
                                         expected_end):
  """Checks `graph_execution_traces(begin=..., end=...)` range reads.

  Five ops ("Op_0" .. "Op_4") and their traces are written under graph
  "graph1". The traces returned for [begin, end) must match the expected
  half-open index range [expected_begin, expected_end).
  """
  writer = debug_events_writer.DebugEventsWriter(
      self.dump_root, self.tfdbg_run_id, circular_buffer_size=-1)
  writer.WriteDebuggedGraph(
      debug_event_pb2.DebuggedGraph(graph_id="graph1", graph_name="graph1"))
  for op_index in range(5):
    op_name = "Op_%d" % op_index
    writer.WriteGraphOpCreation(
        debug_event_pb2.GraphOpCreation(op_name=op_name, graph_id="graph1"))
    writer.WriteGraphExecutionTrace(
        debug_event_pb2.GraphExecutionTrace(
            op_name=op_name, tfdbg_context_id="graph1"))
  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()
  writer.Close()

  with debug_events_reader.DebugDataReader(self.dump_root) as reader:
    reader.update()
    traces = reader.graph_execution_traces(begin=begin, end=end)
  expected_count = expected_end - expected_begin
  self.assertLen(traces, expected_count)
  first_trace, last_trace = traces[0], traces[-1]
  self.assertEqual(first_trace.op_name, "Op_%d" % expected_begin)
  self.assertEqual(last_trace.op_name, "Op_%d" % (expected_end - 1))
def testWriteGraphExecutionTraceEventsWithCircularBuffer(self):
  """Checks that only the newest buffer-full of traces is flushed.

  Twice DEFAULT_CIRCULAR_BUFFER_SIZE traces are written. The reader must see
  nothing before FlushExecutionFiles() and exactly the second half of the
  traces afterwards.
  """
  writer = debug_events_writer.DebugEventsWriter(self.dump_root,
                                                 self.tfdbg_run_id)
  buffer_size = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE
  for trace_index in range(buffer_size * 2):
    trace = debug_event_pb2.GraphExecutionTrace()
    trace.op_name = "Op%d" % trace_index
    writer.WriteGraphExecutionTrace(trace)

  with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
    unflushed = list(reader.graph_execution_traces_iterators()[0])
    # Before FlushExecutionFiles() is called. No data should have been written
    # to the file.
    self.assertEmpty(unflushed)

    writer.FlushExecutionFiles()
    flushed_traces = [
        item.debug_event.graph_execution_trace
        for item in reader.graph_execution_traces_iterators()[0]
    ]
    self.assertLen(flushed_traces, buffer_size)
    for offset in range(buffer_size):
      self.assertEqual(flushed_traces[offset].op_name,
                       "Op%d" % (offset + buffer_size))
def testReadingTwoFileSetsWithTheDifferentRootsLeadsToError(self):
  """Checks that mixing file sets from two tfdbg runs raises ValueError."""
  # To simulate a multi-host data dump, we first generate file sets in two
  # different directories, with different tfdbg_run_ids, and then combine
  # them.
  for i in range(2):
    writer = debug_events_writer.DebugEventsWriter(
        os.path.join(self.dump_root, str(i)),
        "run_id_%d" % i,
        circular_buffer_size=-1)
    writer.FlushNonExecutionFiles()
    writer.FlushExecutionFiles()

  # Move all files from the subdirectory /1 to subdirectory /0.
  dump_root_0 = os.path.join(self.dump_root, "0")
  src_paths = glob.glob(os.path.join(self.dump_root, "1", "*"))
  for src_path in src_paths:
    dst_path = os.path.join(
        dump_root_0,
        # Rename the file set to avoid file name collision.
        re.sub(r"(tfdbg_events\.\d+)", r"\g<1>1", os.path.basename(src_path)))
    os.rename(src_path, dst_path)

  # `assertRaisesRegexp` is a deprecated alias that was removed in
  # Python 3.12; `assertRaisesRegex` is the supported spelling.
  with self.assertRaisesRegex(ValueError,
                              r"Found multiple \(2\) tfdbg2 runs"):
    debug_events_reader.DebugDataReader(dump_root_0)
def testWriteSourceFilesAndStackFrames(self):
  """Writes SourceFile and StackFrameWithId protos and reads them back.

  Ten protos of each kind are written and flushed; both iterators of the
  reader must then yield them in the order they were written.
  """
  writer = debug_events_writer.DebugEventsWriter(self.dump_root,
                                                 self.tfdbg_run_id)
  num_protos = 10
  for proto_index in range(num_protos):
    source_file = debug_event_pb2.SourceFile()
    source_file.file_path = "/home/tf2user/main.py"
    source_file.host_name = "machine.cluster"
    source_file.lines.append("print(%d)" % proto_index)
    writer.WriteSourceFile(source_file)

    stack_frame = debug_event_pb2.StackFrameWithId()
    stack_frame.id = "stack_%d" % proto_index
    stack_frame.file_line_col.file_index = proto_index * 10
    writer.WriteStackFrameWithId(stack_frame)

  writer.FlushNonExecutionFiles()

  with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
    source_files = [
        item.debug_event.source_file
        for item in reader.source_files_iterator()
    ]
    self.assertLen(source_files, num_protos)
    for proto_index in range(num_protos):
      read_file = source_files[proto_index]
      self.assertEqual(read_file.file_path, "/home/tf2user/main.py")
      self.assertEqual(read_file.host_name, "machine.cluster")
      self.assertEqual(read_file.lines, ["print(%d)" % proto_index])

    stack_frames = [
        item.debug_event.stack_frame_with_id
        for item in reader.stack_frames_iterator()
    ]
    self.assertLen(stack_frames, num_protos)
    for proto_index in range(num_protos):
      read_frame = stack_frames[proto_index]
      self.assertEqual(read_frame.id, "stack_%d" % proto_index)
      self.assertEqual(read_frame.file_line_col.file_index, proto_index * 10)
def get_writer(self):
  """Return the debug events writer, creating it on first use.

  If `self._writer` is falsy, a DebugEventsWriter is built from
  `self._dump_root` and `self._circular_buffer_size` and stored; the stored
  writer is returned.
  """
  if self._writer:
    return self._writer
  self._writer = debug_events_writer.DebugEventsWriter(
      self._dump_root, circular_buffer_size=self._circular_buffer_size)
  return self._writer
def testConcurrentExecutionRandomReads(self):
  """Reads Execution protos from two threads at once.

  100 Execution protos are written and flushed. Two threads then read
  disjoint halves of them (each in reverse order) through one shared
  DebugDataReader, and the collected protos are checked afterwards.
  """
  circular_buffer_size = -1
  writer = debug_events_writer.DebugEventsWriter(self.dump_root,
                                                 self.tfdbg_run_id,
                                                 circular_buffer_size)

  for i in range(100):
    execution = debug_event_pb2.Execution()
    execution.op_type = "OpType%d" % i
    writer.WriteExecution(execution)
  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()

  executions = [None] * 100
  # Use the reader as a context manager (as the sibling tests do) so that it
  # is released even if a read raises.
  with debug_events_reader.DebugDataReader(self.dump_root) as reader:
    reader.update()

    def read_job_1():
      execution_digests = reader.executions(digest=True)
      # Read in the reverse order to enhance randomness of the read access.
      for i in range(49, -1, -1):
        executions[i] = reader.read_execution(execution_digests[i])

    def read_job_2():
      execution_digests = reader.executions(digest=True)
      for i in range(99, 49, -1):
        executions[i] = reader.read_execution(execution_digests[i])

    thread_1 = threading.Thread(target=read_job_1)
    thread_2 = threading.Thread(target=read_job_2)
    thread_1.start()
    thread_2.start()
    thread_1.join()
    thread_2.join()

  for i in range(100):
    self.assertEqual(executions[i].op_type, "OpType%d" % i)
def setUp(self):
  """Creates a temporary dump root and a writer targeting it."""
  super(DebugIdentityV2OpTest, self).setUp()
  self.dump_root = tempfile.mkdtemp()
  # Testing using a small cyclic-buffer size.
  self.cyclic_buffer_size = 4
  writer_args = (self.dump_root, self.cyclic_buffer_size)
  self.writer = debug_events_writer.DebugEventsWriter(*writer_args)
def testConcurrentWritesToExecutionFiles(self):
  """Writes executions and graph traces from many threads concurrently.

  `circular_buffer_size * 4` threads are started, alternating between
  writing an Execution and writing a GraphOpCreation/GraphExecutionTrace
  pair. After flushing, each kind must contain exactly
  `circular_buffer_size` distinct entries.
  """
  circular_buffer_size = 5
  writer = debug_events_writer.DebugEventsWriter(self.dump_root,
                                                 self.tfdbg_run_id,
                                                 circular_buffer_size)
  writer.WriteDebuggedGraph(
      debug_event_pb2.DebuggedGraph(graph_id="graph1", graph_name="graph1"))

  execution_state = {"counter": 0, "lock": threading.Lock()}

  def write_execution():
    execution = debug_event_pb2.Execution()
    # The lock keeps the counter read and increment atomic across threads.
    with execution_state["lock"]:
      execution.op_type = "OpType%d" % execution_state["counter"]
      execution_state["counter"] += 1
    writer.WriteExecution(execution)

  graph_execution_trace_state = {"counter": 0, "lock": threading.Lock()}

  def write_graph_execution_trace():
    with graph_execution_trace_state["lock"]:
      op_name = "Op%d" % graph_execution_trace_state["counter"]
      graph_op_creation = debug_event_pb2.GraphOpCreation(
          op_type="FooOp", op_name=op_name, graph_id="graph1")
      trace = debug_event_pb2.GraphExecutionTrace(
          op_name=op_name, tfdbg_context_id="graph1")
      graph_execution_trace_state["counter"] += 1
    writer.WriteGraphOpCreation(graph_op_creation)
    writer.WriteGraphExecutionTrace(trace)

  workers = []
  for worker_index in range(circular_buffer_size * 4):
    # Even-numbered workers write executions; odd-numbered ones write traces.
    job = (write_execution
           if worker_index % 2 == 0 else write_graph_execution_trace)
    worker = threading.Thread(target=job)
    worker.start()
    workers.append(worker)
  for worker in workers:
    worker.join()
  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()

  with debug_events_reader.DebugDataReader(self.dump_root) as reader:
    reader.update()
    # Verify the content of the .execution file.
    executed_op_types = [
        execution.op_type for execution in reader.executions()
    ]
    self.assertLen(executed_op_types, circular_buffer_size)
    self.assertLen(executed_op_types, len(set(executed_op_types)))

    # Verify the content of the .graph_execution_traces file.
    op_names = [trace.op_name for trace in reader.graph_execution_traces()]
    self.assertLen(op_names, circular_buffer_size)
    self.assertLen(op_names, len(set(op_names)))
def testWriteAndReadMetadata(self):
  """Checks the metadata recorded when a writer is created and closed.

  The reader must report a float starting wall time that is not earlier
  than the time captured before the writer was created, and a TensorFlow
  version equal to `versions.__version__`.
  """
  time_before_write = time.time()
  debug_events_writer.DebugEventsWriter(self.dump_root).Close()
  with debug_events_reader.DebugDataReader(self.dump_root) as reader:
    self.assertIsInstance(reader.starting_wall_time(), float)
    self.assertGreaterEqual(reader.starting_wall_time(), time_before_write)
    self.assertEqual(reader.tensorflow_version(), versions.__version__)
def _get_writer():
  """Build a debug events writer for the currently configured dump root.

  The dump root and circular buffer size are read from `_state.config`.
  """
  # TODO(cais): Explore caching the object for possible performance gain.
  # TODO(cais): Rename cyclic_buffer_size to circular_buffer_size in C++ and
  # Python-binding code.
  # NOTE(review): the TODO above previously named circular_buffer_size on
  # both sides; the legacy name is presumed to be cyclic_buffer_size - confirm.
  config = _state.config
  return debug_events_writer.DebugEventsWriter(
      config.dump_root, circular_buffer_size=config.circular_buffer_size)
def testConcurrentWritesToExecutionFiles(self):
  """Writes executions and graph traces from many threads concurrently.

  `cyclic_buffer_size * 4` threads are started, alternating between writing
  an Execution and writing a GraphExecutionTrace. After flushing, each
  output file must contain exactly `cyclic_buffer_size` distinct entries.
  """
  cyclic_buffer_size = 5
  writer = debug_events_writer.DebugEventsWriter(self.dump_root,
                                                 cyclic_buffer_size)

  execution_state = {"counter": 0, "lock": threading.Lock()}

  def write_execution_job():
    execution = debug_event_pb2.Execution()
    # The lock keeps the counter read and increment atomic across threads.
    with execution_state["lock"]:
      execution.op_type = "OpType%d" % execution_state["counter"]
      execution_state["counter"] += 1
    writer.WriteExecution(execution)

  graph_execution_trace_state = {"counter": 0, "lock": threading.Lock()}

  def write_trace_job():
    trace = debug_event_pb2.GraphExecutionTrace()
    with graph_execution_trace_state["lock"]:
      trace.op_name = "Op%d" % graph_execution_trace_state["counter"]
      graph_execution_trace_state["counter"] += 1
    writer.WriteGraphExecutionTrace(trace)

  workers = []
  for worker_index in range(cyclic_buffer_size * 4):
    # Even-numbered workers write executions; odd-numbered ones write traces.
    job = write_execution_job if worker_index % 2 == 0 else write_trace_job
    worker = threading.Thread(target=job)
    worker.start()
    workers.append(worker)
  for worker in workers:
    worker.join()
  writer.FlushExecutionFiles()

  # Verify the content of the .execution file.
  execution_paths = glob.glob(os.path.join(self.dump_root, "*.execution"))
  self.assertEqual(len(execution_paths), 1)
  execution_events = ReadDebugEvents(execution_paths[0])
  op_types = sorted(event.execution.op_type for event in execution_events)
  self.assertEqual(len(op_types), cyclic_buffer_size)
  self.assertEqual(len(op_types), len(set(op_types)))

  # Verify the content of the .graph_execution_traces file.
  traces_paths = glob.glob(
      os.path.join(self.dump_root, "*.graph_execution_traces"))
  self.assertEqual(len(traces_paths), 1)
  trace_events = ReadDebugEvents(traces_paths[0])
  op_names = sorted(
      event.graph_execution_trace.op_name for event in trace_events)
  self.assertEqual(len(op_names), cyclic_buffer_size)
  self.assertEqual(len(op_names), len(set(op_names)))
def testReadingTwoFileSetsWithTheSameDumpRootSucceeds(self):
  """Checks that two file sets sharing one tfdbg_run_id are read together.

  Two writers with the same run ID each write ten traces ("Op_0" for the
  first, "Op_1" for the second) into separate directories. After the second
  file set is moved next to the first, the reader must return all twenty
  traces, the first ten for "Op_0" and the last ten for "Op_1".
  """
  # To simulate a multi-host data dump, we first generate file sets in two
  # different directories, with the same tfdbg_run_id, and then combine them.
  tfdbg_run_id = "foo"
  for i in range(2):
    writer = debug_events_writer.DebugEventsWriter(
        os.path.join(self.dump_root, str(i)),
        tfdbg_run_id,
        circular_buffer_size=-1)
    if i == 0:
      debugged_graph = debug_event_pb2.DebuggedGraph(
          graph_id="graph1", graph_name="graph1")
      writer.WriteDebuggedGraph(debugged_graph)
      op_name = "Op_0"
      graph_op_creation = debug_event_pb2.GraphOpCreation(
          op_type="FooOp", op_name=op_name, graph_id="graph1")
      writer.WriteGraphOpCreation(graph_op_creation)
      op_name = "Op_1"
      graph_op_creation = debug_event_pb2.GraphOpCreation(
          op_type="FooOp", op_name=op_name, graph_id="graph1")
      writer.WriteGraphOpCreation(graph_op_creation)
    for _ in range(10):
      trace = debug_event_pb2.GraphExecutionTrace(
          op_name="Op_%d" % i, tfdbg_context_id="graph1")
      writer.WriteGraphExecutionTrace(trace)
      writer.FlushNonExecutionFiles()
      writer.FlushExecutionFiles()

  # Move all files from the subdirectory /1 to subdirectory /0.
  dump_root_0 = os.path.join(self.dump_root, "0")
  src_paths = glob.glob(os.path.join(self.dump_root, "1", "*"))
  for src_path in src_paths:
    dst_path = os.path.join(
        dump_root_0,
        # Rename the file set to avoid file name collision.
        re.sub(r"(tfdbg_events\.\d+)", r"\g<1>1", os.path.basename(src_path)))
    os.rename(src_path, dst_path)

  with debug_events_reader.DebugDataReader(dump_root_0) as reader:
    reader.update()
    # Verify the content of the .graph_execution_traces file.
    trace_digests = reader.graph_execution_traces(digest=True)
    self.assertLen(trace_digests, 20)
    # Index with the verification loop's own variable. The previous
    # `for _ in range(10)` loops reused the stale `i` (== 1) left over from
    # the writer loop above, so only digests 1 and 11 were ever checked.
    for j in range(10):
      trace = reader.read_graph_execution_trace(trace_digests[j])
      self.assertEqual(trace.op_name, "Op_0")
    for j in range(10):
      trace = reader.read_graph_execution_trace(trace_digests[j + 10])
      self.assertEqual(trace.op_name, "Op_1")
def testWriteExecutionEventsWithoutCircularBufferBehavior(self):
  """Checks that every Execution is kept when the buffer size is 0."""
  # A circular buffer size of 0 abolishes the circular buffer behavior.
  writer = debug_events_writer.DebugEventsWriter(self.dump_root, 0)
  num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
  for event_index in range(num_execution_events):
    execution = debug_event_pb2.Execution()
    execution.op_type = "OpType%d" % event_index
    writer.WriteExecution(execution)
  writer.FlushExecutionFiles()

  # Only the first element of the returned 6-tuple is inspected here.
  executed_op_types, _, _, _, _, _ = self._readAndCheckExecutionFile()
  self.assertLen(executed_op_types, num_execution_events)
  for event_index, op_type in enumerate(executed_op_types):
    self.assertEqual(op_type, "OpType%d" % event_index)
def testConcurrentWritesToExecutionFiles(self):
  """Writes executions and graph traces from many threads concurrently.

  `circular_buffer_size * 4` threads are started, alternating between
  writing an Execution and writing a GraphExecutionTrace. After flushing,
  each kind must contain exactly `circular_buffer_size` distinct entries.
  """
  circular_buffer_size = 5
  writer = debug_events_writer.DebugEventsWriter(self.dump_root,
                                                 circular_buffer_size)

  execution_state = {"counter": 0, "lock": threading.Lock()}

  def write_execution_job():
    execution = debug_event_pb2.Execution()
    # The lock keeps the counter read and increment atomic across threads.
    with execution_state["lock"]:
      execution.op_type = "OpType%d" % execution_state["counter"]
      execution_state["counter"] += 1
    writer.WriteExecution(execution)

  graph_execution_trace_state = {"counter": 0, "lock": threading.Lock()}

  def write_trace_job():
    trace = debug_event_pb2.GraphExecutionTrace()
    with graph_execution_trace_state["lock"]:
      trace.op_name = "Op%d" % graph_execution_trace_state["counter"]
      graph_execution_trace_state["counter"] += 1
    writer.WriteGraphExecutionTrace(trace)

  workers = []
  for worker_index in range(circular_buffer_size * 4):
    # Even-numbered workers write executions; odd-numbered ones write traces.
    job = write_execution_job if worker_index % 2 == 0 else write_trace_job
    worker = threading.Thread(target=job)
    worker.start()
    workers.append(worker)
  for worker in workers:
    worker.join()
  writer.FlushExecutionFiles()

  # Verify the content of the .execution file.
  executed_op_types, _, _, _, _, _ = self._readAndCheckExecutionFile()
  self.assertLen(executed_op_types, circular_buffer_size)
  self.assertLen(executed_op_types, len(set(executed_op_types)))

  # Verify the content of the graph execution traces.
  with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
    traces = [
        item.debug_event.graph_execution_trace
        for item in reader.graph_execution_traces_iterator()
    ]
    op_names = sorted(trace.op_name for trace in traces)
    self.assertLen(op_names, circular_buffer_size)
    self.assertLen(op_names, len(set(op_names)))
def testWriteGraphExecutionTraceEventsWithoutCircularBufferBehavior(self):
  """Checks that every trace is kept when the buffer size is 0."""
  # A circular buffer size of 0 abolishes the circular buffer behavior.
  writer = debug_events_writer.DebugEventsWriter(self.dump_root, 0)
  num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
  for trace_index in range(num_execution_events):
    trace = debug_event_pb2.GraphExecutionTrace()
    trace.op_name = "Op%d" % trace_index
    writer.WriteGraphExecutionTrace(trace)
  writer.FlushExecutionFiles()

  with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
    trace_events = list(reader.graph_execution_traces_iterator())
  self.assertLen(trace_events, num_execution_events)
  for trace_index in range(num_execution_events):
    read_trace = trace_events[trace_index].graph_execution_trace
    self.assertEqual(read_trace.op_name, "Op%d" % trace_index)
def testWriteExecutionEventsWithoutCircularBufferBehavior(self):
  """Checks that every Execution is kept when the buffer size is 0."""
  # A circular buffer size of 0 abolishes the circular buffer behavior.
  writer = debug_events_writer.DebugEventsWriter(self.dump_root, 0)
  num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
  for event_index in range(num_execution_events):
    execution = debug_event_pb2.Execution()
    execution.op_type = "OpType%d" % event_index
    writer.WriteExecution(execution)
  writer.FlushExecutionFiles()

  reader = debug_events_reader.DebugEventsReader(self.dump_root)
  execution_events = list(reader.execution_iterator())
  self.assertLen(execution_events, num_execution_events)
  for event_index in range(num_execution_events):
    read_execution = execution_events[event_index].execution
    self.assertEqual(read_execution.op_type, "OpType%d" % event_index)
def disable_dump_debug_info():
  """Disable the currently-enabled debugging dumping.

  If `enable_dump_debug_info()` under the same Python namespace has been
  invoked before, calling this method disables it: the writer for the
  callback's dump root is closed, the op callback is removed, and the
  stored callback state is deleted.

  If no call to `enable_dump_debug_info()` has been made, calling this
  method is a no-op. Calling this method more than once is idempotent.
  """
  # Nothing to disable when no dumping callback has been registered.
  if not hasattr(_state, "dumping_callback"):
    return
  dumping_callback = _state.dumping_callback
  dump_root = dumping_callback.dump_root
  debug_events_writer.DebugEventsWriter(dump_root).Close()
  op_callbacks.remove_op_callback(dumping_callback.callback)
  delattr(_state, "dumping_callback")
  logging.info("Disabled dumping callback in thread %s (dump root: %s)",
               threading.current_thread().name, dump_root)
def testInvokingDebugIdentityV2OpBeforeCreatingDebugEventsWriterWorks(self):
  """Runs DebugIdentityV2 ops before a writer exists for the dump root.

  The op is invoked `circular_buffer_size * 2` times, then a writer for the
  same dump root is created and flushed. The traces read back must be the
  last `circular_buffer_size` squared inputs.
  """
  if not compat.forward_compatible(2020, 6, 24):
    self.skipTest("Functionality currently not supported.")
  circular_buffer_size = 3

  @def_function.function
  def write_debug_trace(x):
    # DebugIdentityV2 is a stateful op. It ought to be included by auto
    # control dependency.
    square = math_ops.square(x)
    gen_debug_ops.debug_identity_v2(
        square,
        tfdbg_context_id="deadbeaf",
        op_name="Square",
        output_slot=0,
        tensor_debug_mode=debug_event_pb2.TensorDebugMode.FULL_TENSOR,
        debug_urls=["file://%s" % self.dump_root],
        circular_buffer_size=circular_buffer_size)
    return square

  # The DebugIdentityV2 ops are invoked *before* a DebugEventsWriter at the
  # same dump root is created.
  for step in range(circular_buffer_size * 2):
    step_input = np.array([step]).astype(np.float32)
    self.assertAllClose(write_debug_trace(step_input), [step**2.0])
  writer = debug_events_writer.DebugEventsWriter(self.dump_root,
                                                 "test_tfdbg_run",
                                                 circular_buffer_size)
  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()

  with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
    graph_trace_iter = reader.graph_execution_traces_iterators()[0]
    # Drain the iterator until it is exhausted.
    graph_execution_traces = [
        item.debug_event.graph_execution_trace for item in graph_trace_iter
    ]
    self.assertLen(graph_execution_traces, circular_buffer_size)
    for offset in range(circular_buffer_size):
      traced_value = tensor_util.MakeNdarray(
          graph_execution_traces[offset].tensor_proto)
      self.assertAllClose(traced_value,
                          [(offset + circular_buffer_size)**2.0])
def testRangeReadingExecutions(self, begin, end, expected_begin,
                               expected_end):
  """Checks `executions(begin=..., end=...)` range reads.

  Five executions ("OpType0" .. "OpType4") are written. The executions
  returned for [begin, end) must match the expected half-open index range
  [expected_begin, expected_end).
  """
  writer = debug_events_writer.DebugEventsWriter(
      self.dump_root, self.tfdbg_run_id, circular_buffer_size=-1)
  for op_index in range(5):
    writer.WriteExecution(
        debug_event_pb2.Execution(op_type="OpType%d" % op_index))
  writer.FlushExecutionFiles()
  writer.Close()

  with debug_events_reader.DebugDataReader(self.dump_root) as reader:
    reader.update()
    executions = reader.executions(begin=begin, end=end)
  expected_count = expected_end - expected_begin
  self.assertLen(executions, expected_count)
  first_execution, last_execution = executions[0], executions[-1]
  self.assertEqual(first_execution.op_type, "OpType%d" % expected_begin)
  self.assertEqual(last_execution.op_type, "OpType%d" % (expected_end - 1))
def testWriteExecutionEventsWithoutCircularBufferBehavior(self):
  """Checks that every Execution is kept when the buffer size is 0."""
  # A circular buffer size of 0 abolishes the circular buffer behavior.
  writer = debug_events_writer.DebugEventsWriter(self.dump_root, 0)
  num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
  for event_index in range(num_execution_events):
    execution = debug_event_pb2.Execution()
    execution.op_type = "OpType%d" % event_index
    writer.WriteExecution(execution)
  writer.FlushExecutionFiles()

  execution_paths = glob.glob(os.path.join(self.dump_root, "*.execution"))
  self.assertEqual(len(execution_paths), 1)
  execution_events = ReadDebugEvents(execution_paths[0])
  self.assertEqual(len(execution_events), num_execution_events)
  for event_index in range(num_execution_events):
    read_execution = execution_events[event_index].execution
    self.assertEqual(read_execution.op_type, "OpType%d" % event_index)
def testWriteGraphExecutionTraceEventsWithoutCircularBufferBehavior(self):
  """Checks that every trace is kept when the buffer size is 0."""
  # A circular buffer size of 0 abolishes the circular buffer behavior.
  writer = debug_events_writer.DebugEventsWriter(self.dump_root, 0)
  num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
  for trace_index in range(num_execution_events):
    trace = debug_event_pb2.GraphExecutionTrace()
    trace.op_name = "Op%d" % trace_index
    writer.WriteGraphExecutionTrace(trace)
  writer.FlushExecutionFiles()

  trace_paths = glob.glob(
      os.path.join(self.dump_root, "*.graph_execution_traces"))
  self.assertEqual(len(trace_paths), 1)
  trace_events = ReadDebugEvents(trace_paths[0])
  self.assertEqual(len(trace_events), num_execution_events)
  for trace_index in range(num_execution_events):
    read_trace = trace_events[trace_index].graph_execution_trace
    self.assertEqual(read_trace.op_name, "Op%d" % trace_index)
def testWriteExecutionEventsWithoutCircularBufferBehavior(self):
  """Checks that every Execution is kept when the buffer size is 0."""
  # A circular buffer size of 0 abolishes the circular buffer behavior.
  writer = debug_events_writer.DebugEventsWriter(self.dump_root,
                                                 self.tfdbg_run_id, 0)
  num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
  for event_index in range(num_execution_events):
    execution = debug_event_pb2.Execution()
    execution.op_type = "OpType%d" % event_index
    writer.WriteExecution(execution)
  writer.FlushExecutionFiles()

  with debug_events_reader.DebugDataReader(self.dump_root) as reader:
    reader.update()
    executions = reader.executions()
    self.assertLen(executions, num_execution_events)
    for event_index, read_execution in enumerate(executions):
      self.assertEqual(read_execution.op_type, "OpType%d" % event_index)
def testConcurrentGraphExecutionTraceUpdateAndRandomRead(self):
  """Reads graph traces on the main thread while a sub-thread writes them.

  A background thread repeatedly writes a GraphOpCreation and a matching
  GraphExecutionTrace, flushes the files and calls `reader.update()`. The
  main thread polls the reader until at least one trace digest is available
  and checks that the first trace read back has op_name "Op0".
  """
  circular_buffer_size = -1
  writer = debug_events_writer.DebugEventsWriter(self.dump_root,
                                                 self.tfdbg_run_id,
                                                 circular_buffer_size)
  debugged_graph = debug_event_pb2.DebuggedGraph(graph_id="graph1",
                                                 graph_name="graph1")
  writer.WriteDebuggedGraph(debugged_graph)

  writer_state = {"counter": 0, "done": False}

  with debug_events_reader.DebugDataReader(self.dump_root) as reader:

    def write_and_update_job():
      # Runs until the main thread flips the "done" flag.
      while not writer_state["done"]:
        op_name = "Op%d" % writer_state["counter"]
        graph_op_creation = debug_event_pb2.GraphOpCreation(
            op_type="FooOp", op_name=op_name, graph_id="graph1")
        writer.WriteGraphOpCreation(graph_op_creation)
        trace = debug_event_pb2.GraphExecutionTrace(
            op_name=op_name, tfdbg_context_id="graph1")
        writer.WriteGraphExecutionTrace(trace)
        writer_state["counter"] += 1
        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()
        reader.update()

    # On the sub-thread, keep writing and reading new GraphExecutionTraces.
    write_and_update_thread = threading.Thread(target=write_and_update_job)
    write_and_update_thread.start()
    try:
      # On the main thread, do concurrent random read.
      while True:
        digests = reader.graph_execution_traces(digest=True)
        if digests:
          trace_0 = reader.read_graph_execution_trace(digests[0])
          self.assertEqual(trace_0.op_name, "Op0")
          break
        time.sleep(0.1)
    finally:
      # Stop and join the writer thread even when the assertion above fails;
      # otherwise the non-daemon thread would keep looping forever and keep
      # using the reader after the `with` block has exited.
      writer_state["done"] = True
      write_and_update_thread.join()
def testWriteExecutionEventsWithCircularBuffer(self):
  """Checks that only the newest buffer-full of executions is flushed.

  Twice DEFAULT_CIRCULAR_BUFFER_SIZE executions are written. Nothing is
  expected in the file before FlushExecutionFiles(); afterwards it must
  hold exactly the second half of the executions.
  """
  writer = debug_events_writer.DebugEventsWriter(self.dump_root)
  num_execution_events = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
  for i in range(num_execution_events):
    execution = debug_event_pb2.Execution()
    execution.op_type = "OpType%d" % i
    writer.WriteExecution(execution)

  # Before FlushExecutionFiles() is called. No data should have been written
  # to the file.
  executed_op_types, _, _, _, _, _ = self._readAndCheckExecutionFile()
  self.assertFalse(executed_op_types)

  writer.FlushExecutionFiles()
  executed_op_types, _, _, _, _, _ = self._readAndCheckExecutionFile()
  # Assert the count explicitly: without it the loop below would pass
  # vacuously if the flush wrote nothing at all.
  self.assertLen(executed_op_types,
                 debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE)
  for i, executed_op_type in enumerate(executed_op_types):
    self.assertEqual(
        executed_op_type,
        "OpType%d" % (i + debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE))