Esempio n. 1
0
    def testCallingEnableTracingTwiceWithDifferentDumpRootsOverwrites(self):
        dumping_callback.enable_dumping(self.dump_root)
        new_dump_root = self.dump_root + "_new_dump_root"
        writer = dumping_callback.enable_dumping(new_dump_root)

        x = constant_op.constant([10.0, 12.0, 10.0])
        for _ in range(2):
            array_ops.unique(x)

        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()

        reader = debug_events_reader.DebugEventsDir(new_dump_root)
        execution_iter = reader.execution_iterator()
        for _ in range(2):
            debug_event = next(execution_iter)
            self.assertGreater(debug_event.wall_time, 0)
            execution = debug_event.execution
            self.assertEqual(execution.op_type, "Unique")
            self.assertEqual(execution.num_outputs, 2)
            self.assertTrue(execution.code_location)
        with self.assertRaises(StopIteration):
            next(execution_iter)

        old_dump_root_reader = debug_events_reader.DebugEventsDir(
            self.dump_root)
        execution_iter = old_dump_root_reader.execution_iterator()
        # The old dump root shouldn't have been written to.
        with self.assertRaises(StopIteration):
            next(execution_iter)
Esempio n. 2
0
 def _readAndCheckMetadataFile(self):
     """Read and check the .metadata debug-events file."""
     reader = debug_events_reader.DebugEventsDir(self.dump_root)
     metadata_iter = reader.metadata_iterator()
     metadata = next(metadata_iter).debug_metadata
     self.assertEqual(metadata.tensorflow_version, versions.__version__)
     self.assertTrue(metadata.file_version.startswith("debug.Event"))
Esempio n. 3
0
    def _readAndCheckGraphsFile(self, stack_frame_by_id):
        """Read and verify the content of the .graphs debug-event file.

    Args:
      stack_frame_by_id: A dict mapping unique string IDs to stack frames.
        It is used by this method to look up stack frames.

    Returns:
      context_ids: IDs of op creation contexts (e.g., TensorFlow graphs), as a
        `list` of `str`s.
      op_types: Types of the ops that are created, as a `list` of `str`s with
        the same length as `context_ids`.
      op_name_to_op_type: A `dict` mapping op name to op type.
    """
        reader = debug_events_reader.DebugEventsDir(self.dump_root)
        graphs_iter = reader.graphs_iterator()
        prev_wall_time = 0
        op_types = []
        op_name_to_op_type = dict()
        context_ids = set()
        for debug_event in graphs_iter:
            self.assertGreaterEqual(debug_event.wall_time, prev_wall_time)
            prev_wall_time = debug_event.wall_time
            graph_op_creation = debug_event.graph_op_creation
            self.assertTrue(graph_op_creation.op_type)
            op_types.append(graph_op_creation.op_type)
            self.assertTrue(graph_op_creation.op_name)
            op_name_to_op_type[
                graph_op_creation.op_name] = graph_op_creation.op_type
            self.assertTrue(graph_op_creation.graph_id)
            context_ids.add(graph_op_creation.graph_id)
            self.assertTrue(graph_op_creation.code_location)
            for stack_frame_id in graph_op_creation.code_location.stack_frame_ids:
                self.assertIn(stack_frame_id, stack_frame_by_id)
        return context_ids, op_types, op_name_to_op_type
Esempio n. 4
0
    def _readAndCheckGraphExecutionTracesFile(self, context_ids):
        """Read & verify the content of the .graph_execution_trace debug-event file.

    Args:
      context_ids: Op-creation context IDs from _readAndCheckGraphsFile().

    Returns:
      op_names: Names of the ops that are executed, as a `list` of `str`s.
      output_slots: Output slots, as a `list` of `int`s, with the same length as
        `op_names`. In other words, for an executed op with N output tensors,
        there will be N entries in this `list` and in `op_names`, at
        corresponding indices.
      tensor_values: Tensor values or their concise summaries, depending on
        TensorDebugMode.
    """
        reader = debug_events_reader.DebugEventsDir(self.dump_root)
        graph_execution_traces_iter = reader.graph_execution_traces_iterator()
        op_names = []
        output_slots = []
        tensor_values = []
        for debug_event in graph_execution_traces_iter:
            self.assertGreaterEqual(debug_event.wall_time, 0)
            graph_execution_trace = debug_event.graph_execution_trace
            op_names.append(graph_execution_trace.op_name)
            # All the ops in the graph have only one output.
            self.assertTrue(graph_execution_trace.tfdbg_context_id)
            self.assertIn(graph_execution_trace.tfdbg_context_id, context_ids)
            output_slots.append(graph_execution_trace.output_slot)
            # Under the default NO_TENSOR tensor-debug mode, the tensor_proto ought to
            # be an empty float32 tensor.
            tensor_values.append(
                tensor_util.MakeNdarray(graph_execution_trace.tensor_proto))
        return op_names, output_slots, tensor_values
Esempio n. 5
0
    def _readAndCheckExecutionFile(self):
        """Read and verify the content of the .execution debug-event file.

    Returns:
      executed_op_types: Types of ops that are created, as a `list` of `str`.
      input_tensor_ids: Input tensor IDs for each of the ops executed, as a
        `list` of `list` of `int`s, with the same length as `executed_op_types`.
      output_tensor_ids: Output tensor IDs for each of the ops executed, as a
        `list` of `list` of `int`s, with the same length as `executed_op_types`.
      tensor_debug_modes: Tensor debug modes used to instrument each of ops
        executed.
    """
        reader = debug_events_reader.DebugEventsDir(self.dump_root)
        execution_iter = reader.execution_iterator()
        prev_wall_time = 1
        executed_op_types = []
        input_tensor_ids = []
        output_tensor_ids = []
        tensor_debug_modes = []
        for debug_event in execution_iter:
            self.assertGreaterEqual(debug_event.wall_time, prev_wall_time)
            prev_wall_time = debug_event.wall_time
            execution = debug_event.execution
            executed_op_types.append(execution.op_type)
            input_tensor_ids.append(execution.input_tensor_ids)
            output_tensor_ids.append(execution.output_tensor_ids)
            tensor_debug_modes.append(execution.tensor_debug_mode)

        # TODO(cais): When tensor debug modes other than NO_TENSOR is supported,
        # return tensor_values as well.
        return (executed_op_types, input_tensor_ids, output_tensor_ids,
                tensor_debug_modes)
Esempio n. 6
0
    def testMultiThreadedExecution(self, tensor_debug_mode):
        writer = dumping_callback.enable_dumping(
            self.dump_root, tensor_debug_mode=tensor_debug_mode)
        x = variables.Variable(10.0, dtype=dtypes.float32)
        y = variables.Variable(3.0, dtype=dtypes.float32)

        @def_function.function
        def increase_x():
            return x.assign_add(y * 2.0)

        increase_x()

        num_threads = 3
        threads = []
        for _ in range(num_threads):
            threads.append(threading.Thread(target=increase_x))
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        # 10 --> 16 --> 22 --> 28 --> 34.
        self.assertAllClose(x.read_value(), 34.0)

        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()

        stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
        reader = debug_events_reader.DebugEventsDir(self.dump_root)
        execution_iter = reader.execution_iterator()
        prev_wall_time = 1
        for debug_event in execution_iter:
            self.assertGreaterEqual(debug_event.wall_time, prev_wall_time)
            prev_wall_time = debug_event.wall_time

        (context_ids, _,
         op_name_to_op_type) = self._readAndCheckGraphsFile(stack_frame_by_id)

        (op_names, _, output_slots, tensor_values
         ) = self._readAndCheckGraphExecutionTracesFile(context_ids)
        executed_op_types = [
            op_name_to_op_type[op_name] for op_name in op_names
        ]
        self.assertEqual(executed_op_types.count("Mul"), 1 + num_threads)
        self.assertEqual(executed_op_types.count("ReadVariableOp"),
                         2 * (1 + num_threads))
        for output_slot in output_slots:
            self.assertEqual(output_slot, 0)
        if tensor_debug_mode == "NO_TENSOR":
            for tensor_value in tensor_values:
                self.assertEqual(tensor_value.dtype, np.float32)
                self.assertEqual(tensor_value.shape, (0, ))
        elif tensor_debug_mode == "FULL_TENSOR":
            mul_values = [
                tensor_values[i] for i, op_type in enumerate(executed_op_types)
                if op_type == "Mul"
            ]
            self.assertAllClose(mul_values, [6.0, 6.0, 6.0, 6.0])
Esempio n. 7
0
    def _readAndCheckSourceFilesAndStackFrames(self):
        """Read and verify the .source_files & .stack_frames debug-event files.

    Returns:
      A dict mapping stack frame IDs to stack frames (FileLineCol).
    """
        reader = debug_events_reader.DebugEventsDir(self.dump_root)
        # Check the content of the .source_files file.
        source_files_iter = reader.source_files_iterator()
        source_file_paths = []
        prev_wall_time = 1
        for debug_event in source_files_iter:
            self.assertGreaterEqual(debug_event.wall_time, prev_wall_time)
            prev_wall_time = debug_event.wall_time
            source_file = debug_event.source_file
            self.assertEqual(source_file.host_name, socket.gethostname())
            self.assertTrue(source_file.file_path)
            if source_file.lines:
                self.assertTrue(os.path.isfile(source_file.file_path))
            source_file_paths.append(source_file.file_path)
        # Assert the file paths are unique.
        self.assertEqual(len(source_file_paths), len(set(source_file_paths)))

        # Check the content of the .stack_frames file.
        stack_frame_by_id = dict()  # A map from ID to stack frame.
        stack_frames_iter = reader.stack_frames_iterator()
        prev_wall_time = 0
        for debug_event in stack_frames_iter:
            self.assertGreaterEqual(debug_event.wall_time, prev_wall_time)
            prev_wall_time = debug_event.wall_time
            stack_frame_with_id = debug_event.stack_frame_with_id
            stack_frame_id = stack_frame_with_id.id
            file_line_col = stack_frame_with_id.file_line_col
            self.assertTrue(stack_frame_id)
            self.assertNotIn(stack_frame_id, stack_frame_by_id,
                             "Duplicate stack frame ID: %s" % id)
            stack_frame_by_id[stack_frame_id] = (file_line_col.file_index,
                                                 file_line_col.line,
                                                 file_line_col.func)
            self.assertGreaterEqual(file_line_col.file_index, 0)
            self.assertLess(file_line_col.file_index, len(source_file_paths))
            self.assertTrue(file_line_col.line)  # Line numbers are 1-based.
            self.assertTrue(file_line_col.func)
        # Assert the stack frames are unique.
        self.assertEqual(len(stack_frame_by_id.values()),
                         len(set(stack_frame_by_id.values())))
        return stack_frame_by_id
Esempio n. 8
0
    def testCallingEnableTracingTwiceWithTheSameDumpRootIsIdempotent(self):
        dumping_callback.enable_dumping(self.dump_root)
        writer = dumping_callback.enable_dumping(self.dump_root)

        x = constant_op.constant([10.0, 12.0, 10.0])
        for _ in range(2):
            array_ops.unique(x)

        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()

        reader = debug_events_reader.DebugEventsDir(self.dump_root)
        execution_iter = reader.execution_iterator()
        for _ in range(2):
            debug_event = next(execution_iter)
            self.assertGreater(debug_event.wall_time, 0)
            execution = debug_event.execution
            self.assertEqual(execution.op_type, "Unique")
            self.assertEqual(execution.num_outputs, 2)
            self.assertTrue(execution.code_location)
        with self.assertRaises(StopIteration):
            next(execution_iter)
Esempio n. 9
0
    def testDisableTracingWorks(self):
        writer = dumping_callback.enable_dumping(self.dump_root)
        dumping_callback.disable_dumping()

        x = constant_op.constant([10.0, 12.0, 10.0])
        for _ in range(2):
            array_ops.unique(x)

        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()

        reader = debug_events_reader.DebugEventsDir(self.dump_root)
        source_files_iter = reader.source_files_iterator()
        stack_frames_iter = reader.stack_frames_iterator()
        execution_iter = reader.execution_iterator()
        # No source-file, stack-frame or execution data should have been dumped.
        with self.assertRaises(StopIteration):
            next(source_files_iter)
        with self.assertRaises(StopIteration):
            next(stack_frames_iter)
        with self.assertRaises(StopIteration):
            next(execution_iter)
Esempio n. 10
0
    def testPureEagerOpExecution(self, tensor_debug_mode):
        """Test catching Infinity in eager op execution: float32."""
        writer = dumping_callback.enable_dumping(
            self.dump_root, tensor_debug_mode=tensor_debug_mode)

        x = constant_op.constant(10.0)
        zero = constant_op.constant(0.0)
        one = constant_op.constant(1.0)
        two = constant_op.constant(2.0)
        three = constant_op.constant(3.0)
        # Use Collatz conjecture as a test case.
        while x > one:
            if math_ops.equal(x % two, zero):
                x = x / two
            else:
                x = x * three + one

        writer.FlushNonExecutionFiles()
        self._readAndCheckMetadataFile()
        stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()

        # Before FlushExecutionFiles() is called, the .execution file should be
        # empty.
        reader = debug_events_reader.DebugEventsDir(self.dump_root)
        execution_iter = reader.execution_iterator()
        with self.assertRaises(StopIteration):
            next(execution_iter)

        # After the flushing, the .execution file should hold the appropriate
        # contents.
        writer.FlushExecutionFiles()
        execution_iter = reader.execution_iterator()
        prev_wall_time = 1
        executed_op_types = []
        tensor_values = collections.defaultdict(lambda: [])
        for debug_event in execution_iter:
            self.assertGreaterEqual(debug_event.wall_time, prev_wall_time)
            prev_wall_time = debug_event.wall_time
            execution = debug_event.execution
            executed_op_types.append(execution.op_type)
            self.assertTrue(execution.input_tensor_ids)
            self.assertTrue(execution.output_tensor_ids)
            if tensor_debug_mode == "NO_TENSOR":
                # Due to the NO_TENSOR tensor debug mode, tensor_protos ought to
                # be empty.
                self.assertFalse(execution.tensor_protos)
            elif tensor_debug_mode == "FULL_TENSOR":
                # Under the FULL_TENSOR mode, the value of the tensor should be
                # available through `tensor_protos`.
                tensor_value = float(
                    tensor_util.MakeNdarray(execution.tensor_protos[0]))
                tensor_values[execution.op_type].append(tensor_value)
            # Verify the code_location field.
            self.assertTrue(execution.code_location.stack_frame_ids)
            for stack_frame_id in execution.code_location.stack_frame_ids:
                self.assertIn(stack_frame_id, stack_frame_by_id)
        if tensor_debug_mode == "FULL_TENSOR":
            self.assertAllClose(tensor_values["Greater"],
                                [1, 1, 1, 1, 1, 1, 0])
            self.assertAllClose(tensor_values["RealDiv"], [5, 8, 4, 2, 1])
            self.assertAllClose(tensor_values["Mul"], [15])
            self.assertAllClose(tensor_values["AddV2"], [16])

        self.assertEqual(
            executed_op_types,
            [
                "Greater",
                "FloorMod",
                "Equal",
                "RealDiv",  # 10 --> 5
                "Greater",
                "FloorMod",
                "Equal",
                "Mul",
                "AddV2",  # 5 --> 16
                "Greater",
                "FloorMod",
                "Equal",
                "RealDiv",  # 16 --> 8
                "Greater",
                "FloorMod",
                "Equal",
                "RealDiv",  # 8 --> 4
                "Greater",
                "FloorMod",
                "Equal",
                "RealDiv",  # 4 --> 2
                "Greater",
                "FloorMod",
                "Equal",
                "RealDiv",  # 2 --> 1
                "Greater"
            ])

        # Due to the pure eager op execution, the .graph file and the
        # .graph_execution_traces file ought to be empty.
        graphs_iterator = reader.graphs_iterator()
        with self.assertRaises(StopIteration):
            next(graphs_iterator)
        graph_trace_iter = reader.graph_execution_traces_iterator()
        with self.assertRaises(StopIteration):
            next(graph_trace_iter)
Esempio n. 11
0
    def testFunctionExecutionWithControlFlow(self, tensor_debug_mode):
        writer = dumping_callback.enable_dumping(
            self.dump_root, tensor_debug_mode=tensor_debug_mode)

        @def_function.function
        def iterative_doubling(x, times):
            i = constant_op.constant(0, dtype=dtypes.int32)
            while i < times:
                x = x * 2.0
                i += 1
            return x

        x = constant_op.constant(0.5, dtype=dtypes.float32)
        times = constant_op.constant(4, dtype=dtypes.int32)
        self.assertAllClose(self.evaluate(iterative_doubling(x, times)), 8.0)

        writer.FlushNonExecutionFiles()
        stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()

        # Verify the content of the .graphs file.
        context_ids, op_types, op_name_to_op_type = (
            self._readAndCheckGraphsFile(stack_frame_by_id))
        self.assertIn("Less", op_types)
        self.assertIn("Mul", op_types)
        self.assertIn("AddV2", op_types)

        # Before FlushExecutionFiles() is called, the .execution and
        # .graph_execution_traces files should be both empty.
        reader = debug_events_reader.DebugEventsDir(self.dump_root)
        execution_iter = reader.execution_iterator()
        graph_execution_traces_iter = reader.graph_execution_traces_iterator()
        with self.assertRaises(StopIteration):
            next(execution_iter)
        with self.assertRaises(StopIteration):
            next(graph_execution_traces_iter)

        # TODO(cais): Backport execution instrumentation to tf.Session.
        writer.FlushExecutionFiles()
        # After the flushing, the .execution file should hold the appropriate
        # contents.
        if context.executing_eagerly():
            (executed_op_types, input_tensor_ids, output_tensor_ids,
             tensor_debug_modes,
             tensor_values) = self._readAndCheckExecutionFile()
            # NOTE(b/142486213): Execution of the TF function happens with
            # Session.run() in v1 graph mode, hence it doesn't get logged to the
            # .execution file.
            self.assertLen(executed_op_types, 1)
            self.assertIn("iterative_doubling", executed_op_types[0])
            self.assertLen(input_tensor_ids[0], 2)
            self.assertLen(output_tensor_ids[0], 1)
            self.assertEqual(
                tensor_debug_modes[0],
                debug_event_pb2.TensorDebugMode.Value(tensor_debug_mode))
            if tensor_debug_mode == "FULL_TENSOR":
                self.assertAllClose(tensor_values, [[8.0]])

        (op_names, _, output_slots, tensor_values
         ) = self._readAndCheckGraphExecutionTracesFile(context_ids)
        executed_op_types = [
            op_name_to_op_type[op_name] for op_name in op_names
        ]
        # The Less op should have been executed 5 times.
        self.assertEqual(executed_op_types.count("Less"), 5)
        # The last executed op should be Less.
        self.assertEqual(executed_op_types[-1], "Less")
        # The Mul op should have been executed 4 times.
        self.assertEqual(executed_op_types.count("Mul"), 4)
        # The AddV2 op should have been run, but we refrain from asserting on how
        # many times it's executed.
        self.assertIn("AddV2", executed_op_types)
        for output_slot in output_slots:
            self.assertEqual(output_slot, 0)
        if tensor_debug_mode == "NO_TENSOR":
            # Under the default NO_TENSOR tensor-debug mode, the tensor_proto ought to
            # be an empty float32 tensor.
            for tensor_value in tensor_values:
                self.assertEqual(tensor_value.dtype, np.float32)
                self.assertEqual(tensor_value.shape, (0, ))
        elif tensor_debug_mode == "FULL_TENSOR":
            less_values = [
                tensor_values[i] for i, op_type in enumerate(executed_op_types)
                if op_type == "Less"
            ]
            self.assertAllClose(less_values, [True, True, True, True, False])
            mul_values = [
                tensor_values[i] for i, op_type in enumerate(executed_op_types)
                if op_type == "Mul"
            ]
            self.assertAllClose(mul_values, [1.0, 2.0, 4.0, 8.0])