Example #1
    def testCallingEnableRepeatedlyWithDifferentTensorDebugMode(self):
        """Assert that calling enable_dump_debug_info() with different tensor-debug modes.

    It should lead to overwriting of the previously-configured mode.
    """
        writer = dumping_callback.enable_dump_debug_info(
            self.dump_root, tensor_debug_mode="NO_TENSOR")

        @def_function.function
        def add_1_divide_by_2(x):
            return (x + 1.0) / 2.0

        self.assertAllClose(add_1_divide_by_2(constant_op.constant(4.0)), 2.5)
        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()
        stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
        context_ids, _, _, _ = self._readAndCheckGraphsFile(stack_frame_by_id)
        _, _, _, _, tensor_values = self._readAndCheckExecutionFile()
        self.assertEqual(tensor_values, [[]])
        (_, _, _, tensor_values
         ) = self._readAndCheckGraphExecutionTracesFile(context_ids)
        self.assertLen(tensor_values, 2)
        for tensor_value in tensor_values:
            self.assertEqual(tensor_value.dtype, np.float32)
            self.assertEqual(tensor_value.shape, (0, ))

        with self.assertRaisesRegexp(
                ValueError,
                r"already.*NO_TENSOR.*FULL_TENSOR.*not be honored"):
            dumping_callback.enable_dump_debug_info(
                self.dump_root, tensor_debug_mode="FULL_TENSOR")
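
The test above drives the internal dumping_callback module directly. For orientation, here is a minimal usage sketch of the public TF 2.x counterpart, tf.debugging.experimental.enable_dump_debug_info; the dump path and debug mode below are placeholders, not values taken from the test.

import tensorflow as tf

# Start dumping tfdbg2 debug events; circular_buffer_size <= 0 keeps every
# event instead of retaining only the most recent ones.
tf.debugging.experimental.enable_dump_debug_info(
    "/tmp/tfdbg2_dump", tensor_debug_mode="NO_TENSOR", circular_buffer_size=-1)

@tf.function
def add_1_divide_by_2(x):
    return (x + 1.0) / 2.0

add_1_divide_by_2(tf.constant(4.0))  # Ops executed here are traced to the dump root.

# Stop dumping when done.
tf.debugging.experimental.disable_dump_debug_info()
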
Example #2
 def testInvalidTensorDebugModeCausesError(self):
     with self.assertRaisesRegexp(
             ValueError,
             r"Invalid value in tensor_debug_mode \(\'NONSENSICAL\'\).*"
             r"Valid options.*NO_TENSOR.*"):
         dumping_callback.enable_dump_debug_info(
             self.dump_root, tensor_debug_mode="NONSENSICAL")
Example #3
    def testCallingEnableTracingTwiceWithDifferentDumpRootsOverwrites(self):
        dumping_callback.enable_dump_debug_info(self.dump_root)
        new_dump_root = self.dump_root + "_new_dump_root"
        writer = dumping_callback.enable_dump_debug_info(new_dump_root)

        x = constant_op.constant([10.0, 12.0, 10.0])
        for _ in range(2):
            array_ops.unique(x)

        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()

        with debug_events_reader.DebugEventsReader(new_dump_root) as reader:
            execution_iter = reader.execution_iterator()
            for _ in range(2):
                debug_event = next(execution_iter)
                self.assertGreater(debug_event.wall_time, 0)
                execution = debug_event.execution
                self.assertEqual(execution.op_type, "Unique")
                self.assertEqual(execution.num_outputs, 2)
                self.assertTrue(execution.code_location)
            with self.assertRaises(StopIteration):
                next(execution_iter)

            with debug_events_reader.DebugEventsReader(
                    self.dump_root) as old_dump_root_reader:
                execution_iter = old_dump_root_reader.execution_iterator()
                # The old dump root shouldn't have been written to.
                with self.assertRaises(StopIteration):
                    next(execution_iter)
Example #4
    def testSimpleKerasRecurrentModelPredict(self, tensor_debug_mode):
        writer = dumping_callback.enable_dump_debug_info(
            self.dump_root, tensor_debug_mode=tensor_debug_mode)
        model = _create_simple_recurrent_keras_model([3, 4])
        batch_size = 5
        xs = np.ones([batch_size, 3, 4])
        self.assertAllClose(model.predict(xs), np.zeros([batch_size, 1]))

        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()

        stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
        (context_ids, op_types, op_name_to_op_type,
         _) = self._readAndCheckGraphsFile(stack_frame_by_id)
        # Simply assert that graphs were recorded and refrain from asserting on the
        # internal details of the Keras model.
        self.assertTrue(context_ids)
        self.assertTrue(op_types)
        self.assertTrue(op_name_to_op_type)

        if context.executing_eagerly():
            # NOTE(b/142486213): Execution of the TF function happens with
            # Session.run() in v1 graph mode, hence it doesn't get logged to the
            # .execution file.
            (executed_op_types, _, _, _,
             tensor_values) = self._readAndCheckExecutionFile()
            self.assertTrue(executed_op_types)

            for value_list in tensor_values:
                if tensor_debug_mode == "NO_TENSOR":
                    self.assertFalse(value_list)

        (op_names, _, _, tensor_values
         ) = self._readAndCheckGraphExecutionTracesFile(context_ids)
        executed_op_types = [
            op_name_to_op_type[op_name] for op_name in op_names
        ]
        # These are the ops that we can safely assume to have been executed during
        # the model prediction.
        self.assertIn("MatMul", executed_op_types)
        self.assertIn("BiasAdd", executed_op_types)
        # On the GPU, CudnnRNN is used in lieu of the default op-by-op
        # implementation.
        self.assertTrue(
            ("Sigmoid" in executed_op_types and "Tanh" in executed_op_types
             or "CudnnRNN" in executed_op_types))
        # Under the default NO_TENSOR tensor-debug mode, the tensor_proto ought to
        # be an empty float32 tensor.
        if tensor_debug_mode == "NO_TENSOR":
            for tensor_value in tensor_values:
                self.assertEqual(tensor_value.dtype, np.float32)
                self.assertEqual(tensor_value.shape, (0, ))
        else:
            # Refrain from asserting the internal implementation details of the LSTM
            # layer.
            concrete_tensor_values = [
                value for value in tensor_values
                if value is not None and value.size > 0
            ]
            self.assertTrue(concrete_tensor_values)
Example #5
  def testOnExecutionIsCalled(self, tensor_debug_mode):
    writer = dumping_callback.enable_dump_debug_info(
        self.dump_root, tensor_debug_mode=tensor_debug_mode)
    x = constant_op.constant([[1, 2], [3, 4]], dtype=dtypes.float32)
    y = constant_op.constant([[-1], [1]], dtype=dtypes.float32)
    math_ops.matmul(x, y)
    writer.FlushNonExecutionFiles()
    writer.FlushExecutionFiles()

    with debug_events_reader.DebugDataReader(self.dump_root) as reader:
      test_monitor = TestMonitor(reader)
      reader.update()
      self.assertLen(test_monitor.executions, 1)
      self.assertEmpty(test_monitor.graph_execution_traces)
      execution = test_monitor.executions[0]
      self.assertTrue(execution.wall_time)
      self.assertEqual(execution.op_type, "MatMul")
      self.assertLen(execution.output_tensor_device_ids, 1)
      self.assertLen(execution.input_tensor_ids, 2)
      self.assertLen(execution.output_tensor_ids, 1)
      self.assertEqual(execution.num_outputs, 1)
      self.assertEqual(execution.graph_id, "")
      if tensor_debug_mode == "NO_TENSOR":
        self.assertIsNone(execution.debug_tensor_values)
      elif tensor_debug_mode == "CONCISE_HEALTH":
        self.assertLen(execution.debug_tensor_values, 1)
        # [tensor_id, element_count, neg_inf_count, pos_inf_count, nan_count].
        self.assertLen(execution.debug_tensor_values[0], 5)
      elif tensor_debug_mode == "FULL_TENSOR":
        # Full tensor values are not stored in the debug_tensor_values field.
        self.assertIsNone(execution.debug_tensor_values)
        self.assertAllClose(
            reader.execution_to_tensor_values(execution), [[[1.], [1.]]])
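
The TestMonitor helper used above (and again in the later testOnGraphExecutionTraceIsCalled example) is not included in these excerpts. Below is a plausible minimal sketch, assuming the BaseMonitor callback interface from debug_events_monitors; the attribute names mirror how the tests read them, but this is not the original definition.

from tensorflow.python.debug.lib import debug_events_monitors


class TestMonitor(debug_events_monitors.BaseMonitor):
    """Collects execution and graph-execution-trace events for assertions."""

    def __init__(self, debug_data_reader):
        super(TestMonitor, self).__init__(debug_data_reader)
        self.executions = []              # Execution data objects, in event order.
        self.graph_execution_traces = []  # GraphExecutionTrace objects, in order.

    def on_execution(self, execution_index, execution):
        # Invoked by the reader for each top-level (eager) execution event.
        self.executions.append(execution)

    def on_graph_execution_trace(self, graph_execution_trace_index,
                                 graph_execution_trace):
        # Invoked by the reader for each intra-graph tensor trace event.
        self.graph_execution_traces.append(graph_execution_trace)
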
Example #6
 def add_negative_v1_squared_to_itself():
     writer = dumping_callback.enable_dump_debug_info(
         dump_root_1, tensor_debug_mode="FULL_TENSOR")
     # Run in a loop to facilitate interleaving between threads.
     for _ in range(3):
         v1.assign_add(-(v1**2.0))
     writer.FlushNonExecutionFiles()
     writer.FlushExecutionFiles()
Example #7
    def testMultiThreadedExecutionWithSameSetting(self, tensor_debug_mode):
        """Dumping from multiple threads using the same setting."""
        writer = dumping_callback.enable_dump_debug_info(
            self.dump_root, tensor_debug_mode=tensor_debug_mode)
        x = variables.Variable(10.0, dtype=dtypes.float32)
        y = variables.Variable(3.0, dtype=dtypes.float32)

        @def_function.function
        def increase_x():
            return x.assign_add(y * 2.0)

        increase_x()

        num_threads = 3
        threads = []
        for _ in range(num_threads):
            threads.append(threading.Thread(target=increase_x))
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        # 10 --> 16 --> 22 --> 28 --> 34.
        self.assertAllClose(x.read_value(), 34.0)

        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()

        stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
        with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
            execution_iter = reader.execution_iterator()
            prev_wall_time = 1
            for debug_event in execution_iter:
                self.assertGreaterEqual(debug_event.wall_time, prev_wall_time)
                prev_wall_time = debug_event.wall_time

        (context_ids, _, op_name_to_op_type,
         _) = self._readAndCheckGraphsFile(stack_frame_by_id)

        (op_names, _, output_slots, tensor_values
         ) = self._readAndCheckGraphExecutionTracesFile(context_ids)
        executed_op_types = [
            op_name_to_op_type[op_name] for op_name in op_names
        ]
        self.assertEqual(executed_op_types.count("Mul"), 1 + num_threads)
        self.assertEqual(executed_op_types.count("ReadVariableOp"),
                         2 * (1 + num_threads))
        for output_slot in output_slots:
            self.assertEqual(output_slot, 0)
        if tensor_debug_mode == "NO_TENSOR":
            for tensor_value in tensor_values:
                self.assertEqual(tensor_value.dtype, np.float32)
                self.assertEqual(tensor_value.shape, (0, ))
        elif tensor_debug_mode == "FULL_TENSOR":
            mul_values = [
                tensor_values[i] for i, op_type in enumerate(executed_op_types)
                if op_type == "Mul"
            ]
            self.assertAllClose(mul_values, [6.0, 6.0, 6.0, 6.0])
Example #8
    def testNestedContextIsCapturedByGraphOpCreationHistory(self):
        writer = dumping_callback.enable_dump_debug_info(
            self.dump_root, tensor_debug_mode="NO_TENSOR")

        @def_function.function
        def iterative_doubling(x, times):
            i = constant_op.constant(0, dtype=dtypes.int32)
            while i < times:
                x = x * 2.0 - 1.0
                i += 1
            return x

        x = constant_op.constant(2.0, dtype=dtypes.float32)
        times = constant_op.constant(4, dtype=dtypes.int32)
        # 2 * 2 - 1 = 3; 3 * 2 - 1 = 5; 5 * 2 - 1 = 9; 9 * 2 - 1 = 17.
        self.assertAllClose(self.evaluate(iterative_doubling(x, times)), 17.0)

        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()

        stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
        (_, _, op_name_to_op_type, op_name_to_context_id
         ) = self._readAndCheckGraphsFile(stack_frame_by_id)

        less_op_names = [
            op_name for op_name in op_name_to_op_type
            if op_name_to_op_type[op_name] == "Less"
        ]
        less_context_ids = [
            op_name_to_context_id[op_name] for op_name in less_op_names
        ]
        mul_op_names = [
            op_name for op_name in op_name_to_op_type
            if op_name_to_op_type[op_name] == "Mul"
        ]
        mul_context_ids = [
            op_name_to_context_id[op_name] for op_name in mul_op_names
        ]
        sub_op_names = [
            op_name for op_name in op_name_to_op_type
            if op_name_to_op_type[op_name] == "Sub"
        ]
        sub_context_ids = [
            op_name_to_context_id[op_name] for op_name in sub_op_names
        ]
        self.assertLen(less_context_ids, 1)
        self.assertLen(mul_context_ids, 1)
        self.assertLen(sub_context_ids, 1)
        self.assertTrue(less_context_ids[0])
        self.assertTrue(mul_context_ids[0])
        self.assertTrue(sub_context_ids[0])
        # The Less op is from the while-loop cond context and hence should have
        # a different innermost context ID from the mul and sub ops, which are both
        # from the while-loop body context.
        self.assertNotEqual(less_context_ids[0], mul_context_ids[0])
        self.assertNotEqual(less_context_ids[0], sub_context_ids[0])
        # The Mul and Sub ops are from the same innermost context.
        self.assertEqual(mul_context_ids[0], sub_context_ids[0])
Example #9
    def testSimpleKerasRecurrentModelFit(self, tensor_debug_mode):
        writer = dumping_callback.enable_dump_debug_info(
            self.dump_root, tensor_debug_mode=tensor_debug_mode)
        model = _create_simple_recurrent_keras_model([3, 4])
        xs = np.ones([5, 3, 4])
        ys = np.ones([5, 1])

        history = model.fit(xs, ys, epochs=3, verbose=0)
        self.assertAllClose(history.history["loss"],
                            [1.0, 0.9603999853134155, 0.9223681688308716])

        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()

        stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
        (context_ids, op_types, op_name_to_op_type,
         _) = self._readAndCheckGraphsFile(stack_frame_by_id)
        # Simply assert that graphs were recorded and refrain from asserting on the
        # internal details of the Keras model.
        self.assertTrue(context_ids)
        self.assertTrue(op_types)
        self.assertTrue(op_name_to_op_type)

        if context.executing_eagerly():
            # NOTE(b/142486213): Execution of the TF function happens with
            # Session.run() in v1 graph mode, hence it doesn't get logged to the
            # .execution file.
            (executed_op_types, _, _, _,
             tensor_values) = self._readAndCheckExecutionFile()
            self.assertTrue(executed_op_types)
            if tensor_debug_mode == "NO_TENSOR":
                for value_list in tensor_values:
                    self.assertFalse(value_list)

        (op_names, _, _, tensor_values
         ) = self._readAndCheckGraphExecutionTracesFile(context_ids)
        executed_op_types = [
            op_name_to_op_type[op_name] for op_name in op_names
        ]
        # These are the ops that we can safely assume to have been executed during
        # the recurrent model's fit() call.
        self.assertIn("MatMul", executed_op_types)
        self.assertIn("BiasAdd", executed_op_types)
        # On the GPU, CudnnRNN is used in lieu of the default op-by-op
        # implementation.
        self.assertTrue(
            ("Sigmoid" in executed_op_types and "Tanh" in executed_op_types
             or "CudnnRNN" in executed_op_types))
        self.assertTrue(("SigmoidGrad" in executed_op_types
                         and "TanhGrad" in executed_op_types
                         or "CudnnRNNBackprop" in executed_op_types))
        if tensor_debug_mode == "NO_TENSOR":
            # Under the default NO_TENSOR tensor-debug mode, the tensor_proto ought
            # to be an empty float32 tensor.
            for tensor_value in tensor_values:
                self.assertEqual(tensor_value.dtype, np.float32)
                self.assertEqual(tensor_value.shape, (0, ))
Example #10
    def testNestedFunctionExecutionWithoutControlFlow(self, tensor_debug_mode):
        writer = dumping_callback.enable_dump_debug_info(
            self.dump_root, tensor_debug_mode=tensor_debug_mode)

        @def_function.function
        def log_sum(x, y):
            return math_ops.log(x + y)

        @def_function.function
        def sin1p_log_sum(x, y):
            return math_ops.sin(1.0 + log_sum(x, y))

        x = constant_op.constant(2.0)
        y = constant_op.constant(3.0)
        self.assertAllClose(sin1p_log_sum(x, y), np.sin(1.0 + np.log(5.0)))
        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()

        if context.executing_eagerly():
            # NOTE(b/142486213): Execution of the TF function happens with
            # Session.run() in v1 graph mode, hence it doesn't get logged to the
            # .execution file.
            executed_op_types, _, _, _, _ = self._readAndCheckExecutionFile()
            executed_op_types = [
                op_type for op_type in executed_op_types
                if "sin1p_log_sum" in op_type
            ]
            self.assertLen(executed_op_types, 1)

        stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
        (context_ids, op_types, op_name_to_op_type,
         _) = self._readAndCheckGraphsFile(stack_frame_by_id)
        self.assertIn("AddV2", op_types)
        self.assertIn("Log", op_types)
        self.assertIn("Sin", op_types)

        (op_names, _, _, tensor_values
         ) = self._readAndCheckGraphExecutionTracesFile(context_ids)
        executed_op_types = [
            op_name_to_op_type[op_name] for op_name in op_names
        ]
        self.assertEqual(executed_op_types, ["AddV2", "Log", "AddV2", "Sin"])

        if tensor_debug_mode == "NO_TENSOR":
            # Under the default NO_TENSOR tensor-debug mode, the tensor_proto ought to
            # be an empty float32 tensor.
            for tensor_value in tensor_values:
                self.assertEqual(tensor_value.dtype, np.float32)
                self.assertEqual(tensor_value.shape, (0, ))
        elif tensor_debug_mode == "FULL_TENSOR":
            self.assertAllClose(tensor_values[0], 5.0)  # 1st AddV2 op.
            self.assertAllClose(tensor_values[1], np.log(5.0))  # Log op.
            self.assertAllClose(tensor_values[2],
                                np.log(5.0) + 1.0)  # 2nd AddV2 op.
            self.assertAllClose(tensor_values[3],
                                np.sin(np.log(5.0) + 1.0))  # Sin op.
Example #11
    def testCallingEnableTracingTwiceWithTheSameDumpRootIsIdempotent(self):
        dumping_callback.enable_dump_debug_info(self.dump_root)
        writer = dumping_callback.enable_dump_debug_info(self.dump_root)

        x = constant_op.constant([10.0, 12.0, 10.0])
        for _ in range(2):
            array_ops.unique(x)

        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()

        with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
            execution_iter = reader.execution_iterator()
            for _ in range(2):
                debug_event = next(execution_iter)
                self.assertGreater(debug_event.wall_time, 0)
                execution = debug_event.execution
                self.assertEqual(execution.op_type, "Unique")
                self.assertEqual(execution.num_outputs, 2)
                self.assertTrue(execution.code_location)
            with self.assertRaises(StopIteration):
                next(execution_iter)
Example #12
 def add_negative_v2_squared_to_itself():
     writer = dumping_callback.enable_dump_debug_info(
         dump_root_2, tensor_debug_mode="FULL_TENSOR")
     v2_squared = v2**2.0
     # Since dumping is disabled before the Neg op is called, no tensor data
     # should be dumped from the op, but this shouldn't affect the dumping of
     # the tensor data from the Neg op in `add_negative_v1_squared_to_itself`.
     # Both behaviors are checked below.
     dumping_callback.disable_dump_debug_info()
     negative_v2_squared = -v2_squared
     v2.assign_add(negative_v2_squared)
     writer.FlushNonExecutionFiles()
     writer.FlushExecutionFiles()
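
Examples #6 and #12 are the two thread bodies of a multi-threaded test; the scaffolding that defines v1, v2, dump_root_1, and dump_root_2 and launches the threads is not part of these excerpts. Below is a self-contained sketch of the same interleaving idea using the public API; every name in it is a placeholder standing in for the original test's setup, not the original code.

import tempfile
import threading

import tensorflow as tf

dump_root_1 = tempfile.mkdtemp()
dump_root_2 = tempfile.mkdtemp()
v1 = tf.Variable(10.0, dtype=tf.float32)
v2 = tf.Variable(3.0, dtype=tf.float32)


def add_negative_v1_squared_to_itself():
    tf.debugging.experimental.enable_dump_debug_info(
        dump_root_1, tensor_debug_mode="FULL_TENSOR")
    # Run in a loop to facilitate interleaving between threads.
    for _ in range(3):
        v1.assign_add(-(v1 ** 2.0))


def add_negative_v2_squared_to_itself():
    tf.debugging.experimental.enable_dump_debug_info(
        dump_root_2, tensor_debug_mode="FULL_TENSOR")
    v2_squared = v2 ** 2.0
    # Disable dumping before the Neg op below runs, as in Example #12.
    tf.debugging.experimental.disable_dump_debug_info()
    v2.assign_add(-v2_squared)


# Run one body on a separate thread and the other on the main thread, then join.
thread = threading.Thread(target=add_negative_v2_squared_to_itself)
thread.start()
add_negative_v1_squared_to_itself()
thread.join()
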
Example #13
    def testOpRegex(self, op_regex):
        writer = dumping_callback.enable_dump_debug_info(
            self.dump_root, tensor_debug_mode="FULL_TENSOR", op_regex=op_regex)

        @def_function.function
        def log_sum(x, y):
            return math_ops.log(x + y)

        @def_function.function
        def sin1p_log_sum(x, y):
            return math_ops.sin(1.0 + log_sum(x, y))

        x = constant_op.constant(2.0)
        y = constant_op.constant(3.0)
        self.assertAllClose(self.evaluate(sin1p_log_sum(x, y)),
                            np.sin(1.0 + np.log(5.0)))
        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()

        stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
        (context_ids, op_types, op_name_to_op_type,
         _) = self._readAndCheckGraphsFile(stack_frame_by_id)
        self.assertIn("AddV2", op_types)
        self.assertIn("Log", op_types)
        self.assertIn("Sin", op_types)

        (op_names, _, _, tensor_values
         ) = self._readAndCheckGraphExecutionTracesFile(context_ids)
        executed_op_types = [
            op_name_to_op_type[op_name] for op_name in op_names
        ]

        if op_regex == "AddV2":
            self.assertEqual(executed_op_types, ["AddV2", "AddV2"])
            self.assertLen(tensor_values, 2)
            self.assertAllClose(tensor_values[0], 5.0)  # 1st AddV2 op.
            self.assertAllClose(tensor_values[1],
                                np.log(5.0) + 1.0)  # 2nd AddV2 op.
        elif op_regex == "Log":
            self.assertEqual(executed_op_types, ["Log"])
            self.assertLen(tensor_values, 1)
            self.assertAllClose(tensor_values[0], np.log(5.0))  # Log op.
        else:  # "(AddV2|Log)"
            self.assertEqual(executed_op_types, ["AddV2", "Log", "AddV2"])
            self.assertLen(tensor_values, 3)
            self.assertAllClose(tensor_values[0], 5.0)  # 1st AddV2 op.
            self.assertAllClose(tensor_values[1], np.log(5.0))  # Log op.
            self.assertAllClose(tensor_values[2],
                                np.log(5.0) + 1.0)  # 2nd AddV2 op.
Example #14
 def testIncorrectTensorDTypeArgFormatLeadsToError(self):
     with self.assertRaisesRegexp(
             ValueError,
             r".*expected.*list.*tuple.*callable.*but received.*\{\}"):
         dumping_callback.enable_dump_debug_info(self.dump_root,
                                                 tensor_dtypes=dict())
     with self.assertRaisesRegexp(
             ValueError,
             r".*expected.*list.*tuple.*callable.*but received.*"):
         dumping_callback.enable_dump_debug_info(self.dump_root,
                                                 tensor_dtypes="float32")
     with self.assertRaisesRegexp(
             ValueError,
             r".*expected.*list.*tuple.*callable.*but received.*"):
         dumping_callback.enable_dump_debug_info(
             self.dump_root, tensor_dtypes=dtypes.float32)
     with self.assertRaises(TypeError):
         dumping_callback.enable_dump_debug_info(
             self.dump_root,
             tensor_dtypes=[
                 lambda dtype: dtype.is_floating,
                 lambda dtype: dtype.is_integer
             ])
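
For contrast with the malformed arguments rejected above, here is a small hedged sketch of tensor_dtypes forms the API documents as accepted: a list or tuple of DTypes or dtype names, or a single callable (not a list of callables). It uses the public endpoint with a placeholder dump path.

import tensorflow as tf

dump_root = "/tmp/tfdbg2_dump"  # Placeholder path.

# A list (or tuple) of DType objects.
tf.debugging.experimental.enable_dump_debug_info(
    dump_root, tensor_dtypes=[tf.float32, tf.int32])
tf.debugging.experimental.disable_dump_debug_info()

# A list of dtype names works as well.
tf.debugging.experimental.enable_dump_debug_info(
    dump_root, tensor_dtypes=["float32"])
tf.debugging.experimental.disable_dump_debug_info()

# A single callable acting as a DType filter (unlike the list of callables above).
tf.debugging.experimental.enable_dump_debug_info(
    dump_root, tensor_dtypes=lambda dtype: dtype.is_floating)
tf.debugging.experimental.disable_dump_debug_info()
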
Example #15
    def testDisableTracingWorks(self, tensor_debug_mode):
        writer = dumping_callback.enable_dump_debug_info(
            self.dump_root, tensor_debug_mode=tensor_debug_mode)
        dumping_callback.disable_dump_debug_info()

        x = constant_op.constant([10.0, 12.0, 10.0])
        for _ in range(2):
            array_ops.unique(x)

        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()

        with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
            source_files_iter = reader.source_files_iterator()
            stack_frames_iter = reader.stack_frames_iterator()
            execution_iter = reader.execution_iterator()
            # No source-file, stack-frame or execution data should have been dumped.
            with self.assertRaises(StopIteration):
                next(source_files_iter)
            with self.assertRaises(StopIteration):
                next(stack_frames_iter)
            with self.assertRaises(StopIteration):
                next(execution_iter)
Example #16
    def testDumpingMiniModel(self, distribution, tensor_debug_mode):
        with distribution.scope():
            writer = dumping_callback.enable_dump_debug_info(
                self.dump_root, tensor_debug_mode=tensor_debug_mode)

            mini_model = MiniModel()
            optimizer = gradient_descent.GradientDescentOptimizer(0.25)

            def train_step():
                with backprop.GradientTape() as tape:
                    loss = mini_model(array_ops.ones([1, 10]))
                    grads = tape.gradient(loss, mini_model.weights)
                    grads_and_vars = zip(grads, mini_model.weights)
                    optimizer.apply_gradients(grads_and_vars)

            distribution.experimental_run_v2(train_step)

            updated_var_values = self.evaluate(mini_model.variables)
            num_devices = len(distribution.extended.worker_devices)
            assert num_devices in (1, 2)
            if num_devices == 1:
                self.assertAllEqual(0.75 * np.ones([10, 1]),
                                    updated_var_values[0])
                self.assertAllEqual([0.75], updated_var_values[1])
            else:
                self.assertAllEqual(0.5 * np.ones([10, 1]),
                                    updated_var_values[0])
                self.assertAllEqual([0.5], updated_var_values[1])

            writer.FlushNonExecutionFiles()
            writer.FlushExecutionFiles()

        device_name_0 = distribution.extended.worker_devices[0]
        logging.info("device_name_0 = %s", device_name_0)
        if num_devices > 1:
            device_name_1 = distribution.extended.worker_devices[1]
            logging.info("device_name_1 = %s", device_name_1)

        with debug_events_reader.DebugDataReader(self.dump_root) as reader:
            reader.update()
            traces = reader.graph_execution_traces()

            # Verify graph-execution traces are available for both devices.
            # We don't assert MatMul occurs exactly once because the gradient of
            # MatMul involves MatMul.
            device_0_executed_op_types = [
                trace.op_type for trace in traces
                if trace.device_name.endswith(device_name_0)
            ]
            if num_devices > 1:
                device_1_executed_op_types = [
                    trace.op_type for trace in traces
                    if trace.device_name.endswith(device_name_1)
                ]
            self.assertIn("MatMul", device_0_executed_op_types)
            self.assertEqual(device_0_executed_op_types.count("BiasAdd"), 1)
            if num_devices > 1:
                self.assertIn("MatMul", device_1_executed_op_types)
                self.assertEqual(device_1_executed_op_types.count("BiasAdd"),
                                 1)

            if tensor_debug_mode == "NO_TENSOR":
                for trace in traces:
                    self.assertEqual(trace.debug_tensor_value, [])
            elif tensor_debug_mode == "FULL_TENSOR":
                device_0_matmul_values = [
                    reader.graph_execution_trace_to_tensor_value(trace)
                    for trace in traces if trace.op_type == "MatMul"
                    and trace.device_name.endswith(device_name_0)
                ]
                device_0_bias_add_values = [
                    reader.graph_execution_trace_to_tensor_value(trace)
                    for trace in traces if trace.op_type == "BiasAdd"
                    and trace.device_name.endswith(device_name_0)
                ]
                self.assertAllClose(device_0_matmul_values[0], [[10.0]])
                self.assertAllClose(device_0_bias_add_values[0], [[11.0]])
                if num_devices > 1:
                    device_1_matmul_values = [
                        reader.graph_execution_trace_to_tensor_value(trace)
                        for trace in traces if trace.op_type == "MatMul"
                        and trace.device_name.endswith(device_name_1)
                    ]
                    device_1_bias_add_values = [
                        reader.graph_execution_trace_to_tensor_value(trace)
                        for trace in traces if trace.op_type == "BiasAdd"
                        and trace.device_name.endswith(device_name_1)
                    ]
                    self.assertAllClose(device_1_matmul_values[0], [[10.0]])
                    self.assertAllClose(device_1_bias_add_values[0], [[11.0]])
Example #17
    def testMobileNetV2Fit(self, tensor_debug_mode):
        """Test that training a Keras MobileNetV2 model works with dumping enabled."""
        # Use a large circular-buffer to make sure we capture all the executed ops.
        writer = dumping_callback.enable_dump_debug_info(
            self.dump_root,
            tensor_debug_mode=tensor_debug_mode,
            circular_buffer_size=100000)
        model = mobilenet_v2.MobileNetV2(input_shape=(32, 32, 3),
                                         alpha=0.1,
                                         weights=None)
        y = model.layers[22].output
        y = core.Flatten()(y)
        y = core.Dense(1)(y)
        model = models.Model(inputs=model.inputs, outputs=y)

        batch_size = 2
        xs = np.zeros([batch_size] + list(model.input_shape[1:]))
        ys = np.zeros([batch_size] + list(model.output_shape[1:]))
        model.compile(optimizer="sgd", loss="mse")
        epochs = 1
        history = model.fit(xs, ys, epochs=epochs, verbose=0)
        self.assertLen(history.history["loss"], epochs)

        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()

        stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
        (context_ids, op_types, op_name_to_op_type,
         _) = self._readAndCheckGraphsFile(stack_frame_by_id)
        # Simply assert that graphs were recorded and refrain from asserting on the
        # internal details of the Keras model.
        self.assertTrue(context_ids)
        self.assertTrue(op_types)
        self.assertTrue(op_name_to_op_type)

        if context.executing_eagerly():
            # NOTE(b/142486213): Execution of the TF function happens with
            # Session.run() in v1 graph mode, hence it doesn't get logged to the
            # .execution file.
            executed_op_types, _, _, _, _ = self._readAndCheckExecutionFile()
            self.assertTrue(executed_op_types)

        (op_names, _, _, tensor_values
         ) = self._readAndCheckGraphExecutionTracesFile(context_ids)
        executed_op_types = [
            op_name_to_op_type[op_name] for op_name in op_names
        ]
        # These are the ops that we can safely assume to have been executed during
        # the model's fit() call.
        self.assertIn("Conv2D", executed_op_types)
        self.assertIn("Relu6", executed_op_types)
        self.assertIn("Conv2DBackpropFilter", executed_op_types)
        self.assertIn("Relu6Grad", executed_op_types)
        if tensor_debug_mode == "NO_TENSOR":
            # Under the default NO_TENSOR tensor-debug mode, the tensor_proto ought to
            # be an empty float32 tensor.
            for tensor_value in tensor_values:
                self.assertEqual(tensor_value.dtype, np.float32)
                self.assertEqual(tensor_value.shape, (0, ))
        elif tensor_debug_mode == "FULL_TENSOR":
            conv2d_values = [
                tensor_values[i] for i, op_type in enumerate(executed_op_types)
                if op_type == "Conv2D"
            ]
            self.assertTrue(conv2d_values)
            for conv2d_value in conv2d_values:
                self.assertGreater(len(conv2d_value.shape), 1)
                self.assertEqual(conv2d_value.shape[0], batch_size)
            relu6_values = [
                tensor_values[i] for i, op_type in enumerate(executed_op_types)
                if op_type == "Relu6"
            ]
            self.assertTrue(relu6_values)
            for relu6_value in relu6_values:
                self.assertGreater(len(relu6_value.shape), 1)
                self.assertEqual(relu6_value.shape[0], batch_size)
            conv2d_bp_filter_values = [
                tensor_values[i] for i, op_type in enumerate(executed_op_types)
                if op_type == "Conv2DBackpropFilter"
            ]
            self.assertTrue(conv2d_bp_filter_values)
            for conv2d_bp_filter_value in conv2d_bp_filter_values:
                self.assertGreater(len(conv2d_bp_filter_value.shape), 1)
            relu6_grad_values = [
                tensor_values[i] for i, op_type in enumerate(executed_op_types)
                if op_type == "Relu6Grad"
            ]
            self.assertTrue(relu6_grad_values)
            for relu6_grad_value in relu6_grad_values:
                self.assertGreater(len(relu6_grad_value.shape), 1)
Example #18
    def testPureEagerOpExecution(self, tensor_debug_mode):
        """Test catching Infinity in eager op execution: float32."""
        writer = dumping_callback.enable_dump_debug_info(
            self.dump_root, tensor_debug_mode=tensor_debug_mode)

        x = constant_op.constant(10.0)
        zero = constant_op.constant(0.0)
        one = constant_op.constant(1.0)
        two = constant_op.constant(2.0)
        three = constant_op.constant(3.0)
        # Use Collatz conjecture as a test case.
        while x > one:
            if math_ops.equal(x % two, zero):
                x = x / two
            else:
                x = x * three + one

        writer.FlushNonExecutionFiles()
        self._readAndCheckMetadataFile()
        stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()

        # Before FlushExecutionFiles() is called, the .execution file should be
        # empty.
        with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
            execution_iter = reader.execution_iterator()
            with self.assertRaises(StopIteration):
                next(execution_iter)

            # After the flushing, the .execution file should hold the appropriate
            # contents.
            writer.FlushExecutionFiles()
            execution_iter = reader.execution_iterator()
            prev_wall_time = 1
            executed_op_types = []
            tensor_values = collections.defaultdict(lambda: [])
            for debug_event in execution_iter:
                self.assertGreaterEqual(debug_event.wall_time, prev_wall_time)
                prev_wall_time = debug_event.wall_time
                execution = debug_event.execution
                executed_op_types.append(execution.op_type)
                self.assertTrue(execution.input_tensor_ids)
                self.assertTrue(execution.output_tensor_ids)
                if tensor_debug_mode == "NO_TENSOR":
                    # Due to the NO_TENSOR tensor debug mode, tensor_protos ought to
                    # be empty.
                    self.assertFalse(execution.tensor_protos)
                elif tensor_debug_mode == "FULL_TENSOR":
                    # Under the FULL_TENSOR mode, the value of the tensor should be
                    # available through `tensor_protos`.
                    tensor_value = float(
                        tensor_util.MakeNdarray(execution.tensor_protos[0]))
                    tensor_values[execution.op_type].append(tensor_value)
                # Verify the code_location field.
                self.assertTrue(execution.code_location.stack_frame_ids)
                for stack_frame_id in execution.code_location.stack_frame_ids:
                    self.assertIn(stack_frame_id, stack_frame_by_id)
            if tensor_debug_mode == "FULL_TENSOR":
                self.assertAllClose(tensor_values["Greater"],
                                    [1, 1, 1, 1, 1, 1, 0])
                self.assertAllClose(tensor_values["RealDiv"], [5, 8, 4, 2, 1])
                self.assertAllClose(tensor_values["Mul"], [15])
                self.assertAllClose(tensor_values["AddV2"], [16])

            self.assertEqual(
                executed_op_types,
                [
                    "Greater",
                    "FloorMod",
                    "Equal",
                    "RealDiv",  # 10 --> 5
                    "Greater",
                    "FloorMod",
                    "Equal",
                    "Mul",
                    "AddV2",  # 5 --> 16
                    "Greater",
                    "FloorMod",
                    "Equal",
                    "RealDiv",  # 16 --> 8
                    "Greater",
                    "FloorMod",
                    "Equal",
                    "RealDiv",  # 8 --> 4
                    "Greater",
                    "FloorMod",
                    "Equal",
                    "RealDiv",  # 4 --> 2
                    "Greater",
                    "FloorMod",
                    "Equal",
                    "RealDiv",  # 2 --> 1
                    "Greater"
                ])

            # Due to the pure eager op execution, the .graph file and the
            # .graph_execution_traces file ought to be empty.
            graphs_iterator = reader.graphs_iterator()
            with self.assertRaises(StopIteration):
                next(graphs_iterator)
            graph_trace_iter = reader.graph_execution_traces_iterator()
            with self.assertRaises(StopIteration):
                next(graph_trace_iter)
Example #19
    def testKerasModelFitOnOneOrTwoDevices(self, distribution,
                                           tensor_debug_mode):
        writer = dumping_callback.enable_dump_debug_info(
            self.dump_root, tensor_debug_mode=tensor_debug_mode)

        with distribution.scope():
            model = keras.Sequential()
            model.add(
                keras.layers.Dense(units=10,
                                   input_shape=[5],
                                   activation="relu"))
            model.add(keras.layers.Dense(units=1))
            model.compile(loss="mse", optimizer="sgd")

            batch_size = 20
            x = np.ones([batch_size, 5])
            y = np.ones([batch_size, 1])
            epochs = 1
            history = model.fit(x, y, epochs=epochs, verbose=0)
            self.assertLen(history.history["loss"], epochs)

            writer.FlushNonExecutionFiles()
            writer.FlushExecutionFiles()

        stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
        (context_ids, _,
         op_name_to_op_type) = self._readAndCheckGraphsFile(stack_frame_by_id)
        (op_names, device_names, _, tensor_values
         ) = self._readAndCheckGraphExecutionTracesFile(context_ids)

        # Eager execution of tf.function should be recorded.
        executed_op_types, _, _, _, _ = self._readAndCheckExecutionFile()
        fit_functions = [
            op_type for op_type in executed_op_types
            if "_distributed_function" in op_type
        ]
        self.assertLen(fit_functions, epochs)

        num_devices = len(distribution.extended.worker_devices)

        device_name_0 = distribution.extended.worker_devices[0]
        logging.info("device_name_0 = %s", device_name_0)
        if num_devices > 1:
            device_name_1 = distribution.extended.worker_devices[1]
            logging.info("device_name_1 = %s", device_name_1)

        executed_op_types = [
            op_name_to_op_type[op_name] for op_name in op_names
        ]
        device_0_executed_op_types = filter_by_device_name(
            executed_op_types, device_names, device_name_0)
        if num_devices > 1:
            device_1_executed_op_types = filter_by_device_name(
                executed_op_types, device_names, device_name_1)

        self.assertIn("MatMul", device_0_executed_op_types)
        self.assertIn("BiasAdd", device_0_executed_op_types)
        self.assertIn("Relu", device_0_executed_op_types)
        self.assertIn("ReluGrad", device_0_executed_op_types)
        if num_devices > 1:
            # If two devices are involved, assert that the ops inside tf.functions
            # are executed and recorded the same number of times by the dumping
            # op-callback.
            self.assertEqual(device_0_executed_op_types.count("MatMul"),
                             device_1_executed_op_types.count("MatMul"))
            self.assertEqual(device_0_executed_op_types.count("BiasAdd"),
                             device_1_executed_op_types.count("BiasAdd"))
            self.assertEqual(device_0_executed_op_types.count("Relu"),
                             device_1_executed_op_types.count("Relu"))
            self.assertEqual(device_0_executed_op_types.count("ReluGrad"),
                             device_1_executed_op_types.count("ReluGrad"))

        if tensor_debug_mode == "NO_TENSOR":
            for value_list in tensor_values:
                for tensor_value in value_list:
                    self.assertEqual(tensor_value.dtype, np.float32)
                    self.assertEqual(tensor_value.shape, [])
        elif tensor_debug_mode == "FULL_TENSOR":
            gpu_0_relu_values = filter_by_device_name_and_op_type(
                tensor_values, device_names, executed_op_types, device_name_0,
                "Relu")
            self.assertTrue(gpu_0_relu_values)
            gpu_0_relu_grad_values = filter_by_device_name_and_op_type(
                tensor_values, device_names, executed_op_types, device_name_0,
                "ReluGrad")
            self.assertTrue(gpu_0_relu_grad_values)
            if num_devices > 1:
                gpu_1_relu_values = filter_by_device_name_and_op_type(
                    tensor_values, device_names, executed_op_types,
                    device_name_1, "Relu")
                self.assertTrue(gpu_1_relu_values)
                for i in range(len(gpu_0_relu_values)):
                    self.assertEqual(gpu_0_relu_values[i].shape,
                                     gpu_1_relu_values[i].shape)
                gpu_1_relu_grad_values = filter_by_device_name_and_op_type(
                    tensor_values, device_names, executed_op_types,
                    device_name_1, "ReluGrad")
                self.assertTrue(gpu_1_relu_grad_values)
                for i in range(len(gpu_0_relu_grad_values)):
                    self.assertEqual(gpu_0_relu_grad_values[i].shape,
                                     gpu_1_relu_grad_values[i].shape)
Example #20
    def testDumpingMiniModel(self, distribution, tensor_debug_mode):
        with distribution.scope():
            writer = dumping_callback.enable_dump_debug_info(
                self.dump_root, tensor_debug_mode=tensor_debug_mode)

            mini_model = MiniModel()
            optimizer = gradient_descent.GradientDescentOptimizer(0.25)

            def train_step():
                with backprop.GradientTape() as tape:
                    loss = mini_model(array_ops.ones([1, 10]))
                    grads = tape.gradient(loss, mini_model.weights)
                    grads_and_vars = zip(grads, mini_model.weights)
                    optimizer.apply_gradients(grads_and_vars)

            distribution.experimental_run_v2(train_step)

            updated_var_values = self.evaluate(mini_model.variables)
            num_devices = len(distribution.extended.worker_devices)
            assert num_devices in (1, 2)
            if num_devices == 1:
                self.assertAllEqual(0.75 * np.ones([10, 1]),
                                    updated_var_values[0])
                self.assertAllEqual([0.75], updated_var_values[1])
            else:
                self.assertAllEqual(0.5 * np.ones([10, 1]),
                                    updated_var_values[0])
                self.assertAllEqual([0.5], updated_var_values[1])

            writer.FlushNonExecutionFiles()
            writer.FlushExecutionFiles()

        stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
        (context_ids, _,
         op_name_to_op_type) = self._readAndCheckGraphsFile(stack_frame_by_id)
        (op_names, device_names, _, tensor_values
         ) = self._readAndCheckGraphExecutionTracesFile(context_ids)
        executed_op_types = [
            op_name_to_op_type[op_name] for op_name in op_names
        ]

        device_name_0 = distribution.extended.worker_devices[0]
        logging.info("device_name_0 = %s", device_name_0)
        if num_devices > 1:
            device_name_1 = distribution.extended.worker_devices[1]
            logging.info("device_name_1 = %s", device_name_1)

        device_0_executed_op_types = filter_by_device_name(
            executed_op_types, device_names, device_name_0)
        if num_devices > 1:
            device_1_executed_op_types = filter_by_device_name(
                executed_op_types, device_names, device_name_1)
        # Verify graph-execution traces are available for both devices.
        # We don't assert MatMul occurs exactly once because the gradient of MatMul
        # involves MatMul.
        self.assertIn("MatMul", device_0_executed_op_types)
        self.assertEqual(device_0_executed_op_types.count("BiasAdd"), 1)
        if num_devices > 1:
            self.assertIn("MatMul", device_1_executed_op_types)
            self.assertEqual(device_1_executed_op_types.count("BiasAdd"), 1)

        if tensor_debug_mode == "NO_TENSOR":
            for value_list in tensor_values:
                for tensor_value in value_list:
                    self.assertEqual(tensor_value.dtype, np.float32)
                    self.assertEqual(tensor_value.shape, [])
        elif tensor_debug_mode == "FULL_TENSOR":
            device_0_matmul_values = filter_by_device_name_and_op_type(
                tensor_values, device_names, executed_op_types, device_name_0,
                "MatMul")
            device_0_bias_add_values = filter_by_device_name_and_op_type(
                tensor_values, device_names, executed_op_types, device_name_0,
                "BiasAdd")
            self.assertAllClose(device_0_matmul_values[0], [[10.0]])
            self.assertAllClose(device_0_bias_add_values[0], [[11.0]])
            if num_devices > 1:
                device_1_matmul_values = filter_by_device_name_and_op_type(
                    tensor_values, device_names, executed_op_types,
                    device_name_1, "MatMul")
                device_1_bias_add_values = filter_by_device_name_and_op_type(
                    tensor_values, device_names, executed_op_types,
                    device_name_1, "BiasAdd")
                self.assertAllClose(device_1_matmul_values[0], [[10.0]])
                self.assertAllClose(device_1_bias_add_values[0], [[11.0]])
Example #21
    def testTensorDTypesAndOpRegexFilters(self, tensor_dtypes, op_regex):
        writer = dumping_callback.enable_dump_debug_info(
            self.dump_root,
            tensor_debug_mode="FULL_TENSOR",
            tensor_dtypes=tensor_dtypes,
            op_regex=op_regex)

        @def_function.function
        def unique_sum(xs):
            """Sum over the unique values, for testing."""
            unique_xs, indices = array_ops.unique(xs)
            return math_ops.reduce_sum(unique_xs), indices

        xs = constant_op.constant([2., 6., 8., 1., 2.], dtype=dtypes.float32)
        y, indices = self.evaluate(unique_sum(xs))
        self.assertAllClose(y, 17.)
        self.assertAllEqual(indices, [0, 1, 2, 3, 0])

        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()
        stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
        (context_ids, _, op_name_to_op_type,
         _) = self._readAndCheckGraphsFile(stack_frame_by_id)
        (op_names, _, _, tensor_values
         ) = self._readAndCheckGraphExecutionTracesFile(context_ids)
        executed_op_types = [
            op_name_to_op_type[op_name] for op_name in op_names
        ]

        if tensor_dtypes == [dtypes.float32] and not op_regex:
            self.assertEqual(executed_op_types, ["Unique", "Sum"])
            self.assertLen(tensor_values, 2)
            self.assertAllClose(tensor_values[0],
                                [2., 6., 8., 1.])  # Unique values.
            self.assertAllClose(tensor_values[1], 17.)  # Sum.
        elif tensor_dtypes == ["float32"] and op_regex == "Sum":
            self.assertEqual(executed_op_types, ["Sum"])
            self.assertLen(tensor_values, 1)
            self.assertAllClose(tensor_values[0], 17.)  # Sum.
        elif tensor_dtypes == (dtypes.float32, ) and op_regex == "(?!Sum)":
            self.assertEqual(executed_op_types, ["Unique"])
            self.assertLen(tensor_values, 1)
            self.assertAllClose(tensor_values[0],
                                [2., 6., 8., 1.])  # Unique values.
        elif tensor_dtypes == [dtypes.int32] and not op_regex:
            self.assertEqual(executed_op_types, ["Unique"])
            self.assertLen(tensor_values, 1)
            self.assertAllEqual(tensor_values[0],
                                [0, 1, 2, 3, 0])  # Unique indices.
        elif callable(tensor_dtypes) and not op_regex:
            self.assertEqual(executed_op_types, ["Unique"])
            self.assertLen(tensor_values, 1)
            self.assertAllEqual(tensor_values[0],
                                [0, 1, 2, 3, 0])  # Unique indices.
        elif not tensor_dtypes and op_regex == "(?!Sum)":
            self.assertEqual(executed_op_types, ["Unique", "Unique"])
            self.assertLen(tensor_values, 2)
            self.assertAllClose(tensor_values[0],
                                [2., 6., 8., 1.])  # Unique values.
            self.assertAllEqual(tensor_values[1],
                                [0, 1, 2, 3, 0])  # Unique indices.
        else:  # "All".
            self.assertEqual(executed_op_types, ["Unique", "Unique", "Sum"])
            self.assertLen(tensor_values, 3)
            self.assertAllClose(tensor_values[0],
                                [2., 6., 8., 1.])  # Unique values.
            self.assertAllEqual(tensor_values[1],
                                [0, 1, 2, 3, 0])  # Unique indices.
            self.assertAllClose(tensor_values[2], 17.)  # Sum.
Example #22
    def testKerasModelFitOnOneOrTwoDevices(self, distribution,
                                           tensor_debug_mode):
        writer = dumping_callback.enable_dump_debug_info(
            self.dump_root, tensor_debug_mode=tensor_debug_mode)

        with distribution.scope():
            model = keras.Sequential()
            model.add(
                keras.layers.Dense(units=10,
                                   input_shape=[5],
                                   activation="relu"))
            model.add(keras.layers.Dense(units=1))
            model.compile(loss="mse", optimizer="sgd")

            batch_size = 20
            x = np.ones([batch_size, 5])
            y = np.ones([batch_size, 1])
            epochs = 1
            history = model.fit(x, y, epochs=epochs, verbose=0)
            self.assertLen(history.history["loss"], epochs)

            writer.FlushNonExecutionFiles()
            writer.FlushExecutionFiles()

        with debug_events_reader.DebugDataReader(self.dump_root) as reader:
            reader.update()
            executions = reader.executions()
            fit_executions = [
                execution.op_type for execution in executions
                if "_distributed_function" in execution.op_type
            ]
            self.assertLen(fit_executions, epochs)

            traces = reader.graph_execution_traces()
            num_devices = len(distribution.extended.worker_devices)
            device_name_0 = distribution.extended.worker_devices[0]
            if num_devices > 1:
                device_name_1 = distribution.extended.worker_devices[1]
            device_0_executed_op_types = [
                trace.op_type for trace in traces
                if trace.device_name.endswith(device_name_0)
            ]
            if num_devices > 1:
                device_1_executed_op_types = [
                    trace.op_type for trace in traces
                    if trace.device_name.endswith(device_name_1)
                ]

            self.assertIn("MatMul", device_0_executed_op_types)
            self.assertIn("BiasAdd", device_0_executed_op_types)
            self.assertIn("Relu", device_0_executed_op_types)
            self.assertIn("ReluGrad", device_0_executed_op_types)
            if num_devices > 1:
                # If two devices are involved, assert that the ops inside tf.functions
                # are executed and recorded the same number of times by the dumping
                # op-callback.
                self.assertEqual(device_0_executed_op_types.count("MatMul"),
                                 device_1_executed_op_types.count("MatMul"))
                self.assertEqual(device_0_executed_op_types.count("BiasAdd"),
                                 device_1_executed_op_types.count("BiasAdd"))
                self.assertEqual(device_0_executed_op_types.count("Relu"),
                                 device_1_executed_op_types.count("Relu"))
                self.assertEqual(device_0_executed_op_types.count("ReluGrad"),
                                 device_1_executed_op_types.count("ReluGrad"))

            if tensor_debug_mode == "NO_TENSOR":
                for trace in traces:
                    self.assertEqual(trace.debug_tensor_value, [])
            elif tensor_debug_mode == "FULL_TENSOR":
                gpu_0_relu_values = [
                    reader.graph_execution_trace_to_tensor_value(trace)
                    for trace in traces if trace.op_type == "Relu"
                    and trace.device_name.endswith(device_name_0)
                ]
                self.assertTrue(gpu_0_relu_values)
                gpu_0_relu_grad_values = [
                    reader.graph_execution_trace_to_tensor_value(trace)
                    for trace in traces if trace.op_type == "ReluGrad"
                    and trace.device_name.endswith(device_name_0)
                ]
                self.assertTrue(gpu_0_relu_grad_values)
                if num_devices > 1:
                    gpu_1_relu_values = [
                        reader.graph_execution_trace_to_tensor_value(trace)
                        for trace in traces if trace.op_type == "Relu"
                        and trace.device_name.endswith(device_name_1)
                    ]
                    self.assertTrue(gpu_1_relu_values)
                    for i in range(len(gpu_0_relu_values)):
                        self.assertEqual(gpu_0_relu_values[i].shape,
                                         gpu_1_relu_values[i].shape)
                    gpu_1_relu_grad_values = [
                        reader.graph_execution_trace_to_tensor_value(trace)
                        for trace in traces if trace.op_type == "ReluGrad"
                        and trace.device_name.endswith(device_name_1)
                    ]
                    self.assertTrue(gpu_1_relu_grad_values)
                    for i in range(len(gpu_0_relu_grad_values)):
                        self.assertEqual(gpu_0_relu_grad_values[i].shape,
                                         gpu_1_relu_grad_values[i].shape)
Example #23
    def testOnGraphExecutionTraceIsCalled(self, tensor_debug_mode):
        xs = constant_op.constant([2., 6., 8., 1., 2.], dtype=dtypes.float32)
        writer = dumping_callback.enable_dump_debug_info(
            self.dump_root, tensor_debug_mode=tensor_debug_mode)

        @def_function.function
        def unique_sum(xs):
            """Sum over the unique values, for testing."""
            unique_xs, indices = array_ops.unique(xs)
            return math_ops.reduce_sum(unique_xs), indices

        unique_sum(xs)
        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()

        with debug_events_reader.DebugDataReader(self.dump_root) as reader:
            test_monitor = TestMonitor(reader)
            reader.update()
            self.assertLen(test_monitor.executions, 1)

            execution = test_monitor.executions[0]
            self.assertTrue(execution.wall_time)
            self.assertStartsWith(execution.op_type, "__inference_unique_sum")
            self.assertLen(execution.output_tensor_device_ids, 2)
            self.assertLen(execution.input_tensor_ids, 1)
            self.assertLen(execution.output_tensor_ids, 2)
            self.assertEqual(execution.num_outputs, 2)
            self.assertTrue(execution.graph_id)

            traces = test_monitor.graph_execution_traces
            if tensor_debug_mode == "CONCISE_HEALTH":
                self.assertLen(traces, 3)  # [Placeholder:0, Unique:0, Sum:0].
                self.assertEqual(traces[0].op_type, "Placeholder")
                self.assertEqual(traces[0].output_slot, 0)
                self.assertEqual(traces[1].op_type, "Unique")
                self.assertEqual(traces[1].output_slot, 0)
                # Unique:1 is not traced under CONCISE_HEALTH mode, as it's int-dtype.
                self.assertEqual(traces[2].op_type, "Sum")
                self.assertEqual(traces[2].output_slot, 0)
                # [tensor_id, element_count, neg_inf_count, pos_inf_count, nan_count].
                self.assertLen(traces[0].debug_tensor_value, 5)
                self.assertLen(traces[1].debug_tensor_value, 5)
                self.assertLen(traces[2].debug_tensor_value, 5)
            elif tensor_debug_mode == "FULL_HEALTH":
                self.assertLen(traces, 3)  # [Placeholder:0, Unique:0, Sum:0].
                self.assertEqual(traces[0].op_type, "Placeholder")
                self.assertEqual(traces[0].output_slot, 0)
                self.assertEqual(traces[1].op_type, "Unique")
                self.assertEqual(traces[1].output_slot, 0)
                # Unique:1 is not traced under FULL_HEALTH mode, as it's int-dtype.
                self.assertEqual(traces[2].op_type, "Sum")
                self.assertEqual(traces[2].output_slot, 0)
                # [tensor_id, device_id, dtype, rank, element_count,
                #  neg_inf_count, pos_inf_count, nan_count,
                #  neg_finite_count, zero_count, pos_finite_count].
                self.assertLen(traces[0].debug_tensor_value, 11)
                self.assertLen(traces[1].debug_tensor_value, 11)
                self.assertLen(traces[2].debug_tensor_value, 11)
            elif tensor_debug_mode == "FULL_TENSOR":
                # [Placeholder:0, Unique:0, Unique:1, Const:0, Sum:0].
                self.assertLen(traces, 5)
                self.assertEqual(traces[0].op_type, "Placeholder")
                self.assertEqual(traces[0].output_slot, 0)
                self.assertIsNone(traces[0].debug_tensor_value)
                self.assertAllEqual(
                    reader.graph_execution_trace_to_tensor_value(traces[0]),
                    [2., 6., 8., 1., 2.])
                self.assertEqual(traces[1].op_type, "Unique")
                self.assertEqual(traces[1].output_slot, 0)
                self.assertIsNone(traces[1].debug_tensor_value)
                self.assertAllEqual(
                    reader.graph_execution_trace_to_tensor_value(traces[1]),
                    [2., 6., 8., 1.])
                self.assertEqual(traces[2].op_type, "Unique")
                self.assertEqual(traces[2].output_slot, 1)
                self.assertIsNone(traces[2].debug_tensor_value)
                self.assertAllEqual(
                    reader.graph_execution_trace_to_tensor_value(traces[2]),
                    [0, 1, 2, 3, 0])
                self.assertEqual(traces[3].op_type, "Const")
                self.assertEqual(traces[3].output_slot, 0)
                self.assertIsNone(traces[3].debug_tensor_value)
                self.assertAllClose(
                    reader.graph_execution_trace_to_tensor_value(traces[3]),
                    [0])
                self.assertEqual(traces[4].op_type, "Sum")
                self.assertEqual(traces[4].output_slot, 0)
                self.assertIsNone(traces[4].debug_tensor_value)
                self.assertAllClose(
                    reader.graph_execution_trace_to_tensor_value(traces[4]),
                    17.)
Ejemplo n.º 24
0
    def testFunctionExecutionWithControlFlow(self, tensor_debug_mode):
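        """Test dumping from a tf.function that contains a while loop."""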
        writer = dumping_callback.enable_dump_debug_info(
            self.dump_root, tensor_debug_mode=tensor_debug_mode)

        @def_function.function
        def iterative_doubling(x, times):
            i = constant_op.constant(0, dtype=dtypes.int32)
            while i < times:
                x = x * 2.0
                i += 1
            return x

        x = constant_op.constant(0.5, dtype=dtypes.float32)
        times = constant_op.constant(4, dtype=dtypes.int32)
        self.assertAllClose(self.evaluate(iterative_doubling(x, times)), 8.0)

        writer.FlushNonExecutionFiles()
        stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()

        # Verify the content of the .graphs file.
        context_ids, op_types, op_name_to_op_type, _ = (
            self._readAndCheckGraphsFile(stack_frame_by_id))
        self.assertIn("Less", op_types)
        self.assertIn("Mul", op_types)
        self.assertIn("AddV2", op_types)

        # Before FlushExecutionFiles() is called, the .execution and
        # .graph_execution_traces files should be both empty.
        with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
            execution_iter = reader.execution_iterator()
            graph_execution_traces_iter = (
                reader.graph_execution_traces_iterator())
            with self.assertRaises(StopIteration):
                next(execution_iter)
            with self.assertRaises(StopIteration):
                next(graph_execution_traces_iter)

            # TODO(cais): Backport execution instrumentation to tf.Session.
            writer.FlushExecutionFiles()
            # After the flushing, the .execution file should hold the appropriate
            # contents.
            if context.executing_eagerly():
                (executed_op_types, input_tensor_ids, output_tensor_ids,
                 tensor_debug_modes,
                 tensor_values) = self._readAndCheckExecutionFile()
                # NOTE(b/142486213): Execution of the TF function happens with
                # Session.run() in v1 graph mode, hence it doesn't get logged to the
                # .execution file.
                self.assertLen(executed_op_types, 1)
                self.assertIn("iterative_doubling", executed_op_types[0])
                self.assertLen(input_tensor_ids[0], 2)
                self.assertLen(output_tensor_ids[0], 1)
                self.assertEqual(
                    tensor_debug_modes[0],
                    debug_event_pb2.TensorDebugMode.Value(tensor_debug_mode))
                if tensor_debug_mode == "FULL_TENSOR":
                    self.assertAllClose(tensor_values, [[8.0]])

            (op_names, _, output_slots, tensor_values
             ) = self._readAndCheckGraphExecutionTracesFile(context_ids)
            executed_op_types = [
                op_name_to_op_type[op_name] for op_name in op_names
            ]
            # The Less op should have been executed 5 times.
            self.assertEqual(executed_op_types.count("Less"), 5)
            # The last executed op should be Less.
            self.assertEqual(executed_op_types[-1], "Less")
            # The Mul op should have been executed 4 times.
            self.assertEqual(executed_op_types.count("Mul"), 4)
            # The AddV2 op should have been run, but we refrain from asserting on how
            # many times it's executed.
            self.assertIn("AddV2", executed_op_types)
            for output_slot in output_slots:
                self.assertEqual(output_slot, 0)
            if tensor_debug_mode == "NO_TENSOR":
                # Under the default NO_TENSOR tensor-debug mode, the tensor_proto ought
                # to be an empty float32 tensor.
                for tensor_value in tensor_values:
                    self.assertEqual(tensor_value.dtype, np.float32)
                    self.assertEqual(tensor_value.shape, (0, ))
            elif tensor_debug_mode == "FULL_TENSOR":
                less_values = [
                    tensor_values[i]
                    for i, op_type in enumerate(executed_op_types)
                    if op_type == "Less"
                ]
                self.assertAllClose(less_values,
                                    [True, True, True, True, False])
                mul_values = [
                    tensor_values[i]
                    for i, op_type in enumerate(executed_op_types)
                    if op_type == "Mul"
                ]
                self.assertAllClose(mul_values, [1.0, 2.0, 4.0, 8.0])
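
For orientation, the snippet below is a minimal, self-contained sketch of the enable/flush/read cycle that the tests above exercise. It is an illustration only, assuming TensorFlow 2.x with the tfdbg2 modules importable; the dump directory path and the `double` function are hypothetical.

import tensorflow as tf

from tensorflow.python.debug.lib import debug_events_reader
from tensorflow.python.debug.lib import dumping_callback

# Enable dumping. FULL_TENSOR records complete tensor values; NO_TENSOR (the
# default) records only metadata.
dump_root = "/tmp/tfdbg2_dump"  # Hypothetical path.
writer = dumping_callback.enable_dump_debug_info(
    dump_root, tensor_debug_mode="FULL_TENSOR")

@tf.function
def double(x):
    return x * 2.0

double(tf.constant(3.0))

# Flush the source-file/graph files and the execution files to disk.
writer.FlushNonExecutionFiles()
writer.FlushExecutionFiles()

# Read the dumped debug events back.
with debug_events_reader.DebugDataReader(dump_root) as reader:
    reader.update()
    for execution in reader.executions():
        print(execution.op_type, execution.num_outputs)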