def testCallingEnableRepeatedlyWithDifferentTensorDebugMode(self):
  """Re-enabling dumping with a different tensor-debug mode raises an error.

  Dumping is first enabled on `self.dump_root` in NO_TENSOR mode and a
  tf.function is run to confirm that only empty float32 placeholders are
  written.  A second enable_dump_debug_info() call on the same dump root with
  FULL_TENSOR is then expected to raise a ValueError saying the new mode will
  not be honored, i.e. the earlier mode is NOT silently overwritten.
  """
  writer = dumping_callback.enable_dump_debug_info(
      self.dump_root, tensor_debug_mode="NO_TENSOR")

  @def_function.function
  def add_1_divide_by_2(x):
    return (x + 1.0) / 2.0

  self.assertAllClose(add_1_divide_by_2(constant_op.constant(4.0)), 2.5)
  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()

  stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
  context_ids, _, _, _ = self._readAndCheckGraphsFile(stack_frame_by_id)

  # The single top-level execution carries no tensor values in NO_TENSOR mode.
  _, _, _, _, tensor_values = self._readAndCheckExecutionFile()
  self.assertEqual(tensor_values, [[]])

  # Two intra-graph traces are expected, each an empty float32 tensor.
  (_, _, _,
   tensor_values) = self._readAndCheckGraphExecutionTracesFile(context_ids)
  self.assertLen(tensor_values, 2)
  for tensor_value in tensor_values:
    self.assertEqual(tensor_value.dtype, np.float32)
    self.assertEqual(tensor_value.shape, (0,))

  # Asking for a different mode on the same dump root must be rejected.
  with self.assertRaisesRegexp(
      ValueError, r"already.*NO_TENSOR.*FULL_TENSOR.*not be honored"):
    dumping_callback.enable_dump_debug_info(
        self.dump_root, tensor_debug_mode="FULL_TENSOR")
def testInvalidTensorDebugModeCausesError(self):
  """An unrecognized tensor_debug_mode string is rejected with ValueError."""
  expected_message = (
      r"Invalid value in tensor_debug_mode \(\'NONSENSICAL\'\).*"
      r"Valid options.*NO_TENSOR.*")
  with self.assertRaisesRegexp(ValueError, expected_message):
    dumping_callback.enable_dump_debug_info(
        self.dump_root, tensor_debug_mode="NONSENSICAL")
def testCallingEnableTracingTwiceWithDifferentDumpRootsOverwrites(self):
  """A second enable call with a new dump root redirects the dumping there.

  Dumping is enabled on `self.dump_root` and then on a second directory, after
  which two eager Unique ops are run.  Both executions are expected in the
  second directory, and the first directory is expected to hold no execution
  events.
  """
  dumping_callback.enable_dump_debug_info(self.dump_root)
  new_dump_root = self.dump_root + "_new_dump_root"
  writer = dumping_callback.enable_dump_debug_info(new_dump_root)

  num_unique_calls = 2
  x = constant_op.constant([10.0, 12.0, 10.0])
  for _ in range(num_unique_calls):
    array_ops.unique(x)

  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()

  with debug_events_reader.DebugEventsReader(new_dump_root) as new_reader:
    new_root_events = new_reader.execution_iterator()
    for _ in range(num_unique_calls):
      event = next(new_root_events)
      self.assertGreater(event.wall_time, 0)
      recorded = event.execution
      self.assertEqual(recorded.op_type, "Unique")
      self.assertEqual(recorded.num_outputs, 2)
      self.assertTrue(recorded.code_location)
    # Nothing beyond the two Unique executions should have been recorded.
    with self.assertRaises(StopIteration):
      next(new_root_events)

    with debug_events_reader.DebugEventsReader(self.dump_root) as old_reader:
      old_root_events = old_reader.execution_iterator()
      # The old dump root shouldn't have been written to.
      with self.assertRaises(StopIteration):
        next(old_root_events)
def testSimpleKerasRecurrentModelPredict(self, tensor_debug_mode):
  """Checks the dumped data while a small recurrent model runs predict()."""
  no_tensor_mode = tensor_debug_mode == "NO_TENSOR"
  writer = dumping_callback.enable_dump_debug_info(
      self.dump_root, tensor_debug_mode=tensor_debug_mode)

  model = _create_simple_recurrent_keras_model([3, 4])
  batch_size = 5
  xs = np.ones([batch_size, 3, 4])
  self.assertAllClose(model.predict(xs), np.zeros([batch_size, 1]))

  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()

  stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
  (context_ids, op_types,
   op_name_to_op_type, _) = self._readAndCheckGraphsFile(stack_frame_by_id)
  # Simply assert that graph are recorded and refrain from asserting on the
  # internal details of the Keras model.
  for recorded in (context_ids, op_types, op_name_to_op_type):
    self.assertTrue(recorded)

  if context.executing_eagerly():
    # NOTE(b/142486213): Execution of the TF function happens with
    # Session.run() in v1 graph mode, hence it doesn't get logged to the
    # .execution file.
    (eager_op_types, _, _, _,
     eager_tensor_values) = self._readAndCheckExecutionFile()
    self.assertTrue(eager_op_types)
    if no_tensor_mode:
      for value_list in eager_tensor_values:
        self.assertFalse(value_list)

  (op_names, _, _,
   tensor_values) = self._readAndCheckGraphExecutionTracesFile(context_ids)
  executed_op_types = [op_name_to_op_type[name] for name in op_names]
  # These are the ops that we can safely assume to have been executed during
  # the model prediction.
  for expected_op_type in ("MatMul", "BiasAdd"):
    self.assertIn(expected_op_type, executed_op_types)
  # On the GPU, CudnnRNN is used in lieu of the default op-by-op
  # implementation.
  ran_op_by_op = ("Sigmoid" in executed_op_types and
                  "Tanh" in executed_op_types)
  ran_cudnn = "CudnnRNN" in executed_op_types
  self.assertTrue(ran_op_by_op or ran_cudnn)

  if no_tensor_mode:
    # Under the default NO_TENSOR tensor-debug mode, the tensor_proto ought to
    # be an empty float32 tensor.
    for traced_value in tensor_values:
      self.assertEqual(traced_value.dtype, np.float32)
      self.assertEqual(traced_value.shape, (0,))
  else:
    # Refrain from asserting the internal implementation details of the LSTM
    # layer.
    concrete_tensor_values = [
        traced_value for traced_value in tensor_values
        if traced_value is not None and traced_value.size > 0
    ]
    self.assertTrue(concrete_tensor_values)
def testOnExecutionIsCalled(self, tensor_debug_mode):
  """A monitor attached to the reader sees the single eager MatMul."""
  writer = dumping_callback.enable_dump_debug_info(
      self.dump_root, tensor_debug_mode=tensor_debug_mode)
  lhs = constant_op.constant([[1, 2], [3, 4]], dtype=dtypes.float32)
  rhs = constant_op.constant([[-1], [1]], dtype=dtypes.float32)
  math_ops.matmul(lhs, rhs)
  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()

  with debug_events_reader.DebugDataReader(self.dump_root) as reader:
    # The monitor is created before update() is called on the reader.
    test_monitor = TestMonitor(reader)
    reader.update()
    self.assertLen(test_monitor.executions, 1)
    self.assertEmpty(test_monitor.graph_execution_traces)

    matmul_execution = test_monitor.executions[0]
    self.assertTrue(matmul_execution.wall_time)
    self.assertEqual(matmul_execution.op_type, "MatMul")
    self.assertLen(matmul_execution.output_tensor_device_ids, 1)
    self.assertLen(matmul_execution.input_tensor_ids, 2)
    self.assertLen(matmul_execution.output_tensor_ids, 1)
    self.assertEqual(matmul_execution.num_outputs, 1)
    self.assertEqual(matmul_execution.graph_id, "")

    debug_values = matmul_execution.debug_tensor_values
    if tensor_debug_mode == "NO_TENSOR":
      self.assertIsNone(debug_values)
    elif tensor_debug_mode == "CONCISE_HEALTH":
      self.assertLen(debug_values, 1)
      # [tensor_id, element_count, neg_inf_count, pos_inf_count, nan_count].
      self.assertLen(debug_values[0], 5)
    elif tensor_debug_mode == "FULL_TENSOR":
      # Full tensor values are not stored in the debug_tensor_values field.
      self.assertIsNone(debug_values)
      full_values = reader.execution_to_tensor_values(matmul_execution)
      self.assertAllClose(full_values, [[[1.], [1.]]])
def add_negative_v1_squared_to_itself():
  """Repeatedly adds -(v1**2) to `v1` while dumping to `dump_root_1`."""
  debug_writer = dumping_callback.enable_dump_debug_info(
      dump_root_1, tensor_debug_mode="FULL_TENSOR")
  # Run in a loop to facilitate interleaving between threads.
  for _unused_step in range(3):
    v1_squared = v1**2.0
    v1.assign_add(-v1_squared)
  debug_writer.FlushNonExecutionFiles()
  debug_writer.FlushExecutionFiles()
def testMultiThreadedExecutionWithSameSetting(self, tensor_debug_mode):
  """Dumping from multiple threads using the same setting."""
  writer = dumping_callback.enable_dump_debug_info(
      self.dump_root, tensor_debug_mode=tensor_debug_mode)
  x = variables.Variable(10.0, dtype=dtypes.float32)
  y = variables.Variable(3.0, dtype=dtypes.float32)

  @def_function.function
  def increase_x():
    return x.assign_add(y * 2.0)

  # One call on the main thread, then one call per worker thread.
  increase_x()

  num_threads = 3
  workers = [
      threading.Thread(target=increase_x) for _ in range(num_threads)
  ]
  for worker in workers:
    worker.start()
  for worker in workers:
    worker.join()
  # 10 --> 16 --> 22 --> 28 --> 34.
  self.assertAllClose(x.read_value(), 34.0)

  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()

  stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
  with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
    # Wall times of the execution events must never decrease.
    latest_wall_time = 1
    for event in reader.execution_iterator():
      self.assertGreaterEqual(event.wall_time, latest_wall_time)
      latest_wall_time = event.wall_time

  (context_ids, _,
   op_name_to_op_type, _) = self._readAndCheckGraphsFile(stack_frame_by_id)
  (op_names, _, output_slots,
   tensor_values) = self._readAndCheckGraphExecutionTracesFile(context_ids)
  executed_op_types = [op_name_to_op_type[name] for name in op_names]

  num_calls = 1 + num_threads
  self.assertEqual(executed_op_types.count("Mul"), num_calls)
  self.assertEqual(executed_op_types.count("ReadVariableOp"), 2 * num_calls)
  for slot in output_slots:
    self.assertEqual(slot, 0)

  if tensor_debug_mode == "NO_TENSOR":
    for traced_value in tensor_values:
      self.assertEqual(traced_value.dtype, np.float32)
      self.assertEqual(traced_value.shape, (0,))
  elif tensor_debug_mode == "FULL_TENSOR":
    mul_values = [
        traced_value
        for traced_value, op_type in zip(tensor_values, executed_op_types)
        if op_type == "Mul"
    ]
    # Every call computes y * 2.0 == 6.0.
    self.assertAllClose(mul_values, [6.0] * num_calls)
def testNestedContextIsCapturedByGraphOpCreationHistory(self):
  """Ops in a while loop's cond and body report different innermost contexts."""
  writer = dumping_callback.enable_dump_debug_info(
      self.dump_root, tensor_debug_mode="NO_TENSOR")

  @def_function.function
  def iterative_doubling(x, times):
    i = constant_op.constant(0, dtype=dtypes.int32)
    while i < times:
      x = x * 2.0 - 1.0
      i += 1
    return x

  x = constant_op.constant(2.0, dtype=dtypes.float32)
  times = constant_op.constant(4, dtype=dtypes.int32)
  # 2 * 2 - 1 = 3; 3 * 2 - 1 = 5; 5 * 2 - 1 = 9; 9 * 2 - 1 = 17.
  self.assertAllClose(self.evaluate(iterative_doubling(x, times)), 17.0)

  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()
  stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
  (_, _, op_name_to_op_type,
   op_name_to_context_id) = self._readAndCheckGraphsFile(stack_frame_by_id)

  def innermost_context_ids(wanted_op_type):
    # Context IDs of every recorded op whose type is `wanted_op_type`.
    return [
        op_name_to_context_id[op_name]
        for op_name, op_type in op_name_to_op_type.items()
        if op_type == wanted_op_type
    ]

  less_context_ids = innermost_context_ids("Less")
  mul_context_ids = innermost_context_ids("Mul")
  sub_context_ids = innermost_context_ids("Sub")
  all_context_id_lists = (less_context_ids, mul_context_ids, sub_context_ids)
  for context_id_list in all_context_id_lists:
    self.assertLen(context_id_list, 1)
  for context_id_list in all_context_id_lists:
    self.assertTrue(context_id_list[0])

  less_context_id = less_context_ids[0]
  mul_context_id = mul_context_ids[0]
  sub_context_id = sub_context_ids[0]
  # The Less op is from the while-loop cond context and hence should have
  # a different innermost context ID from the mul and sub ops, which are both
  # from the while-loop body context.
  self.assertNotEqual(less_context_id, mul_context_id)
  self.assertNotEqual(less_context_id, sub_context_id)
  # The Mul and Sub ops are from the same innermost context.
  self.assertEqual(mul_context_id, sub_context_id)
def testSimpleKerasRecurrentModelFit(self, tensor_debug_mode):
  """Checks the dumped data while a small recurrent model runs fit()."""
  no_tensor_mode = tensor_debug_mode == "NO_TENSOR"
  writer = dumping_callback.enable_dump_debug_info(
      self.dump_root, tensor_debug_mode=tensor_debug_mode)

  model = _create_simple_recurrent_keras_model([3, 4])
  xs = np.ones([5, 3, 4])
  ys = np.ones([5, 1])
  history = model.fit(xs, ys, epochs=3, verbose=0)
  expected_losses = [1.0, 0.9603999853134155, 0.9223681688308716]
  self.assertAllClose(history.history["loss"], expected_losses)

  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()

  stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
  (context_ids, op_types,
   op_name_to_op_type, _) = self._readAndCheckGraphsFile(stack_frame_by_id)
  # Simply assert that graph are recorded and refrain from asserting on the
  # internal details of the Keras model.
  for recorded in (context_ids, op_types, op_name_to_op_type):
    self.assertTrue(recorded)

  if context.executing_eagerly():
    # NOTE(b/142486213): Execution of the TF function happens with
    # Session.run() in v1 graph mode, hence it doesn't get logged to the
    # .execution file.
    (eager_op_types, _, _, _,
     eager_tensor_values) = self._readAndCheckExecutionFile()
    self.assertTrue(eager_op_types)
    if no_tensor_mode:
      for value_list in eager_tensor_values:
        self.assertFalse(value_list)

  (op_names, _, _,
   tensor_values) = self._readAndCheckGraphExecutionTracesFile(context_ids)
  executed_op_types = [op_name_to_op_type[name] for name in op_names]
  # These are the ops that we can safely assume to have been executed during
  # the recurrent model's fit() call.
  for expected_op_type in ("MatMul", "BiasAdd"):
    self.assertIn(expected_op_type, executed_op_types)
  # On the GPU, CudnnRNN is used in lieu of the default op-by-op
  # implementation.
  ran_forward_op_by_op = ("Sigmoid" in executed_op_types and
                          "Tanh" in executed_op_types)
  self.assertTrue(ran_forward_op_by_op or "CudnnRNN" in executed_op_types)
  ran_backward_op_by_op = ("SigmoidGrad" in executed_op_types and
                           "TanhGrad" in executed_op_types)
  self.assertTrue(
      ran_backward_op_by_op or "CudnnRNNBackprop" in executed_op_types)

  if no_tensor_mode:
    # Under the default NO_TENSOR tensor-debug mode, the tensor_proto ought
    # to be an empty float32 tensor.
    for traced_value in tensor_values:
      self.assertEqual(traced_value.dtype, np.float32)
      self.assertEqual(traced_value.shape, (0,))
def testNestedFunctionExecutionWithoutControlFlow(self, tensor_debug_mode):
  """Ops of a tf.function nested in another tf.function are all traced."""
  writer = dumping_callback.enable_dump_debug_info(
      self.dump_root, tensor_debug_mode=tensor_debug_mode)

  @def_function.function
  def log_sum(x, y):
    return math_ops.log(x + y)

  @def_function.function
  def sin1p_log_sum(x, y):
    return math_ops.sin(1.0 + log_sum(x, y))

  x = constant_op.constant(2.0)
  y = constant_op.constant(3.0)
  log_5 = np.log(5.0)
  self.assertAllClose(sin1p_log_sum(x, y), np.sin(1.0 + log_5))
  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()

  if context.executing_eagerly():
    # NOTE(b/142486213): Execution of the TF function happens with
    # Session.run() in v1 graph mode, so doesn't get logged to the
    # .execution file.
    eager_op_types, _, _, _, _ = self._readAndCheckExecutionFile()
    outer_function_calls = [
        op_type for op_type in eager_op_types if "sin1p_log_sum" in op_type
    ]
    self.assertLen(outer_function_calls, 1)

  stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
  (context_ids, op_types,
   op_name_to_op_type, _) = self._readAndCheckGraphsFile(stack_frame_by_id)
  for created_op_type in ("AddV2", "Log", "Sin"):
    self.assertIn(created_op_type, op_types)

  (op_names, _, _,
   tensor_values) = self._readAndCheckGraphExecutionTracesFile(context_ids)
  executed_op_types = [op_name_to_op_type[name] for name in op_names]
  self.assertEqual(executed_op_types, ["AddV2", "Log", "AddV2", "Sin"])

  if tensor_debug_mode == "NO_TENSOR":
    # Under the default NO_TENSOR tensor-debug mode, the tensor_proto ought to
    # be an empty float32 tensor.
    for traced_value in tensor_values:
      self.assertEqual(traced_value.dtype, np.float32)
      self.assertEqual(traced_value.shape, (0,))
  elif tensor_debug_mode == "FULL_TENSOR":
    expected_values = [
        5.0,  # 1st AddV2 op.
        np.log(5.0),  # Log op.
        np.log(5.0) + 1.0,  # 2nd AddV2 op.
        np.sin(np.log(5.0) + 1.0),  # Sin op.
    ]
    for index, expected_value in enumerate(expected_values):
      self.assertAllClose(tensor_values[index], expected_value)
def testCallingEnableTracingTwiceWithTheSameDumpRootIsIdempotent(self):
  """Enabling dumping twice on one dump root records each op only once."""
  dumping_callback.enable_dump_debug_info(self.dump_root)
  writer = dumping_callback.enable_dump_debug_info(self.dump_root)

  num_unique_calls = 2
  x = constant_op.constant([10.0, 12.0, 10.0])
  for _ in range(num_unique_calls):
    array_ops.unique(x)

  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()

  with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
    events = reader.execution_iterator()
    for _ in range(num_unique_calls):
      event = next(events)
      self.assertGreater(event.wall_time, 0)
      recorded = event.execution
      self.assertEqual(recorded.op_type, "Unique")
      self.assertEqual(recorded.num_outputs, 2)
      self.assertTrue(recorded.code_location)
    # A duplicated callback would have produced extra execution events.
    with self.assertRaises(StopIteration):
      next(events)
def add_negative_v2_squared_to_itself():
  """Adds -(v2**2) to `v2`, disabling dumping between the square and the Neg."""
  debug_writer = dumping_callback.enable_dump_debug_info(
      dump_root_2, tensor_debug_mode="FULL_TENSOR")
  v2_squared = v2**2.0
  # Since dumping is disabled before the Neg op is called, no tensor data
  # should be dumped from the op, but this shouldn't affect the dumping of
  # the tensor data from the Neg op in `add_negative_v1_squared_to_itself`.
  # Both behavior is checked below.
  dumping_callback.disable_dump_debug_info()
  v2.assign_add(-v2_squared)
  debug_writer.FlushNonExecutionFiles()
  debug_writer.FlushExecutionFiles()
def testOpRegex(self, op_regex):
  """Only ops whose type matches `op_regex` have their tensors dumped."""
  writer = dumping_callback.enable_dump_debug_info(
      self.dump_root, tensor_debug_mode="FULL_TENSOR", op_regex=op_regex)

  @def_function.function
  def log_sum(x, y):
    return math_ops.log(x + y)

  @def_function.function
  def sin1p_log_sum(x, y):
    return math_ops.sin(1.0 + log_sum(x, y))

  x = constant_op.constant(2.0)
  y = constant_op.constant(3.0)
  self.assertAllClose(
      self.evaluate(sin1p_log_sum(x, y)), np.sin(1.0 + np.log(5.0)))
  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()

  stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
  (context_ids, op_types,
   op_name_to_op_type, _) = self._readAndCheckGraphsFile(stack_frame_by_id)
  # All ops are still recorded at graph-construction time.
  for created_op_type in ("AddV2", "Log", "Sin"):
    self.assertIn(created_op_type, op_types)

  (op_names, _, _,
   tensor_values) = self._readAndCheckGraphExecutionTracesFile(context_ids)
  executed_op_types = [op_name_to_op_type[name] for name in op_names]

  first_add_value = 5.0  # 1st AddV2 op.
  log_value = np.log(5.0)  # Log op.
  second_add_value = np.log(5.0) + 1.0  # 2nd AddV2 op.
  if op_regex == "AddV2":
    expected_op_types = ["AddV2", "AddV2"]
    expected_values = [first_add_value, second_add_value]
  elif op_regex == "Log":
    expected_op_types = ["Log"]
    expected_values = [log_value]
  else:  # "(AddV2|Log)"
    expected_op_types = ["AddV2", "Log", "AddV2"]
    expected_values = [first_add_value, log_value, second_add_value]

  self.assertEqual(executed_op_types, expected_op_types)
  self.assertLen(tensor_values, len(expected_values))
  for index, expected_value in enumerate(expected_values):
    self.assertAllClose(tensor_values[index], expected_value)
def testIncorrectTensorDTypeArgFormatLeadsToError(self):
  """Malformed `tensor_dtypes` arguments are rejected when enabling dumping."""
  # A dict is echoed back in the error message as "{}".
  with self.assertRaisesRegexp(
      ValueError,
      r".*expected.*list.*tuple.*callable.*but received.*\{\}"):
    dumping_callback.enable_dump_debug_info(self.dump_root, tensor_dtypes={})

  # A bare string and a bare DType are both rejected with a ValueError.
  generic_message = r".*expected.*list.*tuple.*callable.*but received.*"
  for bad_tensor_dtypes in ("float32", dtypes.float32):
    with self.assertRaisesRegexp(ValueError, generic_message):
      dumping_callback.enable_dump_debug_info(
          self.dump_root, tensor_dtypes=bad_tensor_dtypes)

  # A list of callables is rejected with a TypeError.
  dtype_predicates = [
      lambda dtype: dtype.is_floating, lambda dtype: dtype.is_integer
  ]
  with self.assertRaises(TypeError):
    dumping_callback.enable_dump_debug_info(
        self.dump_root, tensor_dtypes=dtype_predicates)
def testDisableTracingWorks(self, tensor_debug_mode):
  """Ops executed after disable_dump_debug_info() leave no dumped data."""
  writer = dumping_callback.enable_dump_debug_info(
      self.dump_root, tensor_debug_mode=tensor_debug_mode)
  dumping_callback.disable_dump_debug_info()

  x = constant_op.constant([10.0, 12.0, 10.0])
  for _ in range(2):
    array_ops.unique(x)

  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()

  with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
    expected_empty_iterators = (
        reader.source_files_iterator(),
        reader.stack_frames_iterator(),
        reader.execution_iterator(),
    )
    # No source-file, stack-frame or execution data should have been dumped.
    for empty_iterator in expected_empty_iterators:
      with self.assertRaises(StopIteration):
        next(empty_iterator)
def testDumpingMiniModel(self, distribution, tensor_debug_mode):
  """Runs one MiniModel train step under `distribution` and checks the dump.

  The step is run through `distribution.experimental_run_v2()` with dumping
  enabled.  The updated variable values are checked (0.75 with one worker
  device, 0.5 with two), and the graph-execution traces are then read back
  and checked per device.

  Args:
    distribution: The distribution strategy under test; it must expose one or
      two worker devices.
    tensor_debug_mode: Tensor-debug mode passed to enable_dump_debug_info().
      "NO_TENSOR" and "FULL_TENSOR" get mode-specific checks.
  """
  with distribution.scope():
    writer = dumping_callback.enable_dump_debug_info(
        self.dump_root, tensor_debug_mode=tensor_debug_mode)

    mini_model = MiniModel()
    optimizer = gradient_descent.GradientDescentOptimizer(0.25)

    def train_step():
      with backprop.GradientTape() as tape:
        loss = mini_model(array_ops.ones([1, 10]))
      grads = tape.gradient(loss, mini_model.weights)
      grads_and_vars = zip(grads, mini_model.weights)
      optimizer.apply_gradients(grads_and_vars)

    distribution.experimental_run_v2(train_step)

    updated_var_values = self.evaluate(mini_model.variables)
    num_devices = len(distribution.extended.worker_devices)
    # Use a unittest assertion rather than a bare `assert`, which is stripped
    # when Python runs with -O.
    self.assertIn(num_devices, (1, 2))
    if num_devices == 1:
      self.assertAllEqual(0.75 * np.ones([10, 1]), updated_var_values[0])
      self.assertAllEqual([0.75], updated_var_values[1])
    else:
      self.assertAllEqual(0.5 * np.ones([10, 1]), updated_var_values[0])
      self.assertAllEqual([0.5], updated_var_values[1])

    writer.FlushNonExecutionFiles()
    writer.FlushExecutionFiles()

  device_name_0 = distribution.extended.worker_devices[0]
  logging.info("device_name_0 = %s", device_name_0)
  if num_devices > 1:
    device_name_1 = distribution.extended.worker_devices[1]
    logging.info("device_name_1 = %s", device_name_1)

  with debug_events_reader.DebugDataReader(self.dump_root) as reader:
    reader.update()
    traces = reader.graph_execution_traces()

    def op_types_on(device_name):
      # Op types of the traces whose device name ends with `device_name`.
      return [
          trace.op_type
          for trace in traces
          if trace.device_name.endswith(device_name)
      ]

    def values_on(device_name, op_type):
      # Tensor values of the `op_type` traces recorded on `device_name`.
      return [
          reader.graph_execution_trace_to_tensor_value(trace)
          for trace in traces
          if trace.op_type == op_type and
          trace.device_name.endswith(device_name)
      ]

    # Verify graph-execution traces are available for both devices.
    # We don't assert MatMul occurs exactly once because the gradient of
    # MatMul involves MatMul.
    device_0_executed_op_types = op_types_on(device_name_0)
    if num_devices > 1:
      device_1_executed_op_types = op_types_on(device_name_1)

    self.assertIn("MatMul", device_0_executed_op_types)
    self.assertEqual(device_0_executed_op_types.count("BiasAdd"), 1)
    if num_devices > 1:
      self.assertIn("MatMul", device_1_executed_op_types)
      self.assertEqual(device_1_executed_op_types.count("BiasAdd"), 1)

    if tensor_debug_mode == "NO_TENSOR":
      for trace in traces:
        self.assertEqual(trace.debug_tensor_value, [])
    elif tensor_debug_mode == "FULL_TENSOR":
      device_0_matmul_values = values_on(device_name_0, "MatMul")
      device_0_bias_add_values = values_on(device_name_0, "BiasAdd")
      self.assertAllClose(device_0_matmul_values[0], [[10.0]])
      self.assertAllClose(device_0_bias_add_values[0], [[11.0]])
      if num_devices > 1:
        device_1_matmul_values = values_on(device_name_1, "MatMul")
        device_1_bias_add_values = values_on(device_name_1, "BiasAdd")
        self.assertAllClose(device_1_matmul_values[0], [[10.0]])
        self.assertAllClose(device_1_bias_add_values[0], [[11.0]])
def testMobiletNetV2Fit(self, tensor_debug_mode):
  """Test training Keras MobileNetV2 works with dumping."""
  # Use a large circular-buffer to make sure we capture all the executed ops.
  writer = dumping_callback.enable_dump_debug_info(
      self.dump_root,
      tensor_debug_mode=tensor_debug_mode,
      circular_buffer_size=100000)

  # Truncate MobileNetV2 at layer 22 and attach a one-unit dense head.
  base_model = mobilenet_v2.MobileNetV2(
      input_shape=(32, 32, 3), alpha=0.1, weights=None)
  head = base_model.layers[22].output
  head = core.Flatten()(head)
  head = core.Dense(1)(head)
  model = models.Model(inputs=base_model.inputs, outputs=head)

  batch_size = 2
  xs = np.zeros([batch_size] + list(model.input_shape[1:]))
  ys = np.zeros([batch_size] + list(model.output_shape[1:]))
  model.compile(optimizer="sgd", loss="mse")
  epochs = 1
  history = model.fit(xs, ys, epochs=epochs, verbose=0)
  self.assertLen(history.history["loss"], epochs)

  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()

  stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
  (context_ids, op_types,
   op_name_to_op_type, _) = self._readAndCheckGraphsFile(stack_frame_by_id)
  # Simply assert that graph are recorded and refrain from asserting on the
  # internal details of the Keras model.
  for recorded in (context_ids, op_types, op_name_to_op_type):
    self.assertTrue(recorded)

  if context.executing_eagerly():
    # NOTE(b/142486213): Execution of the TF function happens with
    # Session.run() in v1 graph mode, hence it doesn't get logged to the
    # .execution file.
    eager_op_types, _, _, _, _ = self._readAndCheckExecutionFile()
    self.assertTrue(eager_op_types)

  (op_names, _, _,
   tensor_values) = self._readAndCheckGraphExecutionTracesFile(context_ids)
  executed_op_types = [op_name_to_op_type[name] for name in op_names]
  # These are the ops that we can safely assume to have been executed during
  # the model's fit() call.
  for expected_op_type in (
      "Conv2D", "Relu6", "Conv2DBackpropFilter", "Relu6Grad"):
    self.assertIn(expected_op_type, executed_op_types)

  if tensor_debug_mode == "NO_TENSOR":
    # Under the default NO_TENSOR tensor-debug mode, the tensor_proto ought to
    # be an empty float32 tensor.
    for traced_value in tensor_values:
      self.assertEqual(traced_value.dtype, np.float32)
      self.assertEqual(traced_value.shape, (0,))
  elif tensor_debug_mode == "FULL_TENSOR":
    # Pairs of (op type, whether the leading dimension is also checked
    # against the batch size).
    checked_op_types = (
        ("Conv2D", True),
        ("Relu6", True),
        ("Conv2DBackpropFilter", False),
        ("Relu6Grad", False),
    )
    for checked_op_type, check_batch_dim in checked_op_types:
      values_of_type = [
          tensor_values[index]
          for index, op_type in enumerate(executed_op_types)
          if op_type == checked_op_type
      ]
      self.assertTrue(values_of_type)
      for value in values_of_type:
        self.assertGreater(len(value.shape), 1)
        if check_batch_dim:
          self.assertEqual(value.shape[0], batch_size)
def testPureEagerOpExecution(self, tensor_debug_mode):
  """Test dumping data from a sequence of purely eager op executions.

  A Collatz iteration starting at 10.0 is run op-by-op in eager mode.  The
  test checks that the .execution file stays empty until
  FlushExecutionFiles() is called, that the ops are then recorded in
  execution order with non-decreasing wall times and known stack frames, and
  that no graph or graph-execution-trace data is written.

  Args:
    tensor_debug_mode: Tensor-debug mode passed to enable_dump_debug_info().
      "NO_TENSOR" and "FULL_TENSOR" get mode-specific checks.
  """
  writer = dumping_callback.enable_dump_debug_info(
      self.dump_root, tensor_debug_mode=tensor_debug_mode)

  x = constant_op.constant(10.0)
  zero = constant_op.constant(0.0)
  one = constant_op.constant(1.0)
  two = constant_op.constant(2.0)
  three = constant_op.constant(3.0)
  # Use Collatz conjecture as a test case.
  while x > one:
    if math_ops.equal(x % two, zero):
      x = x / two
    else:
      x = x * three + one

  writer.FlushNonExecutionFiles()
  self._readAndCheckMetadataFile()
  stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()

  # Before FlushExecutionFiles() is called, the .execution file should be
  # empty.
  with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
    execution_iter = reader.execution_iterator()
    with self.assertRaises(StopIteration):
      next(execution_iter)

    # After the flushing, the .execution file should hold the appropriate
    # contents.
    writer.FlushExecutionFiles()
    execution_iter = reader.execution_iterator()
    prev_wall_time = 1
    executed_op_types = []
    # Maps op type to the scalar output values recorded for that op type.
    tensor_values = collections.defaultdict(list)
    for debug_event in execution_iter:
      self.assertGreaterEqual(debug_event.wall_time, prev_wall_time)
      prev_wall_time = debug_event.wall_time
      execution = debug_event.execution
      executed_op_types.append(execution.op_type)
      self.assertTrue(execution.input_tensor_ids)
      self.assertTrue(execution.output_tensor_ids)
      if tensor_debug_mode == "NO_TENSOR":
        # Due to the NO_TENSOR tensor debug mode, tensor_protos ought to
        # be empty.
        self.assertFalse(execution.tensor_protos)
      elif tensor_debug_mode == "FULL_TENSOR":
        # Under the FULL_TENSOR mode, the value of the tensor should be
        # available through `tensor_protos`.
        tensor_value = float(
            tensor_util.MakeNdarray(execution.tensor_protos[0]))
        tensor_values[execution.op_type].append(tensor_value)
      # Verify the code_location field.
      self.assertTrue(execution.code_location.stack_frame_ids)
      for stack_frame_id in execution.code_location.stack_frame_ids:
        self.assertIn(stack_frame_id, stack_frame_by_id)

    if tensor_debug_mode == "FULL_TENSOR":
      self.assertAllClose(tensor_values["Greater"], [1, 1, 1, 1, 1, 1, 0])
      self.assertAllClose(tensor_values["RealDiv"], [5, 8, 4, 2, 1])
      self.assertAllClose(tensor_values["Mul"], [15])
      self.assertAllClose(tensor_values["AddV2"], [16])

    self.assertEqual(
        executed_op_types,
        [
            "Greater", "FloorMod", "Equal", "RealDiv",  # 10 --> 5
            "Greater", "FloorMod", "Equal", "Mul", "AddV2",  # 5 --> 16
            "Greater", "FloorMod", "Equal", "RealDiv",  # 16 --> 8
            "Greater", "FloorMod", "Equal", "RealDiv",  # 8 --> 4
            "Greater", "FloorMod", "Equal", "RealDiv",  # 4 --> 2
            "Greater", "FloorMod", "Equal", "RealDiv",  # 2 --> 1
            "Greater"
        ])

    # Due to the pure eager op execution, the .graph file and the
    # .graph_execution_traces file ought to be empty.
    graphs_iterator = reader.graphs_iterator()
    with self.assertRaises(StopIteration):
      next(graphs_iterator)
    graph_trace_iter = reader.graph_execution_traces_iterator()
    with self.assertRaises(StopIteration):
      next(graph_trace_iter)
def testKerasModelFitOnOneOrTwoDevices(self, distribution, tensor_debug_mode):
  """Fits a two-layer Keras model under `distribution` and checks the dump.

  Args:
    distribution: The distribution strategy under test.  The checks below
      handle one worker device, or compare the first two when there are more.
    tensor_debug_mode: Tensor-debug mode passed to enable_dump_debug_info().
      "NO_TENSOR" and "FULL_TENSOR" get mode-specific checks.
  """
  writer = dumping_callback.enable_dump_debug_info(
      self.dump_root, tensor_debug_mode=tensor_debug_mode)

  with distribution.scope():
    model = keras.Sequential()
    model.add(
        keras.layers.Dense(units=10, input_shape=[5], activation="relu"))
    model.add(keras.layers.Dense(units=1))
    model.compile(loss="mse", optimizer="sgd")

    batch_size = 20
    x = np.ones([batch_size, 5])
    y = np.ones([batch_size, 1])
    epochs = 1
    history = model.fit(x, y, epochs=epochs, verbose=0)
    self.assertLen(history.history["loss"], epochs)

    writer.FlushNonExecutionFiles()
    writer.FlushExecutionFiles()

  stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
  # NOTE(review): other tests visible in this file unpack four values from
  # _readAndCheckGraphsFile(); confirm that three is right for this class.
  (context_ids, _,
   op_name_to_op_type) = self._readAndCheckGraphsFile(stack_frame_by_id)
  (op_names, device_names, _,
   tensor_values) = self._readAndCheckGraphExecutionTracesFile(context_ids)

  # Eager execution of tf.function should be recorded.
  executed_op_types, _, _, _, _ = self._readAndCheckExecutionFile()
  fit_functions = [
      op_type for op_type in executed_op_types
      if "_distributed_function" in op_type
  ]
  self.assertLen(fit_functions, epochs)

  num_devices = len(distribution.extended.worker_devices)
  device_name_0 = distribution.extended.worker_devices[0]
  logging.info("device_name_0 = %s", device_name_0)
  if num_devices > 1:
    device_name_1 = distribution.extended.worker_devices[1]
    logging.info("device_name_1 = %s", device_name_1)

  executed_op_types = [op_name_to_op_type[op_name] for op_name in op_names]
  device_0_executed_op_types = filter_by_device_name(
      executed_op_types, device_names, device_name_0)
  if num_devices > 1:
    device_1_executed_op_types = filter_by_device_name(
        executed_op_types, device_names, device_name_1)

  checked_op_types = ("MatMul", "BiasAdd", "Relu", "ReluGrad")
  for op_type in checked_op_types:
    self.assertIn(op_type, device_0_executed_op_types)
  if num_devices > 1:
    # If there are two devices involved, assert the ops inside tf.functions
    # are executed and recorded for the equal numbers of times by the
    # dumping op-callback.
    for op_type in checked_op_types:
      self.assertEqual(device_0_executed_op_types.count(op_type),
                       device_1_executed_op_types.count(op_type))

  if tensor_debug_mode == "NO_TENSOR":
    # Each trace value is checked directly.  Iterating *inside* each value (as
    # this test previously did) never ran an assertion when the values are
    # empty arrays, which is what the other NO_TENSOR checks in this file
    # expect.
    for tensor_value in tensor_values:
      self.assertEqual(tensor_value.dtype, np.float32)
      self.assertEqual(tensor_value.shape, (0,))
  elif tensor_debug_mode == "FULL_TENSOR":
    gpu_0_relu_values = filter_by_device_name_and_op_type(
        tensor_values, device_names, executed_op_types, device_name_0, "Relu")
    self.assertTrue(gpu_0_relu_values)
    gpu_0_relu_grad_values = filter_by_device_name_and_op_type(
        tensor_values, device_names, executed_op_types, device_name_0,
        "ReluGrad")
    self.assertTrue(gpu_0_relu_grad_values)
    if num_devices > 1:
      gpu_1_relu_values = filter_by_device_name_and_op_type(
          tensor_values, device_names, executed_op_types, device_name_1,
          "Relu")
      self.assertTrue(gpu_1_relu_values)
      # Corresponding values on the two devices must have matching shapes.
      for i, gpu_0_relu_value in enumerate(gpu_0_relu_values):
        self.assertEqual(gpu_0_relu_value.shape, gpu_1_relu_values[i].shape)
      gpu_1_relu_grad_values = filter_by_device_name_and_op_type(
          tensor_values, device_names, executed_op_types, device_name_1,
          "ReluGrad")
      self.assertTrue(gpu_1_relu_grad_values)
      for i, gpu_0_relu_grad_value in enumerate(gpu_0_relu_grad_values):
        self.assertEqual(gpu_0_relu_grad_value.shape,
                         gpu_1_relu_grad_values[i].shape)
def testDumpingMiniModel(self, distribution, tensor_debug_mode):
    """Check the debug dump of one `MiniModel` training step under a strategy.

    Runs a single gradient-descent step (learning rate 0.25) through
    `distribution.experimental_run_v2()`, verifies the updated variable
    values (0.75 on one device, 0.5 on two), and then checks that MatMul and
    BiasAdd graph-execution traces were dumped for every worker device.

    Args:
      distribution: Distribution strategy under test; must expose one or two
        worker devices through `distribution.extended.worker_devices`.
      tensor_debug_mode: Tensor-debug mode name forwarded to
        `enable_dump_debug_info()`. "NO_TENSOR" and "FULL_TENSOR" receive
        additional mode-specific checks on the traced values.
    """
    with distribution.scope():
        writer = dumping_callback.enable_dump_debug_info(
            self.dump_root, tensor_debug_mode=tensor_debug_mode)

        mini_model = MiniModel()
        optimizer = gradient_descent.GradientDescentOptimizer(0.25)

        def train_step():
            with backprop.GradientTape() as tape:
                loss = mini_model(array_ops.ones([1, 10]))
                grads = tape.gradient(loss, mini_model.weights)
                grads_and_vars = zip(grads, mini_model.weights)
                optimizer.apply_gradients(grads_and_vars)

        distribution.experimental_run_v2(train_step)

        updated_var_values = self.evaluate(mini_model.variables)
        num_devices = len(distribution.extended.worker_devices)
        # Use a unittest assertion rather than a bare `assert`, which is
        # stripped when Python runs with -O.
        self.assertIn(num_devices, (1, 2))
        expected_value = 0.75 if num_devices == 1 else 0.5
        self.assertAllEqual(expected_value * np.ones([10, 1]),
                            updated_var_values[0])
        self.assertAllEqual([expected_value], updated_var_values[1])

        writer.FlushNonExecutionFiles()
        writer.FlushExecutionFiles()

    stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
    # Only the first three entries are used here. Slicing keeps this working
    # whether or not the helper returns a trailing fourth entry, which other
    # tests in this file unpack.
    # NOTE(review): confirm the helper's arity and unpack it exactly.
    context_ids, _, op_name_to_op_type = self._readAndCheckGraphsFile(
        stack_frame_by_id)[:3]
    (op_names, device_names, _,
     tensor_values) = self._readAndCheckGraphExecutionTracesFile(context_ids)
    executed_op_types = [op_name_to_op_type[op_name] for op_name in op_names]

    device_name_0 = distribution.extended.worker_devices[0]
    logging.info("device_name_0 = %s", device_name_0)
    if num_devices > 1:
        device_name_1 = distribution.extended.worker_devices[1]
        logging.info("device_name_1 = %s", device_name_1)

    device_0_executed_op_types = filter_by_device_name(
        executed_op_types, device_names, device_name_0)
    if num_devices > 1:
        device_1_executed_op_types = filter_by_device_name(
            executed_op_types, device_names, device_name_1)

    # Verify graph-execution traces are available for both devices.
    # We don't assert MatMul occurs exactly once because the gradient of
    # MatMul involves MatMul.
    self.assertIn("MatMul", device_0_executed_op_types)
    self.assertEqual(device_0_executed_op_types.count("BiasAdd"), 1)
    if num_devices > 1:
        self.assertIn("MatMul", device_1_executed_op_types)
        self.assertEqual(device_1_executed_op_types.count("BiasAdd"), 1)

    if tensor_debug_mode == "NO_TENSOR":
        # Every traced value is itself an array and has to be checked as a
        # whole: looping over its elements would run no assertion at all for
        # an empty array. Under NO_TENSOR the dumped value is expected to be
        # an empty float32 tensor.
        for tensor_value in tensor_values:
            self.assertEqual(tensor_value.dtype, np.float32)
            self.assertEqual(tensor_value.shape, (0,))
    elif tensor_debug_mode == "FULL_TENSOR":
        device_0_matmul_values = filter_by_device_name_and_op_type(
            tensor_values, device_names, executed_op_types, device_name_0,
            "MatMul")
        device_0_bias_add_values = filter_by_device_name_and_op_type(
            tensor_values, device_names, executed_op_types, device_name_0,
            "BiasAdd")
        self.assertAllClose(device_0_matmul_values[0], [[10.0]])
        self.assertAllClose(device_0_bias_add_values[0], [[11.0]])
        if num_devices > 1:
            device_1_matmul_values = filter_by_device_name_and_op_type(
                tensor_values, device_names, executed_op_types,
                device_name_1, "MatMul")
            device_1_bias_add_values = filter_by_device_name_and_op_type(
                tensor_values, device_names, executed_op_types,
                device_name_1, "BiasAdd")
            self.assertAllClose(device_1_matmul_values[0], [[10.0]])
            self.assertAllClose(device_1_bias_add_values[0], [[11.0]])
def testTensorDTypesAndOpRegexFilters(self, tensor_dtypes, op_regex):
    """Check which traces survive the `tensor_dtypes` / `op_regex` filters.

    A tf.function that computes the unique values of a float32 vector and
    their sum is run in FULL_TENSOR mode. Depending on the filter
    combination, the dumped graph-execution traces are expected to contain
    some subset of: the unique values, the unique indices and the sum.

    Args:
      tensor_dtypes: Value forwarded as `tensor_dtypes` to
        `enable_dump_debug_info()`. The cases handled below are lists or
        tuples of dtypes or dtype names, a callable, and a falsy value.
      op_regex: Value forwarded as `op_regex` to `enable_dump_debug_info()`.
        The cases handled below are a falsy value, "Sum" and "(?!Sum)".
    """
    writer = dumping_callback.enable_dump_debug_info(
        self.dump_root,
        tensor_debug_mode="FULL_TENSOR",
        tensor_dtypes=tensor_dtypes,
        op_regex=op_regex)

    @def_function.function
    def unique_sum(xs):
        """Sum over the unique values, for testing."""
        unique_xs, indices = array_ops.unique(xs)
        return math_ops.reduce_sum(unique_xs), indices

    inputs = constant_op.constant([2., 6., 8., 1., 2.], dtype=dtypes.float32)
    total, unique_indices = self.evaluate(unique_sum(inputs))
    self.assertAllClose(total, 17.)
    self.assertAllEqual(unique_indices, [0, 1, 2, 3, 0])

    writer.FlushNonExecutionFiles()
    writer.FlushExecutionFiles()

    stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()
    graphs_file_contents = self._readAndCheckGraphsFile(stack_frame_by_id)
    context_ids, _, op_name_to_op_type, _ = graphs_file_contents
    traces_file_contents = self._readAndCheckGraphExecutionTracesFile(
        context_ids)
    op_names, _, _, tensor_values = traces_file_contents
    executed_op_types = [op_name_to_op_type[name] for name in op_names]

    # Each expectation pairs the assertion to use with the expected value.
    expect_values = (self.assertAllClose, [2., 6., 8., 1.])  # Unique values.
    expect_indices = (self.assertAllEqual, [0, 1, 2, 3, 0])  # Unique indices.
    expect_sum = (self.assertAllClose, 17.)  # Sum.

    if tensor_dtypes == [dtypes.float32] and not op_regex:
        expected_op_types = ["Unique", "Sum"]
        expectations = [expect_values, expect_sum]
    elif tensor_dtypes == ["float32"] and op_regex == "Sum":
        expected_op_types = ["Sum"]
        expectations = [expect_sum]
    elif tensor_dtypes == (dtypes.float32,) and op_regex == "(?!Sum)":
        expected_op_types = ["Unique"]
        expectations = [expect_values]
    elif tensor_dtypes == [dtypes.int32] and not op_regex:
        expected_op_types = ["Unique"]
        expectations = [expect_indices]
    elif callable(tensor_dtypes) and not op_regex:
        expected_op_types = ["Unique"]
        expectations = [expect_indices]
    elif not tensor_dtypes and op_regex == "(?!Sum)":
        expected_op_types = ["Unique", "Unique"]
        expectations = [expect_values, expect_indices]
    else:  # "All".
        expected_op_types = ["Unique", "Unique", "Sum"]
        expectations = [expect_values, expect_indices, expect_sum]

    self.assertEqual(executed_op_types, expected_op_types)
    self.assertLen(tensor_values, len(expectations))
    for position, (check, expected) in enumerate(expectations):
        check(tensor_values[position], expected)
def testKerasModelFitOnOneOrTwoDevices(self, distribution, tensor_debug_mode):
    """Check the debug dump of a Keras `fit()` call via `DebugDataReader`.

    A two-layer dense model is built and compiled inside
    `distribution.scope()` and fitted for one epoch. The dump is then loaded
    with a `DebugDataReader` to verify that the fit function was recorded
    and that MatMul, BiasAdd, Relu and ReluGrad traces exist for the first
    worker device (and occur equally often on the second one, if present).

    Args:
      distribution: Distribution strategy under test; its worker devices are
        read from `distribution.extended.worker_devices`.
      tensor_debug_mode: Tensor-debug mode name forwarded to
        `enable_dump_debug_info()`. "NO_TENSOR" and "FULL_TENSOR" receive
        additional mode-specific checks on the traces.
    """
    writer = dumping_callback.enable_dump_debug_info(
        self.dump_root, tensor_debug_mode=tensor_debug_mode)

    with distribution.scope():
        model = keras.Sequential()
        model.add(
            keras.layers.Dense(units=10, input_shape=[5], activation="relu"))
        model.add(keras.layers.Dense(units=1))
        model.compile(loss="mse", optimizer="sgd")

    batch_size = 20
    epochs = 1
    x = np.ones([batch_size, 5])
    y = np.ones([batch_size, 1])
    history = model.fit(x, y, epochs=epochs, verbose=0)
    self.assertLen(history.history["loss"], epochs)

    writer.FlushNonExecutionFiles()
    writer.FlushExecutionFiles()

    with debug_events_reader.DebugDataReader(self.dump_root) as reader:
        reader.update()
        fit_executions = [
            execution.op_type
            for execution in reader.executions()
            if "_distributed_function" in execution.op_type
        ]
        self.assertLen(fit_executions, epochs)

        traces = reader.graph_execution_traces()

        def op_types_on(device_name):
            # Op types of all traces recorded on the given device.
            return [
                trace.op_type for trace in traces
                if trace.device_name.endswith(device_name)
            ]

        def values_on(device_name, op_type):
            # Tensor values of the given op type traced on the given device.
            return [
                reader.graph_execution_trace_to_tensor_value(trace)
                for trace in traces
                if trace.op_type == op_type and
                trace.device_name.endswith(device_name)
            ]

        def assert_shapes_match(values_0, values_1):
            # Each device-0 value must have the shape of its device-1 peer.
            for i, value_0 in enumerate(values_0):
                self.assertEqual(value_0.shape, values_1[i].shape)

        num_devices = len(distribution.extended.worker_devices)
        has_second_device = num_devices > 1
        device_name_0 = distribution.extended.worker_devices[0]
        if has_second_device:
            device_name_1 = distribution.extended.worker_devices[1]

        device_0_executed_op_types = op_types_on(device_name_0)
        if has_second_device:
            device_1_executed_op_types = op_types_on(device_name_1)

        checked_op_types = ("MatMul", "BiasAdd", "Relu", "ReluGrad")
        for op_type in checked_op_types:
            self.assertIn(op_type, device_0_executed_op_types)

        if has_second_device:
            # If there are two devices involved, assert the ops inside
            # tf.functions are executed and recorded for the equal numbers
            # of times by the dumping op-callback.
            for op_type in checked_op_types:
                self.assertEqual(device_0_executed_op_types.count(op_type),
                                 device_1_executed_op_types.count(op_type))

        if tensor_debug_mode == "NO_TENSOR":
            for trace in traces:
                self.assertEqual(trace.debug_tensor_value, [])
        elif tensor_debug_mode == "FULL_TENSOR":
            gpu_0_relu_values = values_on(device_name_0, "Relu")
            self.assertTrue(gpu_0_relu_values)
            gpu_0_relu_grad_values = values_on(device_name_0, "ReluGrad")
            self.assertTrue(gpu_0_relu_grad_values)
            if has_second_device:
                gpu_1_relu_values = values_on(device_name_1, "Relu")
                self.assertTrue(gpu_1_relu_values)
                assert_shapes_match(gpu_0_relu_values, gpu_1_relu_values)
                gpu_1_relu_grad_values = values_on(device_name_1, "ReluGrad")
                self.assertTrue(gpu_1_relu_grad_values)
                assert_shapes_match(gpu_0_relu_grad_values,
                                    gpu_1_relu_grad_values)
def testOnGraphExecutionTraceIsCalled(self, tensor_debug_mode):
    """Check the executions and traces collected by a `TestMonitor`.

    A tf.function computing the sum of the unique values of a float32
    vector is run once. A `TestMonitor` attached to a `DebugDataReader` is
    then expected to hold exactly one execution record plus the
    graph-execution traces appropriate for the tensor-debug mode.

    Args:
      tensor_debug_mode: Tensor-debug mode name forwarded to
        `enable_dump_debug_info()`. "CONCISE_HEALTH", "FULL_HEALTH" and
        "FULL_TENSOR" receive mode-specific checks on the traces.
    """
    xs = constant_op.constant([2., 6., 8., 1., 2.], dtype=dtypes.float32)
    writer = dumping_callback.enable_dump_debug_info(
        self.dump_root, tensor_debug_mode=tensor_debug_mode)

    @def_function.function
    def unique_sum(xs):
        """Sum over the unique values, for testing."""
        unique_xs, indices = array_ops.unique(xs)
        return math_ops.reduce_sum(unique_xs), indices

    unique_sum(xs)
    writer.FlushNonExecutionFiles()
    writer.FlushExecutionFiles()

    # Number of entries in each trace's debug_tensor_value, per health mode.
    #   CONCISE_HEALTH:
    #     [tensor_id, element_count, neg_inf_count, pos_inf_count, nan_count].
    #   FULL_HEALTH:
    #     [tensor_id, device_id, dtype, rank, element_count,
    #      neg_inf_count, pos_inf_count, nan_count,
    #      neg_finite_count, zero_count, pos_finite_count].
    health_value_lengths = {"CONCISE_HEALTH": 5, "FULL_HEALTH": 11}

    with debug_events_reader.DebugDataReader(self.dump_root) as reader:
        test_monitor = TestMonitor(reader)
        reader.update()
        self.assertLen(test_monitor.executions, 1)

        execution = test_monitor.executions[0]
        self.assertTrue(execution.wall_time)
        self.assertStartsWith(execution.op_type, "__inference_unique_sum")
        self.assertLen(execution.output_tensor_device_ids, 2)
        self.assertLen(execution.input_tensor_ids, 1)
        self.assertLen(execution.output_tensor_ids, 2)
        self.assertEqual(execution.num_outputs, 2)
        self.assertTrue(execution.graph_id)

        traces = test_monitor.graph_execution_traces
        if tensor_debug_mode in health_value_lengths:
            # [Placeholder:0, Unique:0 , Sum:0].
            # Unique:1 is not traced under the health modes, as it's
            # int-dtype.
            expected_op_types = ("Placeholder", "Unique", "Sum")
            self.assertLen(traces, 3)
            for index, op_type in enumerate(expected_op_types):
                self.assertEqual(traces[index].op_type, op_type)
                self.assertEqual(traces[index].output_slot, 0)
            value_length = health_value_lengths[tensor_debug_mode]
            for index in range(len(expected_op_types)):
                self.assertLen(traces[index].debug_tensor_value, value_length)
        elif tensor_debug_mode == "FULL_TENSOR":
            # [Placeholder:0, Unique:0, Unique:1, Const:0, Sum:0].
            # Columns: op type, output slot, assertion, expected value.
            expected_traces = (
                ("Placeholder", 0, self.assertAllEqual, [2., 6., 8., 1., 2.]),
                ("Unique", 0, self.assertAllEqual, [2., 6., 8., 1.]),
                ("Unique", 1, self.assertAllEqual, [0, 1, 2, 3, 0]),
                ("Const", 0, self.assertAllClose, [0]),
                ("Sum", 0, self.assertAllClose, 17.),
            )
            self.assertLen(traces, 5)
            for index, row in enumerate(expected_traces):
                op_type, output_slot, check, expected_value = row
                trace = traces[index]
                self.assertEqual(trace.op_type, op_type)
                self.assertEqual(trace.output_slot, output_slot)
                # In this mode the value is fetched through the reader
                # rather than stored on the trace itself.
                self.assertIsNone(trace.debug_tensor_value)
                check(reader.graph_execution_trace_to_tensor_value(trace),
                      expected_value)
def testFunctionExecutionWithControlFlow(self, tensor_debug_mode):
    """Check the debug dump of a tf.function containing a `while` loop.

    The function doubles 0.5 four times. The test verifies the op types
    written to the .graphs file, that the execution-related files stay empty
    until `FlushExecutionFiles()` is called, and that the flushed execution
    record and graph-execution traces match the loop's behavior.

    Args:
      tensor_debug_mode: Tensor-debug mode name forwarded to
        `enable_dump_debug_info()`. "NO_TENSOR" and "FULL_TENSOR" receive
        additional mode-specific checks on the traced values.
    """
    writer = dumping_callback.enable_dump_debug_info(
        self.dump_root, tensor_debug_mode=tensor_debug_mode)

    @def_function.function
    def iterative_doubling(x, times):
        i = constant_op.constant(0, dtype=dtypes.int32)
        while i < times:
            x = x * 2.0
            i += 1
        return x

    start_value = constant_op.constant(0.5, dtype=dtypes.float32)
    num_doublings = constant_op.constant(4, dtype=dtypes.int32)
    result = self.evaluate(iterative_doubling(start_value, num_doublings))
    self.assertAllClose(result, 8.0)

    writer.FlushNonExecutionFiles()
    stack_frame_by_id = self._readAndCheckSourceFilesAndStackFrames()

    # Verify the content of the .graphs file.
    graphs_file_contents = self._readAndCheckGraphsFile(stack_frame_by_id)
    context_ids, op_types, op_name_to_op_type, _ = graphs_file_contents
    for expected_op_type in ("Less", "Mul", "AddV2"):
        self.assertIn(expected_op_type, op_types)

    # Before FlushExecutionFiles() is called, the .execution and
    # .graph_execution_traces files should be both empty.
    with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
        execution_iter = reader.execution_iterator()
        graph_execution_traces_iter = reader.graph_execution_traces_iterator()
        with self.assertRaises(StopIteration):
            next(execution_iter)
        with self.assertRaises(StopIteration):
            next(graph_execution_traces_iter)

    # TODO(cais): Backport execution instrumentation to tf.Session.
    writer.FlushExecutionFiles()

    # After the flushing, the .execution file should hold the appropriate
    # contents.
    if context.executing_eagerly():
        # NOTE(b/142486213): Execution of the TF function happens with
        # Session.run() in v1 graph mode, hence it doesn't get logged to the
        # .execution file.
        execution_file_contents = self._readAndCheckExecutionFile()
        (executed_op_types, input_tensor_ids, output_tensor_ids,
         tensor_debug_modes, tensor_values) = execution_file_contents
        self.assertLen(executed_op_types, 1)
        self.assertIn("iterative_doubling", executed_op_types[0])
        self.assertLen(input_tensor_ids[0], 2)
        self.assertLen(output_tensor_ids[0], 1)
        expected_mode = debug_event_pb2.TensorDebugMode.Value(
            tensor_debug_mode)
        self.assertEqual(tensor_debug_modes[0], expected_mode)
        if tensor_debug_mode == "FULL_TENSOR":
            self.assertAllClose(tensor_values, [[8.0]])

    traces_file_contents = self._readAndCheckGraphExecutionTracesFile(
        context_ids)
    op_names, _, output_slots, tensor_values = traces_file_contents
    executed_op_types = [op_name_to_op_type[name] for name in op_names]

    # The Less op should have been executed 5 times.
    self.assertEqual(executed_op_types.count("Less"), 5)
    # The last executed op should be Less.
    self.assertEqual(executed_op_types[-1], "Less")
    # The Mul op should have been executed 4 times.
    self.assertEqual(executed_op_types.count("Mul"), 4)
    # The AddV2 op should have been run, but we refrain from asserting on
    # how many times it's executed.
    self.assertIn("AddV2", executed_op_types)
    for output_slot in output_slots:
        self.assertEqual(output_slot, 0)

    def values_of(wanted_op_type):
        # Traced values, in execution order, of every op of the given type.
        return [
            tensor_values[position]
            for position, op_type in enumerate(executed_op_types)
            if op_type == wanted_op_type
        ]

    if tensor_debug_mode == "NO_TENSOR":
        # Under the default NO_TENSOR tensor-debug mode, the tensor_proto
        # ought to be an empty float32 tensor.
        for tensor_value in tensor_values:
            self.assertEqual(tensor_value.dtype, np.float32)
            self.assertEqual(tensor_value.shape, (0,))
    elif tensor_debug_mode == "FULL_TENSOR":
        less_values = values_of("Less")
        self.assertAllClose(less_values, [True, True, True, True, False])
        mul_values = values_of("Mul")
        self.assertAllClose(mul_values, [1.0, 2.0, 4.0, 8.0])