def testInfNanMonitorOnGraphExecutionTraceUnderFullTensorModeWorks(
    self,
    tensor_value,
    dtype,
    expected_size,
    expected_num_neg_inf,
    expected_num_pos_inf,
    expected_num_nan):
  """Checks InfNanMonitor alerts for a FULL_TENSOR graph execution trace.

  The reader is mocked so that the trace resolves to
  `np.array(tensor_value, dtype=dtype)`. When any of the expected inf/nan
  counts is nonzero, exactly one alert carrying those counts is expected;
  otherwise the monitor must report no alerts.

  Args:
    tensor_value: Value used to build the tensor returned by the mock reader.
    dtype: dtype passed to `np.array` for the tensor.
    expected_size: Expected `size` field of the alert.
    expected_num_neg_inf: Expected `num_neg_inf` field of the alert.
    expected_num_pos_inf: Expected `num_pos_inf` field of the alert.
    expected_num_nan: Expected `num_nan` field of the alert.
  """
  reader = test.mock.MagicMock()
  reader.graph_execution_trace_to_tensor_value.return_value = np.array(
      tensor_value, dtype=dtype)
  inf_nan_monitor = debug_events_monitors.InfNanMonitor(reader)

  digest = debug_events_reader.GraphExecutionTraceDigest(
      1234, 1, "BazOp", "name_scope_3/BazOp_1", 2, "g1")
  full_tensor_trace = debug_events_reader.GraphExecutionTrace(
      digest, ["g0", "g1"], debug_event_pb2.TensorDebugMode.FULL_TENSOR)
  inf_nan_monitor.on_graph_execution_trace(80, full_tensor_trace)

  alerts = inf_nan_monitor.alerts()
  has_bad_values = any(
      (expected_num_neg_inf, expected_num_pos_inf, expected_num_nan))
  if not has_bad_values:
    # A clean tensor must not trigger any alert.
    self.assertEmpty(alerts)
    return

  self.assertLen(alerts, 1)
  alert = alerts[0]
  self.assertEqual(alert.wall_time, 1234)
  self.assertEqual(alert.op_type, "BazOp")
  self.assertEqual(alert.output_slot, 2)
  self.assertEqual(alert.size, expected_size)
  self.assertEqual(alert.num_neg_inf, expected_num_neg_inf)
  self.assertEqual(alert.num_pos_inf, expected_num_pos_inf)
  self.assertEqual(alert.num_nan, expected_num_nan)
  self.assertIsNone(alert.execution_index)
  self.assertEqual(alert.graph_execution_trace_index, 80)
def testGraphExecutionTraceNoTensorDebugValueNoDeviceNameToJson(self):
  """Checks `to_json()` of a trace with no debug tensor value or device."""
  digest = debug_events_reader.GraphExecutionTraceDigest(
      1234, 5678, "FooOp", "Model_1/Foo_2", 1, "deadbeef")
  trace = debug_events_reader.GraphExecutionTrace(
      digest,
      ["g1", "g2", "deadbeef"],
      debug_event_pb2.TensorDebugMode.NO_TENSOR,
      debug_tensor_value=None,
      device_name=None)

  serialized = trace.to_json()

  # Fields that must round-trip with a concrete value.
  expected_fields = (
      ("wall_time", 1234),
      ("op_type", "FooOp"),
      ("op_name", "Model_1/Foo_2"),
      ("output_slot", 1),
      ("graph_id", "deadbeef"),
      ("graph_ids", ("g1", "g2", "deadbeef")),
      ("tensor_debug_mode", debug_event_pb2.TensorDebugMode.NO_TENSOR),
  )
  for key, expected in expected_fields:
    self.assertEqual(serialized[key], expected)

  # Fields that were passed as None must stay None in the JSON output.
  self.assertIsNone(serialized["debug_tensor_value"])
  self.assertIsNone(serialized["device_name"])
def testGraphExecutionTraceWithTensorDebugValueAndDeviceNameToJson(self):
  """Checks `to_json()` of a trace with a debug tensor value and a device."""
  digest = debug_events_reader.GraphExecutionTraceDigest(
      1234, 5678, "FooOp", "Model_1/Foo_2", 1, "deadbeef")
  trace = debug_events_reader.GraphExecutionTrace(
      digest,
      ["g1", "g2", "deadbeef"],
      debug_event_pb2.TensorDebugMode.CURT_HEALTH,
      debug_tensor_value=[3, 1],
      device_name="/device:GPU:0")

  serialized = trace.to_json()

  # The list inputs (graph IDs, debug tensor value) are compared against
  # tuples, as the original assertions did.
  expected_fields = (
      ("wall_time", 1234),
      ("op_type", "FooOp"),
      ("op_name", "Model_1/Foo_2"),
      ("output_slot", 1),
      ("graph_id", "deadbeef"),
      ("graph_ids", ("g1", "g2", "deadbeef")),
      ("tensor_debug_mode", debug_event_pb2.TensorDebugMode.CURT_HEALTH),
      ("debug_tensor_value", (3, 1)),
      ("device_name", "/device:GPU:0"),
  )
  for key, expected in expected_fields:
    self.assertEqual(serialized[key], expected)
def testInfNaNMonitorOnGraphExecutionTraceCurtHealthMode(self):
  """Checks the alert produced for a CURT_HEALTH graph execution trace.

  The trace's debug tensor value flags the presence of inf/nan, so exactly
  one alert is expected. The element counts on that alert are expected to be
  None in this mode.
  """
  reader = test.mock.MagicMock()
  inf_nan_monitor = debug_events_monitors.InfNanMonitor(reader)
  digest = debug_events_reader.GraphExecutionTraceDigest(
      1234, 1, "FooOp", "FooOp_1", 2, "g1")
  # Debug tensor value layout: [tensor_id, any_inf_nan].
  curt_health_value = [9, 1]
  trace = debug_events_reader.GraphExecutionTrace(
      digest,
      ["g0", "g1"],
      debug_event_pb2.TensorDebugMode.CURT_HEALTH,
      debug_tensor_value=curt_health_value)

  inf_nan_monitor.on_graph_execution_trace(55, trace)

  alerts = inf_nan_monitor.alerts()
  self.assertLen(alerts, 1)
  alert = alerts[0]
  self.assertEqual(alert.wall_time, 1234)
  self.assertEqual(alert.op_type, "FooOp")
  self.assertEqual(alert.output_slot, 2)
  # The four fields below are unavailable under CURT_HEALTH mode by design.
  for field_name in ("size", "num_neg_inf", "num_pos_inf", "num_nan"):
    self.assertIsNone(getattr(alert, field_name))
  self.assertIsNone(alert.execution_index)
  self.assertEqual(alert.graph_execution_trace_index, 55)
def testInfNaNMonitorOnGraphExecutionTraceConciseHealthMode(self):
  """Checks the alert produced for a CONCISE_HEALTH graph execution trace.

  The trace's debug tensor value reports nonzero -inf, +inf and nan counts,
  so exactly one alert is expected, carrying the size and the three counts
  from that value.
  """
  mock_reader = test.mock.MagicMock()
  monitor = debug_events_monitors.InfNanMonitor(mock_reader)
  trace_digest = debug_events_reader.GraphExecutionTraceDigest(
      1234, 1, "FooOp", "FooOp_1", 2, "g1")
  trace = debug_events_reader.GraphExecutionTrace(
      trace_digest,
      ["g0", "g1"],
      debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
      # [tensor_id, size, num_neg_inf, num_pos_inf, num_nan].
      debug_tensor_value=[9, 100, 3, 2, 1])
  monitor.on_graph_execution_trace(55, trace)

  self.assertLen(monitor.alerts(), 1)
  alert = monitor.alerts()[0]
  self.assertEqual(alert.wall_time, 1234)
  self.assertEqual(alert.op_type, "FooOp")
  self.assertEqual(alert.output_slot, 2)
  self.assertEqual(alert.size, 100)
  self.assertEqual(alert.num_neg_inf, 3)
  self.assertEqual(alert.num_pos_inf, 2)
  self.assertEqual(alert.num_nan, 1)
  # The alert originates from a graph execution trace, not an eager
  # execution, so no execution index should be attached. This mirrors the
  # check made by the FULL_TENSOR and CURT_HEALTH graph-trace tests.
  self.assertIsNone(alert.execution_index)
  self.assertEqual(alert.graph_execution_trace_index, 55)