def test_filter_expiration(self): log_content_before = self.get_event_logs() line_prefix = datetime.datetime.utcnow().strftime( "%Y-%m-%dT%H:%M:%S+00:00") with DbEventsFilter(type="NO_SPACE_ERROR", node="A"): DatabaseLogEvent(type="NO_SPACE_ERROR", regex="A").add_info_and_publish( node="A", line_number=22, line=line_prefix + " this is filtered") time.sleep(5) DatabaseLogEvent(type="NO_SPACE_ERROR", regex="A").add_info_and_publish(node="A", line_number=22, line=line_prefix + " this is filtered") time.sleep(5) line_prefix = datetime.datetime.utcnow().strftime( "%Y-%m-%dT%H:%M:%S+00:00") print(line_prefix) DatabaseLogEvent(type="NO_SPACE_ERROR", regex="A").add_info_and_publish(node="A", line_number=22, line=line_prefix + " not filtered") self.wait_for_event_log_change(log_content_before) log_content_after = self.get_event_logs() self.assertIn("not filtered", log_content_after) self.assertNotIn("this is filtered", log_content_after)
def test_filter(self): log_content_before = self.get_event_logs() enospc_line = "[99.80.124.204] [stdout] Mar 31 09:08:10 warning| [shard 8] commitlog - Exception in segment reservation: storage_io_error (Storage I/O error: 28: No space left on device)" enospc_line_2 = "2019-10-29T12:19:49+00:00 ip-172-30-0-184 !WARNING | scylla: [shard 2] storage_service - " \ "Commitlog error: std::filesystem::__cxx11::filesystem_error (error system:28, filesystem error: open failed: No space left on device [/var/lib/scylla/hints/2/172.30.0.116/HintsLog-1-36028797019122576.log])" with DbEventsFilter(type='NO_SPACE_ERROR'), \ DbEventsFilter(type='BACKTRACE', line='No space left on device'), \ DbEventsFilter(type='DATABASE_ERROR', line='No space left on device'), \ DbEventsFilter(type='FILESYSTEM_ERROR', line='No space left on device'): DatabaseLogEvent(type="NO_SPACE_ERROR", regex="B").add_info_and_publish(node="A", line_number=22, line=enospc_line) DatabaseLogEvent(type="NO_SPACE_ERROR", regex="B").add_info_and_publish(node="A", line_number=22, line=enospc_line) DatabaseLogEvent(type="FILESYSTEM_ERROR", regex="B").add_info_and_publish( node="A", line_number=22, line=enospc_line_2) DatabaseLogEvent(type="NO_SPACE_ERROR", regex="B").add_info_and_publish(node="A", line_number=22, line=enospc_line) DatabaseLogEvent(type="NO_SPACE_ERROR", regex="B").add_info_and_publish(node="A", line_number=22, line=enospc_line) log_content_after = self.get_event_logs() self.assertEqual(log_content_before, log_content_after)
def test_filter_repair(self): log_content_before = self.get_event_logs() failed_repaired_line = '2019-07-28T10:53:29+00:00 ip-10-0-167-91 !INFO | scylla.bin: [shard 0] repair - Got error in row level repair: std::runtime_error (repair id 1 is aborted on shard 0)' with DbEventsFilter(type='DATABASE_ERROR', line="repair's stream failed: streaming::stream_exception"), \ DbEventsFilter(type='RUNTIME_ERROR', line='Can not find stream_manager'), \ DbEventsFilter(type='RUNTIME_ERROR', line='is aborted'): DatabaseLogEvent(type="RUNTIME_ERROR", regex="B").add_info_and_publish( node="A", line_number=22, line=failed_repaired_line) DatabaseLogEvent(type="RUNTIME_ERROR", regex="B").add_info_and_publish( node="A", line_number=22, line=failed_repaired_line) DatabaseLogEvent(type="NO_SPACE_ERROR", regex="B").add_info_and_publish( node="B", line_number=22, line="not filtered") self.wait_for_event_log_change(log_content_before) log_content_after = self.get_event_logs() self.assertIn("not filtered", log_content_after) self.assertNotIn("repair id 1", log_content_after)
def test_severity_changer_db_log(self): """ https://github.com/scylladb/scylla-cluster-tests/issues/2115 """ # 1) lower DatabaseLogEvent to warning for 1sec log_content_before = self.get_event_log_file('warning.log') with EventsSeverityChangerFilter(event_class=DatabaseLogEvent, severity=Severity.WARNING, extra_time_to_expiration=1): DatabaseLogEvent(type="NO_SPACE_ERROR", regex="B").add_info_and_publish(node="A", line_number=22, line='critical that should be lowered') DatabaseLogEvent(type="NO_SPACE_ERROR", regex="B").add_info_and_publish(node="A", line_number=22, line='critical that should be lowered #2') log_content_after = self.wait_for_event_log_change('warning.log', log_content_before) self.assertIn('DatabaseLogEvent', log_content_after) self.assertIn('critical that should be lowered', log_content_after) self.assertIn('critical that should be lowered #2', log_content_after) # 2) one of the next DatabaseLogEvent event should expire the EventsSeverityChangerFilter # (and not crash all subscribers) log_content_before = self.get_event_log_file('error.log') for _ in range(2): time.sleep(1) DatabaseLogEvent(type="NO_SPACE_ERROR", regex="B").add_info_and_publish(node="A", line_number=22, line='critical that should be lowered #2') log_content_after = self.wait_for_event_log_change('error.log', log_content_before) self.assertIn('critical that should be lowered #2', log_content_after)
def test_failed_stall_during_filter(self): # pylint: disable=no-self-use with DbEventsFilter(type="NO_SPACE_ERROR"), \ DbEventsFilter(type='BACKTRACE', line='No space left on device'): event = DatabaseLogEvent(type="REACTOR_STALLED", regex="B") event.add_info_and_publish(node="A", line_number=22, line="[99.80.124.204] [stdout] Mar 31 09:08:10 warning| reactor stall 20") logging.info(event.severity) assert event.severity == Severity.ERROR, event.severity
def test_filter_by_node(self): log_content_before = self.get_event_log_file('events.log') with DbEventsFilter(type="NO_SPACE_ERROR", node="A"): DatabaseLogEvent(type="NO_SPACE_ERROR", regex="B").add_info_and_publish(node="A", line_number=22, line="this is filtered") DatabaseLogEvent(type="NO_SPACE_ERROR", regex="B").add_info_and_publish(node="B", line_number=22, line="not filtered") log_content_after = self.wait_for_event_log_change('events.log', log_content_before) self.assertIn("not filtered", log_content_after) self.assertNotIn("this is filtered", log_content_after)
def test_filter(self): with DbEventsFilter(type="NO_SPACE_ERROR"), DbEventsFilter(type='BACKTRACE', line='No space left on device'): DatabaseLogEvent(type="NO_SPACE_ERROR", regex="B").add_info_and_publish(node="A", line_number=22, line="[99.80.124.204] [stdout] Mar 31 09:08:10 warning| [shard 8] commitlog - Exception in segment reservation: storage_io_error (Storage I/O error: 28: No space left on device)") DatabaseLogEvent(type="NO_SPACE_ERROR", regex="B").add_info_and_publish(node="A", line_number=22, line="[99.80.124.204] [stdout] Mar 31 09:08:10 warning| [shard 8] commitlog - Exception in segment reservation: storage_io_error (Storage I/O error: 28: No space left on device)") DatabaseLogEvent(type="NO_SPACE_ERROR", regex="B").add_info_and_publish(node="A", line_number=22, line="[99.80.124.204] [stdout] Mar 31 09:08:10 warning| [shard 8] commitlog - Exception in segment reservation: storage_io_error (Storage I/O error: 28: No space left on device)") DatabaseLogEvent(type="NO_SPACE_ERROR", regex="B").add_info_and_publish(node="A", line_number=22, line="[99.80.124.204] [stdout] Mar 31 09:08:10 warning| [shard 8] commitlog - Exception in segment reservation: storage_io_error (Storage I/O error: 28: No space left on device)")
def test_filter_upgrade(self): log_content_before = self.get_event_log_file('events.log') known_failure_line = '!ERR | scylla: [shard 3] storage_proxy - Exception when communicating with 10.142.0.56: std::runtime_error (Failed to load schema version b40e405f-462c-38f2-a90c-6f130ddbf6f3)' with DbEventsFilter(type='RUNTIME_ERROR', line='Failed to load schema'): DatabaseLogEvent(type="RUNTIME_ERROR", regex="B").add_info_and_publish(node="A", line_number=22, line=known_failure_line) DatabaseLogEvent(type="RUNTIME_ERROR", regex="B").add_info_and_publish(node="A", line_number=22, line=known_failure_line) DatabaseLogEvent(type="NO_SPACE_ERROR", regex="B").add_info_and_publish(node="B", line_number=22, line="not filtered") log_content_after = self.wait_for_event_log_change('events.log', log_content_before) self.assertIn("not filtered", log_content_after) self.assertNotIn("Exception when communicating", log_content_after)
def test_error_signal_is_stopping_test(self): # pylint: disable=no-self-use DatabaseLogEvent(regex="", severity=Severity.CRITICAL, type="None").add_info_and_publish( "node1", "A line we didn't expect", "????") time.sleep(10) raise Exception("this won't happen")
def test_stall_severity(self): # pylint: disable=no-self-use event = DatabaseLogEvent(type="REACTOR_STALLED", regex="B") event.add_info_and_publish(node="A", line_number=22, line="[99.80.124.204] [stdout] Mar 31 09:08:10 warning| reactor stall 2000 ms") assert event.severity == Severity.NORMAL event = DatabaseLogEvent(type="REACTOR_STALLED", regex="B") event.add_info_and_publish(node="A", line_number=22, line="[99.80.124.204] [stdout] Mar 31 09:08:10 warning| reactor stall 5000 ms") assert event.severity == Severity.ERROR
def test_stall_severity(self): event = DatabaseLogEvent(type="REACTOR_STALLED", regex="B") event.add_info_and_publish(node="A", line_number=22, line="[99.80.124.204] [stdout] Mar 31 09:08:10 warning| reactor stall 2000 ms") self.assertTrue(event.severity == Severity.NORMAL) event = DatabaseLogEvent(type="REACTOR_STALLED", regex="B") event.add_info_and_publish(node="A", line_number=22, line="[99.80.124.204] [stdout] Mar 31 09:08:10 warning| reactor stall 5000 ms") self.assertTrue(event.severity == Severity.CRITICAL)
def test_default_filters(self): log_content_before = self.get_event_log_file('events.log') DatabaseLogEvent(type="BACKTRACE", regex="backtrace").add_info_and_publish(node="A", line_number=22, line="Jul 01 03:37:31 ip-10-0-127-151.eu-west-1. \ compute.internal scylla[6026]:\ Rate-limit: supressed 4294967292 \ backtraces on shard 5") DatabaseLogEvent(type="BACKTRACE", regex="backtrace").add_info_and_publish(node="A", line_number=22, line="other back trace that shouldn't be filtered") log_content_after = self.wait_for_event_log_change('events.log', log_content_before) self.assertIn('other back trace', log_content_after) self.assertNotIn('supressed', log_content_after)
def test_scylla_log_event(self): # pylint: disable=no-self-use str(DatabaseLogEvent(type="A", regex="B")) event = DatabaseLogEvent(type="A", regex="B") event.add_info("node", line='[99.80.124.204] [stdout] Mar 31 09:08:10 warning| reactor stall 2000 ms', line_number=213) event.add_backtrace_info("0x002342340\n0x12423434") str(event)
def test_measure_speed_of_events_processing(self): # pylint: disable=no-self-use non_guaranteed_delivery = [] for _ in range(5): start_time = time.time() for _ in range(1000): evt = DatabaseLogEvent(type="NO_SPACE_ERROR", regex="B") evt.add_info(node="B", line_number=22, line="not filtered") evt.publish(guaranteed=False) non_guaranteed_delivery.append(time.time() - start_time) guaranteed_delivery = [] for _ in range(5): start_time = time.time() for _ in range(1000): evt = DatabaseLogEvent(type="NO_SPACE_ERROR", regex="B") evt.add_info(node="B", line_number=22, line="not filtered") evt.publish(guaranteed=True) guaranteed_delivery.append(time.time() - start_time) non_guaranteed_avg = sum(non_guaranteed_delivery) / len( non_guaranteed_delivery) guaranteed_avg = sum(guaranteed_delivery) / len(guaranteed_delivery) print( f"Rate of publishing 1000 events in guaranteed mode vs non guaranteed mode is {guaranteed_avg}/{non_guaranteed_avg}={(guaranteed_avg/non_guaranteed_avg) * 100}" )