def test_unique_id(self):
    """Thread ids must look like 'log_<n>' with sequence numbers that grow by one per call."""
    thread_id = LogFileProcessor.generate_unique_thread_id()
    self.assertTrue(thread_id.startswith('log_'))

    # The text after the 'log_' prefix is the numeric sequence value.
    seq_number = int(thread_id[4:])
    self.assertTrue(seq_number > 0)
    self.assertEquals('log_%d' % seq_number, thread_id)

    # A second call must yield the very next sequence number.
    next_id = LogFileProcessor.generate_unique_thread_id()
    self.assertEquals('log_%d' % (seq_number + 1), next_id)
def test_log_attributes(self):
    """Events emitted by the processor must carry the configured log attributes."""
    processor = LogFileProcessor(self.__path, file_system=self.__file_system,
                                 log_attributes={'host': 'scalyr-1'})
    # Prime the processor with one pass over the (still empty) file so that the
    # upcoming append is noticed.
    processor.perform_processing(TestLogFileProcessor.TestAddEventsRequest(),
                                 current_time=self.__fake_time)
    self.append_file(self.__path, 'First line\nSecond line\n')

    request = TestLogFileProcessor.TestAddEventsRequest()
    completion_callback, buffer_full = processor.perform_processing(
        request, current_time=self.__fake_time)
    self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
    self.assertEquals(2, request.total_events())
    for event in request.events[0:2]:
        self.assertEquals('scalyr-1', event['attrs']['host'])
def __scan_for_new_logs_if_necessary(self, current_time=None, checkpoints=None,
                                     logs_initial_positions=None, copy_at_index_zero=False):
    """If it has been sufficient time since we last checked, scan the file system for new files that match the
    logs that should be copied.

    @param checkpoints: A dict mapping file paths to the checkpoint to use for them to determine where copying
        should begin.
    @param logs_initial_positions: A dict mapping file paths to what file offset the copying should begin from
        if there is no checkpoint for them.
    @param copy_at_index_zero: If True, then any new file that doesn't have a checkpoint or an initial position,
        will begin being copied from the start of the file rather than the current end.
    @param current_time: If not None, the time to use as the current time.

    @type checkpoints: dict
    @type logs_initial_positions: dict
    @type copy_at_index_zero: bool
    @type current_time: float
    """
    if current_time is None:
        current_time = time.time()

    # Rate-limit scans: bail out if the last scan was too recent.
    # NOTE(review): this also returns when __last_new_file_scan_time is None, so a scan
    # can only ever run if the attribute is initialized to a numeric value (e.g. 0)
    # elsewhere -- confirm against __init__, otherwise no scan would ever happen.
    if (self.__last_new_file_scan_time is None or
            current_time - self.__last_new_file_scan_time < self.__config.max_new_log_detection_time):
        return
    self.__last_new_file_scan_time = current_time

    if checkpoints is None:
        checkpoints = {}

    # Fold explicit initial positions into the checkpoint map so find_matches only
    # has to consult a single source for starting offsets.  Existing checkpoints win.
    if logs_initial_positions is not None:
        for log_path in logs_initial_positions:
            if not log_path in checkpoints:
                checkpoints[log_path] = LogFileProcessor.create_checkpoint(
                    logs_initial_positions[log_path])

    # make a shallow copy of log_matchers under the lock
    log_matchers = []
    self.__lock.acquire()
    try:
        log_matchers = self.__log_matchers[:]
    finally:
        self.__lock.release()

    # iterate over the copy so we don't have to hold the lock on the list of
    # log_matchers while we create the LogFileProcessors
    for matcher in log_matchers:
        for new_processor in matcher.find_matches(
                self.__log_paths_being_processed, checkpoints, copy_at_index_zero=copy_at_index_zero):
            self.__log_processors.append(new_processor)
            # Record the path so it is not matched again on a later scan.
            self.__log_paths_being_processed[
                new_processor.log_path] = new_processor
def test_log_attributes(self):
    """Events read from the log file must include the attributes the processor was built with."""
    attrs = {'host': 'scalyr-1'}
    log_processor = LogFileProcessor(self.__path, file_system=self.__file_system,
                                     log_attributes=attrs)
    # Initial pass over the empty file so the subsequent append is detected.
    log_processor.perform_processing(
        TestLogFileProcessor.TestAddEventsRequest(), current_time=self.__fake_time)
    self.append_file(self.__path, 'First line\nSecond line\n')

    events = TestLogFileProcessor.TestAddEventsRequest()
    result = log_processor.perform_processing(events, current_time=self.__fake_time)
    completion_callback = result[0]

    self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
    self.assertEquals(events.total_events(), 2)
    self.assertEquals('scalyr-1', events.events[0]['attrs']['host'])
    self.assertEquals('scalyr-1', events.events[1]['attrs']['host'])
def setUp(self):
    """Create a temp directory holding an empty log file and a primed LogFileProcessor."""
    self.__tempdir = tempfile.mkdtemp()
    self.__file_system = FileSystem()
    self.__path = os.path.join(self.__tempdir, 'text.txt')
    self.__fake_time = 10

    # Write an empty file and run one processing pass over it.  That leaves the
    # processor positioned so that lines appended by the individual tests will be
    # noticed.  No log attributes are configured, so only the bytes of the event
    # messages count as the cost.
    self.write_file(self.__path, '')
    self.log_processor = LogFileProcessor(self.__path, file_system=self.__file_system,
                                          log_attributes={})
    callback, unused_buffer_full = self.log_processor.perform_processing(
        TestLogFileProcessor.TestAddEventsRequest(), current_time=self.__fake_time)
    self.assertFalse(callback(LogFileProcessor.SUCCESS))
def __scan_for_new_logs_if_necessary(self, current_time=None, checkpoints=None,
                                     logs_initial_positions=None, copy_at_index_zero=False):
    """If it has been sufficient time since we last checked, scan the file system for new files that match the
    logs that should be copied.

    @param checkpoints: A dict mapping file paths to the checkpoint to use for them to determine where copying
        should begin.
    @param logs_initial_positions: A dict mapping file paths to what file offset the copying should begin from
        if there is no checkpoint for them.
    @param copy_at_index_zero: If True, then any new file that doesn't have a checkpoint or an initial position,
        will begin being copied from the start of the file rather than the current end.
    @param current_time: If not None, the time to use as the current time.

    @type checkpoints: dict
    @type logs_initial_positions: dict
    @type copy_at_index_zero: bool
    @type current_time: float
    """
    if current_time is None:
        current_time = time.time()

    # Rate-limit scans: bail out if the last scan was too recent.
    # NOTE(review): this also returns when __last_new_file_scan_time is None, so a scan
    # can only ever run if the attribute is initialized to a numeric value (e.g. 0)
    # elsewhere -- confirm against __init__, otherwise no scan would ever happen.
    if (self.__last_new_file_scan_time is None or
            current_time - self.__last_new_file_scan_time < self.__config.max_new_log_detection_time):
        return
    self.__last_new_file_scan_time = current_time

    if checkpoints is None:
        checkpoints = {}

    # Fold explicit initial positions into the checkpoint map so find_matches only
    # has to consult a single source for starting offsets.  Existing checkpoints win.
    if logs_initial_positions is not None:
        for log_path in logs_initial_positions:
            if not log_path in checkpoints:
                checkpoints[log_path] = LogFileProcessor.create_checkpoint(logs_initial_positions[log_path])

    # Create processors for any newly matched files and record their paths so they
    # are not matched again on later scans.
    # NOTE(review): unlike the lock-protected variant of this method elsewhere in the
    # file, __log_matchers is iterated here without holding a lock -- verify this
    # instance is only ever touched from a single thread.
    for matcher in self.__log_matchers:
        for new_processor in matcher.find_matches(self.__log_paths_being_processed, checkpoints,
                                                  copy_at_index_zero=copy_at_index_zero):
            self.__log_processors.append(new_processor)
            self.__log_paths_being_processed[new_processor.log_path] = True
class TestLogFileProcessor(unittest.TestCase):
    """Exercises LogFileProcessor: reading appended lines, fail/retry and fail/drop
    semantics, sampling, redaction, deletion detection and log attributes.

    Note: written for Python 2 (long literals such as 23L, assertEquals aliases).
    """

    def setUp(self):
        self.__tempdir = tempfile.mkdtemp()
        self.__file_system = FileSystem()
        self.__path = os.path.join(self.__tempdir, 'text.txt')
        self.__fake_time = 10

        # Create the processor to test. We have it do one scan of an empty
        # file so that when we next append lines to it, it will notice it.
        # For now, we create one that does not have any log attributes and only
        # counts the bytes of events messages as the cost.
        self.write_file(self.__path, '')
        self.log_processor = LogFileProcessor(self.__path, file_system=self.__file_system, log_attributes={})
        (completion_callback, buffer_full) = self.log_processor.perform_processing(
            TestLogFileProcessor.TestAddEventsRequest(), current_time=self.__fake_time)
        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))

    def test_basic_usage(self):
        """Appended lines become events and the byte counters advance on success."""
        log_processor = self.log_processor
        self.append_file(self.__path, 'First line\nSecond line\n')
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(
            events, current_time=self.__fake_time)

        # Before the completion callback runs, the bytes are still only pending.
        status = log_processor.generate_status()
        self.assertEquals(23L, status.total_bytes_pending)
        self.assertEquals(0L, status.total_bytes_copied)

        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(2, events.total_events())
        self.assertEquals(events.get_message(0), 'First line\n')
        self.assertEquals(events.get_message(1), 'Second line\n')

        # After SUCCESS, pending bytes move to the copied counter.
        status = log_processor.generate_status()
        self.assertEquals(0L, status.total_bytes_pending)
        self.assertEquals(23L, status.total_bytes_copied)

        # Add some more text to make sure it appears.
        self.append_file(self.__path, 'Third line\n')
        log_processor.scan_for_new_bytes(current_time=self.__fake_time)
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(events, current_time=self.__fake_time)

        status = log_processor.generate_status()
        self.assertEquals(11L, status.total_bytes_pending)
        self.assertEquals(23L, status.total_bytes_copied)

        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(events.get_message(0), 'Third line\n')

        status = log_processor.generate_status()
        self.assertEquals(0L, status.total_bytes_pending)
        self.assertEquals(34L, status.total_bytes_copied)

    def test_fail_and_retry(self):
        """After FAIL_AND_RETRY, the same lines are handed out again on the next pass."""
        log_processor = self.log_processor

        self.append_file(self.__path, 'First line\nSecond line\n')
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(
            events, current_time=self.__fake_time)

        self.assertEquals(2, events.total_events())
        self.assertEquals(events.get_message(0), 'First line\n')
        self.assertEquals(events.get_message(1), 'Second line\n')

        # Signal failure with retry; the processor must rewind.
        self.assertFalse(completion_callback(LogFileProcessor.FAIL_AND_RETRY))

        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(events, current_time=self.__fake_time)

        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(2, events.total_events())
        self.assertEquals(events.get_message(0), 'First line\n')
        self.assertEquals(events.get_message(1), 'Second line\n')

    def test_fail_and_drop(self):
        """After FAIL_AND_DROP, the failed lines are skipped and only new content is read."""
        log_processor = self.log_processor
        self.append_file(self.__path, 'First line\nSecond line\n')
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(
            events, current_time=self.__fake_time)

        self.assertEquals(2, events.total_events())
        self.assertEquals(events.get_message(0), 'First line\n')
        self.assertEquals(events.get_message(1), 'Second line\n')

        # Signal failure without retry; the two lines above are dropped.
        self.assertFalse(completion_callback(LogFileProcessor.FAIL_AND_DROP))

        # Add some more text to make sure it appears.
        self.append_file(self.__path, 'Third line\n')
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(events, current_time=self.__fake_time)

        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(1, events.total_events())
        self.assertEquals(events.get_message(0), 'Third line\n')

    def test_sampling_rule(self):
        """A 0-rate sampler drops matching lines while a 1-rate sampler keeps them."""
        log_processor = self.log_processor
        log_processor.add_sampler('INFO', 0)
        log_processor.add_sampler('ERROR', 1)
        self.append_file(self.__path, 'INFO First line\nERROR Second line\n')
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(events, current_time=self.__fake_time)

        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(events.get_message(0), 'ERROR Second line\n')

    def test_redaction_rule(self):
        """A redaction rule rewrites matching portions of each line before emission."""
        log_processor = self.log_processor
        log_processor.add_redacter('password=[^&]+', 'password=foo')
        self.append_file(self.__path, 'GET /foo&password=FakePassword&start=true\n')
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(events, current_time=self.__fake_time)

        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(events.get_message(0), 'GET /foo&password=foo&start=true\n')

    def test_signals_deletion(self):
        """The completion callback only signals processor removal ~10 minutes after the file is deleted."""
        log_processor = self.log_processor

        # Delete the file.
        os.remove(self.__path)

        # We won't signal that the file processor should be deleted until 10 mins have passed.
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(events, current_time=self.__fake_time)
        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(0, events.total_events())

        # 9 minutes later: still not signalled.
        self.__fake_time += 9 * 60
        (completion_callback, buffer_full) = log_processor.perform_processing(events, current_time=self.__fake_time)
        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))

        # Just past the 10 minute mark: removal is signalled.
        self.__fake_time += 62
        (completion_callback, buffer_full) = log_processor.perform_processing(events, current_time=self.__fake_time)
        self.assertTrue(completion_callback(LogFileProcessor.SUCCESS))

    def test_log_attributes(self):
        """Configured log attributes are stamped on every emitted event."""
        log_processor = LogFileProcessor(self.__path, file_system=self.__file_system,
                                         log_attributes={'host': 'scalyr-1'})
        # Prime the processor so the upcoming append is noticed.
        log_processor.perform_processing(TestLogFileProcessor.TestAddEventsRequest(),
                                         current_time=self.__fake_time)
        self.append_file(self.__path, 'First line\nSecond line\n')
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(events, current_time=self.__fake_time)
        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(events.total_events(), 2)
        self.assertEquals('scalyr-1', events.events[0]['attrs']['host'])
        self.assertEquals('scalyr-1', events.events[1]['attrs']['host'])

    def write_file(self, path, *lines):
        """Overwrite the file at `path` with the concatenation of `lines`."""
        contents = ''.join(lines)
        file_handle = open(path, 'w')
        file_handle.write(contents)
        file_handle.close()

    def append_file(self, path, *lines):
        """Append the concatenation of `lines` to the file at `path`."""
        contents = ''.join(lines)
        file_handle = open(path, 'a')
        file_handle.write(contents)
        file_handle.close()

    class TestAddEventsRequest(object):
        """Minimal in-memory stand-in for an add-events request: records events up to
        `limit` and supports position save/rollback."""

        def __init__(self, limit=10):
            self.events = []
            self.__limit = limit

        def add_event(self, event):
            # Reject the event (returning False) once the limit is reached.
            if len(self.events) < self.__limit:
                self.events.append(event)
                return True
            else:
                return False

        def position(self):
            # The position is just the number of events recorded so far.
            return len(self.events)

        def set_position(self, position):
            # Roll back to a previously captured position by truncating the list.
            self.events = self.events[0:position]

        def get_message(self, index):
            """Returns the message field from an events object."""
            return self.events[index]['attrs']['message']

        def total_events(self):
            return len(self.events)
class TestLogFileProcessor(unittest.TestCase):
    """Exercises LogFileProcessor: reading appended lines, fail/retry and fail/drop
    semantics, sampling, redaction, deletion detection, log attributes and per-log
    thread registration.

    Note: written for Python 2 (long literals such as 23L, assertEquals aliases).
    """

    def setUp(self):
        self.__tempdir = tempfile.mkdtemp()
        self.__file_system = FileSystem()
        self.__path = os.path.join(self.__tempdir, 'text.txt')
        self.__fake_time = 10

        # Create the processor to test. We have it do one scan of an empty
        # file so that when we next append lines to it, it will notice it.
        # For now, we create one that does not have any log attributes and only
        # counts the bytes of events messages as the cost.
        self.write_file(self.__path, '')
        self.log_processor = LogFileProcessor(self.__path, file_system=self.__file_system, log_attributes={})
        (completion_callback, buffer_full) = self.log_processor.perform_processing(
            TestLogFileProcessor.TestAddEventsRequest(), current_time=self.__fake_time)
        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))

    def test_basic_usage(self):
        """Appended lines become events, a thread is registered and byte counters advance."""
        log_processor = self.log_processor
        self.append_file(self.__path, 'First line\nSecond line\n')
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(
            events, current_time=self.__fake_time)

        # Exactly one thread entry should have been added for this log.
        self.assertEquals(len(events.threads), 1)

        # Before the completion callback runs, the bytes are still only pending.
        status = log_processor.generate_status()
        self.assertEquals(23L, status.total_bytes_pending)
        self.assertEquals(0L, status.total_bytes_copied)

        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(2, events.total_events())
        self.assertEquals(events.get_message(0), 'First line\n')
        self.assertEquals(events.get_message(1), 'Second line\n')

        # After SUCCESS, pending bytes move to the copied counter.
        status = log_processor.generate_status()
        self.assertEquals(0L, status.total_bytes_pending)
        self.assertEquals(23L, status.total_bytes_copied)

        # Add some more text to make sure it appears.
        self.append_file(self.__path, 'Third line\n')
        log_processor.scan_for_new_bytes(current_time=self.__fake_time)
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(
            events, current_time=self.__fake_time)

        status = log_processor.generate_status()
        self.assertEquals(11L, status.total_bytes_pending)
        self.assertEquals(23L, status.total_bytes_copied)

        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(events.get_message(0), 'Third line\n')

        status = log_processor.generate_status()
        self.assertEquals(0L, status.total_bytes_pending)
        self.assertEquals(34L, status.total_bytes_copied)

    def test_fail_and_retry(self):
        """After FAIL_AND_RETRY, the same lines are handed out again on the next pass."""
        log_processor = self.log_processor

        self.append_file(self.__path, 'First line\nSecond line\n')
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(
            events, current_time=self.__fake_time)

        self.assertEquals(2, events.total_events())
        self.assertEquals(events.get_message(0), 'First line\n')
        self.assertEquals(events.get_message(1), 'Second line\n')

        # Signal failure with retry; the processor must rewind.
        self.assertFalse(completion_callback(LogFileProcessor.FAIL_AND_RETRY))

        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(
            events, current_time=self.__fake_time)

        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(2, events.total_events())
        self.assertEquals(events.get_message(0), 'First line\n')
        self.assertEquals(events.get_message(1), 'Second line\n')

    def test_fail_and_drop(self):
        """After FAIL_AND_DROP, the failed lines are skipped and only new content is read."""
        log_processor = self.log_processor
        self.append_file(self.__path, 'First line\nSecond line\n')
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(
            events, current_time=self.__fake_time)

        self.assertEquals(2, events.total_events())
        self.assertEquals(events.get_message(0), 'First line\n')
        self.assertEquals(events.get_message(1), 'Second line\n')

        # Signal failure without retry; the two lines above are dropped.
        self.assertFalse(completion_callback(LogFileProcessor.FAIL_AND_DROP))

        # Add some more text to make sure it appears.
        self.append_file(self.__path, 'Third line\n')
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(
            events, current_time=self.__fake_time)

        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(1, events.total_events())
        self.assertEquals(events.get_message(0), 'Third line\n')

    def test_sampling_rule(self):
        """A 0-rate sampler drops matching lines while a 1-rate sampler keeps them."""
        log_processor = self.log_processor
        log_processor.add_sampler('INFO', 0)
        log_processor.add_sampler('ERROR', 1)
        self.append_file(self.__path, 'INFO First line\nERROR Second line\n')
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(
            events, current_time=self.__fake_time)

        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(events.get_message(0), 'ERROR Second line\n')

    def test_redaction_rule(self):
        """A redaction rule rewrites matching portions of each line before emission."""
        log_processor = self.log_processor
        log_processor.add_redacter('password=[^&]+', 'password=foo')
        self.append_file(self.__path, 'GET /foo&password=FakePassword&start=true\n')
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(
            events, current_time=self.__fake_time)

        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(events.get_message(0), 'GET /foo&password=foo&start=true\n')

    def test_signals_deletion(self):
        """The completion callback only signals processor removal ~10 minutes after the file is deleted."""
        log_processor = self.log_processor

        # Delete the file.
        os.remove(self.__path)

        # We won't signal that the file processor should be deleted until 10 mins have passed.
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(
            events, current_time=self.__fake_time)
        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(0, events.total_events())

        # 9 minutes later: still not signalled.
        self.__fake_time += 9 * 60
        (completion_callback, buffer_full) = log_processor.perform_processing(
            events, current_time=self.__fake_time)
        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))

        # Just past the 10 minute mark: removal is signalled.
        self.__fake_time += 62
        (completion_callback, buffer_full) = log_processor.perform_processing(
            events, current_time=self.__fake_time)
        self.assertTrue(completion_callback(LogFileProcessor.SUCCESS))

    def test_log_attributes(self):
        """Configured log attributes are stamped on every emitted event."""
        log_processor = LogFileProcessor(self.__path, file_system=self.__file_system,
                                         log_attributes={'host': 'scalyr-1'})
        # Prime the processor so the upcoming append is noticed.
        log_processor.perform_processing(
            TestLogFileProcessor.TestAddEventsRequest(), current_time=self.__fake_time)
        self.append_file(self.__path, 'First line\nSecond line\n')
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(
            events, current_time=self.__fake_time)
        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(events.total_events(), 2)
        self.assertEquals('scalyr-1', events.events[0]['attrs']['host'])
        self.assertEquals('scalyr-1', events.events[1]['attrs']['host'])

    def test_unique_id(self):
        """Thread ids look like 'log_<n>' and the sequence number increases by one per call."""
        first_thread_id = LogFileProcessor.generate_unique_thread_id()
        self.assertTrue(first_thread_id.startswith('log_'))
        sequence = int(first_thread_id[4:])
        self.assertTrue(sequence > 0)
        self.assertEquals(first_thread_id, 'log_%d' % sequence)
        self.assertEquals(LogFileProcessor.generate_unique_thread_id(), 'log_%d' % (sequence + 1))

    def test_thread_id_fails_to_be_added(self):
        """If registering the thread id fails, the read lines are unread and state resets."""
        log_processor = self.log_processor
        self.append_file(self.__path, 'First line\nSecond line\n')

        # Make sure if adding the thread id in fails, then unread the lines and reset everything to normal.
        # We can see if it is normal by making sure the lines are read in the next successful call.
        events = TestLogFileProcessor.TestAddEventsRequest(thread_limit=0)
        (completion_callback, buffer_full) = log_processor.perform_processing(
            events, current_time=self.__fake_time)
        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(0, events.total_events())
        self.assertEquals(len(events.threads), 0)

        # Now have a successful call and make sure we get the lines.
        events = TestLogFileProcessor.TestAddEventsRequest()
        (completion_callback, buffer_full) = log_processor.perform_processing(
            events, current_time=self.__fake_time)
        self.assertFalse(completion_callback(LogFileProcessor.SUCCESS))
        self.assertEquals(2, events.total_events())
        self.assertEquals(events.get_message(0), 'First line\n')
        self.assertEquals(events.get_message(1), 'Second line\n')
        self.assertEquals(len(events.threads), 1)

    def write_file(self, path, *lines):
        """Overwrite the file at `path` (binary mode) with the concatenation of `lines`."""
        contents = ''.join(lines)
        file_handle = open(path, 'wb')
        file_handle.write(contents)
        file_handle.close()

    def append_file(self, path, *lines):
        """Append the concatenation of `lines` to the file at `path` (binary mode)."""
        contents = ''.join(lines)
        file_handle = open(path, 'ab')
        file_handle.write(contents)
        file_handle.close()

    class TestAddEventsRequest(object):
        """Minimal in-memory stand-in for an add-events request: records events up to
        `limit`, thread registrations up to `thread_limit`, and supports position
        save/rollback of both."""

        def __init__(self, limit=10, thread_limit=10):
            self.events = []
            self.__limit = limit
            self.__thread_limit = thread_limit
            self.threads = {}

        def add_event(self, event):
            # Reject the event (returning False) once the limit is reached.
            if len(self.events) < self.__limit:
                self.events.append(event)
                return True
            else:
                return False

        def position(self):
            # Capture both the event count and a snapshot of the thread map so
            # set_position can roll both back.
            return [len(self.events), dict(self.threads)]

        def set_position(self, position):
            # Roll back events and thread registrations to a captured position.
            self.events = self.events[0:position[0]]
            self.threads = position[1]

        def add_thread(self, thread_id, thread_name):
            # Reject the registration (returning False) once the thread limit is hit.
            if self.__thread_limit == len(self.threads):
                return False
            self.threads[thread_id] = thread_name
            return True

        def get_message(self, index):
            """Returns the message field from an events object."""
            return self.events[index]['attrs']['message']

        def total_events(self):
            return len(self.events)