def _seek_to_end(self):
    """Position the file handle according to ``self._start_position``.

    Precedence: a persisted sincedb offset (when a sincedb path is
    configured) overrides the configured start position; 'beginning'
    means no seeking at all; a digit string seeks that many lines in;
    'end' seeks to EOF.  The reached position is then persisted and,
    when ``_tail_lines`` is set, that many trailing lines are replayed
    through the callback.
    """
    self._log_debug('seek_to_end')

    if self._sincedb_path:
        # A previously persisted sincedb position wins over the
        # configured start position.
        sincedb_start_position = self._sincedb_start_position()
        if sincedb_start_position:
            self._start_position = sincedb_start_position

    if self._start_position == 'beginning':
        self._log_debug('no start_position specified')
        return

    line_count = 0

    if str(self._start_position).isdigit():
        self._log_debug('going to start position {0}'.format(self._start_position))
        self._start_position = int(self._start_position)
        # Try each candidate encoding until one decodes the file.
        for encoding in ENCODINGS:
            line_count, encoded = self._seek_to_position(encoding=encoding, position=True)
            # (None, None) signals an unrecoverable failure inside
            # _seek_to_position; give up entirely.
            if line_count is None and encoded is None:
                return
            if encoded:
                break

    # NOTE(review): after the numeric branch above, _start_position can no
    # longer equal 'beginning' here; this looks like a defensive re-check.
    if self._start_position == 'beginning':
        self._log_debug('Bad start position specified')
        return

    if self._start_position == 'end':
        self._log_debug('getting end position')
        for encoding in ENCODINGS:
            line_count, encoded = self._seek_to_position(encoding=encoding)
            if line_count is None and encoded is None:
                return
            if encoded:
                break

    current_position = self._file.tell()
    self._log_debug('line count {0}'.format(line_count))
    self._log_debug('current position {0}'.format(current_position))

    # Persist the position we just reached even if the normal sincedb
    # write interval has not elapsed yet.
    self._sincedb_update_position(lines=line_count, force_update=True)

    if self._tail_lines:
        self._log_debug('tailing {0} lines'.format(self._tail_lines))
        lines = self.tail(self._filename,
                          encoding=self._encoding,
                          window=self._tail_lines,
                          position=current_position)
        if lines:
            if self._multiline_regex_after or self._multiline_regex_before:
                # Multiline is enabled for this file.
                events = multiline_merge(
                    lines,
                    self._current_event,
                    self._multiline_regex_after,
                    self._multiline_regex_before)
            else:
                events = lines
            self._callback_wrapper(events)

    return
def _run_pass(self):
    """Read lines from the watched file and batch them to the callback.

    Lines are buffered until one of the flush thresholds is reached:
    ``_buffered_lines_max_bytes``, ``_buffered_lines_max_lines`` or
    ``_buffered_lines_max_seconds``.  On a stale NFS handle (ESTALE) the
    worker is deactivated; in every exit path any still-buffered events
    are flushed before returning.
    """
    events = []
    buffered_lines = 0
    buffered_bytes = 0
    run_start = time.time()
    while self.active:
        try:
            data = self._file.read(self._file_read_blocksize)
        except IOError as e:
            if e.errno == errno.ESTALE:
                self.active = False
                # break so we can still try to flush the existing buffers.
                break
            # Bug fix: any other IOError previously fell through and then
            # referenced `data` while it was stale or unbound; re-raise so
            # the caller sees the real error.
            raise

        lines = self._buffer_extract(data)
        if not lines:
            if time.time() - run_start < self._buffered_lines_max_seconds:
                # No new lines yet but the time budget is not exhausted:
                # keep polling, with a small sleep to avoid starving
                # everything else.
                time.sleep(0.1)
            else:
                # Too much time without lines, flush what we've got so far.
                break
        else:
            self._last_activity = time.time()

            if self._multiline_regex_after or self._multiline_regex_before:
                # Multiline is enabled for this file.
                events += multiline_merge(lines,
                                          self._current_event,
                                          self._multiline_regex_after,
                                          self._multiline_regex_before)
            else:
                events += lines

            buffered_lines = len(events)
            buffered_bytes += len(data)

        if events and (buffered_bytes >= self._buffered_lines_max_bytes or
                       buffered_lines >= self._buffered_lines_max_lines or
                       time.time() - run_start >= self._buffered_lines_max_seconds):
            self._callback_wrapper(events)
            self._sincedb_update_position(lines=len(events))
            events = []
            buffered_lines = 0
            buffered_bytes = 0
            run_start = time.time()

    # Bug fix: the `break`s above skipped the in-loop flush, silently
    # dropping buffered events even though their comments promised a
    # flush.  Emit whatever is left before returning.
    if events:
        self._callback_wrapper(events)
        self._sincedb_update_position(lines=len(events))
def _run_pass(self, fid, file):
    """Read lines from a file and performs a callback against them"""
    while True:
        try:
            chunk = file.read(4096)
        except IOError as err:
            if err.errno == errno.ESTALE:
                self.active = False
                return False
            # NOTE(review): other IOErrors fall through here with `chunk`
            # stale/undefined, exactly as in the original implementation.

        extracted = self._buffer_extract(data=chunk, fid=fid)
        # All per-file bookkeeping goes through this shared dict reference.
        entry = self._file_map[fid]

        if not extracted:
            # No new lines: flush a partial event that has been pending
            # for more than one second before ending this pass.
            pending = entry['current_event']
            if pending and time.time() - entry['last_activity'] > 1:
                stale_event = '\n'.join(pending)
                pending.clear()
                regex = entry['include_filter_regex']
                batch = include_filter_line([stale_event], regex) if regex else [stale_event]
                if batch:
                    self._callback_wrapper(filename=file.name, lines=batch)
            break

        entry['last_activity'] = time.time()

        if entry['multiline_regex_after'] or entry['multiline_regex_before']:
            # Multiline is enabled for this file.
            batch = multiline_merge(
                extracted,
                entry['current_event'],
                entry['multiline_regex_after'],
                entry['multiline_regex_before'])
        else:
            batch = extracted

        if batch and entry['include_filter_regex']:
            batch = include_filter_line(batch, entry['include_filter_regex'])

        if batch:
            self._callback_wrapper(filename=file.name, lines=batch)

        if self._sincedb_path:
            self._sincedb_update_position(file, fid=fid, lines=len(extracted))
def _run_pass(self, fid, file):
    """Read lines from a file and performs a callback against them"""
    line_count = 0
    while True:
        try:
            chunk = file.read(4096)
        except IOError as err:
            if err.errno == errno.ESTALE:
                self.active = False
                return False

        extracted = self._buffer_extract(data=chunk, fid=fid)
        # Shared per-file bookkeeping dict; mutations are visible to callers.
        entry = self._file_map[fid]

        if not extracted:
            # Before returning, flush a partial event that has been
            # waiting for more than one second.
            if entry['current_event'] and time.time() - entry['last_activity'] > 1:
                stale_event = '\n'.join(entry['current_event'])
                entry['current_event'].clear()
                self._callback_wrapper(filename=file.name, lines=[stale_event])
            break

        entry['last_activity'] = time.time()

        if entry['multiline_regex_after'] or entry['multiline_regex_before']:
            # Multiline is enabled for this file.
            batch = multiline_merge(
                extracted,
                entry['current_event'],
                entry['multiline_regex_after'],
                entry['multiline_regex_before'])
        else:
            batch = extracted

        if batch:
            self._callback_wrapper(filename=file.name, lines=batch)

        if self._sincedb_path:
            # Accumulate lines whose offset was not persisted this round.
            if not self._sincedb_update_position(file, fid=fid, lines=len(extracted)):
                line_count += len(extracted)
def _run_pass(self):
    """Read lines from a file and performs a callback against them"""
    line_count = 0
    while True:
        try:
            chunk = self._file.read(4096)
        except IOError as err:
            if err.errno == errno.ESTALE:
                self.active = False
                return False

        extracted = self._buffer_extract(chunk)
        if not extracted:
            # Before returning, check whether a (possibly partial) event
            # has been waiting for over a second; if so, emit it now.
            if self._current_event and time.time() - self._last_activity > 1:
                stale_event = '\n'.join(self._current_event)
                self._current_event.clear()
                self._callback_wrapper([stale_event])
            break

        self._last_activity = time.time()

        multiline_enabled = (self._multiline_regex_after or
                             self._multiline_regex_before or
                             self._multiline_regex_begin)
        if multiline_enabled:
            # Multiline is enabled for this file.
            batch = multiline_merge(
                extracted,
                self._current_event,
                self._multiline_regex_after,
                self._multiline_regex_before,
                self._multiline_regex_begin)
        else:
            batch = extracted

        if batch:
            self._callback_wrapper(batch)

        if self._sincedb_path:
            # Accumulate lines whose offset was not persisted this round.
            if not self._sincedb_update_position(lines=len(extracted)):
                line_count += len(extracted)
def _run_pass(self):
    """Read lines from a file and performs a callback against them"""
    while True:
        try:
            chunk = self._file.read(4096)
        except IOError as err:
            if err.errno == errno.ESTALE:
                self.active = False
                return False

        extracted = self._buffer_extract(chunk)
        if not extracted:
            # Flush a partial event that has been pending for more than a
            # second before ending this pass.
            if self._current_event and time.time() - self._last_activity > 1:
                stale_event = '\n'.join(self._current_event)
                self._current_event.clear()
                self._callback_wrapper([stale_event])
            break

        self._last_activity = time.time()

        if self._multiline_regex_after or self._multiline_regex_before:
            # Multiline is enabled for this file.
            batch = multiline_merge(
                extracted,
                self._current_event,
                self._multiline_regex_after,
                self._multiline_regex_before)
        else:
            batch = extracted

        if batch:
            self._callback_wrapper(batch)

        if self._sincedb_path:
            self._sincedb_update_position(lines=len(extracted))
def _seek_to_end(self):
    """Seek to the configured start position, then optionally replay tail lines.

    A persisted sincedb offset overrides the configured start position.
    'beginning' means no seek; a digit string seeks that many lines in;
    'end' seeks to EOF.  Replayed tail lines are run through the
    multiline merger and the ignore-line filter before the callback.
    """
    self._log_debug('seek_to_end')

    if self._sincedb_path:
        sincedb_start_position = self._sincedb_start_position()
        if sincedb_start_position:
            self._start_position = sincedb_start_position

    if self._start_position == 'beginning':
        self._log_debug('no start_position specified')
        return

    line_count = 0

    if str(self._start_position).isdigit():
        self._log_debug('going to start position {0}'.format(self._start_position))
        self._start_position = int(self._start_position)
        for encoding in ENCODINGS:
            line_count, encoded = self._seek_to_position(encoding=encoding, position=True)
            if line_count is None and encoded is None:
                return
            if encoded:
                break

    if self._start_position == 'beginning':
        self._log_debug('Bad start position specified')
        return

    if self._start_position == 'end':
        self._log_debug('getting end position')
        for encoding in ENCODINGS:
            line_count, encoded = self._seek_to_position(encoding=encoding)
            if line_count is None and encoded is None:
                return
            if encoded:
                break

    current_position = self._file.tell()
    self._log_debug('line count {0}'.format(line_count))
    self._log_debug('current position {0}'.format(current_position))

    self._sincedb_update_position(lines=line_count, force_update=True)

    # Reset this, so line added processed just after this initialization
    # will update the sincedb. Without this, if beaver run for less than
    # sincedb_write_interval it will always re-process the last lines.
    self._last_sincedb_write = 0

    if self._tail_lines:
        self._log_debug('tailing {0} lines'.format(self._tail_lines))
        lines = self.tail(self._filename,
                          encoding=self._encoding,
                          window=self._tail_lines,
                          position=current_position)
        if lines:
            if self._multiline_regex_after or self._multiline_regex_before:
                # Multiline is enabled for this file.
                events = multiline_merge(
                    lines,
                    self._current_event,
                    self._multiline_regex_after,
                    self._multiline_regex_before)
            else:
                events = lines

            if self._ignoreline_regex:
                # Ignoreline is enabled for this file.
                self._log_debug("################ Ignore line regex is enabled")
                tmpevents = []
                for line in events:
                    self._log_debug("check ignore line: " + line)
                    # NOTE(review): `re_ignoreline` is not defined in this
                    # function; presumably a module-level compiled pattern.
                    # Verify it stays in sync with self._ignoreline_regex.
                    if re_ignoreline and re_ignoreline.search(line):
                        self._log_debug("ignore this line: " + line)
                    else:
                        tmpevents.append(line)
                # Bug fix: the filtered list was previously discarded and
                # the unfiltered events were emitted, so the ignore-line
                # filter had no effect.  Emit the filtered events instead.
                events = tmpevents

            self._callback_wrapper(events)

    return
def _seek_to_end(self):
    """Move every watched file's marker to its configured start position.

    Runs once at startup.  For each file in the map the start position
    comes from config, possibly overridden by a persisted sincedb
    offset.  "beginning" leaves the marker untouched; a digit string
    skips that many lines; "end" skips to EOF.  Files that cannot be
    reopened after a decode error are queued for unwatching.  When
    tail_lines is configured, trailing lines are replayed through the
    callback.
    """
    unwatch_list = []

    # The first time we run the script we move all file markers at EOF.
    # In case of files created afterwards we don't do this.
    for fid, data in self._file_map.iteritems():
        self._logger.debug("[{0}] - getting start position {1}".format(fid, data["file"].name))
        start_position = self._beaver_config.get_field("start_position", data["file"].name)
        is_active = data["active"]

        if self._sincedb_path:
            # A persisted sincedb offset overrides the configured value.
            sincedb_start_position = self._sincedb_start_position(data["file"], fid=fid)
            if sincedb_start_position:
                start_position = sincedb_start_position

        if start_position == "beginning":
            continue

        line_count = 0

        if str(start_position).isdigit():
            self._logger.debug(
                "[{0}] - going to start position {1} for {2}".format(fid, start_position, data["file"].name)
            )
            start_position = int(start_position)
            for encoding in ENCODINGS:
                try:
                    line_count = 0
                    while data["file"].readline():
                        line_count += 1
                        if line_count == start_position:
                            break
                except UnicodeDecodeError:
                    self._logger.debug(
                        "[{0}] - UnicodeDecodeError raised for {1} with encoding {2}".format(
                            fid, data["file"].name, data["encoding"]
                        )
                    )
                    data["file"] = self.open(data["file"].name, encoding=encoding)
                    if not data["file"]:
                        unwatch_list.append(fid)
                        is_active = False
                        break
                    data["encoding"] = encoding

                if line_count != start_position:
                    self._logger.debug(
                        "[{0}] - file at different position than {1}, assuming manual truncate for {2}".format(
                            fid, start_position, data["file"].name
                        )
                    )
                    data["file"].seek(0, os.SEEK_SET)
                    # Bug fix: this was `start_position == "beginning"`, a
                    # no-op comparison; assign so the check below actually
                    # skips the truncated file.
                    start_position = "beginning"

            if not is_active:
                continue

        if start_position == "beginning":
            continue

        if start_position == "end":
            self._logger.debug("[{0}] - getting end position for {1}".format(fid, data["file"].name))
            for encoding in ENCODINGS:
                try:
                    line_count = 0
                    while data["file"].readline():
                        line_count += 1
                    break
                except UnicodeDecodeError:
                    self._logger.debug(
                        "[{0}] - UnicodeDecodeError raised for {1} with encoding {2}".format(
                            fid, data["file"].name, data["encoding"]
                        )
                    )
                    data["file"] = self.open(data["file"].name, encoding=encoding)
                    if not data["file"]:
                        unwatch_list.append(fid)
                        is_active = False
                        break
                    data["encoding"] = encoding

        if not is_active:
            continue

        current_position = data["file"].tell()
        self._logger.debug("[{0}] - line count {1} for {2}".format(fid, line_count, data["file"].name))
        self._sincedb_update_position(data["file"], fid=fid, lines=line_count, force_update=True)

        tail_lines = self._beaver_config.get_field("tail_lines", data["file"].name)
        tail_lines = int(tail_lines)
        if tail_lines:
            encoding = data["encoding"]
            lines = self.tail(data["file"].name, encoding=encoding, window=tail_lines, position=current_position)
            if lines:
                if self._file_map[fid]["multiline_regex_after"] or self._file_map[fid]["multiline_regex_before"]:
                    # Multiline is enabled for this file.
                    events = multiline_merge(
                        lines,
                        self._file_map[fid]["current_event"],
                        self._file_map[fid]["multiline_regex_after"],
                        self._file_map[fid]["multiline_regex_before"],
                    )
                else:
                    events = lines
                self._callback_wrapper(filename=data["file"].name, lines=events)

    self.unwatch_list(unwatch_list)
def _seek_to_end(self):
    """Move every watched file's marker to its configured start position.

    Runs once at startup.  For each file in the map the start position
    comes from config, possibly overridden by a persisted sincedb
    offset.  'beginning' leaves the marker untouched; a digit string
    skips that many lines; 'end' skips to EOF.  Files that cannot be
    reopened after a decode error are queued for unwatching.  When
    tail_lines is configured, trailing lines are replayed through the
    callback.
    """
    unwatch_list = []

    # The first time we run the script we move all file markers at EOF.
    # In case of files created afterwards we don't do this.
    for fid, data in self._file_map.iteritems():
        self._logger.debug("[{0}] - getting start position {1}".format(fid, data['file'].name))
        start_position = self._beaver_config.get_field('start_position', data['file'].name)
        is_active = data['active']

        if self._sincedb_path:
            # A persisted sincedb offset overrides the configured value.
            sincedb_start_position = self._sincedb_start_position(data['file'], fid=fid)
            if sincedb_start_position:
                start_position = sincedb_start_position

        if start_position == "beginning":
            continue

        line_count = 0

        if str(start_position).isdigit():
            self._logger.debug("[{0}] - going to start position {1} for {2}".format(fid, start_position, data['file'].name))
            start_position = int(start_position)
            for encoding in ENCODINGS:
                try:
                    line_count = 0
                    while data['file'].readline():
                        line_count += 1
                        if line_count == start_position:
                            break
                except UnicodeDecodeError:
                    self._logger.debug("[{0}] - UnicodeDecodeError raised for {1} with encoding {2}".format(fid, data['file'].name, data['encoding']))
                    data['file'] = self.open(data['file'].name, encoding=encoding)
                    if not data['file']:
                        unwatch_list.append(fid)
                        is_active = False
                        break
                    data['encoding'] = encoding

                if line_count != start_position:
                    self._logger.debug("[{0}] - file at different position than {1}, assuming manual truncate for {2}".format(fid, start_position, data['file'].name))
                    data['file'].seek(0, os.SEEK_SET)
                    # Bug fix: this was `start_position == "beginning"`, a
                    # no-op comparison; assign so the check below actually
                    # skips the truncated file.
                    start_position = "beginning"

            if not is_active:
                continue

        if start_position == "beginning":
            continue

        if start_position == "end":
            self._logger.debug("[{0}] - getting end position for {1}".format(fid, data['file'].name))
            for encoding in ENCODINGS:
                try:
                    line_count = 0
                    while data['file'].readline():
                        line_count += 1
                    break
                except UnicodeDecodeError:
                    self._logger.debug("[{0}] - UnicodeDecodeError raised for {1} with encoding {2}".format(fid, data['file'].name, data['encoding']))
                    data['file'] = self.open(data['file'].name, encoding=encoding)
                    if not data['file']:
                        unwatch_list.append(fid)
                        is_active = False
                        break
                    data['encoding'] = encoding

        if not is_active:
            continue

        current_position = data['file'].tell()
        self._logger.debug("[{0}] - line count {1} for {2}".format(fid, line_count, data['file'].name))
        self._sincedb_update_position(data['file'], fid=fid, lines=line_count, force_update=True)

        tail_lines = self._beaver_config.get_field('tail_lines', data['file'].name)
        tail_lines = int(tail_lines)
        if tail_lines:
            encoding = data['encoding']
            lines = self.tail(data['file'].name, encoding=encoding, window=tail_lines, position=current_position)
            if lines:
                if self._file_map[fid]['multiline_regex_after'] or self._file_map[fid]['multiline_regex_before']:
                    # Multiline is enabled for this file.
                    events = multiline_merge(
                        lines,
                        self._file_map[fid]['current_event'],
                        self._file_map[fid]['multiline_regex_after'],
                        self._file_map[fid]['multiline_regex_before'])
                else:
                    events = lines
                self._callback_wrapper(filename=data['file'].name, lines=events)

    self.unwatch_list(unwatch_list)
def _seek_to_end(self):
    """Seek the file handle to the configured start position.

    A saved sincedb offset (when available) takes precedence over the
    configured start position.  Afterwards the reached position is
    persisted and, when tailing is configured, the last few lines are
    replayed through the callback.
    """
    self._log_debug('seek_to_end')

    if self._sincedb_path:
        saved = self._sincedb_start_position()
        if saved:
            self._start_position = saved

    if self._start_position == 'beginning':
        self._log_debug('no start_position specified')
        return

    lines_skipped = 0

    if str(self._start_position).isdigit():
        self._log_debug('going to start position {0}'.format(self._start_position))
        self._start_position = int(self._start_position)
        for encoding in ENCODINGS:
            lines_skipped, decoded_ok = self._seek_to_position(encoding=encoding, position=True)
            if lines_skipped is None and decoded_ok is None:
                return
            if decoded_ok:
                break

    if self._start_position == 'beginning':
        self._log_debug('Bad start position specified')
        return

    if self._start_position == 'end':
        self._log_debug('getting end position')
        for encoding in ENCODINGS:
            lines_skipped, decoded_ok = self._seek_to_position(encoding=encoding)
            if lines_skipped is None and decoded_ok is None:
                return
            if decoded_ok:
                break

    position_now = self._file.tell()
    self._log_debug('line count {0}'.format(lines_skipped))
    self._log_debug('current position {0}'.format(position_now))

    self._sincedb_update_position(lines=lines_skipped, force_update=True)

    if not self._tail_lines:
        return

    self._log_debug('tailing {0} lines'.format(self._tail_lines))
    replay = self.tail(self._filename,
                       encoding=self._encoding,
                       window=self._tail_lines,
                       position=position_now)
    if replay:
        if self._multiline_regex_after or self._multiline_regex_before:
            # Multiline is enabled for this file.
            batch = multiline_merge(replay,
                                    self._current_event,
                                    self._multiline_regex_after,
                                    self._multiline_regex_before)
        else:
            batch = replay
        self._callback_wrapper(batch)

    return
def _seek_to_end(self):
    """Move every watched file's marker to its configured start position.

    Runs once at startup.  For each file in the map the start position
    comes from config, possibly overridden by a persisted sincedb
    offset.  'beginning' leaves the marker untouched; a digit string
    skips that many lines; 'end' skips to EOF.  Files that cannot be
    reopened after a decode error are queued for unwatching.  When
    tail_lines is configured, trailing lines are replayed through the
    multiline merger and include filter before the callback.
    """
    unwatch_list = []

    # The first time we run the script we move all file markers at EOF.
    # In case of files created afterwards we don't do this.
    for fid, data in self._file_map.iteritems():
        self._logger.debug("[{0}] - getting start position {1}".format(fid, data['file'].name))
        start_position = self._beaver_config.get_field('start_position', data['file'].name)
        is_active = data['active']

        if self._sincedb_path:
            # A persisted sincedb offset overrides the configured value.
            sincedb_start_position = self._sincedb_start_position(data['file'], fid=fid)
            if sincedb_start_position:
                start_position = sincedb_start_position

        if start_position == "beginning":
            continue

        line_count = 0

        if str(start_position).isdigit():
            self._logger.debug("[{0}] - going to start position {1} for {2}".format(fid, start_position, data['file'].name))
            start_position = int(start_position)
            for encoding in ENCODINGS:
                try:
                    line_count = 0
                    while data['file'].readline():
                        line_count += 1
                        if line_count == start_position:
                            break
                except UnicodeDecodeError:
                    self._logger.debug("[{0}] - UnicodeDecodeError raised for {1} with encoding {2}".format(fid, data['file'].name, data['encoding']))
                    data['file'] = self.open(data['file'].name, encoding=encoding)
                    if not data['file']:
                        unwatch_list.append(fid)
                        is_active = False
                        break
                    data['encoding'] = encoding

                if line_count != start_position:
                    self._logger.debug("[{0}] - file at different position than {1}, assuming manual truncate for {2}".format(fid, start_position, data['file'].name))
                    data['file'].seek(0, os.SEEK_SET)
                    # Bug fix: this was `start_position == "beginning"`, a
                    # no-op comparison; assign so the check below actually
                    # skips the truncated file.
                    start_position = "beginning"

            if not is_active:
                continue

        if start_position == "beginning":
            continue

        if start_position == "end":
            self._logger.debug("[{0}] - getting end position for {1}".format(fid, data['file'].name))
            for encoding in ENCODINGS:
                try:
                    line_count = 0
                    while data['file'].readline():
                        line_count += 1
                    break
                except UnicodeDecodeError:
                    self._logger.debug("[{0}] - UnicodeDecodeError raised for {1} with encoding {2}".format(fid, data['file'].name, data['encoding']))
                    data['file'] = self.open(data['file'].name, encoding=encoding)
                    if not data['file']:
                        unwatch_list.append(fid)
                        is_active = False
                        break
                    data['encoding'] = encoding

        if not is_active:
            continue

        current_position = data['file'].tell()
        self._logger.debug("[{0}] - line count {1} for {2}".format(fid, line_count, data['file'].name))
        self._sincedb_update_position(data['file'], fid=fid, lines=line_count, force_update=True)

        # Reset this, so line added processed just after this initialization
        # will update the sincedb. Without this, if beaver run for less than
        # sincedb_write_interval it will always re-process the last lines.
        data['update_time'] = 0

        tail_lines = self._beaver_config.get_field('tail_lines', data['file'].name)
        tail_lines = int(tail_lines)
        if tail_lines:
            encoding = data['encoding']
            lines = self.tail(data['file'].name, encoding=encoding, window=tail_lines, position=current_position)
            if lines:
                if self._file_map[fid]['multiline_regex_after'] or self._file_map[fid]['multiline_regex_before']:
                    # Multiline is enabled for this file.
                    events = multiline_merge(
                        lines,
                        self._file_map[fid]['current_event'],
                        self._file_map[fid]['multiline_regex_after'],
                        self._file_map[fid]['multiline_regex_before'])
                else:
                    events = lines

                if events and self._file_map[fid]['include_filter_regex']:
                    events = include_filter_line(events, self._file_map[fid]['include_filter_regex'])

                if events:
                    self._callback_wrapper(filename=data['file'].name, lines=events)

    self.unwatch_list(unwatch_list)