def test_copytruncate_on_smaller(self):
    self.test_readlines()
    self.copytruncate()
    new_lines = "4\n5\n"
    self.append(new_lines)
    pygtail = Pygtail(self.logfile.name, copytruncate=True)
    self.assertEqual(pygtail.read(), new_lines)
def run(self): """ Main process """ self.logger.debug('Thread started') try: #purge new lines Pygtail(self.log_file_path).readlines() #handle new lines while self.running: try: for log_line in Pygtail(self.log_file_path): if isinstance(log_line, str): log_line = log_line.decode('utf-8') log_line = log_line.strip() self.logger.debug('New log line: %s' % log_line) self.send_log_callback(log_line) #pause time.sleep(0.5) except: self.logger.exception(u'Exception on log watcher:') except: self.logger.exception(u'Fatal exception on log watcher:') self.logger.debug(u'Thread stopped')
def test_subsequent_read_with_new_data(self):
    pygtail = Pygtail(self.logfile.name)
    self.assertEqual(pygtail.read(), self.test_str)
    new_lines = "4\n5\n"
    self.append(new_lines)
    new_pygtail = Pygtail(self.logfile.name)
    self.assertEqual(new_pygtail.read(), new_lines)
def __init__(self, filepath, groupby, groupname=None):
    self.groupmatch = re.compile(groupby)

    # write an offset file so that we start somewhat at the end of the file
    # either filepath is a path or a syslogd url
    (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(filepath)
    if scheme == 'syslog':
        host, port = netloc.split(':')
        self.fin = QueueFile()
        self.server = SocketServer.UDPServer((host, int(port)), SyslogUDPHandler)
        self.server.queue = self.fin
        th = threading.Thread(target=lambda: self.server.serve_forever(poll_interval=0.5))
        th.setDaemon(True)
        th.start()
    else:
        # Create a temporary file with offset info
        self.offsetpath = "/tmp/" + str(uuid.uuid4())
        try:
            inode = os.stat(filepath).st_ino
            offset = os.path.getsize(filepath) - 1024
        except OSError:
            pass
        else:
            if offset > 0:
                foffset = open(self.offsetpath, "w")
                foffset.write("%s\n%s" % (inode, offset))
                foffset.close()

        self.fin = Pygtail(filepath, offset_file=self.offsetpath, copytruncate=True)

    # List of matchings
    self.match_definitions = []

    # Regex group name for grouping
    self.groupbygroup = groupname
def _test_copytruncate_larger(self, onoff):
    self.test_readlines()
    self.copytruncate()
    self.append(self.test_str)
    new_lines = "4\n5\n"
    self.append(new_lines)
    pygtail = Pygtail(self.logfile.name, copytruncate=onoff)
    self.assertEqual(pygtail.read(), new_lines)
def test_copytruncate_off_smaller(self):
    self.test_readlines()
    self.copytruncate()
    new_lines = "4\n5\n"
    self.append(new_lines)
    pygtail = Pygtail(self.logfile.name, copytruncate=False)
    self.assertEqual(pygtail.read(), None)
    self.assertRegexpMatches(sys.stderr.getvalue(), r".*?\bWARN\b.*?\bshrank\b.*")
def test_logrotate_without_close(self):
    new_lines = ["4\n5\n", "6\n7\n"]
    pygtail = Pygtail(self.logfile.name)
    pygtail.read()
    self.append(new_lines[0])
    # note it doesn't matter what filename the file gets rotated to
    os.rename(self.logfile.name, "%s.somethingodd" % self.logfile.name)
    self.append(new_lines[1])
    self.assertEqual(pygtail.read(), ''.join(new_lines))
def test_logrotate_with_delay_compress(self):
    new_lines = ["4\n5\n", "6\n7\n"]
    pygtail = Pygtail(self.logfile.name)
    pygtail.read()
    self.append(new_lines[0])
    os.rename(self.logfile.name, "%s.1" % self.logfile.name)
    self.append(new_lines[1])
    pygtail = Pygtail(self.logfile.name)
    self.assertEqual(pygtail.read(), ''.join(new_lines))
def test_timed_rotating_file_handler(self):
    new_lines = ["4\n5\n", "6\n7\n"]
    pygtail = Pygtail(self.logfile.name)
    pygtail.read()
    self.append(new_lines[0])
    os.rename(self.logfile.name, "%s.2016-06-16" % self.logfile.name)
    self.append(new_lines[1])
    pygtail = Pygtail(self.logfile.name)
    self.assertEqual(pygtail.read(), ''.join(new_lines))
def test_logrotate(self):
    new_lines = ["4\n5\n", "6\n7\n"]
    pygtail = Pygtail(self.logfile.name)
    pygtail.read()
    self.append(new_lines[0])
    os.rename(self.logfile.name, "%s.1" % self.logfile.name)
    self.append(new_lines[1])
    pygtail = Pygtail(self.logfile.name)
    self.assertEqual(pygtail.read(), "".join(new_lines))
def test_copytruncate_off_smaller_without_close(self):
    new_lines = ["4\n5\n", "6\n7\n"]
    pygtail = Pygtail(self.logfile.name, copytruncate=True)
    pygtail.read()
    self.append(new_lines[0])
    read1 = pygtail.read()
    self.copytruncate()
    self.append(new_lines[1])
    read2 = pygtail.read()
    self.assertEqual([read1, read2], new_lines)
def _tail_lines(self, filepath):
    tail = Pygtail(str(filepath))
    while True:
        try:
            self._tail_worker_enabled.wait()
            if self._tail_worker_kill.is_set():
                return
            line = tail.next()
            GLib.idle_add(self._add_line_to_log_buffer, line)
        except StopIteration:
            time.sleep(0.5)
def test_full_lines(self):
    """
    Tests lines are logged only when they have a new line at the end.
    This is useful to ensure that log lines aren't unintentionally split up.
    """
    pygtail = Pygtail(self.logfile.name, full_lines=True)
    new_lines = "4\n5,"
    last_line = "5.5\n6\n"

    self.append(new_lines)
    pygtail.read()
    self.append(last_line)
    self.assertEqual(pygtail.read(), "5,5.5\n6\n")
def test_copytruncate_off_smaller(self):
    self.test_readlines()
    self.copytruncate()
    new_lines = "4\n5\n"
    self.append(new_lines)
    sys.stderr = captured = io.BytesIO() if PY2 else io.StringIO()
    pygtail = Pygtail(self.logfile.name, copytruncate=False)
    captured_value = captured.getvalue()
    sys.stderr = sys.__stderr__

    self.assertRegexpMatches(captured_value, r".*?\bWARN\b.*?\bshrank\b.*")
    self.assertEqual(pygtail.read(), None)
def test_renamecreate(self):
    """
    Tests "renamecreate" semantics where the currently processed file gets
    renamed and the original file gets recreated. This is the behavior of
    certain logfile rollers such as TimeBasedRollingPolicy in Java's
    Logback library.
    """
    new_lines = ["4\n5\n", "6\n7\n"]
    pygtail = Pygtail(self.logfile.name)
    pygtail.read()
    os.rename(self.logfile.name, "%s.2018-03-10" % self.logfile.name)
    # append will recreate the original log file
    self.append(new_lines[0])
    self.append(new_lines[1])
    self.assertEqual(pygtail.read(), ''.join(new_lines))
def test_copytruncate_off_smaller(self):
    self.test_readlines()
    self.copytruncate()
    new_lines = "4\n5\n"
    self.append(new_lines)
    sys.stderr = captured = io.BytesIO() if PY2 else io.StringIO()
    pygtail = Pygtail(self.logfile.name, copytruncate=False)
    captured_value = captured.getvalue()
    sys.stderr = sys.__stderr__

    assert_class = self.assertRegex if sys.version_info >= (3, 1) else self.assertRegexpMatches
    assert_class(captured_value, r".*?\bWARN\b.*?\bshrank\b.*")
    self.assertEqual(pygtail.read(), None)
def _collect_log_lines(self, log_file):
    """
    Collects logs from logfiles, formats and puts in queue
    """
    L = log_file
    fpath = L["fpath"]
    fmtfn = L["formatter_fn"]
    formatter = L["formatter"]

    freader = Pygtail(fpath)
    for line, line_info in self._iter_logs(freader, fmtfn):
        log = self._assign_default_log_values(fpath, line, formatter)

        try:
            _log = fmtfn(line)

            # Identify logs inside a log
            # Like process logs inside docker logs
            if isinstance(_log, RawLog):
                formatter, raw_log = _log["formatter"], _log["raw"]
                log.update(_log)
                # Give them to actual formatters
                _log = load_formatter_fn(formatter)(raw_log)

            log.update(_log)
        except (SystemExit, KeyboardInterrupt) as e:
            raise
        except:
            log["error"] = True
            log["error_tb"] = traceback.format_exc()
            self.log.exception("error_during_handling_log_line", log=log["raw"])

        if log["id"] == None:
            log["id"] = uuid.uuid1().hex

        log = self._remove_redundancy(log)
        if self._validate_log_format(log) == "failed":
            continue

        self.queue.put(dict(log=json.dumps(log), freader=freader, line_info=line_info))
        self.log.debug("tally:put_into_self.queue", size=self.queue.qsize())

    while not freader.is_fully_acknowledged():
        t = self.PYGTAIL_ACK_WAIT_TIME
        self.log.debug("waiting_for_pygtail_to_fully_ack", wait_time=t)
        time.sleep(t)
def liveLog(self, logdir):
    """
    Method to tail log lines for every 5 seconds.
    :return: It yields the log lines.
    """
    if logdir == None:
        pass
    else:
        if logdir == 'pgcli':
            logdir = PGC_LOGS
        log_file = Pygtail(logdir)
        ln = log_file.readlines()
        if ln:
            for log_line in ln:
                yield self.session.publish('com.bigsql.log', log_line)
class FileFollower():
    '''
    Use pygtail to keep track of EOF and rotated files, catch exceptions
    to make things more seamless
    '''
    def __init__(self, path):
        self.path = path
        self.pygtail = None
        self.last_inode = 0

    def next(self):
        line = ''
        curr_inode = 0
        if self.pygtail is None:
            try:
                # remove last offset file if the log file is different
                # PygTail's inode detection doesn't work in certain cases
                curr_inode = os.stat(self.path).st_ino
                if self.last_inode != curr_inode:
                    os.unlink(self.path + '.offset')
                    self.last_inode = curr_inode
                    log.debug('deleted offset file, inode difference')
            except Exception as e:
                log.info('inode checking failed (not terminal): %s' % e)

            self.pygtail = Pygtail(self.path)

        try:
            line = self.pygtail.next()
        except StopIteration as si:
            # Need to get a new instance of pygtail after this in case the
            # inode has changed
            self.pygtail = None
            return False

        return line
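# Hedged usage sketch (illustrative, not from the original source): driving the
# FileFollower class above from a simple polling loop. The path 'example.log'
# and the 0.5s back-off are assumptions; next() returns False when no new line
# is available yet.
import time

follower = FileFollower('example.log')
while True:
    line = follower.next()
    if line is False:
        time.sleep(0.5)  # nothing new yet; back off briefly and retry
        continue
    print(line.rstrip())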
def tail_file(self, remove_offset: bool = True) -> Iterable:
    log_file = get_server_path(self.config.server_log)
    offset_file = get_server_path(".log_offset")

    if remove_offset:
        self.delete_offset()

    return Pygtail(log_file, offset_file=offset_file)
def _consume_loop(self):
    while self._is_running:
        sleep(1)  # throttle polling for new logs
        for log_line in Pygtail(self._expanded_log_path, read_from_end=True,
                                offset_file=self._offset_path):
            self._notify_subscribers(log_line)
def __init__(self, filepath, groupby):
    self.groupmatch = re.compile(groupby)

    # write an offset file so that we start somewhat at the end of the file
    self.offsetpath = "/tmp/" + str(uuid.uuid4())
    #print self.offsetpath
    try:
        inode = os.stat(filepath).st_ino
        offset = os.path.getsize(filepath) - 1024
        #print inode
        #print offset
    except OSError:
        pass
    else:
        if offset > 0:
            #print 'write offset'
            foffset = open(self.offsetpath, "w")
            foffset.write("%s\n%s" % (inode, offset))
            foffset.close()

    self.fin = Pygtail(filepath, offset_file=self.offsetpath, copytruncate=True)
    #self.fin.readlines()

    self.match_definitions = []
def main():
    Thread(target=put_metrics).start()

    while not os.path.exists(METRICS_SH):
        print('Waiting for {}...'.format(METRICS_SH))
        time.sleep(1)

    while True:
        for line in Pygtail(METRICS_SH):
            try:
                metric, value = line.split(': ')
                value = float(value.strip())
                if ' ' in metric:
                    raise ValueError
            except ValueError:
                print('invalid line', line.strip())
                continue
            METRICS_QUEUE.put({
                'timestamp': time.time(),
                'value': value,
                'metric': metric,
            })
            print('queued', line.strip())
        time.sleep(5)
        print('no new data...')
def check(self):
    tx1 = RespendTx(time_regex=r'.*tx1: (\d{4}-\d{2}-\d{2} [0-9:]{8})',
                    hex_regex=r'.*tx1 hex: ([0-9a-f]+)')
    tx2 = RespendTx(time_regex=r'(\d{4}-\d{2}-\d{2} [0-9:]{8}) Respend tx2',
                    hex_regex=r'.*tx2 hex: ([0-9a-f]+)')

    found = False
    for line in Pygtail(self.logpath):
        # tx2 is logged before tx1
        if not tx2.done():
            tx2.parse_line(line)
        elif not tx1.done():
            tx1.parse_line(line)

        if tx1.done() and tx2.done():
            print("found double spend")
            self.on_respend(tx1, tx2)
            tx1.clear()
            tx2.clear()
            found = True

    return found
def PygtailLogger(logger: logging.Logger, filename: str, prefix: str = "2| ") -> Iterator[Callable[[], None]]:
    """
    Helper for streaming task stderr into logger using pygtail. Context manager yielding a function
    which reads the latest lines from the file and writes them into logger at verbose level. This
    function also runs automatically on context exit.

    Truncates lines at 4KB in case writer goes haywire.
    """
    pygtail = Pygtail(filename, full_lines=True)
    pygtail_ok = True

    def poll() -> None:
        nonlocal pygtail_ok
        if pygtail_ok:
            try:
                for line in pygtail:
                    logger.verbose((prefix + line.rstrip())[:4096])  # pyre-ignore
            except:
                pygtail_ok = False
                # cf. https://github.com/bgreenlee/pygtail/issues/48
                logger.verbose(  # pyre-ignore
                    "incomplete log stream due to the following exception; see %s",
                    filename,
                    exc_info=sys.exc_info(),
                )

    try:
        yield poll
    finally:
        poll()
def generate():
    while True:
        file = Pygtail(LOG_FILE, every_n=1)
        for index, line in enumerate(file):
            yield "data:" + str(line) + "\n\n"
            time.sleep(0.1)
        time.sleep(1)
def ss_web_log_monitor(self):
    self.logger.debug("Doorbird.ss_web_log_monitor() called")

    # Get isDaylight variable ID
    isDaylightId = None
    for var in indigo.variables:
        if var.name == "isDaylight":
            isDaylightId = var.id

    lastTriggered = None
    lastError = None
    self.logger.info(indigo.devices[self.indigoID].name +
                     ": Monitoring of the Security Spy web log has started")

    while self.monitorSSWeblogs:
        try:
            if os.path.isfile(indigo.devices[self.indigoID].pluginProps["ssWebLogPath"]):
                if indigo.variables[isDaylightId].value == "false":
                    for line in Pygtail(indigo.devices[self.indigoID].pluginProps["ssWebLogPath"]):
                        if (("cameraNum=" + indigo.devices[self.indigoID].pluginProps["ssCameraNum"]) in line) \
                                and (indigo.devices[self.indigoID].states["doorbirdOnOffState"] == True):
                            if (lastTriggered == None) or (time.time() - lastTriggered > 30):
                                self.logger.debug(indigo.devices[self.indigoID].name +
                                                  ": Security Spy web log criteria detected")
                                self.turn_light_on()
                                lastTriggered = time.time()
            else:
                # only send error messages every 5 minutes so as not to spam the log
                if (lastError == None) or (time.time() - lastError > 300):
                    lastError = time.time()
                    self.logger.error(indigo.devices[self.indigoID].name +
                                      ": Invalid Security Spy web log path: " +
                                      indigo.devices[self.indigoID].pluginProps["ssWebLogPath"])
        except:
            # only send error messages every 5 minutes so as not to spam the log
            if (lastError == None) or (time.time() - lastError > 300):
                lastError = time.time()
                self.logger.error(indigo.devices[self.indigoID].name +
                                  ": Error reading Security Spy web log")
        time.sleep(.5)

    self.logger.info(indigo.devices[self.indigoID].name +
                     ": Monitoring of the Security Spy web log has stopped")
def parse_log():
    ''' Parses ESS Log Data to store for the App '''
    app = create_app(config.JobConfig)
    app_context = app.app_context()
    app_context.push()

    _detect_rotated_log(app)

    with app.app_context():
        try:
            for line in Pygtail(app.config['ESS_LOG'], paranoid=True, full_lines=True,
                                offset_file=app.config['ESS_LOG_OFFSET']):
                try:
                    data = re.findall(r'\{.*\}', line)
                    data = json.loads(data[0])
                except Exception as r:
                    app.logger.error(r)

                if _is_connection_test(data['account_id'], data['domain_id']):
                    app.logger.info('Connection Test Detected. Skipping...')
                    continue

                if _message_exists(app.logger, data['message_id']):
                    app.logger.info('Message ID FOUND. Skipping...')
                    continue

                app.logger.info('Message ID NOT FOUND. Processing...')
                try:
                    _store_account(app.logger, data)
                    _store_domain(app.logger, data)
                    _store_message(app.logger, data)
                    if data['recipients']:
                        for recipient in data['recipients']:
                            _store_recipient(app.logger, recipient, data['message_id'])
                    if data['attachments']:
                        for attachment in data['attachments']:
                            _store_attachment(app.logger, attachment, data['message_id'])
                except Exception as e:
                    db.session.rollback()
                    app.logger.error("Failed to Process Message ({})".format(data['message_id']))
                    app.logger.error(e)
                else:
                    db.session.commit()
        except Exception as f:
            app.logger.error(f)

    app.logger.info('Closing app context for parse_log')
    app_context.pop()
def liveLog(self, logdir):
    """
    Method to tail log lines for every 5 seconds.
    :return: It yields the log lines.
    """
    if logdir == None:
        pass
    else:
        if logdir == 'pgcli':
            logdir = PGC_LOGS
        log_file = Pygtail(logdir)
        ln = log_file.readlines()
        if ln:
            for log_line in ln:
                line = unicode(str(log_line), sys.getdefaultencoding(), errors='ignore').strip()
                yield self.session.publish('com.bigsql.log', line)
def collect_log_lines(self, log_file):
    L = log_file
    fpath = L['fpath']
    fmtfn = L['formatter_fn']
    formatter = L['formatter']

    freader = Pygtail(fpath)
    for line, line_info in self._iter_logs(freader, fmtfn):
        log = self.assign_default_log_values(fpath, line, formatter)

        try:
            _log = fmtfn(line)

            if isinstance(_log, RawLog):
                formatter, raw_log = _log['formatter'], _log['raw']
                log.update(_log)
                _log = load_formatter_fn(formatter)(raw_log)

            log.update(_log)
        except (SystemExit, KeyboardInterrupt) as e:
            raise
        except:
            log['error'] = True
            log['error_tb'] = traceback.format_exc()
            self.log.exception('error_during_handling_log_line', log=log['raw'])

        if log['id'] == None:
            log['id'] = uuid.uuid1().hex

        log = self._remove_redundancy(log)
        if self.validate_log_format(log) == 'failed':
            continue

        self.queue.put(dict(log=json.dumps(log), freader=freader, line_info=line_info))
        self.log.debug('tally:put_into_self.queue', size=self.queue.qsize())

    while not freader.is_fully_acknowledged():
        t = self.PYGTAIL_ACK_WAIT_TIME
        self.log.debug('waiting_for_pygtail_to_fully_ack', wait_time=t)
        time.sleep(t)

    time.sleep(self.LOG_FILE_POLL_INTERVAL)
def run(self):
    print 'Starting Thread:' + self.objectName()
    self.started = True
    while self.started:
        for line in Pygtail(self.logger_path):
            try:
                self.emit(SIGNAL('Activated( QString )'), line.rstrip().split(' : ')[1])
            except IndexError:
                pass
def test_logrotate_without_delay_compress(self):
    new_lines = ["4\n5\n", "6\n7\n"]
    pygtail = Pygtail(self.logfile.name)
    pygtail.read()
    self.append(new_lines[0])

    # put content to gzip file
    gzip_handle = gzip.open("%s.1.gz" % self.logfile.name, 'wb')
    with open(self.logfile.name, 'rb') as logfile:
        gzip_handle.write(logfile.read())
    gzip_handle.close()
    with open(self.logfile.name, 'w'):
        # truncate file
        pass

    self.append(new_lines[1])
    pygtail = Pygtail(self.logfile.name)
    self.assertEqual(pygtail.read(), ''.join(new_lines))
class GroupingTail(object):
    def __init__(self, filepath, groupby):
        self.groupmatch = re.compile(groupby)

        # write an offset file so that we start somewhat at the end of the file
        self.offsetpath = "/tmp/" + str(uuid.uuid4())
        #print self.offsetpath
        try:
            inode = os.stat(filepath).st_ino
            offset = os.path.getsize(filepath) - 1024
            #print inode
            #print offset
        except OSError:
            pass
        else:
            if offset > 0:
                #print 'write offset'
                foffset = open(self.offsetpath, "w")
                foffset.write("%s\n%s" % (inode, offset))
                foffset.close()

        self.fin = Pygtail(filepath, offset_file=self.offsetpath, copytruncate=True)
        #self.fin.readlines()

        self.match_definitions = []

    def update(self):
        for line in self.fin.readlines():
            #print 'line: %s' % line
            mo = self.groupmatch.match(line)
            if mo is not None and mo.groups():
                groupname = mo.groups()[0].replace(".", "_").replace("-", "_")
                for match in self.match_definitions:
                    instrument = match["instrument"]
                    instrument.write(groupname, line)

    def add_match(self, instance_name, valuetype, instrument):
        self.match_definitions.append(dict(
            instance_name=instance_name,
            valuetype=valuetype,
            instrument=instrument
        ))

    def read_metrics(self):
        for match in self.match_definitions:
            instance_name = match["instance_name"]
            instrument = match["instrument"]
            valuetype = match["valuetype"]
            for groupname, value in instrument.read():
                metric_name = "%s.%s" % (groupname, instance_name)
                yield (metric_name, valuetype, value)
def test_logrotate_with_dateext_with_delaycompress(self):
    new_lines = ["4\n5\n", "6\n7\n"]
    pygtail = Pygtail(self.logfile.name)
    pygtail.read()
    self.append(new_lines[0])
    os.rename(self.logfile.name, "%s-20160616" % self.logfile.name)
    self.append(new_lines[1])
    pygtail = Pygtail(self.logfile.name)
    self.assertEqual(pygtail.read(), ''.join(new_lines))
def logIntLines(self, number, logdir):
    """
    Method to tail the selected number of lines from the selected log.
    :return: It yields the log lines.
    """
    if logdir == None:
        yield self.session.publish('com.bigsql.logError', "Log file does not exist")
    else:
        if logdir == 'pgcli':
            logdir = PGC_LOGS
        log_file = Pygtail(logdir)
        ln = log_file.readlines()
        read_file = open(logdir)
        _lines = read_file.readlines()[-number:]
        for _li in _lines:
            line = unicode(str(_li), sys.getdefaultencoding(), errors='ignore').strip()
            yield self.session.publish('com.bigsql.log', line)
def main():
    stream = file('config.yaml', 'r')
    config = yaml.load(stream)
    tailer = Pygtail(config.get('tail').get('aof_path'), "offset_file")
    parser = Parser()
    for line in tailer:
        command = parser.parse_command(line)
        if not command:
            continue
        print command
def read_raw_logs(self, log_file="", start=False, offset_path=""): if not offset_path: offset_path = os.path.join(self.offset_path, self.conn_log_file + ".offset") if start: if os.path.exists(offset_path): os.remove(offset_path) if log_file: for line in Pygtail(log_file, offset_file=offset_path): if line is not None: yield line
def selectedLog(self, logdir):
    """
    Method to tail the last 1000 lines from the PGC_LOGS to display default.
    :return: It yields the log lines.
    """
    if logdir == None:
        yield self.session.publish('com.bigsql.logError', "Log file does not exist")
    else:
        if logdir == 'pgcli':
            logdir = PGC_LOGS
        self.session.publish('com.bigsql.pgcliDir', logdir)
        log_file = Pygtail(logdir)
        ln = log_file.readlines()
        read_file = open(logdir)
        _lines = read_file.readlines()[-1000:]
        for _li in _lines:
            line = unicode(str(_li), sys.getdefaultencoding(), errors='ignore').strip()
            yield self.session.publish('com.bigsql.log', line)
def test_custom_rotating_file_handler_with_prepend(self):
    new_lines = ["4\n5\n", "6\n7\n"]
    pygtail = Pygtail(self.logfile.name)
    pygtail.read()
    self.append(new_lines[0])
    file_dir, rel_filename = os.path.split(self.logfile.name)
    os.rename(self.logfile.name, os.path.join(file_dir, "custom_log_pattern.%s" % rel_filename))
    self.append(new_lines[1])
    pygtail = Pygtail(self.logfile.name, rotated_filename_patterns=["custom_log_pattern.%s"])
    self.assertEqual(pygtail.read(), ''.join(new_lines))
def pygtail_check_logs():
    from pygtail import Pygtail

    # This is where we can add additional realtime-events to trigger based on.
    key_phrases = [
        "new-host-alert"
    ]

    for line in Pygtail(REALTIME_LOG_PATH):
        if 'nnm:' in line:  # This line is in case we're looking at /var/log/messages instead.
            # Look for any real-time alerts which we want to act upon.
            for phrase in key_phrases:
                if phrase in line:
                    process_log(line)
def test_on_update_without_paranoid(self):
    updates = [0]

    def record_update():
        updates[0] += 1

    pygtail = Pygtail(self.logfile.name, on_update=record_update)
    self.assertEqual(updates[0], 0)
    for line in pygtail:
        self.assertEqual(updates[0], 0)
    self.assertEqual(updates[0], 1)
def download_file(self, url):
    try:
        # Check that the log file exists.
        assert os.path.isfile(LOG_FILE)
        # Check that the offset directory exists.
        assert os.path.isdir(OFFSET_DIR)
        # Check that the offset directory is writeable.
        assert os.access(OFFSET_DIR, os.W_OK)
        # Check that the log file is readable.
        assert os.access(LOG_FILE, os.R_OK)
    except AssertionError:
        sys.stderr.write('Error: One or more preconditions failed.\n')
        # Exit 13, don't restart tcollector.
        sys.exit(13)

    # If the offset file exists, it had better not be empty.
    if os.path.isfile(OFFSET_FILE):
        try:
            assert os.path.getsize(OFFSET_FILE) > 0
        except AssertionError:
            os.remove(OFFSET_FILE)

    # We're not using paranoid mode in Pygtail for performance.
    maillog = Pygtail(LOG_FILE, offset_file=OFFSET_FILE)

    for line in maillog:
        try:
            process_line(line)
        except LineProcessingError:
            pass

        if STOP:
            # Force the offset file to update before we shutdown.
            # If pygtail is not paranoid=True, this is necessary to ensure
            # that the offset gets written after SIGTERM.
            maillog._update_offset_file()
            break
def on_modified(self, event):
    super(TailContentCollector, self).on_modified(event)
    # what = 'directory' if event.is_directory else 'file'
    # logging.info("Modified %s: %s", what, event.src_path)
    # split_path = None
    log_file = event.src_path.split("/")

    # prepare offset file directory
    if not os.path.exists(self.offset_dir):
        os.makedirs(self.offset_dir)
    offset_file = "%s/%s.os" % (self.offset_dir, log_file[-1])

    # the offset file must be kept separate from the monitored directory,
    # and is a local variable...
    tailor = Pygtail(event.src_path, offset_file, paranoid=True)
    appended = tailor.read()
    if appended:
        # must use gbk decoding...
        decodelines = appended.decode("gbk")
        # execute callback function...
        self.onchange(event.src_path, decodelines)
    else:
        logging.info("empty content: %s", event.src_path)
def next(self):
    line = ''
    curr_inode = 0
    if self.pygtail is None:
        try:
            # remove last offset file if the log file is different
            # PygTail's inode detection doesn't work in certain cases
            curr_inode = os.stat(self.path).st_ino
            if self.last_inode != curr_inode:
                os.unlink(self.path + '.offset')
                self.last_inode = curr_inode
                log.debug('deleted offset file, inode difference')
        except Exception as e:
            log.info('inode checking failed (not terminal): %s' % e)

        self.pygtail = Pygtail(self.path)

    try:
        line = self.pygtail.next()
    except StopIteration as si:
        # Need to get a new instance of pygtail after this in case the
        # inode has changed
        self.pygtail = None
        return False

    return line
class GroupingTail(object):
    def __init__(self, filepath, groupby, groupname=None):
        self.groupmatch = re.compile(groupby)

        # write an offset file so that we start somewhat at the end of the file
        # either filepath is a path or a syslogd url
        (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(filepath)
        if scheme == 'syslog':
            host, port = netloc.split(':')
            self.fin = QueueFile()
            self.server = SocketServer.UDPServer((host, int(port)), SyslogUDPHandler)
            self.server.queue = self.fin
            th = threading.Thread(target=lambda: self.server.serve_forever(poll_interval=0.5))
            th.daemon = True
            th.start()
        else:
            self.offsetpath = "/tmp/" + str(uuid.uuid4())
            #print self.offsetpath
            try:
                inode = os.stat(filepath).st_ino
                offset = os.path.getsize(filepath) - 1024
                #print inode
                #print offset
            except OSError:
                pass
            else:
                if offset > 0:
                    #print 'write offset'
                    foffset = open(self.offsetpath, "w")
                    foffset.write("%s\n%s" % (inode, offset))
                    foffset.close()

            self.fin = Pygtail(filepath, offset_file=self.offsetpath, copytruncate=True)
            #self.fin.readlines()

        self.match_definitions = []
        self.groupbygroup = groupname

    def __del__(self):
        if hasattr(self, 'server'):
            self.server.socket.close()

    def update(self):
        for line in self.fin.readlines():
            #print 'line: %s' % line
            groupname = None
            mo = self.groupmatch.match(line)
            if mo is not None:
                if self.groupbygroup is None and mo.groups():
                    groupname = mo.groups()[0]
                elif self.groupbygroup is not None:
                    groupname = mo.groupdict().get(self.groupbygroup)
            if groupname is not None:
                groupname = groupname.replace(".", "_").replace("-", "_")
                for match in self.match_definitions:
                    instrument = match["instrument"]
                    instrument.write(groupname, line)

    def add_match(self, instance_name, valuetype, instrument):
        self.match_definitions.append(dict(
            instance_name=instance_name,
            valuetype=valuetype,
            instrument=instrument
        ))

    def read_metrics(self):
        for match in self.match_definitions:
            instance_name = match["instance_name"]
            instrument = match["instrument"]
            valuetype = match["valuetype"]
            for groupname, value in instrument.read():
                metric_name = "%s.%s" % (groupname, instance_name)
                yield (metric_name, valuetype, value)
class GroupingTail(object):
    def __init__(self, filepath, groupby, groupname=None):
        self.groupmatch = re.compile(groupby)

        # write an offset file so that we start somewhat at the end of the file
        # either filepath is a path or a syslogd url
        (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(filepath)
        if scheme == 'syslog':
            host, port = netloc.split(':')
            self.fin = QueueFile()
            self.server = SocketServer.UDPServer((host, int(port)), SyslogUDPHandler)
            self.server.queue = self.fin
            th = threading.Thread(target=lambda: self.server.serve_forever(poll_interval=0.5))
            th.setDaemon(True)
            th.start()
        else:
            # Create a temporary file with offset info
            self.offsetpath = "/tmp/" + str(uuid.uuid4())
            try:
                inode = os.stat(filepath).st_ino
                offset = os.path.getsize(filepath) - 1024
            except OSError:
                pass
            else:
                if offset > 0:
                    foffset = open(self.offsetpath, "w")
                    foffset.write("%s\n%s" % (inode, offset))
                    foffset.close()

            self.fin = Pygtail(filepath, offset_file=self.offsetpath, copytruncate=True)

        # List of matchings
        self.match_definitions = []

        # Regex group name for grouping
        self.groupbygroup = groupname

    def __del__(self):
        if hasattr(self, 'server'):
            self.server.socket.close()

    # Update method processing last lines
    def update(self):
        for line in self.fin.readlines():
            groupname = None
            mo = self.groupmatch.match(line)
            if mo is not None:
                if self.groupbygroup is None and mo.groups():
                    # No groupbygroup: get first group name
                    groupname = mo.groups()[0]
                elif self.groupbygroup is not None:
                    # Get groupname from line
                    groupname = mo.groupdict().get(self.groupbygroup)
            if groupname is not None:
                # Normalize groupname
                groupname = groupname.replace(".", "_").replace("-", "_")
                # Check all possible matchings
                for match in self.match_definitions:
                    instrument = match["instrument"]
                    instrument.write(groupname, line)

    # Attach match to groupingtail class
    def add_match(self, instance_name, valuetype, instrument):
        self.match_definitions.append(dict(
            instance_name=instance_name,
            valuetype=valuetype,
            instrument=instrument
        ))

    # Get stored values from instrument
    def read_metrics(self):
        # For all matchings
        for match in self.match_definitions:
            instance_name = match["instance_name"]
            instrument = match["instrument"]
            valuetype = match["valuetype"]
            # Get metric info
            for groupname, value in instrument.read():
                # Construct grouping name for this metric value
                metric_name = "%s*%s" % (groupname, instance_name)
                # Send metric info
                yield (metric_name, valuetype, value)
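# Hedged usage sketch for the GroupingTail class above (names and the log path
# are illustrative, not from the original plugin): a minimal counting
# "instrument" satisfying the write()/read() interface the class calls, with
# lines grouped by their first whitespace-delimited token.
from collections import Counter

class CountingInstrument(object):
    def __init__(self):
        self.counts = Counter()

    def write(self, groupname, line):
        # Count one occurrence per matched line for this group
        self.counts[groupname] += 1

    def read(self):
        # Return (groupname, value) pairs as read_metrics() expects
        return list(self.counts.items())

gt = GroupingTail("/var/log/example.log", groupby=r"^(\S+)")  # path is an assumption
gt.add_match("hits", "counter", CountingInstrument())
gt.update()  # consume any new lines via Pygtail
for metric_name, valuetype, value in gt.read_metrics():
    print("%s %s %s" % (metric_name, valuetype, value))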
if not os.path.exists(".offset"): os.makedirs(".offset") try: if is_erased(file_name): # forget about saved offset # delete offset file # print "remove file" # test point os.remove(OFFSET_FILE) except IOError: # Error occur when first time run pass try: pyg = Pygtail(file_name, OFFSET_FILE) first_line = pyg.next() # get log format and log type log_type, log_format = parser.detect_log_type(first_line) for line in Pygtail(file_name, OFFSET_FILE): # print line # test point error_info = parser.parse_log(line, log_type, log_format) status_code = error_info['status_code'] if status_code == 502 or status_code == 503: client.capture( "raven.events.Message", message=log_type + " " + str(status_code), extra=error_info, date=error_info['time']
def test_read(self):
    pygtail = Pygtail(self.logfile.name)
    self.assertEqual(pygtail.read(), self.test_str)
def test_readlines(self):
    pygtail = Pygtail(self.logfile.name)
    self.assertEqual(pygtail.readlines(), self.test_lines)
def test_subsequent_read_with_no_new_data(self):
    pygtail = Pygtail(self.logfile.name)
    self.assertEqual(pygtail.read(), self.test_str)
    self.assertEqual(pygtail.read(), None)
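# Hedged, self-contained sketch tying the snippets above together: write a few
# lines to a temp file, tail it with Pygtail, then append and tail again to pick
# up only the new data. The file names are illustrative; only Pygtail arguments
# already used elsewhere in this collection (offset_file, read()) appear here.
import os
import tempfile

from pygtail import Pygtail

log_path = os.path.join(tempfile.mkdtemp(), "example.log")
offset_path = log_path + ".offset"

with open(log_path, "w") as f:
    f.write("1\n2\n3\n")

# First pass reads everything and records the offset in offset_path.
print(Pygtail(log_path, offset_file=offset_path).read())

with open(log_path, "a") as f:
    f.write("4\n5\n")

# Second pass resumes from the stored offset and sees only the new lines.
print(Pygtail(log_path, offset_file=offset_path).read())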