import errno
import json
import logging
import os
import time
import types

import kafka.common
from systemd.journal import Reader

# ServiceDaemon and the statsd client live elsewhere in this project; the
# import paths below are assumptions, not confirmed by this file.
from . daemon import ServiceDaemon  # assumed project-local path
from . import statsd  # assumed project-local statsd client with tag support

# MsgBuffer, KafkaSender, ElasticsearchSender, LogplexSender, get_next,
# _convert_field and MAX_KAFKA_MESSAGE_SIZE are defined elsewhere in this module.


class KafkaJournalPump(ServiceDaemon):
    def __init__(self, config_path):
        self.stats = None
        ServiceDaemon.__init__(self, config_path=config_path, multi_threaded=True, log_level=logging.INFO)
        cursor = self.load_state()
        self.msg_buffer = MsgBuffer(cursor)
        self.journald_reader = None
        self.sender = None
        self.get_reader(cursor)

    def get_reader(self, cursor):
        if self.config.get("journal_path"):
            while True:
                try:
                    self.journald_reader = Reader(path=self.config["journal_path"])
                    break
                except IOError as ex:
                    if ex.errno == errno.ENOENT:
                        self.log.warning("journal not available yet, waiting: %s: %s",
                                         ex.__class__.__name__, ex)
                        time.sleep(5.0)
                    else:
                        raise
        else:
            self.journald_reader = Reader()
        for unit_to_match in self.config.get("units_to_match", []):
            self.journald_reader.add_match(_SYSTEMD_UNIT=unit_to_match)
        if cursor:
            self.journald_reader.seek_cursor(cursor)  # pylint: disable=no-member
        self.journald_reader.get_next = types.MethodType(get_next, self.journald_reader)
        self.journald_reader._convert_field = types.MethodType(_convert_field, self.journald_reader)  # pylint: disable=protected-access

    def handle_new_config(self):
        """Called by ServiceDaemon when config has changed"""
        stats = self.config.get("statsd", {})
        self.stats = statsd.StatsClient(
            host=stats.get("host"),
            port=stats.get("port"),
            tags=stats.get("tags"),
        )

    def sigterm(self, signum, frame):
        if self.sender:
            self.sender.running = False
        ServiceDaemon.sigterm(self, signum, frame)

    def load_state(self):
        filepath = self.config.get("json_state_file_path", "kafkajournalpump_state.json")
        if os.path.exists(filepath):
            with open(filepath, "r") as fp:
                state_file = json.load(fp)
            return state_file["cursor"]
        return None

    def check_match(self, entry):
        if not self.config.get("match_key"):
            return True
        elif entry.get(self.config["match_key"]) == self.config["match_value"]:
            return True
        return False

    def initialize_sender(self):
        if not self.sender:
            kafka_address = self.config.get("kafka_address")
            if not kafka_address:
                self.log.fatal("No kafka_address in configuration")
                return False
            try:
                self.sender = KafkaSender(
                    self.config, self.msg_buffer,
                    kafka_address=kafka_address, stats=self.stats)
            except kafka.common.KafkaUnavailableError:
                return False
            self.sender.start()
        return True

    def run(self):
        logging.getLogger("kafka").setLevel(logging.CRITICAL)  # remove client-internal tracebacks from logging output
        while self.running:
            entry = None
            try:
                if not self.initialize_sender():
                    self.log.warning("No Kafka sender, sleeping")
                    time.sleep(5.0)
                    continue

                entry, cursor = next(self.journald_reader)
                if cursor is not None:
                    if not self.check_match(entry):
                        self.msg_buffer.set_cursor(cursor)
                        continue
                    json_entry = json.dumps(entry).encode("utf8")
                    if len(json_entry) > MAX_KAFKA_MESSAGE_SIZE:
                        self.stats.increase("journal.error", tags={"error": "too_long"})
                        error = "too large message {} bytes vs maximum {} bytes".format(
                            len(json_entry), MAX_KAFKA_MESSAGE_SIZE)
                        self.log.warning("%s: %s ...", error, json_entry[:1024])
                        entry = {
                            "error": error,
                            "partial_data": json_entry[:1024].decode("utf8", "replace"),  # decode so the fallback entry stays JSON serializable
                        }
                        json_entry = json.dumps(entry).encode("utf8")
                    self.stats.increase("journal.lines")
                    self.stats.increase("journal.bytes", inc_value=len(json_entry))
                    self.msg_buffer.set_item(json_entry, cursor)
                else:
                    self.log.debug("No more journal entries to read, sleeping")
                    if time.monotonic() - self.msg_buffer.last_journal_msg_time > 180 and self.msg_buffer.cursor:
                        self.log.info("We haven't seen any msgs in 180s, reinitiate Reader() and seek to: %r",
                                      self.msg_buffer.cursor)
                        self.get_reader(self.msg_buffer.cursor)
                        self.msg_buffer.last_journal_msg_time = time.monotonic()
                    time.sleep(0.5)
            except StopIteration:
                self.log.debug("No more journal entries to read, sleeping")
                time.sleep(0.5)
            except Exception as ex:  # pylint: disable=broad-except
                self.log.exception("Unexpected exception during handling entry: %r", entry)
                self.stats.unexpected_exception(ex=ex, where="mainloop", tags={"app": "kafkajournalpump"})
                time.sleep(0.5)
            self.ping_watchdog()
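# load_state() above expects a JSON state file of the form
# {"cursor": "<journald cursor string>"}.  The writer side is not shown in
# this file; the helper below is a hypothetical sketch of that counterpart,
# included only to document the expected file format.
def save_state_sketch(filepath, cursor):
    # hypothetical helper, not part of the original module
    with open(filepath, "w") as fp:
        json.dump({"cursor": cursor}, fp)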
# Variant of KafkaJournalPump without statsd metrics, unit matching or
# reader re-initialization.
class KafkaJournalPump(ServiceDaemon):
    def __init__(self, config_path):
        ServiceDaemon.__init__(self, config_path=config_path, multi_threaded=True, log_level=logging.INFO)
        cursor = self.load_state()
        self.msg_buffer = MsgBuffer(cursor)
        if self.config.get("journal_path"):
            while True:
                try:
                    self.journald_reader = Reader(path=self.config["journal_path"])
                    break
                except IOError as ex:
                    if ex.errno == errno.ENOENT:
                        self.log.warning("journal not available yet, waiting: %s: %s",
                                         ex.__class__.__name__, ex)
                        time.sleep(5.0)
                    else:
                        raise
        else:
            self.journald_reader = Reader()
        if cursor:
            self.journald_reader.seek_cursor(cursor)  # pylint: disable=no-member
        self.journald_reader.get_next = types.MethodType(get_next, self.journald_reader)
        self.journald_reader._convert_field = types.MethodType(_convert_field, self.journald_reader)  # pylint: disable=protected-access
        self.sender = None

    def sigterm(self, signum, frame):
        if self.sender:
            self.sender.running = False
        ServiceDaemon.sigterm(self, signum, frame)

    def load_state(self):
        filepath = self.config.get("json_state_file_path", "kafkajournalpump_state.json")
        if os.path.exists(filepath):
            with open(filepath, "r") as fp:
                state_file = json.load(fp)
            return state_file["cursor"]
        return None

    def check_match(self, entry):
        if not self.config.get("match_key"):
            return True
        elif entry.get(self.config["match_key"]) == self.config["match_value"]:
            return True
        return False

    def initialize_sender(self):
        if not self.sender:
            kafka_address = self.config.get("kafka_address")
            if not kafka_address:
                self.log.fatal("No kafka_address in configuration")
                return False
            try:
                self.sender = KafkaSender(self.config, self.msg_buffer, kafka_address=kafka_address)
            except kafka.common.KafkaUnavailableError:
                return False
            self.sender.start()
        return True

    def run(self):
        logging.getLogger("kafka").setLevel(logging.CRITICAL)  # remove client-internal tracebacks from logging output
        while self.running:
            entry = None
            try:
                if not self.initialize_sender():
                    self.log.warning("No Kafka sender, sleeping")
                    time.sleep(5.0)
                    continue

                entry, cursor = next(self.journald_reader)
                if cursor is not None:
                    if not self.check_match(entry):
                        self.msg_buffer.set_cursor(cursor)
                        continue
                    json_entry = json.dumps(entry).encode("utf8")
                    if len(json_entry) > MAX_KAFKA_MESSAGE_SIZE:
                        error = "too large message {} bytes vs maximum {} bytes".format(
                            len(json_entry), MAX_KAFKA_MESSAGE_SIZE)
                        self.log.warning("%s: %s ...", error, json_entry[:1024])
                        entry = {
                            "error": error,
                            "partial_data": json_entry[:1024].decode("utf8", "replace"),  # decode so the fallback entry stays JSON serializable
                        }
                        json_entry = json.dumps(entry).encode("utf8")
                    self.msg_buffer.set_item(json_entry, cursor)
                else:
                    self.log.debug("No more journal entries to read, sleeping")
                    time.sleep(0.5)
            except StopIteration:
                self.log.debug("No more journal entries to read, sleeping")
                time.sleep(0.5)
            except:  # pylint: disable=bare-except
                self.log.exception("Problem handling entry: %r", entry)
                time.sleep(0.5)
            self.ping_watchdog()
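# An illustrative configuration for the pumps above, assembled from the keys
# the code actually reads; every value here is a placeholder assumption.
EXAMPLE_CONFIG = {
    "kafka_address": "kafka.example.com:9092",  # required by initialize_sender()
    "journal_path": "/var/log/journal",  # optional; omit to read the default system journal
    "json_state_file_path": "kafkajournalpump_state.json",  # where the cursor is persisted
    "match_key": "_SYSTEMD_UNIT",  # optional filter: forward an entry only when
    "match_value": "nginx.service",  # entry[match_key] == match_value
}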
# JournalPump generalizes the pump with pluggable senders (Elasticsearch,
# Kafka, Logplex) selected via the "output_type" config key.
class JournalPump(ServiceDaemon):
    def __init__(self, config_path):
        self.stats = None
        ServiceDaemon.__init__(self, config_path=config_path, multi_threaded=True, log_level=logging.INFO)
        cursor = self.load_state()
        self.msg_buffer = MsgBuffer(cursor)
        self.journald_reader = None
        self.sender = None
        self.get_reader(cursor)

    def get_reader(self, cursor):
        if self.config.get("journal_path"):
            while True:
                try:
                    self.journald_reader = Reader(path=self.config["journal_path"])
                    break
                except IOError as ex:
                    if ex.errno == errno.ENOENT:
                        self.log.warning("journal not available yet, waiting: %s: %s",
                                         ex.__class__.__name__, ex)
                        time.sleep(5.0)
                    else:
                        raise
        else:
            self.journald_reader = Reader()
        for unit_to_match in self.config.get("units_to_match", []):
            self.journald_reader.add_match(_SYSTEMD_UNIT=unit_to_match)
        if cursor:
            self.journald_reader.seek_cursor(cursor)  # pylint: disable=no-member
        self.journald_reader.get_next = types.MethodType(get_next, self.journald_reader)
        self.journald_reader._convert_field = types.MethodType(_convert_field, self.journald_reader)  # pylint: disable=protected-access

    def handle_new_config(self):
        """Called by ServiceDaemon when config has changed"""
        stats = self.config.get("statsd") or {}
        self.stats = statsd.StatsClient(
            host=stats.get("host"),
            port=stats.get("port"),
            tags=stats.get("tags"),
        )

    def sigterm(self, signum, frame):
        if self.sender:
            self.sender.running = False
        ServiceDaemon.sigterm(self, signum, frame)

    def load_state(self):
        filepath = self.config.get("json_state_file_path", "journalpump_state.json")
        if os.path.exists(filepath):
            with open(filepath, "r") as fp:
                state_file = json.load(fp)
            return state_file["cursor"]
        return None

    def check_match(self, entry):
        if not self.config.get("match_key"):
            return True
        elif entry.get(self.config["match_key"]) == self.config["match_value"]:
            return True
        return False

    def initialize_sender(self):
        if not self.sender:
            senders = {
                "elasticsearch": ElasticsearchSender,
                "kafka": KafkaSender,
                "logplex": LogplexSender,
            }
            sender_class = senders.get(self.config["output_type"])
            self.sender = sender_class(config=self.config, msg_buffer=self.msg_buffer, stats=self.stats)
            self.sender.start()

    def run(self):
        while self.running:
            jobject = None
            try:
                self.initialize_sender()
                msg_buffer_length = len(self.msg_buffer)
                if msg_buffer_length > 50000:
                    # This makes the self.msg_buffer grow by one journal msg a second at most
                    self.log.debug("%d entries in msg buffer, slowing down a bit by sleeping", msg_buffer_length)
                    time.sleep(1.0)

                jobject = next(self.journald_reader)
                for key, value in jobject.entry.items():
                    if isinstance(value, bytes):
                        jobject.entry[key] = repr(value)  # value may be bytes in any encoding

                if jobject.cursor is not None:
                    if not self.check_match(jobject.entry):
                        self.msg_buffer.set_cursor(jobject.cursor)
                        continue
                    json_entry = json.dumps(jobject.entry).encode("utf8")
                    if len(json_entry) > MAX_KAFKA_MESSAGE_SIZE:
                        self.stats.increase("journal.error", tags={"error": "too_long"})
                        error = "too large message {} bytes vs maximum {} bytes".format(
                            len(json_entry), MAX_KAFKA_MESSAGE_SIZE)
                        self.log.warning("%s: %s ...", error, json_entry[:1024])
                        entry = {
                            "error": error,
                            "partial_data": json_entry[:1024].decode("utf8", "replace"),  # decode so the fallback entry stays JSON serializable
                        }
                        json_entry = json.dumps(entry).encode("utf8")
                    self.stats.increase("journal.lines")
                    self.stats.increase("journal.bytes", inc_value=len(json_entry))
                    self.msg_buffer.set_item(json_entry, jobject.cursor)
                else:
                    self.log.debug("No more journal entries to read, sleeping")
                    if time.monotonic() - self.msg_buffer.last_journal_msg_time > 180 and self.msg_buffer.cursor:
                        self.log.info("We haven't seen any msgs in 180s, reinitiate Reader() and seek to: %r",
                                      self.msg_buffer.cursor)
                        self.get_reader(self.msg_buffer.cursor)
                        self.msg_buffer.last_journal_msg_time = time.monotonic()
                    time.sleep(0.5)
            except StopIteration:
                self.log.debug("No more journal entries to read, sleeping")
                time.sleep(0.5)
            except Exception as ex:  # pylint: disable=broad-except
                self.log.exception("Unexpected exception during handling entry: %r", jobject)
                self.stats.unexpected_exception(ex=ex, where="mainloop", tags={"app": "journalpump"})
                time.sleep(0.5)
            self.ping_watchdog()
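# MsgBuffer is defined elsewhere in this module; the pumps above only rely on
# construction with the last saved cursor, set_item()/set_cursor(), len(), and
# the cursor / last_journal_msg_time attributes.  Below is a minimal sketch of
# that contract, assuming a lock-guarded deque shared with a sender thread;
# the real implementation may differ.
import collections
import threading


class MsgBufferSketch:  # hypothetical stand-in, not the module's MsgBuffer
    def __init__(self, cursor=None):
        self.msg_buffer = collections.deque()
        self.lock = threading.Lock()
        self.cursor = cursor
        self.last_journal_msg_time = time.monotonic()

    def __len__(self):
        return len(self.msg_buffer)

    def set_cursor(self, cursor):
        # called when an entry is filtered out: advance the position only
        with self.lock:
            self.cursor = cursor
        self.last_journal_msg_time = time.monotonic()

    def set_item(self, item, cursor):
        # called for every forwarded entry: buffer it and advance the position
        with self.lock:
            self.msg_buffer.append((item, cursor))
            self.cursor = cursor
        self.last_journal_msg_time = time.monotonic()

    def get_items(self):
        # drain helper a sender thread might use (an assumption, not shown above)
        with self.lock:
            items = list(self.msg_buffer)
            self.msg_buffer.clear()
        return items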