class KafkaJournalPump(ServiceDaemon):
    def __init__(self, config_path):
        self.stats = None
        ServiceDaemon.__init__(self, config_path=config_path, multi_threaded=True, log_level=logging.INFO)
        cursor = self.load_state()
        self.msg_buffer = MsgBuffer(cursor)
        self.journald_reader = None
        self.sender = None
        self.get_reader(cursor)

    def get_reader(self, cursor):
        if self.config.get("journal_path"):
            while True:
                try:
                    self.journald_reader = Reader(path=self.config["journal_path"])
                    break
                except IOError as ex:
                    if ex.errno == errno.ENOENT:
                        self.log.warning("journal not available yet, waiting: %s: %s",
                                         ex.__class__.__name__, ex)
                        time.sleep(5.0)
                    else:
                        raise
        else:
            self.journald_reader = Reader()

        for unit_to_match in self.config.get("units_to_match", []):
            self.journald_reader.add_match(_SYSTEMD_UNIT=unit_to_match)

        if cursor:
            self.journald_reader.seek_cursor(cursor)  # pylint: disable=no-member

        # Bind module-level get_next/_convert_field replacements onto this
        # Reader instance (monkey-patching the reader's own methods).
        self.journald_reader.get_next = types.MethodType(get_next, self.journald_reader)
        self.journald_reader._convert_field = types.MethodType(_convert_field, self.journald_reader)  # pylint: disable=protected-access

    def handle_new_config(self):
        """Called by ServiceDaemon when config has changed"""
        stats = self.config.get("statsd") or {}  # "or {}" also covers an explicit null in the config
        self.stats = statsd.StatsClient(
            host=stats.get("host"),
            port=stats.get("port"),
            tags=stats.get("tags"),
        )

    def sigterm(self, signum, frame):
        if self.sender:
            self.sender.running = False
        ServiceDaemon.sigterm(self, signum, frame)

    def load_state(self):
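        # The state file is a small JSON document of the assumed shape
        # {"cursor": "<opaque journald cursor string>"}; the cursor is later
        # handed to Reader.seek_cursor() to resume where the pump left off.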
        filepath = self.config.get("json_state_file_path", "kafkajournalpump_state.json")
        if os.path.exists(filepath):
            with open(filepath, "r") as fp:
                state_file = json.load(fp)
            return state_file["cursor"]
        return None

    def check_match(self, entry):
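        # Illustrative behaviour: with a config such as
        #   {"match_key": "SYSLOG_IDENTIFIER", "match_value": "sshd"}
        # only entries whose SYSLOG_IDENTIFIER field equals "sshd" pass;
        # without match_key every entry is forwarded.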
        if not self.config.get("match_key"):
            return True
        elif entry.get(self.config["match_key"]) == self.config["match_value"]:
            return True
        return False

    def initialize_sender(self):
        if not self.sender:
            kafka_address = self.config.get("kafka_address")
            if not kafka_address:
                self.log.fatal("No kafka_address in configuration")
                return False
            try:
                self.sender = KafkaSender(
                    self.config, self.msg_buffer, kafka_address=kafka_address,
                    stats=self.stats)
            except kafka.common.KafkaUnavailableError:
                return False
            self.sender.start()
        return True

    def run(self):
        logging.getLogger("kafka").setLevel(logging.CRITICAL)  # remove client-internal tracebacks from logging output
        while self.running:
            entry = None
            try:
                if not self.initialize_sender():
                    self.log.warning("No Kafka sender, sleeping")
                    time.sleep(5.0)
                    continue

                entry, cursor = next(self.journald_reader)
                if cursor is not None:
                    if not self.check_match(entry):
                        self.msg_buffer.set_cursor(cursor)
                        continue
                    json_entry = json.dumps(entry).encode("utf8")
                    if len(json_entry) > MAX_KAFKA_MESSAGE_SIZE:
                        self.stats.increase("journal.error", tags={"error": "too_long"})
                        error = "too large message {} bytes vs maximum {} bytes".format(
                            len(json_entry), MAX_KAFKA_MESSAGE_SIZE)
                        self.log.warning("%s: %s ...", error, json_entry[:1024])
                        entry = {
                            "error": error,
                            # decode: json.dumps() cannot serialize bytes
                            "partial_data": json_entry[:1024].decode("utf8", "replace"),
                        }
                        json_entry = json.dumps(entry).encode("utf8")
                    self.stats.increase("journal.lines")
                    self.stats.increase("journal.bytes", inc_value=len(json_entry))
                    self.msg_buffer.set_item(json_entry, cursor)
                else:
                    self.log.debug("No more journal entries to read, sleeping")
                    if time.monotonic() - self.msg_buffer.last_journal_msg_time > 180 and self.msg_buffer.cursor:
                        self.log.info("We haven't seen any msgs in 180s, reinitiate Reader() and seek to: %r",
                                      self.msg_buffer.cursor)
                        self.get_reader(self.msg_buffer.cursor)
                        self.msg_buffer.last_journal_msg_time = time.monotonic()
                    time.sleep(0.5)
            except StopIteration:
                self.log.debug("No more journal entries to read, sleeping")
                time.sleep(0.5)
            except Exception as ex:  # pylint: disable=broad-except
                self.log.exception("Unexpected exception while handling entry: %r", entry)
                self.stats.unexpected_exception(ex=ex, where="mainloop", tags={"app": "kafkajournalpump"})
                time.sleep(0.5)

            self.ping_watchdog()
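
For reference, a minimal configuration sketch covering the keys this class reads; every value below is an illustrative assumption, not a default shipped with the project:

# Hypothetical configuration for KafkaJournalPump; the keys mirror the
# self.config lookups above, the values are placeholders.
example_config = {
    "journal_path": "/var/log/journal",       # optional; default journal when omitted
    "units_to_match": ["nginx.service"],      # optional _SYSTEMD_UNIT filters
    "match_key": "SYSLOG_IDENTIFIER",         # optional entry filter, see check_match()
    "match_value": "nginx",
    "kafka_address": "localhost:9092",        # required by initialize_sender()
    "json_state_file_path": "kafkajournalpump_state.json",
    "statsd": {"host": "127.0.0.1", "port": 8125, "tags": {"app": "kafkajournalpump"}},
}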

Example #2
class KafkaJournalPump(ServiceDaemon):
    def __init__(self, config_path):
        ServiceDaemon.__init__(self, config_path=config_path, multi_threaded=True, log_level=logging.INFO)
        cursor = self.load_state()
        self.msg_buffer = MsgBuffer(cursor)

        if self.config.get("journal_path"):
            while True:
                try:
                    self.journald_reader = Reader(path=self.config["journal_path"])
                    break
                except IOError as ex:
                    if ex.errno == errno.ENOENT:
                        self.log.warning("journal not available yet, waiting: %s: %s",
                                         ex.__class__.__name__, ex)
                        time.sleep(5.0)
                    else:
                        raise
        else:
            self.journald_reader = Reader()

        if cursor:
            self.journald_reader.seek_cursor(cursor)  # pylint: disable=no-member

        self.journald_reader.get_next = types.MethodType(get_next, self.journald_reader)
        self.journald_reader._convert_field = types.MethodType(_convert_field, self.journald_reader)  # pylint: disable=protected-access
        self.sender = None

    def sigterm(self, signum, frame):
        if self.sender:
            self.sender.running = False
        ServiceDaemon.sigterm(self, signum, frame)

    def load_state(self):
        filepath = self.config.get("json_state_file_path", "kafkajournalpump_state.json")
        if os.path.exists(filepath):
            with open(filepath, "r") as fp:
                state_file = json.load(fp)
            return state_file["cursor"]
        return None

    def check_match(self, entry):
        if not self.config.get("match_key"):
            return True
        elif entry.get(self.config["match_key"]) == self.config["match_value"]:
            return True
        return False

    def initialize_sender(self):
        if not self.sender:
            kafka_address = self.config.get("kafka_address")
            if not kafka_address:
                self.log.fatal("No kafka_address in configuration")
                return False
            try:
                self.sender = KafkaSender(self.config, self.msg_buffer,
                                          kafka_address=kafka_address)
            except kafka.common.KafkaUnavailableError:
                return False
            self.sender.start()
        return True

    def run(self):
        logging.getLogger("kafka").setLevel(logging.CRITICAL)  # remove client-internal tracebacks from logging output
        while self.running:
            entry = None
            try:
                if not self.initialize_sender():
                    self.log.warning("No Kafka sender, sleeping")
                    time.sleep(5.0)
                    continue

                entry, cursor = next(self.journald_reader)
                if cursor is not None:
                    if not self.check_match(entry):
                        self.msg_buffer.set_cursor(cursor)
                        continue
                    json_entry = json.dumps(entry).encode("utf8")
                    if len(json_entry) > MAX_KAFKA_MESSAGE_SIZE:
                        error = "too large message {} bytes vs maximum {} bytes".format(
                            len(json_entry), MAX_KAFKA_MESSAGE_SIZE)
                        self.log.warning("%s: %s ...", error, json_entry[:1024])
                        entry = {
                            "error": error,
                            # decode: json.dumps() cannot serialize bytes
                            "partial_data": json_entry[:1024].decode("utf8", "replace"),
                        }
                        json_entry = json.dumps(entry).encode("utf8")
                    self.msg_buffer.set_item(json_entry, cursor)
                else:
                    self.log.debug("No more journal entries to read, sleeping")
                    time.sleep(0.5)
            except StopIteration:
                self.log.debug("No more journal entries to read, sleeping")
                time.sleep(0.5)
            except Exception:  # pylint: disable=broad-except
                self.log.exception("Problem handling entry: %r", entry)
                time.sleep(0.5)

            self.ping_watchdog()
Example #3
class JournalPump(ServiceDaemon):
    def __init__(self, config_path):
        self.stats = None
        ServiceDaemon.__init__(self, config_path=config_path, multi_threaded=True, log_level=logging.INFO)
        cursor = self.load_state()
        self.msg_buffer = MsgBuffer(cursor)
        self.journald_reader = None
        self.sender = None
        self.get_reader(cursor)

    def get_reader(self, cursor):
        if self.config.get("journal_path"):
            while True:
                try:
                    self.journald_reader = Reader(path=self.config["journal_path"])
                    break
                except IOError as ex:
                    if ex.errno == errno.ENOENT:
                        self.log.warning("journal not available yet, waiting: %s: %s",
                                         ex.__class__.__name__, ex)
                        time.sleep(5.0)
                    else:
                        raise
        else:
            self.journald_reader = Reader()

        for unit_to_match in self.config.get("units_to_match", []):
            self.journald_reader.add_match(_SYSTEMD_UNIT=unit_to_match)

        if cursor:
            self.journald_reader.seek_cursor(cursor)  # pylint: disable=no-member

        self.journald_reader.get_next = types.MethodType(get_next, self.journald_reader)
        self.journald_reader._convert_field = types.MethodType(_convert_field, self.journald_reader)  # pylint: disable=protected-access

    def handle_new_config(self):
        """Called by ServiceDaemon when config has changed"""
        stats = self.config.get("statsd") or {}
        self.stats = statsd.StatsClient(
            host=stats.get("host"),
            port=stats.get("port"),
            tags=stats.get("tags"),
        )

    def sigterm(self, signum, frame):
        if self.sender:
            self.sender.running = False
        ServiceDaemon.sigterm(self, signum, frame)

    def load_state(self):
        filepath = self.config.get("json_state_file_path", "journalpump_state.json")
        if os.path.exists(filepath):
            with open(filepath, "r") as fp:
                state_file = json.load(fp)
            return state_file["cursor"]
        return None

    def check_match(self, entry):
        if not self.config.get("match_key"):
            return True
        elif entry.get(self.config["match_key"]) == self.config["match_value"]:
            return True
        return False

    def initialize_sender(self):
        if not self.sender:
            senders = {
                "elasticsearch": ElasticsearchSender,
                "kafka": KafkaSender,
                "logplex": LogplexSender,
            }
            sender_class = senders.get(self.config["output_type"])
            if sender_class is None:
                raise ValueError("unknown output_type: {!r}".format(self.config["output_type"]))
            self.sender = sender_class(config=self.config, msg_buffer=self.msg_buffer, stats=self.stats)
            self.sender.start()

    def run(self):
        while self.running:
            entry = None
            jobject = None  # referenced in the exception handler below
            try:
                self.initialize_sender()
                msg_buffer_length = len(self.msg_buffer)
                if msg_buffer_length > 50000:
                    # This makes the self.msg_buffer grow by one journal msg a second at most
                    self.log.debug("%d entries in msg buffer, slowing down a bit by sleeping",
                                   msg_buffer_length)
                    time.sleep(1.0)

                jobject = next(self.journald_reader)
                for key, value in jobject.entry.items():
                    if isinstance(value, bytes):
                        jobject.entry[key] = repr(value)  # value may be bytes in any encoding

                if jobject.cursor is not None:
                    if not self.check_match(jobject.entry):
                        self.msg_buffer.set_cursor(jobject.cursor)
                        continue
                    json_entry = json.dumps(jobject.entry).encode("utf8")
                    if len(json_entry) > MAX_KAFKA_MESSAGE_SIZE:
                        self.stats.increase("journal.error", tags={"error": "too_long"})
                        error = "too large message {} bytes vs maximum {} bytes".format(
                            len(json_entry), MAX_KAFKA_MESSAGE_SIZE)
                        self.log.warning("%s: %s ...", error, json_entry[:1024])
                        entry = {
                            "error": error,
                            # decode: json.dumps() cannot serialize bytes
                            "partial_data": json_entry[:1024].decode("utf8", "replace"),
                        }
                        json_entry = json.dumps(entry).encode("utf8")
                    self.stats.increase("journal.lines")
                    self.stats.increase("journal.bytes", inc_value=len(json_entry))
                    self.msg_buffer.set_item(json_entry, jobject.cursor)
                else:
                    self.log.debug("No more journal entries to read, sleeping")
                    if time.monotonic() - self.msg_buffer.last_journal_msg_time > 180 and self.msg_buffer.cursor:
                        self.log.info("We haven't seen any msgs in 180s, reinitiate Reader() and seek to: %r",
                                      self.msg_buffer.cursor)
                        self.get_reader(self.msg_buffer.cursor)
                        self.msg_buffer.last_journal_msg_time = time.monotonic()
                    time.sleep(0.5)
            except StopIteration:
                self.log.debug("No more journal entries to read, sleeping")
                time.sleep(0.5)
            except Exception as ex:  # pylint: disable=broad-except
                self.log.exception("Unexpected exception while handling entry: %r", jobject)
                self.stats.unexpected_exception(ex=ex, where="mainloop", tags={"app": "journalpump"})
                time.sleep(0.5)

            self.ping_watchdog()
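
A minimal launch sketch, assuming these ServiceDaemon subclasses take a config path and run() blocks until sigterm() clears self.running; the config file path is illustrative:

if __name__ == "__main__":
    # Hypothetical entry point; the config path is an assumption, not a
    # location mandated by the project.
    pump = JournalPump("/etc/journalpump/journalpump.json")
    pump.run()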