예제 #1
0
class MqttClient(object):
    """
    Holds connection and basic methods for accessing mqtt.

    Connect, disconnect and message events are forwarded to an optional
    manager (see ``set_manager``). A manager only has to implement the hooks
    it cares about: ``on_connect``, ``on_disconnect``, ``on_message`` and
    ``on_not_connect``.
    """

    def __init__(self, client_id, config, wait=True):
        """
        initialize mqtt client and start an asynchronous connection
        :param client_id: client id
        :param config: keeper configuration, the keys read here are
        "mqtt.user", "mqtt.pass", "mqtt.broker" and "mqtt.port"
        :param wait: whether reconnect() keeps retrying until connected
        """

        self.logger = Logger()
        user = config.get("mqtt.user")
        pwd = config.get("mqtt.pass")
        client = Client(client_id=client_id)
        client.on_connect = self._on_connect
        client.on_disconnect = self._on_disconnect
        client.on_message = self._on_message
        client.enable_logger(self.logger)
        # credentials are optional: only set them when both are configured
        if user and pwd:
            client.username_pw_set(user, pwd)

        client.connect_async(config["mqtt.broker"], config["mqtt.port"], 30)
        self.client = client
        self.connected = False
        self.manager = None
        self.wait = wait

    def __enter__(self):
        """
        entering context
        :return: MqttClient object
        """

        return self

    # noinspection PyShadowingBuiltins
    def __exit__(self, type, value, traceback):
        """
        disconnects client when exiting context
        :param type: exception type, unused
        :param value: exception value, unused
        :param traceback: exception traceback, unused
        """

        try:
            self.logger.debug("disconnecting mqtt client")
            self.client.disconnect()
        except Exception as ex:
            # best effort: a failed disconnect must not replace the error
            # (if any) that is leaving the context, but it should be visible
            self.logger.debug("failed to disconnect mqtt client: %s", ex)

        self.client = None

    def set_manager(self, manager):
        """
        sets associated manager
        :param manager: manager using connection
        """

        self.manager = manager

    def _notify_manager(self, hook, *args):
        """
        calls a custom hook on the associated manager when it defines one.
        A missing manager or a missing hook is not an error; any exception
        raised by the hook itself is logged and never propagated.
        :param hook: name of the manager method to call
        :param args: positional arguments handed to the hook
        """

        self.logger.debug("calling custom %s" % hook)
        handler = getattr(self.manager, hook, None)
        if not callable(handler):
            return

        try:
            handler(*args)
        except Exception as ex:
            # includes TypeError/AttributeError raised inside the hook, which
            # would otherwise hide real bugs in the manager implementation
            self.logger.error("failed to execute custom %s: %s" % (hook, ex))

    # noinspection PyProtectedMember
    def _on_disconnect(self, client, userdata, rc):
        """
        base on disconnect behaviour, can be extended with custom
        methods from implementation
        :param client: mqtt client
        :param userdata: userdata dict
        :param rc: rc code
        """

        self.logger.info("disconnected from %s:%s" % (client._host, client._port))
        self.connected = False
        self._notify_manager("on_disconnect", client, userdata, rc)

    # noinspection PyProtectedMember
    def _on_connect(self, client, userdata, flags, rc):
        """
        base on connect behaviour, can be extended with custom
        methods from implementation
        :param client: mqtt client
        :param userdata: userdata dict
        :param flags: flags
        :param rc: rc code, only 0 is treated as a successful connection
        """

        self.connected = rc == 0
        if self.connected:
            self.logger.info("connected to %s:%s" % (client._host, client._port))
        else:
            self.logger.warning("connection to %s:%s was refused (rc=%s)" %
                                (client._host, client._port, rc))

        self._notify_manager("on_connect", client, userdata, flags, rc)

    def _on_message(self, client, userdata, message):
        """
        base on message behaviour, can be extended with custom
        methods from implementation
        :param client: mqtt client
        :param userdata: userdata dict
        :param message: message received
        """

        self._notify_manager("on_message", client, userdata, message)

    def connection_status(self):
        """
        Runs one client loop iteration and returns a connection status code.
        :return: connection status code. 0 is not connected, 1 is
        waiting for connection and 2 for connected
        """

        try:
            # a positive loop() result is reported as not connected
            if self.client.loop() > 0:
                return 0

            if not self.connected:
                return 1

            return 2
        except Exception:
            # any failure while looping (including a released client) is
            # reported as not connected
            return 0

    def wait_connection(self, timeout=-1):
        """
        blocks waiting for connection
        :param timeout: seconds to wait before giving up, -1 waits forever
        """

        connection_status = self.connection_status
        reconnect = self.client.reconnect
        status = connection_status()
        now = datetime.now
        limit = now() + timedelta(seconds=timeout)
        while status != 2 and (timeout == -1 or now() <= limit):
            # reconnects when not connected, status 0
            # status 1 should only wait for connection
            # instead of reconnecting
            self.logger.debug("connection status is %s", str(status))
            if status == 0:
                try:
                    self.logger.debug("reconnecting to mqtt")
                    reconnect()
                except Exception as ex:
                    self.logger.debug("failed to connect mqtt: %s", ex)

            self.logger.debug("waiting 1 second for connection")
            sleep(1)
            status = connection_status()

    # noinspection PyProtectedMember
    def reconnect(self):
        """
        reconnects to mqtt client. When the client was created with
        wait=False a single attempt is made, otherwise it retries every
        second until connected. The manager's on_not_connect hook is called
        after each attempt that leaves the client not connected.
        :return: connection status
        """

        client = self.client
        self.logger.info("connecting to %s:%s" % (client._host, client._port))
        connection_status = self.connection_status
        reconnect = client.reconnect
        status = connection_status()
        wait = self.wait
        while status != 2:
            self.logger.debug("connection status is %s", str(status))
            if status == 0:
                try:
                    self.logger.debug("reconnecting to mqtt")
                    reconnect()
                except Exception as ex:
                    self.logger.debug("failed to connect mqtt: %s", ex)

            status = connection_status()
            if status == 0:
                self._notify_manager("on_not_connect")

            if not wait:
                return status

            sleep(1)

        return status

    def register(self, metric, icon):
        """
        register a new metric using mqtt discovery
        :param metric: metric identification
        :param icon: metric icon
        """

        self.logger.debug("registering metrics %s", metric)
        self.client.publish(CONFIG_TOPIC % metric, CONFIG_PAYLOAD % (metric, metric, icon), 1, True)

    def publish_state(self, metric, state):
        """
        publish state to mqtt
        :param metric: metric identification
        :param state: state value
        """

        self.logger.debug("updating metric %s with state %s", metric, state)
        self.client.publish(STATE_TOPIC % metric, state, 1, True)

    def loop(self):
        """
        calls mqtt client loop
        """
        self.client.loop()
예제 #2
0
class Heartbeater(object):
    """
    Heartbeat manager: tracks heartbeat messages received over mqtt,
    restarts ha after missed heartbeats and restarts the system when
    restarting ha does not help.
    """
    def __init__(self, config, storage, mqtt_client):
        """
        initializes heartbeater and registers itself as the mqtt manager
        :param config: keeper configuration dict, the keys read here are
        "ha.restart.command", "system.restart.command",
        "heartbeat.interval", "heartbeat.topic" and "heartbeat.delay"
        :param storage: storage access
        :param mqtt_client: MQTT client
        """

        self.attempts = 0
        self.misses = 0
        self.ha_command = config["ha.restart.command"].split(" ")
        self.sys_command = config["system.restart.command"].split(" ")
        self.inc = storage.inc
        self.registered = False
        put = storage.put
        # counters are read back and stored again so they survive restarts
        # NOTE(review): relies on storage.put returning the stored value
        self.missed_heartbeats = put(
            HEARTBEATER_MISSED_HEARTBEAT,
            storage.get_int(HEARTBEATER_MISSED_HEARTBEAT))
        self.ha_restarts = put(HEARTBEATER_HA_RESTARTS,
                               storage.get_int(HEARTBEATER_HA_RESTARTS))
        self.system_restarts = put(
            HEARTBEATER_SYSTEM_RESTARTS,
            storage.get_int(HEARTBEATER_SYSTEM_RESTARTS))
        self.put = put
        self.get = storage.get
        self.now = datetime.now
        self.last_message = None
        self.last_known_message = None
        self.interval = config["heartbeat.interval"]
        self.topic = config["heartbeat.topic"]
        self.delay = config["heartbeat.delay"]
        # (metric, state) pairs waiting to be published by loop()
        self.states_queue = []
        mqtt_client.set_manager(self)
        self.mqtt_client = mqtt_client
        self.logger = Logger()

    def __enter__(self):
        """
        informs when entering context and connects to mqtt
        :return: Heartbeater object
        """

        self.logger.info("starting heartbeater manager[pid=%s]" % getpid())
        self.mqtt_client.reconnect()

        return self

    # noinspection PyShadowingBuiltins
    def __exit__(self, type, value, traceback):
        """
        publishes manager status when exiting context
        :param type: exception type, unused
        :param value: exception value, unused
        :param traceback: exception traceback, unused
        """

        self.logger.info("stopping heartbeater[pid=%s]" % getpid())
        try:
            self.mqtt_client.publish_state(HEARTBEATER_STATUS,
                                           STATUS_NOT_RUNNING)
        except Exception as ex:
            self.logger.error("failed to publish heartbeater status: %s" % ex)

    # noinspection PyUnusedLocal
    def on_connect(self, client, userdata, flags, rc):
        """
        subscribes to heartbeat topic
        registers sensors and sends metrics
        :param client: mqtt client
        :param userdata: userdata dict
        :param flags: flags
        :param rc: rc code
        """

        self.logger.info("subscribing topic %s" % self.topic)
        client.subscribe(self.topic)
        # first time we are connected we register metrics and
        # send initial values
        if not self.registered:
            self.logger.info("registering metrics")
            try:
                publish_state = self.mqtt_client.publish_state
                register = self.mqtt_client.register
                # register all metrics
                register(HEARTBEATER_STATUS, HEARTBEATER_STATUS_ICON)
                register(HEARTBEATER_MISSED_HEARTBEAT,
                         HEARTBEATER_MISSED_HEARTBEAT_ICON)
                register(HEARTBEATER_HA_RESTARTS, HEARTBEATER_HA_RESTARTS_ICON)
                register(HEARTBEATER_SYSTEM_RESTARTS,
                         HEARTBEATER_SYSTEM_RESTARTS_ICON)
                register(HEARTBEATER_LAST_HEARTBEAT,
                         HEARTBEATER_LAST_HEARTBEAT_ICON)
                register(HEARTBEATER_LAST_HA_RESTART,
                         HEARTBEATER_LAST_HA_RESTART_ICON)
                register(HEARTBEATER_LAST_SYSTEM_RESTART,
                         HEARTBEATER_LAST_SYSTEM_RESTART_ICON)
                # sends initial values
                publish_state(HEARTBEATER_STATUS, STATUS_RUNNING)
                publish_state(HEARTBEATER_MISSED_HEARTBEAT,
                              self.missed_heartbeats)
                publish_state(HEARTBEATER_HA_RESTARTS, self.ha_restarts)
                publish_state(HEARTBEATER_SYSTEM_RESTARTS,
                              self.system_restarts)
                publish_state(HEARTBEATER_LAST_HEARTBEAT,
                              self.get(HEARTBEATER_LAST_HEARTBEAT))
                publish_state(HEARTBEATER_LAST_HA_RESTART,
                              self.get(HEARTBEATER_LAST_HA_RESTART))
                publish_state(HEARTBEATER_LAST_SYSTEM_RESTART,
                              self.get(HEARTBEATER_LAST_SYSTEM_RESTART))
                # only flagged after everything above went through, so a
                # failure is retried on the next connect
                self.registered = True
            except Exception as ex:
                self.logger.error("failed to register initial metrics: %s" %
                                  ex)

    # noinspection PyUnusedLocal
    def on_message(self, client, userdata, message):
        """
        updates heartbeat message timestamp and queues the
        last heartbeat metric
        :param client: mqtt client
        :param userdata: userdata dict
        :param message: message received
        """

        self.last_message = self.now()
        last_message_fmt = strftime(TIME_FORMAT)
        self.logger.debug("last heartbeat from ha at %s", last_message_fmt)
        self.states_queue.append((HEARTBEATER_LAST_HEARTBEAT,
                                  self.put(HEARTBEATER_LAST_HEARTBEAT,
                                           last_message_fmt)))

    def wait_ha_connection(self):
        """
        waits for a heartbeat message or timeout of 300 seconds.
        When no heartbeat shows up in time the current time is used as the
        heartbeat baseline so monitor() keeps counting from now.
        The wait also stops when the `running` name (defined outside this
        class) becomes false.
        """

        self.last_message = None
        self.last_known_message = None
        now = self.now
        limit = now() + timedelta(seconds=300)
        self.logger.info("waiting for ha heartbeat")
        while running and not self.last_message and now() < limit:
            try:
                # on_message is triggered from inside the client loop
                self.mqtt_client.loop()
            except Exception as ex:
                self.logger.warning(ex)

            sleep(1)

        if self.last_message:
            self.logger.info("ha is reachable")
        else:
            self.last_message = self.now()
            self.last_known_message = self.last_message
            self.logger.warning("ha service still not reachable")

    def monitor(self):
        """
        monitors heartbeat messages and restarts ha if 3 messages are missed
        also restarts system after 3 ha restarts
        """

        if self.last_message is None:
            # no heartbeat baseline yet (nothing received and
            # wait_ha_connection not called): start counting from now, the
            # same fallback wait_ha_connection applies, instead of failing
            # on the datetime subtraction below
            self.last_message = self.now()
            self.last_known_message = self.last_message

        silence = (self.now() - self.last_message).total_seconds()
        if silence > self.interval + self.delay:
            self.logger.warning("heartbeat threshold reached")
            if self.misses < 3:
                self.misses += 1
                # move the baseline one interval forward so the next miss
                # is only counted after another full interval
                self.last_message += timedelta(seconds=self.interval)
                self.missed_heartbeats = self.inc(HEARTBEATER_MISSED_HEARTBEAT,
                                                  self.missed_heartbeats)
                self.states_queue.append(
                    (HEARTBEATER_MISSED_HEARTBEAT, self.missed_heartbeats))
                self.logger.warning("tolerating missed heartbeat (%s of 3)" %
                                    self.misses)
            elif self.attempts < 3:
                self.attempts += 1
                self.misses = 0
                self.logger.warning("max of misses reached")
                self.logger.warning(
                    "restarting ha service (%s of 3) with command %s" %
                    (self.attempts, " ".join(self.ha_command)))
                if exec_command(self.ha_command):
                    append = self.states_queue.append
                    self.ha_restarts = self.inc(HEARTBEATER_HA_RESTARTS,
                                                self.ha_restarts)
                    append((HEARTBEATER_HA_RESTARTS, self.ha_restarts))
                    append((HEARTBEATER_LAST_HA_RESTART,
                            self.put(HEARTBEATER_LAST_HA_RESTART,
                                     strftime(TIME_FORMAT))))
                    self.wait_ha_connection()
            else:
                self.logger.warning("heartbeat still failing after 3 restarts")
                self.logger.warning("rebooting")
                append = self.states_queue.append
                self.system_restarts = self.inc(HEARTBEATER_SYSTEM_RESTARTS,
                                                self.system_restarts)
                append((HEARTBEATER_SYSTEM_RESTARTS, self.system_restarts))
                append((HEARTBEATER_LAST_SYSTEM_RESTART,
                        self.put(HEARTBEATER_LAST_SYSTEM_RESTART,
                                 strftime(TIME_FORMAT))))
                exec_command(self.sys_command)

            # the baseline was changed by us, not by a real heartbeat, so it
            # must not be taken for a new message below
            self.last_known_message = self.last_message

        if self.last_known_message != self.last_message:
            # a real heartbeat arrived since the last check
            self.logger.debug("resetting counters")
            self.misses = 0
            self.attempts = 0

    def loop(self):
        """
        sends metrics if any to send
        sleeps 1 second until next validation
        """

        publish_state = self.mqtt_client.publish_state
        sent = 0
        try:
            for metric, state in self.states_queue:
                publish_state(metric, state)
                sent += 1
        except Exception as ex:
            self.logger.warning("unable to update metrics: %s" % ex)

        # drop only what was delivered: states that were not sent stay
        # queued for the next loop and delivered ones are not sent twice
        self.states_queue = self.states_queue[sent:]

        sleep(1)
예제 #3
0
class Connector(object):
    """
    Connector logic to restart connections: tracks how much of the running
    time was spent connected to mqtt and restarts the mqtt service after
    repeated failed connection attempts.
    """

    def __init__(self, config, storage, mqtt_client):
        """
        initializes connector and registers itself as the mqtt manager
        :param config: keeper configuration dict, the key read here is
        "mqtt.restart.command"
        :param storage: storage access
        :param mqtt_client: MQTT client
        """

        self.attempts = 0
        self.was_stable = True
        self.command = config["mqtt.restart.command"].split(" ")
        self.mqtt_client = None
        self.registered = False
        self.started_at = datetime.now()
        # seconds accumulated over all finished connection sessions
        self.time_connected = 0
        # start of the current connection session, None while disconnected
        self.connected_at = None
        put = storage.put
        # counters are read back and stored again so they survive restarts
        # NOTE(review): relies on storage.put returning the stored value
        self.mqtt_restarts = put(CONNECTOR_MQTT_RESTARTS, storage.get_int(CONNECTOR_MQTT_RESTARTS))
        self.failed_connections = put(CONNECTOR_FAILED_CONNECTIONS, storage.get_int(CONNECTOR_FAILED_CONNECTIONS))
        # (metric, state) pairs waiting to be published by loop()
        self.states_queue = []
        self.put = put
        self.get = storage.get
        self.inc = storage.inc
        mqtt_client.set_manager(self)
        self.mqtt_client = mqtt_client
        self.logger = Logger()

    def __enter__(self):
        """
        informs when entering context and connects to mqtt
        :return: Connector object
        """

        self.logger.info("starting connector manager[pid=%s]" % getpid())
        self.mqtt_client.reconnect()

        return self

    # noinspection PyShadowingBuiltins
    def __exit__(self, type, value, traceback):
        """
        publishes manager status when exiting context
        :param type: exception type, unused
        :param value: exception value, unused
        :param traceback: exception traceback, unused
        """

        self.logger.info("stopping connector[pid=%s]" % getpid())
        try:
            self.mqtt_client.publish_state(CONNECTOR_STATUS, STATUS_NOT_RUNNING)
        except Exception as ex:
            self.logger.error("failed to publish connector status: %s" % ex)

    # noinspection PyUnusedLocal
    def on_connect(self, client, userdata, flags, rc):
        """
        updates connection status on connect
        registers sensors and sends metrics
        :param client: mqtt client
        :param userdata: userdata dict
        :param flags: flags
        :param rc: rc code, only 0 is treated as a successful connection
        """

        if rc == 0:
            # a refused connection must not count as connected time
            self.connected_at = datetime.now()
            # failed attempts are meant to be consecutive: start over once
            # the broker accepted us
            self.attempts = 0

        # first time we are connected we register metrics and
        # send initial values
        if not self.registered:
            self.logger.info("registering metrics")
            try:
                publish_state = self.mqtt_client.publish_state
                register = self.mqtt_client.register
                # register all metrics
                register(CONNECTOR_STATUS, CONNECTOR_STATUS_ICON)
                register(CONNECTOR_CONNECTION_STATUS, CONNECTOR_CONNECTION_STATUS_ICON)
                register(CONNECTOR_MQTT_RESTARTS, CONNECTOR_MQTT_RESTARTS_ICON)
                register(CONNECTOR_FAILED_CONNECTIONS, CONNECTOR_FAILED_CONNECTIONS_ICON)
                register(CONNECTOR_LAST_MQTT_RESTART, CONNECTOR_LAST_MQTT_RESTART_ICON)
                # sends initial values
                publish_state(CONNECTOR_STATUS, STATUS_RUNNING)
                publish_state(CONNECTOR_CONNECTION_STATUS, CONNECTOR_CONNECTION_OK)
                publish_state(CONNECTOR_MQTT_RESTARTS, self.mqtt_restarts)
                publish_state(CONNECTOR_FAILED_CONNECTIONS, self.failed_connections)
                publish_state(CONNECTOR_LAST_MQTT_RESTART, self.get(CONNECTOR_LAST_MQTT_RESTART))
                # only flagged after everything above went through, so a
                # failure is retried on the next connect
                self.registered = True
            except Exception as ex:
                self.logger.error("failed to register initial metrics: %s" % ex)

    # noinspection PyUnusedLocal
    def on_disconnect(self, client, userdata, rc):
        """
        updates connection status on disconnect
        :param client: mqtt client
        :param userdata: userdata dict
        :param rc: rc code
        """

        self.was_stable = self.is_stable()
        self.states_queue.append(
            (CONNECTOR_CONNECTION_STATUS, CONNECTOR_CONNECTION_OK if self.was_stable else CONNECTOR_CONNECTION_NOK))

    def is_stable(self, update=True):
        """
        check if connection is stable by checking if it's up 90% of the time
        since this connector was created
        :param update: whether we should update total time connected; this
        closes the current connection session, so it is meant to be used
        when the connection was lost
        :return: true if connection is stable, false otherwise
        """
        now = datetime.now()
        # length of the session still open, nothing when never connected or
        # when the last session was already accounted for
        session = 0
        if self.connected_at is not None:
            session = (now - self.connected_at).total_seconds()

        if update:
            self.time_connected += session
            # session is accounted for: forget its start so it can not be
            # counted again before the next successful connect
            self.connected_at = None
            tc = self.time_connected
        else:
            tc = self.time_connected + session

        self.logger.debug("spent %s seconds connected", tc)

        return (tc * 100) / (now - self.started_at).total_seconds() >= 90

    def on_not_connect(self):
        """
        behavior when connecting to mqtt failed
        after 3 failed attempts we try to restart mqtt and wait it
        to connect again (max 60 seconds)
        """

        if self.attempts >= 3:
            self.logger.warning("max of 3 connection attempts was reached")
            self.logger.warning("restarting mqtt service")
            if exec_command(self.command):
                append = self.states_queue.append
                self.mqtt_restarts = self.inc(CONNECTOR_MQTT_RESTARTS, self.mqtt_restarts)
                append((CONNECTOR_MQTT_RESTARTS, self.mqtt_restarts))
                append((CONNECTOR_LAST_MQTT_RESTART, self.put(CONNECTOR_LAST_MQTT_RESTART, strftime(TIME_FORMAT))))
                self.mqtt_client.wait_connection(60)
                self.attempts = 0
        else:
            self.attempts += 1
            self.failed_connections = self.inc(CONNECTOR_FAILED_CONNECTIONS, self.failed_connections)
            self.states_queue.append((CONNECTOR_FAILED_CONNECTIONS, self.failed_connections))
            self.logger.warning("broker is not responding (%s of 3)" % self.attempts)
            # give the broker some time before the next attempt
            sleep(10)

    def loop(self):
        """
        re-evaluates stability while the connection is flagged unstable
        sends metrics if any to send
        sleeps 1 second until next validation
        """

        if not self.was_stable:
            self.was_stable = self.is_stable(False)
            self.states_queue.append(
                (CONNECTOR_CONNECTION_STATUS, CONNECTOR_CONNECTION_OK if self.was_stable else CONNECTOR_CONNECTION_NOK))

        publish_state = self.mqtt_client.publish_state
        sent = 0
        try:
            for metric, state in self.states_queue:
                publish_state(metric, state)
                sent += 1
        except Exception as ex:
            self.logger.warning("unable to update metrics: %s" % ex)

        # drop only what was delivered: states that were not sent stay
        # queued for the next loop and delivered ones are not sent twice
        self.states_queue = self.states_queue[sent:]

        sleep(1)