class MqttClient(object):
    """
    Holds connection and basic methods for accessing mqtt

    The underlying client callbacks are wired to the ``_on_*`` methods of
    this class, which update the local connection flag and then forward
    the event to the optional manager registered with ``set_manager``.
    """

    def __init__(self, client_id, config, wait=True):
        """
        initialize mqtt client

        Credentials are only set when both user and password are
        configured. The connection itself is started asynchronously.

        :param client_id: client id
        :param config: keeper configuration
        :param wait: whether to wait for connection
        """
        self.logger = Logger()
        user = config.get("mqtt.user")
        pwd = config.get("mqtt.pass")
        client = Client(client_id=client_id)
        client.on_connect = self._on_connect
        client.on_disconnect = self._on_disconnect
        client.on_message = self._on_message
        client.enable_logger(self.logger)
        if user and pwd:
            client.username_pw_set(user, pwd)
        # NOTE(review): third argument is presumably the keepalive in
        # seconds - confirm against the client library
        client.connect_async(config["mqtt.broker"], config["mqtt.port"], 30)
        self.client = client
        self.connected = False
        self.manager = None
        self.wait = wait

    def __enter__(self):
        """
        entering context

        :return: MqttClient object
        """
        return self

    # noinspection PyShadowingBuiltins
    def __exit__(self, type, value, traceback):
        """
        disconnects client when exiting context

        Disconnecting is best effort: a failure is logged and never
        propagated. The client reference is always dropped afterwards.

        :param type: exception type, if any (unused)
        :param value: exception value, if any (unused)
        :param traceback: exception traceback, if any (unused)
        """
        try:
            self.logger.debug("disconnecting mqtt client")
            self.client.disconnect()
        except Exception as ex:
            # best effort, but leave a trace instead of failing silently
            self.logger.debug("failed to disconnect mqtt client: %s", ex)
        self.client = None

    def set_manager(self, manager):
        """
        sets associated manager

        :param manager: manager using connection
        """
        self.manager = manager

    def _call_manager(self, name, *args):
        """
        calls the manager hook ``name`` when the manager defines it

        A missing manager or a manager without the hook is not an error.
        Any exception raised by the hook itself is logged, including
        TypeError and AttributeError, so bugs in handlers stay visible.

        :param name: hook name, e.g. ``on_connect``
        :param args: positional arguments forwarded to the hook
        """
        handler = getattr(self.manager, name, None)
        if not callable(handler):
            return
        try:
            self.logger.debug("calling custom %s" % name)
            handler(*args)
        except Exception as ex:
            self.logger.error(
                "failed to execute custom %s: %s" % (name, ex))

    # noinspection PyProtectedMember
    def _on_disconnect(self, client, userdata, rc):
        """
        base on disconnect behaviour, can be extended with custom methods
        from implementation

        :param client: mqtt client
        :param userdata: userdata dict
        :param rc: rc code
        """
        self.logger.info(
            "disconnected from %s:%s" % (client._host, client._port))
        self.connected = False
        # call custom on disconnect methods if any defined
        self._call_manager("on_disconnect", client, userdata, rc)

    # noinspection PyProtectedMember
    def _on_connect(self, client, userdata, flags, rc):
        """
        base on connect behaviour, can be extended with custom methods
        from implementation

        The client only counts as connected when ``rc`` is 0.

        :param client: mqtt client
        :param userdata: userdata dict
        :param flags: flags
        :param rc: rc code
        """
        self.connected = rc == 0
        if self.connected:
            self.logger.info(
                "connected to %s:%s" % (client._host, client._port))
        else:
            # do not report a successful connection on a non-zero rc
            self.logger.warning(
                "connection to %s:%s failed with rc %s"
                % (client._host, client._port, rc))
        # call custom on connect methods if any defined
        self._call_manager("on_connect", client, userdata, flags, rc)

    def _on_message(self, client, userdata, message):
        """
        base on message behaviour, can be extended with custom methods
        from implementation

        :param client: mqtt client
        :param userdata: userdata dict
        :param message: message received
        """
        # call custom on message methods if any defined
        self._call_manager("on_message", client, userdata, message)

    def connection_status(self):
        """
        Returns a connection status code.

        Runs one client loop iteration; a positive result or any
        exception (including a missing client) is reported as not
        connected.

        :return: connection status code. 0 is not connected,
            1 is waiting for connection and 2 for connected
        """
        try:
            if self.client.loop() > 0:
                return 0
        except Exception:
            return 0
        return 2 if self.connected else 1

    def wait_connection(self, timeout=-1):
        """
        blocks waiting for connection

        Polls the connection status once per second, reconnecting
        whenever the status is 0, until connected or until the timeout
        expires.

        :param timeout: maximum seconds to wait, -1 waits indefinitely
        """
        reconnect = self.client.reconnect
        now = datetime.now
        forever = timeout == -1
        limit = now() + timedelta(seconds=timeout)
        status = self.connection_status()
        while status != 2 and (forever or now() <= limit):
            # reconnects when not connected, status 0
            # status 1 should only wait for connection
            # instead of reconnecting
            self.logger.debug("connection status is %s", str(status))
            if status == 0:
                try:
                    self.logger.debug("reconnecting to mqtt")
                    reconnect()
                except Exception as ex:
                    self.logger.debug("failed to connect mqtt: %s", ex)
            self.logger.debug("waiting 1 second for connection")
            sleep(1)
            status = self.connection_status()

    # noinspection PyProtectedMember
    def reconnect(self):
        """
        reconnects to mqtt client

        Loops until connected. Whenever the status is still 0 after a
        reconnect attempt the manager ``on_not_connect`` hook is called.
        When the client was created with ``wait=False`` the method
        returns after a single pass.

        :return: connection status
        """
        client = self.client
        self.logger.info(
            "connecting to %s:%s" % (client._host, client._port))
        reconnect = client.reconnect
        wait = self.wait
        status = self.connection_status()
        while status != 2:
            self.logger.debug("connection status is %s", str(status))
            if status == 0:
                try:
                    self.logger.debug("reconnecting to mqtt")
                    reconnect()
                except Exception as ex:
                    self.logger.debug("failed to connect mqtt: %s", ex)
            status = self.connection_status()
            if status == 0:
                self._call_manager("on_not_connect")
            if not wait:
                return status
            sleep(1)
        return status

    def register(self, metric, icon):
        """
        register a new metric using mqtt discovery

        :param metric: metric identification
        :param icon: metric icon
        """
        self.logger.debug("registering metrics %s", metric)
        # NOTE(review): trailing 1, True are presumably qos and retain
        self.client.publish(CONFIG_TOPIC % metric,
                            CONFIG_PAYLOAD % (metric, metric, icon),
                            1, True)

    def publish_state(self, metric, state):
        """
        publish state to mqtt

        :param metric: metric identification
        :param state: state value
        """
        self.logger.debug("updating metric %s with state %s", metric, state)
        # NOTE(review): trailing 1, True are presumably qos and retain
        self.client.publish(STATE_TOPIC % metric, state, 1, True)

    def loop(self):
        """
        calls mqtt client loop
        """
        self.client.loop()
class Heartbeater(object):
    """
    Heartbeat that monitors heartbeat messages

    Acts as the manager of an mqtt client: it subscribes to the heartbeat
    topic, tracks when the last heartbeat arrived and escalates from
    tolerating misses, to restarting ha, to restarting the system.
    """

    def __init__(self, config, storage, mqtt_client):
        """
        initializes heartbeater

        Persisted counters are read from storage and written back so the
        stored and in-memory values start in sync.

        :param config: keeper configuration dict
        :param storage: storage access
        :param mqtt_client: MQTT client
        """
        self.attempts = 0
        self.misses = 0
        self.ha_command = config["ha.restart.command"].split(" ")
        self.sys_command = config["system.restart.command"].split(" ")
        self.inc = storage.inc
        self.registered = False
        put = storage.put
        self.missed_heartbeats = put(
            HEARTBEATER_MISSED_HEARTBEAT,
            storage.get_int(HEARTBEATER_MISSED_HEARTBEAT))
        self.ha_restarts = put(
            HEARTBEATER_HA_RESTARTS,
            storage.get_int(HEARTBEATER_HA_RESTARTS))
        self.system_restarts = put(
            HEARTBEATER_SYSTEM_RESTARTS,
            storage.get_int(HEARTBEATER_SYSTEM_RESTARTS))
        self.put = put
        self.get = storage.get
        self.now = datetime.now
        # timestamp of the last heartbeat (or of the last tolerated miss)
        self.last_message = None
        # value of last_message already accounted for by monitor()
        self.last_known_message = None
        self.interval = config["heartbeat.interval"]
        self.topic = config["heartbeat.topic"]
        self.delay = config["heartbeat.delay"]
        # (metric, state) pairs waiting to be published by loop()
        self.states_queue = []
        mqtt_client.set_manager(self)
        self.mqtt_client = mqtt_client
        self.logger = Logger()

    def __enter__(self):
        """
        informs when entering context and connects the mqtt client

        :return: Heartbeater object
        """
        self.logger.info("starting heartbeater manager[pid=%s]" % getpid())
        self.mqtt_client.reconnect()
        return self

    # noinspection PyShadowingBuiltins
    def __exit__(self, type, value, traceback):
        """
        publishes manager status when exiting context

        :param type: exception type, if any (unused)
        :param value: exception value, if any (unused)
        :param traceback: exception traceback, if any (unused)
        """
        self.logger.info("stopping heartbeater[pid=%s]" % getpid())
        try:
            self.mqtt_client.publish_state(HEARTBEATER_STATUS,
                                           STATUS_NOT_RUNNING)
        except Exception as ex:
            self.logger.error(
                "failed to publish heartbeater status: %s" % ex)

    # noinspection PyUnusedLocal
    def on_connect(self, client, userdata, flags, rc):
        """
        subscribes to heartbeat topic
        registers sensors and sends metrics

        Registration happens only until it succeeds once; a failure is
        logged and retried on the next connect.

        :param client: mqtt client
        :param userdata: userdata dict
        :param flags: flags
        :param rc: rc code
        """
        self.logger.info("subscribing topic %s" % self.topic)
        client.subscribe(self.topic)
        # first time we are connected we register metrics and
        # send initial values
        if self.registered:
            return
        self.logger.info("registering metrics")
        try:
            publish_state = self.mqtt_client.publish_state
            register = self.mqtt_client.register
            # register all metrics
            register(HEARTBEATER_STATUS, HEARTBEATER_STATUS_ICON)
            register(HEARTBEATER_MISSED_HEARTBEAT,
                     HEARTBEATER_MISSED_HEARTBEAT_ICON)
            register(HEARTBEATER_HA_RESTARTS, HEARTBEATER_HA_RESTARTS_ICON)
            register(HEARTBEATER_SYSTEM_RESTARTS,
                     HEARTBEATER_SYSTEM_RESTARTS_ICON)
            register(HEARTBEATER_LAST_HEARTBEAT,
                     HEARTBEATER_LAST_HEARTBEAT_ICON)
            register(HEARTBEATER_LAST_HA_RESTART,
                     HEARTBEATER_LAST_HA_RESTART_ICON)
            register(HEARTBEATER_LAST_SYSTEM_RESTART,
                     HEARTBEATER_LAST_SYSTEM_RESTART_ICON)
            # sends initial values
            publish_state(HEARTBEATER_STATUS, STATUS_RUNNING)
            publish_state(HEARTBEATER_MISSED_HEARTBEAT,
                          self.missed_heartbeats)
            publish_state(HEARTBEATER_HA_RESTARTS, self.ha_restarts)
            publish_state(HEARTBEATER_SYSTEM_RESTARTS, self.system_restarts)
            publish_state(HEARTBEATER_LAST_HEARTBEAT,
                          self.get(HEARTBEATER_LAST_HEARTBEAT))
            publish_state(HEARTBEATER_LAST_HA_RESTART,
                          self.get(HEARTBEATER_LAST_HA_RESTART))
            publish_state(HEARTBEATER_LAST_SYSTEM_RESTART,
                          self.get(HEARTBEATER_LAST_SYSTEM_RESTART))
            self.registered = True
        except Exception as ex:
            self.logger.error("failed to register initial metrics: %s" % ex)

    # noinspection PyUnusedLocal
    def on_message(self, client, userdata, message):
        """
        updates heartbeat message timestamp

        The formatted timestamp is stored and queued for publishing.

        :param client: mqtt client
        :param userdata: userdata dict
        :param message: message received
        """
        self.last_message = self.now()
        last_message_fmt = strftime(TIME_FORMAT)
        self.logger.debug("last heartbeat from ha at %s", last_message_fmt)
        stored = self.put(HEARTBEATER_LAST_HEARTBEAT, last_message_fmt)
        self.states_queue.append((HEARTBEATER_LAST_HEARTBEAT, stored))

    def wait_ha_connection(self):
        """
        waits for a heartbeat message or timeout of 300 seconds

        When no heartbeat arrives in time the clock is restarted from
        now, so monitoring continues from this point.
        """
        self.last_message = None
        self.last_known_message = None
        now = self.now
        limit = now() + timedelta(seconds=300)
        self.logger.info("waiting for ha heartbeat")
        # "running" is a module level flag defined outside this class
        while running and not self.last_message and now() < limit:
            try:
                self.mqtt_client.loop()
            except Exception as ex:
                self.logger.warning(ex)
            sleep(1)
        if self.last_message:
            self.logger.info("ha is reachable")
        else:
            self.last_message = self.now()
            self.last_known_message = self.last_message
            self.logger.warning("ha service still not reachable")

    def monitor(self):
        """
        monitors heartbeat messages and restarts ha if 3 messages are
        missed
        also restarts system after 3 ha restarts

        Counters are reset as soon as a new heartbeat is observed.
        """
        if self.last_message is None:
            # nothing to compare against yet: start the clock now instead
            # of failing on "now - None"
            self.last_message = self.now()
            self.last_known_message = self.last_message
        elapsed = (self.now() - self.last_message).total_seconds()
        if elapsed > self.interval + self.delay:
            self.logger.warning("heartbeat threshold reached")
            if self.misses < 3:
                self.misses += 1
                # move the reference forward so the next miss is only
                # counted one interval later
                self.last_message += timedelta(seconds=self.interval)
                self.missed_heartbeats = self.inc(
                    HEARTBEATER_MISSED_HEARTBEAT, self.missed_heartbeats)
                self.states_queue.append(
                    (HEARTBEATER_MISSED_HEARTBEAT, self.missed_heartbeats))
                self.logger.warning(
                    "tolerating missed heartbeat (%s of 3)" % self.misses)
            elif self.attempts < 3:
                self.attempts += 1
                self.misses = 0
                self.logger.warning("max of misses reached")
                self.logger.warning(
                    "restarting ha service (%s of 3) with command %s"
                    % (self.attempts, " ".join(self.ha_command)))
                if exec_command(self.ha_command):
                    append = self.states_queue.append
                    self.ha_restarts = self.inc(HEARTBEATER_HA_RESTARTS,
                                                self.ha_restarts)
                    append((HEARTBEATER_HA_RESTARTS, self.ha_restarts))
                    append((HEARTBEATER_LAST_HA_RESTART,
                            self.put(HEARTBEATER_LAST_HA_RESTART,
                                     strftime(TIME_FORMAT))))
                    self.wait_ha_connection()
            else:
                self.logger.warning(
                    "heartbeat still failing after 3 restarts")
                self.logger.warning("rebooting")
                # metrics are stored before the command runs
                append = self.states_queue.append
                self.system_restarts = self.inc(HEARTBEATER_SYSTEM_RESTARTS,
                                                self.system_restarts)
                append((HEARTBEATER_SYSTEM_RESTARTS, self.system_restarts))
                append((HEARTBEATER_LAST_SYSTEM_RESTART,
                        self.put(HEARTBEATER_LAST_SYSTEM_RESTART,
                                 strftime(TIME_FORMAT))))
                exec_command(self.sys_command)
            # changes made above are not new heartbeats
            self.last_known_message = self.last_message
        if self.last_known_message != self.last_message:
            # a heartbeat arrived since the last check
            self.logger.debug("resetting counters")
            self.misses = 0
            self.attempts = 0
            # remember it, otherwise the reset repeats on every call
            self.last_known_message = self.last_message

    def _flush_states(self):
        """
        publishes queued states in order

        Stops at the first failure; states already published are removed
        from the queue and the remaining ones are kept for the next call.
        """
        publish_state = self.mqtt_client.publish_state
        queue = self.states_queue
        sent = 0
        try:
            for metric, state in queue:
                publish_state(metric, state)
                sent += 1
        except Exception as ex:
            self.logger.warning("unable to update metrics: %s" % ex)
        del queue[:sent]

    def loop(self):
        """
        sleeps 1 second until next validation
        sends metrics if any to send
        """
        self._flush_states()
        sleep(1)
class Connector(object):
    """
    Connector logic to restart connections

    Acts as the manager of an mqtt client: it tracks how long the
    connection has been up, reports whether it is stable and restarts
    the mqtt service after repeated failed connection attempts.
    """

    def __init__(self, config, storage, mqtt_client):
        """
        initializes connector

        Persisted counters are read from storage and written back so the
        stored and in-memory values start in sync.

        :param config: keeper configuration dict
        :param storage: storage access
        :param mqtt_client: MQTT client
        """
        self.attempts = 0
        self.was_stable = True
        self.command = config["mqtt.restart.command"].split(" ")
        self.registered = False
        self.started_at = datetime.now()
        # seconds spent connected in sessions that already ended
        self.time_connected = 0
        # start of the current session, None when there is none
        self.connected_at = None
        put = storage.put
        self.mqtt_restarts = put(CONNECTOR_MQTT_RESTARTS,
                                 storage.get_int(CONNECTOR_MQTT_RESTARTS))
        self.failed_connections = put(
            CONNECTOR_FAILED_CONNECTIONS,
            storage.get_int(CONNECTOR_FAILED_CONNECTIONS))
        # (metric, state) pairs waiting to be published by loop()
        self.states_queue = []
        self.put = put
        self.get = storage.get
        self.inc = storage.inc
        mqtt_client.set_manager(self)
        self.mqtt_client = mqtt_client
        self.logger = Logger()

    def __enter__(self):
        """
        informs when entering context and connects the mqtt client

        :return: Connector object
        """
        self.logger.info("starting connector manager[pid=%s]" % getpid())
        self.mqtt_client.reconnect()
        return self

    # noinspection PyShadowingBuiltins
    def __exit__(self, type, value, traceback):
        """
        publishes manager status when exiting context

        :param type: exception type, if any (unused)
        :param value: exception value, if any (unused)
        :param traceback: exception traceback, if any (unused)
        """
        self.logger.info("stopping connector[pid=%s]" % getpid())
        try:
            self.mqtt_client.publish_state(CONNECTOR_STATUS,
                                           STATUS_NOT_RUNNING)
        except Exception as ex:
            self.logger.error("failed to publish connector status: %s" % ex)

    # noinspection PyUnusedLocal
    def on_connect(self, client, userdata, flags, rc):
        """
        updates connection status on connect
        registers sensors and sends metrics

        Registration happens only until it succeeds once; a failure is
        logged and retried on the next connect.

        :param client: mqtt client
        :param userdata: userdata dict
        :param flags: flags
        :param rc: rc code
        """
        self.connected_at = datetime.now()
        # first time we are connected we register metrics and
        # send initial values
        if self.registered:
            return
        self.logger.info("registering metrics")
        try:
            publish_state = self.mqtt_client.publish_state
            register = self.mqtt_client.register
            # register all metrics
            register(CONNECTOR_STATUS, CONNECTOR_STATUS_ICON)
            register(CONNECTOR_CONNECTION_STATUS,
                     CONNECTOR_CONNECTION_STATUS_ICON)
            register(CONNECTOR_MQTT_RESTARTS, CONNECTOR_MQTT_RESTARTS_ICON)
            register(CONNECTOR_FAILED_CONNECTIONS,
                     CONNECTOR_FAILED_CONNECTIONS_ICON)
            register(CONNECTOR_LAST_MQTT_RESTART,
                     CONNECTOR_LAST_MQTT_RESTART_ICON)
            # sends initial values
            publish_state(CONNECTOR_STATUS, STATUS_RUNNING)
            publish_state(CONNECTOR_CONNECTION_STATUS,
                          CONNECTOR_CONNECTION_OK)
            publish_state(CONNECTOR_MQTT_RESTARTS, self.mqtt_restarts)
            publish_state(CONNECTOR_FAILED_CONNECTIONS,
                          self.failed_connections)
            publish_state(CONNECTOR_LAST_MQTT_RESTART,
                          self.get(CONNECTOR_LAST_MQTT_RESTART))
            self.registered = True
        except Exception as ex:
            self.logger.error("failed to register initial metrics: %s" % ex)

    def _queue_connection_status(self):
        """
        queues the connection status metric according to was_stable
        """
        if self.was_stable:
            state = CONNECTOR_CONNECTION_OK
        else:
            state = CONNECTOR_CONNECTION_NOK
        self.states_queue.append((CONNECTOR_CONNECTION_STATUS, state))

    # noinspection PyUnusedLocal
    def on_disconnect(self, client, userdata, rc):
        """
        updates connection status on disconnect

        :param client: mqtt client
        :param userdata: userdata dict
        :param rc: rc code
        """
        self.was_stable = self.is_stable()
        self._queue_connection_status()

    def is_stable(self, update=True):
        """
        check if connection is stable by checking if it's up 90% of
        the time

        With ``update`` the current session is added to the total time
        connected and closed, so it cannot be counted a second time.
        Without a current session (never connected, or already closed)
        only the accumulated time is considered.

        :param update: whether we should update total time connected
        :return: true if connection is stable, false otherwise
        """
        now = datetime.now()
        session = 0
        if self.connected_at is not None:
            session = (now - self.connected_at).total_seconds()
        if update:
            self.time_connected += session
            # the session is now part of the total
            self.connected_at = None
            tc = self.time_connected
        else:
            tc = self.time_connected + session
        self.logger.debug("spent %s seconds connected", tc)
        elapsed = (now - self.started_at).total_seconds()
        if elapsed <= 0:
            # no time observed yet, nothing to hold against the connection
            return True
        return (tc * 100) / elapsed >= 90

    def on_not_connect(self):
        """
        behavior when connecting to mqtt fails
        after 3 failed attempts we try to restart mqtt and
        wait it to connect again (max 60 seconds)

        Below 3 attempts the failure is counted, queued as a metric and
        followed by a 10 second pause.
        """
        if self.attempts >= 3:
            self.logger.warning("max of 3 connection attempts was reached")
            self.logger.warning("restarting mqtt service")
            if exec_command(self.command):
                append = self.states_queue.append
                self.mqtt_restarts = self.inc(CONNECTOR_MQTT_RESTARTS,
                                              self.mqtt_restarts)
                append((CONNECTOR_MQTT_RESTARTS, self.mqtt_restarts))
                append((CONNECTOR_LAST_MQTT_RESTART,
                        self.put(CONNECTOR_LAST_MQTT_RESTART,
                                 strftime(TIME_FORMAT))))
                self.mqtt_client.wait_connection(60)
            # start a new round of attempts before the next restart
            self.attempts = 0
        else:
            self.attempts += 1
            self.failed_connections = self.inc(CONNECTOR_FAILED_CONNECTIONS,
                                               self.failed_connections)
            self.states_queue.append(
                (CONNECTOR_FAILED_CONNECTIONS, self.failed_connections))
            self.logger.warning(
                "broker is not responding (%s of 3)" % self.attempts)
            sleep(10)

    def _flush_states(self):
        """
        publishes queued states in order

        Stops at the first failure; states already published are removed
        from the queue and the remaining ones are kept for the next call.
        """
        publish_state = self.mqtt_client.publish_state
        queue = self.states_queue
        sent = 0
        try:
            for metric, state in queue:
                publish_state(metric, state)
                sent += 1
        except Exception as ex:
            self.logger.warning("unable to update metrics: %s" % ex)
        del queue[:sent]

    def loop(self):
        """
        sleeps 1 second until next validation
        sends metrics if any to send

        While the connection is flagged as unstable, stability is
        re-evaluated (without updating totals) and queued.
        """
        if not self.was_stable:
            self.was_stable = self.is_stable(False)
            self._queue_connection_status()
        self._flush_states()
        sleep(1)