class TaskExecutor(object):
    """
    Executes task dictionaries that are queued through `set_new_tasks`.

    A task dictionary may contain the keys `configuration`, `intervals`, `open_vpn`, `events`
    and `connectivity`; every key that is present is handled by `_execute_tasks`.
    """

    @Inject
    def __init__(self, message_client=INJECTED):
        self._configuration = {}  # Last configuration dict that was processed
        self._intervals = {}  # Last intervals dict that was processed
        self._vpn_open = False
        self._message_client = message_client  # May be None, in which case no events are sent
        self._vpn_controller = VpnController()
        self._tasks = deque()
        self._previous_amount_of_tasks = 0
        self._executor = DaemonThread(name='taskexecutor',
                                      target=self._execute_tasks,
                                      interval=300)

    def start(self):
        """ Starts the VPN controller and the executor thread. """
        self._vpn_controller.start()
        self._executor.start()

    def set_new_tasks(self, task_data):
        """
        Queues a task dictionary and asks the executor thread for an immediate run.

        :param task_data: Dictionary with the tasks to process (see class docstring for the keys)
        """
        # Added left and popped right in `_execute_tasks`, so tasks are handled first-in first-out
        self._tasks.appendleft(task_data)
        self._executor.request_single_run()

    @property
    def vpn_open(self):
        """ The VPN state as determined by the last `_open_vpn` call. """
        return self._vpn_open

    def _execute_tasks(self):
        """ Processes all queued task dictionaries until the queue is empty. """
        while True:
            try:
                task_data = self._tasks.pop()
            except IndexError:
                # Queue is empty; nothing left to do for this run
                return

            # Only log when the amount of entries differs from the previous run, to limit log noise
            amount_of_tasks = len(task_data)
            if self._previous_amount_of_tasks != amount_of_tasks:
                logger.info('Processing {0} tasks...'.format(amount_of_tasks))

            if 'configuration' in task_data:
                self._process_configuration_data(task_data['configuration'])
            if 'intervals' in task_data:
                self._process_interval_data(task_data['intervals'])
            if 'open_vpn' in task_data:
                self._open_vpn(task_data['open_vpn'])
            if 'events' in task_data and self._message_client is not None:
                for event in task_data['events']:
                    # Every event is a (type, data) pair; a failing event does not block the others
                    try:
                        self._message_client.send_event(event[0], event[1])
                    except Exception as ex:
                        logger.error('Could not send event {0}({1}): {2}'.format(event[0], event[1], ex))
            if 'connectivity' in task_data:
                self._check_connectivity(task_data['connectivity'])

            if self._previous_amount_of_tasks != amount_of_tasks:
                logger.info('Processing {0} tasks... Done'.format(amount_of_tasks))
                self._previous_amount_of_tasks = amount_of_tasks

    def _process_configuration_data(self, configuration):
        """
        Stores every entry of a changed configuration through `Config.set_entry`.

        :param configuration: Dictionary of setting name to value
        """
        try:
            configuration_changed = self._configuration != configuration
            if configuration_changed:
                for setting, value in configuration.items():
                    Config.set_entry(setting, value)
                logger.info('Configuration changed: {0}'.format(configuration))
            self._configuration = configuration
        except Exception:
            logger.exception('Unexpected exception processing configuration data')

    def _process_interval_data(self, intervals):
        """
        Sends a METRICS_INTERVAL_CHANGE event when the intervals differ from the previous ones.

        :param intervals: The new intervals; passed as-is as the event payload
        """
        try:
            intervals_changed = self._intervals != intervals
            if intervals_changed and self._message_client is not None:
                self._message_client.send_event(OMBusEvents.METRICS_INTERVAL_CHANGE, intervals)
                logger.info('Intervals changed: {0}'.format(intervals))
            self._intervals = intervals
        except Exception:
            logger.exception('Unexpected exception processing interval data')

    def _open_vpn(self, should_open):
        """
        Opens or closes the VPN when needed and publishes the resulting state.

        :param should_open: Truthy when the VPN should be running, falsy when it should be stopped
        """
        try:
            is_running = VpnController.check_vpn()
            if should_open and not is_running:
                logger.info('Opening vpn...')
                VpnController.start_vpn()
                logger.info('Opening vpn... Done')
            elif not should_open and is_running:
                logger.info('Closing vpn...')
                VpnController.stop_vpn()
                logger.info('Closing vpn... Done')
            # Check again, since the state might just have been changed
            is_running = VpnController.check_vpn()
            self._vpn_open = is_running and self._vpn_controller.vpn_connected
            if self._message_client is not None:
                self._message_client.send_event(OMBusEvents.VPN_OPEN, self._vpn_open)
        except Exception:
            logger.exception('Unexpected exception opening/closing VPN')

    def _check_connectivity(self, last_successful_heartbeat):
        """
        Publishes the connectivity state and reboots when connectivity is lost for too long.

        :param last_successful_heartbeat: Timestamp (compared against `time.time()`) of the last
                                          successful heartbeat
        """
        try:
            if last_successful_heartbeat > time.time() - CHECK_CONNECTIVITY_TIMEOUT:
                # Recent heartbeat; no need for an active connectivity check
                if self._message_client is not None:
                    self._message_client.send_event(OMBusEvents.CONNECTIVITY, True)
            else:
                connectivity = TaskExecutor._has_connectivity()
                if self._message_client is not None:
                    self._message_client.send_event(OMBusEvents.CONNECTIVITY, connectivity)
                if not connectivity and last_successful_heartbeat < time.time() - REBOOT_TIMEOUT:
                    subprocess.call('sync && reboot', shell=True)
        except Exception:
            logger.exception('Unexpected exception checking connectivity')

    @staticmethod
    def _ping(target, verbose=True):
        """
        Check if the target can be pinged.

        :param target: Hostname or address to ping; None is reported as not reachable
        :param verbose: When True, logs that the ping is being tested
        :returns: True if the ping command (which sends 3 pings) exits with status code 0, False
                  on a non-zero exit code, on a timeout, or on any other error
        """
        if target is None:
            return False

        # The popen_timeout has been added as a workaround for the hanging subprocess
        # If NTP date changes the time during a execution of a sub process this hangs forever.
        def popen_timeout(command, timeout):
            ping_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
            # Poll once per iteration, for at most `timeout` iterations
            for _ in range(timeout):
                time.sleep(1)
                if ping_process.poll() is not None:
                    stdout_data, stderr_data = ping_process.communicate()
                    if ping_process.returncode == 0:
                        return True
                    raise Exception('Non-zero exit code. Stdout: {0}, stderr: {1}'.format(stdout_data, stderr_data))
            logger.warning('Got timeout during ping to {0}. Killing'.format(target))
            ping_process.kill()
            del ping_process  # Make sure to clean up everything (or make it cleanable by the GC)
            logger.info('Ping to {0} killed'.format(target))
            return False

        if verbose is True:
            logger.info('Testing ping to {0}'.format(target))
        try:
            # Ping returns status code 0 if at least 1 ping is successful
            return popen_timeout(['ping', '-c', '3', target], 10)
        except Exception as ex:
            logger.error('Error during ping: {0}'.format(ex))
            return False

    @staticmethod
    def _has_connectivity():
        """
        Determines whether there is sufficient connectivity, based on a series of pings.

        :returns: True when connectivity is considered sufficient, False otherwise
        """
        # Check connectivity by using ping to recover from a messed up network stack on the BeagleBone
        # Prefer using OpenMotics infrastructure first

        if TaskExecutor._ping('cloud.openmotics.com'):
            # OpenMotics infrastructure can be pinged
            # > Connectivity
            return True
        can_ping_internet_by_fqdn = TaskExecutor._ping('example.com') or TaskExecutor._ping('google.com')
        if can_ping_internet_by_fqdn:
            # Public internet servers can be pinged by FQDN
            # > Assume maintenance on OpenMotics infrastructure. Sufficient connectivity
            return True
        can_ping_internet_by_ip = TaskExecutor._ping('8.8.8.8') or TaskExecutor._ping('1.1.1.1')
        if can_ping_internet_by_ip:
            # Public internet servers can be pinged by IP, but not by FQDN
            # > Assume DNS resolving issues. Insufficient connectivity
            return False
        # Public internet servers cannot be pinged by IP, nor by FQDN
        can_ping_default_gateway = TaskExecutor._ping(TaskExecutor._get_default_gateway())
        if can_ping_default_gateway:
            # > Assume ISP outage. Sufficient connectivity
            return True
        # > Assume broken TCP stack. No connectivity
        return False

    @staticmethod
    def _get_default_gateway():
        """
        Get the default gateway.

        :returns: The address of the first default gateway reported by `ip r`, or None when no
                  default route is found or the lookup fails
        """
        try:
            output = subprocess.check_output("ip r | grep '^default via' | awk '{ print $3; }'", shell=True)
            if not isinstance(output, str):
                # Python 3 returns bytes, while the ping command line needs text
                output = output.decode('utf-8')
            # The raw output ends with a newline and can hold multiple (or no) gateways; only a
            # clean, single address is usable as ping target
            gateways = output.split()
            return gateways[0] if gateways else None
        except Exception as ex:
            logger.error('Error during get_gateway: {0}'.format(ex))
            return None
class MasterHeartbeat(object):
    """
    Monitors the status of the master communication.
    """

    @Inject
    def __init__(self, master_communicator=INJECTED):
        # type: (MasterCommunicator) -> None
        self._master_communicator = master_communicator
        self._failures = -1  # Start "offline"
        self._backoff = 60  # Minimum amount of seconds between two communication restarts
        self._last_restart = 0.0
        self._min_threshold = 2  # Amount of failures that must be exceeded before taking action
        self._thread = DaemonThread(name='masterheartbeat',
                                    target=self._heartbeat,
                                    interval=30,
                                    delay=5)

    def start(self):
        # type: () -> None
        """ Starts the heartbeat thread. """
        logger.info('Starting master heartbeat')
        self._thread.start()

    def stop(self):
        # type: () -> None
        """ Stops the heartbeat thread. """
        self._thread.stop()

    def is_online(self):
        # type: () -> bool
        """
        Indicates whether the last heartbeat was successful.

        When no heartbeat result is available yet, a heartbeat is requested and this call blocks
        for 2 seconds before the state is evaluated.
        """
        if self._failures == -1:
            self._thread.request_single_run()
            time.sleep(2)
        return self._failures == 0

    def set_offline(self):
        # type: () -> None
        """ Registers a communication failure, so `is_online` reports False. """
        self._register_failure()

    def get_communicator_health(self):
        # type: () -> HEALTH
        """
        Translates the failure count and the communication statistics into a CommunicationStatus.

        :returns: SUCCESS as long as the failure threshold is not exceeded. Otherwise the result of
                  `_check_stats` decides: UNSTABLE (None), SUCCESS (True) or FAILURE (False)
        """
        if self._failures > self._min_threshold:
            stats = self._check_stats()
            if stats is None:
                return CommunicationStatus.UNSTABLE
            elif stats:
                return CommunicationStatus.SUCCESS
            else:
                return CommunicationStatus.FAILURE
        else:
            return CommunicationStatus.SUCCESS

    def _register_failure(self):
        # type: () -> None
        """ Increases the failure count, leaving the initial "offline" marker (-1) first. """
        # A plain increment would turn the initial -1 into 0, which `is_online` treats as online
        self._failures = max(self._failures, 0) + 1

    def _heartbeat(self):
        # type: () -> None
        """
        Executes a single heartbeat: restarts the communication when needed and sends a status
        command to the master.

        :raises DaemonThreadWait: When the status command timed out
        """
        if self._failures > self._min_threshold and self._last_restart < time.time() - self._backoff:
            logger.error('Master heartbeat failure, restarting communication')
            try:
                self._master_communicator.stop()
            finally:
                # Always try to start again, even if stopping failed
                self._master_communicator.start()
            self._last_restart = time.time()
            # NOTE(review): the backoff doubles on every restart and is never reset on recovery;
            # confirm whether that is intended
            self._backoff = self._backoff * 2
        try:
            self._master_communicator.do_command(master_api.status())
            if self._failures > 0:
                logger.info('Master heartbeat recovered after %s failures', self._failures)
            self._failures = 0
        except CommunicationTimedOutException:
            self._register_failure()
            logger.error('Master heartbeat %s failures', self._failures)
            raise DaemonThreadWait()
        except Exception:
            logger.error('Master heartbeat unhandled exception')
            raise

    def _check_stats(self):
        # type: () -> Optional[bool]
        """
        Evaluates the communication statistics of the master communicator.

        :returns: True when no call timed out, False when at least 25% of the calls of the last
                  180 seconds timed out, and None when timeouts were observed but the statistics
                  are inconclusive
        """
        stats = self._master_communicator.get_communication_statistics()
        calls_timedout = list(stats['calls_timedout'])
        calls_succeeded = list(stats['calls_succeeded'])
        all_calls = sorted(calls_timedout + calls_succeeded)
        timedout = set(calls_timedout)  # Constant-time membership checks below

        if not calls_timedout:
            # If there are no timeouts at all
            return True
        elif len(all_calls) <= 10:
            # Not enough calls made to have a decent view on what's going on
            logger.warning('Observed master communication failures, but not enough calls')
            return None
        elif not any(t in timedout for t in all_calls[-10:]):
            # The last X calls are successful
            logger.warning('Observed master communication failures, but recent calls recovered')
            return None

        threshold = time.time() - 180
        calls_last_x_minutes = [t for t in all_calls if t > threshold]
        if len(calls_last_x_minutes) <= 5:
            # Not enough recent calls
            logger.warning('Observed master communication failures, but not recent enough')
            return None

        amount_timedout = len([t for t in calls_last_x_minutes if t in timedout])
        ratio = amount_timedout / float(len(calls_last_x_minutes))
        if ratio < 0.25:
            # Less than 25% of the calls fail, let's assume everything is just "fine"
            logger.warning('Observed master communication failures, but there\'s only a failure ratio of {:.2f}%'.format(ratio * 100))
            return None
        else:
            return False
class BaseController(object):
    """
    Base for controllers that keep ORM models in sync with what the master controller reports.

    Subclasses declare the models to synchronize through `SYNC_STRUCTURES`.
    """

    SYNC_STRUCTURES = None  # type: Optional[List[SyncStructure]]

    @Inject
    def __init__(self, master_controller, maintenance_controller=INJECTED, pubsub=INJECTED, sync_interval=900):
        # type: (MasterController, MaintenanceController, PubSub, float) -> None
        self._master_controller = master_controller
        self._maintenance_controller = maintenance_controller
        self._pubsub = pubsub
        self._sync_orm_thread = None  # type: Optional[DaemonThread]
        self._sync_orm_interval = sync_interval
        self._sync_dirty = True  # Always sync after restart.
        self._sync_running = False

        # EEPROM and module events can both invalidate the ORM state
        for topic in (PubSub.MasterTopics.EEPROM, PubSub.MasterTopics.MODULE):
            self._pubsub.subscribe_master_events(topic, self._handle_master_event)

    def _handle_master_event(self, master_event):
        # type: (MasterEvent) -> None
        """ Marks the ORM as dirty and requests a sync on EEPROM changes and module discovery. """
        sync_triggers = (MasterEvent.Types.EEPROM_CHANGE,
                         MasterEvent.Types.MODULE_DISCOVERY)
        if master_event.type not in sync_triggers:
            return
        self._sync_dirty = True
        self.request_sync_orm()

    def start(self):
        """ Creates and starts the ORM sync thread. """
        # The thread name is derived from (at most) the first 10 characters of the class name
        thread_name = '{0}sync'.format(self.__class__.__name__.lower()[:10])
        self._sync_orm_thread = DaemonThread(name=thread_name,
                                             target=self._sync_orm,
                                             interval=self._sync_orm_interval,
                                             delay=300)
        self._sync_orm_thread.start()

    def stop(self):
        """ Stops the ORM sync thread, if there is one. """
        thread = self._sync_orm_thread
        if thread is not None:
            thread.stop()

    def request_sync_orm(self):
        """ Asks the ORM sync thread (if there is one) for an extra run. """
        thread = self._sync_orm_thread
        if thread is not None:
            thread.request_single_run()

    def run_sync_orm(self):
        """ Runs the ORM sync in the calling thread. """
        self._sync_orm()

    def _sync_orm(self):
        # type: () -> bool
        """
        Synchronizes every structure in `SYNC_STRUCTURES` and publishes a CONFIG_CHANGE gateway
        event per structure when the ORM was marked dirty.

        :returns: False when there is nothing to sync or a sync is already running, True otherwise
        """
        if self.SYNC_STRUCTURES is None:
            return False

        if self._sync_running:
            for structure in self.SYNC_STRUCTURES:
                logger.info('ORM sync ({0}): Already running'.format(structure.orm_model.__name__))
            return False

        self._sync_running = True
        try:
            for structure in self.SYNC_STRUCTURES:
                model = structure.orm_model
                self._sync_orm_structure(structure, model)
                if self._sync_dirty:
                    payload = {'type': model.__name__.lower()}
                    self._pubsub.publish_gateway_event(PubSub.GatewayTopics.CONFIG,
                                                       GatewayEvent(GatewayEvent.Types.CONFIG_CHANGE, payload))
            self._sync_dirty = False
        finally:
            self._sync_running = False
        return True

    def _sync_orm_structure(self, structure, model):
        """ Aligns the rows of a single ORM model with the DTOs loaded from the master controller. """
        model_name = model.__name__
        try:
            name, skip = structure.name, structure.skip

            started_at = time.time()
            logger.info('ORM sync ({0})'.format(model_name))

            loader = getattr(self._master_controller, 'load_{0}s'.format(name))
            known_ids = []
            for dto in loader():
                if skip is not None and skip(dto):
                    continue
                number = dto.id
                known_ids.append(number)
                exists = model.select().where(model.number == number).exists()
                if not exists:
                    model.create(number=number)
            # Remove the rows for which no DTO was loaded
            model.delete().where(model.number.not_in(known_ids)).execute()

            elapsed = time.time() - started_at
            logger.info('ORM sync ({0}): completed after {1:.1f}s'.format(model_name, elapsed))
        except CommunicationTimedOutException as ex:
            logger.error('ORM sync ({0}): Failed: {1}'.format(model_name, ex))
        except Exception:
            logger.exception('ORM sync ({0}): Failed'.format(model_name))
class OutputController(BaseController):
    """
    Controller for outputs: keeps a state cache, publishes output changes as gateway events and
    forwards output related actions to the master controller.
    """

    SYNC_STRUCTURES = [SyncStructure(Output, 'output')]

    @Inject
    def __init__(self, master_controller=INJECTED):
        # type: (MasterController) -> None
        super(OutputController, self).__init__(master_controller)
        self._cache = OutputStateCache()
        self._sync_state_thread = None  # type: Optional[DaemonThread]

        self._pubsub.subscribe_master_events(PubSub.MasterTopics.OUTPUT, self._handle_master_event)

    def start(self):
        # type: () -> None
        """ Starts the ORM sync (base class) and the output state sync thread. """
        super(OutputController, self).start()
        self._sync_state_thread = DaemonThread(name='outputsyncstate',
                                               target=self._sync_state,
                                               interval=600,
                                               delay=10)
        self._sync_state_thread.start()

    def stop(self):
        # type: () -> None
        """ Stops the ORM sync (base class) and the output state sync thread. """
        super(OutputController, self).stop()
        if self._sync_state_thread:
            self._sync_state_thread.stop()
            self._sync_state_thread = None

    def _handle_master_event(self, master_event):
        # type: (MasterEvent) -> None
        """ Handles module discovery, output status and "set lights" API events of the master. """
        super(OutputController, self)._handle_master_event(master_event)
        if master_event.type == MasterEvent.Types.MODULE_DISCOVERY:
            if self._sync_state_thread:
                self._sync_state_thread.request_single_run()
        if master_event.type == MasterEvent.Types.OUTPUT_STATUS:
            self._handle_output_status(master_event.data['state'])
        if master_event.type == MasterEvent.Types.EXECUTE_GATEWAY_API:
            if master_event.data['type'] == MasterEvent.APITypes.SET_LIGHTS:
                action = master_event.data['data']['action']  # type: Literal['ON', 'OFF', 'TOGGLE']
                floor_id = master_event.data['data']['floor_id']  # type: Optional[int]
                self.set_all_lights(action=action, floor_id=floor_id)

    def _handle_output_status(self, state_dto):
        # type: (OutputStateDTO) -> None
        """ Updates the cache with a new state and publishes an event when the cache reports a change. """
        changed, output_dto = self._cache.handle_change(state_dto)
        if changed and output_dto is not None:
            self._publish_output_change(output_dto)

    def _sync_state(self):
        """
        Reloads all outputs and their states, and publishes an event for every known output.

        :raises DaemonThreadWait: On a communication timeout or communication failure
        """
        try:
            self.load_outputs()
            for state_dto in self._master_controller.load_output_status():
                _, output_dto = self._cache.handle_change(state_dto)
                if output_dto is not None:
                    # Always send events on the background sync
                    self._publish_output_change(output_dto)
        except CommunicationTimedOutException:
            logger.error('Got communication timeout during synchronization, waiting 10 seconds.')
            raise DaemonThreadWait
        except CommunicationFailure:
            # This is an expected situation
            raise DaemonThreadWait

    def _publish_output_change(self, output_dto):
        # type: (OutputDTO) -> None
        """ Publishes an OUTPUT_CHANGE gateway event describing the state of the given output. """
        event_status = {'on': output_dto.state.status,
                        'locked': output_dto.state.locked}
        if output_dto.module_type in ['d', 'D']:
            # Only these module types get a dimmer value in the event
            event_status['value'] = output_dto.state.dimmer
        event_data = {'id': output_dto.id,
                      'status': event_status,
                      'location': {'room_id': Toolbox.denonify(output_dto.room, 255)}}
        gateway_event = GatewayEvent(GatewayEvent.Types.OUTPUT_CHANGE, event_data)
        self._pubsub.publish_gateway_event(PubSub.GatewayTopics.STATE, gateway_event)

    def get_output_status(self, output_id):
        # type: (int) -> OutputStateDTO
        """
        Returns the cached state of a single output.

        :raises ValueError: When there is no cached state for the given id
        """
        # TODO also support plugins
        output_state_dto = self._cache.get_state().get(output_id)
        if output_state_dto is None:
            raise ValueError('Output with id {} does not exist'.format(output_id))
        return output_state_dto

    def get_output_statuses(self):
        # type: () -> List[OutputStateDTO]
        """ Returns all cached output states. """
        # TODO also support plugins
        return list(self._cache.get_state().values())

    def load_output(self, output_id):
        # type: (int) -> OutputDTO
        """ Loads a single output from the master controller, enriched with the room number from the ORM. """
        output = Output.select(Room) \
                       .join_from(Output, Room, join_type=JOIN.LEFT_OUTER) \
                       .where(Output.number == output_id) \
                       .get()  # type: Output  # TODO: Load dict
        output_dto = self._master_controller.load_output(output_id=output_id)
        output_dto.room = output.room.number if output.room is not None else None
        return output_dto

    def load_outputs(self):
        # type: () -> List[OutputDTO]
        """ Loads all outputs known to the ORM from the master controller and refreshes the cache. """
        output_dtos = []
        for output in list(Output.select(Output, Room)
                                 .join_from(Output, Room, join_type=JOIN.LEFT_OUTER)):  # TODO: Load dicts
            output_dto = self._master_controller.load_output(output_id=output.number)
            output_dto.room = output.room.number if output.room is not None else None
            output_dtos.append(output_dto)
        self._cache.update_outputs(output_dtos)
        return output_dtos

    def save_outputs(self, outputs):
        # type: (List[OutputDTO]) -> None
        """
        Saves the room of the given outputs in the ORM and the outputs themselves on the master.

        Outputs that do not exist in the ORM are logged and skipped.
        """
        outputs_to_save = []
        for output_dto in outputs:
            output = Output.get_or_none(number=output_dto.id)  # type: Output
            if output is None:
                logger.info('Ignored saving non-existing Output {0}'.format(output_dto.id))
                # Without skipping, the room handling below would fail on the missing ORM object
                continue
            if 'room' in output_dto.loaded_fields:
                if output_dto.room is None:
                    output.room = None
                elif 0 <= output_dto.room <= 100:
                    output.room, _ = Room.get_or_create(number=output_dto.room)
                output.save()
            outputs_to_save.append(output_dto)
        self._master_controller.save_outputs(outputs_to_save)

    def set_all_lights(self, action, floor_id=None):
        # type: (Literal['ON', 'OFF', 'TOGGLE'], Optional[int]) -> None
        """
        Applies the action to all lights, or only to the lights on the given floor.

        :param action: The action to execute
        :param floor_id: Optional floor number to limit the action to
        """
        # TODO: Also include other sources (e.g. plugins) once implemented
        if floor_id is None:
            self._master_controller.set_all_lights(action=action)
            return

        # TODO: Filter on output type "light" once available
        query = Output.select(Output.number) \
                      .join_from(Output, Room, join_type=JOIN.INNER) \
                      .join_from(Room, Floor, join_type=JOIN.INNER) \
                      .where(Floor.number == floor_id)
        output_ids = [output['number'] for output in query.dicts()]

        # It is unknown whether `floor` is known to the Master implementation. So pass both the floor_id
        # and the list of Output ids to the MasterController
        self._master_controller.set_all_lights(action=action, floor_id=floor_id, output_ids=output_ids)

    def set_output_status(self, output_id, is_on, dimmer=None, timer=None):
        # type: (int, bool, Optional[int], Optional[int]) -> None
        """ Sets the state (and optionally the dimmer and timer) of an output through the master controller. """
        self._master_controller.set_output(output_id=output_id, state=is_on, dimmer=dimmer, timer=timer)

    # Global (led) feedback

    def load_global_feedback(self, global_feedback_id):
        # type: (int) -> GlobalFeedbackDTO
        """ Loads a single global feedback from the master controller. """
        return self._master_controller.load_global_feedback(global_feedback_id=global_feedback_id)

    def load_global_feedbacks(self):
        # type: () -> List[GlobalFeedbackDTO]
        """ Loads all global feedbacks from the master controller. """
        return self._master_controller.load_global_feedbacks()

    def save_global_feedbacks(self, global_feedbacks):
        # type: (List[GlobalFeedbackDTO]) -> None
        """ Saves the given global feedbacks through the master controller. """
        self._master_controller.save_global_feedbacks(global_feedbacks)
class OutputController(BaseController):
    """
    Controller for outputs: keeps a state cache, publishes output changes as gateway events and
    forwards output related actions to the master controller.
    """

    SYNC_STRUCTURES = [SyncStructure(Output, 'output')]

    @Inject
    def __init__(self, master_controller=INJECTED):
        # type: (MasterController) -> None
        super(OutputController, self).__init__(master_controller)
        self._cache = OutputStateCache()
        self._sync_state_thread = None  # type: Optional[DaemonThread]

        self._pubsub.subscribe_master_events(PubSub.MasterTopics.OUTPUT, self._handle_master_event)

    def start(self):
        # type: () -> None
        """ Starts the ORM sync (base class) and the output state sync thread. """
        super(OutputController, self).start()
        self._sync_state_thread = DaemonThread(name='outputsyncstate',
                                               target=self._sync_state,
                                               interval=600,
                                               delay=10)
        self._sync_state_thread.start()

    def stop(self):
        # type: () -> None
        """ Stops the ORM sync (base class) and the output state sync thread. """
        super(OutputController, self).stop()
        if self._sync_state_thread:
            self._sync_state_thread.stop()
            self._sync_state_thread = None

    def _handle_master_event(self, master_event):
        # type: (MasterEvent) -> None
        """ Handles module discovery and output status events of the master. """
        super(OutputController, self)._handle_master_event(master_event)
        if master_event.type == MasterEvent.Types.MODULE_DISCOVERY:
            if self._sync_state_thread:
                self._sync_state_thread.request_single_run()
        if master_event.type == MasterEvent.Types.OUTPUT_STATUS:
            self._handle_output_status(master_event.data)

    def _handle_output_status(self, change_data):
        # type: (Dict[str,Any]) -> None
        """
        Updates the cache with changed output data and publishes an event when the cache reports
        a change.

        :param change_data: Dictionary describing the change; must contain the key `id`
        """
        changed, output_dto = self._cache.handle_change(change_data['id'], change_data)
        if changed and output_dto is not None:
            self._publish_output_change(output_dto)

    def _sync_state(self):
        """
        Reloads all outputs and their states, and publishes an event for every known output.

        :raises DaemonThreadWait: On a communication timeout or communication failure
        """
        try:
            self.load_outputs()
            for state_data in self._master_controller.load_output_status():
                # State entries without an id cannot be mapped on an output and are ignored
                if 'id' in state_data:
                    _, output_dto = self._cache.handle_change(state_data['id'], state_data)
                    if output_dto is not None:
                        # Always send events on the background sync
                        self._publish_output_change(output_dto)
        except CommunicationTimedOutException:
            logger.error('Got communication timeout during synchronization, waiting 10 seconds.')
            raise DaemonThreadWait
        except CommunicationFailure:
            # This is an expected situation
            raise DaemonThreadWait

    def _publish_output_change(self, output_dto):
        # type: (OutputDTO) -> None
        """ Publishes an OUTPUT_CHANGE gateway event describing the state of the given output. """
        event_status = {'on': output_dto.state.status,
                        'locked': output_dto.state.locked}
        if output_dto.module_type in ['d', 'D']:
            # Only these module types get a dimmer value in the event
            event_status['value'] = output_dto.state.dimmer
        event_data = {'id': output_dto.id,
                      'status': event_status,
                      'location': {'room_id': Toolbox.denonify(output_dto.room, 255)}}
        gateway_event = GatewayEvent(GatewayEvent.Types.OUTPUT_CHANGE, event_data)
        self._pubsub.publish_gateway_event(PubSub.GatewayTopics.STATE, gateway_event)

    def get_output_status(self, output_id):
        # type: (int) -> OutputStateDTO
        """
        Returns the cached state of a single output.

        :raises ValueError: When there is no cached state for the given id
        """
        # TODO also support plugins
        output_state_dto = self._cache.get_state().get(output_id)
        if output_state_dto is None:
            raise ValueError('Output with id {} does not exist'.format(output_id))
        return output_state_dto

    def get_output_statuses(self):
        # type: () -> List[OutputStateDTO]
        """ Returns all cached output states. """
        # TODO also support plugins
        return list(self._cache.get_state().values())

    def load_output(self, output_id):
        # type: (int) -> OutputDTO
        """ Loads a single output from the master controller, enriched with the room number from the ORM. """
        output = Output.select(Room) \
                       .join_from(Output, Room, join_type=JOIN.LEFT_OUTER) \
                       .where(Output.number == output_id) \
                       .get()  # type: Output  # TODO: Load dict
        output_dto = self._master_controller.load_output(output_id=output_id)
        output_dto.room = output.room.number if output.room is not None else None
        return output_dto

    def load_outputs(self):
        # type: () -> List[OutputDTO]
        """ Loads all outputs known to the ORM from the master controller and refreshes the cache. """
        output_dtos = []
        for output in list(Output.select(Output, Room)
                                 .join_from(Output, Room, join_type=JOIN.LEFT_OUTER)):  # TODO: Load dicts
            output_dto = self._master_controller.load_output(output_id=output.number)
            output_dto.room = output.room.number if output.room is not None else None
            output_dtos.append(output_dto)
        self._cache.update_outputs(output_dtos)
        return output_dtos

    def save_outputs(self, outputs):
        # type: (List[Tuple[OutputDTO, List[str]]]) -> None
        """
        Saves the room of the given outputs in the ORM and the outputs themselves on the master.

        Outputs that do not exist in the ORM are logged and skipped.

        :param outputs: List of (output DTO, names of the fields to save) tuples
        """
        outputs_to_save = []
        for output_dto, fields in outputs:
            output = Output.get_or_none(number=output_dto.id)  # type: Output
            if output is None:
                logger.info('Ignored saving non-existing Output {0}'.format(output_dto.id))
                # Without skipping, the room handling below would fail on the missing ORM object
                continue
            if 'room' in fields:
                if output_dto.room is None:
                    output.room = None
                elif 0 <= output_dto.room <= 100:
                    output.room, _ = Room.get_or_create(number=output_dto.room)
                output.save()
            outputs_to_save.append((output_dto, fields))
        self._master_controller.save_outputs(outputs_to_save)

    def set_all_lights_off(self):
        # type: () -> None
        """ Asks the master controller to turn all lights off. """
        return self._master_controller.set_all_lights_off()

    def set_all_lights_floor_off(self, floor):
        # type: (int) -> None
        """ Asks the master controller to turn all lights on the given floor off. """
        return self._master_controller.set_all_lights_floor_off(floor=floor)

    def set_all_lights_floor_on(self, floor):
        # type: (int) -> None
        """ Asks the master controller to turn all lights on the given floor on. """
        return self._master_controller.set_all_lights_floor_on(floor=floor)

    def set_output_status(self, output_id, is_on, dimmer=None, timer=None):
        # type: (int, bool, Optional[int], Optional[int]) -> None
        """ Sets the state (and optionally the dimmer and timer) of an output through the master controller. """
        self._master_controller.set_output(output_id=output_id, state=is_on, dimmer=dimmer, timer=timer)