def _run_thread_capture_errors(curr_module, sspl_modules, msgQlist, conf_reader, product): """Run the given thread and log any errors that happen on it. Will stop all sspl_modules if one of them fails.""" try: # Each module is passed a reference list to message queues so it can transmit # internal messages to other modules as desired curr_module.start_thread(conf_reader, msgQlist, product) except BaseException as ex: logger.critical( "SSPL-LL encountered a fatal error, terminating service Error: %s" % ex) logger.exception(ex) # Populate an actuator response message and transmit back to HAlon error_msg = "SSPL-LL encountered an error, terminating service Error: " + \ ", Exception: " + logger.exception(ex) json_msg = ThreadControllerMsg(curr_module.name(), error_msg).getJson() if product.lower() in [x.lower() for x in enabled_products]: self._write_internal_msgQ(RabbitMQegressProcessor.name(), json_msg) elif product.lower() in [x.lower() for x in cs_legacy_products]: self._write_internal_msgQ(PlaneCntrlRMQegressProcessor.name(), json_msg) # Shut it down, error is non-recoverable for name, other_module in list(sspl_modules.items()): if other_module is not curr_module: other_module.shutdown()
def run(self):
    """Run the module periodically on its own thread.

    On the first passes for an enabled product this waits until no
    SensorThread module still reports WAITING (re-checking every 10
    seconds), then announces a successful start-up. After that each pass
    reads this module's message queue until it is empty and re-queues
    itself on the scheduler one second later.
    """
    product = self._product.lower()
    startup_pending = (product in [x.lower() for x in enabled_products]) \
        and not self._threads_initialized

    if startup_pending:
        if product in [x.lower() for x in cs_products]:
            # Wait for the dcs-collector to populate the /tmp/dcs/hpi directory
            while not os.path.isdir(self._hpi_base_dir):
                logger.info("ThreadController, dir not found: %s " %
                            self._hpi_base_dir)
                logger.info("ThreadController, rechecking in %s secs" %
                            self._start_delay)
                time.sleep(int(self._start_delay))

        logger.debug("ThreadController._sspl_modules is {}".format(
            self._sspl_modules))

        # Allow other threads to initialize
        still_waiting = False
        for module in self._sspl_modules.values():
            if not isinstance(module, SensorThread):
                continue
            init_state = module.get_thread_init_status()
            logger.debug("Thread status for {} is {}".format(
                module.__class__, init_state))
            if init_state == SensorThreadState.FAILED:
                module.shutdown()
            elif init_state == SensorThreadState.WAITING:
                still_waiting = True

        if still_waiting:
            # Come back in 10 seconds and look at the sensor threads again
            self._scheduler.enter(10, self._priority, self.run, ())
            return

        # Notify external applications that we've started up successfully
        startup_msg = "SSPL-LL service has started successfully"
        announcement = ThreadControllerMsg(
            ThreadController.name(), startup_msg).getJson()
        self._write_internal_msgQ(RabbitMQegressProcessor.name(), announcement)
        self._threads_initialized = True

    # Debug switches kept for troubleshooting:
    #   self._set_debug(True)
    #   self._set_debug_persist(True)
    self._log_debug("Start accepting requests")

    try:
        # Read at least one entry, then keep going until the queue is empty
        while True:
            incoming, _ = self._read_my_msgQ()
            if incoming is not None:
                self._process_msg(incoming)
            if self._is_my_msgQ_empty():
                break

    except Exception as ex:
        # Log it and restart the whole process when a failure occurs
        logger.exception("ThreadController restarting: %r" % ex)

    self._scheduler.enter(1, self._priority, self.run, ())
    self._log_debug("Finished processing successfully")
def _check_reset_all_modules(self, jsonMsg): """Restarts all modules with debug mode off. Activated by internal_msgQ""" if jsonMsg.get("sspl_ll_debug") is not None and \ jsonMsg.get("sspl_ll_debug").get("debug_component") is not None and \ jsonMsg.get("sspl_ll_debug").get("debug_component") == "all": for module in self._sspl_modules: self._log_debug("_check_reset_all_modules, module: %s" % module) # Don't restart this thread or it won't complete the loop if module != self.name(): self._restart_module(module) # Populate an actuator response message and transmit msgString = ThreadControllerMsg("All Modules", "Restarted with debug mode off").getJson() self._write_internal_msgQ(EgressProcessor.name(), msgString) return True return False
def _process_msg(self, jsonMsg):
    """Parses the incoming message and calls the appropriate method.

    The request is read from
    ``jsonMsg["actuator_request_type"]["thread_controller"]`` (``module_name``,
    ``thread_request`` and optional ``parameters.node_id``). After the
    request has been dispatched, a ThreadControllerMsg carrying
    ``self._thread_response`` is queued for the egress processor that
    matches the configured product.

    No response is queued when the message is a global debug reset, when it
    has no thread_controller section, when a stop of the RabbitMQ processors
    or of ThreadController is refused, or when a degrade/active request does
    not name the module ``all``.

    Args:
        jsonMsg: parsed incoming message (dict-like, accessed with ``get``).
    """
    self._log_debug("_process_msg, jsonMsg: %s" % jsonMsg)

    # Check to see if debug mode is being globally turned off on all modules
    if self._check_reset_all_modules(jsonMsg) is True:
        return

    # Parse out the module name and request
    request = (jsonMsg.get("actuator_request_type") or {}).get("thread_controller")
    if request is None:
        # Nothing to act on; report it instead of failing on a None lookup
        logger.warning(
            "ThreadController, ignoring message without a thread_controller "
            "request: %s" % jsonMsg)
        return
    module_name = request.get("module_name")
    thread_request = request.get("thread_request")

    # Parse out the uuid so that it can be sent back in Ack message
    header = jsonMsg.get("sspl_ll_msg_header")
    msg_uuid = header.get("uuid") if header is not None else None
    self._log_debug("_processMsg, uuid: %s" % msg_uuid)

    # Pass along the debug section to the module
    debug_request = jsonMsg.get("sspl_ll_debug")
    if debug_request is not None:
        self.debug_section = {"sspl_ll_debug": debug_request}
    else:
        self.debug_section = None
    self._log_debug("_process_msg, self.debug_section: %s" % self.debug_section)

    # Parse out thread request and call the appropriate method
    if thread_request == "restart":
        self._restart_module(module_name)
    elif thread_request == "start":
        self._start_module(module_name)
    elif thread_request == "stop":
        # Don't let the outside world stop us from using RabbitMQ connection
        # or shut down this thread
        if module_name in ("RabbitMQegressProcessor",
                           "RabbitMQingressProcessor",
                           "ThreadController"):
            logger.warning(
                "Attempt to stop RabbitMQ or ThreadController Processors, "
                "ignoring. Please try 'restart' instead.")
            return
        self._stop_module(module_name)
    elif thread_request == "status":
        self._status_module(module_name)
    elif thread_request in ("degrade", "active"):
        # State switches apply to every module at once; str() keeps a
        # missing (None) module_name from raising on .lower()
        if str(module_name).lower() != "all":
            logger.warning(
                "Invalid module_name {0}. Need 'all' in module_name".
                format(module_name))
            return
        if thread_request == "degrade":
            self._switch_to_degraded_state(self._sspl_modules)
        else:
            self._switch_to_active_state(self._sspl_modules)
    else:
        self._thread_response = "Error, unrecognized thread request"

    # node_id is optional; default to an empty list when absent or empty
    parameters = request.get("parameters")
    node_id = (parameters.get("node_id") if parameters is not None else None) or []

    ack_type = {"hostname": self._hostname, "node_id": node_id}

    # Populate an actuator response message and transmit
    threadControllerMsg = ThreadControllerMsg(
        module_name, self._thread_response, json.dumps(ack_type))
    if msg_uuid is not None:
        threadControllerMsg.set_uuid(msg_uuid)
    msgString = threadControllerMsg.getJson()
    logger.info("ThreadController, response: %s" % str(msgString))

    product = self._product.lower()
    if product in [x.lower() for x in enabled_products]:
        self._write_internal_msgQ(RabbitMQegressProcessor.name(), msgString)
    elif product in [x.lower() for x in cs_legacy_products]:
        self._write_internal_msgQ(PlaneCntrlRMQegressProcessor.name(), msgString)