def _generate_psu_alert(self, json_msg, host_name, alert_type, alert_id,
                        severity, info, specific_info, sensor_type):
        """Build a PSU alert message, cache it and queue it for egress.

        The incoming ``json_msg`` is only written to the debug log. The
        payload that gets cached and transmitted is rebuilt from the
        remaining arguments through ``RealStorPSUDataMsg``.
        """
        # The 12-space gap reproduces the existing debug log text exactly.
        log_gap = " " * 12
        self._log_debug(
            "RealStorEnclMsgHandler, _generate_psu_alert,"
            + log_gap
            + f"json_msg {json_msg}")

        psu_payload = RealStorPSUDataMsg(
            host_name, alert_type, alert_id, severity, info,
            specific_info).getJson()

        # Keep the latest PSU message in memory so that a later sspl CLI
        # sensor request can be answered from it.
        self._psu_sensor_message = psu_payload
        self._fru_type[sensor_type] = self._psu_sensor_message

        self._write_internal_msgQ(
            EgressProcessor.name(), psu_payload, self._event)
Esempio n. 2
0
    def _check_reset_all_modules(self, jsonMsg):
        """Restarts all modules with debug mode off. Activated by internal_msgQ"""
        if jsonMsg.get("sspl_ll_debug") is not None and \
            jsonMsg.get("sspl_ll_debug").get("debug_component") is not None and \
            jsonMsg.get("sspl_ll_debug").get("debug_component") == "all":
            for module in self._sspl_modules:
                self._log_debug("_check_reset_all_modules, module: %s" %
                                module)
                # Don't restart this thread or it won't complete the loop
                if module != self.name():
                    self._restart_module(module)

            # Populate an actuator response message and transmit
            msgString = ThreadControllerMsg(
                "All Modules", "Restarted with debug mode off").getJson()
            self._write_internal_msgQ(EgressProcessor.name(), msgString)
            return True

        return False
Esempio n. 3
0
    def _send_ifdata_json_msg(self,
                              resource_id,
                              resource_type,
                              state,
                              event=""):
        """Build an IFdataMsg, cache it as the "nw" sensor data and queue it.

        The message is assembled from the node sensor's host id, local time
        and interface data; its severity is derived from ``state`` through
        ``self.severity_reader.map_severity``. After the message is handed
        to the EgressProcessor queue, the 'nw' state is persisted.
        """
        node_sensor = self._node_sensor
        severity = self.severity_reader.map_severity(state)
        if_data_msg = IFdataMsg(
            node_sensor.host_id, node_sensor.local_time, node_sensor.if_data,
            resource_id, resource_type, state, severity, event)

        # Echo the uuid back if the json request carried one
        if self._uuid is not None:
            if_data_msg.set_uuid(self._uuid)

        payload = if_data_msg.getJson()
        self.if_sensor_data = payload
        self.os_sensor_type["nw"] = self.if_sensor_data

        # Hand the message over to the message processor, then persist
        self._write_internal_msgQ(EgressProcessor.name(), payload)
        self.persist_state_data('nw', 'NW_SENSOR_DATA')
Esempio n. 4
0
    def _route_IEM(self, jsonMsg):
        """Wrap an IEM logging request into an IEM_routing message.

        Reads ``log_level`` (optional, defaults to "LOG_INFO") and
        ``log_msg`` from ``actuator_request_type.logging`` and queues the
        resulting JSON string for the EgressProcessor.
        """
        logging_request = jsonMsg.get("actuator_request_type").get("logging")

        # log_level is optional in the request
        log_level = logging_request.get("log_level")
        if log_level is None:
            log_level = "LOG_INFO"

        # Message to log, in format "IEC: EVENT_CODE: EVENT_STRING: JSON DATA"
        log_msg = f"{log_level} {logging_request.get('log_msg')}"

        routing_payload = {"message": {"IEM_routing": {"log_msg": log_msg}}}
        internal_json_msg = json.dumps(routing_payload)

        # EgressProcessor routes the IEM on to another IEM listener
        self._write_internal_msgQ(EgressProcessor.name(), internal_json_msg)
Esempio n. 5
0
    def _generate_node_fru_data(self, jsonMsg):
        """Create & transmit a FRU IPMI data message as defined
            by the sensor response json schema.

        Args:
            jsonMsg: request dict; the FRU information is read from
                ``sensor_request_type.node_data``.

        When the request carries no ``node_data`` there is nothing to build
        the message from: an error is logged and nothing is transmitted.
        """
        if self._node_sensor.host_id is None:
            successful = self._node_sensor.read_data("None", self._get_debug(),
                                                     self._units)
            if not successful:
                logger.error(
                    "NodeDataMsgHandler, updating host information was NOT successful."
                )

        sensor_request = jsonMsg.get("sensor_request_type") or {}
        fru_info = sensor_request.get("node_data")
        if fru_info is None:
            # Without this guard the message object below would never be
            # bound and the method would die with UnboundLocalError.
            logger.error(
                "NodeDataMsgHandler, _generate_node_fru_data, "
                "no node_data found in sensor request."
            )
            return

        self._fru_info = fru_info
        node_ipmi_data_msg = NodeIPMIDataMsg(self._fru_info)

        # Add in uuid if it was present in the json request
        if self._uuid is not None:
            node_ipmi_data_msg.set_uuid(self._uuid)

        self._write_internal_msgQ(EgressProcessor.name(),
                                  node_ipmi_data_msg.getJson())
Esempio n. 6
0
def _check_module_recovered(module):
    """Raise a fault_resolved alert for a module that is running again.

    Sleeps for one sensor polling cycle (config value, default 60) and
    returns silently if the module is not running afterwards. Otherwise the
    module's persistent cache is consulted: when a previous state is
    recorded and it is not already "fault_resolved", the cache is updated
    and a ThreadMonitorMsg is queued for the EgressProcessor.
    """
    module_name = module.name()

    # Let the sensor module complete a few run cycles before judging it
    wait_seconds = Conf.get(
        SSPL_CONF, f"{SSPL_LL_SETTING}>sensor_polling_cycle_time", 60)
    time.sleep(wait_seconds)
    if not module.is_running():
        return

    recovered_state = "fault_resolved"
    cache_path = os.path.join(
        module_cache_dir, f'{module_name.upper()}_{node_id}')

    # Seed an empty cache entry when none exists yet
    if not os.path.isfile(cache_path):
        module_persistent_data[module_name] = {}
        store.put(module_persistent_data[module_name], cache_path)

    # Only a module with a recorded, different state needs an alert
    module_persistent_data[module_name] = store.get(cache_path)
    previous_state = module_persistent_data[module_name].get('prev_state')
    if not previous_state or previous_state == recovered_state:
        return

    module_persistent_data[module_name] = {"prev_state": recovered_state}
    store.put(module_persistent_data[module_name], cache_path)

    alert_info = {
        "module_name": module_name,
        "alert_type": recovered_state,
        "description": f"{module_name} is recovered",
        "impact": "",
        "recommendation": "",
        "severity": "info",
        "specific_info": Conf.get(SSPL_CONF, f"{module_name.upper()}"),
    }
    alert_json = ThreadMonitorMsg(alert_info).getJson()
    module._write_internal_msgQ(EgressProcessor.name(), alert_json)
    def _process_msg(self, json_msg):
        """Parses the incoming message and generate the desired data message"""
        self._log_debug(
            f"RealStorEnclMsgHandler, _process_msg, json_msg: {json_msg}")

        if json_msg.get("sensor_request_type").get(
                "enclosure_alert") is not None:
            internal_sensor_request = json_msg.get("sensor_request_type").\
                                        get("enclosure_alert").get("status")
            if internal_sensor_request:
                resource_type = json_msg.get("sensor_request_type").\
                                get("enclosure_alert").get("info").get("resource_type")
                if ":" in resource_type:
                    sensor_type = resource_type.split(":")[2]
                else:
                    sensor_type = resource_type
                self._propagate_alert(json_msg, sensor_type)
            else:
                # serves the request coming from sspl CLI
                sensor_type = json_msg.get("sensor_request_type").\
                                get("enclosure_alert").get("info").\
                                    get("resource_type")
                if ":" in sensor_type:
                    sensor_type = sensor_type.split(":")[2]
                else:
                    sensor_type = sensor_type
                sensor_message_type = self._fru_type.get(sensor_type, "")

                # get the previously saved json message for the sensor type
                # and send the message
                if sensor_message_type:
                    self._write_internal_msgQ(EgressProcessor.name(),
                                              sensor_message_type, self._event)
                else:
                    self._log_debug(f"RealStorEnclMsgHandler, _process_msg, \
                        No past data found for {sensor_type} sensor type")
        else:
            logger.exception("RealStorEnclMsgHandler, _process_msg,\
                Not a valid sensor request format")
Esempio n. 8
0
    def _send_ifdata_json_msg(self,
                              sensor_type,
                              resource_id,
                              resource_type,
                              state,
                              severity,
                              event=""):
        """Build an IFdataMsg, cache it under ``sensor_type`` and queue it.

        The message combines the node sensor's host id, local time and
        interface data with the site/node/cluster/rack ids of this handler.
        After queuing it for the EgressProcessor, the current network
        status, cable connections and interface fault state are written to
        ``self.NW_SENSOR_DATA_PATH``.
        """
        node_sensor = self._node_sensor
        if_data_msg = IFdataMsg(
            node_sensor.host_id, node_sensor.local_time, node_sensor.if_data,
            resource_id, resource_type,
            self.site_id, self.node_id, self.cluster_id, self.rack_id,
            state, severity, event)

        # Echo the uuid back if the json request carried one
        if self._uuid is not None:
            if_data_msg.set_uuid(self._uuid)

        payload = if_data_msg.getJson()
        self.if_sensor_data = payload
        self.os_sensor_type[sensor_type] = self.if_sensor_data

        # NOTE: IEM logging of this event via LoggingMsgHandler is
        # currently disabled; only the egress path below is active.
        self._write_internal_msgQ(EgressProcessor.name(), payload)

        # Persist the network state that has to survive a restart
        nw_state_snapshot = {
            'prev_nw_status': self.prev_nw_status,
            'prev_cable_cnxns': self.prev_cable_cnxns,
            'interface_fault_state': self.interface_fault_state,
        }
        store.put(nw_state_snapshot, self.NW_SENSOR_DATA_PATH)
Esempio n. 9
0
    def _generate_disk_space_alert(self):
        """Create & transmit a disk_space_alert message as defined
            by the sensor response json schema"""

        # Notify the node sensor to update its data required for the disk_space_data message
        successful = self._node_sensor.read_data("disk_space_alert",
                                                 self._get_debug(),
                                                 self._units)
        if not successful:
            logger.error(
                "NodeDataMsgHandler, _generate_disk_space_alert was NOT successful."
            )
            return

        # Changing disk_usage_threshold type according to what value type entered in config file
        self._disk_usage_threshold = str(self._disk_usage_threshold)
        try:
            if self._disk_usage_threshold.isdigit():
                self._disk_usage_threshold = int(self._disk_usage_threshold)
            else:
                self._disk_usage_threshold = float(self._disk_usage_threshold)
        except ValueError:
            logger.warning(
                "Disk Space Alert, Invalid disk_usage_threshold value are entered in config."
            )
            # Assigning default value to _disk_usage_threshold
            self._disk_usage_threshold = self.DEFAULT_DISK_USAGE_THRESHOLD

        if self._node_sensor.disk_used_percentage >= self._disk_usage_threshold:
            if not self.disk_fault:
                self.disk_fault = True
                # Create the disk space data message and hand it over to the egress processor to transmit
                fault_event = "Disk usage increased to %s, beyond configured threshold of %s" \
                                %(self._node_sensor.disk_used_percentage, self._disk_usage_threshold)
                logger.warning(fault_event)
                diskSpaceAlertMsg = DiskSpaceAlertMsg(
                    self._node_sensor.host_id, self._epoch_time,
                    self._node_sensor.total_space,
                    self._node_sensor.free_space,
                    self._node_sensor.disk_used_percentage, self._units,
                    self.site_id, self.rack_id, self.node_id, self.cluster_id,
                    self.FAULT, fault_event)

                # Add in uuid if it was present in the json request
                if self._uuid is not None:
                    diskSpaceAlertMsg.set_uuid(self._uuid)
                jsonMsg = diskSpaceAlertMsg.getJson()
                self.disk_sensor_data = jsonMsg
                self.os_sensor_type["disk_space"] = self.disk_sensor_data

                # Transmit it to message processor
                self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)

        if (self._node_sensor.disk_used_percentage <=
                self._disk_usage_threshold) and (self.disk_fault == True):
            # Create the disk space data message and hand it over to the egress processor to transmit
            fault_resolved_event = "Disk usage decreased to %s, lesser than configured threshold of %s" \
                                %(self._node_sensor.disk_used_percentage, self._disk_usage_threshold)
            logger.warning(fault_resolved_event)
            diskSpaceAlertMsg = DiskSpaceAlertMsg(
                self._node_sensor.host_id, self._epoch_time,
                self._node_sensor.total_space, self._node_sensor.free_space,
                self._node_sensor.disk_used_percentage, self._units,
                self.site_id, self.rack_id, self.node_id, self.cluster_id,
                self.FAULT_RESOLVED, fault_resolved_event)

            # Add in uuid if it was present in the json request
            if self._uuid is not None:
                diskSpaceAlertMsg.set_uuid(self._uuid)
            jsonMsg = diskSpaceAlertMsg.getJson()
            self.disk_sensor_data = jsonMsg
            self.os_sensor_type["disk_space"] = self.disk_sensor_data

            # Transmit it to message processor
            self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)
            self.disk_fault = False
Esempio n. 10
0
    def _process_msg(self, jsonMsg):
        """Parses the incoming message and handles appropriately"""
        self._log_debug(f"_process_msg, jsonMsg: {jsonMsg}")

        if isinstance(jsonMsg, dict) is False:
            jsonMsg = json.loads(jsonMsg)

        # Parse out the uuid so that it can be sent back in Ack message
        uuid = None
        if jsonMsg.get("sspl_ll_msg_header").get("uuid") is not None:
            uuid = jsonMsg.get("sspl_ll_msg_header").get("uuid")
            self._log_debug(f"_processMsg, uuid: {uuid}")

        if jsonMsg.get("actuator_request_type").get("node_controller").get(
                "node_request") is not None:
            node_request = jsonMsg.get("actuator_request_type").get(
                "node_controller").get("node_request")
            self._log_debug(f"_processMsg, node_request: {node_request}")

            # Parse out the component field in the node_request
            component = node_request[0:4]

            # Handle generic command line requests
            if component == 'SSPL':
                # Query the Zope GlobalSiteManager for an object implementing the MOTR actuator
                if self._command_line_actuator is None:
                    from actuators.Icommand_line import ICommandLine

                    command_line_actuator_class = self._queryUtility(
                        ICommandLine)
                    # Instantiate CommandLine Actuator only if class is loaded
                    if command_line_actuator_class:
                        self._command_line_actuator = command_line_actuator_class(
                            self._conf_reader)
                    else:
                        logger.warn("CommandLine Actuator not loaded")
                        json_msg = AckResponseMsg(
                            node_request,
                            NodeControllerMsgHandler.UNSUPPORTED_REQUEST,
                            uuid).getJson()
                        self._write_internal_msgQ(EgressProcessor.name(),
                                                  json_msg)
                        return

                # Perform the request and get the response
                command_line_response = self._command_line_actuator.perform_request(
                    jsonMsg).strip()
                self._log_debug(
                    f"_process_msg, command line response: {command_line_response}"
                )

                json_msg = AckResponseMsg(node_request, command_line_response,
                                          uuid).getJson()
                self._write_internal_msgQ(EgressProcessor.name(), json_msg)

            # Handle LED effects using the HPI actuator
            elif component == "LED:":
                # HPI related operations are not supported in VM environment.
                if self._is_env_vm():
                    logger.warn(
                        "HPI operations are not supported in current environment"
                    )
                    return
                # Query the Zope GlobalSiteManager for an object implementing the IHPI actuator
                if self._HPI_actuator is None:
                    from actuators.Ihpi import IHPI
                    # Load HPIActuator class
                    HPI_actuator_class = self._queryUtility(IHPI)
                    # Instantiate HPIActuator only if class is loaded
                    if HPI_actuator_class:
                        self._HPI_actuator = HPI_actuator_class(
                            self._conf_reader)
                    else:
                        logger.warn("HPIActuator not loaded")
                        if self._product.lower() in [
                                x.lower() for x in enabled_products
                        ]:
                            json_msg = AckResponseMsg(
                                node_request,
                                NodeControllerMsgHandler.UNSUPPORTED_REQUEST,
                                uuid).getJson()
                            self._write_internal_msgQ(EgressProcessor.name(),
                                                      json_msg)
                        return

                    self._log_debug(
                        f"_process_msg, _HPI_actuator name: {self._HPI_actuator.name()}"
                    )

                    # Perform the request using HPI and get the response
                    hpi_response = self._HPI_actuator.perform_request(
                        jsonMsg).strip()
                    self._log_debug(
                        f"_process_msg, hpi_response: {hpi_response}")

                    json_msg = AckResponseMsg(node_request, hpi_response,
                                              uuid).getJson()
                    self._write_internal_msgQ(EgressProcessor.name(), json_msg)

            # Set the Bezel LED color using the GEM interface
            elif component == "BEZE":
                # Query the Zope GlobalSiteManager for an object implementing the IGEM actuator
                if self._GEM_actuator is None:
                    self._GEM_actuator = self._queryUtility(IGEM)(
                        self._conf_reader)
                    self._log_debug(
                        f"_process_msg, _GEM_actuator name: {self._GEM_actuator.name()}"
                    )

                # Perform the request using GEM and get the response
                gem_response = self._GEM_actuator.perform_request(
                    jsonMsg).strip()
                self._log_debug(f"_process_msg, gem_response: {gem_response}")

                json_msg = AckResponseMsg(node_request, gem_response,
                                          uuid).getJson()
                self._write_internal_msgQ(EgressProcessor.name(), json_msg)

            elif component == "PDU:":
                # Query the Zope GlobalSiteManager for an object implementing the IPDU actuator
                if self._PDU_actuator is None:
                    from actuators.Ipdu import IPDU

                    PDU_actuator_class = self._queryUtility(IPDU)
                    # Instantiate RaritanPDU Actuator only if class is loaded
                    if PDU_actuator_class:
                        self._PDU_actuator = PDU_actuator_class(
                            self._conf_reader)
                    else:
                        logger.warn("RaritanPDU Actuator not loaded")
                        json_msg = AckResponseMsg(
                            node_request,
                            NodeControllerMsgHandler.UNSUPPORTED_REQUEST,
                            uuid).getJson()
                        self._write_internal_msgQ(EgressProcessor.name(),
                                                  json_msg)
                        return

                # Perform the request on the PDU and get the response
                pdu_response = self._PDU_actuator.perform_request(
                    jsonMsg).strip()
                self._log_debug(f"_process_msg, pdu_response: {pdu_response}")

                json_msg = AckResponseMsg(node_request, pdu_response,
                                          uuid).getJson()
                self._write_internal_msgQ(EgressProcessor.name(), json_msg)

            elif component == "RAID":
                # If the state is INITIALIZED, We can assume that actuator is
                # ready to perform operation.
                if actuator_state_manager.is_initialized("RAIDactuator"):
                    self._log_debug(
                        f"_process_msg, _RAID_actuator name: {self._RAID_actuator.name()}"
                    )
                    self._execute_raid_request(node_request,
                                               self._RAID_actuator, jsonMsg,
                                               uuid)

                # If the state is INITIALIZING, need to send message
                elif actuator_state_manager.is_initializing("RAIDactuator"):
                    # This state will not be reached. Kept here for consistency.
                    logger.info("RAID actuator is initializing")
                    busy_json_msg = AckResponseMsg(
                        node_request, "BUSY", uuid,
                        error_no=errno.EBUSY).getJson()
                    self._write_internal_msgQ("EgressProcessor", busy_json_msg)

                elif actuator_state_manager.is_imported("RAIDactuator"):
                    # This case will be for first request only. Subsequent
                    # requests will go to INITIALIZED state case.
                    logger.info("RAID actuator is imported and initializing")

                    from actuators.Iraid import IRAIDactuator
                    actuator_state_manager.set_state(
                        "RAIDactuator", actuator_state_manager.INITIALIZING)
                    # Query the Zope GlobalSiteManager for an object implementing the IRAIDactuator
                    raid_actuator_class = self._queryUtility(IRAIDactuator)
                    if raid_actuator_class:
                        # NOTE: Instantiation part should not time consuming
                        # otherwise NodeControllerMsgHandler will get block
                        # and will not be able serve any subsequent requests.
                        # This applies to instantiation of evey actuator.
                        self._RAID_actuator = raid_actuator_class()
                        logger.info(
                            f"_process_msg, _RAID_actuator name: {self._RAID_actuator.name()}"
                        )
                        self._execute_raid_request(node_request,
                                                   self._RAID_actuator,
                                                   jsonMsg, uuid)
                        actuator_state_manager.set_state(
                            "RAIDactuator", actuator_state_manager.INITIALIZED)
                    else:
                        logger.warn("RAID actuator is not instantiated")

                # If there is no entry for actuator in table, We can assume
                # that it is not loaded for some reason.
                else:
                    logger.warn("RAID actuator is not loaded or not supported")

            elif component == "IPMI":
                # Query the Zope GlobalSiteManager for an object implementing the IPMI actuator
                if self._IPMI_actuator is None:
                    from actuators.Iipmi import Iipmi

                    IPMI_actuator_class = self._queryUtility(Iipmi)
                    # Instantiate IPMI Actuator only if class is loaded
                    if IPMI_actuator_class:
                        self._IPMI_actuator = IPMI_actuator_class(
                            self._conf_reader)
                    else:
                        logger.warn("IPMI Actuator not loaded")
                        json_msg = AckResponseMsg(
                            node_request,
                            NodeControllerMsgHandler.UNSUPPORTED_REQUEST,
                            uuid).getJson()
                        self._write_internal_msgQ(EgressProcessor.name(),
                                                  json_msg)
                        return

                # Perform the IPMI request on the node and get the response
                ipmi_response = self._IPMI_actuator.perform_request(
                    jsonMsg).strip()
                self._log_debug(
                    f"_process_msg, ipmi_response: {ipmi_response}")

                json_msg = AckResponseMsg(node_request, ipmi_response,
                                          uuid).getJson()
                self._write_internal_msgQ(EgressProcessor.name(), json_msg)

            elif component == "STOP":
                # HPI related operations are not supported in VM environment.
                if self._is_env_vm():
                    logger.warn(
                        "HPI operations are not supported in current environment"
                    )
                    return
                # Query the Zope GlobalSiteManager for an object implementing the IHPI actuator
                if self._HPI_actuator is None:
                    from actuators.Ihpi import IHPI
                    # Load HPIActuator class
                    HPI_actuator_class = self._queryUtility(IHPI)
                    # Instantiate HPIActuator only if class is loaded
                    if HPI_actuator_class:
                        self._HPI_actuator = HPI_actuator_class(
                            self._conf_reader)
                    else:
                        logger.warn("HPIActuator not loaded")
                        if self._product.lower() in [
                                x.lower() for x in enabled_products
                        ]:
                            json_msg = AckResponseMsg(
                                node_request,
                                NodeControllerMsgHandler.UNSUPPORTED_REQUEST,
                                uuid).getJson()
                            self._write_internal_msgQ(EgressProcessor.name(),
                                                      json_msg)
                        return

                    self._log_debug(
                        f"_process_msg, _HPI_actuator name: {self._HPI_actuator.name()}"
                    )

                    # Parse out the drive to stop
                    drive_request = node_request[12:].strip()
                    self._log_debug(
                        f"perform_request, drive to stop: {drive_request}")

                    # Append POWER_OFF to notify HPI actuator of desired state
                    jsonMsg["actuator_request_type"]["node_controller"]["node_request"] = \
                            f"DISK: set {drive_request} POWER_OFF"
                    self._log_debug(f"_process_msg, jsonMsg: {jsonMsg}")

                    # Perform the request using HPI and get the response
                    hpi_response = self._HPI_actuator.perform_request(
                        jsonMsg).strip()
                    self._log_debug(
                        f"_process_msg, hpi_response: {hpi_response}")

                    # Simplify success message as external apps don't care about details
                    if "Success" in hpi_response:
                        hpi_response = "Successful"

                    json_msg = AckResponseMsg(node_request, hpi_response,
                                              uuid).getJson()
                    self._write_internal_msgQ(EgressProcessor.name(), json_msg)

            elif component == "STAR":
                # HPI related operations are not supported in VM environment.
                if self._is_env_vm():
                    logger.warn(
                        "HPI operations are not supported in current environment"
                    )
                    return
                # Query the Zope GlobalSiteManager for an object implementing the IHPI actuator
                if self._HPI_actuator is None:
                    from actuators.Ihpi import IHPI
                    # Load HPIActuator class
                    HPI_actuator_class = self._queryUtility(IHPI)
                    # Instantiate HPIActuator only if class is loaded
                    if HPI_actuator_class:
                        self._HPI_actuator = HPI_actuator_class(
                            self._conf_reader)
                    else:
                        logger.warn("HPIActuator not loaded")
                        if self._product.lower() in [
                                x.lower() for x in enabled_products
                        ]:
                            json_msg = AckResponseMsg(
                                node_request,
                                NodeControllerMsgHandler.UNSUPPORTED_REQUEST,
                                uuid).getJson()
                            self._write_internal_msgQ(EgressProcessor.name(),
                                                      json_msg)
                        return

                    self._log_debug(
                        f"_process_msg, _HPI_actuator name: {self._HPI_actuator.name()}"
                    )

                    # Parse out the drive to start
                    drive_request = node_request[13:].strip()
                    self._log_debug(
                        f"perform_request, drive to start: {drive_request}")

                    # Append POWER_ON to notify HPI actuator of desired state
                    jsonMsg["actuator_request_type"]["node_controller"]["node_request"] = \
                            f"DISK: set {drive_request} POWER_ON"
                    self._log_debug(f"_process_msg, jsonMsg: {jsonMsg}")

                    # Perform the request using HPI and get the response
                    hpi_response = self._HPI_actuator.perform_request(
                        jsonMsg).strip()
                    self._log_debug(
                        f"_process_msg, hpi_response: {hpi_response}")

                    # Simplify success message as external apps don't care about details
                    if "Success" in hpi_response:
                        hpi_response = "Successful"

                    json_msg = AckResponseMsg(node_request, hpi_response,
                                              uuid).getJson()
                    self._write_internal_msgQ(EgressProcessor.name(), json_msg)

            elif component == "RESE":
                # HPI related operations are not supported in VM environment.
                if self._is_env_vm():
                    logger.warn(
                        "HPI operations are not supported in current environment"
                    )
                    return
                # Query the Zope GlobalSiteManager for an object implementing the IHPI actuator
                if self._HPI_actuator is None:
                    from actuators.Ihpi import IHPI
                    # Load HPIActuator class
                    HPI_actuator_class = self._queryUtility(IHPI)
                    # Instantiate HPIActuator only if class is loaded
                    if HPI_actuator_class:
                        self._HPI_actuator = HPI_actuator_class(
                            self._conf_reader)
                    else:
                        logger.warn("HPIActuator not loaded")
                        if self._product.lower() in [
                                x.lower() for x in enabled_products
                        ]:
                            json_msg = AckResponseMsg(
                                node_request,
                                NodeControllerMsgHandler.UNSUPPORTED_REQUEST,
                                uuid).getJson()
                            self._write_internal_msgQ(EgressProcessor.name(),
                                                      json_msg)
                        return

                    self._log_debug(
                        f"_process_msg, _HPI_actuator name: {self._HPI_actuator.name()}"
                    )

                    # Parse out the drive to power cycle
                    drive_request = node_request[13:].strip()
                    self._log_debug(
                        f"perform_request, drive to power cycle: {drive_request}"
                    )

                    # Append POWER_OFF and then POWER_ON to notify HPI actuator of desired state
                    jsonMsg["actuator_request_type"]["node_controller"]["node_request"] = \
                            f"DISK: set {drive_request} POWER_OFF"
                    self._log_debug(f"_process_msg, jsonMsg: {jsonMsg}")

                    # Perform the request using HPI and get the response
                    hpi_response = self._HPI_actuator.perform_request(
                        jsonMsg).strip()
                    self._log_debug(
                        f"_process_msg, hpi_response: {hpi_response}")

                    # Check for success and power the disk back on
                    if "Success" in hpi_response:
                        # Append POWER_ON to notify HPI actuator of desired state
                        jsonMsg["actuator_request_type"]["node_controller"]["node_request"] = \
                                   f"DISK: set {drive_request} POWER_ON"
                        self._log_debug(f"_process_msg, jsonMsg: {jsonMsg}")

                        # Perform the request using HPI and get the response
                        hpi_response = self._HPI_actuator.perform_request(
                            jsonMsg).strip()
                        self._log_debug(
                            f"_process_msg, hpi_response: {hpi_response}")

                        # Simplify success message as external apps don't care about details
                        if "Success" in hpi_response:
                            hpi_response = "Successful"

                    json_msg = AckResponseMsg(node_request, hpi_response,
                                              uuid).getJson()
                    self._write_internal_msgQ(EgressProcessor.name(), json_msg)

            elif component == "HDPA":
                # If the state is INITIALIZED, We can assume that actuator is
                # ready to perform operation.
                if actuator_state_manager.is_initialized("Hdparm"):
                    logger.info(
                        f"_process_msg, Hdparm_actuator name: {self._hdparm_actuator.name()}"
                    )
                    # Perform the hdparm request on the node and get the response
                    hdparm_response = self._hdparm_actuator.perform_request(
                        jsonMsg).strip()
                    self._log_debug(
                        f"_process_msg, hdparm_response: {hdparm_response}")

                    json_msg = AckResponseMsg(node_request, hdparm_response,
                                              uuid).getJson()
                    self._write_internal_msgQ(EgressProcessor.name(), json_msg)

                # If the state is INITIALIZING, need to send message
                elif actuator_state_manager.is_initializing("Hdparm"):
                    # This state will not be reached. Kept here for consistency.
                    logger.info("Hdparm actuator is initializing")
                    busy_json_msg = AckResponseMsg(
                        node_request, "BUSY", uuid,
                        error_no=errno.EBUSY).getJson()
                    self._write_internal_msgQ("EgressProcessor", busy_json_msg)

                elif actuator_state_manager.is_imported("Hdparm"):
                    # This case will be for first request only. Subsequent
                    # requests will go to INITIALIZED state case.
                    logger.info("Hdparm actuator is imported and initializing")
                    # Query the Zope GlobalSiteManager for an object
                    # implementing the hdparm actuator.
                    from actuators.Ihdparm import IHdparm
                    actuator_state_manager.set_state(
                        "Hdparm", actuator_state_manager.INITIALIZING)
                    hdparm_actuator_class = self._queryUtility(IHdparm)
                    if hdparm_actuator_class:
                        # NOTE: Instantiation part should not time consuming
                        # otherwise NodeControllerMsgHandler will get block and will
                        # not be able serve any subsequent requests. This applies
                        # to instantiation of evey actuator.
                        self._hdparm_actuator = hdparm_actuator_class()
                        self._log_debug(
                            f"_process_msg, _hdparm_actuator name: {self._hdparm_actuator.name()}"
                        )
                        # Perform the hdparm request on the node and get the response
                        hdparm_response = self._hdparm_actuator.perform_request(
                            jsonMsg).strip()
                        self._log_debug(
                            f"_process_msg, hdparm_response: {hdparm_response}"
                        )

                        json_msg = AckResponseMsg(node_request,
                                                  hdparm_response,
                                                  uuid).getJson()
                        self._write_internal_msgQ(EgressProcessor.name(),
                                                  json_msg)
                        actuator_state_manager.set_state(
                            "Hdparm", actuator_state_manager.INITIALIZED)
                    else:
                        logger.info("Hdparm actuator is not instantiated")

                # If there is no entry for actuator in table, We can assume
                # that it is not loaded for some reason.
                else:
                    logger.info(
                        "Hdparm actuator is not loaded or not supported")

            elif component == "SMAR":
                # Parse out the drive request field in json msg
                node_request = jsonMsg.get("actuator_request_type").get(
                    "node_controller").get("node_request")
                drive_request = node_request[12:].strip()
                self._log_debug(f"perform_request, drive: {drive_request}")

                # If the drive field is an asterisk then send all the smart results for all drives available
                if drive_request == "*":
                    # Send the event to DiskMonitor to schedule SMART test
                    internal_json_msg = json.dumps({
                        "sensor_request_type": "disk_smart_test",
                        "serial_number": "*",
                        "node_request": self.host_id,
                        "uuid": uuid
                    })

                    self._write_internal_msgQ("DiskMonitor", internal_json_msg)
                    return

                # Put together a message to get the serial number of the drive using hdparm tool
                if drive_request.startswith("/"):
                    serial_number, error = self._retrieve_serial_number(
                        drive_request)

                    # Send error response back on ack channel
                    if error != "":
                        json_msg = AckResponseMsg(node_request, error,
                                                  uuid).getJson()
                        self._write_internal_msgQ(EgressProcessor.name(),
                                                  json_msg)
                        return
                else:
                    if self._smartctl_actuator is None:
                        from actuators.Ismartctl import ISmartctl
                        smartctl_actuator_class = self._queryUtility(ISmartctl)
                        if smartctl_actuator_class:
                            self._smartctl_actuator = self._queryUtility(
                                ISmartctl)()
                            self._log_debug(
                                "_process_msg, _smart_actuator name: %s" %
                                self._smartctl_actuator.name())
                        else:
                            logger.error(
                                " No module Smartctl is present to load")
                    serial_compare = self._smartctl_actuator._check_serial_number(
                        drive_request)
                    if not serial_compare:
                        json_msg = AckResponseMsg(node_request,
                                                  "Drive Not Found",
                                                  uuid).getJson()
                        self._write_internal_msgQ(EgressProcessor.name(),
                                                  json_msg)
                        return
                    else:
                        serial_number = drive_request

                    # Send the event to DiskMonitor to schedule SMART test
                    internal_json_msg = json.dumps({
                        "sensor_request_type": "disk_smart_test",
                        "serial_number": serial_number,
                        "node_request": node_request,
                        "uuid": uuid
                    })

                    self._write_internal_msgQ("DiskMonitor", internal_json_msg)

            elif component == "DRVM":
                # Requesting the current status from drivemanager
                # Parse out the drive request field in json msg
                node_request = jsonMsg.get("actuator_request_type").get(
                    "node_controller").get("node_request")
                drive_request = node_request[15:].strip()
                self._log_debug(f"perform_request, drive: {drive_request}")

                # If the drive field is an asterisk then send all the drivemanager results for all drives available
                if drive_request == "*":
                    # Send a message to the disk message handler to lookup the drivemanager status and send it out
                    internal_json_msg = json.dumps({
                        "sensor_request_type": "drvmngr_status",
                        "serial_number": "*",
                        "node_request": self.host_id,
                        "uuid": uuid
                    })

                    # Send the event to disk message handler to generate json message
                    self._write_internal_msgQ(DiskMsgHandler.name(),
                                              internal_json_msg)
                    return

                # Put together a message to get the serial number of the drive using hdparm tool
                if drive_request.startswith("/"):
                    serial_number, error = self._retrieve_serial_number(
                        drive_request)

                    # Send error response back on ack channel
                    if error != "":
                        json_msg = AckResponseMsg(node_request, error,
                                                  uuid).getJson()
                        self._write_internal_msgQ(EgressProcessor.name(),
                                                  json_msg)
                        return
                else:
                    serial_number = drive_request

                # Send a message to the disk message handler to lookup the smart status and send it out
                internal_json_msg = json.dumps({
                    "sensor_request_type": "drvmngr_status",
                    "serial_number": serial_number,
                    "node_request": node_request,
                    "uuid": uuid
                })

                # Send the event to disk message handler to generate json message
                self._write_internal_msgQ(DiskMsgHandler.name(),
                                          internal_json_msg)

            elif component == "HPI_":
                # Requesting the current status from HPI data
                # Parse out the drive request field in json msg
                if self._is_env_vm():
                    logger.warn(
                        "HPI operations are not supported in current environment"
                    )
                    return

                if self.setup == 'cortx':
                    logger.warn("HPIMonitor not loaded")
                    json_msg = AckResponseMsg(
                        node_request,
                        NodeControllerMsgHandler.UNSUPPORTED_REQUEST,
                        uuid).getJson()
                    self._write_internal_msgQ(EgressProcessor.name(), json_msg)
                    return

                node_request = jsonMsg.get("actuator_request_type").get(
                    "node_controller").get("node_request")
                drive_request = node_request[11:].strip()
                self._log_debug(f"perform_request, drive: {drive_request}")

                # If the drive field is an asterisk then send all the hpi results for all drives available
                if drive_request == "*":
                    # Send a message to the disk message handler to lookup the hpi status and send it out
                    internal_json_msg = json.dumps({
                        "sensor_request_type": "hpi_status",
                        "serial_number": "*",
                        "node_request": self.host_id,
                        "uuid": uuid
                    })

                    # Send the event to disk message handler to generate json message
                    self._write_internal_msgQ(DiskMsgHandler.name(),
                                              internal_json_msg)
                    return

                # Put together a message to get the serial number of the drive using hdparm tool
                if drive_request.startswith("/"):
                    serial_number, error = self._retrieve_serial_number(
                        drive_request)

                    # Send error response back on ack channel
                    if error != "":
                        json_msg = AckResponseMsg(node_request, error,
                                                  uuid).getJson()
                        self._write_internal_msgQ(EgressProcessor.name(),
                                                  json_msg)
                        return
                else:
                    serial_number = drive_request

                # Send a message to the disk message handler to lookup the smart status and send it out
                internal_json_msg = json.dumps({
                    "sensor_request_type": "hpi_status",
                    "serial_number": serial_number,
                    "node_request": node_request,
                    "uuid": uuid
                })

                # Send the event to disk message handler to generate json message
                self._write_internal_msgQ(DiskMsgHandler.name(),
                                          internal_json_msg)

            elif component == "SIMU":
                # Requesting to simulate an event
                # Parse out the simulated request field
                node_request = jsonMsg.get("actuator_request_type").get(
                    "node_controller").get("node_request")
                sim_request = node_request[9:].strip().split(" ")
                self._log_debug(
                    f"perform_request, sim_request: {str(sim_request)}")

                # Put together a message to get the serial number of the drive using hdparm tool
                if sim_request[1].startswith("/"):
                    serial_number, error = self._retrieve_serial_number(
                        sim_request[1])

                    # Send error response back on ack channel
                    if error != "":
                        json_msg = AckResponseMsg(node_request, error,
                                                  uuid).getJson()
                        self._write_internal_msgQ(EgressProcessor.name(),
                                                  json_msg)
                        return
                else:
                    serial_number = sim_request[1]

                # SMART simulation requests are sent to DiskMonitor
                if sim_request[0] == "SMART_FAILURE":
                    logger.info(
                        f"NodeControllerMsgHandler, simulating SMART_FAILURE on drive: {serial_number}"
                    )

                    internal_json_msg = json.dumps({
                        "sensor_request_type":
                        "simulate_failure",
                        "serial_number":
                        serial_number,
                        "node_request":
                        sim_request[0],
                        "uuid":
                        uuid
                    })

                    # Send the event to DiskMonitor to handle it from here
                    self._write_internal_msgQ("DiskMonitor", internal_json_msg)

                else:
                    # Send a message to the disk message handler to handle simulation request
                    internal_json_msg = json.dumps({
                        "sensor_request_type":
                        "sim_event",
                        "serial_number":
                        serial_number,
                        "node_request":
                        sim_request[0],
                        "uuid":
                        uuid
                    })

                    # Send the event to disk message handler to generate json message
                    self._write_internal_msgQ(DiskMsgHandler.name(),
                                              internal_json_msg)

            elif component == "NDHW":
                # NDHW Stands for Node HW.
                try:
                    # Load and Instantiate the Actuator for the first request
                    if self._NodeHW_actuator is None:
                        from actuators.impl.generic.node_hw import NodeHWactuator
                        from framework.utils.ipmi_client import IpmiFactory
                        self.ipmi_client_name = Conf.get(
                            SSPL_CONF, f"{NODEHWACTUATOR}>{IPMI_CLIENT}",
                            "ipmitool")
                        ipmi_factory = IpmiFactory()
                        ipmi_client = \
                           ipmi_factory.get_implementor(self.ipmi_client_name)
                        # Instantiate NodeHWactuator only if class is loaded
                        if ipmi_client is not None:
                            self._NodeHW_actuator = NodeHWactuator(
                                ipmi_client, self._conf_reader)
                            self._NodeHW_actuator.initialize()
                        else:
                            logger.error(
                                f"IPMI client: '{self.ipmi_client_name}' doesn't exist"
                            )
                            return
                    node_request = jsonMsg.get("actuator_request_type")
                    # Perform the NodeHW request on the node and get the response
                    #TODO: Send message to Ack as well as Sensor in their respective channel.
                    node_hw_response = self._NodeHW_actuator.perform_request(
                        node_request)
                    self._log_debug(
                        f"_process_msg, node_hw_response: {node_hw_response}")
                    json_msg = NodeHwAckResponseMsg(node_request,
                                                    node_hw_response,
                                                    uuid).getJson()
                    self._write_internal_msgQ(EgressProcessor.name(), json_msg)
                except ImportError as e:
                    logger.error(f"Modules could not be loaded: {e}")
                    return
                except Exception as e:
                    logger.error(
                        f"NodeControllerMsgHandler, _process_msg, Exception in request handling: {e}"
                    )
                    return

            else:
                response = f"NodeControllerMsgHandler, _process_msg, unknown node controller msg: {node_request}"
                self._log_debug(response)

                json_msg = AckResponseMsg(node_request, response,
                                          uuid).getJson()
                self._write_internal_msgQ(EgressProcessor.name(), json_msg)
Esempio n. 11
0
def execute_thread(module, msgQlist, conf_reader, product, resume=True):
    """
    Run module as a thread. Recover the module if any error during
    initialization and run time of the module.

    If recovery count>0,
        module will be recovered from failure until the maximum recovery
        attempt. If not recoverable, corresponding module will be shutdown
        and failure alert will be raised due to its impact.
    If recovery count=0,
        no recovery attempt will be made.

    Args:
        module: The module to run. This function calls its ``name()``,
            ``suspend()``, ``start_thread()``, ``impact()``, ``shutdown()``,
            ``is_running()`` and ``_write_internal_msgQ()`` methods.
        msgQlist: Message queue reference list, handed to
            ``module.start_thread`` unchanged.
        conf_reader: Configuration reader, handed to ``module.start_thread``
            unchanged.
        product: Product name, handed to ``module.start_thread`` unchanged.
        resume: When falsy, ``module.suspend()`` is called before the module
            is started.

    Recovery settings are only looked up (via ``_get_recovery_config``) for
    ``SensorThread`` instances; every other module gets a single attempt.
    """
    module_name = module.name()
    # Suspend module threads
    if not resume:
        module.suspend()

    # Initialize persistent cache for sensor status
    per_data_path = os.path.join(
        module_cache_dir, f"{module_name.upper()}_{node_id}")
    if not os.path.isfile(per_data_path):
        module_persistent_data[module_name] = {}
        store.put(module_persistent_data[module_name], per_data_path)

    is_sensor_thread = isinstance(module, SensorThread)
    recovery_count = recovery_interval = 0
    if is_sensor_thread:
        recovery_count, recovery_interval = _get_recovery_config(module_name)

    attempt = 0

    # NOTE(review): a normal (non-raising) return from start_thread also falls
    # through to the next iteration while attempts remain - confirm that
    # re-starting the module in that case is intended.
    while attempt <= recovery_count:
        attempt += 1
        try:
            # Each module is passed a reference list to message queues so it
            # can transmit internal messages to other modules as desired
            module.start_thread(conf_reader, msgQlist, product)
        except Exception as err:
            curr_state = "fault"
            err_msg = f"{module_name}, {err}"
            logger.error(err_msg)
            if attempt > recovery_count:
                # Out of attempts: report the module as unrecoverable.
                logger.debug(traceback.format_exc())
                description = f"{module_name} is stopped and unrecoverable. {err_msg}"
                impact = module.impact()
                recommendation = "Restart SSPL service"
                logger.critical(
                    f"{description}. Impact: {impact} Recommendation: {recommendation}")
                # Check previous state of the module and send fault alert
                if os.path.isfile(per_data_path):
                    module_persistent_data[module_name] = store.get(per_data_path)
                # The in-memory entry is only seeded above when the cache file
                # was absent at start-up, and the file may have disappeared or
                # yielded nothing since. Treat missing state as "no previous
                # state" instead of raising inside this handler, which would
                # skip both the alert and the shutdown below.
                persisted_state = module_persistent_data.get(module_name) or {}
                prev_state = persisted_state.get('prev_state')
                # Only sensor threads raise an alert, and only on a state change
                if is_sensor_thread and curr_state != prev_state:
                    module_persistent_data[module_name] = {"prev_state": curr_state}
                    store.put(module_persistent_data[module_name], per_data_path)
                    specific_info = Conf.get(SSPL_CONF, module_name.upper())
                    info = {
                        "module_name": module_name,
                        "alert_type": curr_state,
                        "description": description,
                        "impact": impact,
                        "recommendation": recommendation,
                        "severity": "critical",
                        "specific_info": specific_info,
                    }
                    jsonMsg = ThreadMonitorMsg(info).getJson()
                    module._write_internal_msgQ(EgressProcessor.name(), jsonMsg)
            else:
                logger.debug(f"Recovering {module_name} from failure, "
                             f"attempt: {attempt}")
                time.sleep(recovery_interval)

            # Stop the failed module after every failure, whether or not
            # another recovery attempt follows.
            logger.info(f"Terminating monitoring thread {module_name}")
            module.shutdown()
            # Re-issue the shutdown a bounded number of times while the
            # module still reports itself as running.
            retry = 5
            while module.is_running():
                module.shutdown()
                retry -= 1
                if not retry:
                    break
                time.sleep(2)
Esempio n. 12
0
 def check_EgressProcessor_is_running(self):
     """Tell whether the EgressProcessor module is still running.

     Used by the shutdown_handler to allow queued egress msgs to complete.

     Returns:
         The value of ``is_running()`` on the EgressProcessor entry of
         ``self._sspl_modules`` when the configured product matches
         (case-insensitively) an entry of ``enabled_products`` or
         ``cs_legacy_products``; ``None`` for any other product.
     """
     product = self._product.lower()
     # Both product families get the same answer; the legacy list is only
     # consulted when the product is not in the enabled list.
     recognised = (
         product in [name.lower() for name in enabled_products]
         or product in [name.lower() for name in cs_legacy_products]
     )
     if not recognised:
         return None
     return self._sspl_modules[EgressProcessor.name()].is_running()
Esempio n. 13
0
    def run(self):
        """Handle pending requests, then re-schedule this method.

        While the product is in ``enabled_products`` and start-up has not yet
        been announced, the method first waits for every ``SensorThread`` in
        ``self._sspl_modules`` to leave the ``WAITING`` init state. If any is
        still waiting it re-schedules itself with a delay of 10 and returns.
        Otherwise it sends the start-up message to the EgressProcessor, marks
        the threads as initialized and launches one ``_check_module_recovered``
        thread per sensor module.

        After that (and on every later call) it reads from its own message
        queue until the queue is empty, passing each non-``None`` message to
        ``self._process_msg``, and re-schedules itself with a delay of 1.
        Any exception raised while reading or processing is logged and not
        propagated.
        """
        product_name = self._product.lower()
        startup_pending = \
            product_name in [p.lower() for p in enabled_products] and \
            not self._threads_initialized

        if startup_pending:
            if product_name in [p.lower() for p in cs_products]:
                # Block until the HPI base directory exists (the original
                # comment notes it is populated by the dcs-collector).
                while not os.path.isdir(self._hpi_base_dir):
                    logger.info(
                        "ThreadController, dir not found: %s " % self._hpi_base_dir)
                    logger.info(
                        "ThreadController, rechecking in %s secs" % self._start_delay)
                    time.sleep(int(self._start_delay))

            logger.debug("ThreadController._sspl_modules is {}".format(
                self._sspl_modules))

            # Give the sensor threads a chance to finish initializing; every
            # sensor thread is polled and logged even once one is found waiting.
            still_waiting = False
            for sspl_module in self._sspl_modules.values():
                if isinstance(sspl_module, SensorThread):
                    init_status = sspl_module.get_thread_init_status()
                    logger.debug("Thread status for {} is {}".format(
                        sspl_module.__class__, init_status))
                    if init_status == SensorThreadState.WAITING:
                        still_waiting = True

            if still_waiting:
                logger.debug("ThreadController, waiting for all modules to initialize")
                self._scheduler.enter(10, self._priority, self.run, ())
                return

            # Tell external applications that start-up completed
            startup_msg = "SSPL-LL service has started successfully"
            self._write_internal_msgQ(
                EgressProcessor.name(),
                ThreadControllerMsg(ThreadController.name(), startup_msg).getJson())
            self._threads_initialized = True

            # Check, on a separate thread per sensor module, whether it has
            # recovered from a previous failure
            for sspl_module in self._sspl_modules.values():
                if isinstance(sspl_module, SensorThread):
                    Thread(target=_check_module_recovered,
                           args=(sspl_module,)).start()

            self._log_debug("Start accepting requests")

        try:
            # The first read blocks until the queue holds an entry; keep
            # reading afterwards until the queue reports empty.
            while True:
                request, _ = self._read_my_msgQ()
                if request is not None:
                    self._process_msg(request)
                if self._is_my_msgQ_empty():
                    break
        except Exception as ex:
            # Log it and restart the whole process when a failure occurs
            logger.exception("ThreadController restarting: %r" % ex)

        self._scheduler.enter(1, self._priority, self.run, ())
        self._log_debug("Finished processing successfully")
Esempio n. 14
0
    def _generate_cpu_data(self):
        """Create & transmit a cpu_data message as defined
            by the sensor response json schema"""

        current_time = Utility.get_current_time()

        # Notify the node sensor to update its data required for the cpu_data message
        successful = self._node_sensor.read_data("cpu_data", self._get_debug())
        if not successful:
            logger.error(
                "NodeDataMsgHandler, _generate_cpu_data was NOT successful.")

        self._cpu_usage_threshold = str(self._cpu_usage_threshold)
        try:
            if self._cpu_usage_threshold.isdigit():
                self._cpu_usage_threshold = int(self._cpu_usage_threshold)
            else:
                self._cpu_usage_threshold = float(self._cpu_usage_threshold)
        except ValueError:
            logger.warn(
                "CPU Usage Alert, Invalid host_memory_usage_threshold value are entered in config."
            )
            # Assigning default value to _cpu_usage_threshold
            self._cpu_usage_threshold = self.DEFAULT_CPU_USAGE_THRESHOLD

        cpu_persistent_data = self.read_persistent_data('CPU_USAGE_DATA')
        if cpu_persistent_data.get('cpu_usage_time_map') is not None:
            previous_check_time = int(
                cpu_persistent_data['cpu_usage_time_map'])
        else:
            previous_check_time = int(-1)
        if cpu_persistent_data.get(
                'cpu_fault_resolved_iterations') is not None:
            fault_resolved_iters = int(
                cpu_persistent_data['cpu_fault_resolved_iterations'])
        else:
            fault_resolved_iters = 0
        try:
            iteration_limit = int(self._high_cpu_usage_wait_threshold /
                                  self._transmit_interval)
        except ZeroDivisionError:
            iteration_limit = 0
        self.usage_time_map['cpu'] = current_time

        if self._node_sensor.cpu_usage >= self._cpu_usage_threshold \
           and not self.high_usage['cpu']:
            if previous_check_time == -1:
                previous_check_time = current_time
                self.persist_state_data('cpu', 'CPU_USAGE_DATA')

            if self.usage_time_map[
                    'cpu'] - previous_check_time >= self._high_cpu_usage_wait_threshold:

                self.high_usage['cpu'] = True
                self.fault_resolved_iterations['cpu'] = 0
                # Create the cpu usage data message and hand it over
                # to the egress processor to transmit
                fault_event = "CPU usage has increased to {}%, "\
                    "beyond the configured threshold of {}% "\
                    "for more than {} seconds.".format(
                        self._node_sensor.cpu_usage,
                        self._cpu_usage_threshold,
                        self._high_cpu_usage_wait_threshold
                    )
                logger.warn(fault_event)

                # Create the cpu usage update message and hand it over to the egress processor to transmit
                cpuDataMsg = CPUdataMsg(
                    self._node_sensor.host_id, self._epoch_time,
                    self._node_sensor.csps, self._node_sensor.idle_time,
                    self._node_sensor.interrupt_time,
                    self._node_sensor.iowait_time, self._node_sensor.nice_time,
                    self._node_sensor.softirq_time,
                    self._node_sensor.steal_time,
                    self._node_sensor.system_time, self._node_sensor.user_time,
                    self._node_sensor.cpu_core_data,
                    self._node_sensor.cpu_usage, self.FAULT, fault_event)

                # Add in uuid if it was present in the json request
                if self._uuid is not None:
                    cpuDataMsg.set_uuid(self._uuid)
                jsonMsg = cpuDataMsg.getJson()
                self.cpu_sensor_data = jsonMsg
                self.os_sensor_type["cpu_usage"] = self.cpu_sensor_data

                # Transmit it to message processor
                self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)
                # Store the state to Persistent Cache.
                self.persist_state_data('cpu', 'CPU_USAGE_DATA')

        if self._node_sensor.cpu_usage < self._cpu_usage_threshold:
            if not self.high_usage['cpu']:
                self.persist_state_data('cpu', 'CPU_USAGE_DATA')
            else:
                if fault_resolved_iters < iteration_limit:
                    fault_resolved_iters += 1
                    self.fault_resolved_iterations[
                        'cpu'] = fault_resolved_iters
                    self.persist_state_data('cpu', 'CPU_USAGE_DATA')
                elif fault_resolved_iters >= iteration_limit:

                    # Create the cpu usage data message and hand it over
                    # to the egress processor to transmit
                    fault_resolved_event = "CPU usage has decreased to {}%, "\
                        "lower than the configured threshold of {}%.".format(
                            self._node_sensor.cpu_usage,
                            self._cpu_usage_threshold
                        )
                    logger.info(fault_resolved_event)

                    # Create the cpu usage update message and hand it over to the egress processor to transmit
                    cpuDataMsg = CPUdataMsg(
                        self._node_sensor.host_id, self._epoch_time,
                        self._node_sensor.csps, self._node_sensor.idle_time,
                        self._node_sensor.interrupt_time,
                        self._node_sensor.iowait_time,
                        self._node_sensor.nice_time,
                        self._node_sensor.softirq_time,
                        self._node_sensor.steal_time,
                        self._node_sensor.system_time,
                        self._node_sensor.user_time,
                        self._node_sensor.cpu_core_data,
                        self._node_sensor.cpu_usage, self.FAULT_RESOLVED,
                        fault_resolved_event)

                    # Add in uuid if it was present in the json request
                    if self._uuid is not None:
                        cpuDataMsg.set_uuid(self._uuid)
                    jsonMsg = cpuDataMsg.getJson()
                    self.cpu_sensor_data = jsonMsg
                    self.os_sensor_type["cpu_usage"] = self.cpu_sensor_data

                    # Transmit it to message processor
                    self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)
                    self.high_usage['cpu'] = False
                    self.usage_time_map['cpu'] = int(-1)
                    self.fault_resolved_iterations['cpu'] = 0
                    # Store the state to Persistent Cache.
                    self.persist_state_data('cpu', 'CPU_USAGE_DATA')
    def _process_msg(self, body):
        """Authenticate, validate and route one incoming request.

        The body is decoded from JSON unless it is already a dict. When
        ``use_security_lib`` is set, the message is verified through
        ``SSPL_SEC.sspl_verify_message`` using its "username" and
        "signature" fields; a non-zero result drops the message.

        * Actuator requests are validated against the actuator schema and
          forwarded to the message handler only when their
          "target_node_id" equals ``self._node_id``.
        * Sensor requests are validated against the sensor schema and
          always forwarded.
        * Anything else is ignored.

        Any exception raised while processing (bad JSON, missing "message",
        schema validation failure, ...) is logged and answered with an
        error ack written to the egress processor queue.

        Args:
            body: the incoming request, either a JSON string or a dict.
        """
        ingressMsg = {}
        uuid = None
        try:
            ingressMsg = body if isinstance(body, dict) else json.loads(body)

            # Authenticate message using username and signature fields
            username = ingressMsg.get("username")
            signature = ingressMsg.get("signature")
            message = ingressMsg.get("message")
            uuid = ingressMsg.get("uuid")
            # A missing "message" raises here and is reported by the
            # except clause below (uuid is deliberately still unset then).
            msg_len = len(message) + 1

            if uuid is None:
                uuid = "N/A"

            if use_security_lib and \
                    SSPL_SEC.sspl_verify_message(msg_len, str(message),
                                                 username, signature) != 0:
                logger.warning(
                    "IngressProcessor, Authentication failed on message: %s",
                    ingressMsg)
                return

            logger.debug("_process_msg, ingressMsg: %s", ingressMsg)

            # Get the incoming message type
            if message.get("actuator_request_type") is not None:
                msgType = message.get("actuator_request_type")

                # Validate against the actuator schema
                validate(ingressMsg, self._actuator_schema)

                # Compare target_node_id from the request to determine
                # if request is meant for the current node
                target_node_id = message.get("target_node_id")
                if target_node_id is None:
                    logger.warning(
                        "Required attribute target_node_id is missing "
                        "from actuator request %s, IGNORING!!", msgType)
                elif target_node_id == self._node_id:
                    self._send_to_msg_handler(msgType, message, uuid)
                else:
                    logger.debug(
                        "Node identifier mismatch, actuator request ignored.")

            elif message.get("sensor_request_type") is not None:
                msgType = message.get("sensor_request_type")

                # Validate against the sensor schema
                validate(ingressMsg, self._sensor_schema)
                self._send_to_msg_handler(msgType, message, uuid)

            # We only handle incoming actuator and sensor requests;
            # everything else falls through and is ignored.

        except Exception as ex:
            logger.error(
                "IngressProcessor, _process_msg failed to recognize "
                "message: %r with error %r", ingressMsg, ex)
            ack_msg = AckResponseMsg("Error Processing Msg",
                                     "Msg Handler Not Found", uuid).getJson()
            self._write_internal_msgQ(EgressProcessor.name(), ack_msg)
Esempio n. 16
0
    def _generate_cpu_data(self):
        """Refresh CPU statistics and raise or resolve the CPU usage alert.

        The node sensor is asked to re-read its "cpu_data". The configured
        usage threshold is normalised to an int or float, falling back to
        ``DEFAULT_CPU_USAGE_THRESHOLD`` when it cannot be parsed. The
        current usage is then compared against it:

        * usage >= threshold and no fault active: a FAULT message is sent
          and ``self.cpu_fault`` is set.
        * usage < threshold while a fault is active: a FAULT_RESOLVED
          message is sent and ``self.cpu_fault`` is cleared.

        The two conditions are mutually exclusive, so a reading exactly at
        the threshold can no longer raise and resolve the fault in the same
        call. Every message sent is also cached in ``self.cpu_sensor_data``
        and ``self.os_sensor_type["cpu_usage"]``.
        """
        # Notify the node sensor to update its data required for the cpu_data message
        successful = self._node_sensor.read_data("cpu_data", self._get_debug())
        if not successful:
            logger.error(
                "NodeDataMsgHandler, _generate_cpu_data was NOT successful.")

        # The configured threshold may arrive as int, float or string
        self._cpu_usage_threshold = str(self._cpu_usage_threshold)
        try:
            if self._cpu_usage_threshold.isdigit():
                self._cpu_usage_threshold = int(self._cpu_usage_threshold)
            else:
                self._cpu_usage_threshold = float(self._cpu_usage_threshold)
        except ValueError:
            logger.warning(
                "CPU Usage Alert, Invalid cpu_usage_threshold value are entered in config."
            )
            # Assigning default value to _cpu_usage_threshold
            self._cpu_usage_threshold = self.DEFAULT_CPU_USAGE_THRESHOLD

        sensor = self._node_sensor
        cpu_usage = sensor.cpu_usage
        threshold = self._cpu_usage_threshold

        def transmit(alert_type, event):
            # Build the cpu usage message, cache it for later sensor
            # requests and hand it over to the egress processor.
            cpuDataMsg = CPUdataMsg(
                sensor.host_id, self._epoch_time,
                sensor.csps, sensor.idle_time,
                sensor.interrupt_time,
                sensor.iowait_time, sensor.nice_time,
                sensor.softirq_time,
                sensor.steal_time,
                sensor.system_time, sensor.user_time,
                sensor.cpu_core_data,
                cpu_usage, self.site_id, self.rack_id,
                self.node_id, self.cluster_id, alert_type, event)

            # Add in uuid if it was present in the json request
            if self._uuid is not None:
                cpuDataMsg.set_uuid(self._uuid)
            jsonMsg = cpuDataMsg.getJson()
            self.cpu_sensor_data = jsonMsg
            self.os_sensor_type["cpu_usage"] = self.cpu_sensor_data

            # Transmit it to message processor
            self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)

        if cpu_usage >= threshold:
            # Only report the transition into the fault state once
            if not self.cpu_fault:
                self.cpu_fault = True
                fault_event = "CPU usage increased to %s, beyond configured threshold of %s" \
                                %(cpu_usage, threshold)
                logger.warning(fault_event)
                transmit(self.FAULT, fault_event)
        elif self.cpu_fault:
            # Usage dropped strictly below the threshold: resolve the fault
            fault_resolved_event = "CPU usage decreased to %s, lesser than configured threshold of %s" \
                %(cpu_usage, threshold)
            logger.warning(fault_resolved_event)
            transmit(self.FAULT_RESOLVED, fault_resolved_event)
            self.cpu_fault = False
Esempio n. 17
0
    def _generate_host_update(self):
        """Refresh host statistics and raise or resolve the memory alert.

        The node sensor is asked to re-read its "host_update" data. The
        configured memory usage threshold is normalised to an int or float,
        falling back to ``DEFAULT_HOST_MEMORY_USAGE_THRESHOLD`` when it
        cannot be parsed. ``total_memory["percent"]`` is then compared
        against it:

        * usage >= threshold and no fault active: a FAULT message is sent
          and ``self.host_fault`` is set.
        * usage < threshold while a fault is active: a FAULT_RESOLVED
          message is sent and ``self.host_fault`` is cleared.

        Every message sent is also cached in ``self.host_sensor_data`` and
        ``self.os_sensor_type["memory_usage"]``.
        """
        # Notify the node sensor to update its data required for the host_update message
        successful = self._node_sensor.read_data("host_update",
                                                 self._get_debug(),
                                                 self._units)
        if not successful:
            logger.error(
                "NodeDataMsgHandler, _generate_host_update was NOT successful."
            )

        # The configured threshold may arrive as int, float or string
        self._host_memory_usage_threshold = str(
            self._host_memory_usage_threshold)
        try:
            if self._host_memory_usage_threshold.isdigit():
                self._host_memory_usage_threshold = int(
                    self._host_memory_usage_threshold)
            else:
                self._host_memory_usage_threshold = float(
                    self._host_memory_usage_threshold)
        except ValueError:
            logger.warning(
                "Host Memory Alert, Invalid host_memory_usage_threshold value are entered in config."
            )
            # Assigning default value to _host_memory_usage_threshold
            self._host_memory_usage_threshold = self.DEFAULT_HOST_MEMORY_USAGE_THRESHOLD

        sensor = self._node_sensor
        memory_percent = sensor.total_memory["percent"]
        threshold = self._host_memory_usage_threshold

        def transmit(alert_type, event):
            # Build the host update message, cache it for later sensor
            # requests and hand it over to the egress processor.
            hostUpdateMsg = HostUpdateMsg(
                sensor.host_id, self._epoch_time,
                sensor.boot_time, sensor.up_time,
                sensor.uname, self._units, self.site_id,
                self.rack_id, self.node_id, self.cluster_id,
                sensor.total_memory,
                sensor.logged_in_users,
                sensor.process_count,
                sensor.running_process_count, alert_type,
                event)

            # Add in uuid if it was present in the json request
            if self._uuid is not None:
                hostUpdateMsg.set_uuid(self._uuid)
            jsonMsg = hostUpdateMsg.getJson()
            self.host_sensor_data = jsonMsg
            self.os_sensor_type["memory_usage"] = self.host_sensor_data

            # Transmit it to message processor
            self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)

        if memory_percent >= threshold:
            # Only report the transition into the fault state once
            if not self.host_fault:
                self.host_fault = True
                fault_event = "Host memory usage increased to %s, beyond configured threshold of %s" \
                                %(memory_percent, threshold)
                logger.warning(fault_event)
                transmit(self.FAULT, fault_event)
        elif self.host_fault:
            # Usage dropped below the threshold: resolve the active fault
            fault_resolved_event = "Host memory usage decreased to %s, lesser than configured threshold of %s" \
                                    %(memory_percent, threshold)
            logger.warning(fault_resolved_event)
            transmit(self.FAULT_RESOLVED, fault_resolved_event)
            self.host_fault = False
Esempio n. 18
0
    def _process_msg(self, jsonMsg):
        """Parse an incoming message and generate the requested data message.

        Three kinds of message are handled, checked in this order:

        1. A sensor request carrying
           ``sensor_request_type.node_data.sensor_type``. The third
           colon-separated field of that value selects the generator to
           run. Afterwards the message cached for that sensor type in
           ``self.os_sensor_type`` (if any) is written to the egress
           processor queue. Unknown sensor types are ignored, and a value
           with fewer than three fields is logged and ignored.
        2. A "sensor_response_type" of "devicename_serialnumber", which
           updates the device name to serial number mapping.
        3. A sensor request carrying
           ``sensor_request_type.node_data.info.resource_type``, which is
           passed to ``_generate_node_fru_data``.

        The uuid from "sspl_ll_msg_header", when present, is stored in
        ``self._uuid`` so that it can be sent back in the response; it is
        reset to None otherwise.

        Args:
            jsonMsg: the message, either a JSON string or a dict.
        """
        self._log_debug("_process_msg, jsonMsg: %s" % jsonMsg)

        if not isinstance(jsonMsg, dict):
            jsonMsg = json.loads(jsonMsg)

        # Parse out the uuid so that it can be sent back in response message
        self._uuid = None
        header = jsonMsg.get("sspl_ll_msg_header")
        if header is not None and header.get("uuid") is not None:
            self._uuid = header.get("uuid")
            self._log_debug("_processMsg, uuid: %s" % self._uuid)

        sensor_request = jsonMsg.get("sensor_request_type")
        node_data = None if sensor_request is None \
            else sensor_request.get("node_data")
        requested_type = None if node_data is None \
            else node_data.get("sensor_type")

        if requested_type is not None:
            # The sensor name is the third colon-separated field
            fields = requested_type.split(":")
            if len(fields) < 3:
                self._log_debug(
                    "_processMsg, ignoring malformed sensor_type: %s" %
                    requested_type)
                return
            self.sensor_type = fields[2]
            self._log_debug("_processMsg, sensor_type: %s" % self.sensor_type)

            # sensor type -> (generator method name, takes the message?)
            generators = {
                "memory_usage": ("_generate_host_update", False),
                "cpu_usage": ("_generate_cpu_data", False),
                "nw": ("_generate_if_data", False),
                "disk_space": ("_generate_disk_space_alert", False),
                "raid_data": ("_generate_raid_data", True),
                "raid_integrity": ("_generate_raid_integrity_data", True),
            }
            entry = generators.get(self.sensor_type)
            if entry is None:
                # Unrecognized sensor types are ignored
                return

            method_name, takes_message = entry
            generate = getattr(self, method_name)
            if takes_message:
                generate(jsonMsg)
            else:
                generate()

            # Answer the request with the most recently cached message
            cached_message = self.os_sensor_type.get(self.sensor_type, "")
            if cached_message:
                self._write_internal_msgQ(EgressProcessor.name(),
                                          cached_message)
            else:
                self._log_debug(
                    "NodeDataMsgHandler, _process_msg, "
                    f"No past data found for {self.sensor_type} sensor type")

        # Update mapping of device names to serial numbers for global use
        elif jsonMsg.get("sensor_response_type") is not None:
            if jsonMsg.get(
                    "sensor_response_type") == "devicename_serialnumber":
                self._update_devicename_sn_dict(jsonMsg)

        elif node_data is not None and \
                node_data.get("info") is not None and \
                node_data.get("info").get("resource_type") is not None:
            self._generate_node_fru_data(jsonMsg)
Esempio n. 19
0
    def _process_msg(self, jsonMsg):
        """Execute a thread-controller request and send back the response.

        A request that globally turns debug mode off is handled by
        ``_check_reset_all_modules`` and ends processing here. Otherwise
        the "module_name" and "thread_request" found under
        ``actuator_request_type.thread_controller`` select the action:
        restart, start, stop, status, degrade or active. Stopping the
        egress/ingress processors or the thread controller itself is
        refused, and degrade/active are only accepted for module "all";
        in those cases a warning is logged and no response is sent.

        The response carries ``self._thread_response`` together with the
        hostname and the optional "node_id" request parameter. It is
        written to the egress queue that matches ``self._product``.

        Args:
            jsonMsg: the decoded actuator request (a dict).
        """
        self._log_debug("_process_msg, jsonMsg: %s" % jsonMsg)

        # Check to see if debug mode is being globally turned off on all modules
        if self._check_reset_all_modules(jsonMsg) is True:
            return

        # Parse out the module name and request
        request = jsonMsg.get("actuator_request_type").get("thread_controller")
        module_name = request.get("module_name")
        thread_request = request.get("thread_request")

        # Parse out the uuid so that it can be sent back in Ack message
        uuid = None
        header = jsonMsg.get("sspl_ll_msg_header")
        if header is not None and header.get("uuid") is not None:
            uuid = header.get("uuid")
            self._log_debug("_processMsg, uuid: %s" % uuid)

        # Pass along the debug section to the module
        debug = jsonMsg.get("sspl_ll_debug")
        self.debug_section = \
            None if debug is None else {"sspl_ll_debug": debug}

        self._log_debug("_process_msg, self.debug_section: %s" %
                        self.debug_section)

        # Parse out thread request and call the appropriate method
        if thread_request == "restart":
            self._restart_module(module_name)
        elif thread_request == "start":
            self._start_module(module_name)
        elif thread_request == "stop":
            # Don't let the outside world stop us or shut down this thread
            if module_name in ("EgressProcessor", "IngressProcessor",
                               "ThreadController"):
                logger.warning(
                    "Attempt to stop message processors or ThreadController "
                    "Processors, ignoring. Please try 'restart' instead.")
                return
            self._stop_module(module_name)
        elif thread_request == "status":
            self._status_module(module_name)
        elif thread_request == "degrade":
            if module_name.lower() != "all":
                logger.warning(
                    "Invalid module_name {0}. Need 'all' in module_name".
                    format(module_name))
                return
            self._switch_to_degraded_state(self._sspl_modules)
        elif thread_request == "active":
            if module_name.lower() != "all":
                logger.warning(
                    "Invalid module_name {0}. Need 'all' in module_name".
                    format(module_name))
                return
            self._switch_to_active_state(self._sspl_modules)
        else:
            self._thread_response = "Error, unrecognized thread request"

        # node_id is optional; an absent or empty value is reported as []
        node_id = []
        parameters = request.get("parameters")
        if parameters is not None and parameters.get("node_id"):
            node_id = parameters.get("node_id")

        ack_type = {"hostname": self._hostname, "node_id": node_id}

        # Populate an actuator response message and transmit
        threadControllerMsg = ThreadControllerMsg(module_name,
                                                  self._thread_response,
                                                  json.dumps(ack_type))

        if uuid is not None:
            threadControllerMsg.set_uuid(uuid)
        msgString = threadControllerMsg.getJson()
        logger.info("ThreadController, response: %s", msgString)

        # Route the response to the egress queue of the running product
        product = self._product.lower()
        if product in [x.lower() for x in enabled_products]:
            self._write_internal_msgQ(EgressProcessor.name(), msgString)
        elif product in [x.lower() for x in cs_legacy_products]:
            self._write_internal_msgQ(PlaneCntrlRMQegressProcessor.name(),
                                      msgString)
Esempio n. 20
0
 def _transmit_json_msg(self, json_data):
     """Tag the trap data with this handler's trap name and queue it for egress.

     Note that "trapName" is written into the caller's ``json_data`` in place.
     """
     json_data["trapName"] = self._trap_name
     # Wrap the data in an SNMP trap message and hand it to the egress processor
     trap_msg = SNMPtrapMsg(json_data).getJson()
     self._write_internal_msgQ(EgressProcessor.name(), trap_msg)
Esempio n. 21
0
    def _send_msg(self, iem_components, log_timestamp):
        """Create a JSON message from IEM components and queue it for egress.

        The message is dropped (with a log entry where noted) when:

        * the severity is not in ``SEVERITY_LEVELS`` (warning logged),
        * the source id is not in ``SOURCE_IDS`` (warning logged),
        * the timestamp cannot be converted to an epoch time (error logged),
        * ``_are_components_in_range`` rejects the component, module or
          event id.

        Args:
            iem_components: sequence whose first six items are severity,
                source id, component id, module id, event id and
                description, in that order.
            log_timestamp: timestamp of the log entry, converted with
                ``_get_epoch_time_from_timestamp``.
        """
        impact = "NA"
        recommendation = "NA"
        # IEM format is IEC:DESCRIPTION
        # IEC format is SEVERITY|SOURCEID|COMPONENTID|MODULEID|EVENTID
        # Field lengths ----1---|---1----|------3----|----3---|---4---
        # Example IEM -> "IEC: BO1001000001:Error in connecting to controller"
        # Actual IEC doesn't contain separator between fields. It is shown
        # here just for readability. Each field has fixed length.
        severity, source_id, component_id, module_id, event_id, description = \
            [iem_components[i] for i in range(6)]

        # Check if severity level is valid
        if severity not in self.SEVERITY_LEVELS:
            logger.warning(f"Invalid Severity level: {severity}")
            return

        # Check for valid source id
        if source_id not in self.SOURCE_IDS:
            logger.warning(f"Invalid Source ID level: {source_id}")
            return

        # Check for valid event time
        event_time = self._get_epoch_time_from_timestamp(log_timestamp)
        if not event_time:
            logger.error("Timestamp is not in required format, discarding the message")
            return

        # Check for other components
        if not self._are_components_in_range(_comp_id=component_id,
                                             _module_id=module_id,
                                             _event_id=event_id):
            return

        # component-id for sspl=005
        if component_id == "005":
            event_code = component_id + module_id + event_id
            # Look the entry up once; items 1 and 2 hold impact and recommendation
            event_entry = Iem().EVENT_STRING[event_code]
            impact = event_entry[1]
            recommendation = event_entry[2]

        # Update severity and source_id. The alert type must be derived
        # first, while severity still holds the raw code.
        alert_type = iem_severity_to_alert_mapping.get(severity)
        severity = iem_severity_types.get(severity, severity)
        source_id = iem_source_types.get(source_id, source_id)

        # Decode component_id, module_id and event_id
        component_id, module_id, event_id = self._decode_msg(
            f"{component_id}{module_id}{event_id}")

        info = {
            "source_id": source_id,
            "component_id": component_id,
            "module_id": module_id,
            "event_id": event_id,
            "severity": severity,
            "description": description,
            "impact": impact,
            "recommendation": recommendation,
            "alert_type": alert_type,
            "event_time": event_time,
            # The IEC is every raw component except the trailing description
            "IEC": "".join(iem_components[:-1])
        }
        json_msg = IEMDataMsg(info).getJson()
        self._write_internal_msgQ(EgressProcessor.name(), json_msg)
Esempio n. 22
0
    def _generate_host_update(self):
        """Create & transmit a host update message as defined
            by the sensor response json schema"""

        current_time = Utility.get_current_time()

        # Notify the node sensor to update its data required for the host_update message
        successful = self._node_sensor.read_data("host_update",
                                                 self._get_debug(),
                                                 self._units)
        if not successful:
            logger.error(
                "NodeDataMsgHandler, _generate_host_update was NOT successful."
            )

        self._host_memory_usage_threshold = str(
            self._host_memory_usage_threshold)
        try:
            if self._host_memory_usage_threshold.isdigit():
                self._host_memory_usage_threshold = int(
                    self._host_memory_usage_threshold)
            else:
                self._host_memory_usage_threshold = float(
                    self._host_memory_usage_threshold)
        except ValueError:
            logger.warn(
                "Host Memory Alert, Invalid host_memory_usage_threshold value are entered in config."
            )
            # Assigning default value to _memory_usage_threshold
            self._host_memory_usage_threshold = self.DEFAULT_HOST_MEMORY_USAGE_THRESHOLD

        memory_persistent_data = self.read_persistent_data('MEMORY_USAGE_DATA')
        if memory_persistent_data.get('memory_usage_time_map') is not None:
            previous_check_time = int(
                memory_persistent_data['memory_usage_time_map'])
        else:
            previous_check_time = int(-1)
        if memory_persistent_data\
                .get('memory_fault_resolved_iterations') is not None:
            fault_resolved_iters = int(
                memory_persistent_data['memory_fault_resolved_iterations'])
        else:
            fault_resolved_iters = 0
        try:
            iteration_limit = int(self._high_memory_usage_wait_threshold /
                                  self._transmit_interval)
        except ZeroDivisionError:
            iteration_limit = 0
        self.usage_time_map['memory'] = current_time

        if self._node_sensor.total_memory["percent"] >= self._host_memory_usage_threshold \
           and not self.high_usage['memory']:
            if previous_check_time == -1:
                previous_check_time = current_time
                self.persist_state_data('memory', 'MEMORY_USAGE_DATA')

            if self.usage_time_map[
                    'memory'] - previous_check_time >= self._high_memory_usage_wait_threshold:
                self.high_usage['memory'] = True
                self.fault_resolved_iterations['memory'] = 0
                # Create the memory data message and hand it over
                # to the egress processor to transmit
                fault_event = "Host memory usage has increased to {}%,"\
                    "beyond the configured threshold of {}% "\
                    "for more than {} seconds.".format(
                        self._node_sensor.total_memory["percent"],
                        self._host_memory_usage_threshold,
                        self._high_memory_usage_wait_threshold
                    )

                logger.warn(fault_event)

                logged_in_users = []
                # Create the host update message and hand it over to the egress processor to transmit
                hostUpdateMsg = HostUpdateMsg(
                    self._node_sensor.host_id, self._epoch_time,
                    self._node_sensor.boot_time, self._node_sensor.up_time,
                    self._node_sensor.uname, self._units,
                    self._node_sensor.total_memory,
                    self._node_sensor.logged_in_users,
                    self._node_sensor.process_count,
                    self._node_sensor.running_process_count, self.FAULT,
                    fault_event)
                # Add in uuid if it was present in the json request
                if self._uuid is not None:
                    hostUpdateMsg.set_uuid(self._uuid)
                jsonMsg = hostUpdateMsg.getJson()
                # Transmit it to message processor
                self.host_sensor_data = jsonMsg
                self.os_sensor_type["memory_usage"] = self.host_sensor_data
                self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)
                self.persist_state_data('memory', 'MEMORY_USAGE_DATA')

        if self._node_sensor.total_memory[
                "percent"] < self._host_memory_usage_threshold:
            if not self.high_usage['memory']:
                self.persist_state_data('memory', 'MEMORY_USAGE_DATA')
            else:
                if fault_resolved_iters < iteration_limit:
                    fault_resolved_iters += 1
                    self.fault_resolved_iterations[
                        'memory'] = fault_resolved_iters
                    self.persist_state_data('memory', 'MEMORY_USAGE_DATA')
                elif fault_resolved_iters >= iteration_limit:
                    # Create the memory data message and hand it over
                    # to the egress processor to transmit
                    fault_resolved_event = "Host memory usage has decreased to {}%, "\
                        "lower than the configured threshold of {}%.".format(
                            self._node_sensor.total_memory["percent"],
                            self._host_memory_usage_threshold
                        )
                    logger.info(fault_resolved_event)
                    logged_in_users = []

                    # Create the host update message and hand it over to the egress processor to transmit
                    hostUpdateMsg = HostUpdateMsg(
                        self._node_sensor.host_id, self._epoch_time,
                        self._node_sensor.boot_time, self._node_sensor.up_time,
                        self._node_sensor.uname, self._units,
                        self._node_sensor.total_memory,
                        self._node_sensor.logged_in_users,
                        self._node_sensor.process_count,
                        self._node_sensor.running_process_count,
                        self.FAULT_RESOLVED, fault_resolved_event)

                    # Add in uuid if it was present in the json request
                    if self._uuid is not None:
                        hostUpdateMsg.set_uuid(self._uuid)
                    jsonMsg = hostUpdateMsg.getJson()
                    # Transmit it to message processor
                    self.host_sensor_data = jsonMsg
                    self.os_sensor_type["memory_usage"] = self.host_sensor_data
                    self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)
                    self.high_usage['memory'] = False
                    self.usage_time_map['memory'] = int(-1)
                    self.fault_resolved_iterations['memory'] = 0
                    self.persist_state_data('memory', 'MEMORY_USAGE_DATA')