def enter(service):
    """Run the entry actions for a service whose monitoring is disabled.

    Logs a warning, discards the service name from both
    ``Service.non_active`` and ``Service.monitoring_disabled``, and calls
    ``remove()`` on the service's ``properties_changed_signal`` when one
    is set.
    """
    name = service.name
    logger.warning("{} service is disabled, it will not be "
                   "monitored".format(name))

    # The service must no longer appear in either tracking collection.
    for tracked in (Service.non_active, Service.monitoring_disabled):
        tracked.discard(name)

    signal = service.properties_changed_signal
    if signal:
        signal.remove()
Ejemplo n.º 2
0
 def _get_bmc_info(self):
     """
     Collect BMC network details using ``ipmitool lan print`` and one ping.

     The first IPv4-looking token in the ipmitool output is taken as the BMC
     address. ``self.prev_bmcip`` is updated with it, and ``ipV4Prev`` carries
     the previously seen address when the address has changed.

     nwCableConnection will be default UNKNOWN,
     Until solution to find bmc eth port cable connection status is found.

     Returns:
         dict with keys ifId, ipV4Prev, ipV4, nwStatus, nwCableConnStatus.
         The defaults (empty addresses, nwStatus "DOWN") are returned when no
         address is found or when any step raises.
     """
     # Built outside the try block so the final return can never hit an
     # unbound name.
     bmcdata = {'ifId': 'ebmc0', 'ipV4Prev': "", 'ipV4': "", 'nwStatus': "DOWN", 'nwCableConnStatus': 'UNKNOWN'}
     try:
         ipdata = sp.Popen("sudo ipmitool lan print", shell=True, stdout=sp.PIPE, stderr=sp.PIPE).communicate()[0].decode().strip()
         # Raw string: "\d" and "\." are regex escapes, not string escapes.
         addresses = re.findall(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", ipdata)
         if not addresses:
             # Previously surfaced as an opaque "list index out of range".
             logger.error("Exception occurs while fetching bmc_info:"
                          "no IPv4 address found in ipmitool output")
             return bmcdata
         bmcip = addresses[0]

         child = sp.Popen(["ping", "-c1", "-W1", "-q", bmcip], stdout=sp.PIPE)
         child.communicate()  # child must be communicated before fetching return code.
         retcode = child.returncode

         if self.prev_bmcip is not None and self.prev_bmcip != bmcip:
             # Address changed since the last call: report old and new.
             bmcdata['ipV4Prev'] = self.prev_bmcip
             bmcdata['ipV4'] = bmcip
             self.prev_bmcip = bmcip
         else:
             self.prev_bmcip = bmcdata['ipV4Prev'] = bmcdata['ipV4'] = bmcip

         if retcode == 0:
             bmcdata['nwStatus'] = "UP"
         else:
             logger.warning("BMC Host:{0} is not reachable".format(bmcip))
     except Exception as e:
         logger.error("Exception occurs while fetching bmc_info:{}".format(e))
     return bmcdata
Ejemplo n.º 3
0
    def check_and_send_alert(self):
        """Checks whether conditions are met and sends alert if required
        Alerts will be sent if -
        1. All 4 phys of a sas port go up -> down : fault alert
        2. All 4 phys of a sas port come down -> up : fault_resolved alert
        Sensor data stored in persistent storage is a dict of { sas_port_number : alert_type }

        When the stored data has no readable 'version' entry it is replaced
        by a fresh record (every port and 'conn' set to 'fault_resolved')
        and written to the store. In that call, and whenever the stored
        version differs from CURRENT_DATA_VERSION,
        handle_current_version_data() is not invoked.
        """
        # Update sas ports status
        self.update_sas_ports_status()

        # Check the version of stored alert
        version = None
        try:
            # Try to get the version
            # Exception will be raised if stored alert is None or no Version is available
            version = self.sas_phy_stored_alert['version']
        except Exception:
            # Adjacent literals instead of a backslash continuation inside
            # the string, which leaked the source indentation into the
            # logged message.
            logger.warning(
                "Found no data or old data format for SASPortSensor, "
                f"updating data format to version {self.CURRENT_DATA_VERSION}"
            )
            # Versioning is not implemented or there is no data, write new data
            # Initialize dummy fault_resolved for all sas ports and conn
            fresh_data = {
                'version': self.CURRENT_DATA_VERSION,
                'conn': 'fault_resolved',
            }
            for port in range(self.NUM_SAS_PORTS):
                fresh_data[port] = 'fault_resolved'
            self.sas_phy_stored_alert = fresh_data
            # Save data to store
            store.put(self.sas_phy_stored_alert, self.SAS_PORT_SENSOR_DATA)

        if version == self.CURRENT_DATA_VERSION:
            self.handle_current_version_data()
Ejemplo n.º 4
0
    def get_sensor_list_by_type(self, fru_type):
        """Returns the sensor list based on FRU type using ipmitool utility
           ipmitool sdr type '<FRU>'.
           Example of output form 'sdr type 'Fan'' command:
           Sys Fan 2B       | 33h | ok  | 29.4 | 5332 RPM
           ( sensor_id | sensor_num | status | entity_id |
            <FRU Specific attribute> )
            Params : self, fru_type
            Output Format : List of Tuple, or None when the ipmitool
                            command returns a non-zero code
            Output Example : [(HDD 1 Status, f1, ok, 4.2, Drive Present),]

            The sensor number has its 'h' affix stripped and is lower-cased.
            Blank lines are skipped; lines that do not have exactly five
            '|'-separated fields are logged and skipped.
        """
        sensor_list_out, retcode = self._run_ipmitool_subcommand(f"sdr type '{fru_type.title()}'")
        if retcode != 0:
            if isinstance(sensor_list_out, tuple):
                sensor_list_out = [val for val in sensor_list_out if val]
            msg = "ipmitool sdr type command failed: {0}".format(b''.join(sensor_list_out))
            logger.warning(msg)
            return None
        sensor_list = b''.join(sensor_list_out).decode("utf-8").split("\n")

        out = []
        for sensor in sensor_list:
            # Skip blank lines rather than stopping at the first one, so a
            # stray empty line cannot hide the sensors that follow it.
            if not sensor.strip():
                continue
            # Example of output form 'sdr type' command:
            # Sys Fan 2B       | 33h | ok  | 29.4 | 5332 RPM
            # PS1 1a Fan Fail  | A0h | ok  | 29.13 |
            # HDD 1 Status     | F1h | ok  |  4.2 | Drive Present
            fields_list = [f.strip() for f in sensor.split("|")]
            if len(fields_list) != 5:
                # Unpacking would raise ValueError and lose every sensor.
                logger.warning(
                    "Skipping unexpected 'sdr type' output line: {0}".format(sensor))
                continue
            sensor_id, sensor_num, status, entity_id, reading = fields_list
            sensor_num = sensor_num.strip("h").lower()

            out.append((sensor_id, sensor_num, status, entity_id, reading))
        return out
    def _raid_health_monitor(self):
        try:
            devices = self._get_devices()
            if len(devices) == 0:
                return
            logger.debug("Fetched devices:{}".format(devices))

            for device in devices:
                # Update the state as 'check' for RAID device file
                result = self._update_raid_device_file(device)
                if result == "failed":
                    self._retry_execution(self._update_raid_device_file,
                                          device)
                logger.info("RAID device state is changed to 'check'")

                # Check RAID device array state is 'idle' or not
                result = self._check_raid_state(device)
                if result == "failed":
                    logger.warning(
                        "'Idle' state not found for RAID device:{}".format(
                            device))
                    # Retry to check RAID state
                    self._retry_execution(self._check_raid_state, device)
                logger.info(
                    "'idle' state is found in Raid device:{}.".format(device))

                # Check Mismatch count in RAID device files.
                result = self._check_mismatch_count(device)
                if result == "failed":
                    # Persist RAID device fault state and send alert
                    fault_status_file = self.DEFAULT_RAID_DATA_PATH + device + "_" + RaidDataConfig.RAID_MISMATCH_FAULT_STATUS.value
                    if os.path.exists(fault_status_file):
                        with open(fault_status_file, 'r') as fs:
                            data = fs.read().rstrip()
                        if self.FAULT_RESOLVED in data:
                            self.alert_type = self.FAULT
                            self._alert_msg = RaidAlertMsgs.MISMATCH_MSG.value
                            self._send_json_msg(self.alert_type, device,
                                                self._alert_msg)
                            self._update_fault_state_file(
                                device, self.FAULT, fault_status_file)
                    else:
                        self.alert_type = self.FAULT
                        self._alert_msg = RaidAlertMsgs.MISMATCH_MSG.value
                        self._send_json_msg(self.alert_type, device,
                                            self._alert_msg)
                        self._update_fault_state_file(device, self.FAULT,
                                                      fault_status_file)

                    # Retry to check mismatch_cnt
                    self._retry_execution(self._check_mismatch_count, device)
                logger.debug(
                    "No mismatch count is found in Raid device:{}".format(
                        device))

        except Exception as ae:
            logger.error("Failed in monitoring RAID health. ERROR:{}".format(
                str(ae)))
Ejemplo n.º 6
0
    def _nw_cable_alert_exists(self, interfaces):
        """Checks cable connection status with physical link(carrier) state and
        avoids duplicate alert reporting by comparing with its previous state.
        Fault detection is identified by physical link state Down.
        Fault resolved is identified by physical link state changed from Down to Up.
        """
        identified_cables = {}

        for interface in interfaces:

            interface_name = interface.get("ifId")
            phy_link_status = interface.get("nwCableConnStatus")

            # fault detected (Down, Up to Down)
            if phy_link_status == 'DOWN':
                if self.prev_cable_cnxns.get(
                        interface_name) != phy_link_status:
                    if self.prev_cable_cnxns.get(interface_name):
                        logger.warning(
                            f"Cable connection fault is detected with '{interface_name}'"
                        )
                        identified_cables[interface_name] = self.FAULT
                    self.prev_cable_cnxns[interface_name] = phy_link_status
            # fault resolved (Down to Up)
            elif phy_link_status == 'UP':
                if self.prev_cable_cnxns.get(
                        interface_name) != phy_link_status:
                    if self.prev_cable_cnxns.get(interface_name):
                        logger.info(
                            f"Cable connection fault is resolved with '{interface_name}'"
                        )
                        identified_cables[interface_name] = self.FAULT_RESOLVED

                        if self.interface_fault_state and interface_name in self.interface_fault_state:
                            # After the cable fault is resolved, unset the flag for interface
                            # So that, it can be tracked further for any failure
                            self.INTERFACE_FAULT_DETECTED = False
                            self.interface_fault_state[
                                interface_name] = self.INTERFACE_FAULT_DETECTED

                            # Also clear the global nw interface dictionary
                            self.prev_nw_status[
                                interface_name] = phy_link_status

                    self.prev_cable_cnxns[interface_name] = phy_link_status
            else:
                logger.debug(
                    f"Cable connection state is unknown with '{interface_name}'"
                )

        return identified_cables
Ejemplo n.º 7
0
    def _get_nwalert(self, interfaces):
        """
        Get network interfaces with fault/OK state for each interface.

        The "nwStatus" of every interface is compared with the value kept
        in self.prev_nw_status. FAULT is reported when a recorded 'UP'
        becomes 'DOWN' or 'UNKNOWN'; FAULT_RESOLVED is reported when any
        recorded state becomes 'UP'. Other status values are only logged.
        Exceptions are logged and the alerts gathered so far are returned.

        Parameters:
                    interfaces(list) : List of available network interfaces

        Returns: Dictionary of network interfaces having key as interface name and value as fault state.

        Return type: dict
        """
        nw_alerts = {}
        try:
            for interface in interfaces:
                interface_name = interface.get("ifId")
                nw_status = interface.get("nwStatus")
                logger.debug("{0}:{1}".format(interface_name, nw_status))

                if nw_status not in ('DOWN', 'UNKNOWN', 'UP'):
                    logger.warning(
                        f"Network connection state is:'{nw_status}', for interface:'{interface_name}'"
                    )
                    continue

                last_status = self.prev_nw_status.get(interface_name)
                if last_status == nw_status:
                    # Same state as the previous poll, nothing to report.
                    continue

                if nw_status == 'UP':
                    # fault resolved (Down to Up)
                    if last_status:
                        logger.info(
                            f"Network connection fault is resolved for interface:'{interface_name}'"
                        )
                        nw_alerts[interface_name] = self.FAULT_RESOLVED
                elif last_status == 'UP':
                    # fault detected (Up to Down/UNKNOWN)
                    logger.warning(
                        f"Network connection fault is detected for interface:'{interface_name}'"
                    )
                    nw_alerts[interface_name] = self.FAULT

                self.prev_nw_status[interface_name] = nw_status
        except Exception as e:
            logger.error(
                f"Exception occurs while checking for network alert condition:'{e}'"
            )
        logger.debug("nw_alerts existed for:{}".format(nw_alerts))
        return nw_alerts
Ejemplo n.º 8
0
    def get_sensor_props(self, sensor_id):
        """Returns individual sensor instance properties based on
           sensor id using ipmitool utility
           ipmitool sensor get "Sys Fan 1A"
           Returns FRU instance specific information
           Params : self, sensor_id
           Output Format : Tuple inside dictionary of common and specific data
           Output Example : ({common dict data},{specific dict data})

           On a non-zero return code the result is
           (False, {sensor_id: {"ERROR": <message>}}) instead.

           Each "key : value" line is split on its first ':' only, so values
           that themselves contain ':' are kept intact. A line without ':'
           is appended (after a newline) to the value of the preceding key.
        """
        props_list_out, retcode = self._run_ipmitool_subcommand("sensor get '{0}'".format(sensor_id))
        if retcode != 0:
            if isinstance(props_list_out, tuple):
                props_list_out = [val for val in props_list_out if val]
            msg = "ipmitool sensor get command failed: {0}".format(b''.join(props_list_out))
            logger.warning(msg)
            err_response = {sensor_id: {"ERROR": msg}}
            return (False, err_response)
        props_list = b''.join(props_list_out).decode("utf-8").split("\n")
        props_list = props_list[1:] # The first line is 'Locating sensor record...'

        specific = {}
        curr_key = None
        for prop in props_list:
            if prop == '':
                continue
            if ':' in prop:
                # partition() splits on the first ':' only; split(":") made
                # the two-name unpacking raise ValueError for values such
                # as "12:30:45".
                key, _, val = prop.partition(":")
                curr_key = key.strip()
                specific[curr_key] = val.strip()
            elif curr_key is not None:
                specific[curr_key] += "\n" + prop
            # A continuation line before any key has nothing to attach to
            # and is dropped (it used to raise KeyError on the None key).

        # Whatever keys from common_props are present,
        # move them to the 'common' dict
        common_props = ('Sensor ID', 'Entity ID')
        common = {name: specific.pop(name)
                  for name in common_props if name in specific}

        return (common, specific)
Ejemplo n.º 9
0
    def check_notactive_services(self):
        """
           Monitor non-active Services.

           Raise FAULT Alert if any of the not-active services has exceeded
           the threshold time for inactivity.

           A service that times out is removed from not_active_services,
           recorded in failed_services (once only) and reported through
           raise_alert with alert index 1.
        """
        # Iterate over a snapshot: entries are popped while looping.
        not_active_services_copy = self.not_active_services.copy()
        for service, [start_time, prev_state, prev_substate]\
                                 in not_active_services_copy.items():

            if self.current_time() - start_time > self.max_wait_time:
                status = self.service_status[service]
                state = status["state"]
                substate = status["substate"]
                pid = status["pid"]
                self.not_active_services.pop(service)
                # Avoid a duplicate entry: remove() elsewhere drops only one
                # occurrence, which would leave a recovered service marked
                # as failed.
                if service not in self.failed_services:
                    self.failed_services.append(service)
                self.raise_alert(service, prev_state, state, prev_substate,
                                 substate, pid, pid, 1)
                logger.warning(f"{service} in {state}:{substate} for "\
                               f"more than {self.max_wait_time} seconds.")
Ejemplo n.º 10
0
 def is_env_vm(self):
     """
     Returns true if VM env, false otherwise

     Runs ``facter is_virtual`` and returns False only when the first
     output line contains 'false'. When the command prints nothing or an
     exception is raised, a warning is logged and True (VM) is returned.
     """
     is_vm = True
     CMD = "facter is_virtual"
     try:
         subout = subprocess.Popen(CMD,
                                   shell=True,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
         # communicate() drains both pipes and waits for the child, so the
         # process is reaped and its pipes are closed.
         stdout_data, _ = subout.communicate()
         result = stdout_data.splitlines()
         if not result:
             logger.warning(
                 "Not able to read whether env is vm or not, assuming VM env."
             )
         elif 'false' in result[0].decode():
             is_vm = False
     except Exception as e:
         # f-string so the exception text is actually interpolated; the
         # message used to be logged with a literal "{e}".
         logger.warning(
             f"Error while reading whether env is vm or not, assuming VM env : {e}"
         )
     return is_vm
Ejemplo n.º 11
0
    def action_per_transition(self, service, prev_state, state, prev_substate,
                              substate, prev_pid, pid):
        """Take action according to the state change of the service.

        Updates self.not_active_services (service -> [time, prev_state,
        prev_substate]) and self.failed_services for the transition
        prev_state -> state, then calls raise_alert when an alert index
        was selected.

        Alert indexes assigned here: 0 when the service is recorded as
        failed, 2 when a service listed in failed_services becomes active
        again, 3 for a transition this method does not handle (also logged
        as a warning). -1 means no alert is raised.
        """
        # alert_info_index : index pointing to alert_info table from
        #               ServiceMonitor:raise_alerts() representing alert
        #               description, type, impact etc. to be sent.
        alert_info_index = -1

        logger.debug(f"ServiceMonitor:action_per_transition for {service} : " + \
            f"({prev_state}:{prev_substate}) -> ({state}:{substate})")

        if prev_state in ["active", "reloading"]:
            if state == "active":
                # reloading -> active
                # Default given to pop(): the service may not be tracked
                # (e.g. active -> active), which used to raise KeyError.
                self.not_active_services.pop(service, None)
                if service in self.failed_services:
                    self.failed_services.remove(service)
                    alert_info_index = 2
            elif state != "failed":
                # active -> deactivating/inactive/reloading/activating
                # or
                # reloading -> deactivating/inactive/activating
                self.not_active_services[service] = \
                    [self.current_time(), prev_state, prev_substate]
            else:
                # active/reloading -> failed
                if service not in self.failed_services:
                    self.failed_services.append(service)
                    alert_info_index = 0
        elif prev_state == "deactivating":
            if state in ["inactive", "activating"]:
                # deactivating -> inactive/activating
                if service not in self.not_active_services:
                    self.not_active_services[service] = \
                        [self.current_time(), prev_state, prev_substate]
            elif state == "failed":
                # deactivating -> failed
                if service not in self.failed_services:
                    self.failed_services.append(service)
                    alert_info_index = 0
            elif state == "active":
                # deactivating -> active
                if service in self.not_active_services:
                    self.not_active_services.pop(service)
                if service in self.failed_services:
                    self.failed_services.remove(service)
                    alert_info_index = 2
            else:
                alert_info_index = 3
        elif prev_state in ["inactive", "failed"]:
            if state == "activating":
                # inactive/failed -> activating
                if service not in self.not_active_services:
                    self.not_active_services[service] = \
                        [self.current_time(), prev_state, prev_substate]
            elif state == "active":
                # inactive/failed -> active
                if service in self.failed_services:
                    self.failed_services.remove(service)
                    alert_info_index = 2
                if service in self.not_active_services:
                    self.not_active_services.pop(service)
            elif state == "failed":
                # inactive -> failed
                if service not in self.failed_services:
                    self.failed_services.append(service)
                    alert_info_index = 0
            else:
                alert_info_index = 3
        elif prev_state == "activating":
            if service in self.not_active_services:
                self.not_active_services.pop(service)
            if state in ["inactive", "deactivating"]:
                # activating -> inactive/deactivating
                # The alert is still always raised; only the list entry is
                # de-duplicated, because remove() drops a single occurrence.
                if service not in self.failed_services:
                    self.failed_services.append(service)
                alert_info_index = 0
            elif state == "active":
                # activating -> active
                if service in self.failed_services:
                    self.failed_services.remove(service)
                    alert_info_index = 2
                else:
                    # its a restart.
                    pass
            elif state == "failed":
                # activating -> failed
                if service not in self.failed_services:
                    self.failed_services.append(service)
                    alert_info_index = 0
            else:
                alert_info_index = 3

        if alert_info_index == 3:
            logger.warning(f"{service} service state transition from "\
                           f"{prev_state} to {state} is not handled.")
        if alert_info_index != -1:
            self.raise_alert(service, prev_state, state, prev_substate,
                             substate, prev_pid, pid, alert_info_index)
Ejemplo n.º 12
0
    def _generate_disk_space_alert(self):
        """Create & transmit a disk_space_alert message as defined
            by the sensor response json schema"""

        # Notify the node sensor to update its data required for the disk_space_data message
        successful = self._node_sensor.read_data("disk_space_alert",
                                                 self._get_debug(),
                                                 self._units)
        if not successful:
            logger.error(
                "NodeDataMsgHandler, _generate_disk_space_alert was NOT successful."
            )
            return

        # Changing disk_usage_threshold type according to what value type entered in config file
        self._disk_usage_threshold = str(self._disk_usage_threshold)
        try:
            if self._disk_usage_threshold.isdigit():
                self._disk_usage_threshold = int(self._disk_usage_threshold)
            else:
                self._disk_usage_threshold = float(self._disk_usage_threshold)
        except ValueError:
            logger.warning(
                "Disk Space Alert, Invalid disk_usage_threshold value are entered in config."
            )
            # Assigning default value to _disk_usage_threshold
            self._disk_usage_threshold = self.DEFAULT_DISK_USAGE_THRESHOLD

        if self._node_sensor.disk_used_percentage >= self._disk_usage_threshold:
            if not self.disk_fault:
                self.disk_fault = True
                # Create the disk space data message and hand it over to the egress processor to transmit
                fault_event = "Disk usage increased to %s, beyond configured threshold of %s" \
                                %(self._node_sensor.disk_used_percentage, self._disk_usage_threshold)
                logger.warning(fault_event)
                diskSpaceAlertMsg = DiskSpaceAlertMsg(
                    self._node_sensor.host_id, self._epoch_time,
                    self._node_sensor.total_space,
                    self._node_sensor.free_space,
                    self._node_sensor.disk_used_percentage, self._units,
                    self.site_id, self.rack_id, self.node_id, self.cluster_id,
                    self.FAULT, fault_event)

                # Add in uuid if it was present in the json request
                if self._uuid is not None:
                    diskSpaceAlertMsg.set_uuid(self._uuid)
                jsonMsg = diskSpaceAlertMsg.getJson()
                self.disk_sensor_data = jsonMsg
                self.os_sensor_type["disk_space"] = self.disk_sensor_data

                # Transmit it out over rabbitMQ channel
                self._write_internal_msgQ(RabbitMQegressProcessor.name(),
                                          jsonMsg)

        if (self._node_sensor.disk_used_percentage <=
                self._disk_usage_threshold) and (self.disk_fault == True):
            # Create the disk space data message and hand it over to the egress processor to transmit
            fault_resolved_event = "Disk usage decreased to %s, lesser than configured threshold of %s" \
                                %(self._node_sensor.disk_used_percentage, self._disk_usage_threshold)
            logger.warning(fault_resolved_event)
            diskSpaceAlertMsg = DiskSpaceAlertMsg(
                self._node_sensor.host_id, self._epoch_time,
                self._node_sensor.total_space, self._node_sensor.free_space,
                self._node_sensor.disk_used_percentage, self._units,
                self.site_id, self.rack_id, self.node_id, self.cluster_id,
                self.FAULT_RESOLVED, fault_resolved_event)

            # Add in uuid if it was present in the json request
            if self._uuid is not None:
                diskSpaceAlertMsg.set_uuid(self._uuid)
            jsonMsg = diskSpaceAlertMsg.getJson()
            self.disk_sensor_data = jsonMsg
            self.os_sensor_type["disk_space"] = self.disk_sensor_data

            # Transmit it out over rabbitMQ channel
            self._write_internal_msgQ(RabbitMQegressProcessor.name(), jsonMsg)
            self.disk_fault = False
Ejemplo n.º 13
0
    def _generate_cpu_data(self):
        """Create & transmit a cpu_data message as defined
            by the sensor response json schema"""

        # Notify the node sensor to update its data required for the cpu_data message
        successful = self._node_sensor.read_data("cpu_data", self._get_debug())
        if not successful:
            logger.error(
                "NodeDataMsgHandler, _generate_cpu_data was NOT successful.")

        self._cpu_usage_threshold = str(self._cpu_usage_threshold)
        try:
            if self._cpu_usage_threshold.isdigit():
                self._cpu_usage_threshold = int(self._cpu_usage_threshold)
            else:
                self._cpu_usage_threshold = float(self._cpu_usage_threshold)
        except ValueError:
            logger.warning(
                "CPU Usage Alert, Invalid host_memory_usage_threshold value are entered in config."
            )
            # Assigning default value to _cpu_usage_threshold
            self._cpu_usage_threshold = self.DEFAULT_CPU_USAGE_THRESHOLD

        if self._node_sensor.cpu_usage >= self._cpu_usage_threshold:

            if not self.cpu_fault:
                self.cpu_fault = True
                # Create the cpu usage data message and hand it over to the egress processor to transmit

                fault_event = "CPU usage increased to %s, beyond configured threshold of %s" \
                                %(self._node_sensor.cpu_usage, self._cpu_usage_threshold)
                logger.warning(fault_event)

                # Create the local mount data message and hand it over to the egress processor to transmit
                cpuDataMsg = CPUdataMsg(
                    self._node_sensor.host_id, self._epoch_time,
                    self._node_sensor.csps, self._node_sensor.idle_time,
                    self._node_sensor.interrupt_time,
                    self._node_sensor.iowait_time, self._node_sensor.nice_time,
                    self._node_sensor.softirq_time,
                    self._node_sensor.steal_time,
                    self._node_sensor.system_time, self._node_sensor.user_time,
                    self._node_sensor.cpu_core_data,
                    self._node_sensor.cpu_usage, self.site_id, self.rack_id,
                    self.node_id, self.cluster_id, self.FAULT, fault_event)

                # Add in uuid if it was present in the json request
                if self._uuid is not None:
                    cpuDataMsg.set_uuid(self._uuid)
                jsonMsg = cpuDataMsg.getJson()
                self.cpu_sensor_data = jsonMsg
                self.os_sensor_type["cpu_usage"] = self.cpu_sensor_data

                # Transmit it out over rabbitMQ channel
                self._write_internal_msgQ(RabbitMQegressProcessor.name(),
                                          jsonMsg)

        if (self._node_sensor.cpu_usage <=
                self._cpu_usage_threshold) and (self.cpu_fault == True):
            # Create the cpu usage data message and hand it over to the egress processor to transmit
            fault_resolved_event = "CPU usage decreased to %s, lesser than configured threshold of %s" \
                %(self._node_sensor.cpu_usage, self._cpu_usage_threshold)
            logger.warning(fault_resolved_event)

            # Create the local mount data message and hand it over to the egress processor to transmit
            cpuDataMsg = CPUdataMsg(
                self._node_sensor.host_id, self._epoch_time,
                self._node_sensor.csps, self._node_sensor.idle_time,
                self._node_sensor.interrupt_time,
                self._node_sensor.iowait_time, self._node_sensor.nice_time,
                self._node_sensor.softirq_time, self._node_sensor.steal_time,
                self._node_sensor.system_time, self._node_sensor.user_time,
                self._node_sensor.cpu_core_data, self._node_sensor.cpu_usage,
                self.site_id, self.rack_id, self.node_id, self.cluster_id,
                self.FAULT_RESOLVED, fault_resolved_event)

            # Add in uuid if it was present in the json request
            if self._uuid is not None:
                cpuDataMsg.set_uuid(self._uuid)
            jsonMsg = cpuDataMsg.getJson()
            self.cpu_sensor_data = jsonMsg
            self.os_sensor_type["cpu_usage"] = self.cpu_sensor_data

            # Transmit it out over rabbitMQ channel
            self._write_internal_msgQ(RabbitMQegressProcessor.name(), jsonMsg)
            self.cpu_fault = False
Ejemplo n.º 14
0
    def _generate_host_update(self):
        """Create & transmit a host update message as defined
            by the sensor response json schema"""

        # Notify the node sensor to update its data required for the host_update message
        successful = self._node_sensor.read_data("host_update",
                                                 self._get_debug(),
                                                 self._units)
        if not successful:
            logger.error(
                "NodeDataMsgHandler, _generate_host_update was NOT successful."
            )

        self._host_memory_usage_threshold = str(
            self._host_memory_usage_threshold)
        try:
            if self._host_memory_usage_threshold.isdigit():
                self._host_memory_usage_threshold = int(
                    self._host_memory_usage_threshold)
            else:
                self._host_memory_usage_threshold = float(
                    self._host_memory_usage_threshold)
        except ValueError:
            logger.warning(
                "Host Memory Alert, Invalid host_memory_usage_threshold value are entered in config."
            )
            # Assigning default value to _disk_usage_threshold
            self._host_memory_usage_threshold = self.DEFAULT_HOST_MEMORY_USAGE_THRESHOLD
        if self._node_sensor.total_memory[
                "percent"] >= self._host_memory_usage_threshold:
            # Create the disk space data message and hand it over to the egress processor to transmit
            if not self.host_fault:
                self.host_fault = True
                # Create the disk space data message and hand it over to the egress processor to transmit
                fault_event = "Host memory usage increased to %s, beyond configured threshold of %s" \
                                %(self._node_sensor.total_memory["percent"], self._host_memory_usage_threshold)

                logger.warning(fault_event)

                logged_in_users = []
                # Create the host update message and hand it over to the egress processor to transmit
                hostUpdateMsg = HostUpdateMsg(
                    self._node_sensor.host_id, self._epoch_time,
                    self._node_sensor.boot_time, self._node_sensor.up_time,
                    self._node_sensor.uname, self._units, self.site_id,
                    self.rack_id, self.node_id, self.cluster_id,
                    self._node_sensor.total_memory,
                    self._node_sensor.logged_in_users,
                    self._node_sensor.process_count,
                    self._node_sensor.running_process_count, self.FAULT,
                    fault_event)
                # Add in uuid if it was present in the json request
                if self._uuid is not None:
                    hostUpdateMsg.set_uuid(self._uuid)
                jsonMsg = hostUpdateMsg.getJson()
                # Transmit it out over rabbitMQ channel
                self.host_sensor_data = jsonMsg
                self.os_sensor_type["memory_usage"] = self.host_sensor_data
                self._write_internal_msgQ(RabbitMQegressProcessor.name(),
                                          jsonMsg)

        if (self._node_sensor.total_memory["percent"] <
                self._host_memory_usage_threshold) and (self.host_fault
                                                        == True):
            fault_resolved_event = "Host memory usage decreased to %s, lesser than configured threshold of %s" \
                                    %(self._node_sensor.total_memory["percent"], self._host_memory_usage_threshold)
            logger.warning(fault_resolved_event)
            logged_in_users = []
            # Create the host update message and hand it over to the egress processor to transmit
            hostUpdateMsg = HostUpdateMsg(
                self._node_sensor.host_id, self._epoch_time,
                self._node_sensor.boot_time, self._node_sensor.up_time,
                self._node_sensor.uname, self._units, self.site_id,
                self.rack_id, self.node_id, self.cluster_id,
                self._node_sensor.total_memory,
                self._node_sensor.logged_in_users,
                self._node_sensor.process_count,
                self._node_sensor.running_process_count, self.FAULT_RESOLVED,
                fault_resolved_event)

            # Add in uuid if it was present in the json request
            if self._uuid is not None:
                hostUpdateMsg.set_uuid(self._uuid)
            jsonMsg = hostUpdateMsg.getJson()
            # Transmit it out over rabbitMQ channel
            self.host_sensor_data = jsonMsg
            self.os_sensor_type["memory_usage"] = self.host_sensor_data

            self._write_internal_msgQ(RabbitMQegressProcessor.name(), jsonMsg)
            self.host_fault = False