Beispiel #1
0
    def initialize(self, conf_reader, msgQlist, product):
        """Initialize the configuration reader, message queues and CPU probe.

        Args:
            conf_reader: forwarded to the parent class's ``initialize``.
            msgQlist: forwarded to the parent class's ``initialize_msgQ``.
            product: accepted for interface compatibility; not used here.

        Returns:
            True once initialization has completed.

        Raises:
            Exception: if the configured probe utility cannot be obtained
                from ``ToolFactory``. The underlying error is chained as the
                cause so the original traceback is preserved.
        """

        # Initialize ScheduledMonitorThread and InternalMsgQ
        super(CPUFaultSensor, self).initialize(conf_reader)

        super(CPUFaultSensor, self).initialize_msgQ(msgQlist)

        # Get the cpu fault implementor from configuration ('sysfs' by default)
        cpu_fault_utility = Conf.get(SSPL_CONF,
                                     f"{self.name().upper()}>{self.PROBE}",
                                     'sysfs')

        # Creating the instance of ToolFactory class
        self.tool_factory = ToolFactory()

        try:
            # Keep an already-set utility instance; otherwise ask the factory
            self._utility_instance = self._utility_instance or \
                                self.tool_factory.get_instance(cpu_fault_utility)
        except Exception as err:
            # Chain the cause so the root failure is not lost on re-raise
            raise Exception(
                "Error while initializing. "
                f"Unable to get the instance of {cpu_fault_utility} Utility, {err}"
            ) from err

        self._node_id = Conf.get(GLOBAL_CONF, NODE_ID_KEY, 'SN01')
        # Per-node cache file path under the sensor's cache directory
        cache_dir_path = os.path.join(DATA_PATH, self.CACHE_DIR_NAME)
        self.CPU_FAULT_SENSOR_DATA = os.path.join(
            cache_dir_path, f'CPU_FAULT_SENSOR_DATA_{self._node_id}')

        return True
Beispiel #2
0
    def initialize(self, conf_reader, msgQlist, product):
        """Set up parent state, location identifiers and the CPU probe utility.

        ``conf_reader`` and ``msgQlist`` are handed to the parent class;
        ``product`` is accepted but not used here. If the probe utility
        cannot be obtained, the error is logged and ``shutdown()`` is called.
        Always returns True.
        """
        # Initialize ScheduledMonitorThread and InternalMsgQ
        parent = super(CPUFaultSensor, self)
        parent.initialize(conf_reader)
        parent.initialize_msgQ(msgQlist)

        # Location identifiers read from the global configuration
        self._site_id = Conf.get(GLOBAL_CONF, SITE_ID_KEY, 'DC01')
        self._rack_id = Conf.get(GLOBAL_CONF, RACK_ID_KEY, 'RC01')
        self._node_id = Conf.get(GLOBAL_CONF, NODE_ID_KEY, 'SN01')
        self._cluster_id = Conf.get(GLOBAL_CONF, CLUSTER_ID_KEY, 'CC01')

        # Name of the cpu fault implementor, 'sysfs' unless configured
        probe_key = f"{self.name().capitalize()}>{self.PROBE}"
        probe_name = Conf.get(SSPL_CONF, probe_key, 'sysfs')

        self.tool_factory = ToolFactory()

        try:
            # Only ask the factory when no utility instance is set yet
            if not self._utility_instance:
                self._utility_instance = \
                    self.tool_factory.get_instance(probe_name)
        except Exception as err:
            logger.error(f"Error while initializing, shutting down CPUFaultSensor : {err}")
            self.shutdown()

        # Per-node cache file path under the sensor's cache directory
        self.CPU_FAULT_SENSOR_DATA = os.path.join(
            DATA_PATH, self.CACHE_DIR_NAME,
            f'CPU_FAULT_SENSOR_DATA_{self._node_id}')

        return True
 def __init__(self):
     """Set up logging, the sysfs and IPMI tools, and the resource lookup tables."""
     super().__init__()
     self.log = CustomLog(const.HEALTH_SVC_NAME)
     self.validate_server_type_support()

     # sysfs tool and the paths derived from its base path
     sysfs_tool = ToolFactory().get_instance('sysfs')
     self.sysfs = sysfs_tool
     sysfs_tool.initialize()
     base_path = sysfs_tool.get_sysfs_base_path()
     self.sysfs_base_path = base_path
     self.cpu_path = base_path + const.CPU_PATH

     # Resource name -> method that collects its information,
     # grouped into hardware ("hw") and software ("sw") resources.
     self.server_resources = {
         "hw": {
             'cpu': self.get_cpu_info,
             'platform_sensors': self.get_platform_sensors_info,
             'memory': self.get_mem_info,
             'fans': self.get_fans_info,
             'nw_ports': self.get_nw_ports_info,
             'sas_hba': self.get_sas_hba_info,
             'sas_ports': self.get_sas_ports_info,
             'disks': self.get_disks_info,
             'psus': self.get_psu_info,
         },
         "sw": {
             'cortx_sw_services': self.get_cortx_service_info,
             'external_sw_services': self.get_external_service_info,
             'raid': self.get_raid_info,
         },
     }

     self._ipmi = IpmiFactory().get_implementor("ipmitool")
     self.platform_sensor_list = ['Temperature', 'Voltage', 'Current']
Beispiel #4
0
 def __init__(self):
     """Set up logging, tools, the resource lookup tables and the health index map."""
     super().__init__()
     self.log = CustomLog(const.HEALTH_SVC_NAME)

     # Validate the configured node type before doing anything else
     node_type = Conf.get(GLOBAL_CONF, NODE_TYPE_KEY)
     Platform.validate_server_type_support(
         self.log, ResourceMapError, node_type)

     # sysfs tool and the paths derived from its base path
     sysfs_tool = ToolFactory().get_instance('sysfs')
     self.sysfs = sysfs_tool
     sysfs_tool.initialize()
     base_path = sysfs_tool.get_sysfs_base_path()
     self.sysfs_base_path = base_path
     self.cpu_path = base_path + const.CPU_PATH

     # Resource name -> method that collects its information,
     # grouped into hardware ("hw") and software ("sw") resources.
     self.server_resources = {
         "hw": {
             'cpu': self.get_cpu_info,
             'platform_sensor': self.get_platform_sensors_info,
             'memory': self.get_mem_info,
             'fan': self.get_fans_info,
             'nw_port': self.get_nw_ports_info,
             'sas_hba': self.get_sas_hba_info,
             'sas_port': self.get_sas_ports_info,
             'disk': self.get_disks_info,
             'psu': self.get_psu_info,
         },
         "sw": {
             'cortx_sw_services': self.get_cortx_service_info,
             'external_sw_services': self.get_external_service_info,
             'raid': self.get_raid_info,
         },
     }

     self._ipmi = IpmiFactory().get_implementor("ipmitool")
     self.platform_sensor_list = ['Temperature', 'Voltage', 'Current']
     self.service = Service()
     self.resource_indexing_map = \
         ServerResourceMap.resource_indexing_map["health"]
Beispiel #5
0
    def initialize(self, conf_reader, msgQlist, product):
        """Initialize the configuration reader, message queues and memory probe.

        Args:
            conf_reader: forwarded to the parent class's ``initialize``.
            msgQlist: forwarded to the parent class's ``initialize_msgQ``.
            product: accepted for interface compatibility; not used here.

        Returns:
            True. A ``KeyError`` while obtaining the probe utility is logged
            and followed by ``shutdown()`` rather than being propagated.
        """

        # Initialize ScheduledMonitorThread and InternalMsgQ
        super(MemFaultSensor, self).initialize(conf_reader)

        super(MemFaultSensor, self).initialize_msgQ(msgQlist)

        # Look the system-information key table up once instead of per field
        system_info_keys = COMMON_CONFIGS.get(self.SYSTEM_INFORMATION_KEY)

        self._site_id = self._conf_reader._get_value_with_default(
            self.SYSTEM_INFORMATION_KEY,
            system_info_keys.get(self.SITE_ID_KEY), '001')
        self._cluster_id = self._conf_reader._get_value_with_default(
            self.SYSTEM_INFORMATION_KEY,
            system_info_keys.get(self.CLUSTER_ID_KEY), '001')
        self._rack_id = self._conf_reader._get_value_with_default(
            self.SYSTEM_INFORMATION_KEY,
            system_info_keys.get(self.RACK_ID_KEY), '001')
        self._node_id = self._conf_reader._get_value_with_default(
            self.SYSTEM_INFORMATION_KEY,
            system_info_keys.get(self.NODE_ID_KEY), '001')

        # Get the mem fault implementor from configuration ("procfs" by default)
        mem_fault_utility = self._conf_reader._get_value_with_default(
            self.name().capitalize(), self.PROBE, "procfs")

        self.polling_interval = int(
            self._conf_reader._get_value_with_default(
                self.SENSOR_NAME.upper(), self.POLLING_INTERVAL_KEY,
                self.DEFAULT_POLLING_INTERVAL))

        # Creating the instance of ToolFactory class
        self.tool_factory = ToolFactory()

        try:
            # Keep an already-set utility instance; otherwise ask the factory
            self._utility_instance = self._utility_instance or \
                                self.tool_factory.get_instance(mem_fault_utility)
            # NOTE(review): the utility's initialize() is not called here
            # (the call was commented out in the original) - confirm intent.
        except KeyError:
            # Adjacent literals keep the message on one line without the
            # run of spaces a backslash continuation inside a string embeds.
            logger.error(
                f"Unable to get the instance of {mem_fault_utility} "
                "Utility. Hence shutting down the sensor "
                f"{MemFaultSensor.SENSOR_NAME}")
            self.shutdown()

        # Per-node cache file path under the sensor's cache directory
        cache_dir_path = os.path.join(DATA_PATH, self.CACHE_DIR_NAME)
        self.MEM_FAULT_SENSOR_DATA = os.path.join(
            cache_dir_path, f'MEM_FAULT_SENSOR_DATA_{self._node_id}')

        return True
    def __init__(self):
        """Collect basic host facts, start load-average threads and set up
        the network fault utility.

        Failures while obtaining the utility are logged and not propagated;
        in that case ``nw_interface_path`` is left unset.
        """
        super(NodeData, self).__init__()

        self.os_utils = OSUtils()
        self._epoch_time = str(int(time.time()))
        # Total number of CPUs
        self.cpus = psutil.cpu_count()
        self.host_id = self.os_utils.get_fqdn()

        # Calculate the load averages on separate blocking threads
        self.load_1min_average = []
        self.load_5min_average = []
        self.load_15min_average = []
        self.prev_bmcip = None
        # Thread.start() returns None, so there is no handle worth keeping
        threading.Thread(target=self._load_1min_avg).start()
        threading.Thread(target=self._load_5min_avg).start()
        threading.Thread(target=self._load_15min_avg).start()

        self.conf_reader = ConfigReader()

        # Name of the network fault implementor, "sysfs" unless configured
        nw_fault_utility = Conf.get(
            SSPL_CONF, f"{self.name().capitalize()}>{self.PROBE}", "sysfs")

        self._utility_instance = None

        try:
            # Creating the instance of ToolFactory class
            self.tool_factory = ToolFactory()
            # Get the instance of the utility using ToolFactory
            self._utility_instance = \
                self.tool_factory.get_instance(nw_fault_utility)
            if self._utility_instance:
                # Initialize the path as /sys/class/net/
                self.nw_interface_path = \
                    self._utility_instance.get_sys_dir_path('net')
        except KeyError:
            logger.error(
                f'NodeData, Unable to get the instance of {nw_fault_utility} Utility'
            )
        except Exception as err:
            # Include the error itself so the failure can be diagnosed
            logger.error(
                'NodeData, Problem occurred while getting the instance of '
                f'{nw_fault_utility}: {err}'
            )
    def initialize(self, conf_reader, msgQlist, product):
        """Initialize the configuration reader, message queues and SAS port probe.

        Reads the phy negotiated link rates from the probe utility, tags
        each phy as 'up' or 'fault', loads the previously stored alert data
        and calls ``check_and_send_alert()``.

        Args:
            conf_reader: forwarded to the parent class's ``initialize``.
            msgQlist: forwarded to the parent class's ``initialize_msgQ``.
            product: accepted for interface compatibility; not used here.

        Returns:
            True. Any exception raised during probe setup is logged and
            followed by ``shutdown()`` rather than being propagated.
        """

        # Initialize ScheduledMonitorThread and InternalMsgQ
        super(SASPortSensor, self).initialize(conf_reader)

        super(SASPortSensor, self).initialize_msgQ(msgQlist)

        self._site_id = Conf.get(GLOBAL_CONF, SITE_ID_KEY, 'DC01')
        self._rack_id = Conf.get(GLOBAL_CONF, RACK_ID_KEY, 'RC01')
        self._node_id = Conf.get(GLOBAL_CONF, NODE_ID_KEY, 'SN01')
        self._cluster_id = Conf.get(GLOBAL_CONF, CLUSTER_ID_KEY, 'CC01')

        # Get the sas port implementor from configuration
        sas_port_utility = Conf.get(
            SSPL_CONF, f"{self.name().capitalize()}>{self.PROBE}", "sysfs")

        self.polling_interval = int(
            Conf.get(SSPL_CONF,
                     f"{self.SENSOR_NAME.upper()}>{self.POLLING_INTERVAL}",
                     self.DEFAULT_POLLING_INTERVAL))

        # Creating the instance of ToolFactory class
        self.tool_factory = ToolFactory()

        # Per-node cache file path under the sensor's cache directory
        cache_dir_path = os.path.join(DATA_PATH, self.CACHE_DIR_NAME)
        self.SAS_PORT_SENSOR_DATA = os.path.join(
            cache_dir_path, f'SAS_PORT_SENSOR_DATA_{self._node_id}')

        try:
            # Keep an already-set utility instance; otherwise ask the factory
            self._utility_instance = self._utility_instance or \
                                self.tool_factory.get_instance(sas_port_utility)
            self._utility_instance.initialize()

            # Call to sas phy directory which will return a dictionary
            # which has phy_name to negotiated link rate mapping
            # Ex: {"phy-0:0": "<12.0, Unknown>"}
            self.phy_dir_to_linkrate_mapping = \
                    self._utility_instance.get_phy_negotiated_link_rate()

            # Restructure each entry in place as (link_rate, status):
            # a link rate containing "Gbit" (case-insensitive) is 'up',
            # anything else is 'fault'. Only existing keys are reassigned,
            # so the dict size does not change during iteration.
            for phy, value in self.phy_dir_to_linkrate_mapping.items():
                if 'gbit' in value.strip().lower():
                    phy_status = 'up'
                    # Increment global phy_link count for UP status
                    self.phy_link_count += 1
                else:
                    phy_status = 'fault'
                self.phy_dir_to_linkrate_mapping[phy] = (value, phy_status)

            # Get the stored previous alert info
            self.sas_phy_stored_alert = store.get(self.SAS_PORT_SENSOR_DATA)
            self.check_and_send_alert()

        except KeyError:
            logger.error(
                f"Unable to get the instance of {sas_port_utility} "
                "Utility. Hence shutting down the sensor")
            self.shutdown()
        except Exception as e:
            # Compare the exception's errno attribute, not the exception
            # object itself; exceptions without an errno yield None here.
            err_code = getattr(e, "errno", None)
            if err_code == errno.ENOENT:
                logger.error(
                    "Problem occurred while reading from sas_phy directory. "
                    "Directory path doesn't exist. "
                    "Hence shutting down the sensor")
            elif err_code == errno.EACCES:
                logger.error(
                    "Problem occurred while reading from sas_phy directory. "
                    "Not enough permission to read from the directory. "
                    "Hence shutting down the sensor")
            else:
                logger.error(
                    "Problem occurred while reading from sas_phy directory. "
                    f"{e}. Hence shutting down the sensor")
            self.shutdown()

        return True