Beispiel #1
0
    def _consul_get(self, key, **kwargs):
        """Load consul data from the given key.

        The key is resolved through ``self._get_key`` exactly once; the read
        itself is retried up to ``MAX_CONSUL_RETRY`` times, sleeping
        ``WAIT_BEFORE_RETRY`` between attempts, when the consul connection is
        refused. Any other error aborts immediately.

        Args:
            key: Key to read, before ``self._get_key`` is applied.
            **kwargs: Only ``recurse`` (default ``False``) is used; it is
                forwarded to ``consul_conn.kv.get``.

        Returns:
            tuple: ``(data, status)``. ``status`` is ``"Success"`` or
            ``"Failure"``. On success ``data`` is the stored ``"Value"``
            (unpickled when it can be unpickled) or the falsy result consul
            returned for a missing key.
        """
        data = None
        status = "Failure"
        recurse = kwargs.get("recurse", False)

        # Resolve the key before the retry loop: resolving it inside the loop
        # applied _get_key again to an already resolved key on every retry.
        try:
            key = self._get_key(key)
        except Exception as gerr:
            logger.warn("Error[{0}] while reading data from consul {1}" \
                .format(gerr, key))
            return data, status

        for retry_index in range(0, MAX_CONSUL_RETRY):
            try:
                data = self.consul_conn.kv.get(key, recurse=recurse)[1]
                if data:
                    data = data["Value"]
                    try:
                        # NOTE(review): pickle.loads on data read from an
                        # external store is unsafe if that store can be
                        # written by untrusted parties.
                        data = pickle.loads(data)
                    except Exception:
                        # Value was not pickled; hand it back as stored.
                        pass
                status = "Success"
                break

            except requests.exceptions.ConnectionError as connerr:
                logger.warn("Error[{0}] consul connection refused Retry Index {1}" \
                    .format(connerr, retry_index))
                time.sleep(WAIT_BEFORE_RETRY)

            except Exception as gerr:
                logger.warn("Error[{0}] while reading data from consul {1}" \
                    .format(gerr, key))
                break

        return data, status
Beispiel #2
0
    def check_and_send_alert(self):
        """Checks whether conditions are met and sends alert if required

        Alerts will be sent if -
        1. All 4 phys of a sas port go up -> down : fault alert
        2. All 4 phys of a sas port come down -> up : fault_resolved alert
        Sensor data stored in persistent storage is a dict of
        { sas_port_number : alert_type }

        When the stored data cannot provide a 'version' entry it is rebuilt in
        the current format ('conn' and every port marked 'fault_resolved') and
        written to the store. ``handle_current_version_data()`` is called only
        when the version that was read equals ``CURRENT_DATA_VERSION``.
        """
        # Update sas ports status
        self.update_sas_ports_status()

        version = None
        try:
            # Raises if the stored alert is None or carries no version.
            version = self.sas_phy_stored_alert['version']
        except Exception:
            # The message is built from adjacent literals so that no source
            # indentation leaks into the log line.
            logger.warn("Found no data or old data format for SASPortSensor, "
                        "updating data format to version "
                        f"{self.CURRENT_DATA_VERSION}")
            # Versioning is not implemented or there is no data: write new
            # data with a dummy fault_resolved for conn and all sas ports.
            fresh_data = {
                'version': self.CURRENT_DATA_VERSION,
                'conn': 'fault_resolved',
            }
            fresh_data.update(
                dict.fromkeys(range(0, self.NUM_SAS_PORTS), 'fault_resolved'))
            self.sas_phy_stored_alert = fresh_data
            # Save data to store
            store.put(self.sas_phy_stored_alert, self.SAS_PORT_SENSOR_DATA)

        if version == self.CURRENT_DATA_VERSION:
            self.handle_current_version_data()
Beispiel #3
0
    def get_keys_with_prefix(self, prefix):
        """Get the keys stored under the given prefix.

        The prefix is resolved through ``self._get_key`` exactly once; the
        lookup is retried up to ``MAX_CONSUL_RETRY`` times when the consul
        connection is refused.

        Returns:
            list: The part after the last "/" of every key found, or an empty
            list when nothing is stored under the prefix.
            None: When the lookup fails.
        """
        # Resolve the prefix before the retry loop: resolving it inside the
        # loop applied _get_key again to the resolved prefix on every retry.
        try:
            prefix = self._get_key(prefix)
        except Exception as gerr:
            logger.warn("Error[{0}] while getting keys with given prefix {1}" \
                .format(gerr, prefix))
            return None

        for retry_index in range(0, MAX_CONSUL_RETRY):
            try:
                data = self.consul_conn.kv.get(prefix, recurse=True)[1]
                if not data:
                    return []
                return [
                    item["Key"][item["Key"].rindex("/") + 1:]
                    for item in data
                ]

            except requests.exceptions.ConnectionError as connerr:
                logger.warn("Error[{0}] consul connection refused Retry Index {1}" \
                    .format(connerr, retry_index))
                time.sleep(WAIT_BEFORE_RETRY)

            except Exception as gerr:
                logger.warn("Error[{0}] while getting keys with given prefix {1}" \
                    .format(gerr, prefix))
                break

        # Retries exhausted or an unexpected error occurred.
        return None
 def _update_raid_device_file(self, device):
     """Write 'check' to the sync-action file of the given RAID device.

     The shell command is run through ``self._run_command`` and repeated,
     with a one second pause after each failure, while the failure count
     does not exceed ``RaidDataConfig.MAX_RETRIES``.

     Returns:
         str: "success" once the command ran without error, else "failed".

     Raises:
         Exception: Any unexpected error is logged and re-raised.
     """
     try:
         outcome = "failed"
         failures = 0
         base_dir = RaidDataConfig.DIR.value
         action_file = RaidDataConfig.SYNC_ACTION_FILE.value
         while failures <= RaidDataConfig.MAX_RETRIES.value:
             CHECK_COMMAND = "echo 'check' |sudo tee " + base_dir + device + action_file + " > /dev/null"
             logger.debug('Executing CHECK_COMMAND:{}'.format(CHECK_COMMAND))
             response, error = self._run_command(CHECK_COMMAND)
             if not error:
                 logger.debug(
                     "RAID device state is changed to 'check' with response : {}"
                     .format(response))
                 outcome = "success"
                 break
             # Command failed: count it, wait briefly and try again.
             logger.warn("Failed in executing command:{}.".format(error))
             failures += 1
             time.sleep(1)
         return outcome
     except Exception as ae:
         logger.error("Failed to update RAID File. ERROR:{}".format(str(ae)))
         raise
Beispiel #5
0
    def _process_msg(self, body):
        """Parses the incoming message and hands off to the appropriate module

        Args:
            body: The incoming message, either an already decoded dict or a
                JSON string.

        The message is authenticated (when the security library is in use),
        validated against the actuator or sensor schema depending on its
        request type, and forwarded through ``_send_to_msg_handler``.
        Messages that are neither actuator nor sensor requests are ignored.
        Any error while processing is logged together with its cause and
        answered with an error acknowledgement on the egress queue.
        """
        ingressMsg = {}
        uuid = None
        try:
            if not isinstance(body, dict):
                ingressMsg = json.loads(body)
            else:
                ingressMsg = body

            # Authenticate message using username and signature fields
            username = ingressMsg.get("username")
            signature = ingressMsg.get("signature")
            message = ingressMsg.get("message")
            uuid = ingressMsg.get("uuid")
            msg_len = len(message) + 1

            if uuid is None:
                uuid = "N/A"

            if use_security_lib and \
                    SSPL_SEC.sspl_verify_message(msg_len, str(message),
                                                 username, signature) != 0:
                logger.warn(
                    "IngressProcessor, Authentication failed on message: %s" %
                    ingressMsg)
                return

            # Get the incoming message type
            if message.get("actuator_request_type") is not None:
                msgType = message.get("actuator_request_type")

                # Validate against the actuator schema
                validate(ingressMsg, self._actuator_schema)

            elif message.get("sensor_request_type") is not None:
                msgType = message.get("sensor_request_type")

                # Validate against the sensor schema
                validate(ingressMsg, self._sensor_schema)

            else:
                # We only handle incoming actuator and sensor requests, ignore
                # everything else.
                return

            # Check for debugging being activated in the message header
            self._check_debug(message)
            self._log_debug("_process_msg, ingressMsg: %s" % ingressMsg)

            self._send_to_msg_handler(msgType, message, uuid)

        except Exception as ex:
            # Log the cause as well: without it a schema violation, a missing
            # "message" field and a handler failure are indistinguishable.
            logger.error(
                "IngressProcessor, _process_msg unrecognized message: %r, "
                "error: %r" % (ingressMsg, ex))
            ack_msg = AckResponseMsg("Error Processing Msg",
                                     "Msg Handler Not Found", uuid).getJson()
            self._write_internal_msgQ(EgressProcessor.name(), ack_msg)
    def _get_controllers(self, instance_id):
        """Fetch the enclosure controllers through the show-controllers API.

        Args:
            instance_id: Passed through to ``self._get_controller_data``.

        Returns:
            The result of ``self._get_controller_data`` applied to the
            "controllers" entry of the response, or ``None`` when the request
            got no response or a non-OK HTTP status.
        """
        url = self.rssencl.build_url(self.rssencl.URI_CLIAPI_SHOWCONTROLLERS)

        response = self.rssencl.ws_request(url, self.rssencl.ws.HTTP_GET)

        if not response:
            # The second literal starts with a space so the url and "failed"
            # do not run together in the log line.
            logger.warn("{0}:: Controller status unavailable as ws request {1}"
                        " failed".format(self.rssencl.LDR_R1_ENCL, url))
            return

        if response.status_code != self.rssencl.ws.HTTP_OK:
            # Failures of loopback requests are not logged.
            if url.find(self.rssencl.ws.LOOPBACK) == -1:
                logger.error(
                    "{0}:: http request {1} to get controller failed with http err"
                    " {2}".format(self.rssencl.LDR_R1_ENCL, url,
                                  response.status_code))
            return

        response_data = json.loads(response.text)

        controllers_list = response_data["controllers"]
        return self._get_controller_data(controllers_list, instance_id)
    def _get_fan_modules(self, instance_id):
        """Fetch the enclosure fan modules through the show-fan-modules API.

        Args:
            instance_id: Passed through to ``self._get_fan_module_data``.

        Returns:
            The result of ``self._get_fan_module_data`` applied to the
            "fan-modules" entry of the response, or ``None`` when the request
            got no response or a non-OK HTTP status.
        """
        url = self.rssencl.build_url(self.rssencl.URI_CLIAPI_SHOWFANMODULES)

        response = self.rssencl.ws_request(url, self.rssencl.ws.HTTP_GET)

        if not response:
            # The second literal starts with a space so the url and "failed"
            # do not run together in the log line.
            logger.warn(
                "{0}:: Fan-modules status unavailable as ws request {1}"
                " failed".format(self.rssencl.LDR_R1_ENCL, url))
            return

        if response.status_code != self.rssencl.ws.HTTP_OK:
            # Failures of loopback requests are not logged.
            if url.find(self.rssencl.ws.LOOPBACK) == -1:
                logger.error(
                    "{0}:: http request {1} to get fan-modules failed with http err"
                    " {2}".format(self.rssencl.LDR_R1_ENCL, url,
                                  response.status_code))
            return

        response_data = json.loads(response.text)

        fan_modules_list = response_data["fan-modules"]
        return self._get_fan_module_data(fan_modules_list, instance_id)
 def _get_psu(self, psu_name):
     """Fetch power supply data through the show-power-supplies API.

     Args:
         psu_name: Name of the power supply to return, or "*" for all.

     Returns:
         The whole "power-supplies" list for "*", the matching entry for a
         named power supply, or ``None`` when the request fails, the body
         is not valid JSON or the API reports a failure.

     Raises:
         Exception: "Resource not Found" when a named power supply is not
             present in the response.
     """
     # build url for fetching the psu type data
     url = self.rssencl.build_url(self.rssencl.URI_CLIAPI_SHOWPSUS)
     response = self.rssencl.ws_request(url, self.rssencl.ws.HTTP_GET)
     if not response:
         logger.warn("{0}: Psu status unavailable as ws request {1}"
                     " failed".format(self.rssencl.LDR_R1_ENCL, url))
         return
     if response.status_code != self.rssencl.ws.HTTP_OK:
         # Failures of loopback requests are not logged.
         if url.find(self.rssencl.ws.LOOPBACK) == -1:
             logger.error("{0}:: http request {1} to poll psu failed with"
                          " err {2}".format(self.rssencl.LDR_R1_ENCL, url,
                                            response.status_code))
         return
     try:
         jresponse = json.loads(response.content)
     except ValueError as badjson:
         logger.error("%s returned mal-formed json:\n%s" % (url, badjson))
         # Stop here: falling through would read the unbound jresponse.
         return
     if not jresponse:
         return
     api_resp = self.rssencl.get_api_status(jresponse['status'])
     if ((api_resp == -1)
             and (response.status_code == self.rssencl.ws.HTTP_OK)):
         logger.warn("/show/power-supplies api response unavailable, "
                     "marking success as http code is 200")
         api_resp = 0
     if api_resp != 0:
         return
     if psu_name == "*":
         return jresponse["power-supplies"]
     for resource in jresponse["power-supplies"]:
         if psu_name == resource["name"]:
             return resource
     raise Exception("Resource not Found")
 def _get_encl_response(self, uri, request_type):
     """Query the enclosure and return the decoded JSON data.

     Args:
         uri: API uri handed to ``self.rssencl.build_url``.
         request_type: Request type handed to ``self.rssencl.ws_request``.

     Returns:
         dict: The decoded JSON body when the API status is 0, or -1 with
         an OK HTTP status.
         None: When the request produced no response.
         {}: On a non-OK HTTP status, invalid JSON or a failing API status.
     """
     url = self.rssencl.build_url(uri)
     http_response = self.rssencl.ws_request(url, request_type)
     if not http_response:
         # The request did but sometimes we get a response delay (response
         # timeout). In the shutdown of both controllers we did not get a
         # response because the controller not available in that case.
         logger.warn(f"Failed to get data for {uri}")
         return None
     if http_response.status_code != self.rssencl.ws.HTTP_OK:
         # Failure of web service request.
         logger.error(f"Failed to get data for {uri}")
         return {}
     try:
         # Keep the decoded body separate from the HTTP response: reusing
         # one name made the status_code check below run against the dict.
         payload = json.loads(http_response.content)
         api_response = self.rssencl.get_api_status(payload.get('status'))
         http_ok = http_response.status_code == self.rssencl.ws.HTTP_OK
         if api_response == 0 or (api_response == -1 and http_ok):
             return payload
         logger.error(f"invalid data for {uri}")
         return {}
     except ValueError as err:
         logger.error(f"invalid data for {uri} {err}")
         return {}
    def _get_sideplane_expander_list(self):
        """Return sideplane expander list using API /show/enclosure.

        Returns:
            list: Every entry of "sideplanes" of every drawer of the first
            enclosure in the response (empty when there are no drawers).
            None: When the request got no response or a non-OK HTTP status.
        """
        url = self.rssencl.build_url(self.rssencl.URI_CLIAPI_SHOWENCLOSURE)

        response = self.rssencl.ws_request(url, self.rssencl.ws.HTTP_GET)

        if not response:
            logger.warn(
                f"{self.rssencl.LDR_R1_ENCL}:: Enclosure status unavailable as ws request {url} failed"
            )
            return

        if response.status_code != self.rssencl.ws.HTTP_OK:
            # Failures of loopback requests are not logged.
            if url.find(self.rssencl.ws.LOOPBACK) == -1:
                # Adjacent literals keep source indentation out of the
                # logged message.
                logger.error(
                    f"{self.rssencl.LDR_R1_ENCL}:: http request {url} "
                    f"to get enclosure failed with err {response.status_code}")
            return

        response_data = json.loads(response.text)
        encl_drawers = response_data["enclosures"][0]["drawers"]

        sideplane_expanders = []
        if encl_drawers:
            for drawer in encl_drawers:
                sideplane_expanders.extend(drawer["sideplanes"])

        return sideplane_expanders
Beispiel #11
0
    def _get_logical_volumes(self, pool_serial_number):
        """Receives list of Logical Volumes from API.
           URL: http://<host>/api/show/volumes/pool/<pool_serial_number>

        Args:
            pool_serial_number: Serial number appended to the request url.

        Returns:
            The "volumes" entry of the response (``None`` when absent), or
            ``None`` when the request got no response or a non-OK HTTP
            status.
        """
        url = self.rssencl.build_url(self.rssencl.URI_CLIAPI_SHOWVOLUMES)

        url = f"{url}/pool/{pool_serial_number}"

        response = self.rssencl.ws_request(url, self.rssencl.ws.HTTP_GET)

        if not response:
            logger.warn(
                f"{self.rssencl.LDR_R1_ENCL}:: Logical Volume status unavailable as ws request {url}"
                " failed")
            return

        if response.status_code != self.rssencl.ws.HTTP_OK:
            # Adjacent literals keep source indentation out of the logged
            # message.
            logger.error(
                f"{self.rssencl.LDR_R1_ENCL}:: http request {url} "
                f"to get logical volumes failed with err {response.status_code}")
            return

        response_data = json.loads(response.text)
        return response_data.get("volumes")
Beispiel #12
0
    def run(self):
        """Poll the disks once and schedule the next poll.

        While the module is suspended only the rescheduling happens.
        Otherwise the message queue is read, disk presence is checked and,
        unless the latest disks info has been invalidated, system status is
        polled and disk faults are checked.
        """
        # A suspended module only re-arms its timer.
        if self._suspended == True:
            self._scheduler.enter(self.pollfreq_disksensor, self._priority, self.run, ())
            return

        # Allow RealStor Encl MC to start services.
        #time.sleep(self.rssencl.REALSTOR_MC_BOOTWAIT)

        # Picks up a debug mode activation, if one is queued.
        self._read_my_msgQ_noWait()

        # Raises events when a disk insertion/removal is detected.
        self._rss_check_disks_presence()

        if self.invalidate_latest_disks_info:
            # Latest disks info can't be validated due to store function error.
            logger.warn("Can not validate disk faults or presence due to persistence store error")
        else:
            self.rssencl.get_system_status()
            self._rss_check_disk_faults()

        # Debug mode is dropped again when persistence is not enabled.
        self._disable_debug_if_persist_false()

        # Re-arm the poll timer with the configured frequency.
        self._scheduler.enter(self.pollfreq_disksensor, self._priority, self.run, ())
Beispiel #13
0
 def _extract_iem_components(self, iem):
     """Break an IEM string into its individual components.

     The leading "<IEC_KEYWORD>:" is cut off by length, the remainder is
     split on ":" into the IEC (first part) and the description (the rest,
     re-joined with ":").

     Returns:
         list: [severity, source id, component id, module id, event id,
         description], or ``None`` when the description is missing or the
         IEC is shorter than ``self.IEC_LENGTH``.

     Raises:
         TypeError: When ``iem`` is ``None`` or blank.
     """
     if iem is None or not iem.strip():
         raise TypeError

     keyword_len = len(f"{self.IEC_KEYWORD}:")
     iec, *description_parts = iem[keyword_len:].strip().split(":")

     # An IEM needs a full-length IEC and at least one description part.
     if not description_parts or len(iec) < self.IEC_LENGTH:
         logger.warn("Invalid IEM. Missing component")
         return None

     return [
         iec[0],                        # Severity level
         iec[1],                        # Source ID
         iec[2:5],                      # Component ID
         iec[5:8],                      # Module ID
         iec[8:],                       # Event ID
         ":".join(description_parts),   # Description, may contain ':'
     ]
 def _check_raid_state(self, device):
     """Read the sync-action file of a RAID device and check its state.

     The state is read with a shell command run through
     ``self._run_command``. When it equals
     ``RaidDataConfig.STATE_COMMAND_RESPONSE`` that value is written to
     ``self.output_file`` and the check ends; otherwise the read is
     repeated while the attempt counter does not exceed
     ``RaidDataConfig.MAX_RETRIES``.

     Returns:
         "success", "failed", or ``None`` when every attempt ended with a
         command error.

     Raises:
         Exception: Any unexpected error is logged and re-raised.
     """
     try:
         status = None
         attempts = 0
         base_dir = RaidDataConfig.DIR.value
         action_file = RaidDataConfig.SYNC_ACTION_FILE.value
         while attempts <= RaidDataConfig.MAX_RETRIES.value:
             self.output_file = self._get_unique_filename(RaidDataConfig.RAID_RESULT_FILE_PATH.value, device)
             STATE_COMMAND = 'cat ' + base_dir + device + action_file
             logger.debug('Executing STATE_COMMAND:{}'.format(STATE_COMMAND))
             response, error = self._run_command(STATE_COMMAND)

             if error:
                 # The command itself failed: retry right away.
                 logger.warn("Error in cmd{} in raid health monitor"
                             .format(STATE_COMMAND))
                 attempts += 1
                 continue

             if response == RaidDataConfig.STATE_COMMAND_RESPONSE.value:
                 status = "success"
                 with open(self.output_file, 'w') as raid_file:
                     raid_file.write(RaidDataConfig.STATE_COMMAND_RESPONSE.value + "\n")
                 break

             # Unexpected state: wait before reading it again.
             status = "failed"
             attempts += 1
             time.sleep(WAIT_BEFORE_RETRY)
         return status
     except Exception as ae:
         logger.error("Failed in checking RAID device state. ERROR:{}"
                     .format(str(ae)))
         raise
Beispiel #15
0
    def get_sensor_list_by_type(self, fru_type):
        """List the sensors of a FRU type as reported by ipmitool.

        Runs ``sdr type '<FRU>'`` (the FRU type is title-cased) and parses
        the "|" separated output, for example:

           Sys Fan 2B       | 33h | ok  | 29.4 | 5332 RPM
           PS1 1a Fan Fail  | A0h | ok  | 29.13 |
           HDD 1 Status     | F1h | ok  |  4.2 | Drive Present

        Params : self, fru_type
        Output Format : List of Tuple
            (sensor_id, sensor_num, status, entity_id, reading)
        Output Example : [(HDD 1 Status, f1, ok, 4.2, Drive Present),]

        Returns None when the ipmitool command fails. Parsing stops at the
        first empty line.
        """
        command = "sdr type '{0}'".format(fru_type.title())
        raw_output, error, retcode = self._run_ipmitool_subcommand(command)
        if retcode != 0:
            msg = "ipmitool sdr type command failed: {0}".format(error)
            logger.warn(msg)
            return

        sensors = []
        for line in raw_output.split("\n"):
            if line == "":
                break
            fields = [field.strip() for field in line.split("|")]
            sensor_id, sensor_num, status, entity_id, reading = fields
            # The sensor number drops its "h" marker and is lower-cased.
            sensors.append((sensor_id, sensor_num.strip("h").lower(),
                            status, entity_id, reading))
        return sensors
Beispiel #16
0
    def _restart_module(self, module_name):
        """Stop a module, wait for it to go down, then start it again.

        While the module still reports running, the stop is repeated every
        three seconds, at most ten times. The outcome is recorded in
        ``self._thread_response`` as "Restart Successful" or
        "Restart Failed".
        """
        self._log_debug("_restart_module, module_name: %s" % module_name)

        try:
            # Stop the module if it's running and let existing thread die gracefully
            if self._status_module(module_name) is True:
                self._stop_module(module_name)

            # Allow module a few seconds to shut down gracefully
            max_wait = 10
            for attempt in range(1, max_wait + 1):
                if self._status_module(module_name) is not True:
                    break
                time.sleep(3)
                logger.info("Retrying: %s" % str(attempt))
                self._stop_module(module_name)

            # Start the module
            self._start_module(module_name)
        except Exception as ae:
            logger.warn("Restart thread failed: %s" % str(ae))
            self._thread_response = "Restart Failed"
        else:
            self._thread_response = "Restart Successful"
Beispiel #17
0
 def _get_sensor_properties(self, sensor_name):
     """
     Read the properties of one sensor with `ipmitool sensor get`.

     On success the command output is converted by
     ``self._response_to_dict``; when the command returns a non-zero code
     the output is converted by ``self._errorstr_to_dict`` instead. The
     original documentation describes the result as a tuple
     (common, specific) of dicts holding the common and the
     sensor-specific properties. If anything raises, the error is logged
     and ``None`` is returned.

     e.g. ipmitool sensor get 'PS1 Temperature'
     Locating sensor record...
      Sensor ID              : PS1 Temperature (0x5c)
      Entity ID             : 10.1
      Sensor Type (Threshold)  : Temperature
      Sensor Reading        : 16 (+/- 0) degrees C
      Status                : ok
      Lower Non-Recoverable : na
      Lower Critical        : na
      Lower Non-Critical    : na
      Upper Non-Critical    : 55.000
      Upper Critical        : 60.000
      Upper Non-Recoverable : na
      Positive Hysteresis   : 2.000
      Negative Hysteresis   : 2.000
      Assertion Events      :
      Assertions Enabled    : unc+ ucr+
      Deassertions Enabled  : unc+ ucr+
     """
     try:
         command = "sensor get '{0}'".format(sensor_name)
         output, return_code = self._executor._run_ipmitool_subcommand(command)
         if return_code != 0:
             msg = "sensor get '{0}' : command failed with error {1}".format(sensor_name, output)
             logger.warn(msg)
             return self._errorstr_to_dict(output)
         return self._response_to_dict(output)
     except Exception as err:
         logger.error("Exception occurred in _get_sensor_properties for cmd - sensor get '{0}': {1}".format(sensor_name, err))
 def _get_host_update_data(self):
     """Retrieves node information for the host_update json message

     Stores on the instance: ``up_time``, ``boot_time``, ``uname``,
     ``total_memory``, ``process_count``, ``logged_in_users`` and
     ``running_process_count``. Returns nothing.
     """
     uname_keys = ("sysname", "nodename", "version", "release", "machine")
     # NOTE(review): up_time receives the boot timestamp and boot_time the
     # epoch time - the two names look swapped; confirm against consumers.
     self.up_time = int(psutil.boot_time())
     self.boot_time = self._epoch_time
     self.uname = dict(zip(uname_keys, os.uname()))
     self.total_memory = dict(psutil.virtual_memory()._asdict())
     self.process_count = len(psutil.pids())
     self.logged_in_users = [dict(user._asdict()) for user in psutil.users()]

     # Processes in any of these states are not counted as running.
     not_running = (psutil.STATUS_ZOMBIE,
                    psutil.STATUS_DEAD,
                    psutil.STATUS_STOPPED,
                    psutil.STATUS_IDLE,
                    psutil.STATUS_SLEEPING)

     # Calculate the current number of running processes at this moment
     total_running_proc = 0
     for proc in psutil.process_iter():
         try:
             pinfo = proc.as_dict(attrs=['status'])
             if pinfo['status'] not in not_running:
                 total_running_proc += 1
         except psutil.NoSuchProcess:
             # Report the pid rather than the name: the process is gone, so
             # asking for its name could raise NoSuchProcess again from
             # inside this handler.
             logger.warn(
                 f"(psutil) Process with pid {proc.pid} exited unexpectedly.")
     self.running_process_count = total_running_proc
Beispiel #19
0
 def enter(service):
     """Log that a disabled service is not monitored and untrack it.

     The service name is discarded from ``Service.non_active`` and
     ``Service.monitoring_disabled``; a set ``properties_changed_signal``
     is removed.
     """
     message = "{} service is disabled, it will not be monitored".format(
         service.name)
     logger.warn(message)
     for registry in (Service.non_active, Service.monitoring_disabled):
         registry.discard(service.name)
     if service.properties_changed_signal:
         service.properties_changed_signal.remove()
Beispiel #20
0
    def _start_module(self, module_name):
        """Start the thread of a module unless it is already running.

        ``self._thread_response`` is set to "Start Successful" before the
        thread is created and to "Start Failed" when anything raises.
        Nothing is changed when the module already reports running.
        """
        self._log_debug("_start_module, module_name: %s" % module_name)

        try:
            already_running = self._status_module(module_name) is True
            if already_running:
                self._log_debug("_start_module, status: True")
                return

            self._thread_response = "Start Successful"

            # NOTE: This is internal code that is currently unused.
            # If this is brought into use again its interaction
            # with the init dependency code will need to be considered
            worker = Thread(
                target=_run_thread_capture_errors,
                args=(
                    self._sspl_modules[module_name],
                    self._sspl_modules,
                    self._msgQlist,
                    self._conf_reader,
                    self._product,
                ),
            )

            # The configure-debug message is queued before the thread starts.
            if self.debug_section is not None:
                self._write_internal_msgQ(module_name, self.debug_section)

            worker.start()
        except Exception as ae:
            logger.warn("Start thread failed: %s" % str(ae))
            self._thread_response = "Start Failed"
Beispiel #21
0
    def _rss_raise_disk_alert(self, alert_type, disk_info):
        """Build the disk alert payload and pass it to ``_send_json_msg``.

        Nothing is sent when ``disk_info`` is empty or ``alert_type`` is not
        one of ``self.rssencl.fru_alerts``. The generic part carries every
        field of ``self.disk_generic_info`` (default "NA") plus slot, blink,
        enclosure-id and the enclosure family; every other ``disk_info`` key
        that does not contain ``self.NUMERIC_IDENTIFIER`` goes into the
        extended part.
        """
        #logger.debug("Raise - alert type {0}, info {1}".format(alert_type,disk_info))
        if not disk_info:
            logger.warn("disk_info None, ignoring")
            return

        if alert_type not in self.rssencl.fru_alerts:
            logger.error(f"Supplied alert type [{alert_type}] not supported")
            return

        # Defaults first, then whatever generic fields disk_info provides.
        generic = dict.fromkeys(self.disk_generic_info, "NA")
        generic.update({'slot': -1, 'blink': 0, 'enclosure-id': 0})
        generic.update(
            (field, disk_info[field])
            for field in self.disk_generic_info
            if field in disk_info
        )
        generic[self.rssencl.ENCL_FAMILY] = self.rssencl.LDR_R1_ENCL

        # Platform specific fields: the remaining key value pairs.
        extras = {}
        for key in disk_info:
            if key in generic or self.NUMERIC_IDENTIFIER in key:
                continue
            extras[key] = disk_info[key]

        # notify realstor encl msg handler
        self._send_json_msg(alert_type, generic, extras)
    def _get_disk(self, disk):
        """Retrieve realstor disk info using cli api /show/disks.

        Args:
            disk: Disk number (anything ``int()`` accepts) or
                ``self.RESOURCE_ALL`` to query every disk.

        Returns:
            The "drives" entry of the API response, or ``None`` when the
            disk value is malformed, the request fails, the body is not
            valid JSON or the API reports a failure.
        """
        # make ws request
        url = self.rssencl.build_url(
                  self.rssencl.URI_CLIAPI_SHOWDISKS)

        # TODO: Add pagination to response for '*' case.
        # storage enclosures will have
        # ~ 80 to 100 drives, which will make the
        # response huge.
        if disk != self.RESOURCE_ALL:
            try:
                diskId = "0.{}".format(int(disk))
            except (TypeError, ValueError):
                # TypeError covers values such as None that int() rejects.
                msg = "Wrong format for disk resource value: {},"\
                        " expected int or '*'".format(disk)
                logger.error("RealStorActuator: _get_disk: {}".format(msg))
                return

            url = f"{url}/{diskId}"

        url = f"{url}/detail"

        response = self.rssencl.ws_request(url, self.rssencl.ws.HTTP_GET)

        if not response:
            logger.warn("{0}:: Disks status unavailable as ws request {1}"
                " failed".format(self.rssencl.LDR_R1_ENCL, url))
            return

        if response.status_code != self.rssencl.ws.HTTP_OK:
            # Failures of loopback requests are not logged.
            if url.find(self.rssencl.ws.LOOPBACK) == -1:
                logger.error("{0}:: http request {1} to poll disks failed with"
                    " err {2}".format(self.rssencl.LDR_R1_ENCL, url, response.status_code))
            return

        try:
            jresponse = json.loads(response.content)
        except ValueError as badjson:
            logger.error("%s returned mal-formed json:\n%s" % (url, badjson))
            # Stop here: falling through would read the unbound jresponse.
            return

        if not jresponse:
            return

        api_resp = self.rssencl.get_api_status(jresponse['status'])

        if ((api_resp == -1) and
               (response.status_code == self.rssencl.ws.HTTP_OK)):
            logger.warn("/show/disks api response unavailable, "
                "marking success as http code is 200")
            api_resp = 0

        if api_resp == 0:
            return jresponse['drives']
    def _get_enclosure_data(self, sasurl, response):
        """Fetch expander-port data and store it in ``response``.

        Args:
            sasurl: Url to request.
            response: Dict that receives the result under 'specific_info'.

        On success ``response['specific_info']`` becomes the list of all
        expander ports (each given a 'resource_id' equal to its 'name') when
        ``self._resource_id`` is ``self.RESOURCE_ALL``; otherwise it becomes
        the port whose name matches ``self._resource_id`` ignoring case, or
        a dict with a "reason" entry when no port matches. ``response`` is
        left untouched when the request or the API call fails.

        Returns:
            None in every case; the result is delivered through ``response``.
        """
        logger.info("url comes into _get_enclosure_data is:{0}".format(sasurl))
        sas_response = self.rssencl.ws_request(sasurl,
                                               self.rssencl.ws.HTTP_GET)
        logger.info(
            "_get_sas_port_status, sasresponse for coming is:{0}".format(
                sas_response))

        if not sas_response:
            # Uses sasurl: the name `url` used here before is not defined in
            # this method.
            logger.warn(
                "{0}:: sas port status unavailable for request:{1} --gets failed"
                .format(self.rssencl.LDR_R1_ENCL, sasurl))
            return None

        if sas_response.status_code != self.rssencl.ws.HTTP_OK:
            # Failures of loopback requests are not logged.
            if sasurl.find(self.rssencl.ws.LOOPBACK) == -1:
                # Uses sas_response: the name `sasresponse` used here before
                # is not defined in this method.
                logger.error(
                    "{0}:: http request {1} to sas port health status failed with error:{2}"
                    .format(self.rssencl.LDR_R1_ENCL, sasurl,
                            sas_response.status_code))
            return None

        try:
            json_response = json.loads(sas_response.content)
        except ValueError as v_error:
            logger.error("{0} returned invalid json:\n{1}".format(
                sasurl, v_error))
            return None

        if json_response is None:
            return None

        api_status = self.rssencl.get_api_status(json_response['status'])
        if ((api_status == -1)
                and (sas_response.status_code == self.rssencl.ws.HTTP_OK)):
            logger.warn("/show/sas-link-health api response unavailable, "
                        "marking success as http code is 200")
            # Actually mark the call successful, as the message says.
            api_status = 0

        if api_status != 0:
            return None

        # A response without "expander-ports" is treated as having no ports.
        expander_ports = json_response.get("expander-ports") or []

        if self._resource_id == self.RESOURCE_ALL:
            response['specific_info'] = []
            response['specific_info'].extend(expander_ports)
            for interfc in response['specific_info']:
                interfc['resource_id'] = interfc['name']
            return None

        response['specific_info'] = {}
        for port_enclr in expander_ports:
            logger.info(port_enclr)
            if self._resource_id.lower() == port_enclr['name'].lower():
                response['specific_info'] = port_enclr
                break
        else:
            # No port matched the requested resource id.
            response['specific_info'][
                "reason"] = "Data not available for port interface: {}.".format(
                    self._resource_id.lower())
        return None
Beispiel #24
0
    def _notify_DiskMsgHandler(self, status_file: str, serial_num_file):
        """Report a changed drive status to the disk message handler.

        The status is read from ``status_file`` and, when a file named
        "reason" exists next to it, extended to "<status>_<reason>". If it
        differs from the status remembered for that directory in
        ``self._drive_status`` the new status is remembered and a
        "disk_status_drivemanager" JSON message carrying the event path,
        the status and the serial number is written to the internal queue
        of ``DiskMsgHandler``. Nothing happens when either file is missing
        or the status is unchanged.
        """
        required_files = (("status_file", status_file),
                          ("serial_num_file", serial_num_file))
        for label, path in required_files:
            if not os.path.isfile(path):
                logger.warn(f"{label}: {path} does not exist, ignoring.")
                return

        drive_dir = os.path.dirname(status_file)

        # Read in status and see if it has changed
        with open(status_file, "r") as datafile:
            status = datafile.read().replace('\n', '')

        # See if there's a reason file
        reason_file = os.path.join(drive_dir, "reason")
        if os.path.isfile(reason_file):
            with open(reason_file, "r") as datafile:
                reason = datafile.read().replace('\n', '')
            status = f"{status}_{reason}"

        # Do nothing if the drive status has not changed
        if self._drive_status[drive_dir] == status:
            return

        # Update the status for this drive
        self._log_debug(
            f"Status change, status_file: {status_file}, status: {status}")
        self._drive_status[drive_dir] = status

        # Read in the serial number
        with open(serial_num_file, "r") as datafile:
            serial_number = datafile.read().replace('\n', '')

        # Remove base dcs dir since it contains no relevant data
        event_path = status_file[len(self._drive_mngr_base_dir) + 1:]

        # Send the event to disk message handler to generate json message
        payload = {
            "sensor_response_type": "disk_status_drivemanager",
            "event_path": event_path,
            "status": status,
            "serial_number": serial_number
        }
        self._write_internal_msgQ(DiskMsgHandler.name(), json.dumps(payload))

        # Reset debug mode if persistence is not enabled
        self._disable_debug_if_persist_false()
Beispiel #25
0
    def exists(self, key):
        """Check whether the filesystem path given as *key* exists.

        Args:
            key: Path to test with os.path.exists.

        Returns:
            tuple: (key_present, status). status is "Success" when the
            check ran, in which case key_present is the result of
            os.path.exists; on an unexpected error the pair is
            (False, "Failure").
        """
        try:
            return os.path.exists(key), "Success"
        except Exception as gerr:
            # Reached only when os.path.exists itself raises (for example a
            # key of an unsupported type). Report both the error and the
            # key; the previous message put the error where the key belongs.
            logger.warn("Error[{0}] while checking if {1} is present".format(
                gerr, key))

        return False, "Failure"
Beispiel #26
0
 def _get_degraded_state_modules_list(self):
     """Reads list of modules to run in degraded state and returns a list
        of those modules.
     """
     # List of modules to run in degraded mode
     modules_to_resume = []
     try:
         # Read list of modules from conf file to load in degraded mode
         modules_to_resume = self._conf_reader._get_value_list(self.SSPL_SETTING,
                                                       self.DEGRADED_STATE_MODULES)
     except Exception as e:
         logger.warn("ThreadController: Configuration not found, degraded_state_modules")
     return modules_to_resume
Beispiel #27
0
    def check_system_faults_changed(self):
        """Report whether the system faults state needs to be refreshed.

        Returns:
            bool: True when self.existing_faults is truthy, or when
            self.latest_faults differs from self.memcache_faults (a warning
            is logged in that second case); False otherwise.
        """
        # Existing faults always count as a change
        if self.existing_faults:
            return True

        if self.latest_faults != self.memcache_faults:
            logger.warn("System faults state changed, updating cached faults!!")
            return True

        return False
Beispiel #28
0
    def _load_json_file(self, key):
        """ Load dict obj from json in given absolute file path"""
        value = None
        absfilepath = key

        # Check if directory exists
        directory_path = os.path.join(os.path.dirname(absfilepath), "")
        if not os.path.isdir(directory_path):
            logger.critical("Path doesn't exists: {0}".format(directory_path))
            return

        try:
            fh = open(absfilepath,"rb")
            try:
                value = pickle.load(fh)
            except:
                value = fh.read()
        except IOError as err:
            logger.warn("I/O error[{0}] while loading data from file {1}): {2}"\
                .format(err.errno,absfilepath,err))
        except ValueError as jsonerr:
            logger.warn("JSON error{0} while loading from {1}".format(jsonerr, absfilepath))
            value = None
        except OSError as oserr:
            logger.warn("OS error{0} while loading from {1}".format(oserr, absfilepath))
        except Exception as gerr:
            logger.warn("Error{0} while reading data from file {1}"\
                .format(gerr, absfilepath))
        else:
            fh.close()

        return value
    def build_url(self, uri):
        """Compose the http request url for the given api *uri*.

        The result is "http://<active_ip>[:<active_wsport>]/api<uri>". The
        port part is only included when self.active_wsport consists of
        digits; otherwise a warning is logged and the port is left out.
        """
        configured_port = self.active_wsport

        if configured_port.isdigit():
            port_part = ":" + configured_port
        else:
            logger.warn("Non-numeric webservice port configured [%s], ignoring",\
                configured_port)
            port_part = ""

        # join() keeps the str-only requirement of plain concatenation
        return "".join(("http://", self.active_ip, port_part, "/api", uri))
Beispiel #30
0
    def _process_missing_md_devices(self, md_device_list, drive_dict):
        """Raise or resolve alerts for md arrays listed in the RAID conf file.

        Collects the array devices named on ARRAY lines of
        self.RAID_CONF_FILE and compares them with md_device_list:

        - a configured device that is absent from md_device_list and not yet
          in self._faulty_device_list is recorded there and reported with
          alert type self.FAULT;
        - a configured device that is present in md_device_list and still in
          self._faulty_device_list is removed from it and reported with
          alert type self.FAULT_RESOLVED.

        Args:
            md_device_list: Devices to compare the configuration against
                (per the original docstring, the arrays found in mdstat).
            drive_dict: Passed through to self._map_drive_status when a
                fault is resolved.

        Returns:
            None. Returns early, without comparing anything, when the conf
            file is missing or when parsing any of its lines raises.
        """

        if not os.path.isfile(self.RAID_CONF_FILE):
            logger.warn(
                f"_process_missing_md_devices, MDRaid configuration file {self.RAID_CONF_FILE} is missing"
            )
            return

        conf_device_list = []
        with open(self.RAID_CONF_FILE, 'r') as raid_conf_file:
            raid_conf_data = raid_conf_file.read().strip().split("\n")
        for line in raid_conf_data:
            try:
                # Fields are separated by single spaces; only lines whose
                # first field contains "ARRAY" (and no "#") and whose second
                # field contains "/md" are considered.
                raid_conf_field = line.split(" ")
                if "#" not in raid_conf_field[0] and "ARRAY" in raid_conf_field[0] and \
                    "/md" in raid_conf_field[1]:
                    # Normalize the device name: /dev/md/1 and /dev/md1 are
                    # treated as the same device and stored as /dev/md1.
                    map_device = raid_conf_field[1].split('md/')
                    if len(map_device) > 1:
                        conf_device_list.append(map_device[0] + 'md' +
                                                map_device[1])
                    else:
                        conf_device_list.append(raid_conf_field[1])
            except Exception as ae:
                # E.g. an ARRAY line without a second field raises IndexError.
                # NOTE(review): this return abandons the whole check on the
                # first bad line - confirm that this is intended.
                self._log_debug(
                    f"_process_missing_md_devices, error retrieving raid entry    \
                 from {self.RAID_CONF_FILE} file: {str(ae)}")
                return

        # compare conf file raid array list with mdstat raid array list
        for device in conf_device_list:
            if device not in md_device_list and device not in self._faulty_device_list:
                # configured array is missing and not reported yet:
                # remember it as faulty and send a fault alert
                self.alert_type = self.FAULT
                self._faulty_device_list.add(device)
                self._send_json_msg(self.alert_type, device, device,
                                    self.RAID_DOWN_DRIVE_STATUS)

            elif device in md_device_list and device in self._faulty_device_list:
                # previously faulty array is listed again: refresh its drive
                # status, drop it from the faulty devices and send a
                # fault_resolved alert with the status from self._drives
                self.alert_type = self.FAULT_RESOLVED
                self._map_drive_status(device, drive_dict, "Down/Recovery")
                self._faulty_device_list.remove(device)
                self._send_json_msg(self.alert_type, device, device,
                                    self._drives[device])