def _consul_get(self, key, **kwargs):
    """Load consul data from the given key.

    :param key: store key (normalized through self._get_key)
    :param kwargs: optional 'recurse' flag forwarded to consul kv.get
    :return: tuple (data, status) where status is "Success" or
             "Failure"; data is None when the key is absent or on error.

    Retries connection errors up to MAX_CONSUL_RETRY times, sleeping
    WAIT_BEFORE_RETRY seconds between attempts; any other exception
    aborts immediately.
    """
    data = None
    status = "Failure"
    for retry_index in range(0, MAX_CONSUL_RETRY):
        try:
            _opt_recurse = kwargs.get("recurse", False)
            key = self._get_key(key)
            data = self.consul_conn.kv.get(key, recurse=_opt_recurse)[1]
            if data:
                data = data["Value"]
                try:
                    # Stored values are usually pickled; fall back to the
                    # raw bytes when unpickling fails.
                    # NOTE(review): pickle.loads on store contents is
                    # unsafe if the consul KV space is writable by
                    # untrusted parties -- confirm the trust boundary.
                    data = pickle.loads(data)
                except Exception:
                    # BUG FIX: was a bare `except:` which also swallowed
                    # SystemExit/KeyboardInterrupt; narrowed to Exception.
                    pass
            status = "Success"
            break
        except requests.exceptions.ConnectionError as connerr:
            logger.warn("Error[{0}] consul connection refused Retry Index {1}" \
                .format(connerr, retry_index))
            time.sleep(WAIT_BEFORE_RETRY)
        except Exception as gerr:
            # Non-connection error: not retryable, give up immediately.
            logger.warn("Error[{0}] while reading data from consul {1}" \
                .format(gerr, key))
            break
    return data, status
def check_and_send_alert(self): """Checks whether conditions are met and sends alert if required Alerts will be sent if - 1. All 4 phys of a sas port go up -> down : fault alert 2. All 4 phys of a sas port come down -> up : fault_resolved alert Sensor data stored in persistent storage is a dict of { sas_port_number : alert_type } """ # Update sas ports status self.update_sas_ports_status() # Check the version of stored alert version = None try: # Try to get the version # Exception will be raised if stored alert is None or no Version is available version = self.sas_phy_stored_alert['version'] except Exception: logger.warn(f"Found no data or old data format for SASPortSensor, \ updating data format to version {self.CURRENT_DATA_VERSION}" ) # Versioning is not implemented or there is no data, write new data # Initialize dummy fault_resolved for all sas ports and conn self.sas_phy_stored_alert = {} self.sas_phy_stored_alert['version'] = self.CURRENT_DATA_VERSION self.sas_phy_stored_alert['conn'] = 'fault_resolved' for i in range(0, self.NUM_SAS_PORTS): self.sas_phy_stored_alert[i] = 'fault_resolved' # Save data to store store.put(self.sas_phy_stored_alert, self.SAS_PORT_SENSOR_DATA) if version == self.CURRENT_DATA_VERSION: self.handle_current_version_data()
def get_keys_with_prefix(self, prefix):
    """Return the basenames of all consul keys stored under *prefix*.

    :param prefix: key prefix (normalized through self._get_key)
    :return: list of key basenames (text after the final '/'), [] when
             nothing is stored under the prefix, or None when consul is
             unreachable after all retries / a non-retryable error hits.
    """
    # NOTE(review): prefix is overwritten with self._get_key(prefix)
    # inside the loop, so on a retry _get_key is applied to an
    # already-normalized value -- confirm _get_key is idempotent.
    for retry_index in range(0, MAX_CONSUL_RETRY):
        try:
            prefix = self._get_key(prefix)
            data = self.consul_conn.kv.get(prefix, recurse=True)[1]
            if data:
                # Strip the directory part, keep only the basename
                # after the last '/'.
                return [
                    item["Key"][item["Key"].rindex("/") + 1:]
                    for item in data
                ]
            # BUG FIX: removed unreachable `break` that followed the
            # return statements in the original.
            return []
        except requests.exceptions.ConnectionError as connerr:
            logger.warn("Error[{0}] consul connection refused Retry Index {1}" \
                .format(connerr, retry_index))
            time.sleep(WAIT_BEFORE_RETRY)
        except Exception as gerr:
            logger.warn("Error[{0}] while getting keys with given prefix {1}" \
                .format(gerr, prefix))
            break
    # All retries exhausted or non-retryable error: no result available.
    return None
def _update_raid_device_file(self, device):
    """Trigger a RAID consistency check on *device* by writing 'check'
    into its md sync_action file, retrying on command failure.

    :param device: md device name (e.g. 'md0'), appended to
                   RaidDataConfig.DIR to build the sysfs path
    :return: "success" once the write succeeds, "failed" if all
             MAX_RETRIES attempts fail
    :raises: re-raises any unexpected exception after logging it
    """
    try:
        status = "failed"
        raid_check = 0
        raid_dir = RaidDataConfig.DIR.value
        sync_action_file = RaidDataConfig.SYNC_ACTION_FILE.value
        while raid_check <= RaidDataConfig.MAX_RETRIES.value:
            # sudo tee is used because the sysfs file is root-writable;
            # output is discarded via /dev/null.
            CHECK_COMMAND = "echo 'check' |sudo tee " + raid_dir + device + sync_action_file + " > /dev/null"
            logger.debug(
                'Executing CHECK_COMMAND:{}'.format(CHECK_COMMAND))
            response, error = self._run_command(CHECK_COMMAND)
            if error:
                # Command failed; wait a second and retry.
                logger.warn(
                    "Failed in executing command:{}.".format(error))
                raid_check += 1
                time.sleep(1)
            else:
                logger.debug(
                    "RAID device state is changed to 'check' with response : {}"
                    .format(response))
                status = "success"
                break
        return status
    except Exception as ae:
        logger.error("Failed to update RAID File. ERROR:{}".format(
            str(ae)))
        raise
def _process_msg(self, body):
    """Parses the incoming message and hands off to the appropriate module

    :param body: raw JSON string or an already-parsed dict

    On any parsing/validation failure an error ack is written to the
    egress processor queue.
    """
    ingressMsg = {}
    uuid = None
    try:
        # Accept either a serialized JSON string or a ready dict.
        if isinstance(body, dict) is False:
            ingressMsg = json.loads(body)
        else:
            ingressMsg = body
        # Authenticate message using username and signature fields
        username = ingressMsg.get("username")
        signature = ingressMsg.get("signature")
        message = ingressMsg.get("message")
        uuid = ingressMsg.get("uuid")
        # NOTE(review): len(message) raises TypeError when the "message"
        # field is missing; the generic except below then emits an error
        # ack -- confirm that is the intended handling.
        msg_len = len(message) + 1
        if uuid is None:
            uuid = "N/A"
        # Reject the message when the security library is in use and the
        # signature does not verify.
        if use_security_lib and \
                SSPL_SEC.sspl_verify_message(msg_len, str(message), username, signature) != 0:
            logger.warn(
                "IngressProcessor, Authentication failed on message: %s"
                % ingressMsg)
            return
        # Get the incoming message type
        if message.get("actuator_request_type") is not None:
            msgType = message.get("actuator_request_type")
            # Validate against the actuator schema
            validate(ingressMsg, self._actuator_schema)
        elif message.get("sensor_request_type") is not None:
            msgType = message.get("sensor_request_type")
            # Validate against the sensor schema
            validate(ingressMsg, self._sensor_schema)
        else:
            # We only handle incoming actuator and sensor requests, ignore
            # everything else.
            return
        # Check for debugging being activated in the message header
        self._check_debug(message)
        self._log_debug("_process_msg, ingressMsg: %s" % ingressMsg)
        self._send_to_msg_handler(msgType, message, uuid)
    except Exception as ex:
        # Parse, schema, or auth plumbing failure: notify sender with an
        # error ack (uuid may still be None if extraction failed early).
        logger.error(
            "IngressProcessor, _process_msg unrecognized message: %r"
            % ingressMsg)
        ack_msg = AckResponseMsg("Error Processing Msg",
                                 "Msg Handler Not Found", uuid).getJson()
        self._write_internal_msgQ(EgressProcessor.name(), ack_msg)
def _get_controllers(self, instance_id): url = self.rssencl.build_url(self.rssencl.URI_CLIAPI_SHOWCONTROLLERS) response = self.rssencl.ws_request(url, self.rssencl.ws.HTTP_GET) if not response: logger.warn("{0}:: Controller status unavailable as ws request {1}" "failed".format(self.rssencl.LDR_R1_ENCL, url)) return if response.status_code != self.rssencl.ws.HTTP_OK: if url.find(self.rssencl.ws.LOOPBACK) == -1: logger.error( "{0}:: http request {1} to get controller failed with http err" " {2}".format(self.rssencl.LDR_R1_ENCL, url, response.status_code)) return response_data = json.loads(response.text) controllers_list = response_data["controllers"] controllers_list = self._get_controller_data(controllers_list, instance_id) return controllers_list
def _get_fan_modules(self, instance_id): url = self.rssencl.build_url(self.rssencl.URI_CLIAPI_SHOWFANMODULES) response = self.rssencl.ws_request(url, self.rssencl.ws.HTTP_GET) if not response: logger.warn( "{0}:: Fan-modules status unavailable as ws request {1}" "failed".format(self.rssencl.LDR_R1_ENCL, url)) return if response.status_code != self.rssencl.ws.HTTP_OK: if url.find(self.rssencl.ws.LOOPBACK) == -1: logger.error( "{0}:: http request {1} to get fan-modules failed with http err" " {2}".format(self.rssencl.LDR_R1_ENCL, url, response.status_code)) return response_data = json.loads(response.text) fan_modules_list = response_data["fan-modules"] fan_modules_list = self._get_fan_module_data(fan_modules_list, instance_id) return fan_modules_list
def _get_psu(self, psu_name): #build url for fetching the psu type data url = self.rssencl.build_url(self.rssencl.URI_CLIAPI_SHOWPSUS) response = self.rssencl.ws_request(url, self.rssencl.ws.HTTP_GET) if not response: logger.warn("{0}: Psu status unavailable as ws request {1}" " failed".format(self.rssencl.LDR_R1_ENCL, url)) return if response.status_code != self.rssencl.ws.HTTP_OK: if url.find(self.rssencl.ws.LOOPBACK) == -1: logger.error("{0}:: http request {1} to poll psu failed with" " err {2}".format(self.rssencl.LDR_R1_ENCL, url, response.status_code)) return try: jresponse = json.loads(response.content) except ValueError as badjson: logger.error("%s returned mal-formed json:\n%s" % (url, badjson)) if jresponse: api_resp = self.rssencl.get_api_status(jresponse['status']) if ((api_resp == -1) and (response.status_code == self.rssencl.ws.HTTP_OK)): logger.warn("/show/power-supplies api response unavailable, " "marking success as http code is 200") api_resp = 0 if api_resp == 0: if psu_name == "*": return jresponse["power-supplies"] else: for resource in jresponse["power-supplies"]: if psu_name == resource["name"]: return resource else: raise Exception("Resource not Found")
def _get_encl_response(self, uri, request_type):
    """ query enclosure and return json data

    :param uri: enclosure CLI API URI (appended to the base url)
    :param request_type: HTTP method constant for ws_request
    :return: parsed JSON dict on success, None when no response arrived,
             {} on HTTP failure or invalid/failed API response
    """
    url = self.rssencl.build_url(uri)
    response = {}
    response = self.rssencl.ws_request(url, request_type)
    if not response:
        # The request was sent but no response object came back -- this
        # happens on response timeouts, e.g. when both controllers are
        # shut down and the enclosure cannot answer at all.
        response = None
        logger.warn(f"Failed to get data for {uri}")
        return response
    if response.status_code != self.rssencl.ws.HTTP_OK:
        # Failure of web service request.
        response = {}
        logger.error(f"Failed to get data for {uri}")
        return response
    try:
        response = json.loads(response.content)
        api_response = self.rssencl.get_api_status(response.get('status'))
        # Accept either an explicit success status, or a missing api
        # status (-1) combined with an HTTP 200.
        if api_response == 0 or \
                (api_response == -1 and response.status_code == self.rssencl.ws.HTTP_OK):
            return response
        else:
            response = {}
            logger.error(f"invalid data for {uri}")
            return response
    except ValueError as err:
        # Body was not valid JSON.
        response = {}
        logger.error(f"invalid data for {uri} {err}")
        return response
def _get_sideplane_expander_list(self):
    """return sideplane expander list using API /show/enclosure

    :return: flat list of sideplane dicts gathered from every drawer of
             the first enclosure, or None on web-service failure
    """
    sideplane_expanders = []
    url = self.rssencl.build_url(self.rssencl.URI_CLIAPI_SHOWENCLOSURE)
    response = self.rssencl.ws_request(url, self.rssencl.ws.HTTP_GET)
    if not response:
        logger.warn(
            f"{self.rssencl.LDR_R1_ENCL}:: Enclosure status unavailable as ws request {url} failed"
        )
        return
    if response.status_code != self.rssencl.ws.HTTP_OK:
        # Only log for non-loopback requests.
        if url.find(self.rssencl.ws.LOOPBACK) == -1:
            logger.error(
                f"{self.rssencl.LDR_R1_ENCL}:: http request {url} to get enclosure failed with \
                err {response.status_code}")
        return
    response_data = json.loads(response.text)
    # NOTE(review): only the first enclosure ([0]) is inspected --
    # presumably a single-enclosure deployment; confirm for multi-encl.
    encl_drawers = response_data["enclosures"][0]["drawers"]
    if encl_drawers:
        # Flatten drawer -> sideplanes into a single list.
        for drawer in encl_drawers:
            sideplane_list = drawer["sideplanes"]
            for sideplane in sideplane_list:
                sideplane_expanders.append(sideplane)
    return sideplane_expanders
def _get_logical_volumes(self, pool_serial_number):
    """Receives list of Logical Volumes from API.
    URL: http://<host>/api/show/volumes/pool/<pool_serial_number>

    :param pool_serial_number: serial number of the pool whose volumes
                               are requested
    :return: list of volume dicts (or None when the "volumes" key is
             absent or the web-service request fails)
    """
    url = self.rssencl.build_url(self.rssencl.URI_CLIAPI_SHOWVOLUMES)
    url = f"{url}/pool/{pool_serial_number}"
    response = self.rssencl.ws_request(url, self.rssencl.ws.HTTP_GET)
    if not response:
        logger.warn(
            f"{self.rssencl.LDR_R1_ENCL}:: Logical Volume status unavailable as ws request {url}"
            " failed")
        return
    if response.status_code != self.rssencl.ws.HTTP_OK:
        logger.error(
            f"{self.rssencl.LDR_R1_ENCL}:: http request {url} to get logical volumes failed with \
            err {response.status_code}")
        return
    response_data = json.loads(response.text)
    # .get() keeps this None-safe if the response lacks "volumes".
    logical_volumes = response_data.get("volumes")
    return logical_volumes
def run(self): """Run disk monitoring periodically on its own thread.""" # Do not proceed if module is suspended if self._suspended == True: self._scheduler.enter(self.pollfreq_disksensor, self._priority, self.run, ()) return # Allow RealStor Encl MC to start services. #time.sleep(self.rssencl.REALSTOR_MC_BOOTWAIT) # Check for debug mode being activated self._read_my_msgQ_noWait() # poll all disk status and raise events if # insertion/removal detected self._rss_check_disks_presence() #Do not proceed further if latest disks info can't be validated due to store function error if not self.invalidate_latest_disks_info: # Polling system status self.rssencl.get_system_status() # check for disk faults & raise if found self._rss_check_disk_faults() else: logger.warn("Can not validate disk faults or presence due to persistence store error") # Reset debug mode if persistence is not enabled self._disable_debug_if_persist_false() # Fire every configured seconds to poll disks status self._scheduler.enter(self.pollfreq_disksensor, self._priority, self.run, ())
def _extract_iem_components(self, iem):
    """Splits iem in multiple components using a delimiter and return
    tuple of various extracted components.

    :param iem: raw IEM string ("<IEC_KEYWORD>:<IEC>:<description>")
    :return: list [severity, source_id, component_id, module_id,
             event_id, description], or None for an invalid IEM
    :raises TypeError: when iem is None or blank
    """
    components = []
    if iem is None or len(iem.strip()) == 0:
        raise TypeError
    things_to_strip = f"{self.IEC_KEYWORD}:"
    # NOTE(review): this slices off len("<keyword>:") characters without
    # verifying the string actually starts with the keyword -- assumes
    # the caller pre-filtered IEM lines; confirm.
    splitted_iem = iem[len(things_to_strip):].strip()
    # Split IEM by ":" delimiter. First part is IEC and second part
    # is description.
    iem_parts = splitted_iem.split(":")
    # Check for minimum length of IEC and presence of description
    if len(iem_parts) < 2 or len(iem_parts[0]) < self.IEC_LENGTH:
        logger.warn("Invalid IEM. Missing component")
        components = None
    else:
        # Fixed-position slicing of the IEC code.
        components.append(iem_parts[0][0])  # Severity level
        components.append(iem_parts[0][1])  # Source ID
        components.append(iem_parts[0][2:5])  # Component ID
        components.append(iem_parts[0][5:8])  # Module ID
        components.append(iem_parts[0][8:])  # Event ID
        # Re-join in case the description itself contains ':'
        components.append(":".join(iem_parts[1:]))  # Description
    return components
def _check_raid_state(self, device):
    """Poll the md sync_action file of *device* until it reports the
    expected state, retrying up to MAX_RETRIES times.

    :param device: md device name appended to RaidDataConfig.DIR
    :return: "success" when the expected state is seen (and recorded in
             a unique output file), "failed" after retries are
             exhausted, or None if the loop never sets a status
    :raises: re-raises any unexpected exception after logging it
    """
    try:
        status = None
        raid_check = 0
        raid_dir = RaidDataConfig.DIR.value
        sync_action_file = RaidDataConfig.SYNC_ACTION_FILE.value
        while raid_check <= RaidDataConfig.MAX_RETRIES.value:
            # Fresh result file per attempt so prior runs aren't clobbered.
            self.output_file = self._get_unique_filename(RaidDataConfig.RAID_RESULT_FILE_PATH.value, device)
            STATE_COMMAND = 'cat ' + raid_dir + device + sync_action_file
            logger.debug('Executing STATE_COMMAND:{}'.format(STATE_COMMAND))
            response, error = self._run_command(STATE_COMMAND)
            if error:
                logger.warn("Error in cmd{} in raid health monitor"
                            .format(STATE_COMMAND))
                raid_check += 1
            else:
                if response == RaidDataConfig.STATE_COMMAND_RESPONSE.value:
                    # Expected state reached: persist it and stop polling.
                    status = "success"
                    with open(self.output_file, 'w') as raid_file:
                        raid_file.write(RaidDataConfig.STATE_COMMAND_RESPONSE.value + "\n")
                    break
                else:
                    # Not yet in the expected state; wait and retry.
                    status = "failed"
                    raid_check += 1
                    time.sleep(WAIT_BEFORE_RETRY)
        return status
    except Exception as ae:
        logger.error("Failed in checking RAID device state. ERROR:{}"
                     .format(str(ae)))
        raise
def get_sensor_list_by_type(self, fru_type):
    """Returns the sensor list based on FRU type using ipmitool utility
    ipmitool sdr type '<FRU>'.
    Example of output form 'sdr type 'Fan'' command:
    Sys Fan 2B       | 33h | ok  | 29.4 | 5332 RPM
    ( sensor_id | sensor_num | status | entity_id |
     <FRU Specific attribute> )
    Params : self, fru_type
    Output Format : List of Tuple
    Output Example : [(HDD 1 Status, F1, ok, 4.2, Drive Present),]
    """
    sensor_list_out, error, retcode = \
        self._run_ipmitool_subcommand(f"sdr type '{fru_type.title()}'")
    if retcode != 0:
        msg = "ipmitool sdr type command failed: {0}".format(error)
        logger.warn(msg)
        return

    parsed_sensors = []
    for row in sensor_list_out.split("\n"):
        # A blank line terminates the useful portion of the output.
        if not row:
            break
        # Example rows:
        #   Sys Fan 2B       | 33h | ok  | 29.4 | 5332 RPM
        #   PS1 1a Fan Fail  | A0h | ok  | 29.13 |
        #   HDD 1 Status     | F1h | ok  |  4.2 | Drive Present
        columns = [column.strip() for column in row.split("|")]
        sensor_id, sensor_num, status, entity_id, reading = columns
        # Normalize the sensor number: drop the trailing 'h', lowercase.
        parsed_sensors.append((sensor_id, sensor_num.strip("h").lower(),
                               status, entity_id, reading))
    return parsed_sensors
def _restart_module(self, module_name): """Restart a module""" self._log_debug("_restart_module, module_name: %s" % module_name) try: # Stop the module if it's running and let existing thread die gracefully if self._status_module(module_name) is True: self._stop_module(module_name) # Allow module a few seconds to shut down gracefully max_wait = 10 curr_wait = 1 while self._status_module(module_name) is True: time.sleep(3) logger.info("Retrying: %s" % str(curr_wait)) self._stop_module(module_name) curr_wait += 1 if curr_wait > max_wait: break # Start the module self._start_module(module_name) except Exception as ae: logger.warn("Restart thread failed: %s" % str(ae)) self._thread_response = "Restart Failed" else: self._thread_response = "Restart Successful"
def _get_sensor_properties(self, sensor_name):
    """ Get all the properties of a sensor.
        Returns a tuple (common, specific) where
        common is a dict of common sensor properties and
        their values for this sensor, and
        specific is a dict of the properties specific to this sensor
        e.g. ipmitool sensor get 'PS1 Temperature'
        Locating sensor record...
        Sensor ID              : PS1 Temperature (0x5c)
         Entity ID             : 10.1
         Sensor Type (Threshold)  : Temperature
         Sensor Reading        : 16 (+/- 0) degrees C
         Status                : ok
         Lower Non-Recoverable : na
         Lower Critical        : na
         Lower Non-Critical    : na
         Upper Non-Critical    : 55.000
         Upper Critical        : 60.000
         Upper Non-Recoverable : na
         Positive Hysteresis   : 2.000
         Negative Hysteresis   : 2.000
         Assertion Events      :
         Assertions Enabled    : unc+ ucr+
         Deassertions Enabled  : unc+ ucr+
    """
    try:
        # NOTE(review): here _run_ipmitool_subcommand is unpacked into
        # two values, while get_sensor_list_by_type unpacks three from a
        # command of the same family -- confirm _executor's variant
        # really returns (output, retcode).
        sensor_get_response, return_code = self._executor._run_ipmitool_subcommand("sensor get '{0}'".format(sensor_name))
        if return_code == 0:
            return self._response_to_dict(sensor_get_response)
        else:
            # Command failed: log and convert the error text instead.
            msg = "sensor get '{0}' : command failed with error {1}".format(sensor_name, sensor_get_response)
            logger.warn(msg)
            return self._errorstr_to_dict(sensor_get_response)
    except Exception as err:
        # Returns None implicitly on unexpected failure.
        logger.error("Exception occurred in _get_sensor_properties for cmd - sensor get '{0}': {1}".format(sensor_name, err))
def _get_host_update_data(self):
    """Retrieves node information for the host_update json message

    Populates self.up_time, self.boot_time, self.uname,
    self.total_memory, self.process_count, self.logged_in_users and
    self.running_process_count from psutil/os.
    """
    uname_keys = ("sysname", "nodename", "version", "release", "machine")
    self.up_time = int(psutil.boot_time())
    self.boot_time = self._epoch_time
    self.uname = dict(zip(uname_keys, os.uname()))
    self.total_memory = dict(psutil.virtual_memory()._asdict())
    self.process_count = len(psutil.pids())
    self.logged_in_users = [dict(user._asdict())
                            for user in psutil.users()]

    # Calculate the current number of running processes at this moment:
    # anything not in one of these inactive states counts as running.
    inactive_states = (psutil.STATUS_ZOMBIE, psutil.STATUS_DEAD,
                       psutil.STATUS_STOPPED, psutil.STATUS_IDLE,
                       psutil.STATUS_SLEEPING)
    total_running_proc = 0
    for proc in psutil.process_iter():
        try:
            if proc.as_dict(attrs=['status'])['status'] not in inactive_states:
                total_running_proc += 1
        except psutil.NoSuchProcess:
            # The process vanished between enumeration and inspection.
            logger.warn(
                f"(psutil) Process '{proc.name()}' exited unexpectedly.")
    self.running_process_count = total_running_proc
def enter(service):
    """Record that *service* is disabled: log it, drop it from the
    tracking sets, and detach its properties-changed signal if any."""
    logger.warn("{} service is disabled, it will not be "
                "monitored".format(service.name))
    for tracking_set in (Service.non_active, Service.monitoring_disabled):
        tracking_set.discard(service.name)
    signal = service.properties_changed_signal
    if signal:
        signal.remove()
def _start_module(self, module_name):
    """Start a module

    :param module_name: key into self._sspl_modules identifying the
                        module to start

    Sets self._thread_response to "Start Successful" optimistically
    before spawning the thread, or "Start Failed" on exception.
    """
    self._log_debug("_start_module, module_name: %s" % module_name)
    try:
        # Already running: nothing to do.
        if self._status_module(module_name) is True:
            self._log_debug("_start_module, status: True")
            return
        self._thread_response = "Start Successful"
        # NOTE: This is internal code that is currently unused.
        # If this is brought into use again its interaction
        # with the init dependency code will need to be considered
        module_thread = Thread(target=_run_thread_capture_errors,
                               args=(self._sspl_modules[module_name],
                                     self._sspl_modules, self._msgQlist,
                                     self._conf_reader, self._product))
        # Put a configure debug message on the module's queue before starting it up
        if self.debug_section is not None:
            self._write_internal_msgQ(module_name, self.debug_section)
        module_thread.start()
    except Exception as ae:
        logger.warn("Start thread failed: %s" % str(ae))
        self._thread_response = "Start Failed"
def _rss_raise_disk_alert(self, alert_type, disk_info): """Raise disk alert with supported alert type""" #logger.debug("Raise - alert type {0}, info {1}".format(alert_type,disk_info)) if not disk_info: logger.warn("disk_info None, ignoring") return if alert_type not in self.rssencl.fru_alerts: logger.error(f"Supplied alert type [{alert_type}] not supported") return # form json with default values disk = dict.fromkeys(self.disk_generic_info, "NA") disk['slot'] = -1 disk['blink'] = 0 disk['enclosure-id'] = 0 # Build data for must fields in fru disk data for item in self.disk_generic_info: if item in disk_info: disk[item] = disk_info[item] encl = self.rssencl.ENCL_FAMILY disk[encl] = self.rssencl.LDR_R1_ENCL # Build data for platform specific fields in fru disk data # get remaining extra key value pairs from passed disk_info extended_info = {key:disk_info[key] for key in disk_info if key not in\ disk and self.NUMERIC_IDENTIFIER not in key} # notify realstor encl msg handler self._send_json_msg(alert_type, disk, extended_info)
def _get_disk(self, disk): """Retreive realstor disk info using cli api /show/disks""" # make ws request url = self.rssencl.build_url( self.rssencl.URI_CLIAPI_SHOWDISKS) # TODO: Add pagination to response for '*' case. # storage enclosures will have # ~ 80 to 100 drives, which will make the # response huge. if(disk != self.RESOURCE_ALL): try: diskId = "0.{}".format(int(disk)) except ValueError: msg = "Wrong format for disk resource value: {},"\ " expected int or '*'".format( disk) logger.error("RealStorActuator: _get_disk: {}".format(msg)) return url = f"{url}/{diskId}" url = f"{url}/detail" response = self.rssencl.ws_request( url, self.rssencl.ws.HTTP_GET) if not response: logger.warn("{0}:: Disks status unavailable as ws request {1}" " failed".format(self.rssencl.LDR_R1_ENCL, url)) return if response.status_code != self.rssencl.ws.HTTP_OK: if url.find(self.rssencl.ws.LOOPBACK) == -1: logger.error("{0}:: http request {1} to poll disks failed with" " err {2}".format(self.rssencl.LDR_R1_ENCL, url, response.status_code)) return try: jresponse = json.loads(response.content) except ValueError as badjson: logger.error("%s returned mal-formed json:\n%s" % (url, badjson)) if jresponse: api_resp = self.rssencl.get_api_status(jresponse['status']) if ((api_resp == -1) and (response.status_code == self.rssencl.ws.HTTP_OK)): logger.warn("/show/disks api response unavailable, " "marking success as http code is 200") api_resp = 0 if api_resp == 0: drives = jresponse['drives'] return drives
def _get_enclosure_data(self, sasurl, response):
    """Fill response['specific_info'] with expander-port (SAS link)
    health data fetched from *sasurl*.

    :param sasurl: full URL of the /show/sas-link-health request
    :param response: dict mutated in place with the port data
    :return: None on any web-service failure; otherwise None with
             *response* populated
    """
    logger.info("url comes into _get_enclosure_data is:{0}".format(sasurl))
    sas_response = self.rssencl.ws_request(sasurl, self.rssencl.ws.HTTP_GET)
    logger.info(
        "_get_sas_port_status, sasresponse for coming is:{0}".format(
            sas_response))
    if not sas_response:
        # BUG FIX: this log call previously referenced the undefined
        # name 'url', raising NameError instead of logging.
        logger.warn(
            "{0}:: sas port status unavailable for request:{1} --gets failed"
            .format(self.rssencl.LDR_R1_ENCL, sasurl))
        return None
    if sas_response.status_code != self.rssencl.ws.HTTP_OK:
        if sasurl.find(self.rssencl.ws.LOOPBACK) == -1:
            # BUG FIX: previously referenced undefined 'sasresponse'
            # (missing underscore), raising NameError here.
            logger.error(
                "{0}:: http request {1} to sas port health status failed with error:{2}"
                .format(self.rssencl.LDR_R1_ENCL, sasurl,
                        sas_response.status_code))
        return None
    json_response = None
    try:
        json_response = json.loads(sas_response.content)
    except ValueError as v_error:
        logger.error("{0} returned invalid json:\n{1}".format(
            sasurl, v_error))
    if json_response is not None:
        api_status = self.rssencl.get_api_status(json_response['status'])
        # NOTE(review): unlike the sibling fetchers, api_status is NOT
        # reset to 0 here after this warning, so the data is still
        # skipped -- confirm whether that is intended.
        if ((api_status == -1)
                and (sas_response.status_code == self.rssencl.ws.HTTP_OK)):
            logger.warn("/show/sas-link-health api response unavailable, "
                        "marking success as http code is 200")
        if api_status == 0:
            if self._resource_id == self.RESOURCE_ALL:
                # Report every expander port, tagging each entry with a
                # resource_id derived from its name.
                response['specific_info'] = []
                response['specific_info'].extend(
                    json_response.get("expander-ports"))
                for interfc in response['specific_info']:
                    interfc['resource_id'] = interfc['name']
            else:
                # Report only the requested port, matched by name
                # (case-insensitive); for-else records a not-found reason.
                response['specific_info'] = {}
                for port_enclr in json_response.get("expander-ports"):
                    logger.info(port_enclr)
                    if self._resource_id.lower(
                    ) == port_enclr['name'].lower():
                        response['specific_info'] = port_enclr
                        break
                else:
                    response['specific_info'][
                        "reason"] = "Data not available for port interface: {}.".format(
                            self._resource_id.lower())
def _notify_DiskMsgHandler(self, status_file: str, serial_num_file: str):
    """Send the event to the disk message handler for generating JSON message

    :param status_file: path to the drivemanager status file for a drive
    :param serial_num_file: path to the file holding that drive's
                            serial number

    No-op when either file is missing or when the drive's status has
    not changed since the last notification.
    """
    if not os.path.isfile(status_file):
        logger.warn(
            f"status_file: {status_file} does not exist, ignoring.")
        return
    if not os.path.isfile(serial_num_file):
        logger.warn(
            f"serial_num_file: {serial_num_file} does not exist, ignoring."
        )
        return
    # Read in status and see if it has changed
    with open(status_file, "r") as datafile:
        status = datafile.read().replace('\n', '')
    # See if there's a reason file; if so it refines the status as
    # "<status>_<reason>".
    reason_file = os.path.join(os.path.dirname(status_file), "reason")
    if os.path.isfile(reason_file):
        with open(reason_file, "r") as datafile:
            reason = datafile.read().replace('\n', '')
            status = f"{status}_{reason}"
    # Do nothing if the drive status has not changed
    if self._drive_status[os.path.dirname(status_file)] == status:
        return
    # Update the status for this drive
    self._log_debug(
        f"Status change, status_file: {status_file}, status: {status}")
    self._drive_status[os.path.dirname(status_file)] = status
    # Read in the serial number
    with open(serial_num_file, "r") as datafile:
        serial_number = datafile.read().replace('\n', '')
    # Remove base dcs dir since it contains no relevant data
    data_str = status_file[len(self._drive_mngr_base_dir) + 1:]
    # Send a message to the disk manager handler to create and transmit json msg
    internal_json_msg = json.dumps({
        "sensor_response_type": "disk_status_drivemanager",
        "event_path": data_str,
        "status": status,
        "serial_number": serial_number
    })
    # Send the event to disk message handler to generate json message
    self._write_internal_msgQ(DiskMsgHandler.name(), internal_json_msg)
    # Reset debug mode if persistence is not enabled
    self._disable_debug_if_persist_false()
def exists(self, key):
    """check if key is present

    :param key: filesystem path to test
    :return: tuple (key_present, status) where status is "Success" when
             the check itself completed, "Failure" otherwise
    """
    key_present = False
    status = "Failure"
    try:
        key_present = os.path.exists(key)
        status = "Success"
    except Exception as gerr:
        # BUG FIX: the original message interpolated the exception where
        # the key was expected; now both key and error are reported.
        logger.warn("Error[{0}] while checking if {1} is present"
                    .format(gerr, key))
    return key_present, status
def _get_degraded_state_modules_list(self): """Reads list of modules to run in degraded state and returns a list of those modules. """ # List of modules to run in degraded mode modules_to_resume = [] try: # Read list of modules from conf file to load in degraded mode modules_to_resume = self._conf_reader._get_value_list(self.SSPL_SETTING, self.DEGRADED_STATE_MODULES) except Exception as e: logger.warn("ThreadController: Configuration not found, degraded_state_modules") return modules_to_resume
def check_system_faults_changed(self):
    """Check change in faults state

    Returns True when there are outstanding existing faults, or when the
    latest fault snapshot differs from the cached one; False otherwise.
    """
    if self.existing_faults:
        #logger.debug("existing_faults TRUE")
        return True
    if self.latest_faults == self.memcache_faults:
        return False
    logger.warn("System faults state changed, updating cached faults!!")
    return True
def _load_json_file(self, key): """ Load dict obj from json in given absolute file path""" value = None absfilepath = key # Check if directory exists directory_path = os.path.join(os.path.dirname(absfilepath), "") if not os.path.isdir(directory_path): logger.critical("Path doesn't exists: {0}".format(directory_path)) return try: fh = open(absfilepath,"rb") try: value = pickle.load(fh) except: value = fh.read() except IOError as err: logger.warn("I/O error[{0}] while loading data from file {1}): {2}"\ .format(err.errno,absfilepath,err)) except ValueError as jsonerr: logger.warn("JSON error{0} while loading from {1}".format(jsonerr, absfilepath)) value = None except OSError as oserr: logger.warn("OS error{0} while loading from {1}".format(oserr, absfilepath)) except Exception as gerr: logger.warn("Error{0} while reading data from file {1}"\ .format(gerr, absfilepath)) else: fh.close() return value
def build_url(self, uri):
    """Build request url"""
    # Append ":<port>" only when the configured port is numeric;
    # otherwise the port is dropped (with a warning) and the default
    # HTTP port applies.
    if self.active_wsport.isdigit():
        wsport = ":" + self.active_wsport
    else:
        wsport = ""
        logger.warn("Non-numeric webservice port configured [%s], ignoring",\
            self.active_wsport)
    return "http://" + self.active_ip + wsport + "/api" + uri
def _process_missing_md_devices(self, md_device_list, drive_dict):
    """ checks the md raid configuration file, compares all it's
    entries with list of arrays from mdstat file and sends
    missing entry

    :param md_device_list: md array device paths parsed from mdstat
    :param drive_dict: per-device drive status info used when a
                       previously-missing array reappears
    """
    if not os.path.isfile(self.RAID_CONF_FILE):
        logger.warn(
            f"_process_missing_md_devices, MDRaid configuration file {self.RAID_CONF_FILE} is missing"
        )
        return
    conf_device_list = []
    with open(self.RAID_CONF_FILE, 'r') as raid_conf_file:
        raid_conf_data = raid_conf_file.read().strip().split("\n")
    for line in raid_conf_data:
        try:
            raid_conf_field = line.split(" ")
            # Only consider uncommented ARRAY lines that name an md device.
            if "#" not in raid_conf_field[0] and "ARRAY" in raid_conf_field[0] and \
                    "/md" in raid_conf_field[1]:
                # Mapped the device i.e. /dev/md/1 and /dev/md1 will be the same device.
                map_device = raid_conf_field[1].split('md/')
                if len(map_device) > 1:
                    conf_device_list.append(map_device[0] + 'md' + map_device[1])
                else:
                    conf_device_list.append(raid_conf_field[1])
        except Exception as ae:
            # A malformed line aborts the whole scan (not just this line).
            self._log_debug(
                f"_process_missing_md_devices, error retrieving raid entry \
                from {self.RAID_CONF_FILE} file: {str(ae)}")
            return
    # compare conf file raid array list with mdstat raid array list
    for device in conf_device_list:
        if device not in md_device_list and device not in self._faulty_device_list:
            # Configured array vanished from mdstat: raise a fault and
            # remember it so the alert is not repeated.
            self.alert_type = self.FAULT
            self._faulty_device_list.add(device)
            self._send_json_msg(self.alert_type, device, device,
                                self.RAID_DOWN_DRIVE_STATUS)
        elif device in md_device_list and device in self._faulty_device_list:
            # Previously-faulty array is back: resolve the fault.
            self.alert_type = self.FAULT_RESOLVED
            self._map_drive_status(device, drive_dict, "Down/Recovery")
            self._faulty_device_list.remove(device)
            self._send_json_msg(self.alert_type, device, device,
                                self._drives[device])