def __call__(self):
    """Execute the plugin when the object is invoked directly.

    Runs argument parsing, logger setup, config reading and the REST
    API client creation, then the pre_run/run hooks.  Any exception is
    converted into an UNKNOWN plugin exit.
    """
    try:
        self.parse_args()
        self.init_root_logger()
        self._read_config()
        self.parse_args_second()
        log.debug("Creating REST API client object ...")
        args = self.argparser.args
        self.api = RestApi.from_config(
            extra_config_file=args.extra_config_file,
            api_url=args.api_url,
            timeout=self.timeout,
        )
        if self.verbose > 2:
            log.debug("Current object:\n%s", pp(self.as_dict()))
        self.pre_run()
        self.run()
    except Exception as e:
        ename = e.__class__.__name__
        appname = os.path.basename(sys.argv[0])
        msg = "%s on executing %r: %s" % (ename, appname, e)
        if self.verbose:
            self.handle_error(str(e), ename, do_traceback=True)
        self.exit(nagios.state.unknown, msg)
def _read_config(self):
    """Load settings from an optional plugin configuration file.

    A missing configuration file is not an error - the method simply
    returns without calling self.read_config().
    """
    config = NagiosPluginConfig()
    try:
        found_files = config.read()
    except NoConfigfileFound as e:
        log.debug("Could not read NagiosPluginConfig: %s", e)
        return
    log.debug("Read configuration files:\n%s", pp(found_files))
    if self.verbose > 2:
        log.debug("Read configuration:\n%s", pp(config.__dict__))
    self.read_config(config)
def evaluate_proc_stats(self):
    """Classify collected processes by their block-I/O delay percentage.

    Buckets in self.process_count: '90', '50' and '10' count processes
    whose delay percentage reached at least that value, 'total' counts
    all processes, and '0' receives the remainder.

    Fix: removed dead code - the unused loop counter ``i``, the unused
    ``duration`` local, and the initial assignments of ``blkio_delay``
    and ``proc_duration`` that were immediately overwritten.
    """
    for proc in self.process_list.processes.values():
        self.processes.append(proc)
    for proc in self.processes:
        # assumes blkio_delay_total is in nanoseconds (taskstats);
        # dividing by (seconds * 1e7) then yields a percentage - verify
        blkio_delay = proc.stats_accum.blkio_delay_total
        proc_duration = time.time() - proc.stats_accum_timestamp
        blkio_delay_percent = float(blkio_delay) / (proc_duration * 10000000.0)
        self.process_count['total'] += 1
        if blkio_delay_percent >= 90:
            self.process_count['90'] += 1
        elif blkio_delay_percent >= 50:
            self.process_count['50'] += 1
        elif blkio_delay_percent >= 10:
            self.process_count['10'] += 1
    self.process_count['0'] = (
        self.process_count['total'] - self.process_count['90']
        - self.process_count['50'] - self.process_count['10'])
    if self.verbose > 1:
        log.debug("Got the following results:\n%s", pp(self.process_count))
def evaluate_proc_stats(self):
    """Bucket every collected process by how much of its wall-clock time
    was spent waiting on block I/O, updating self.process_count."""
    self.processes.extend(self.process_list.processes.values())
    counts = self.process_count
    for process in self.processes:
        elapsed = time.time() - process.stats_accum_timestamp
        delay_pct = float(process.stats_accum.blkio_delay_total) / (elapsed * 10000000.0)
        counts['total'] += 1
        # same classification as an if/elif chain over 90/50/10
        for limit in (90, 50, 10):
            if delay_pct >= limit:
                counts[str(limit)] += 1
                break
    counts['0'] = counts['total'] - counts['90'] - counts['50'] - counts['10']
    if self.verbose > 1:
        log.debug("Got the following results:\n%s", pp(counts))
def get_data(self, force=False):
    """
    Main method to retrieve the data about the VG with the 'vgs' command.

    Runs (roughly):
      vgs --unit m --noheadings --nosuffix --separator ';' --unbuffered \
          -o vg_fmt,vg_name,vg_attr,vg_extent_size,vg_extent_count,vg_free_count <vg>
    and stores format, extent size/count, free extents and the attribute
    set on the object.

    Fix: the previous locale restore tested truthiness, so an existing
    but empty LC_NUMERIC ('') was deleted instead of restored; now it
    tests ``is not None``.

    @param force: retrieve data, even if self.checked is True
    @type force: bool
    """
    if self.checked and not force:
        return
    fields = ('vg_fmt', 'vg_name', 'vg_attr', 'vg_extent_size',
              'vg_extent_count', 'vg_free_count')
    cmd = [
        self.vgs_cmd, '--unit', 'm', '--noheadings', '--nosuffix',
        '--separator', ';', '--unbuffered', '-o', ','.join(fields), self.vg
    ]
    # Force the C locale so vgs prints decimal points, not locale commas.
    current_locale = os.environ.get('LC_NUMERIC')
    if self.verbose > 2:
        log.debug("Current locale is %r, setting to 'C'.", current_locale)
    os.environ['LC_NUMERIC'] = 'C'
    try:
        (ret, stdoutdata, stderrdata) = self.plugin.exec_cmd(cmd)
    finally:
        if current_locale is not None:
            os.environ['LC_NUMERIC'] = current_locale
        else:
            del os.environ['LC_NUMERIC']
    if self.verbose > 3:
        log.debug("Got from STDOUT: %r", stdoutdata)
    fields = stdoutdata.strip().split(';')
    if self.verbose > 2:
        log.debug("Got fields:\n%s", pp(fields))
    # Output columns match the '-o' list above:
    # 0: vg_fmt, 1: vg_name, 2: vg_attr, 3: vg_extent_size,
    # 4: vg_extent_count, 5: vg_free_count
    self._format = fields[0]
    self._ext_size = int(float(fields[3]))
    self._ext_count = int(fields[4])
    self._ext_free = int(fields[5])
    attr_str = fields[2]
    attr = set()
    # The first five vg_attr characters are flags; '-' means unset.
    for i in (0, 1, 2, 3, 4):
        if attr_str[i] != '-':
            attr.add(attr_str[i])
    # Position 5 set to 'c' is recorded as upper-case 'C' (presumably
    # 'clustered' - see vgs(8)) to avoid clashing with other flags.
    if attr_str[5] == 'c':
        attr.add('C')
    self._attr = attr
    self._checked = True
def __call__(self):
    """Invoke the plugin: run the pre-call hook, optionally dump the
    object state for debugging, then execute the main call."""
    self.pre_call()
    if self.verbose > 2:
        log.debug("Current object:\n%s", pp(self.as_dict()))
    self.call()
def test_read_cfgfile(self):
    """Test reading the temporary config file and dumping all of its
    sections and options; reading must not raise NoConfigfileFound.

    Fix: replaced the non-idiomatic ``not section in c`` with
    ``section not in c`` (PEP 8).
    """
    log.info("Testing read temp configfile %r.", self.tmp_cfg)
    try:
        cfg = NagiosPluginConfig()
        configs = cfg.read(self.tmp_cfg)
        log.debug("Read configuration files:\n%s", pp(configs))
        c = {}
        for section in cfg.sections():
            if section not in c:
                c[section] = {}
            for option in cfg.options(section):
                val = cfg.get(section, option)
                c[section][option] = val
        log.debug("Found options in config:\n%s", pp(c))
    except NoConfigfileFound as e:
        self.fail("Could not read NagiosPluginConfig by a %s: %s" % (
            e.__class__.__name__, str(e)))
def test_read_cfgfile(self):
    """Read the temporary config file and collect every section/option
    pair; a NoConfigfileFound exception fails the test."""
    log.info("Testing read temp configfile %r.", self.tmp_cfg)
    try:
        cfg = NagiosPluginConfig()
        configs = cfg.read(self.tmp_cfg)
        log.debug("Read configuration files:\n%s", pp(configs))
        c = {}
        for section in cfg.sections():
            collected = c.setdefault(section, {})
            for option in cfg.options(section):
                collected[option] = cfg.get(section, option)
        log.debug("Found options in config:\n%s", pp(c))
    except NoConfigfileFound as e:
        self.fail("Could not read NagiosPluginConfig by a %s: %s" % (
            e.__class__.__name__, str(e)))
def __str__(self):
    """Render this object as a pretty-printed string of its dict form.

    @return: structure as string
    @rtype: str
    """
    return pp(self.as_dict())
def parse_args(self, args=None):
    """
    Executes self.argparser.parse_args() and validates plugin options.

    Evaluates the 'softwareraid' ini section (spare_ok), the --no-spare
    switch and the device argument.  A device of 'all' (or no device at
    all) switches to checking every MD device; otherwise the single
    given device must exist and be a block device.

    Fix: corrected the grammar of the "doesn't exists" error message.

    @param args: the argument strings to parse. If not given, they are
                 taken from sys.argv.
    @type args: list of str or None
    """
    super(CheckSoftwareRaidPlugin, self).parse_args(args)
    self.init_root_logger()
    ini_opts = self.argparser._load_config_section('softwareraid')
    log.debug("Got options from ini-Parser: %s", pp(ini_opts))
    if ini_opts and 'spare_ok' in ini_opts:
        self.spare_ok = to_bool(ini_opts['spare_ok'])
    # the command line switch overrides the ini setting
    if self.argparser.args.no_spare:
        self.spare_ok = False
    # accepts 'md0', '/dev/md0' or '/sys/block/md0'
    re_dev = re.compile(r'^(?:/dev/|/sys/block/)?(md\d+)$')
    if self.argparser.args.device:
        if self.argparser.args.device.lower() == 'all':
            self.check_all = True
        else:
            match = re_dev.search(self.argparser.args.device)
            if not match:
                self.die("Device %r is not a valid MD device." % (
                    self.argparser.args.device))
            self.devices.append(match.group(1))
    else:
        self.check_all = True
    if self.check_all:
        return
    # Verify that the single given device really is a block device.
    dev = self.devices[0]
    dev_dev = os.sep + os.path.join('dev', dev)
    sys_dev = os.sep + os.path.join('sys', 'block', dev)
    if not os.path.isdir(sys_dev):
        self.die("Device %r is not a block device." % (dev))
    if not os.path.exists(dev_dev):
        self.die("Device %r doesn't exist." % (dev_dev))
    dev_stat = os.stat(dev_dev)
    dev_mode = dev_stat.st_mode
    if not stat.S_ISBLK(dev_mode):
        self.die("%r is not a block device." % (dev_dev))
def __call__(self):
    """Run the kernel check: compare architecture, OS name and version
    of the running kernel against the configured expectations and exit
    with the resulting Nagios state."""
    self.parse_args()
    self.init_root_logger()
    if self.verbose > 2:
        log.debug("Current object:\n%s", pp(self.as_dict()))
    (sysname, _node, release, _version, machine) = os.uname()
    state = nagios.state.ok
    out = "%s kernel for %s with version %r." % (sysname, machine, release)
    if self.arch is not None and self.arch.lower() != machine.lower():
        state = self.max_state(state, nagios.state.critical)
        out += " Architecture is not %r." % (self.arch)
    if self.os is not None and self.os.lower() != sysname.lower():
        state = self.max_state(state, nagios.state.critical)
        out += " Operating system is not %r." % (self.os)
    if self.min_version is not None:
        expected = debian.debian_support.Version(self.min_version)
        if self.verbose > 1:
            log.debug("Expecting parsed version %r.", expected)
        got = debian.debian_support.Version(release)
        if self.verbose > 1:
            log.debug("Got parsed version %r.", got)
        if got < expected:
            if self.warning:
                state = self.max_state(state, nagios.state.warning)
            else:
                state = self.max_state(state, nagios.state.critical)
            out += " Expected min. kernel version: %r." % (self.min_version)
    self.exit(state, out)
def run(self):
    """Main execution method: check all storage and image exports of
    this server and exit with a Nagios state plus performance data."""
    self.get_current_cluster()
    self.get_cluster_pservers()
    self.all_api_exports = {}
    self.existing_exports = {}
    self.count = {
        'exported_devs': 0,
        'exported_luns': 0,
        'missing': 0,
        'alien': 0,
        'ok': 0,
        'dummy': 0,
        'error': 0,
        'needless': 0,
    }
    self.get_api_storage_exports()
    self.get_api_image_exports()
    self.get_existing_exports()
    self.check_exports()
    self.check_ini_groups()
    log.debug("Results of check:\n%s", pp(self.count))
    total_errors = sum(
        self.count[key] for key in ('missing', 'error', 'needless'))
    state = self.threshold.get_status(total_errors)
    out = "Storage exports on %r seems to be okay." % (self.hostname)
    if total_errors == 1:
        out = "There is one error on exported or not exported volumes."
    elif total_errors > 1:
        out = "There are %d errors on exported or not exported volumes." % (total_errors)
    # performance data (the dummy-volume counter is deliberately skipped)
    self.add_perfdata(label='total_errors', value=total_errors, threshold=self.threshold)
    for key, value in self.count.items():
        if key == 'dummy':
            continue
        self.add_perfdata(label=key, value=value)
    self.exit(state, out)
def __call__(self):
    """Entry point: validate the running kernel against the expected
    architecture, operating system and minimum kernel version."""
    self.parse_args()
    self.init_root_logger()
    if self.verbose > 2:
        log.debug("Current object:\n%s", pp(self.as_dict()))
    uname = os.uname()
    cur_os = uname[0]
    cur_release = uname[2]
    cur_arch = uname[4]
    result = nagios.state.ok
    message = "%s kernel for %s with version %r." % (cur_os, cur_arch, cur_release)
    if self.arch is not None:
        if self.arch.lower() != cur_arch.lower():
            result = self.max_state(result, nagios.state.critical)
            message += " Architecture is not %r." % (self.arch)
    if self.os is not None:
        if self.os.lower() != cur_os.lower():
            result = self.max_state(result, nagios.state.critical)
            message += " Operating system is not %r." % (self.os)
    if self.min_version is not None:
        want = debian.debian_support.Version(self.min_version)
        if self.verbose > 1:
            log.debug("Expecting parsed version %r.", want)
        have = debian.debian_support.Version(cur_release)
        if self.verbose > 1:
            log.debug("Got parsed version %r.", have)
        if have < want:
            degraded = nagios.state.warning if self.warning else nagios.state.critical
            result = self.max_state(result, degraded)
            message += " Expected min. kernel version: %r." % (self.min_version)
    self.exit(result, message)
def __call__(self):
    """Check the VCB service by sending an XML job-status request.

    Installs signal handlers for a clean plugin exit, sends the request
    for the configured job id and records the raw result; a connection
    failure (NoListeningError) is rated CRITICAL.

    Fix: replaced the Python-2-only ``except NoListeningError, e:``
    with the ``as e`` form used everywhere else in this file (also
    valid on Python 3).
    """
    self.parse_args()
    self.init_root_logger()
    state = nagios.state.ok
    out = "VCB on %r port %d seems to be okay." % (
        self.host_address, self.vcb_port)
    if self.verbose > 2:
        log.debug("Current object:\n%s", pp(self.as_dict()))
    # Ensure a defined plugin exit on the common termination signals.
    for signo in (
            signal.SIGHUP, signal.SIGINT, signal.SIGABRT,
            signal.SIGTERM, signal.SIGUSR1, signal.SIGUSR2):
        signal.signal(signo, self.exit_signal_handler)
    xml = XML_TEMPLATE % (self.job_id)
    if self.verbose > 3:
        log.debug("XML to send:\n%s", xml)
    result = ''
    do_parse = False
    result_rcvd = False
    rstatus = None
    got_version = None
    try:
        result = self.send(xml)
        result = result.strip()
        do_parse = True
        result_rcvd = True
    except NoListeningError as e:
        result = "Error: " + str(e).strip()
        state = nagios.state.critical
def get_current_cluster(self):
    """Determine the cluster this storage server belongs to via the
    REST API and store it in self._current_cluster."""
    try:
        storages = self.api.pstorages(name=self.hostname)
    except RestApiError as e:
        self.die(str(e))
    except Exception as e:
        self.die("%s: %s" % (e.__class__.__name__, e))
    log.debug("Info about current storage server from API:\n%s", pp(storages))
    if not len(storages):
        self.die("Could not find information about current storage server %r." % (self.hostname))
    # API dicts are unicode-keyed under Python 2.
    key = 'cluster'
    if sys.version_info[0] <= 2:
        key = key.decode('utf-8')
    cluster_name = storages[0][key]
    if sys.version_info[0] <= 2:
        cluster_name = cluster_name.encode('utf-8')
    log.debug("Cluster of current storage server %r: %r", self.hostname, cluster_name)
    self._current_cluster = cluster_name
def get_existing_exports(self):
    """
    Discover the exports currently configured in SCST on this host.

    Fills self.existing_exports, a dict of dicts keyed by SCST device
    name, e.g.:
       {   'devicename': '5ce99e968d67ded2',
           'guid': UUID('600144f0-0001-8aa6-91a2-19f911e39d8f'),
           'has_errors': False,
           'luns': { 'pserver123': '26'},
           'read_only': False,
           'volume': '/dev/storage/0001-8aa6-91a2-19f911e39d8f'}

    Side effects: updates several counters in self.count
    (exported_devs, exported_luns, error, alien, dummy).
    """
    self.existing_exports = {}
    # 'first' lowers the verbosity threshold for the first device found,
    # so one full sample is logged at a lower -v level than the rest.
    first = True
    # Matches /dev/<storage_vg>/<short GUID> volume paths.
    pb_lv_pattern = (
        r'^' + os.sep + os.path.join('dev', self.storage_vg) + os.sep
        + r'((?:[0-9a-f]{4}-){3}[0-9a-f]{12})$')
    if self.verbose > 2:
        log.debug("Search pattern for ProfiBricks volumes: %r", pb_lv_pattern)
    pb_lv = re.compile(pb_lv_pattern)
    pattern = os.path.join(SCST_DEV_DIR, '*')
    log.debug("Searching for SCST devices in %r ...", pattern)
    dev_dirs = glob.glob(pattern)
    for dev_dir in dev_dirs:
        vl = 4
        if first:
            vl = 2
        filename_file = os.path.join(dev_dir, 'filename')
        handler_link = os.path.join(dev_dir, 'handler')
        has_errors = False
        read_only = False
        # Directories without 'filename' and 'handler' entries are not
        # exported devices - skip them silently.
        if not os.path.exists(filename_file):
            continue
        if not os.path.exists(handler_link):
            continue
        self.count['exported_devs'] += 1
        # Collect the LUNs (per initiator group) of this device.
        exported_dir = os.path.join(dev_dir, 'exported')
        luns = {}
        if os.path.isdir(exported_dir):
            exports = glob.glob(os.path.join(exported_dir, '*'))
            nr_exports = 0
            for export_link in exports:
                nr_exports += 1
                link_target = os.readlink(export_link)
                if os.path.isabs(link_target):
                    lun_dir = os.path.realpath(export_link)
                else:
                    # NOTE(review): relpath(target, dirname) looks like it
                    # was meant to be join(dirname, target) - verify.
                    lun_dir = os.path.realpath(
                        os.path.relpath(link_target, os.path.dirname(export_link)))
                # .../<ini_group>/luns/<lun_nr> - derive both parts from the path
                ini_group = os.path.basename(
                    os.path.dirname(os.path.dirname(lun_dir)))
                lun_nr = os.path.basename(lun_dir)
                luns[ini_group] = {'id': lun_nr, 'checked': False}
                self.count['exported_luns'] += 1
        devname = os.path.basename(dev_dir)
        export_filename = self.get_scst_export_filename(filename_file)
        if not export_filename:
            log.info("No devicename found for export %r.", devname)
            self.count['error'] += 1
            continue
        # Volumes outside the expected VG path are counted as 'alien'.
        match = pb_lv.search(export_filename)
        if not match:
            if self.verbose > 2:
                log.debug("Export %r for device %r is not a regular ProfitBricks volume.", devname, export_filename)
            self.count['alien'] += 1
            continue
        short_guid = match.group(1)
        # The well-known dummy device is counted separately, not checked.
        if short_guid == DUMMY_LV and devname == DUMMY_CRC:
            if self.verbose > 1:
                log.debug("Found the exported notorious dummy device.")
            self.count['dummy'] += 1
            continue
        guid = '600144f0-' + short_guid
        # SCST device names must equal the CRC64 digest of the GUID.
        digest = crc64_digest(guid)
        if not digest == devname:
            log.info(("Found mismatch between volume name %r and SCST "
                      "device name %r (should be %r)."), export_filename, devname, digest)
            self.count['error'] += 1
            continue
        # fc_ph_id is expected to be the GUID without dashes.
        fc_ph_id_expected = guid.replace('-', '')
        fc_ph_id_current = self.get_fc_ph_id(dev_dir)
        if fc_ph_id_expected != fc_ph_id_current:
            log.info("Export %r for device %r has wrong fc_ph_id %r.", devname, export_filename, fc_ph_id_current)
            has_errors = True
        # get_read_only() returning None is treated as an error state.
        read_only = self.get_read_only(dev_dir)
        if read_only is None:
            has_errors = True
        export = {
            'devicename': devname,
            'volume': export_filename,
            'guid': uuid.UUID(guid),
            'read_only': read_only,
            'has_errors': has_errors,
            'luns': luns,
            'checked': False,
        }
        self.existing_exports[devname] = export
        if has_errors:
            self.count['error'] += 1
        if self.verbose > 2:
            log.debug("Found export %r.", devname)
        if self.verbose > vl:
            log.debug("Got existing export:\n%s", pp(export))
        if first:
            first = False
def get_api_image_exports(self):
    """Collect all image exports this storage server should provide
    according to the REST API into self.image_exports.

    Two passes: first all image volumes with a replica on this host are
    mapped (volume UUID -> guid/replicated), then every image mapping to
    a valid pserver of the current cluster is transformed into an export
    record including the expected SCST device name.
    """
    self.image_exports = []
    api_volumes = {}
    # Key names; under Python 2 the API dicts are keyed with unicode
    # strings, so the keys are decoded once up front.
    key_replicated = 'replicate'
    key_replicas = 'replicas'
    key_storage_server = 'storage_server'
    key_guid = 'guid'
    key_uuid = 'uuid'
    key_pstorage_name = 'pstorage_name'
    key_vstorage_uuid = 'vstorage_uuid'
    key_image_uuid = 'image_uuid'
    key_pserver_name = 'pserver_name'
    if sys.version_info[0] <= 2:
        key_replicated = key_replicated.decode('utf-8')
        key_replicas = key_replicas.decode('utf-8')
        key_storage_server = key_storage_server.decode('utf-8')
        key_guid = key_guid.decode('utf-8')
        key_uuid = key_uuid.decode('utf-8')
        key_pstorage_name = key_pstorage_name.decode('utf-8')
        key_vstorage_uuid = key_vstorage_uuid.decode('utf-8')
        key_image_uuid = key_image_uuid.decode('utf-8')
        key_pserver_name = key_pserver_name.decode('utf-8')
    log.debug("Retrieving image volumes from API ...")
    images = None
    try:
        images = self.api.vimages(pstorage=self.hostname)
    except RestApiError as e:
        self.die(str(e))
    except Exception as e:
        self.die("%s: %s" % (e.__class__.__name__, e))
    # 'first' lowers the verbosity threshold for the first record, so
    # one full sample is logged at a lower -v level than the rest.
    first = True
    for img in images:
        # Example image volume structure as returned by the API:
        """
        {   'absolute_path': 'ftp://*****:*****@imageserver/3111/iso-images/XYZ.iso',
            'contract': 31720930,
            'creation_date': '2014-02-06T14:55:19.333',
            'image_type': 'CDROM',
            'modification_date': '2014-02-06T14:55:19.333',
            'replicas': [{'guid': '600144f0-0001-d843-4c30-8f3e11e3ae16',
                          'storage_server': 'storage108',
                          'virtual_state': 'AVAILABLE'},
                         {'guid': '600144f0-0001-d843-4c31-8f3e11e3ae16',
                          'storage_server': 'storage203',
                          'virtual_state': 'AVAILABLE'}],
            'replicate': True,
            'size': 272,
            'uuid': 'b24d6e86-8f3e-11e3-b7e8-52540066fee9',
            'virtual_state': 'AVAILABLE'}
        """
        vl = 4
        if first:
            vl = 2
        if self.verbose > vl:
            log.debug("Got Image volume from API:\n%s", pp(img))
        replicated = bool(img[key_replicated])
        vol_uuid = uuid.UUID(img[key_uuid])
        # Find the replica located on this storage server.
        guid = None
        for replica in img[key_replicas]:
            hn = replica[key_storage_server]
            if sys.version_info[0] <= 2:
                hn = hn.encode('utf-8')
            if hn == self.hostname:
                guid = uuid.UUID(replica[key_guid])
                break
        if not guid:
            log.debug("No valid GUID found for image volume:\n%s", pp(img))
            continue
        vol = {
            'guid': guid,
            'replicated': replicated,
        }
        api_volumes[vol_uuid] = vol
        if self.verbose > vl:
            log.debug("Transformed Image volume %r:\n%s", vol_uuid, pp(vol))
        if first:
            first = False
    log.debug("Retrieving image mappings from API ...")
    maps = None
    try:
        maps = self.api.vimage_maps(pstorage=self.hostname)
    except RestApiError as e:
        self.die(str(e))
    except Exception as e:
        self.die("%s: %s" % (e.__class__.__name__, e))
    first = True
    for mapping in maps:
        # Example image mapping structure as returned by the API:
        """
        {   'boot_order': 1,
            'creation_date': '2014-03-03T09:23:35.748',
            'dc_name': 'ldcb.tjasys.net',
            'image_guid': '600144f0-0001-7e84-f65a-a2b511e3ad7d',
            'image_legalentity': 12508,
            'image_name': 'GSP1RMCPRXFREO_DE_DVD.ISO',
            'image_size': 3271557120,
            'image_type': 'CDROM',
            'image_uuid': 'ada919ce-a284-11e3-b5f6-52540066fee9',
            'modification_date': '2014-03-03T09:45:16.915',
            'mount_state': 'DEALLOCATED',
            'mount_type': 'IDE',
            'mount_uuid': '0b778d45-b5ac-4be8-bd89-3b4de4e13491',
            'network_uuid': '8ca30b27-9300-9b8e-dc84-f0349a0498de',
            'order_nr': 1,
            'pserver_name': None,
            'pserver_uuid': None,
            'pstorage_name': 'storage108',
            'pstorage_uuid': '00000000-0000-0000-0000-002590A93640',
            'replicated': True,
            'size_mb': 3120,
            'vm_name': 'Win7Test',
            'vm_uuid': 'c1b53ee7-66a7-4dc9-8240-e50432438582'}
        """
        # Only mappings of this host to a known pserver are relevant.
        if mapping[key_pstorage_name] != self.hostname:
            continue
        pserver = mapping[key_pserver_name]
        if pserver is None:
            continue
        if pserver not in self.valid_pservers:
            log.debug("Image export to %r not considered.", pserver)
            continue
        vl = 4
        if first:
            vl = 2
        if self.verbose > vl:
            log.debug("Got Image mapping from API:\n%s", pp(mapping))
        if first:
            first = False
        vol_uuid = uuid.UUID(mapping[key_image_uuid])
        if vol_uuid not in api_volumes:
            log.error("No volume for mapping of %r found.", vol_uuid)
            continue
        guid = api_volumes[vol_uuid]['guid']
        # The SCST device name is the CRC64 digest of the GUID string.
        scst_devname = crc64_digest(str(guid))
        if sys.version_info[0] <= 2:
            pserver = pserver.encode('utf-8')
        m = {
            'uuid': vol_uuid,
            'guid': guid,
            'scst_devname': scst_devname,
            'replicated': api_volumes[vol_uuid]['replicated'],
            'pserver': pserver,
            'checked': False,
        }
        self.image_exports.append(m)
        if self.verbose > vl:
            log.debug("Transformed storage mapping:\n%s", pp(m))
    log.debug("Finished retrieving image mappings from API, found %d mappings.", len(self.image_exports))
def get_api_storage_exports(self):
    """Collect all storage volume exports this server should provide
    according to the REST API into self.storage_exports.

    Two passes: first all storage volumes with a replica on this host
    are mapped (volume UUID -> guid/replicated), then every storage
    mapping to a valid pserver of the current cluster is transformed
    into an export record including the expected SCST device name.
    """
    self.storage_exports = []
    api_volumes = {}
    # Key names; under Python 2 the API dicts are keyed with unicode
    # strings, so the keys are decoded once up front.
    key_replicated = 'replicated'
    key_replicas = 'replicas'
    key_storage_server = 'storage_server'
    key_guid = 'guid'
    key_uuid = 'uuid'
    key_pstorage_name = 'pstorage_name'
    key_vstorage_uuid = 'vstorage_uuid'
    key_pserver_name = 'pserver_name'
    if sys.version_info[0] <= 2:
        key_replicated = key_replicated.decode('utf-8')
        key_replicas = key_replicas.decode('utf-8')
        key_storage_server = key_storage_server.decode('utf-8')
        key_guid = key_guid.decode('utf-8')
        key_uuid = key_uuid.decode('utf-8')
        key_pstorage_name = key_pstorage_name.decode('utf-8')
        key_vstorage_uuid = key_vstorage_uuid.decode('utf-8')
        key_pserver_name = key_pserver_name.decode('utf-8')
    log.debug("Retrieving storage volumes from API ...")
    storages = None
    try:
        storages = self.api.vstorages(pstorage=self.hostname, contract_infos=False)
    except RestApiError as e:
        self.die(str(e))
    except Exception as e:
        self.die("%s: %s" % (e.__class__.__name__, e))
    # 'first' lowers the verbosity threshold for the first record, so
    # one full sample is logged at a lower -v level than the rest.
    first = True
    for stor in storages:
        # Example storage volume structure as returned by the API:
        """
        {   'cloned_from': None,
            'cluster': 'de-ka-cluster-01',
            'contract': 31721232,
            'creation_date': '2014-05-21T04:10:38.399',
            'modification_date': '2014-05-21T04:15:35.620',
            'name': 'DWDCStorage_EMEA_D_Mirror',
            'os_type': None,
            'physical_server': 'pserver117',
            'region': 'europe',
            'replicas': [{'guid': '600144f0-0001-dc9b-0121-e09d11e3920c',
                          'storage_server': 'storage201',
                          'virtual_state': 'AVAILABLE'},
                         {'guid': '600144f0-0001-dc98-6910-e09d11e3920c',
                          'storage_server': 'storage103',
                          'virtual_state': 'AVAILABLE'}],
            'size': 716800,
            'uuid': 'e7abbe07-3d3e-4468-9af4-1bfe8af418dc',
            'virtual_network': '61ba3819-d719-4986-b59d-c6178a32aabe'}
        """
        vl = 4
        if first:
            vl = 2
        if self.verbose > vl:
            log.debug("Got Storage volume from API:\n%s", pp(stor))
        # The 'replicated' flag may be absent; it defaults to True then.
        replicated = True
        if key_replicated in stor:
            replicated = stor[key_replicated]
        vol_uuid = uuid.UUID(stor[key_uuid])
        # Find the replica located on this storage server.
        guid = None
        for replica in stor[key_replicas]:
            hn = replica[key_storage_server]
            if sys.version_info[0] <= 2:
                hn = hn.encode('utf-8')
            if hn == self.hostname:
                guid = uuid.UUID(replica[key_guid])
                break
        if not guid:
            log.debug("No valid GUID found for storage volume:\n%s", pp(stor))
            continue
        vol = {
            'guid': guid,
            'replicated': replicated,
        }
        api_volumes[vol_uuid] = vol
        if self.verbose > vl:
            log.debug("Transformed Storage volume %r:\n%s", vol_uuid, pp(vol))
        if first:
            first = False
    log.debug("Retrieving storage mappings from API ...")
    maps = None
    try:
        maps = self.api.vstorage_maps(pstorage=self.hostname)
    except RestApiError as e:
        self.die(str(e))
    except Exception as e:
        self.die("%s: %s" % (e.__class__.__name__, e))
    first = True
    for mapping in maps:
        # Example storage mapping structure as returned by the API:
        """
        {   'boot_order': 1,
            'creation_date': '2014-05-23T08:42:32.770',
            'dc_name': 'BKK Pfalz',
            'modification_date': '2014-05-23T08:42:32.770',
            'mount_state': 'AVAILABLE',
            'mount_type': 'VIRTIO',
            'mount_uuid': '8b30c7e8-f9fb-41b3-8b1d-723e1c41ee99',
            'network_uuid': '485abc1d-ec56-81d0-24cc-621cde8f34dc',
            'order_nr': 1,
            'pserver_name': 'pserver220',
            'pserver_uuid': '48385147-3600-0030-48FF-003048FF26B2',
            'pstorage_name': 'storage201',
            'pstorage_uuid': '49434D53-0200-9071-2500-71902500F26D',
            'size_mb': 102400,
            'vm_name': 'replacement',
            'vm_uuid': '001f75f0-fa36-40cc-a628-060d2ecdccc1',
            'vstorage_guid': None,
            'vstorage_name': 'BKK Storage 2',
            'vstorage_uuid': '64f9dd3a-6db9-4405-a023-a0d32203c2aa'}
        """
        # Only mappings of this host to a known pserver are relevant.
        if mapping[key_pstorage_name] != self.hostname:
            continue
        pserver = mapping[key_pserver_name]
        if pserver is None:
            continue
        if pserver not in self.valid_pservers:
            log.debug("Storage export to %r not considered.", pserver)
            continue
        vl = 4
        if first:
            vl = 2
        if self.verbose > vl:
            log.debug("Got Storage mapping from API:\n%s", pp(mapping))
        vol_uuid = uuid.UUID(mapping[key_vstorage_uuid])
        if vol_uuid not in api_volumes:
            log.error("No volume for mapping of %r found.", vol_uuid)
            continue
        guid = api_volumes[vol_uuid]['guid']
        # The SCST device name is the CRC64 digest of the GUID string.
        scst_devname = crc64_digest(str(guid))
        if sys.version_info[0] <= 2:
            pserver = pserver.encode('utf-8')
        m = {
            'uuid': vol_uuid,
            'guid': guid,
            'scst_devname': scst_devname,
            'replicated': api_volumes[vol_uuid]['replicated'],
            'pserver': pserver,
            'checked': False,
        }
        self.storage_exports.append(m)
        if self.verbose > vl:
            log.debug("Transformed storage mapping:\n%s", pp(m))
        if first:
            first = False
    log.debug("Finished retrieving storage mappings from API, found %d mappings.", len(self.storage_exports))
def get_cluster_pservers(self):
    """Collect the physical servers of the current cluster into
    self.valid_pservers, mapping each pserver name to its zone as an
    int (0 when unset, -1 when unparseable)."""
    if not self.current_cluster:
        self.die("No current cluster defined - cannot get pservers.")
        return
    self.valid_pservers = {}
    try:
        pservers = self.api.pservers(cluster=self.current_cluster)
    except RestApiError as e:
        self.die(str(e))
    except Exception as e:
        self.die("%s: %s" % (e.__class__.__name__, e))
    if self.verbose > 3:
        log.debug("Info about pservers in current cluster %r from API:\n%s", self.current_cluster, pp(pservers))
    # API dicts are unicode-keyed under Python 2.
    key_name = 'name'
    key_zone = 'zone'
    if sys.version_info[0] <= 2:
        key_name = key_name.decode('utf-8')
        key_zone = key_zone.decode('utf-8')
    for entry in pservers:
        name = entry[key_name]
        if sys.version_info[0] <= 2:
            name = name.encode('utf-8')
        raw_zone = entry[key_zone]
        try:
            zone = int(raw_zone) if raw_zone else 0
        except ValueError:
            zone = -1
        self.valid_pservers[name] = zone
    if self.verbose > 2:
        log.debug("Found Pservers in current cluster %r from API:\n%s", self.current_cluster, pp(self.valid_pservers))
def __call__(self):
    """Check an LVM volume group - either its state (attribute flags)
    or its free space against warning/critical thresholds - and exit
    with the appropriate Nagios state.
    """
    self.parse_args()
    self.init_root_logger()
    if not self.argparser.args.vg:
        self.die("No volume group to check given.")
    self._vg = self.argparser.args.vg
    if self.verbose > 2:
        log.debug("Current object:\n%s", pp(self.as_dict()))
    # ----------------------------------------------------------
    # Parameters for check_free: each threshold may be given either as
    # a percentage or as an absolute MiB value.
    crit = 0
    crit_is_abs = True
    warn = 0
    warn_is_abs = True
    if not self.check_state:
        match_pc = re_number_percent.search(self.argparser.args.critical)
        match_abs = re_number_abs.search(self.argparser.args.critical)
        if match_pc:
            crit = int(match_pc.group(1))
            crit_is_abs = False
        elif match_abs:
            crit = int(match_abs.group(1))
        else:
            self.die("Invalid critical value %r." % (self.argparser.args.critical))
            return
        match_pc = re_number_percent.search(self.argparser.args.warning)
        match_abs = re_number_abs.search(self.argparser.args.warning)
        if match_pc:
            warn = int(match_pc.group(1))
            warn_is_abs = False
        elif match_abs:
            warn = int(match_abs.group(1))
        else:
            self.die("Invalid warning value %r." % (self.argparser.args.warning))
            return
    # ----------------------------------------------------------
    # Getting current state of VG via the 'vgs' command.
    vg_state = LvmVgState(
        plugin=self, vg=self.vg, vgs_cmd=self.vgs_cmd,
        verbose=self.verbose, timeout=self.argparser.args.timeout)
    try:
        vg_state.get_data()
    except (ExecutionTimeoutError, VgNotExistsError) as e:
        self.die(str(e))
    except CalledProcessError as e:
        msg = "The %r command returned %d with the message: %s" % (
            self.vgs_cmd, e.returncode, e.output)
        self.die(msg)
    if self.verbose > 1:
        log.debug(
            "Got a state of the volume group %r:\n%s", self.vg, vg_state)
    # ----------------------------------------------
    # State-check mode: evaluate the VG attribute flags only.
    if self.check_state:
        self.add_message(
            nagios.state.ok,
            ("Volume group %r seems to be OK." % (self.vg)))
        if 'r' in vg_state.attr:
            self.add_message(
                nagios.state.warning,
                ("Volume group %r is in a read-only state." % (self.vg)))
        if 'z' not in vg_state.attr:
            self.add_message(
                nagios.state.warning,
                ("Volume group %r is not resizeable." % (self.vg)))
        if 'p' in vg_state.attr:
            self.add_message(
                nagios.state.critical,
                (("One or more physical volumes belonging to the "
                  "volume group %r are missing from the system.") % (self.vg)))
        if self.verbose:
            self.out(
                "Attributes of VG %r: %s" % (self.vg, vg_state.attr_str))
        (state, msg) = self.check_messages()
        self.exit(state, msg)
        # Only for the blinds - self.exit() is expected not to return:
        return
    # ----------------------------------------------
    # Free-space mode: derive absolute and percent values for both
    # thresholds, whichever way they were given on the command line.
    if not vg_state.size_mb:
        self.die(
            "Cannot detect absolute size of volume group %r." % (self.vg))
    c_free_abs = 0
    c_free_pc = 0
    c_used_abs = 0
    c_used_pc = 0
    if crit_is_abs:
        c_free_abs = crit
        c_used_abs = vg_state.size_mb - crit
        c_free_pc = float(crit) / float(vg_state.size_mb) * 100
        c_used_pc = float(c_used_abs) / float(vg_state.size_mb) * 100
    else:
        c_free_pc = float(crit)
        c_used_pc = 100.0 - c_free_pc
        c_free_abs = int(math.ceil(c_free_pc * float(vg_state.size_mb) / 100))
        c_used_abs = vg_state.size_mb - c_free_abs
    w_free_abs = 0
    w_free_pc = 0
    w_used_abs = 0
    w_used_pc = 0
    if warn_is_abs:
        w_free_abs = warn
        w_used_abs = vg_state.size_mb - warn
        w_free_pc = float(warn) / float(vg_state.size_mb) * 100
        w_used_pc = float(w_used_abs) / float(vg_state.size_mb) * 100
    else:
        w_free_pc = float(warn)
        w_used_pc = 100.0 - w_free_pc
        w_free_abs = int(math.ceil(w_free_pc * float(vg_state.size_mb) / 100))
        w_used_abs = vg_state.size_mb - w_free_abs
    # Free space thresholds: warning must leave more room than critical.
    if c_free_abs > w_free_abs:
        self.die(
            "The warning threshold must be greater than the critical threshold.")
    # '@'-prefixed Nagios ranges alert when the value is INSIDE them.
    th_free_abs = NagiosThreshold(
        warning="@%d" % (w_free_abs), critical="@%d" % (c_free_abs))
    th_used_abs = NagiosThreshold(
        warning="%d" % (w_used_abs), critical="%d" % (c_used_abs))
    th_free_pc = NagiosThreshold(
        warning="@%d" % (w_free_pc), critical="@%d" % (c_free_pc))
    th_used_pc = NagiosThreshold(
        warning="%f" % (w_used_pc), critical="%f" % (c_used_pc))
    if self.verbose:
        self.out(
            "VG %r total size: %8d MiBytes." % (self.vg, vg_state.size_mb))
        self.out(
            "VG %r used size: %8d MiBytes (%0.2f%%)." % (
                self.vg, vg_state.used_mb, vg_state.percent_used))
        self.out(
            "VG %r free size: %8d MiBytes (%0.2f%%)." % (
                self.vg, vg_state.free_mb, vg_state.percent_free))
    if self.verbose > 2:
        log.debug("Thresholds free MBytes:\n%s", pp(th_free_abs.as_dict()))
        log.debug("Thresholds free percent:\n%s", pp(th_free_pc.as_dict()))
        log.debug("Thresholds used MBytes:\n%s", pp(th_used_abs.as_dict()))
        log.debug("Thresholds used percent:\n%s", pp(th_used_pc.as_dict()))
    self.add_perfdata(
        label='total_size', value=vg_state.size_mb, uom='MB')
    self.add_perfdata(
        label='free_size', value=vg_state.free_mb, uom='MB',
        threshold=th_free_abs)
    self.add_perfdata(
        label='free_percent',
        value=float("%0.2f" % (vg_state.percent_free)),
        uom='%', threshold=th_free_pc)
    self.add_perfdata(
        label='alloc_size', value=vg_state.used_mb, uom='MB',
        threshold=th_used_abs)
    self.add_perfdata(
        label='alloc_percent',
        value=float("%0.2f" % (vg_state.percent_used)),
        uom='%', threshold=th_used_pc)
    # The final state is driven by the free-space threshold.
    state = th_free_abs.get_status(vg_state.free_mb)
    out = "%d MiB total, %d MiB free (%0.1f%%), %d MiB allocated (%0.1f%%)" % (
        vg_state.size_mb, vg_state.free_mb, vg_state.percent_free,
        vg_state.used_mb, vg_state.percent_used)
    self.exit(state, out)
def __call__(self):
    """
    Method to call the plugin directly.

    Parses command line arguments, resolves the ``ps`` command,
    derives warning/critical thresholds (optionally as a percentage of
    the kernel pid_max), collects the matching processes and exits with
    the appropriate Nagios state and performance data.
    """
    self.parse_args()
    self.init_root_logger()

    # Resolve the ps(1) binary; a user-supplied path overrides the default.
    ps_cmd = PS_CMD
    if self.argparser.args.ps_cmd:
        self._ps_cmd = self.get_command(self.argparser.args.ps_cmd)
        ps_cmd = self.argparser.args.ps_cmd
    if not self.ps_cmd:
        msg = "Command %r not found." % (ps_cmd)
        self.die(msg)

    # Seed default thresholds from the kernel's pid_max (70% / 90%),
    # they may be overridden by explicit --warning/--critical below.
    if os.path.exists(PID_MAX_FILE):
        log.debug("Reading %r ...", PID_MAX_FILE)
        self._pid_max = int(self.read_file(PID_MAX_FILE, quiet=True))
        log.debug("Got a pid_max value of %d processes.", self._pid_max)
        self._warning = NagiosRange(self.pid_max * 70 / 100)
        self._critical = NagiosRange(self.pid_max * 90 / 100)

    # NOTE(review): self.user is presumably a property whose setter
    # resolves a name/UID and stores None on failure — confirm in class.
    if self.argparser.args.user:
        self.user = self.argparser.args.user
        if self.user is None:
            msg = "Invalid user name or UID %r given." % (self.argparser.args.user)
            self.die(msg)

    # A trailing '%' on the warning value means "percent of pid_max",
    # otherwise the value is passed verbatim as a Nagios range.
    match = re_percent.search(self.argparser.args.warning)
    if match:
        percent = float(match.group(1))
        warning = int(self.pid_max * percent / 100)
        self._warning = NagiosRange(warning)
    else:
        self._warning = NagiosRange(self.argparser.args.warning)

    # Same percent-or-range handling for the critical value.
    match = re_percent.search(self.argparser.args.critical)
    if match:
        percent = float(match.group(1))
        critical = int(self.pid_max * percent / 100)
        self._critical = NagiosRange(critical)
    else:
        self._critical = NagiosRange(self.argparser.args.critical)

    if self.verbose > 1:
        log.debug("Got thresholds: warning: %s, critical: %s.",
                  self.warning, self.critical)

    self.set_thresholds(warning=self.warning, critical=self.critical)

    if self.verbose > 2:
        log.debug("Current object:\n%s", pp(self.as_dict()))

    # Collect the processes and evaluate the selected metric against
    # the configured thresholds.
    uom = self.get_uom()
    label = self.get_label()

    found_processes = self.collect_processes()
    value_total = self.get_total_value(found_processes)
    count = len(found_processes)

    log.debug("Got a total value (by %s) of %d%s.",
              self.argparser.args.metric, value_total, uom)

    state = self.threshold.get_status(value_total)
    self.add_perfdata(
        label=label,
        value=value_total,
        uom=uom,
        threshold=self.threshold,
    )

    plural = ''
    if count != 1:
        plural = 'es'

    out = "%d process%s" % (count, plural)
    fdescription = self.get_filter_description()
    if fdescription:
        out += ' with ' + fdescription

    self.exit(state, out)
def get_api_image_volumes(self):
    """
    Retrieve all image volumes for this storage server from the REST
    API and store them, normalized, in self.api_images.

    Each stored entry is a dict with the keys 'guid', 'replicated',
    'size', 'img_type' and 'state'. Images without a replica located
    on this host (no GUID found) are skipped.
    """
    self.api_images = []

    # Lookup keys for the API records; on Python 2 the API returns
    # unicode keys, so the byte literals are decoded there.
    key_replicated = 'replicate'
    key_size = 'size'
    key_replicas = 'replicas'
    key_storage_server = 'storage_server'
    key_guid = 'guid'
    key_image_type = 'image_type'
    key_virtual_state = 'virtual_state'
    if sys.version_info[0] <= 2:
        key_replicated = key_replicated.decode('utf-8')
        key_size = key_size.decode('utf-8')
        key_replicas = key_replicas.decode('utf-8')
        key_storage_server = key_storage_server.decode('utf-8')
        key_guid = key_guid.decode('utf-8')
        key_image_type = key_image_type.decode('utf-8')
        key_virtual_state = key_virtual_state.decode('utf-8')

    images = None
    try:
        images = self.api.vimages(pstorage=self.hostname)
    except RestApiError as e:
        self.die(str(e))
    except Exception as e:
        self.die("%s: %s" % (e.__class__.__name__, e))

    # The first record is logged at a lower verbosity level than the
    # following ones, to keep debug output readable.
    first_volume = True
    for stor in images:
        vl = 4
        if first_volume:
            vl = 2
        if self.verbose > vl:
            log.debug("Got Image volume from API:\n%s", pp(stor))
        first_volume = False

        state = None
        if key_virtual_state in stor:
            state = stor[key_virtual_state]
            if sys.version_info[0] <= 2:
                state = state.encode('utf-8')

        replicated = False
        if key_replicated in stor:
            if stor[key_replicated]:
                replicated = True

        # Round the size up to a multiple of 4; replicated images get
        # 4 additional units. NOTE(review): presumably MiB and
        # replication overhead — confirm against the LVM comparison code.
        size = stor[key_size]
        size = int(math.ceil(float(size) / 4.0)) * 4
        if replicated:
            size += 4

        img_type = stor[key_image_type]
        if sys.version_info[0] <= 2:
            img_type = img_type.encode('utf-8')

        # Find the replica that lives on this host; its GUID identifies
        # the local volume and its virtual_state overrides the image's.
        guid = None
        for replica in stor[key_replicas]:
            hn = replica[key_storage_server]
            if sys.version_info[0] <= 2:
                hn = hn.encode('utf-8')
            if hn == self.hostname:
                guid = uuid.UUID(replica[key_guid])
                if key_virtual_state in replica:
                    state = replica[key_virtual_state]
                    if sys.version_info[0] <= 2:
                        state = state.encode('utf-8')
                break

        if not guid:
            log.debug("No valid GUID found for image:\n%s", pp(stor))
            continue

        if state:
            state = state.lower()

        vol = {
            'guid': guid,
            'replicated': replicated,
            'size': size,
            'img_type': img_type,
            'state': state,
        }
        self.api_images.append(vol)
        if self.verbose > vl:
            log.debug("Transferred data of image volume:\n%s", pp(vol))

    if self.verbose > 1:
        log.debug("Got %d Image volumes from API.", len(self.api_images))
    if self.verbose > 3:
        log.debug("Got Image volumes from API:\n%s", pp(self.api_images))
def check_mddev(self, dev):
    """
    Underlying method to check the state of a MD device.

    @raise NPReadTimeoutError: on timeout reading a particular file
                               in sys filesystem
    @raise IOError: if a sysfilesystem file disappears since start
                    of this script

    @param dev: the name of the MD device to check (e.g. 'md0', 'md400')
    @type dev: str

    @return: a tuple of two values:
                * the numeric (Nagios) state
                * a textual description of the state
    @rtype: tuple of str and int
    """
    log.debug("Checking device %r ...", dev)

    # Define directories and files in sysfs
    # /sys/block/mdX
    base_dir = os.sep + os.path.join('sys', 'block', dev)
    # /sys/block/mdX/md
    base_mddir = os.path.join(base_dir, 'md')
    # /sys/block/mdX/md/array_state
    array_state_file = os.path.join(base_mddir, 'array_state')
    # /sys/block/mdX/md/degraded
    degraded_file = os.path.join(base_mddir, 'degraded')
    # /sys/block/mdX/md/raid_disks
    raid_disks_file = os.path.join(base_mddir, 'raid_disks')
    # /sys/block/mdX/md/level
    raid_level_file = os.path.join(base_mddir, 'level')
    # /sys/block/mdX/md/suspended
    suspended_file = os.path.join(base_mddir, 'suspended')
    # /sys/block/mdX/md/sync_action
    sync_action_file = os.path.join(base_mddir, 'sync_action')
    # /sys/block/mdX/md/sync_completed
    sync_completed_file = os.path.join(base_mddir, 'sync_completed')
    # /sys/block/mdX/md/dev-*
    slavedir_pattern = os.path.join(base_mddir, 'dev-*')

    for sys_dir in (base_dir, base_mddir):
        if not os.path.isdir(sys_dir):
            raise IOError(errno.ENOENT, "Directory doesn't exists.", sys_dir)

    state = RaidState(dev)

    # Array status
    state.array_state = self.read_file(array_state_file).strip()

    # RAID level
    state.raid_level = self.read_file(raid_level_file).strip()

    # degraded state, if available
    if os.path.exists(degraded_file):
        state.degraded = bool(int(self.read_file(degraded_file)))

    # number of raid disks
    state.nr_raid_disks = int(self.read_file(raid_disks_file))

    # suspended state, if available
    if os.path.exists(suspended_file):
        state.suspended = bool(int(self.read_file(suspended_file)))

    # state of synchronisation, if available
    if os.path.exists(sync_action_file):
        state.sync_action = self.read_file(sync_action_file).strip()

    # state of synchronisation process, if available
    if os.path.exists(sync_completed_file):
        sync_state = self.read_file(sync_completed_file).strip()
        match = re_sync_completed.search(sync_state)
        if match:
            state.sectors_synced = int(match.group(1))
            state.sectors_total = int(match.group(2))
            if state.sectors_total:
                state.sync_completed = (
                    float(state.sectors_synced) / float(state.sectors_total))

    # Pre-fill every slot with None, so that slots without an assigned
    # slave device can be detected as failed below.
    for i in range(state.nr_raid_disks):
        state.raid_devices[i] = None

    if self.verbose > 3:
        log.debug(
            "Searching for slave dirs with pattern %r ...", slavedir_pattern)
    slavedirs = glob.glob(slavedir_pattern)
    if self.verbose > 2:
        log.debug("Found slave dirs: %r", slavedirs)

    for slave_dir in slavedirs:

        if self.verbose > 3:
            log.debug("Checking slave dir %r ...", slave_dir)

        # Defining some sysfs files
        # /sys/block/mdX/md/dev-XYZ/state
        slave_state_file = os.path.join(slave_dir, 'state')
        # /sys/block/mdX/md/dev-XYZ/slot
        slave_slot_file = os.path.join(slave_dir, 'slot')
        # /sys/block/mdX/md/dev-XYZ/block
        slave_block_file = os.path.join(slave_dir, 'block')

        is_spare = False

        # Reading some status files; spare devices have no slot and
        # reading it raises a ValueError.
        try:
            slave_slot = int(self.read_file(slave_slot_file))
        except ValueError:
            slave_slot = None
        slave_state = self.read_file(slave_state_file).strip()
        if slave_state == 'spare':
            is_spare = True

        rd_link = None
        if slave_slot is not None:
            rd_link = os.path.join(base_mddir, 'rd%d' % (slave_slot))

        # Retrieving the slave block device by resolving the 'block'
        # symlink and mapping its basename into /dev.
        block_target = os.readlink(slave_block_file)
        slave_block_device = os.path.normpath(os.path.join(
            os.path.dirname(slave_block_file), block_target))
        slave_bd_basename = os.path.basename(slave_block_device)
        slave_block_device = os.sep + os.path.join('dev', slave_bd_basename)

        slave = SlaveState(slave_slot, slave_dir)
        slave.block_device = slave_block_device
        slave.state = slave_state

        # Check existence of the rdX link
        slave.rdlink = rd_link
        if rd_link is not None and os.path.exists(rd_link):
            slave.rdlink_exists = True
        else:
            slave.rdlink_exists = False

        # Assign slave as a raid or a spare device
        state.slaves.append(slave_bd_basename)
        if is_spare:
            state.spare_devices[slave_bd_basename] = slave
        elif rd_link is None or slave_state == 'faulty':
            state.failed_devices[slave_bd_basename] = slave
        else:
            state.raid_devices[slave_slot] = slave

    if self.verbose > 2:
        log.debug("Status results for %r:\n%s", dev, pp(state.as_dict()))

    # And evaluate the results ....
    state_id = nagios.state.ok

    # Check the array state; the listed states are the healthy ones.
    state_msg = "%s - %s" % (dev, state.array_state)
    if state.array_state not in (
            'readonly', 'read-auto', 'clean', 'active', 'active-idle'):
        if state.array_state == 'write-pending':
            state_id = nagios.state.warning
        elif state.array_state in ('clear', 'inactive', 'readonly'):
            state_id = nagios.state.critical
        else:
            state_id = nagios.state.unknown

    if not self.spare_ok:
        # Check for existing spare devices
        if state.spare_devices.keys():
            state_msg += ", has spares %r" % (state.spare_devices.keys())
            state_id = max_state(state_id, nagios.state.warning)

    # Check degraded and synchronisation state
    if state.degraded:
        state_msg += ", degraded"
        if state.sync_action is None:
            state_id = max_state(state_id, nagios.state.critical)
            state_msg += ", unknown sync action"
        elif state.sync_action == 'idle':
            state_id = max_state(state_id, nagios.state.critical)
            state_msg += ", idle"
        elif state.sync_action in ('resync', 'recover', 'check', 'repair'):
            state_id = max_state(state_id, nagios.state.warning)
            state_msg += ", " + state.sync_action
        else:
            state_id = max_state(state_id, nagios.state.unknown)
            state_msg += ", sync " + state.sync_action

    # Add percentage of sync completed to output
    if state.sync_completed is not None:
        state_msg += " %.1f%%" % ((state.sync_completed * 100))

    # Check state of slave devices: an empty slot is only a warning
    # while a recovery action is running, otherwise it is critical.
    for i in state.raid_devices:
        log.debug("Evaluating state of raid_device[%r]", i)
        if state.raid_devices[i] is None:
            if state.sync_action in ('resync', 'recover', 'check', 'repair'):
                state_id = max_state(state_id, nagios.state.warning)
            else:
                state_id = max_state(state_id, nagios.state.critical)
            state_msg += ", raid_device[%r] fails" % (i)
            continue
        raid_device = state.raid_devices[i]
        if raid_device.state in ('in_sync', 'writemostly'):
            continue
        bd = os.path.basename(raid_device.block_device)
        state_msg += ", raid_device[%r]=%s %s" % (i, bd, raid_device.state)
        if not raid_device.rdlink_exists:
            state_msg += " failed"
            state_id = max_state(state_id, nagios.state.critical)

    if state.failed_devices.keys():
        state_msg += ", failed %r" % (state.failed_devices.keys())
        state_id = max_state(state_id, nagios.state.critical)

    return (state_id, state_msg)
def get_data(self, force=False): """ Main method to retrieve the data about the VG with the 'vgs' command. @param force: retrieve data, even if self.checked is True @type force: bool """ if self.checked and not force: return # vgs --unit m --noheadings --nosuffix --separator ';' --unbuffered \ # -o vg_fmt,vg_name,vg_attr,vg_size,vg_free,vg_extent_size,\ # vg_extent_count,vg_free_count storage fields = ( 'vg_fmt', 'vg_name', 'vg_attr', 'vg_extent_size', 'vg_extent_count', 'vg_free_count') cmd = [ self.vgs_cmd, '--unit', 'm', '--noheadings', '--nosuffix', '--separator', ';', '--unbuffered', '-o', ','.join(fields), self.vg ] current_locale = os.environ.get('LC_NUMERIC') if self.verbose > 2: log.debug("Current locale is %r, setting to 'C'.", current_locale) os.environ['LC_NUMERIC'] = 'C' try: (ret, stdoutdata, stderrdata) = self.plugin.exec_cmd(cmd) finally: if current_locale: os.environ['LC_NUMERIC'] = current_locale else: del os.environ['LC_NUMERIC'] if self.verbose > 3: log.debug("Got from STDOUT: %r", stdoutdata) fields = stdoutdata.strip().split(';') if self.verbose > 2: log.debug("Got fields:\n%s", pp(fields)) self._format = fields[0] self._ext_size = int(float(fields[3])) self._ext_count = int(fields[4]) self._ext_free = int(fields[5]) attr_str = fields[2] attr = set([]) for i in (0, 1, 2, 3, 4): if attr_str[i] != '-': attr.add(attr_str[i]) if attr_str[5] == 'c': attr.add('C') self._attr = attr self._checked = True
def __call__(self):
    """
    Method to call the plugin directly.

    Checks an Infiniband HCA port via sysfs: existence of the relevant
    directories and files, the logical and physical link state and the
    current data rate, then exits with the resulting Nagios state.
    """
    self.parse_args()
    self.init_root_logger()

    state = nagios.state.ok
    out = "Infiniband port %s:%d seems to be okay." % (self.hca_name, self.hca_port)

    if self.verbose > 2:
        log.debug("Current object:\n%s", pp(self.as_dict()))

    # Checking directories in sysfs ...
    hca_dir = os.path.join(IB_BASE_DIR, self.hca_name)
    ports_dir = os.path.join(hca_dir, 'ports')
    port_dir = os.path.join(ports_dir, str(self.hca_port))
    for sysfsdir in (IB_BASE_DIR, hca_dir, ports_dir, port_dir):
        if self.verbose > 1:
            log.debug("Checking directory %r ...", sysfsdir)
        if not os.path.exists(sysfsdir):
            msg = "Directory %r doesn't exists." % (sysfsdir)
            self.exit(nagios.state.critical, msg)
        if not os.path.isdir(sysfsdir):
            msg = "%r is not a directory." % (sysfsdir)
            self.exit(nagios.state.critical, msg)

    # Checking state files
    state_file = os.path.join(port_dir, 'state')
    phys_state_file = os.path.join(port_dir, 'phys_state')
    rate_file = os.path.join(port_dir, 'rate')
    for sfile in (state_file, phys_state_file, rate_file):
        if self.verbose > 1:
            log.debug("Checking file %r ...", sfile)
        if not os.path.exists(sfile):
            msg = "File %r doesn't exists." % (sfile)
            self.exit(nagios.state.critical, msg)
        if not os.path.isfile(sfile):
            msg = "%r is not a regular file." % (sfile)
            self.exit(nagios.state.critical, msg)

    # getting state (e.g.: '4: ACTIVE', '1: DOWN')
    cur_state = self.read_file(state_file).strip()
    state_num = None
    state_str = None
    match = re_state.search(cur_state)
    if not match:
        msg = "Could not evaluate IB port state %r from %r." % (cur_state, state_file)
        self.die(msg)
    state_num = int(match.group(1))
    state_str = match.group(2)
    log.debug("Got a state %r (%d) for infiniband port %s:%d.",
              state_str, state_num, self.hca_name, self.hca_port)

    # getting physical state (e.g.: '5: LinkUp', '2: Polling')
    cur_phys_state = self.read_file(phys_state_file).strip()
    phys_state_num = None
    phys_state_str = None
    match = re_state.search(cur_phys_state)
    if not match:
        msg = "Could not evaluate IB port physical state %r from %r." % (
            cur_phys_state, phys_state_file)
        self.die(msg)
    phys_state_num = int(match.group(1))
    phys_state_str = match.group(2)
    log.debug("Got a physical state %r (%d) for infiniband port %s:%d.",
              phys_state_str, phys_state_num, self.hca_name, self.hca_port)

    # getting the current port rate (e.g. '40 Gb/sec (4X QDR)')
    cur_rate = self.read_file(rate_file).strip()
    rate_val = None
    match = re_rate.search(cur_rate)
    if not match:
        msg = "Could not evaluate IB port rate %r from %r." % (cur_rate, rate_file)
        self.die(msg)
    rate_val = int(match.group(1))
    log.debug(
        "Got a data rate of %d GiB/sec [%s] for infiniband port %s:%d.",
        rate_val, cur_rate, self.hca_name, self.hca_port)

    # Wrong rate is a warning; a non-active link or a physical state
    # other than LinkUp escalates to critical.
    if rate_val != self.rate:
        state = nagios.state.warning
    if state_num != IB_LINK_ACTIVE:
        state = nagios.state.critical
    if phys_state_num != IB_PORT_PHYS_STATE_LINKUP:
        state = nagios.state.critical

    out = "Infiniband port %s:%d is %s (%s) - current rate %s." % (
        self.hca_name, self.hca_port, state_str, phys_state_str, cur_rate)
    self.exit(state, out)
def __call__(self):
    """
    Method to call the plugin directly.

    Parses command line arguments, resolves the ``ps`` command,
    derives warning/critical thresholds (optionally as a percentage of
    the kernel pid_max), collects the matching processes and exits with
    the appropriate Nagios state and performance data.
    """
    self.parse_args()
    self.init_root_logger()

    # Resolve the ps(1) binary; a user-supplied path overrides the default.
    ps_cmd = PS_CMD
    if self.argparser.args.ps_cmd:
        self._ps_cmd = self.get_command(self.argparser.args.ps_cmd)
        ps_cmd = self.argparser.args.ps_cmd
    if not self.ps_cmd:
        msg = "Command %r not found." % (ps_cmd)
        self.die(msg)

    # Seed default thresholds from the kernel's pid_max (70% / 90%),
    # they may be overridden by explicit --warning/--critical below.
    if os.path.exists(PID_MAX_FILE):
        log.debug("Reading %r ...", PID_MAX_FILE)
        self._pid_max = int(self.read_file(PID_MAX_FILE, quiet=True))
        log.debug("Got a pid_max value of %d processes.", self._pid_max)
        self._warning = NagiosRange(self.pid_max * 70 / 100)
        self._critical = NagiosRange(self.pid_max * 90 / 100)

    # NOTE(review): self.user is presumably a property whose setter
    # resolves a name/UID and stores None on failure — confirm in class.
    if self.argparser.args.user:
        self.user = self.argparser.args.user
        if self.user is None:
            msg = "Invalid user name or UID %r given." % (
                self.argparser.args.user)
            self.die(msg)

    # A trailing '%' on the warning value means "percent of pid_max",
    # otherwise the value is passed verbatim as a Nagios range.
    match = re_percent.search(self.argparser.args.warning)
    if match:
        percent = float(match.group(1))
        warning = int(self.pid_max * percent / 100)
        self._warning = NagiosRange(warning)
    else:
        self._warning = NagiosRange(self.argparser.args.warning)

    # Same percent-or-range handling for the critical value.
    match = re_percent.search(self.argparser.args.critical)
    if match:
        percent = float(match.group(1))
        critical = int(self.pid_max * percent / 100)
        self._critical = NagiosRange(critical)
    else:
        self._critical = NagiosRange(self.argparser.args.critical)

    if self.verbose > 1:
        log.debug("Got thresholds: warning: %s, critical: %s.",
                  self.warning, self.critical)

    self.set_thresholds(warning=self.warning, critical=self.critical)

    if self.verbose > 2:
        log.debug("Current object:\n%s", pp(self.as_dict()))

    # Collect the processes and evaluate the selected metric against
    # the configured thresholds.
    uom = self.get_uom()
    label = self.get_label()

    found_processes = self.collect_processes()
    value_total = self.get_total_value(found_processes)
    count = len(found_processes)

    log.debug("Got a total value (by %s) of %d%s.",
              self.argparser.args.metric, value_total, uom)

    state = self.threshold.get_status(value_total)
    self.add_perfdata(
        label=label,
        value=value_total,
        uom=uom,
        threshold=self.threshold,
    )

    plural = ''
    if count != 1:
        plural = 'es'

    out = "%d process%s" % (count, plural)
    fdescription = self.get_filter_description()
    if fdescription:
        out += ' with ' + fdescription

    self.exit(state, out)
def get_api_snapshot_volumes(self): self.api_snapshots = [] key_replicated = 'replicate' key_size = 'size' key_replicas = 'replicas' key_storage_server = 'storage_server' key_guid = 'guid' key_image_type = 'image_type' key_virtual_state = 'virtual_state' if sys.version_info[0] <= 2: key_replicated = key_replicated.decode('utf-8') key_size = key_size.decode('utf-8') key_replicas = key_replicas.decode('utf-8') key_storage_server = key_storage_server.decode('utf-8') key_guid = key_guid.decode('utf-8') key_image_type = key_image_type.decode('utf-8') key_virtual_state = key_virtual_state.decode('utf-8') snapshots = None try: snapshots = self.api.vsnapshots(pstorage=self.hostname) except RestApiError as e: self.die(str(e)) except Exception as e: self.die("%s: %s" % (e.__class__.__name__, e)) first_volume = True for stor in snapshots: vl = 4 if first_volume: vl = 2 if self.verbose > vl: log.debug("Got Snapshot volume from API:\n%s", pp(stor)) first_volume = False size = stor[key_size] guid = uuid.UUID(stor[key_guid]) state = None if key_virtual_state in stor: state = stor[key_virtual_state] if sys.version_info[0] <= 2: state = state.encode('utf-8') if state: state = state.lower() vol = { 'guid': guid, 'size': size, 'state': state, } self.api_snapshots.append(vol) if self.verbose > vl: log.debug("Transferred data of storage volume:\n%s", pp(vol)) if self.verbose > 1: log.debug("Got %d Snapshot volumes from API.", len(self.api_snapshots)) if self.verbose > 3: log.debug("Got Snapshot volumes from API:\n%s", pp(self.api_snapshots))
def __call__(self):
    """
    Method to call the plugin directly.

    Either checks the general state of the given LVM volume group
    (attribute flags) or, in the default mode, the free space of the
    volume group against absolute (MiB) or percentage thresholds.
    """
    self.parse_args()
    self.init_root_logger()

    if not self.argparser.args.vg:
        self.die("No volume group to check given.")
    self._vg = self.argparser.args.vg

    if self.verbose > 2:
        log.debug("Current object:\n%s", pp(self.as_dict()))

    # ----------------------------------------------------------
    # Parameters for check_free; thresholds may be given either as a
    # percentage or as an absolute number of MiBytes.
    crit = 0
    crit_is_abs = True
    warn = 0
    warn_is_abs = True

    if not self.check_state:

        match_pc = re_number_percent.search(self.argparser.args.critical)
        match_abs = re_number_abs.search(self.argparser.args.critical)
        if match_pc:
            crit = int(match_pc.group(1))
            crit_is_abs = False
        elif match_abs:
            crit = int(match_abs.group(1))
        else:
            self.die("Invalid critical value %r." % (self.argparser.args.critical))
            return

        match_pc = re_number_percent.search(self.argparser.args.warning)
        match_abs = re_number_abs.search(self.argparser.args.warning)
        if match_pc:
            warn = int(match_pc.group(1))
            warn_is_abs = False
        elif match_abs:
            warn = int(match_abs.group(1))
        else:
            self.die("Invalid warning value %r." % (self.argparser.args.warning))
            return

    # ----------------------------------------------------------
    # Getting current state of VG
    vg_state = LvmVgState(
        plugin=self, vg=self.vg, vgs_cmd=self.vgs_cmd,
        verbose=self.verbose, timeout=self.argparser.args.timeout)
    try:
        vg_state.get_data()
    except (ExecutionTimeoutError, VgNotExistsError) as e:
        self.die(str(e))
    except CalledProcessError as e:
        msg = "The %r command returned %d with the message: %s" % (
            self.vgs_cmd, e.returncode, e.output)
        self.die(msg)

    if self.verbose > 1:
        log.debug("Got a state of the volume group %r:\n%s", self.vg, vg_state)

    # ----------------------------------------------
    # Check-state mode: only evaluate the VG attribute flags
    # ('r' read-only, 'z' resizeable, 'p' partial/missing PVs).
    if self.check_state:

        self.add_message(nagios.state.ok, (
            "Volume group %r seems to be OK." % (self.vg)))
        if 'r' in vg_state.attr:
            self.add_message(nagios.state.warning, (
                "Volume group %r is in a read-only state." % (self.vg)))
        if 'z' not in vg_state.attr:
            self.add_message(nagios.state.warning, (
                "Volume group %r is not resizeable." % (self.vg)))
        if 'p' in vg_state.attr:
            self.add_message(
                nagios.state.critical,
                (("One or more physical volumes belonging to the "
                  "volume group %r are missing from the system.") % (self.vg)))
        if self.verbose:
            self.out("Attributes of VG %r: %s" % (self.vg, vg_state.attr_str))

        (state, msg) = self.check_messages()
        self.exit(state, msg)
        # Only for the blinds:
        return

    # ----------------------------------------------
    # And now check free space (or whatever)
    if not vg_state.size_mb:
        self.die("Cannot detect absolute size of volume group %r." % (self.vg))

    # Derive both absolute and percentage representations of the
    # critical threshold, whichever form was given.
    c_free_abs = 0
    c_free_pc = 0
    c_used_abs = 0
    c_used_pc = 0
    if crit_is_abs:
        c_free_abs = crit
        c_used_abs = vg_state.size_mb - crit
        c_free_pc = float(crit) / float(vg_state.size_mb) * 100
        c_used_pc = float(c_used_abs) / float(vg_state.size_mb) * 100
    else:
        c_free_pc = float(crit)
        c_used_pc = 100.0 - c_free_pc
        c_free_abs = int(
            math.ceil(c_free_pc * float(vg_state.size_mb) / 100))
        c_used_abs = vg_state.size_mb - c_free_abs

    # Same conversion for the warning threshold.
    w_free_abs = 0
    w_free_pc = 0
    w_used_abs = 0
    w_used_pc = 0
    if warn_is_abs:
        w_free_abs = warn
        w_used_abs = vg_state.size_mb - warn
        w_free_pc = float(warn) / float(vg_state.size_mb) * 100
        w_used_pc = float(w_used_abs) / float(vg_state.size_mb) * 100
    else:
        w_free_pc = float(warn)
        w_used_pc = 100.0 - w_free_pc
        w_free_abs = int(
            math.ceil(w_free_pc * float(vg_state.size_mb) / 100))
        w_used_abs = vg_state.size_mb - w_free_abs

    if c_free_abs > w_free_abs:
        self.die(
            "The warning threshold must be greater than the critical threshold.")

    # '@' ranges alert when the value is INSIDE the range, i.e. when
    # the free space drops to or below the threshold.
    th_free_abs = NagiosThreshold(
        warning="@%d" % (w_free_abs), critical="@%d" % (c_free_abs))
    th_used_abs = NagiosThreshold(
        warning="%d" % (w_used_abs), critical="%d" % (c_used_abs))
    th_free_pc = NagiosThreshold(
        warning="@%d" % (w_free_pc), critical="@%d" % (c_free_pc))
    th_used_pc = NagiosThreshold(
        warning="%f" % (w_used_pc), critical="%f" % (c_used_pc))

    if self.verbose:
        self.out("VG %r total size: %8d MiBytes." % (self.vg, vg_state.size_mb))
        self.out("VG %r used size: %8d MiBytes (%0.2f%%)." % (
            self.vg, vg_state.used_mb, vg_state.percent_used))
        self.out("VG %r free size: %8d MiBytes (%0.2f%%)." % (
            self.vg, vg_state.free_mb, vg_state.percent_free))

    if self.verbose > 2:
        log.debug("Thresholds free MBytes:\n%s", pp(th_free_abs.as_dict()))
        log.debug("Thresholds free percent:\n%s", pp(th_free_pc.as_dict()))
        log.debug("Thresholds used MBytes:\n%s", pp(th_used_abs.as_dict()))
        log.debug("Thresholds used percent:\n%s", pp(th_used_pc.as_dict()))

    self.add_perfdata(label='total_size', value=vg_state.size_mb, uom='MB')
    self.add_perfdata(
        label='free_size', value=vg_state.free_mb,
        uom='MB', threshold=th_free_abs)
    self.add_perfdata(
        label='free_percent', value=float("%0.2f" % (vg_state.percent_free)),
        uom='%', threshold=th_free_pc)
    self.add_perfdata(
        label='alloc_size', value=vg_state.used_mb,
        uom='MB', threshold=th_used_abs)
    self.add_perfdata(
        label='alloc_percent', value=float("%0.2f" % (vg_state.percent_used)),
        uom='%', threshold=th_used_pc)

    # The plugin state is decided by the absolute free space only;
    # the other thresholds are emitted as perfdata context.
    state = th_free_abs.get_status(vg_state.free_mb)
    out = "%d MiB total, %d MiB free (%0.1f%%), %d MiB allocated (%0.1f%%)" % (
        vg_state.size_mb, vg_state.free_mb, vg_state.percent_free,
        vg_state.used_mb, vg_state.percent_used)

    self.exit(state, out)
) result = plugin.argparser.args.result if result is None: result = random.randint(1, 20) log.debug("Checking result value of %d.", result) else: if result < 0 or result > 20: plugin.die((" invalid number supplied for the -r option, " + "must be between 0 and 20")) plugin.add_perfdata( label = 'Result', value = result, threshold = plugin.threshold, min_data = 0, max_data = 20, ) if verbose > 1: log.debug("Plugin object:\n" + pp(plugin.as_dict())) plugin.exit( code = plugin.check_threshold(result), message = (" sample result was %d" % (result)) ) #============================================================================== # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
def __call__(self):
    """
    Method to call the plugin directly.

    Checks an Infiniband HCA port via sysfs: existence of the relevant
    directories and files, the logical and physical link state and the
    current data rate, then exits with the resulting Nagios state.
    """
    self.parse_args()
    self.init_root_logger()

    state = nagios.state.ok
    out = "Infiniband port %s:%d seems to be okay." % (
        self.hca_name, self.hca_port)

    if self.verbose > 2:
        log.debug("Current object:\n%s", pp(self.as_dict()))

    # Checking directories in sysfs ...
    hca_dir = os.path.join(IB_BASE_DIR, self.hca_name)
    ports_dir = os.path.join(hca_dir, 'ports')
    port_dir = os.path.join(ports_dir, str(self.hca_port))
    for sysfsdir in (IB_BASE_DIR, hca_dir, ports_dir, port_dir):
        if self.verbose > 1:
            log.debug("Checking directory %r ...", sysfsdir)
        if not os.path.exists(sysfsdir):
            msg = "Directory %r doesn't exists." % (sysfsdir)
            self.exit(nagios.state.critical, msg)
        if not os.path.isdir(sysfsdir):
            msg = "%r is not a directory." % (sysfsdir)
            self.exit(nagios.state.critical, msg)

    # Checking state files
    state_file = os.path.join(port_dir, 'state')
    phys_state_file = os.path.join(port_dir, 'phys_state')
    rate_file = os.path.join(port_dir, 'rate')
    for sfile in (state_file, phys_state_file, rate_file):
        if self.verbose > 1:
            log.debug("Checking file %r ...", sfile)
        if not os.path.exists(sfile):
            msg = "File %r doesn't exists." % (sfile)
            self.exit(nagios.state.critical, msg)
        if not os.path.isfile(sfile):
            msg = "%r is not a regular file." % (sfile)
            self.exit(nagios.state.critical, msg)

    # getting state (e.g.: '4: ACTIVE', '1: DOWN')
    cur_state = self.read_file(state_file).strip()
    state_num = None
    state_str = None
    match = re_state.search(cur_state)
    if not match:
        msg = "Could not evaluate IB port state %r from %r." % (
            cur_state, state_file)
        self.die(msg)
    state_num = int(match.group(1))
    state_str = match.group(2)
    log.debug("Got a state %r (%d) for infiniband port %s:%d.",
              state_str, state_num, self.hca_name, self.hca_port)

    # getting physical state (e.g.: '5: LinkUp', '2: Polling')
    cur_phys_state = self.read_file(phys_state_file).strip()
    phys_state_num = None
    phys_state_str = None
    match = re_state.search(cur_phys_state)
    if not match:
        msg = "Could not evaluate IB port physical state %r from %r." % (
            cur_phys_state, phys_state_file)
        self.die(msg)
    phys_state_num = int(match.group(1))
    phys_state_str = match.group(2)
    log.debug("Got a physical state %r (%d) for infiniband port %s:%d.",
              phys_state_str, phys_state_num, self.hca_name, self.hca_port)

    # getting the current port rate (e.g. '40 Gb/sec (4X QDR)')
    cur_rate = self.read_file(rate_file).strip()
    rate_val = None
    match = re_rate.search(cur_rate)
    if not match:
        msg = "Could not evaluate IB port rate %r from %r." % (
            cur_rate, rate_file)
        self.die(msg)
    rate_val = int(match.group(1))
    log.debug("Got a data rate of %d GiB/sec [%s] for infiniband port %s:%d.",
              rate_val, cur_rate, self.hca_name, self.hca_port)

    # Wrong rate is a warning; a non-active link or a physical state
    # other than LinkUp escalates to critical.
    if rate_val != self.rate:
        state = nagios.state.warning
    if state_num != IB_LINK_ACTIVE:
        state = nagios.state.critical
    if phys_state_num != IB_PORT_PHYS_STATE_LINKUP:
        state = nagios.state.critical

    out = "Infiniband port %s:%d is %s (%s) - current rate %s." % (
        self.hca_name, self.hca_port, state_str, phys_state_str, cur_rate)
    self.exit(state, out)
def run(self): """Main execution method.""" if os.geteuid(): log.error("You must be root to execute this.") sys.exit(1) state = nagios.state.ok out = "Storage volumes on %r seems to be okay." % (self.hostname) self.all_api_volumes = {} self.get_api_storage_volumes() for vol in self.api_volumes: guid = str(vol['guid']) size = vol['size'] self.all_api_volumes[guid] = { 'size': size, 'type': 'vol', 'state': vol['state'], } self.api_volumes = None self.get_api_image_volumes() for vol in self.api_images: guid = str(vol['guid']) size = vol['size'] self.all_api_volumes[guid] = { 'size': size, 'type': 'img', 'state': vol['state'], } self.api_images = None self.get_api_snapshot_volumes() for vol in self.api_snapshots: guid = str(vol['guid']) size = vol['size'] self.all_api_volumes[guid] = { 'size': size, 'type': 'snap', 'state': vol['state'], } self.api_snapshots = None if self.verbose > 2: log.debug("All Volumes from API:\n%s", pp(self.all_api_volumes)) self.get_lvm_lvs() if self.verbose > 3: log.debug("All Logical Volumes from LVM:\n%s", pp(self.lvm_lvs)) self.count = { 'total': 0, 'missing': 0, 'alien': 0, 'orphans': 0, 'zombies': 0, 'snapshots': 0, 'ok': 0, 'dummy': 0, 'error': 0, } self.compare() # Get current state and redefine output, if necessary total_errors = self.count['missing'] + self.count[ 'orphans'] + self.count['error'] state = self.threshold.get_status(total_errors) if total_errors == 1: out = "One error on provisioned storage volumes." elif total_errors > 1: out = "Currently %d errors on provisioned storage volumes." % ( total_errors) # generate performance data (except number of dummy volumes) for key in self.count: if key == 'dummy': continue self.add_perfdata(label=key, value=self.count[key]) if self.verbose > 1: log.debug("Got following counts:\n%s", pp(self.count)) self.exit(state, out)
def __call__(self):
    """
    Method to call the plugin directly.

    Sends a status-request XML for the configured job id to the VCB
    service, parses the reply, extracts a version string and exits with
    a Nagios state reflecting transport errors, request status and an
    optional minimum-version comparison.
    """
    self.parse_args()
    self.init_root_logger()

    state = nagios.state.ok
    out = "VCB on %r port %d seems to be okay." % (self.host_address, self.vcb_port)

    if self.verbose > 2:
        log.debug("Current object:\n%s", pp(self.as_dict()))

    # Make sure common termination signals go through the plugin's own
    # handler so it can emit a proper Nagios exit line.
    signal.signal(signal.SIGHUP, self.exit_signal_handler)
    signal.signal(signal.SIGINT, self.exit_signal_handler)
    signal.signal(signal.SIGABRT, self.exit_signal_handler)
    signal.signal(signal.SIGTERM, self.exit_signal_handler)
    signal.signal(signal.SIGUSR1, self.exit_signal_handler)
    signal.signal(signal.SIGUSR2, self.exit_signal_handler)

    xml = XML_TEMPLATE % (self.job_id)
    if self.verbose > 3:
        log.debug("XML to send:\n%s", xml)

    result = ''
    do_parse = False
    result_rcvd = False
    rstatus = None
    got_version = None

    try:
        result = self.send(xml)
        # Strip NUL bytes the service may pad the answer with.
        result = re.sub(r'\x00', '', result)
        result = result.strip()
        do_parse = True
        result_rcvd = True
    except NoListeningError as e:
        result = "Error: " + str(e).strip()
        state = nagios.state.critical
    except SocketTransportError as e:
        result = "Error: " + str(e).strip()
        state = nagios.state.critical
    except Exception as e:
        result = "Error %s on checking VCB on %r port %d: %s" % (
            e.__class__.__name__, self.host_address, self.vcb_port, e)
        state = nagios.state.critical

    if self.verbose > 1:
        log.debug("Got result:\n%s.", result)

    if do_parse:
        try:
            rstatus = self.parse_result(result)
            # Progress lines are dropped one by one from the front of the
            # reply until a non-progress status is parsed.
            while rstatus.state == STATUS['progress']:
                lines = result.splitlines()
                line_removed = lines.pop(0)
                log.debug("Removed first line %r", line_removed)
                result = '\n'.join(lines)
                rstatus = self.parse_result(result)
            if rstatus.state != STATUS['succeeded']:
                state = self.max_state(state, nagios.state.critical)
            result = rstatus.message
            result_rcvd = True
        except RequestStatusError as e:
            result = "Could not understand message: %s" % (result)
            state = self.max_state(state, nagios.state.critical)

    if result_rcvd:
        # A usable reply was received - look for a version string and
        # flatten the message to a single line for the status output.
        got_version = self.parse_for_version(result)
        result = ' '.join(result.splitlines())
        log.debug("Got a version of: %r", got_version)
        if got_version is None:
            state = self.max_state(state, nagios.state.warning)
            result += ', no version found.'
        elif self.min_version is not None:
            parsed_version_expected = debian.debian_support.Version(self.min_version)
            if self.verbose > 1:
                log.debug("Expecting parsed version %r.", parsed_version_expected)
            parsed_version_got = debian.debian_support.Version(got_version)
            if self.verbose > 1:
                log.debug("Got parsed version %r.", parsed_version_got)
            if parsed_version_got < parsed_version_expected:
                state = self.max_state(state, nagios.state.warning)
                result += ', version is less than %r.' % (self.min_version)

    out = result
    self.exit(state, out)
def __call__(self):
    """
    Execute the plugin directly.

    Checks every configured MD device (or all discovered ones, when
    checking all devices was requested), classifies each result as good,
    bad or ugly and exits with an aggregated Nagios state and message.
    """
    self.parse_args()

    if self.check_all:
        self.collect_devices()
        if not self.devices:
            self.exit(nagios.state.ok, "No MD devices to check found.")

    if self.verbose > 2:
        log.debug("Current object:\n%s", pp(self.as_dict()))
    log.debug("MD devices to check: %r", self.devices)

    state = nagios.state.ok
    out = "MD devices seems to be ok."

    # Check the devices in numerical order ('md0', 'md1', ...).
    ordered_devs = sorted(
        self.devices, key=lambda name: int(name.replace('md', '')))
    for dev in ordered_devs:
        check_result = None
        try:
            check_result = self.check_mddev(dev)
        except NPReadTimeoutError:
            self.ugly_ones.append(
                "%s - timeout on getting information" % (dev))
        except IOError as e:
            msg = "MD device %r disappeared during this script: %s" % (
                dev, e)
            log.debug(msg)
            continue
        except Exception as e:
            msg = "Error on getting information about %r: %s" % (dev, e)
            self.handle_error(msg, e.__class__.__name__, True)
            self.die(
                "Unknown %r error on getting information about %r: %s" % (
                    e.__class__.__name__, dev, e))

        if check_result is None:
            continue

        self.checked_devices += 1
        (state, output) = check_result
        if state == nagios.state.ok:
            self.good_ones.append(output)
        elif state == nagios.state.warning:
            self.bad_ones.append(output)
        else:
            self.ugly_ones.append(output)

    if not self.checked_devices:
        self.exit(nagios.state.ok, "No MD devices to check found.")

    if self.verbose > 2:
        log.debug("Ugly states: %s", pp(self.ugly_ones))
        log.debug("Bad states: %s", pp(self.bad_ones))
        log.debug("Good states: %s", pp(self.good_ones))

    # Problems dominate the output: ugly devices first, then bad ones;
    # only a fully healthy run reports the good devices.
    if self.bad_ones or self.ugly_ones:
        msgs = list(self.ugly_ones) + list(self.bad_ones)
    else:
        msgs = list(self.good_ones)
    out = '; '.join(msgs)

    if self.ugly_ones:
        state = nagios.state.critical
    elif self.bad_ones:
        state = nagios.state.warning

    self.exit(state, out)
def run(self):
    """
    Main execution method.

    Gathers all volumes, images and snapshots known to the storage API,
    compares them against the logical volumes found via LVM and exits
    with a Nagios state based on the number of inconsistencies found.
    Root privileges are required for the LVM inspection.
    """
    if os.geteuid():
        log.error("You must be root to execute this.")
        sys.exit(1)

    state = nagios.state.ok
    out = "Storage volumes on %r seems to be okay." % (self.hostname)

    self.all_api_volumes = {}

    # Fetch every volume category from the API and merge the entries
    # into one mapping keyed by GUID; each raw per-category list is
    # released again right after it has been consumed.
    sources = (
        (self.get_api_storage_volumes, 'api_volumes', 'vol'),
        (self.get_api_image_volumes, 'api_images', 'img'),
        (self.get_api_snapshot_volumes, 'api_snapshots', 'snap'),
    )
    for fetch, attr_name, vol_type in sources:
        fetch()
        for entry in getattr(self, attr_name):
            self.all_api_volumes[str(entry['guid'])] = {
                'size': entry['size'],
                'type': vol_type,
                'state': entry['state'],
            }
        setattr(self, attr_name, None)

    if self.verbose > 2:
        log.debug("All Volumes from API:\n%s", pp(self.all_api_volumes))

    self.get_lvm_lvs()
    if self.verbose > 3:
        log.debug("All Logical Volumes from LVM:\n%s", pp(self.lvm_lvs))

    # Counters filled in by self.compare() below.
    self.count = {
        'total': 0,
        'missing': 0,
        'alien': 0,
        'orphans': 0,
        'zombies': 0,
        'snapshots': 0,
        'ok': 0,
        'dummy': 0,
        'error': 0,
    }

    self.compare()

    # Derive the plugin state from the number of problematic volumes
    # and adjust the status line accordingly.
    total_errors = sum(
        self.count[key] for key in ('missing', 'orphans', 'error'))
    state = self.threshold.get_status(total_errors)
    if total_errors == 1:
        out = "One error on provisioned storage volumes."
    elif total_errors > 1:
        out = "Currently %d errors on provisioned storage volumes." % (
            total_errors)

    # Emit performance data for every counter except 'dummy'.
    for key, value in self.count.items():
        if key == 'dummy':
            continue
        self.add_perfdata(label=key, value=value)

    if self.verbose > 1:
        log.debug("Got following counts:\n%s", pp(self.count))

    self.exit(state, out)
def __call__(self):
    """
    Method to call the plugin directly.

    Runs smartctl against the configured drive (optionally a MegaRAID
    slot), evaluates SMART health state, grown-defect list, temperature
    and power-on hours, and exits with the appropriate Nagios state.
    """
    self.parse_args()

    if self.verbose > 2:
        log.debug("Current object:\n%s", pp(self.as_dict()))

    state = nagios.state.ok
    out = "All seems to be ok."

    # A SAS disk is recognised by its 'Transport protocol: SAS' line.
    re_is_sas = re.compile(r'^\s*Transport\s+protocol\s*:\s*SAS.*$',
        (re.IGNORECASE | re.MULTILINE))

    # Messages smartctl emits for drives without SMART support.
    no_smart_patterns = (
        r'Device\s+does\s+not\s+support\s+SMART',
        # SMART support is: Unavailable - device lacks SMART capability.
        r'SMART\s+support\s+is:\s+Unavailable\s+-\s+.*',
    )
    pattern = r'(' + r'|'.join(no_smart_patterns) + r')'
    re_no_smart = re.compile(pattern, (re.IGNORECASE | re.MULTILINE))
    if self.verbose > 2:
        log.debug("No SMART pattern: %r", re_no_smart.pattern)

    smart_output = self._exec_smartctl()

    is_sas = False
    if re_is_sas.search(smart_output):
        is_sas = True

    match = re_no_smart.search(smart_output)
    if match:
        # Drive reports no SMART capability at all.
        msg = ''
        if is_sas:
            msg = "SAS "
        else:
            msg = "SATA "
        dev = self.device
        if self.megaraid:
            # megaraid_slot is an (adapter, slot) tuple.
            dev = "[%d:%d]" % self.megaraid_slot
        reason = match.group(1).strip()
        reason = re.sub(r'\s+', ' ', reason)
        log.debug("No SMART of Drive %s: %s", dev, reason)
        if self.megaraid:
            # Exit with OK, if the disk is spun down
            spin_state = self.get_megaraid_pd_spin_state()
            if spin_state and spin_state == 'down':
                msg += "Drive %s: Spun Down" % (dev)
                self.exit(nagios.state.ok, msg)
        msg += "Drive %s: %s" % (dev, reason)
        self.die(msg)

    # Filled in by _eval_sas_disk() / _eval_sata_disk() below.
    self.disk_data = {
        'model': None,
        'serial': None,
        'health_state': None,
        'nr_grown_defects': 0,
        'temperature': None,
        'hours_on': None,
    }

    if is_sas:
        log.debug("Disk is a SAS disk.")
        self._eval_sas_disk(smart_output)
    else:
        log.debug("Disk is a SATA disk.")
        self._eval_sata_disk(smart_output)

    log.debug("Evaluated disk data:\n%s", pp(self.disk_data))

    err_msgs = []

    if self.disk_data['health_state'] is None:
        # Without a health state nothing sensible can be reported.
        msg = "Could not detect SMART Health Status of "
        if is_sas:
            msg += "SAS "
        else:
            msg += "SATA "
        dev = self.device
        if self.megaraid:
            dev = "[%d:%d]" % self.megaraid_slot
        msg += "Drive %s." % (dev)
        self.die(msg)

    # SAS reports 'OK', SATA reports 'PASSED' - anything else is critical.
    if is_sas:
        if self.disk_data['health_state'].lower() != 'ok':
            state = self.max_state(state, nagios.state.critical)
            err_msgs.append("SMART Health Status is %r." % (self.disk_data['health_state']))
    else:
        if self.disk_data['health_state'].lower() != 'passed':
            state = self.max_state(state, nagios.state.critical)
            err_msgs.append("SMART overall-health self-assessment test result is %r." % (
                self.disk_data['health_state']))

    # Grown-defect list, checked against the threshold when one is set.
    gd_count = self.disk_data['nr_grown_defects']
    if self.threshold:
        gd_state = self.threshold.get_status(gd_count)
        if gd_state != nagios.state.ok:
            state = self.max_state(state, gd_state)
            err_msgs.append("%d elements in list of grown defects." % (gd_count))
        self.add_perfdata(label='gd_list', value=gd_count, threshold=self.threshold)
    else:
        self.add_perfdata(label='gd_list', value=gd_count)

    if self.disk_data['temperature'] is not None:
        self.add_perfdata(
            label='temperature',
            value=self.disk_data['temperature'],
            uom="C",
        )

    # Assemble the human-readable status line.
    out = ""
    if is_sas:
        out = "SAS "
    else:
        out = "SATA "
    dev = self.device
    if self.megaraid:
        dev = "[%d:%d]" % self.megaraid_slot
    out += "Drive %s " % (dev)
    if err_msgs:
        out += ", ".join(err_msgs)
    else:
        out += "SMART Health Status seems to be okay."

    if (self.disk_data['hours_on'] is not None
            and isinstance(self.disk_data['hours_on'], Number)):
        # Report power-on time as days + remaining hours.
        days = self.disk_data['hours_on'] / 24
        hours = self.disk_data['hours_on'] % 24
        out += " Power on: %d days, %d hours." % (days, hours)

    self.exit(state, out)