def GET(self, **params):
    """Report mounted partitions, disk usage, drive temperatures and md state.

    Partitions whose device name contains ``loop`` are ignored entirely.
    Partitions whose device name contains ``md`` have their usage recorded,
    but contribute no drive name themselves; the member disks listed by
    mdstat are used instead.

    Args:
        **params: Accepted for interface compatibility; not read here.

    Returns:
        dict: ``partitions`` (the psutil partition list), ``usage``
        (mountpoint -> psutil usage), ``temperatures``
        ('/dev/<disk>' -> ``self.get_hdd_temp(<disk>)``), ``mdstat`` (the
        parsed mdstat data, or ``{}`` when it could not be obtained) and
        ``disks`` (sorted, de-duplicated drive names).
    """
    def drive_name(node):
        # Drop every digit so a partition name collapses to its drive name.
        return ''.join(ch for ch in node if not ch.isdigit())

    partitions = psutil.disk_partitions(all=False)
    diskusage = {}
    disks = set()
    for partition in partitions:
        if 'loop' in partition.device:
            continue
        diskusage[partition.mountpoint] = psutil.disk_usage(
            partition.mountpoint)
        if 'md' in partition.device:
            continue
        disks.add(drive_name(partition.device.replace('/dev/', '')))

    # Best effort: a failure to read or walk mdstat must not take the whole
    # report down, so fall back to an empty mdstat section.
    try:
        md = mdstat.parse()
        for array in md['devices'].values():
            for member in array['disks']:
                disks.add(drive_name(member))
    except Exception:
        md = {}

    disks = sorted(disks)
    temperatures = {'/dev/' + k: self.get_hdd_temp(k) for k in disks}
    return {
        "partitions": partitions,
        "usage": diskusage,
        "temperatures": temperatures,
        'mdstat': md,
        'disks': disks
    }
def run(self):
    """Collect per-array status values from mdstat.

    Returns:
        dict: ``<device>_<status key>`` for every entry of each array's
        status dict, plus ``<device>_degraded`` (1 when ``raid_disks``
        differs from ``non_degraded_disks``, otherwise 0). Empty when the
        mdstat module cannot be imported or mdstat cannot be read.
    """
    output = {}
    try:
        import mdstat
    except ImportError:
        self.checks_logger.error(
            "You will need to install mdstat via pip install mdstat")
        # Nothing can be parsed without the module; continuing would fail
        # on the unbound name ``mdstat`` below.
        return output

    try:
        data = mdstat.parse()
        for device, info in data['devices'].items():
            try:
                status_dict = info['status']
            except (KeyError, TypeError):
                self.checks_logger.error(
                    'Device %s status does not exist' % (device, ))
                continue
            for key, value in status_dict.items():
                output['%s_%s' % (device, key)] = value
            degraded = (status_dict['raid_disks'] !=
                        status_dict['non_degraded_disks'])
            output[device + '_degraded'] = 1 if degraded else 0
    except (IOError, OSError) as exception:
        # Format the exception itself: ``.message`` is not available on
        # Python 3 exceptions.
        self.checks_logger.error(
            'Unable to find mdstat.'
            ' Error: {0}'.format(exception))
    return output
def run(self):
    """Collect per-array status values from mdstat.

    Returns:
        dict: ``<device>_<status key>`` for every entry of each array's
        status dict, plus ``<device>_degraded`` (1 when ``raid_disks``
        differs from ``non_degraded_disks``, otherwise 0). Empty when the
        mdstat module cannot be imported or mdstat cannot be read.
    """
    output = {}
    try:
        import mdstat
    except ImportError:
        self.checks_logger.error(
            "You will need to install mdstat via pip install mdstat")
        # Nothing can be parsed without the module; continuing would fail
        # on the unbound name ``mdstat`` below.
        return output

    try:
        data = mdstat.parse()
        for device, info in data['devices'].items():
            try:
                status_dict = info['status']
            except (KeyError, TypeError):
                self.checks_logger.error(
                    'Device %s status does not exist' % (device, ))
                continue
            for key, value in status_dict.items():
                output['%s_%s' % (device, key)] = value
            degraded = (status_dict['raid_disks'] !=
                        status_dict['non_degraded_disks'])
            output[device + '_degraded'] = 1 if degraded else 0
    except (IOError, OSError) as exception:
        # Format the exception itself: ``.message`` is not available on
        # Python 3 exceptions.
        self.checks_logger.error(
            'Unable to find mdstat.'
            ' Error: {0}'.format(exception))
    return output
def pool_status(name: str, path: str, log: logging.Logger = None):
    """Summarise the health of md pool *name* mounted at *path*.

    Filesystem statistics come from ``get_fs_stats(path)`` and the array
    state from ``mdstat.parse()``.

    Args:
        name: Key of the array in mdstat's ``devices`` mapping.
        path: Path handed to ``get_fs_stats``.
        log: Optional logger; when falsy nothing is logged.

    Returns:
        PoolStatus: with ``PoolState.ERROR`` and an empty drive list when
        *name* is not present in mdstat. Otherwise ``DOWN`` when the array is
        not active, ``DEGRADED`` when ``non_degraded_disks`` is below
        ``raid_disks`` or any member disk is faulty, else ``HEALTHY``.
    """
    prefix = "pool_status({}, {})".format(name, path)

    if log:
        log.debug(prefix)
    stats = get_fs_stats(path)
    if log:
        log.debug("{}: get_fs_stats({}):".format(prefix, path))
        log.debug(stats)

    mdstat_data = mdstat.parse()
    if name not in mdstat_data['devices']:
        if log:
            log.error("Pool {} not found in mdstat!".format(name))
        return PoolStatus(name, path, PoolState.ERROR, [], stats.total_size,
                          stats.used)

    pool_data = mdstat_data['devices'][name]
    if log:
        log.debug("{}: pool_data:".format(prefix))
        log.debug(pool_data)

    is_active = pool_data['active']
    raid_disks = pool_data['status']['raid_disks']
    non_degraded_disks = pool_data['status']['non_degraded_disks']
    drive_status = [
        DriveStatus(disk_name,
                    PoolState.DOWN if raw['faulty'] else PoolState.HEALTHY)
        for disk_name, raw in pool_data['disks'].items()
    ]
    if log:
        log.debug("{}: is_active: {}".format(prefix, is_active))
        log.debug("{}: raid_disks: {}".format(prefix, raid_disks))
        log.debug("{}: non_degraded_disks: {}".format(
            prefix, non_degraded_disks))
        log.debug("{}: drive_status: {}".format(
            prefix, "\t".join([str(ds) for ds in drive_status])))

    if not is_active:
        pool_state = PoolState.DOWN
    elif non_degraded_disks < raid_disks:
        pool_state = PoolState.DEGRADED
    elif any(ds.state == PoolState.DOWN for ds in drive_status):
        # A faulty member degrades the pool even when the disk counts agree.
        pool_state = PoolState.DEGRADED
    else:
        pool_state = PoolState.HEALTHY

    if log:
        # Report the computed state (the pool name used to be logged here).
        log.info("{}: pool_state: {}".format(prefix, pool_state))
    return PoolStatus(name, path, pool_state, drive_status, stats.total_size,
                      stats.used)
def getMdstat():
    """Describe every md array reported by ``mdadm --detail --scan``.

    Each ``ARRAY`` line of the scan output is combined with the entry of the
    same name from ``mdstat.parse()``.

    Returns:
        dict: keyed by the basename of the array's device path. Each value
        holds ``device``, ``metadata``, ``host``, ``name``, ``uuid``,
        ``status`` ('active' / 'inactive', replaced by the mdstat ``resync``
        entry when that is truthy) and ``members`` (the mdstat ``disks``
        entry).

    Raises:
        KeyError: if an array listed by mdadm has no entry in mdstat.
    """
    mds = {}
    scan_output = subprocess.run(
        ['mdadm', '--detail', '--scan'],
        stderr=subprocess.DEVNULL,
        stdout=subprocess.PIPE).stdout.decode('utf-8')

    md_devices = None  # parsed lazily, once, when the first array is seen
    # One array per line; splitting on all whitespace would shred each line
    # into single tokens.
    for line in scan_output.splitlines():
        tokens = line.split()
        if len(tokens) < 2 or tokens[0] != 'ARRAY':
            continue
        device = tokens[1]
        dev = os.path.basename(device)

        # Read the key=value fields by name rather than by position so an
        # extra field in the line does not shift the others.
        fields = dict(tok.split('=', 1) for tok in tokens[2:] if '=' in tok)
        host, sep, label = fields.get('name', '').partition(':')
        if not sep:
            # No "host:" prefix: the whole value is the array name.
            host, label = '', host

        mds[dev] = {
            'device': device,
            'metadata': fields.get('metadata', ''),
            'host': host,
            'name': label,
            'uuid': fields.get('UUID', '')
        }

        if md_devices is None:
            md_devices = mdstat.parse()['devices']
        _md = md_devices[dev]
        mds[dev]['status'] = 'active' if _md['active'] else 'inactive'
        mds[dev]['members'] = _md['disks']
        if _md['resync']:
            mds[dev]['status'] = _md['resync']
    return mds
def collect(self, metrics):
    """Emit one "DISKS" metric per md array and one metric per member disk.

    Every metric copies the namespace of ``metrics[0]``, with element 2 set
    to the array name and element 3 set to either "DISKS" or the disk name.
    The "DISKS" value is ``raid_disks`` minus the number of listed disks; a
    disk metric is 1 when the disk is flagged faulty, otherwise 0.

    Args:
        metrics: Requested metrics; only the namespace of the first entry
            is used, as a template.

    Returns:
        list: the populated metrics, all stamped with the same timestamp and
        tagged with ``self.hostname``.
    """
    collected = []
    now = time.time()

    def start_metric(array_name, label):
        # Fresh metric on a copy of the template namespace.
        item = snap.Metric(namespace=list(metrics[0].namespace))
        item.namespace[2].value = array_name
        item.namespace[3].value = label
        return item

    def finish_metric(item, value):
        item.data = value
        item.timestamp = now
        item.tags['host'] = self.hostname
        collected.append(item)

    arrays = mdstat.parse()['devices']
    for array_name, array_info in arrays.items():
        summary = start_metric(array_name, "DISKS")
        finish_metric(
            summary,
            array_info["status"]["raid_disks"] - len(array_info["disks"]))

        for member_name, member_info in array_info['disks'].items():
            member = start_metric(array_name, member_name)
            finish_metric(member, 1 if member_info['faulty'] else 0)
    return collected
def check_raid():
    """Check all RAID devices seen in /proc/mdstat

    A faulty member disk yields an immediate CRIT result. An array that is
    out of sync or resyncing yields WARN, unless its sysfs ``sync_action``
    reads ``check``.

    Returns:
        tuple(int, str) - status and message
    """
    status, message, count = STATUS_OK, '', 0
    for array, state in mdstat.parse().get('devices', {}).items():
        for element, values in (state.get('disks') or {}).items():
            if values.get('faulty'):
                return STATUS_CRIT, 'CRIT: /dev/%s element=%s faulty' % (
                    array, element)

        # unless monthly checkarray is running, warn if out of sync
        try:
            with open('/sys/block/%s/md/sync_action' % array, 'r') as handle:
                action = handle.read().strip()
        except (IOError, OSError):
            # The attribute could not be read; treat it as "no check
            # running" instead of aborting the whole check.
            action = ''

        synced = dot_get(state, 'status.synced') or ()
        if (False in synced or state.get('resync')) and action != 'check':
            status = STATUS_WARN
            message = 'WARN: /dev/%s resync progress=%s finish=%s' % (
                array, dot_get(state, 'resync.progress'),
                dot_get(state, 'resync.finish'))
        count += 1

    if status == STATUS_OK:
        message = 'RAID OK: %d array%s clean' % (
            count, '' if count == 1 else 's')
    return status, message
# Abort when the configuration fails validation, logging the validator's
# error report as YAML.
if not validator.validate(config):
    _LOG.error(
        "Config did not validate:\n%s", yaml.dump(validator.errors))
    sys.exit(1)
# From here on use the validator's normalized form of the configuration.
config = validator.normalized(config)

client = init_mqtt(config["mqtt"])
try:
    client.connect(config["mqtt"]["host"], config["mqtt"]["port"], 60)
except socket.error as err:
    # Without a broker connection there is nothing to publish to.
    _LOG.fatal("Unable to connect to MQTT server: %s" % err)
    sys.exit(1)
# NOTE(review): presumably starts the client's background network thread
# (see the note before disconnect() below) - confirm against the client API.
client.loop_start()

topic_prefix = config["mqtt"]["topic_prefix"]
try:
    # Publish the parsed mdstat data as JSON on the prefix topic, then wait
    # for the configured report_frequency; repeats until interrupted.
    while True:
        data = mdstat.parse(config["mdstat"]["location"])
        client.publish(topic_prefix, payload=json.dumps(data))
        sleep(config["mdstat"]["report_frequency"])
except KeyboardInterrupt:
    # Interrupt ends the loop quietly; only an empty line is printed.
    print("")
finally:
    # Always publish the retained "stopped" status payload on the status
    # topic before disconnecting.
    client.publish(
        "%s/%s" % (topic_prefix, config["mqtt"]["status_topic"]),
        config["mqtt"]["status_payload_stopped"],
        qos=1,
        retain=True)
    # This should also quit the mqtt loop thread.
    client.disconnect()
def GET(self, **params):
    """Report block devices, physical drives and md state.

    Partitions whose device name contains ``loop`` are ignored. Partitions
    whose device name contains ``md`` have their usage recorded but
    contribute no drive name; mdstat's member disks are used instead.

    Args:
        **params: Accepted for interface compatibility; not read here.

    Returns:
        dict: ``partitions`` maps '/dev/<name>' to each leaf entry of
        ``self.get_blk_info()['blockdevices']``, enriched with the psutil
        partition fields and usage figures where available and with ``size``
        converted to int. ``drives`` maps '/dev/<name>' to the top-level
        block devices recognised as drives, each with a ``temperature``.
        ``mdstat`` is the parsed mdstat data, or ``{}`` when it could not be
        obtained.
    """
    def drive_name(node):
        # Drop every digit so a partition name collapses to its drive name.
        return ''.join(ch for ch in node if not ch.isdigit())

    def collect(devices):
        # Flatten the block-device tree down to its leaves.
        leaves = []
        for dev in devices:
            if 'children' in dev:
                leaves.extend(collect(dev['children']))
            else:
                leaves.append(dev)
        return leaves

    partitions = psutil.disk_partitions(all=False)
    diskusage = {}
    disks = []
    for partition in partitions:
        if 'loop' in partition.device:
            continue
        diskusage[partition.mountpoint] = psutil.disk_usage(
            partition.mountpoint)
        if 'md' in partition.device:
            continue
        disks.append(drive_name(partition.device.replace('/dev/', '')))

    # Best effort: a failure to read or walk mdstat must not take the whole
    # report down, so fall back to an empty mdstat section.
    try:
        md = mdstat.parse()
        for array in md['devices'].values():
            for member in array['disks']:
                disks.append(drive_name(member))
    except Exception:
        md = {}

    disks = sorted(set(disks))
    temperatures = {'/dev/' + k: self.get_hdd_temp(k) for k in disks}

    blk_info = self.get_blk_info()
    devices = collect(blk_info['blockdevices'])
    drives = {'/dev/' + dev['name']: dev
              for dev in blk_info['blockdevices'] if dev['name'] in disks}
    for path, drive in drives.items():
        if path in temperatures:
            drive['temperature'] = temperatures[path]

    devices = {'/dev/' + dev['name']: dev for dev in devices}
    # Key each psutil partition by its OWN device path; keying by the
    # variable left over from the loop above collapsed the map to a single
    # wrong entry.
    partitions = {part.device: part for part in partitions}
    for path, info in devices.items():
        if path in partitions:
            info.update(partitions[path]._asdict())
        mountpoint = info.get('mountpoint')
        if mountpoint in diskusage:
            info.update(diskusage[mountpoint]._asdict())
        info['size'] = int(info['size'])

    return {
        "partitions": devices,
        "drives": drives,
        'mdstat': md
    }
def get_raids():
    """Return mdstat's array entries keyed by device path.

    Returns:
        dict: '/dev/<array name>' -> that array's entry from
        ``mdstat.parse()['devices']``.
    """
    # ``items()`` works on Python 2 and 3; ``iteritems()`` is Python 2 only.
    return {'/dev/' + k: v for k, v in mdstat.parse()['devices'].items()}
import psutil import mdstat import platform from datetime import datetime, timezone, timedelta from influxdb import InfluxDBClient # Do not edit below this line # influx_payload = [] devices = { 'md': mdstat.parse()['devices'], } for array in devices['md']: influx_payload.append({ "measurement": "Storage Servers", "tags": { "server": platform.uname()[1], "mount_point": array, "type": 'rebuild' }, "time": datetime.now(timezone.utc).astimezone().isoformat(), "fields": { "resync_progress": float(devices['md'][array]['resync']['progress'].replace('%', '')), "resync_eta_mins": float(devices['md'][array]['resync']['finish'].replace('min', '')), "resync_eta_date": '{:%A, %b %d %I:%M %p}'.format(datetime.now() + timedelta( minutes=float(devices['md'][array]['resync']['finish'].replace(