def collect_hypervisor(self, sudo=False, timeout=None):
    """Gauge per-call hypervisor statistics parsed from ``lparstat -H 1 1``.

    Blank lines are dropped from the command output and everything before
    ``self.HYPERVISOR_METRICS_START_IDX`` is discarded. Each remaining row is
    split on spaces: the first token becomes a ``call:<name>`` tag and every
    following column is gauged under the metric name that
    ``self.HYPERVISOR_IDX_METRIC_MAP`` holds for that column position.
    Columns that are not valid floats are logged and skipped.

    Sample output (layout reconstructed)::

        System configuration: type=Shared mode=Uncapped smt=On lcpu=4 mem=7936MB psize=16 ent=0.20

                   Detailed information on Hypervisor Calls

        Hypervisor        Number of   %Total Time   %Hypervisor   Avg Call   Max Call
          Call              Calls        Spent       Time Spent   Time(ns)   Time(ns)

        remove                 15          0.0           0.4         1218       1781
        read                    0          0.0           0.0            0          0
        nclear_mod              0          0.0           0.0            0          0
        page_init             316          0.0           9.7         1452       6843
        clear_ref               0          0.0           0.0            0          0
        protect                 0          0.0           0.0            0          0
        ...
        --------------------------------------------------------------------------------

    :param sudo: forwarded to ``get_subprocess_output``.
    :param timeout: forwarded to ``get_subprocess_output``.
    """
    command = ['lparstat', '-H', '1', '1']
    raw_output, _, _ = get_subprocess_output(command, self.log, sudo=sudo, timeout=timeout)

    non_blank_lines = list(filter(None, raw_output.splitlines()))
    for row in non_blank_lines[self.HYPERVISOR_METRICS_START_IDX:]:
        columns = list(filter(None, row.split(' ')))
        call_tag = "call:{}".format(columns[0])
        for position, raw_value in enumerate(columns[1:]):
            try:
                # The name is resolved before the conversion so the log line
                # below can always refer to the metric being processed.
                metric_name = self.HYPERVISOR_IDX_METRIC_MAP[position]
                self.gauge(metric_name, float(raw_value), tags=[call_tag])
            except ValueError:
                self.log.info("unable to convert %s to float for %s - skipping", metric_name, call_tag)
def collect_memory(self, page_stats=True, sudo=False, timeout=None):
    """Gauge the memory columns reported by ``lparstat -m [-pw] 1 1``.

    After dropping blank lines and everything before
    ``self.MEMORY_METRICS_START_IDX``, the first remaining row is taken as
    the column names and the third as the values (the second is the dashed
    separator). Each value is gauged as ``system.lpar.memory.<column>`` with
    any ``%`` removed from the column name; values that are not valid floats
    (for example ``-``) are logged and skipped.

    Sample output with page statistics (layout reconstructed)::

        System configuration: lcpu=4 mem=7936MB mpsz=0.00GB iome=7936.00MB iomp=16 ent=0.20

        physb   hpi  hpit  pmem  iomin   iomu   iomf  iohwm iomaf  pgcol mpgcol ccol %entc  vcsw
        ----- ----- ----- ----- ------ ------ ------ ------ ----- ------ ------ ---- ----- -----
         0.63     0     0  7.75   46.8   23.8      -   23.9     0    0.0    0.0  0.0   4.1 1249055296

    and without::

        System configuration: lcpu=4 mem=7936MB mpsz=0.00GB iome=7936.00MB iomp=16 ent=0.20

        physb   hpi  hpit  pmem  iomin   iomu   iomf  iohwm iomaf %entc  vcsw
        ----- ----- ----- ----- ------ ------ ------ ------ ----- ----- -----
         0.63     0     0  7.75   46.8      -      -      -     0   4.1 1249045057

    :param page_stats: when true, ``-pw`` is added to the command line.
    :param sudo: forwarded to ``get_subprocess_output``.
    :param timeout: forwarded to ``get_subprocess_output``.
    """
    command = ['lparstat', '-m'] + (['-pw'] if page_stats else []) + ['1', '1']
    raw_output, _, _ = get_subprocess_output(command, self.log, sudo=sudo, timeout=timeout)

    table = list(filter(None, raw_output.splitlines()))[self.MEMORY_METRICS_START_IDX:]
    # table[0] holds the column names, table[1] the dashes, table[2] the values.
    header = list(filter(None, table[0].split(' ')))
    readings = list(filter(None, table[2].split(' ')))

    for position, column in enumerate(header):
        try:
            reading = float(readings[position])
            # '%' is stripped only after a successful conversion, so a failed
            # conversion is logged with the column name exactly as printed.
            column = column.replace('%', '')
            self.gauge('system.lpar.memory.{}'.format(column), reading)
        except ValueError:
            self.log.info("unable to convert %s to float - skipping", column)
def check(self, instance):
    """Report the system uptime, in seconds, as the ``system.uptime`` gauge.

    ``uptime.uptime()`` is tried first. When it returns a falsy value (the
    original author notes this happens on AIX and some other platforms, where
    the module may return ``None``), the elapsed time of the init process
    (pid 1), as printed by ``ps -o etime= -p1``, is used instead.

    ``etime`` is formatted as ``[[dd-]hh:]mm:ss`` (e.g. ``8-00:56:09``,
    ``00:56:09`` or ``56:09``), so the days and hours components are both
    optional and default to zero here.

    Any failure in the fallback path is logged and swallowed; no gauge is
    emitted in that case.

    :param instance: check instance configuration (unused here).
    """
    up = uptime.uptime()
    if up:
        self.gauge("system.uptime", up)
        return

    try:
        etime, _, _ = get_subprocess_output(['ps', '-o', 'etime=', '-p1'], self.log)

        # Everything before the '-' (if any) is the day count.
        days, _, clock = etime.strip().rpartition('-')

        # Left-pad with zeros so "mm:ss" is read as 0 hours instead of
        # failing on a missing third component.
        parts = [int(part) for part in clock.split(':')]
        parts = [0] * (3 - len(parts)) + parts
        # Unpacking raises on more than three components, which is then
        # logged below rather than silently misparsed.
        hours, minutes, seconds = parts

        total_seconds = ((int(days or 0) * 24 + hours) * 60 + minutes) * 60 + seconds
        self.gauge("system.uptime", total_seconds)
    except Exception:
        self.log.exception("Cannot collect uptime statistics")
def check(self, instance):
    """Parse ``iostat -Dsal 1 1`` and gauge every column as ``system.iostat.*``.

    The first four non-empty output lines are skipped, as are lines starting
    with ``self.TABLE_SEP``. A line starting with one of the keys of
    ``self.SCHEMA`` selects the current mode; the schema entry for that mode
    lists its ``sections``, and each section lists its ``cols`` and,
    optionally, which of those columns are ``tags`` rather than metrics.

    A line is treated as a data row only when its number of space-separated
    fields equals the total number of columns for the current mode plus one
    (the leading device name). Metric values are converted with
    ``self.extract_with_unit`` and named
    ``system.iostat.<mode>.<section>.<column>``. Every mode except
    ``physical`` also tags its metrics with ``<mode>:<device>``.

    :param instance: check instance configuration (unused here).
    """
    output, _, _ = get_subprocess_output(['iostat', '-Dsal', '1', '1'], self.log)
    stats = [line for line in output.splitlines() if line]

    mode = ''
    expected_fields_no = 0
    for line in stats[4:]:
        if line.startswith(self.TABLE_SEP):
            continue

        for candidate in self.SCHEMA:
            if line.startswith(candidate):
                mode = candidate
                # One extra field for the device name that leads each row.
                expected_fields_no = 1 + sum(
                    len(self.SCHEMA[mode][section]['cols'])
                    for section in self.SCHEMA[mode]['sections']
                )
        # NOTE(review): a line that selects a mode is not skipped here; it
        # still goes through the field-count check below, exactly as before
        # (the original `continue` only affected the inner loop). Confirm
        # whether any mode relies on that before changing it.

        if not mode:
            # Data seen before any known table header: nothing to match the
            # columns against (previously this raised NameError).
            continue

        fields = [field for field in line.split(' ') if field]
        if len(fields) != expected_fields_no:
            continue

        schema = self.SCHEMA[mode]
        mode_name = mode.lower()

        tags = []
        # Value comparison, not identity: `is not 'physical'` was effectively
        # always true, so the physical rows were tagged like any other mode.
        if mode_name != 'physical':  # odd one out
            tags.append("{mode}:{device}".format(mode=mode_name, device=fields[0].lower()))

        metrics = {}
        section_idx = 1  # we start after the device
        for section in schema['sections']:
            cols = schema[section]['cols']
            tag_cols = schema[section].get('tags', [])
            for idx, colname in enumerate(cols):
                raw_value = fields[section_idx + idx]
                if colname in tag_cols:
                    tags.append("{tag}:{val}".format(tag=colname, val=raw_value))
                    continue
                metric_name = "{mode}.{section}.{name}".format(
                    mode=mode_name, section=section, name=colname)
                try:
                    metrics[metric_name] = self.extract_with_unit(raw_value)
                except ValueError as e:
                    self.log.debug("unexpected value parsing metric %s", e)
            section_idx += len(cols)

        # Gauges are sent only after the whole row is parsed so that tag
        # columns found late in the row are attached to every metric.
        for name, value in metrics.items():
            self.gauge("system.iostat.{}".format(name), value, tags=tags)
def collect_spurr(self, sudo=False, timeout=None):
    """Gauge SPURR-based processor utilisation from ``lparstat -E 1 1``.

    After dropping blank lines and everything before
    ``self.SPURR_PROCESSOR_UTILIZATION_START_IDX``, the first remaining row is
    taken as the column names and the third as the values. Columns whose
    index is past the midpoint of the header belong to the "Normalised" group
    and get a ``.norm`` suffix; the others form the "Actual" group. Each
    parsed value is gauged as ``system.lpar.spurr.<column>[.norm]`` together
    with a ``.pct`` companion holding the value divided by the sum of its
    group. Values that are not valid floats (such as the ``freq`` column,
    e.g. ``3.6GHz[100%]``) are logged and skipped.

    When a group sums to zero the ``.pct`` gauge is skipped for that group
    instead of raising ``ZeroDivisionError``; the raw gauges are still sent.

    Sample output (layout reconstructed)::

        System configuration: type=Shared mode=Uncapped smt=On lcpu=4 mem=7936MB ent=0.20 Power=Disabled

        Physical Processor Utilisation:

         --------Actual--------              ------Normalised------
         user   sys  wait  idle      freq    user   sys  wait  idle
         ----  ----  ----  ----   ---------  ----  ----  ----  ----
        0.008 0.012 0.000 0.180 3.6GHz[100%] 0.008 0.012 0.000 0.180

    :param sudo: forwarded to ``get_subprocess_output``.
    :param timeout: forwarded to ``get_subprocess_output``.
    """
    cmd = ['lparstat', '-E', '1', '1']
    output, _, _ = get_subprocess_output(cmd, self.log, sudo=sudo, timeout=timeout)

    table = [line for line in output.splitlines() if line][self.SPURR_PROCESSOR_UTILIZATION_START_IDX:]
    fields = [token for token in table[0].split(' ') if token]
    stats = [token for token in table[2].split(' ') if token]

    midpoint = len(fields) / 2
    metrics = {}
    # Running sums keyed by "is this a normalised column?".
    totals = {False: 0, True: 0}
    for idx, field in enumerate(fields):
        normalised = idx > midpoint
        metric = "system.lpar.spurr.{}".format(field)
        if normalised:
            metric = "{}.norm".format(metric)
        try:
            value = float(stats[idx])
        except ValueError:
            self.log.info("unable to convert %s to float - skipping", metric)
            continue
        # Keep the group alongside the value so it does not have to be
        # re-derived from the metric name later.
        metrics[metric] = (value, normalised)
        totals[normalised] += value

    for metric, (value, normalised) in metrics.items():
        self.gauge(metric, value)
        group_total = totals[normalised]
        if group_total:
            self.gauge("{}.pct".format(metric), value / group_total)
        else:
            self.log.debug("total for %s is zero - skipping percentage", metric)
def collect_memory_entitlements(self, sudo=False, timeout=None):
    """Gauge per-pool I/O memory entitlements from ``lparstat -m -eR 1 1``.

    After dropping blank lines and everything before
    ``self.MEMORY_ENTITLEMENTS_START_IDX``, the first remaining row is the
    header (its leading ``iompn:`` label is discarded) and every following
    row describes one pool. The first token of a row becomes an
    ``iompn:<name>`` tag and each remaining column is gauged as
    ``system.lpar.memory.entitlement.<column>``. Values that are not valid
    floats are logged and skipped.

    Sample of the relevant part of the output (layout reconstructed)::

        iompn:            iomin  iodes   iomu  iores  iohwm  iomaf
        ent1.txpool        2.12  16.00   2.00   2.12   2.00      0
        ent1.rxpool__4     4.00  16.00   3.50   4.00   3.50      0
        ent1.phypmem       0.10   0.10   0.09   0.10   0.09      0
        ...
        vscsi0            16.50  16.50   0.13  16.50   0.18      0
        sys0               0.00   0.00   0.00   0.00   0.00      0

    :param sudo: forwarded to ``get_subprocess_output``.
    :param timeout: forwarded to ``get_subprocess_output``.
    """
    command = ['lparstat', '-m', '-eR', '1', '1']
    raw_output, _, _ = get_subprocess_output(command, self.log, sudo=sudo, timeout=timeout)

    table = list(filter(None, raw_output.splitlines()))[self.MEMORY_ENTITLEMENTS_START_IDX:]
    # Drop the first header token: it labels the pool-name column.
    columns = list(filter(None, table[0].split(' ')))[1:]

    for row in table[1:]:
        cells = list(filter(None, row.split(' ')))
        pool_tag = "iompn:{}".format(cells[0])
        # Offsets start at 1 because cells[0] is the pool name.
        for offset, column in enumerate(columns, 1):
            try:
                metric_name = "system.lpar.memory.entitlement.{}".format(column)
                self.gauge(metric_name, float(cells[offset]), tags=[pool_tag])
            except ValueError:
                self.log.info(
                    "unable to convert %s to float for %s - skipping", metric_name, pool_tag)
def check(self, instance):
    """Gauge filesystem size, usage and availability parsed from ``df -P``.

    The header line is skipped. For every other line the last five
    space-separated fields are read as blocks, used, available, capacity and
    mount point; whatever precedes them is joined with ``_`` to form the
    filesystem name. Lines whose numeric fields cannot be parsed, or that
    have too few fields to contain a filesystem name plus the five trailing
    columns, are logged at debug level and skipped.

    Emitted gauges, tagged with ``fs:<filesystem>`` and ``mount:<mount>``:
    ``system.fs.total``, ``system.fs.used`` and ``system.fs.available`` (each
    divided by ``self.KB`` and rounded) and ``system.fs.available.pct``.

    Sample output (layout reconstructed; the original author notes the units
    are MB)::

        Filesystem    MB blocks      Used Available Capacity Mounted on
        /dev/hd4         768.00    304.84    463.16      40% /
        /dev/hd2        8448.00   2583.39   5864.61      31% /usr
        /dev/hd9var      768.00    655.57    112.43      86% /var
        /dev/hd3         256.00    158.39     97.61      62% /tmp
        /dev/hd1         256.00    219.11     36.89      86% /home

    :param instance: check instance configuration (unused here).
    """
    # -P for POSIX portable format
    output, _, _ = get_subprocess_output(['df', '-P'], self.log)

    lines = [line for line in output.splitlines() if line]
    for line in lines[1:]:
        fields = [field for field in line.split(' ') if field]
        if len(fields) < 6:
            # Not enough columns for a name plus the five trailing values;
            # indexing from the end would raise IndexError and abort the
            # whole collection.
            self.log.debug("Unexpected df output line %r - skipping", line)
            continue

        filesystem = '_'.join(fields[0:-5])
        try:
            blocks = float(fields[-5])
            used = float(fields[-4])
            available = float(fields[-3])
        except ValueError:
            self.log.debug("Unable to get stats for %s - skipping", filesystem)
            continue

        mount = fields[-1]
        tags = ['fs:{}'.format(filesystem), 'mount:{}'.format(mount)]

        self.gauge('system.fs.total', round(blocks / self.KB), tags=tags)
        self.gauge('system.fs.used', round(used / self.KB), tags=tags)
        self.gauge('system.fs.available', round(available / self.KB), tags=tags)

        # NOTE(review): this metric is named "available.pct" but is computed
        # from used / blocks (it matches df's "Capacity" column). Left
        # unchanged because existing consumers may depend on the current
        # value - confirm the intended meaning with the metric owners.
        try:
            pct = (used / blocks) * 100
        except ZeroDivisionError:
            pct = 100
        self.gauge('system.fs.available.pct', pct, tags=tags)
def check(self, instance):
    """Gauge the 1, 5 and 15 minute load averages, raw and per core.

    ``os.getloadavg()`` is used when the platform provides it. Otherwise
    (the original author notes it is not available on AIX) the last three
    whitespace-separated tokens of the ``uptime`` command output are parsed,
    e.g. ``10:50AM up 8 days, 2:48, 2 users, load average: 1.19, 0.77, 0.85``.

    Emits ``system.load.{1,5,15}`` first and then
    ``system.load.norm.{1,5,15}``, the same values divided by the core count
    reported by ``psutil.cpu_count()``.

    :param instance: check instance configuration (unused here).
    :raises AssertionError: when the number of cores cannot be determined;
        the raw load gauges have already been sent at that point.
    """
    try:
        load = os.getloadavg()
    except AttributeError:
        output, _, _ = get_subprocess_output(["uptime"], self.log)
        tokens = output.split()
        load = [float(tokens[position].strip(',')) for position in (-3, -2, -1)]

    windows = ('1', '5', '15')
    for window, value in zip(windows, load):
        self.gauge('system.load.{}'.format(window), value)

    # Normalize load by number of cores.
    cores = psutil.cpu_count()
    # Explicit raise instead of `assert`: asserts are stripped under
    # `python -O`, and an undetermined count of None would otherwise fail
    # with an unrelated TypeError on Python 3. AssertionError is kept so
    # callers see the same exception type as before.
    if not cores or cores < 1:
        raise AssertionError("Cannot determine number of cores")

    for window, value in zip(windows, load):
        self.gauge('system.load.norm.{}'.format(window), value / cores)