Example #1
0
    def collect_hypervisor(self, sudo=False, timeout=None):
        """Emit one gauge per numeric column of ``lparstat -H 1 1``.

        Empty lines are removed from the command output and the first
        ``self.HYPERVISOR_METRICS_START_IDX`` remaining lines are skipped.
        For every row after that, the first token becomes a ``call:<token>``
        tag and each following token is reported under the metric name found
        at its position in ``self.HYPERVISOR_IDX_METRIC_MAP``. Tokens that
        raise ``ValueError`` during conversion are logged at info level and
        skipped.

        :param sudo: passed through to ``get_subprocess_output``.
        :param timeout: passed through to ``get_subprocess_output``.

        Sample output::

            System configuration: type=Shared mode=Uncapped smt=On lcpu=4 mem=7936MB psize=16 ent=0.20

                       Detailed information on Hypervisor Calls

            Hypervisor                  Number of    %Total Time   %Hypervisor   Avg Call    Max Call
              Call                        Calls         Spent      Time Spent    Time(ns)    Time(ns)

            remove                          15            0.0           0.4       1218        1781
            read                             0            0.0           0.0          0           0
            nclear_mod                       0            0.0           0.0          0           0
            page_init                      316            0.0           9.7       1452        6843
            clear_ref                        0            0.0           0.0          0           0
            protect                          0            0.0           0.0          0           0
                                                    ...
                                                    ...
            --------------------------------------------------------------------------------
        """
        raw, _, _ = get_subprocess_output(
            ['lparstat', '-H', '1', '1'], self.log, sudo=sudo, timeout=timeout)

        # The start index counts non-empty lines only, so filter before slicing.
        non_empty = [line for line in raw.splitlines() if line]
        for row in non_empty[self.HYPERVISOR_METRICS_START_IDX:]:
            columns = list(filter(None, row.split(' ')))
            call_tag = "call:{}".format(columns[0])
            # Position 0 of the metric map corresponds to the second column.
            for position, raw_value in enumerate(columns[1:]):
                try:
                    metric_name = self.HYPERVISOR_IDX_METRIC_MAP[position]
                    reading = float(raw_value)
                    self.gauge(metric_name, reading, tags=[call_tag])
                except ValueError:
                    self.log.info(
                        "unable to convert %s to float for %s - skipping",
                        metric_name, call_tag)
                    continue
Example #2
0
    def collect_memory(self, page_stats=True, sudo=False, timeout=None):
        """Report the memory table printed by ``lparstat -m [-pw] 1 1``.

        After dropping empty lines and skipping the first
        ``self.MEMORY_METRICS_START_IDX`` of them, the first remaining line is
        read as the column names and the third as the values (the second is
        the dashed separator in the samples below). Each value is sent as the
        gauge ``system.lpar.memory.<column>``, with any ``%`` removed from the
        column name. Values that do not convert to float (``-`` in the
        samples) are logged at info level and skipped.

        :param page_stats: when true, ``-pw`` is added to the command line.
        :param sudo: passed through to ``get_subprocess_output``.
        :param timeout: passed through to ``get_subprocess_output``.

        Sample output::

            System configuration: lcpu=4 mem=7936MB mpsz=0.00GB iome=7936.00MB iomp=16 ent=0.20

            physb   hpi  hpit  pmem  iomin   iomu   iomf  iohwm iomaf  pgcol mpgcol ccol %entc  vcsw
            ----- ----- ----- ----- ------ ------ ------ ------ ----- ------ ------ ---- ----- -----
            0.63     0     0  7.75   46.8   23.8   -     23.9     0    0.0   0.0   0.0   4.1 1249055296

        or::

            System configuration: lcpu=4 mem=7936MB mpsz=0.00GB iome=7936.00MB iomp=16 ent=0.20

            physb   hpi  hpit  pmem  iomin   iomu   iomf  iohwm iomaf %entc  vcsw
            ----- ----- ----- ----- ------ ------ ------ ------ ----- ----- -----
             0.63     0     0  7.75   46.8   -     -     -       0   4.1 1249045057
        """
        cmd = ['lparstat', '-m'] + (['-pw'] if page_stats else []) + ['1', '1']
        raw, _, _ = get_subprocess_output(cmd, self.log, sudo=sudo, timeout=timeout)

        table = [line for line in raw.splitlines() if line]
        table = table[self.MEMORY_METRICS_START_IDX:]
        header_row, value_row = table[0], table[2]
        names = list(filter(None, header_row.split(' ')))
        readings = list(filter(None, value_row.split(' ')))

        for position, name in enumerate(names):
            try:
                reading = float(readings[position])
                # Strip '%' only after the conversion succeeded so that the
                # skip message below still shows the column as printed.
                name = name.replace('%', '')
                self.gauge('system.lpar.memory.{}'.format(name), reading)
            except ValueError:
                self.log.info("unable to convert %s to float - skipping", name)
                continue
Example #3
0
    def check(self, instance):
        """Report ``system.uptime`` in seconds.

        The value from ``uptime.uptime()`` is used when it is truthy.
        Otherwise (the module may return ``None`` on AIX and some other
        platforms) the elapsed time of the init process (pid 1) is read from
        ``ps -o etime= -p1`` and converted to seconds.

        ``ps`` prints elapsed time as ``[[dd-]hh:]mm:ss``, so the day and hour
        parts are both optional; a missing part counts as zero. Any failure in
        the fallback path is logged and swallowed: the metric is simply not
        emitted for this run.

        :param instance: check instance configuration (not read here).
        """
        up = uptime.uptime()
        if up:
            self.gauge("system.uptime", up)
            return

        try:
            # get uptime from init process lifetime (pid 1)
            # formats: 8-00:56:09, 00:56:09 or 56:09
            etime, _, _ = get_subprocess_output(['ps', '-o', 'etime=', '-p1'],
                                                self.log)
            # rpartition leaves `days` empty when there is no "dd-" prefix.
            days, _, clock = etime.strip().rpartition('-')
            parts = [int(part) for part in clock.split(':')]
            if not 2 <= len(parts) <= 3:
                raise ValueError(
                    "unexpected etime format: {!r}".format(etime))
            # Left-pad with zero hours when only mm:ss was printed.
            hours, minutes, seconds = [0] * (3 - len(parts)) + parts

            days_s = int(days or 0) * 24 * 60 * 60
            self.gauge("system.uptime",
                       days_s + hours * 60 * 60 + minutes * 60 + seconds)
        except Exception:
            # Deliberately broad: uptime collection is best-effort.
            self.log.exception("Cannot collect uptime statistics")
Example #4
0
    def check(self, instance):
        """Parse ``iostat -Dsal 1 1`` and emit ``system.iostat.*`` gauges.

        Empty lines are dropped and the first four remaining lines are
        skipped. A line starting with one of the keys of ``self.SCHEMA``
        selects the current mode; ``self.SCHEMA[mode]['sections']`` lists the
        section names and ``self.SCHEMA[mode][section]['cols']`` the columns
        of each section, in order. A line is treated as data only when its
        token count equals the total number of columns plus one (the leading
        device column). Columns listed under a section's optional ``'tags'``
        key become ``<col>:<value>`` tags; every other column becomes the
        gauge ``system.iostat.<mode>.<section>.<col>`` with the value returned
        by ``self.extract_with_unit``.

        Every mode except ``physical`` also tags its rows with
        ``<mode>:<device>``.

        :param instance: check instance configuration (not read here).
        """
        output, _, _ = get_subprocess_output(['iostat', '-Dsal', '1', '1'],
                                             self.log)
        stats = [_f for _f in output.splitlines() if _f]
        mode = ''
        # None until the first section header is seen; the original left this
        # unbound and raised UnboundLocalError on data preceding any header.
        expected_fields_no = None
        for line in stats[4:]:
            if line.startswith(self.TABLE_SEP):
                continue

            for m in self.SCHEMA:
                if line.startswith(m):
                    mode = m
                    # +1 accounts for the leading device column
                    expected_fields_no = 1 + sum(
                        len(self.SCHEMA[mode][section]['cols'])
                        for section in self.SCHEMA[mode]['sections'])

            if expected_fields_no is None:
                continue

            # A header line falls through to here as before; it is only
            # processed when its token count happens to match.
            fields = [_f for _f in line.split(' ') if _f]
            if len(fields) != expected_fields_no:
                continue

            tags = []
            metrics = {}
            # Compare by value: `is not` against a string literal tests object
            # identity, so the physical special case was never applied.
            if mode.lower() != 'physical':  # odd one out
                device = fields[0]
                tags = [
                    "{mode}:{device}".format(mode=mode.lower(),
                                             device=device.lower())
                ]

            section_idx = 1  # we start after the device
            for section in self.SCHEMA[mode]['sections']:
                section_schema = self.SCHEMA[mode][section]
                section_tag_cols = section_schema.get('tags', [])
                for idx, colname in enumerate(section_schema['cols']):
                    raw_value = fields[section_idx + idx]
                    try:
                        if colname in section_tag_cols:
                            tags.append("{tag}:{val}".format(tag=colname,
                                                             val=raw_value))
                        else:
                            metric_name = "{mode}.{section}.{name}".format(
                                mode=mode.lower(),
                                section=section,
                                name=colname)
                            metrics[metric_name] = self.extract_with_unit(
                                raw_value)
                    except ValueError as e:
                        self.log.debug("unexpected value parsing metric %s", e)

                section_idx += len(section_schema['cols'])

            # Gauges are sent only after the whole row is parsed so that every
            # metric carries the complete tag list.
            for name, value in metrics.items():
                self.gauge("system.iostat.{}".format(name), value, tags=tags)
Example #5
0
    def collect_spurr(self, sudo=False, timeout=None):
        """Report processor utilisation from ``lparstat -E 1 1``.

        After dropping empty lines and skipping the first
        ``self.SPURR_PROCESSOR_UTILIZATION_START_IDX`` of them, the first
        remaining line is read as the column names and the third as the
        values. Columns in the second half of the table (index greater than
        ``len(fields) / 2``) get a ``.norm`` suffix. Each numeric column is
        sent as ``system.lpar.spurr.<column>[.norm]``, plus a ``.pct`` gauge
        holding that value divided by the sum of its group (actual or
        normalised). Note the ``.pct`` value is a plain ratio; it is not
        multiplied by 100.

        Columns that do not convert to float (``3.6GHz[100%]`` in the sample)
        are logged at info level and skipped. When a group sums to zero the
        ``.pct`` gauges of that group are skipped instead of raising
        ``ZeroDivisionError``.

        :param sudo: passed through to ``get_subprocess_output``.
        :param timeout: passed through to ``get_subprocess_output``.

        Sample output::

            System configuration: type=Shared mode=Uncapped smt=On lcpu=4 mem=7936MB ent=0.20 Power=Disabled

            Physical Processor Utilisation:

             --------Actual--------              ------Normalised------
             user   sys  wait  idle      freq    user   sys  wait  idle
             ----  ----  ----  ----   ---------  ----  ----  ----  ----
            0.008 0.012 0.000 0.180 3.6GHz[100%] 0.008 0.012 0.000 0.180
        """
        cmd = ['lparstat', '-E', '1', '1']
        output, _, _ = get_subprocess_output(cmd,
                                             self.log,
                                             sudo=sudo,
                                             timeout=timeout)
        table = [_f for _f in output.splitlines()
                 if _f][self.SPURR_PROCESSOR_UTILIZATION_START_IDX:]
        fields = [_f for _f in table[0].split(' ') if _f]
        stats = [_f for _f in table[2].split(' ') if _f]
        metrics = {}
        total = 0
        total_norm = 0
        metric_tpl = "system.lpar.spurr.{}"
        midpoint = len(fields) / 2
        for idx, field in enumerate(fields):
            metric = metric_tpl.format(field)
            if idx > midpoint:
                metric = "{}.norm".format(metric)

            try:
                metrics[metric] = float(stats[idx])
            except ValueError:
                self.log.info("unable to convert %s to float - skipping",
                              metric)
                continue
            if 'norm' in metric:
                total_norm += metrics[metric]
            else:
                total += metrics[metric]

        for metric, val in metrics.items():
            self.gauge(metric, val)

            group_total = total_norm if 'norm' in metric else total
            if not group_total:
                # A share of a zero total is undefined; keep the raw gauge
                # and drop only the ratio.
                self.log.debug("total for %s is zero - skipping pct", metric)
                continue
            self.gauge("{}.pct".format(metric), val / group_total)
Example #6
0
    def collect_memory_entitlements(self, sudo=False, timeout=None):
        """Report the per-pool table printed by ``lparstat -m -eR 1 1``.

        After dropping empty lines and skipping the first
        ``self.MEMORY_ENTITLEMENTS_START_IDX`` of them, the first remaining
        line is the table header; its first token (``iompn:`` in the sample)
        is discarded and the rest are the column names. Every following row
        is tagged ``iompn:<first token>`` and each column value is sent as the
        gauge ``system.lpar.memory.entitlement.<column>``. Values that do not
        convert to float are logged at info level and skipped.

        :param sudo: passed through to ``get_subprocess_output``.
        :param timeout: passed through to ``get_subprocess_output``.

        Sample output::

            System configuration: lcpu=4 mem=7936MB mpsz=0.00GB iome=7936.00MB iomp=16 ent=0.20

            physb   hpi  hpit  pmem  iomin   iomu   iomf  iohwm iomaf %entc  vcsw
            ----- ----- ----- ----- ------ ------ ------ ------ ----- ----- -----
            0.64     0     0  7.75   46.8   -     -     -       0   4.1 1250974887

                        iompn: iomin  iodes   iomu  iores  iohwm  iomaf
                   ent1.txpool  2.12  16.00   2.00   2.12   2.00      0
                ent1.rxpool__4  4.00  16.00   3.50   4.00   3.50      0
                ent1.rxpool__3  4.00  16.00   2.00  16.00   2.00      0
                ent1.rxpool__2  2.50   5.00   2.00   2.50   2.00      0
                ent1.rxpool__1  0.84   2.25   0.75   0.84   0.75      0
                ent1.rxpool__0  1.59   4.25   1.50   1.59   1.50      0
                  ent1.phypmem  0.10   0.10   0.09   0.10   0.09      0
                   ent0.txpool  2.12  16.00   2.00   2.12   2.00      0
                ent0.rxpool__4  4.00  16.00   3.50   4.00   3.50      0
                ent0.rxpool__3  4.00  16.00   2.00  16.00   2.00      0
                ent0.rxpool__2  2.50   5.00   2.00   2.50   2.00      0
                ent0.rxpool__1  0.84   2.25   0.75   0.84   0.75      0
                ent0.rxpool__0  1.59   4.25   1.50   1.59   1.50      0
                  ent0.phypmem  0.10   0.10   0.09   0.10   0.09      0
                        vscsi0 16.50  16.50   0.13  16.50   0.18      0
                          sys0  0.00   0.00   0.00   0.00   0.00      0
        """
        raw, _, _ = get_subprocess_output(
            ['lparstat', '-m', '-eR', '1', '1'],
            self.log, sudo=sudo, timeout=timeout)

        table = [line for line in raw.splitlines() if line]
        table = table[self.MEMORY_ENTITLEMENTS_START_IDX:]
        header, rows = table[0], table[1:]

        # Drop the leading label so column names line up with value offsets.
        columns = list(filter(None, header.split(' ')))[1:]
        for row in rows:
            cells = list(filter(None, row.split(' ')))
            pool_tag = "iompn:{}".format(cells[0])
            # Offsets start at 1 because cell 0 holds the pool name.
            for offset, column in enumerate(columns, 1):
                try:
                    metric_name = "system.lpar.memory.entitlement.{}".format(column)
                    reading = float(cells[offset])
                    self.gauge(metric_name, reading, tags=[pool_tag])
                except ValueError:
                    self.log.info(
                        "unable to convert %s to float for %s - skipping",
                        metric_name, pool_tag)
                    continue
    def check(self, instance):
        """Report filesystem size gauges parsed from ``df -P``.

        The first non-empty output line (the header) is skipped. Each other
        line is split on spaces; the last five tokens are read as total, used,
        available, capacity and mount point, and everything before them is
        joined with ``_`` to form the filesystem name. For every line the
        gauges ``system.fs.total``, ``system.fs.used``, ``system.fs.available``
        (each divided by ``self.KB`` and rounded) and
        ``system.fs.available.pct`` are sent, tagged ``fs:<filesystem>`` and
        ``mount:<mount point>``.

        Lines whose numeric columns cannot be parsed, or that have fewer than
        five tokens, are logged at debug level and skipped.

        :param instance: check instance configuration (not read here).

        Sample output::

            Filesystem    MB blocks      Used Available Capacity Mounted on
            /dev/hd4         768.00    304.84    463.16      40% /
            /dev/hd2        8448.00   2583.39   5864.61      31% /usr
            /dev/hd9var      768.00    655.57    112.43      86% /var
            /dev/hd3         256.00    158.39     97.61      62% /tmp
            /dev/hd1         256.00    219.11     36.89      86% /home
        """
        # -P  for POSIX portable format
        # NOTE(review): the original comment says units are in MB and the
        # sample header reads "MB blocks", but no unit flag is passed to df
        # here - confirm what unit the size columns are in.
        output, _, _ = get_subprocess_output(['df', '-P'], self.log)
        stats = [_f for _f in output.splitlines() if _f]
        for line in stats[1:]:
            fields = [_f for _f in line.split(' ') if _f]
            # Device names containing spaces spill into extra leading tokens.
            filesystem = '_'.join(fields[0:-5])
            try:
                blocks = float(fields[-5])
                used = float(fields[-4])
                available = float(fields[-3])
            except (ValueError, IndexError):
                # IndexError: a line with fewer than five tokens used to
                # abort the whole check instead of skipping that line.
                self.log.debug("Unable to get stats for %s - skipping",
                               filesystem)
                continue

            mount = fields[-1]

            tags = ['fs:{}'.format(filesystem), 'mount:{}'.format(mount)]
            self.gauge('system.fs.total', round(blocks / self.KB), tags=tags)
            self.gauge('system.fs.used', round(used / self.KB), tags=tags)
            self.gauge('system.fs.available',
                       round(available / self.KB),
                       tags=tags)
            # NOTE(review): the metric is named "available.pct" but is
            # computed from `used`. Left unchanged because it alters emitted
            # values - confirm the intended semantics before fixing.
            try:
                self.gauge('system.fs.available.pct', (used / blocks) * 100,
                           tags=tags)
            except ZeroDivisionError:
                self.gauge('system.fs.available.pct', 100, tags=tags)
Example #8
0
    def check(self, instance):
        """Report 1, 5 and 15 minute load averages, raw and per core.

        ``os.getloadavg()`` is used when it exists and succeeds. Otherwise
        the last three tokens of the ``uptime`` command output are parsed.
        The raw values are sent as ``system.load.{1,5,15}`` and, divided by
        ``psutil.cpu_count()``, as ``system.load.norm.{1,5,15}``.

        :param instance: check instance configuration (not read here).
        :raises AssertionError: when the core count is missing or below 1.
            The raw gauges have already been sent at that point.
        """
        try:
            load = os.getloadavg()
        except (AttributeError, OSError):
            # AttributeError: os.getloadavg() is not available on AIX.
            # OSError: raised when the load average is unobtainable.
            # Fall back to the uptime report; sample output:
            # '10:50AM   up 8 days,   2:48,  2 users,  load average: 1.19, 0.77, 0.85'
            output, _, _ = get_subprocess_output(["uptime"], self.log)
            tokens = output.strip().split(' ')
            load = [float(tokens[-3].strip(',')),
                    float(tokens[-2].strip(',')),
                    float(tokens[-1].strip(','))]

        self.gauge('system.load.1', load[0])
        self.gauge('system.load.5', load[1])
        self.gauge('system.load.15', load[2])

        # Normalize load by number of cores
        cores = psutil.cpu_count()
        # Explicit check instead of `assert`: asserts are stripped under -O,
        # and comparing None (presumably returned when the count is
        # undetermined - confirm against psutil docs) with an int raises
        # TypeError. The exception type is kept so existing handlers still
        # match.
        if cores is None or cores < 1:
            raise AssertionError("Cannot determine number of cores")

        self.gauge('system.load.norm.1', load[0] / cores)
        self.gauge('system.load.norm.5', load[1] / cores)
        self.gauge('system.load.norm.15', load[2] / cores)