Beispiel #1
0
    def _collect_inodes_metrics(self, mountpoint):
        """Collect inode metrics for a single mountpoint.

        Runs ``os.statvfs`` on ``mountpoint`` under a 5 second timeout.

        :param mountpoint: path handed to ``os.statvfs``.
        :return: dict keyed by ``self.METRIC_INODE`` formatted with ``total``,
            ``free``, ``used`` and ``in_use``. The dict is empty when the call
            times out, raises, or reports ``f_files == 0``.
        """
        metrics = {}
        # we need to timeout this, too.
        try:
            inodes = timeout(5)(os.statvfs)(mountpoint)
        except TimeoutException:
            self.log.warning(
                u'Timeout while retrieving the disk usage of `%s` mountpoint. Skipping...',
                mountpoint)
            return metrics
        except Exception as e:
            # Best effort: a failing mountpoint is logged and skipped, never fatal.
            self.log.warning('Unable to get disk metrics for %s: %s',
                             mountpoint, e)
            return metrics

        total = inodes.f_files
        # A zero inode count would make the ratio below undefined; report nothing.
        if total == 0:
            return metrics

        free = inodes.f_ffree
        used = total - free

        metrics[self.METRIC_INODE.format('total')] = total
        metrics[self.METRIC_INODE.format('free')] = free
        metrics[self.METRIC_INODE.format('used')] = used
        # FIXME: 6.x, use percent, a lot more logical than in_use
        # Force true division: with Python 2 integer semantics `used / total`
        # truncates to 0 (or 1), which would make the ratio meaningless.
        metrics[self.METRIC_INODE.format('in_use')] = float(used) / total

        return metrics
Beispiel #2
0
    def _collect_inodes_metrics(self, mountpoint):
        """Collect inode metrics for a single mountpoint.

        Runs ``os.statvfs`` on ``mountpoint`` under a timeout of
        ``self._timeout`` seconds.

        :param mountpoint: path handed to ``os.statvfs``.
        :return: dict keyed by ``self.METRIC_INODE`` formatted with ``total``,
            ``free``, ``used`` and ``in_use``. The dict is empty when the call
            times out, raises, or reports ``f_files == 0``.
        """
        metrics = {}
        # we need to timeout this, too.
        try:
            inodes = timeout(self._timeout)(os.statvfs)(mountpoint)
        except TimeoutException:
            self.log.warning(
                u'Timeout after %d seconds while retrieving the disk usage of `%s` mountpoint. '
                u'You might want to change the timeout length in the settings.',
                self._timeout,
                mountpoint,
            )
            return metrics
        except Exception as e:
            # Best effort: a failing mountpoint is logged and skipped, never fatal.
            self.log.warning(
                u'Unable to get disk metrics for %s: %s. '
                u'You can exclude this mountpoint in the settings if it is invalid.',
                mountpoint,
                e,
            )
            return metrics

        total = inodes.f_files
        # A zero inode count would make the ratio below undefined; report nothing.
        if total == 0:
            return metrics

        free = inodes.f_ffree
        used = total - free

        metrics[self.METRIC_INODE.format('total')] = total
        metrics[self.METRIC_INODE.format('free')] = free
        metrics[self.METRIC_INODE.format('used')] = used
        # FIXME: 8.x, use percent, a lot more logical than in_use
        # Force true division: with Python 2 integer semantics `used / total`
        # truncates to 0 (or 1), which would make the ratio meaningless.
        metrics[self.METRIC_INODE.format('in_use')] = float(used) / total

        return metrics
Beispiel #3
0
    def collect_metrics_psutil(self):
        """Submit per-partition disk metrics and the optional read/write check.

        Walks ``psutil.disk_partitions(all=True)`` and skips a partition when
        ``self.exclude_disk`` rejects it, when its usage cannot be read within
        5 seconds (or reading raises), or when its total size is 0. Every
        remaining partition is recorded in ``self._valid_disks``, the metrics
        returned by ``self._collect_part_metrics`` are submitted as gauges and,
        if ``self._service_check_rw`` is set, a ``disk.read_write`` service
        check is emitted. Ends by calling ``self.collect_latency_metrics()``.
        """
        self._valid_disks = {}
        for part in psutil.disk_partitions(all=True):
            # we check all exclude conditions
            if self.exclude_disk(part):
                continue

            # Get disk metrics here to be able to exclude on total usage
            try:
                disk_usage = timeout(5)(psutil.disk_usage)(part.mountpoint)
            except TimeoutException:
                self.log.warning(
                    u'Timeout while retrieving the disk usage of `%s` mountpoint. Skipping...', part.mountpoint
                )
                continue
            except Exception as e:
                # Best effort: one unreadable mountpoint must not abort the whole run.
                self.log.warning('Unable to get disk metrics for %s: %s', part.mountpoint, e)
                continue

            # Exclude disks with total disk size 0
            if disk_usage.total == 0:
                continue

            # For later, latency metrics
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            # Logger-side %-formatting: nothing is formatted unless debug is
            # enabled, and it matches the style of the other log calls here.
            self.log.debug('Passed: %s', part.device)

            device_name = part.mountpoint if self._use_mount else part.device

            tags = [part.fstype, 'filesystem:{}'.format(part.fstype)] if self._tag_by_filesystem else []
            tags.extend(self._custom_tags)

            # apply device/mountpoint specific tags
            for regex, device_tags in self._device_tag_re:
                if regex.match(device_name):
                    tags.extend(device_tags)

            # Look the label up once; falsy (missing/empty) labels are not tagged.
            label = self.devices_label.get(device_name)
            if label:
                tags.append(label)

            # legacy check names c: vs psutil name C:\\
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()

            tags.append('device:{}'.format(device_name))
            for metric_name, metric_value in iteritems(self._collect_part_metrics(part, disk_usage)):
                self.gauge(metric_name, metric_value, tags=tags)

            # Add in a disk read write or read only check
            if self._service_check_rw:
                rwro = {'rw', 'ro'} & set(part.opts.split(','))
                if len(rwro) == 1:
                    # Exactly one of rw/ro present: rw is healthy, ro is critical.
                    status = AgentCheck.OK if rwro.pop() == 'rw' else AgentCheck.CRITICAL
                else:
                    # Neither or both options present: the state is ambiguous.
                    status = AgentCheck.UNKNOWN
                self.service_check('disk.read_write', status, tags=tags)

        self.collect_latency_metrics()
Beispiel #4
0
    def check(self, instance):
        """Gather disk space and inode statistics for every eligible partition.

        On Linux, when ``self._tag_by_label`` is set, the device labels are
        refreshed first. Each partition from ``psutil.disk_partitions`` is then
        filtered (``self.exclude_disk``, unreadable usage, total size at or
        below ``self._min_disk_size``), tagged, and reported through gauges and
        the optional ``disk.read_write`` service check. Latency metrics are
        collected last. ``instance`` is not read by this method.
        """
        if self._tag_by_label and Platform.is_linux():
            self.devices_label = self._get_devices_label()

        self._valid_disks = {}
        for partition in psutil.disk_partitions(all=self._include_all_devices):
            # Apply every configured exclusion rule first.
            if self.exclude_disk(partition):
                continue

            # Usage is fetched up front so the size filter below can use it.
            try:
                usage = timeout(self._timeout)(psutil.disk_usage)(partition.mountpoint)
            except TimeoutException:
                self.log.warning(
                    u'Timeout after %d seconds while retrieving the disk usage of `%s` mountpoint. '
                    u'You might want to change the timeout length in the settings.',
                    self._timeout,
                    partition.mountpoint,
                )
                continue
            except Exception as exc:
                self.log.warning(
                    u'Unable to get disk metrics for %s: %s. '
                    u'You can exclude this mountpoint in the settings if it is invalid.',
                    partition.mountpoint,
                    exc,
                )
                continue

            # Drop partitions at or below the minimum size; only non-empty
            # ones are worth mentioning in the log.
            if usage.total <= self._min_disk_size:
                if usage.total > 0:
                    self.log.info('Excluding device %s with total disk size %s', partition.device, usage.total)
                continue

            # Remembered for the latency metrics collected at the end.
            self._valid_disks[partition.device] = (partition.fstype, partition.mountpoint)
            self.log.debug('Passed: %s', partition.device)

            if self._use_mount:
                device_name = partition.mountpoint
            else:
                device_name = partition.device

            if self._tag_by_filesystem:
                tags = [partition.fstype, 'filesystem:{}'.format(partition.fstype)]
            else:
                tags = []
            tags.extend(self._custom_tags)

            # Tags configured for matching devices/mountpoints.
            for pattern, extra_tags in self._device_tag_re:
                if not pattern.match(device_name):
                    continue
                tags.extend(extra_tags)

            label_tags = self.devices_label.get(device_name)
            if label_tags:
                tags.extend(label_tags)

            # Legacy naming: report `c:` rather than psutil's `C:\\`.
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()

            tags.append('device:{}'.format(device_name))
            tags.append('device_name:{}'.format(_base_device_name(partition.device)))

            part_metrics = self._collect_part_metrics(partition, usage)
            for name, value in iteritems(part_metrics):
                self.gauge(name, value, tags=tags)

            # Optional read-write / read-only service check.
            if not self._service_check_rw:
                continue

            modes = set(partition.opts.split(',')).intersection(('rw', 'ro'))
            if len(modes) == 1:
                if modes.pop() == 'rw':
                    status = AgentCheck.OK
                else:
                    status = AgentCheck.CRITICAL
            else:
                status = AgentCheck.UNKNOWN
            self.service_check('disk.read_write', status, tags=tags)

        self.collect_latency_metrics()