def _collect_inodes_metrics(self, mountpoint):
    """Build the inode gauges for a single mountpoint.

    ``os.statvfs`` is invoked on ``mountpoint`` through the ``timeout(5)``
    wrapper. The returned dict maps ``self.METRIC_INODE`` formatted with
    ``total``, ``free``, ``used`` and ``in_use`` to their values. An empty
    dict is returned when the call times out, raises any other exception
    (both cases are logged as warnings), or reports ``f_files == 0``.
    """
    metrics = {}

    # we need to timeout this, too.
    try:
        guarded_statvfs = timeout(5)(os.statvfs)
        stats = guarded_statvfs(mountpoint)
    except TimeoutException:
        self.log.warning(
            u'Timeout while retrieving the disk usage of `%s` mountpoint. Skipping...',
            mountpoint,
        )
        return metrics
    except Exception as e:
        self.log.warning('Unable to get disk metrics for %s: %s', mountpoint, e)
        return metrics

    total = stats.f_files
    if total == 0:
        # Nothing to report; also keeps the ratio below from dividing by zero.
        return metrics

    free = stats.f_ffree
    used = total - free
    metric_for = self.METRIC_INODE.format

    metrics[metric_for('total')] = total
    metrics[metric_for('free')] = free
    metrics[metric_for('used')] = used
    # FIXME: 6.x, use percent, a lot more logical than in_use
    metrics[metric_for('in_use')] = used / total
    return metrics
def _collect_inodes_metrics(self, mountpoint):
    """Return inode usage metrics for ``mountpoint`` as a dict.

    The ``os.statvfs`` call is wrapped with ``timeout(self._timeout)``.
    On a timeout or any other exception a warning is logged and the empty
    dict is returned. When ``f_files`` is zero no metric is produced.
    Otherwise the dict holds the ``total``, ``free``, ``used`` and
    ``in_use`` entries, each keyed by ``self.METRIC_INODE.format(<name>)``.
    """
    collected = {}

    # we need to timeout this, too.
    try:
        fs_stats = timeout(self._timeout)(os.statvfs)(mountpoint)
    except TimeoutException:
        self.log.warning(
            u'Timeout after %d seconds while retrieving the disk usage of `%s` mountpoint. '
            u'You might want to change the timeout length in the settings.',
            self._timeout,
            mountpoint,
        )
        return collected
    except Exception as e:
        self.log.warning(
            u'Unable to get disk metrics for %s: %s. '
            u'You can exclude this mountpoint in the settings if it is invalid.',
            mountpoint,
            e,
        )
        return collected

    inode_total = fs_stats.f_files
    if inode_total != 0:
        inode_free = fs_stats.f_ffree
        inode_used = inode_total - inode_free
        readings = (
            ('total', inode_total),
            ('free', inode_free),
            ('used', inode_used),
            # FIXME: 8.x, use percent, a lot more logical than in_use
            ('in_use', inode_used / inode_total),
        )
        for suffix, value in readings:
            collected[self.METRIC_INODE.format(suffix)] = value

    return collected
def collect_metrics_psutil(self):
    """Submit disk gauges for every non-excluded partition reported by psutil.

    For each partition from ``psutil.disk_partitions(all=True)`` that is not
    rejected by ``self.exclude_disk``:

    * its usage is read via ``psutil.disk_usage`` wrapped in ``timeout(5)``;
      a timeout or any other exception is logged as a warning and the
      partition is skipped,
    * partitions whose total size is 0 are skipped,
    * the partition is recorded in ``self._valid_disks`` (keyed by device),
    * every metric returned by ``self._collect_part_metrics`` is sent as a
      gauge with the tags built for that partition,
    * when ``self._service_check_rw`` is set, a ``disk.read_write`` service
      check is sent based on the partition's mount options.

    ``self.collect_latency_metrics()`` is called once after the loop.
    """
    self._valid_disks = {}
    for part in psutil.disk_partitions(all=True):
        # we check all exclude conditions
        if self.exclude_disk(part):
            continue

        # Get disk metrics here to be able to exclude on total usage
        try:
            disk_usage = timeout(5)(psutil.disk_usage)(part.mountpoint)
        except TimeoutException:
            self.log.warning(
                u'Timeout while retrieving the disk usage of `%s` mountpoint. Skipping...', part.mountpoint
            )
            continue
        except Exception as e:
            self.log.warning('Unable to get disk metrics for %s: %s', part.mountpoint, e)
            continue

        # Exclude disks with total disk size 0
        if disk_usage.total == 0:
            continue

        # For later, latency metrics
        self._valid_disks[part.device] = (part.fstype, part.mountpoint)
        # Pass the argument to the logger instead of pre-formatting, so the
        # message is only built when debug logging is actually enabled.
        self.log.debug('Passed: %s', part.device)

        device_name = part.mountpoint if self._use_mount else part.device

        tags = [part.fstype, 'filesystem:{}'.format(part.fstype)] if self._tag_by_filesystem else []
        tags.extend(self._custom_tags)

        # apply device/mountpoint specific tags
        for regex, device_tags in self._device_tag_re:
            if regex.match(device_name):
                tags.extend(device_tags)

        # Look the label up once; falsy values (missing or empty) add no tag.
        device_label = self.devices_label.get(device_name)
        if device_label:
            tags.append(device_label)

        # legacy check names c: vs psutil name C:\\
        if Platform.is_win32():
            device_name = device_name.strip('\\').lower()

        tags.append('device:{}'.format(device_name))
        for metric_name, metric_value in iteritems(self._collect_part_metrics(part, disk_usage)):
            self.gauge(metric_name, metric_value, tags=tags)

        # Add in a disk read write or read only check
        if self._service_check_rw:
            # Keep only the rw/ro flags; exactly one of them is expected.
            rwro = {'rw', 'ro'} & set(part.opts.split(','))
            if len(rwro) == 1:
                self.service_check(
                    'disk.read_write', AgentCheck.OK if rwro.pop() == 'rw' else AgentCheck.CRITICAL, tags=tags
                )
            else:
                # Neither or both flags present: state cannot be determined.
                self.service_check('disk.read_write', AgentCheck.UNKNOWN, tags=tags)

    self.collect_latency_metrics()
def check(self, instance):
    """Collect disk space/inode statistics for each eligible partition.

    When ``self._tag_by_label`` is set and the platform is Linux, the
    device labels are refreshed from ``self._get_devices_label()`` first.
    Partitions come from ``psutil.disk_partitions`` (``all`` is driven by
    ``self._include_all_devices``). A partition is skipped when
    ``self.exclude_disk`` rejects it, when reading its usage times out or
    fails (a warning is logged), or when its total size does not exceed
    ``self._min_disk_size``. For the remaining partitions the metrics from
    ``self._collect_part_metrics`` are sent as gauges and, if
    ``self._service_check_rw`` is set, a ``disk.read_write`` service check
    is emitted. ``self.collect_latency_metrics()`` runs once at the end.

    ``instance`` is not read by this method.
    """
    if self._tag_by_label and Platform.is_linux():
        self.devices_label = self._get_devices_label()

    self._valid_disks = {}
    for partition in psutil.disk_partitions(all=self._include_all_devices):
        # we check all exclude conditions
        if self.exclude_disk(partition):
            continue

        # Get disk metrics here to be able to exclude on total usage
        try:
            usage = timeout(self._timeout)(psutil.disk_usage)(partition.mountpoint)
        except TimeoutException:
            self.log.warning(
                u'Timeout after %d seconds while retrieving the disk usage of `%s` mountpoint. '
                u'You might want to change the timeout length in the settings.',
                self._timeout,
                partition.mountpoint,
            )
            continue
        except Exception as e:
            self.log.warning(
                u'Unable to get disk metrics for %s: %s. '
                u'You can exclude this mountpoint in the settings if it is invalid.',
                partition.mountpoint,
                e,
            )
            continue

        # Exclude disks with size less than min_disk_size
        if usage.total <= self._min_disk_size:
            # Zero-sized disks are dropped silently; only non-empty ones are logged.
            if usage.total > 0:
                self.log.info('Excluding device %s with total disk size %s', partition.device, usage.total)
            continue

        # For later, latency metrics
        self._valid_disks[partition.device] = (partition.fstype, partition.mountpoint)
        self.log.debug('Passed: %s', partition.device)

        if self._use_mount:
            device_name = partition.mountpoint
        else:
            device_name = partition.device

        if self._tag_by_filesystem:
            tags = [partition.fstype, 'filesystem:{}'.format(partition.fstype)]
        else:
            tags = []
        tags.extend(self._custom_tags)

        # apply device/mountpoint specific tags
        for pattern, extra_tags in self._device_tag_re:
            if pattern.match(device_name):
                tags.extend(extra_tags)

        label_tags = self.devices_label.get(device_name)
        if label_tags:
            tags.extend(label_tags)

        # legacy check names c: vs psutil name C:\\
        if Platform.is_win32():
            device_name = device_name.strip('\\').lower()

        tags.append('device:{}'.format(device_name))
        tags.append('device_name:{}'.format(_base_device_name(partition.device)))

        part_metrics = self._collect_part_metrics(partition, usage)
        for metric_name, metric_value in iteritems(part_metrics):
            self.gauge(metric_name, metric_value, tags=tags)

        # Add in a disk read write or read only check
        if not self._service_check_rw:
            continue

        # Keep only the rw/ro flags found in the mount options.
        access_modes = set(partition.opts.split(',')) & {'rw', 'ro'}
        if len(access_modes) != 1:
            status = AgentCheck.UNKNOWN
        elif 'rw' in access_modes:
            status = AgentCheck.OK
        else:
            status = AgentCheck.CRITICAL
        self.service_check('disk.read_write', status, tags=tags)

    self.collect_latency_metrics()