def exclude_disk(self, part):
    """
    Tell whether the partition ``part`` must be left out of the collection.

    ``part`` is expected to expose ``opts``, ``fstype``, ``device`` and
    ``mountpoint`` attributes. Returns ``True`` for Windows partitions that
    are flagged ``cdrom`` or have an empty filesystem type; otherwise
    returns whatever ``self._exclude_disk`` decides.
    """
    # On Windows, cd-rom drives without a disk inserted may raise ENOENT,
    # pop up a GUI error for a non-ready partition, or simply hang, so
    # they are rejected before anything touches them.
    if Platform.is_win32() and ('cdrom' in part.opts or part.fstype == ''):
        return True

    # Every other partition goes through the configured exclusion rules.
    return self._exclude_disk(part.device, part.fstype, part.mountpoint)
def collect_metrics_psutil(self):
    """
    Collect usage metrics for every partition reported by psutil.

    For each partition returned by ``psutil.disk_partitions(all=True)``
    that is not rejected by ``self.exclude_disk``:

    * its usage is fetched with a 5 second timeout; partitions that time
      out, raise, or report a total size of 0 are skipped,
    * it is recorded in ``self._valid_disks`` (device -> (fstype,
      mountpoint)) for the latency metrics collected afterwards,
    * one gauge per entry of ``self._collect_part_metrics`` is submitted,
    * when ``self._service_check_rw`` is set, a ``disk.read_write``
      service check is emitted based on the ``rw``/``ro`` mount options.

    ``self.collect_latency_metrics()`` is called once after the loop.
    """
    self._valid_disks = {}
    for part in psutil.disk_partitions(all=True):
        # we check all exclude conditions
        if self.exclude_disk(part):
            continue

        # Get disk metrics here to be able to exclude on total usage
        try:
            disk_usage = timeout(5)(psutil.disk_usage)(part.mountpoint)
        except TimeoutException:
            self.log.warning(
                u'Timeout while retrieving the disk usage of `%s` mountpoint. Skipping...', part.mountpoint
            )
            continue
        except Exception as e:
            self.log.warning('Unable to get disk metrics for %s: %s', part.mountpoint, e)
            continue

        # Exclude disks with total disk size 0
        if disk_usage.total == 0:
            continue

        # For later, latency metrics
        self._valid_disks[part.device] = (part.fstype, part.mountpoint)
        # Logger arguments are passed lazily so the message is only
        # rendered when debug logging is enabled.
        self.log.debug('Passed: %s', part.device)

        device_name = part.mountpoint if self._use_mount else part.device

        tags = [part.fstype, 'filesystem:{}'.format(part.fstype)] if self._tag_by_filesystem else []
        tags.extend(self._custom_tags)

        # apply device/mountpoint specific tags
        for regex, device_tags in self._device_tag_re:
            if regex.match(device_name):
                tags.extend(device_tags)

        # The label lookup uses the name before the Windows normalisation
        # applied below.
        label = self.devices_label.get(device_name)
        if label:
            tags.append(label)

        # legacy check names c: vs psutil name C:\\
        if Platform.is_win32():
            device_name = device_name.strip('\\').lower()

        tags.append('device:{}'.format(device_name))
        for metric_name, metric_value in iteritems(self._collect_part_metrics(part, disk_usage)):
            self.gauge(metric_name, metric_value, tags=tags)

        # Add in a disk read write or read only check
        if self._service_check_rw:
            rwro = {'rw', 'ro'} & set(part.opts.split(','))
            if len(rwro) == 1:
                self.service_check(
                    'disk.read_write', AgentCheck.OK if rwro.pop() == 'rw' else AgentCheck.CRITICAL, tags=tags
                )
            else:
                # Neither or both of rw/ro present: state cannot be decided.
                self.service_check('disk.read_write', AgentCheck.UNKNOWN, tags=tags)

    self.collect_latency_metrics()
def psutil_wrapper(self, process, method, accessors, try_sudo, *args, **kwargs):
    """
    Call ``process.<method>(*args, **kwargs)`` and shape the outcome.

    * When ``accessors`` is ``None`` the raw return value of the call is
      returned (``None`` if the call could not be made).
    * Otherwise a dictionary is returned, mapping each accessor name to the
      attribute of that name on the call's result; accessors missing from
      the result are logged and left out.

    ``num_fds`` is not attempted off unix and ``num_handles`` is not
    attempted off Windows. When psutil denies access to ``num_fds`` on
    unix and ``try_sudo`` is truthy, the count is taken from the output of
    ``sudo ls /proc/<pid>/fd/`` instead.
    """
    wants_attributes = accessors is not None
    result = {} if wants_attributes else None

    # Ban certain method that we know fail
    banned = (method == 'num_fds' and not Platform.is_unix()) or (
        method == 'num_handles' and not Platform.is_win32()
    )
    if banned:
        return result

    try:
        raw = getattr(process, method)(*args, **kwargs)
        if not wants_attributes:
            result = raw
        else:
            for attr_name in accessors:
                try:
                    result[attr_name] = getattr(raw, attr_name)
                except AttributeError:
                    self.log.debug("psutil.%s().%s attribute does not exist", method, attr_name)
    except (NotImplementedError, AttributeError):
        self.log.debug("psutil method %s not implemented", method)
    except psutil.AccessDenied:
        self.log.debug("psutil was denied access for method %s", method)
        if method == 'num_fds' and Platform.is_unix() and try_sudo:
            try:
                # It is up the agent's packager to grant
                # corresponding sudo policy on unix platforms
                listing = subprocess.check_output(['sudo', 'ls', '/proc/{}/fd/'.format(process.pid)])
                # One line of `ls` output per open file descriptor.
                result = len(listing.splitlines())
            except subprocess.CalledProcessError as e:
                self.log.exception(
                    "trying to retrieve %s with sudo failed with return code %s", method, e.returncode
                )
            except Exception:
                self.log.exception("trying to retrieve %s with sudo also failed", method)
    except psutil.NoSuchProcess:
        self.warning("Process %s disappeared while scanning", process.pid)

    return result
def psutil_wrapper(self, process, method, accessors=None, *args, **kwargs):
    """
    Call ``process.<method>(*args, **kwargs)`` and shape the outcome.

    * Without ``accessors`` the raw return value of the call is returned
      (``None`` if the call could not be made).
    * With ``accessors`` a dictionary is returned, mapping each accessor
      name to the attribute of that name on the call's result; accessors
      missing from the result are logged and left out.

    ``num_fds`` is not attempted off unix and ``num_handles`` is not
    attempted off Windows. When ``self.try_sudo`` is truthy, ``num_fds``
    is computed only from the output of ``sudo ls /proc/<pid>/fd/``; the
    psutil call is not attempted in that case.
    """
    result = None if accessors is None else {}

    # Ban certain method that we know fail
    banned = (method == 'num_fds' and not Platform.is_unix()) or (
        method == 'num_handles' and not Platform.is_win32()
    )
    if banned:
        return result

    # Try running `num_fds` with sudo if possible
    if method == 'num_fds' and self.try_sudo:
        self.log.debug("Running num_fds using sudo")
        try:
            listing = subprocess.check_output(['sudo', 'ls', '/proc/{}/fd/'.format(process.pid)])
            # One line of `ls` output per open file descriptor.
            result = len(listing.splitlines())
        except Exception as e:
            self.log.exception("Trying to retrieve %s with sudo failed with error: %s", method, e)
        return result

    try:
        raw = getattr(process, method)(*args, **kwargs)
        if accessors is None:
            result = raw
        else:
            for attr_name in accessors:
                try:
                    result[attr_name] = getattr(raw, attr_name)
                except AttributeError:
                    self.log.debug("psutil.%s().%s attribute does not exist", method, attr_name)
    except (NotImplementedError, AttributeError):
        self.log.debug("psutil method %s not implemented", method)
    except psutil.AccessDenied:
        self.log.debug("psutil was denied access for method %s", method)
    except psutil.NoSuchProcess:
        self.log.debug("Process %s disappeared while scanning", process.pid)

    return result
from datadog_checks.base import AgentCheck
from datadog_checks.base.utils.platform import Platform

# True when the running interpreter is Python 3.
PY3 = sys.version_info[0] == 3

# time_func is the clock callable chosen for this interpreter/platform;
# the Windows note further down indicates it feeds the response_time
# computation.
if PY3:
    # use higher precision clock available in Python3
    time_func = time.perf_counter
else:
    time_func = time.time

# These imports are necessary because otherwise dynamic type
# resolution will fail on windows without it.
# See more here: https://github.com/rthalley/dnspython/issues/39.
if Platform.is_win32():
    from dns.rdtypes.ANY import *  # noqa
    from dns.rdtypes.IN import *  # noqa

    # for tiny time deltas, time.time on Windows reports the same value
    # of the clock more than once, causing the computation of response_time
    # to be often 0; let's use time.clock that is more precise.
    # (Only applied on Python 2; Python 3 keeps time.perf_counter.)
    if not PY3:
        time_func = time.clock


# NOTE(review): presumably raised for invalid instance configuration;
# the raising code is not visible here, confirm against the check body.
class BadConfException(Exception):
    pass


class DNSCheck(AgentCheck):
def check(self, instance):
    """
    Collect disk usage metrics for the partitions reported by psutil.

    When label tagging is enabled on Linux, ``self.devices_label`` is
    refreshed first. Each partition that survives ``self.exclude_disk``
    has its usage read under ``self._timeout``; partitions that time out,
    raise, or whose total size does not exceed ``self._min_disk_size`` are
    skipped. The remaining ones are recorded in ``self._valid_disks``,
    tagged, and reported as gauges. When ``self._service_check_rw`` is set
    a ``disk.read_write`` service check is sent as well. Latency metrics
    are collected once at the end.
    """
    if self._tag_by_label and Platform.is_linux():
        self.devices_label = self._get_devices_label()

    self._valid_disks = {}
    for partition in psutil.disk_partitions(all=self._include_all_devices):
        # Every exclusion rule is evaluated before touching the disk.
        if self.exclude_disk(partition):
            continue

        # Usage is fetched up front so the size filter below can use it.
        try:
            usage = timeout(self._timeout)(psutil.disk_usage)(partition.mountpoint)
        except TimeoutException:
            self.log.warning(
                u'Timeout after %d seconds while retrieving the disk usage of `%s` mountpoint. '
                u'You might want to change the timeout length in the settings.',
                self._timeout,
                partition.mountpoint,
            )
            continue
        except Exception as e:
            self.log.warning(
                u'Unable to get disk metrics for %s: %s. '
                u'You can exclude this mountpoint in the settings if it is invalid.',
                partition.mountpoint,
                e,
            )
            continue

        # Drop disks not larger than min_disk_size; zero-sized ones are
        # dropped silently, the others are mentioned in the log.
        if usage.total <= self._min_disk_size:
            if usage.total > 0:
                self.log.info('Excluding device %s with total disk size %s', partition.device, usage.total)
            continue

        # Remembered for the latency metrics collected after the loop.
        self._valid_disks[partition.device] = (partition.fstype, partition.mountpoint)
        self.log.debug('Passed: %s', partition.device)

        if self._use_mount:
            device_name = partition.mountpoint
        else:
            device_name = partition.device

        tags = []
        if self._tag_by_filesystem:
            tags.append(partition.fstype)
            tags.append('filesystem:{}'.format(partition.fstype))
        tags.extend(self._custom_tags)

        # Tags configured for specific devices/mountpoints.
        for pattern, extra_tags in self._device_tag_re:
            if pattern.match(device_name):
                tags.extend(extra_tags)

        if self.devices_label.get(device_name):
            tags.extend(self.devices_label.get(device_name))

        # legacy check names c: vs psutil name C:\\
        if Platform.is_win32():
            device_name = device_name.strip('\\').lower()

        tags.append('device:{}'.format(device_name))
        tags.append('device_name:{}'.format(_base_device_name(partition.device)))

        part_metrics = self._collect_part_metrics(partition, usage)
        for metric_name, metric_value in iteritems(part_metrics):
            self.gauge(metric_name, metric_value, tags=tags)

        # Report whether the partition is mounted read-write or read-only.
        if self._service_check_rw:
            modes = {'rw', 'ro'} & set(partition.opts.split(','))
            if len(modes) != 1:
                # Neither or both options present: state cannot be decided.
                status = AgentCheck.UNKNOWN
            elif 'rw' in modes:
                status = AgentCheck.OK
            else:
                status = AgentCheck.CRITICAL
            self.service_check('disk.read_write', status, tags=tags)

    self.collect_latency_metrics()