예제 #1
0
 def exclude_disk(self, part):
     """Tell whether the partition ``part`` has to be left out.

     On Windows a partition is rejected up front when its options mention
     ``cdrom`` or its filesystem type is empty: a cd-rom drive with no disk
     in it may raise ENOENT, pop up a Windows GUI error for a non-ready
     partition, or just hang. Everything else is decided by
     ``self._exclude_disk`` from the device, filesystem type and mountpoint.
     """
     if Platform.is_win32() and ('cdrom' in part.opts or part.fstype == ''):
         return True
     # not a Windows special case: apply all the other exclusion rules
     return self._exclude_disk(part.device, part.fstype, part.mountpoint)
예제 #2
0
    def collect_metrics_psutil(self):
        """Submit usage gauges (and optionally a read/write check) per partition.

        Iterates over ``psutil.disk_partitions(all=True)`` and skips a
        partition when ``self.exclude_disk`` rejects it, when its usage cannot
        be read (error, or no answer within 5 seconds), or when its total
        size is 0. Each remaining partition is recorded in
        ``self._valid_disks`` as ``device -> (fstype, mountpoint)``, its
        metrics are sent with ``self.gauge``, and, if
        ``self._service_check_rw`` is set, a ``disk.read_write`` service
        check is emitted. ``self.collect_latency_metrics()`` runs once after
        the loop.
        """
        self._valid_disks = {}
        for part in psutil.disk_partitions(all=True):
            # we check all exclude conditions
            if self.exclude_disk(part):
                continue

            # Get disk metrics here to be able to exclude on total usage
            try:
                disk_usage = timeout(5)(psutil.disk_usage)(part.mountpoint)
            except TimeoutException:
                self.log.warning(
                    u'Timeout while retrieving the disk usage of `%s` mountpoint. Skipping...', part.mountpoint
                )
                continue
            except Exception as e:
                # best effort: one unreadable mountpoint must not abort the whole run
                self.log.warning('Unable to get disk metrics for %s: %s', part.mountpoint, e)
                continue

            # Exclude disks with total disk size 0
            if disk_usage.total == 0:
                continue

            # For later, latency metrics
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            # lazy %-args, like the other log calls here: no formatting cost
            # when debug logging is disabled
            self.log.debug('Passed: %s', part.device)

            device_name = part.mountpoint if self._use_mount else part.device

            tags = [part.fstype, 'filesystem:{}'.format(part.fstype)] if self._tag_by_filesystem else []
            tags.extend(self._custom_tags)

            # apply device/mountpoint specific tags
            for regex, device_tags in self._device_tag_re:
                if regex.match(device_name):
                    tags.extend(device_tags)

            # single lookup; a missing or empty label adds no tag
            label = self.devices_label.get(device_name)
            if label:
                tags.append(label)

            # legacy check names c: vs psutil name C:\\
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()

            tags.append('device:{}'.format(device_name))
            for metric_name, metric_value in iteritems(self._collect_part_metrics(part, disk_usage)):
                self.gauge(metric_name, metric_value, tags=tags)

            # Add in a disk read write or read only check
            if self._service_check_rw:
                # keep only the 'rw'/'ro' flags among the mount options
                rwro = {'rw', 'ro'} & set(part.opts.split(','))
                if len(rwro) == 1:
                    self.service_check(
                        'disk.read_write', AgentCheck.OK if rwro.pop() == 'rw' else AgentCheck.CRITICAL, tags=tags
                    )
                else:
                    # neither flag, or both: the state cannot be determined
                    self.service_check('disk.read_write', AgentCheck.UNKNOWN, tags=tags)

        self.collect_latency_metrics()
예제 #3
0
    def psutil_wrapper(self, process, method, accessors, try_sudo, *args,
                       **kwargs):
        """
        Call ``getattr(process, method)(*args, **kwargs)`` defensively.

        * When ``accessors`` is None the value returned by the call is
          handed back as is (None if the call could not be made).
        * Otherwise every name in ``accessors`` is read as an attribute of
          the returned value and stored in a dictionary keyed by that name;
          names that do not exist are logged at debug level and left out.

        If psutil denies access to ``num_fds`` on a unix platform and
        ``try_sudo`` is truthy, the descriptors are counted by listing
        ``/proc/<pid>/fd/`` through ``sudo`` instead.
        """
        result = None if accessors is None else {}

        # Methods that are known to fail on this platform are not attempted
        unsupported = (
            (method == 'num_fds' and not Platform.is_unix())
            or (method == 'num_handles' and not Platform.is_win32())
        )
        if unsupported:
            return result

        try:
            outcome = getattr(process, method)(*args, **kwargs)
            if accessors is None:
                result = outcome
            else:
                for name in accessors:
                    try:
                        value = getattr(outcome, name)
                    except AttributeError:
                        self.log.debug(
                            "psutil.%s().%s attribute does not exist", method,
                            name)
                    else:
                        result[name] = value
        except (NotImplementedError, AttributeError):
            self.log.debug("psutil method %s not implemented", method)
        except psutil.AccessDenied:
            self.log.debug("psutil was denied access for method %s", method)
            sudo_fallback = method == 'num_fds' and Platform.is_unix() and try_sudo
            if sudo_fallback:
                try:
                    # It is up the agent's packager to grant
                    # corresponding sudo policy on unix platforms
                    fd_listing = subprocess.check_output(
                        ['sudo', 'ls', '/proc/{}/fd/'.format(process.pid)])
                    # one line of `ls` output per open descriptor
                    result = len(fd_listing.splitlines())
                except subprocess.CalledProcessError as e:
                    self.log.exception(
                        "trying to retrieve %s with sudo failed with return code %s",
                        method, e.returncode)
                except Exception:
                    self.log.exception(
                        "trying to retrieve %s with sudo also failed", method)
        except psutil.NoSuchProcess:
            self.warning("Process %s disappeared while scanning", process.pid)

        return result
예제 #4
0
    def psutil_wrapper(self, process, method, accessors=None, *args, **kwargs):
        """
        Call ``getattr(process, method)(*args, **kwargs)`` defensively.

        * Without ``accessors`` the value returned by the call is handed
          back as is (None if the call could not be made).
        * With ``accessors``, each name is read as an attribute of the
          returned value and collected in a dictionary keyed by that name;
          names that do not exist are logged at debug level and left out.

        ``num_fds`` is special-cased: when ``self.try_sudo`` is truthy the
        count comes from listing ``/proc/<pid>/fd/`` through ``sudo`` and
        the process method is not called at all.
        """
        result = {} if accessors is not None else None

        # Ban certain method that we know fail
        if method == 'num_fds' and not Platform.is_unix():
            return result
        if method == 'num_handles' and not Platform.is_win32():
            return result

        # Try running `num_fds` with sudo if possible
        if method == 'num_fds' and self.try_sudo:
            self.log.debug("Running num_fds using sudo")
            try:
                command = ['sudo', 'ls', '/proc/{}/fd/'.format(process.pid)]
                # one line of `ls` output per open descriptor
                result = len(subprocess.check_output(command).splitlines())
            except Exception as e:
                self.log.exception(
                    "Trying to retrieve %s with sudo failed with error: %s",
                    method, e)
            return result

        try:
            outcome = getattr(process, method)(*args, **kwargs)
            if accessors is None:
                result = outcome
            else:
                for name in accessors:
                    try:
                        value = getattr(outcome, name)
                    except AttributeError:
                        self.log.debug(
                            "psutil.%s().%s attribute does not exist",
                            method, name)
                        continue
                    result[name] = value
        except (NotImplementedError, AttributeError):
            self.log.debug("psutil method %s not implemented", method)
        except psutil.AccessDenied:
            self.log.debug("psutil was denied access for method %s",
                           method)
        except psutil.NoSuchProcess:
            self.log.debug("Process %s disappeared while scanning",
                           process.pid)

        return result
예제 #5
0
from datadog_checks.base import AgentCheck
from datadog_checks.base.utils.platform import Platform

PY3 = sys.version_info[0] == 3

# Default clock: the higher precision one available in Python3,
# plain time.time otherwise.
time_func = time.perf_counter if PY3 else time.time

# These imports are necessary because otherwise dynamic type
# resolution will fail on windows without it.
# See more here: https://github.com/rthalley/dnspython/issues/39.
if Platform.is_win32():
    from dns.rdtypes.ANY import *  # noqa
    from dns.rdtypes.IN import *  # noqa

    if not PY3:
        # for tiny time deltas, time.time on Windows reports the same value
        # of the clock more than once, causing the computation of
        # response_time to be often 0; time.clock is more precise there.
        time_func = time.clock


class BadConfException(Exception):
    """Plain ``Exception`` subclass that adds no behavior of its own.

    NOTE(review): judging by the name it signals an invalid configuration;
    the raise sites are outside this view -- confirm.
    """


class DNSCheck(AgentCheck):
예제 #6
0
    def check(self, instance):
        """Collect disk metrics for every partition that passes the filters.

        Partitions come from ``psutil.disk_partitions``. One is skipped when
        ``self.exclude_disk`` rejects it, when its usage cannot be read
        (error or timeout after ``self._timeout`` seconds), or when its
        total size does not exceed ``self._min_disk_size``. Each remaining
        partition is stored in ``self._valid_disks``, its metrics are sent
        with ``self.gauge`` and, if ``self._service_check_rw`` is set, a
        ``disk.read_write`` service check is emitted. Finally
        ``self.collect_latency_metrics()`` is called. ``instance`` is not
        read by this method.
        """
        if self._tag_by_label and Platform.is_linux():
            self.devices_label = self._get_devices_label()

        self._valid_disks = {}
        for part in psutil.disk_partitions(all=self._include_all_devices):
            # every exclusion rule is evaluated by exclude_disk
            if self.exclude_disk(part):
                continue

            # Usage is fetched this early so the total size can be used
            # as one more exclusion criterion
            try:
                guarded_usage = timeout(self._timeout)(psutil.disk_usage)
                disk_usage = guarded_usage(part.mountpoint)
            except TimeoutException:
                self.log.warning(
                    u'Timeout after %d seconds while retrieving the disk usage of `%s` mountpoint. '
                    u'You might want to change the timeout length in the settings.',
                    self._timeout,
                    part.mountpoint,
                )
                continue
            except Exception as e:
                self.log.warning(
                    u'Unable to get disk metrics for %s: %s. '
                    u'You can exclude this mountpoint in the settings if it is invalid.',
                    part.mountpoint,
                    e,
                )
                continue

            # Drop disks whose size does not exceed min_disk_size;
            # empty ones are dropped without a log line
            total_size = disk_usage.total
            if total_size <= self._min_disk_size:
                if total_size > 0:
                    self.log.info(
                        'Excluding device %s with total disk size %s',
                        part.device, total_size)
                continue

            # Remembered for the latency metrics collected after the loop
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            self.log.debug('Passed: %s', part.device)

            if self._use_mount:
                device_name = part.mountpoint
            else:
                device_name = part.device

            tags = []
            if self._tag_by_filesystem:
                tags.append(part.fstype)
                tags.append('filesystem:{}'.format(part.fstype))
            tags.extend(self._custom_tags)

            # tags configured for specific devices/mountpoints
            for pattern, extra_tags in self._device_tag_re:
                if pattern.match(device_name):
                    tags.extend(extra_tags)

            label_tags = self.devices_label.get(device_name)
            if label_tags:
                tags.extend(label_tags)

            # legacy check names c: vs psutil name C:\\
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()

            tags.append('device:{}'.format(device_name))
            tags.append('device_name:{}'.format(_base_device_name(part.device)))

            part_metrics = self._collect_part_metrics(part, disk_usage)
            for name, value in iteritems(part_metrics):
                self.gauge(name, value, tags=tags)

            # Optional read-write / read-only service check
            if self._service_check_rw:
                # keep only the 'rw'/'ro' flags among the mount options
                modes = {'rw', 'ro'}.intersection(part.opts.split(','))
                if len(modes) != 1:
                    # neither flag, or both: the state cannot be determined
                    status = AgentCheck.UNKNOWN
                elif modes.pop() == 'rw':
                    status = AgentCheck.OK
                else:
                    status = AgentCheck.CRITICAL
                self.service_check('disk.read_write', status, tags=tags)

        self.collect_latency_metrics()