Example #1
0
    def collect_metrics_psutil(self):
        self._valid_disks = {}
        for part in psutil.disk_partitions(all=True):
            # we check all exclude conditions
            if self._exclude_disk_psutil(part):
                continue

            # Get disk metrics here to be able to exclude on total usage
            try:
                disk_usage = timeout(5)(psutil.disk_usage)(part.mountpoint)
            except TimeoutException:
                self.log.warn(
                    u"Timeout while retrieving the disk usage of `%s` mountpoint. Skipping...",
                    part.mountpoint)
                continue
            except Exception as e:
                self.log.warn("Unable to get disk metrics for %s: %s",
                              part.mountpoint, e)
                continue
            # Exclude disks with total disk size 0
            if disk_usage.total == 0:
                continue
            # For later, latency metrics
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            self.log.debug('Passed: {0}'.format(part.device))

            tags = [part.fstype] if self._tag_by_filesystem else []
            device_name = part.mountpoint if self._use_mount else part.device

            # Note: psutil (0.3.0 to at least 3.1.1) calculates in_use as (used / total)
            #       The problem here is that total includes reserved space the user
            #       doesn't have access to. This causes psutil to calculate a misleadng
            #       percentage for in_use; a lower percentage than df shows.

            # Calculate in_use w/o reserved space; consistent w/ df's Use% metric.
            pmets = self._collect_part_metrics(part, disk_usage)
            used = 'system.disk.used'
            free = 'system.disk.free'
            pmets['system.disk.in_use'] = pmets[used] / (pmets[used] +
                                                         pmets[free])

            # legacy check names c: vs psutil name C:\\
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()
            for metric_name, metric_value in pmets.iteritems():
                self.gauge(metric_name,
                           metric_value,
                           tags=tags,
                           device_name=device_name)
        # And finally, latency metrics, a legacy gift from the old Windows Check
        if Platform.is_win32():
            self.collect_latency_metrics()
Example #2
0
    def collect_metrics_psutil(self):
        self._valid_disks = {}
        for part in psutil.disk_partitions(all=True):
            # we check all exclude conditions
            if self._exclude_disk_psutil(part):
                continue

            # Get disk metrics here to be able to exclude on total usage
            try:
                disk_usage = timeout(5)(psutil.disk_usage)(part.mountpoint)
            except TimeoutException:
                self.log.warn(
                    u"Timeout while retrieving the disk usage of `%s` mountpoint. Skipping...",
                    part.mountpoint
                )
                continue
            except Exception as e:
                self.log.warn("Unable to get disk metrics for %s: %s", part.mountpoint, e)
                continue
            # Exclude disks with total disk size 0
            if disk_usage.total == 0:
                continue
            # For later, latency metrics
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            self.log.debug('Passed: {0}'.format(part.device))

            tags = [part.fstype] if self._tag_by_filesystem else []
            device_name = part.mountpoint if self._use_mount else part.device

            # Note: psutil (0.3.0 to at least 3.1.1) calculates in_use as (used / total)
            #       The problem here is that total includes reserved space the user
            #       doesn't have access to. This causes psutil to calculate a misleadng
            #       percentage for in_use; a lower percentage than df shows.

            # Calculate in_use w/o reserved space; consistent w/ df's Use% metric.
            pmets = self._collect_part_metrics(part, disk_usage)
            used = 'system.disk.used'
            free = 'system.disk.free'
            pmets['system.disk.in_use'] = pmets[used] / (pmets[used] + pmets[free])

            # legacy check names c: vs psutil name C:\\
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()
            for metric_name, metric_value in pmets.iteritems():
                self.gauge(metric_name, metric_value,
                           tags=tags, device_name=device_name)
        # And finally, latency metrics, a legacy gift from the old Windows Check
        if Platform.is_win32():
            self.collect_latency_metrics()
Example #3
0
    def collect_metrics_psutil(self):
        self._valid_disks = {}
        for part in psutil.disk_partitions(all=True):
            # we check all exclude conditions
            if self._exclude_disk_psutil(part):
                continue
            # Get disk metrics here to be able to exclude on total usage
            try:
                disk_usage = psutil.disk_usage(part.mountpoint)
            except Exception, e:
                self.log.debug("Unable to get disk metrics for %s: %s",
                               part.mountpoint, e)
                continue
            # Exclude disks with total disk size 0
            if disk_usage.total == 0:
                continue
            # For later, latency metrics
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            self.log.debug('Passed: {0}'.format(part.device))

            tags = [part.fstype] if self._tag_by_filesystem else []
            device_name = part.mountpoint if self._use_mount else part.device

            # legacy check names c: vs psutil name C:\\
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()
            for metric_name, metric_value in self._collect_part_metrics(part, disk_usage).iteritems():
                self.gauge(metric_name, metric_value,
                           tags=tags, device_name=device_name)
Example #4
0
def get_system_stats():
    systemStats = {
        'machine': platform.machine(),
        'platform': sys.platform,
        'processor': platform.processor(),
        'pythonV': platform.python_version(),
    }

    platf = sys.platform
    
    if  Platform.is_linux(platf):
        grep = subprocess.Popen(['grep', 'model name', '/proc/cpuinfo'], stdout=subprocess.PIPE, close_fds=True)
        wc = subprocess.Popen(['wc', '-l'], stdin=grep.stdout, stdout=subprocess.PIPE, close_fds=True)
        systemStats['cpuCores'] = int(wc.communicate()[0])

    if Platform.is_darwin(platf):
        systemStats['cpuCores'] = int(subprocess.Popen(['sysctl', 'hw.ncpu'], stdout=subprocess.PIPE, close_fds=True).communicate()[0].split(': ')[1])

    if Platform.is_freebsd(platf):
        systemStats['cpuCores'] = int(subprocess.Popen(['sysctl', 'hw.ncpu'], stdout=subprocess.PIPE, close_fds=True).communicate()[0].split(': ')[1])

    if Platform.is_linux(platf):
        systemStats['nixV'] = platform.dist()

    elif Platform.is_darwin(platf):
        systemStats['macV'] = platform.mac_ver()

    elif Platform.is_freebsd(platf):
        version = platform.uname()[2]
        systemStats['fbsdV'] = ('freebsd', version, '')  # no codename for FreeBSD

    elif Platform.is_win32(platf):
        systemStats['winV'] = platform.win32_ver()

    return systemStats
Example #5
0
def get_system_stats():
    systemStats = {
        'machine': platform.machine(),
        'platform': sys.platform,
        'processor': platform.processor(),
        'pythonV': platform.python_version(),
    }

    platf = sys.platform

    if  Platform.is_linux(platf):
        grep = subprocess.Popen(['grep', 'model name', '/proc/cpuinfo'], stdout=subprocess.PIPE, close_fds=True)
        wc = subprocess.Popen(['wc', '-l'], stdin=grep.stdout, stdout=subprocess.PIPE, close_fds=True)
        systemStats['cpuCores'] = int(wc.communicate()[0])

    if Platform.is_darwin(platf):
        systemStats['cpuCores'] = int(subprocess.Popen(['sysctl', 'hw.ncpu'], stdout=subprocess.PIPE, close_fds=True).communicate()[0].split(': ')[1])

    if Platform.is_freebsd(platf):
        systemStats['cpuCores'] = int(subprocess.Popen(['sysctl', 'hw.ncpu'], stdout=subprocess.PIPE, close_fds=True).communicate()[0].split(': ')[1])

    if Platform.is_linux(platf):
        systemStats['nixV'] = platform.dist()

    elif Platform.is_darwin(platf):
        systemStats['macV'] = platform.mac_ver()

    elif Platform.is_freebsd(platf):
        version = platform.uname()[2]
        systemStats['fbsdV'] = ('freebsd', version, '')  # no codename for FreeBSD

    elif Platform.is_win32(platf):
        systemStats['winV'] = platform.win32_ver()

    return systemStats
Example #6
0
    def collect_metrics_psutil(self):
        self._valid_disks = {}
        for part in psutil.disk_partitions(all=True):
            # we check all exclude conditions
            if self._exclude_disk_psutil(part):
                continue
            # Get disk metrics here to be able to exclude on total usage
            try:
                disk_usage = psutil.disk_usage(part.mountpoint)
            except Exception, e:
                self.log.debug("Unable to get disk metrics for %s: %s",
                               part.mountpoint, e)
                continue
            # Exclude disks with total disk size 0
            if disk_usage.total == 0:
                continue
            # For later, latency metrics
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            self.log.debug('Passed: {0}'.format(part.device))

            tags = [part.fstype] if self._tag_by_filesystem else []
            device_name = part.mountpoint if self._use_mount else part.device

            # legacy check names c: vs psutil name C:\\
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()
            for metric_name, metric_value in self._collect_part_metrics(
                    part, disk_usage).iteritems():
                self.gauge(metric_name,
                           metric_value,
                           tags=tags,
                           device_name=device_name)
Example #7
0
    def collect_metrics_psutil(self):
        self._valid_disks = {}
        for part in psutil.disk_partitions(all=True):
            if self._exclude_disk_psutil(part):
                continue
            try:
                disk_usage = psutil.disk_usage(part.mountpoint)
            except Exception, e:
                self.log.debug("Unable to get disk metrics for %s: %s",
                               part.mountpoint, e)
                continue

            if disk_usage.total == 0:
                continue
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            self.log.debug('Passed: {0}'.format(part.device))

            tags = [part.fstype] if self._tag_by_filesystem else []
            device_name = part.mountpoint if self._use_mount else part.device

            pmets = self._collect_part_metrics(part, disk_usage)
            used = 'system.disk.used'
            free = 'system.disk.free'
            pmets['system.disk.pct_usage'] = (pmets[used] / (pmets[used] + pmets[free])) * 100

            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()
            for metric_name, metric_value in pmets.iteritems():
                self.gauge(metric_name, metric_value,
                           tags=tags, device_name=device_name)
Example #8
0
 def _exclude_disk_psutil(self, part):
     # skip cd-rom drives with no disk in it; they may raise
     # ENOENT, pop-up a Windows GUI error for a non-ready
     # partition or just hang;
     # and all the other excluded disks
     return ((Platform.is_win32() and
              ('cdrom' in part.opts or part.fstype == ''))
             or self._exclude_disk(part.device, part.fstype))
Example #9
0
 def _exclude_disk_psutil(self, part):
     # skip cd-rom drives with no disk in it; they may raise
     # ENOENT, pop-up a Windows GUI error for a non-ready
     # partition or just hang;
     # and all the other excluded disks
     return ((Platform.is_win32() and ('cdrom' in part.opts or
                                       part.fstype == '')) or
             self._exclude_disk(part.device, part.fstype, part.mountpoint))
Example #10
0
 def _get_pickle_path(cls):
     if Platform.is_win32():
         path = os.path.join(_windows_commondata_path(), 'Datadog',
                             cls.__name__ + '.pickle')
     else:
         path = os.path.join(tempfile.gettempdir(),
                             cls.__name__ + '.pickle')
     return path
Example #11
0
    def collect_metrics_psutil(self):
        self._valid_disks = {}
        for part in psutil.disk_partitions(all=True):
            # we check all exclude conditions
            if self._exclude_disk_psutil(part):
                continue

            # Get disk metrics here to be able to exclude on total usage
            try:
                disk_usage = timeout(5)(psutil.disk_usage)(part.mountpoint)
            except TimeoutException:
                self.log.warn(
                    u"Timeout while retrieving the disk usage of `%s` mountpoint. Skipping...",
                    part.mountpoint)
                continue
            except Exception as e:
                self.log.warn("Unable to get disk metrics for %s: %s",
                              part.mountpoint, e)
                continue
            # Exclude disks with total disk size 0
            if disk_usage.total == 0:
                continue
            # For later, latency metrics
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            self.log.debug('Passed: {0}'.format(part.device))

            tags = [part.fstype, 'filesystem:{}'.format(part.fstype)
                    ] if self._tag_by_filesystem else []
            device_name = part.mountpoint if self._use_mount else part.device

            # legacy check names c: vs psutil name C:\\
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()
            for metric_name, metric_value in self._collect_part_metrics(
                    part, disk_usage).iteritems():
                self.gauge(metric_name,
                           metric_value,
                           tags=tags,
                           device_name=device_name)
        # And finally, latency metrics, a legacy gift from the old Windows Check
        if Platform.is_win32():
            self.collect_latency_metrics()
Example #12
0
    def collect_metrics_psutil(self):
        self._valid_disks = {}
        for part in psutil.disk_partitions(all=self._all_partitions):
            # we check all exclude conditions
            if self._exclude_disk_psutil(part):
                continue
            # For later, latency metrics
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            self.log.debug('Passed: {0}'.format(part.device))

            tags = [part.fstype] if self._tag_by_filesystem else []
            device_name = part.mountpoint if self._use_mount else part.device
            # legacy check names c: vs psutil name C:\\
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()
            for metric_name, metric_value in self._collect_part_metrics(part).iteritems():
                self.gauge(metric_name, metric_value,
                           tags=tags, device_name=device_name)
        # And finally, latency metrics, a legacy gift from the old Windows Check
        if Platform.is_win32():
            self.collect_latency_metrics()
Example #13
0
    def collect_metrics_psutil(self):
        self._valid_disks = {}
        for part in psutil.disk_partitions(all=True):
            # we check all exclude conditions
            if self._exclude_disk_psutil(part):
                continue

            # Get disk metrics here to be able to exclude on total usage
            try:
                disk_usage = timeout(5)(psutil.disk_usage)(part.mountpoint)
            except TimeoutException:
                self.log.warn(
                    u"Timeout while retrieving the disk usage of `%s` mountpoint. Skipping...",
                    part.mountpoint
                )
                continue
            except Exception as e:
                self.log.warn("Unable to get disk metrics for %s: %s", part.mountpoint, e)
                continue
            # Exclude disks with total disk size 0
            if disk_usage.total == 0:
                continue
            # For later, latency metrics
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            self.log.debug('Passed: {0}'.format(part.device))

            tags = [part.fstype] if self._tag_by_filesystem else []
            device_name = part.mountpoint if self._use_mount else part.device

            # legacy check names c: vs psutil name C:\\
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()
            for metric_name, metric_value in self._collect_part_metrics(part, disk_usage).iteritems():
                self.gauge(metric_name, metric_value,
                           tags=tags, device_name=device_name)
        # And finally, latency metrics, a legacy gift from the old Windows Check
        if Platform.is_win32():
            self.collect_latency_metrics()
Example #14
0
def get_system_stats():
    systemStats = {
        "machine": platform.machine(),
        "platform": sys.platform,
        "processor": platform.processor(),
        "pythonV": platform.python_version(),
    }

    platf = sys.platform

    if Platform.is_linux(platf):
        grep = subprocess.Popen(["grep", "model name", "/proc/cpuinfo"], stdout=subprocess.PIPE, close_fds=True)
        wc = subprocess.Popen(["wc", "-l"], stdin=grep.stdout, stdout=subprocess.PIPE, close_fds=True)
        systemStats["cpuCores"] = int(wc.communicate()[0])

    if Platform.is_darwin(platf):
        systemStats["cpuCores"] = int(
            subprocess.Popen(["sysctl", "hw.ncpu"], stdout=subprocess.PIPE, close_fds=True)
            .communicate()[0]
            .split(": ")[1]
        )

    if Platform.is_freebsd(platf):
        systemStats["cpuCores"] = int(
            subprocess.Popen(["sysctl", "hw.ncpu"], stdout=subprocess.PIPE, close_fds=True)
            .communicate()[0]
            .split(": ")[1]
        )

    if Platform.is_linux(platf):
        systemStats["nixV"] = platform.dist()

    elif Platform.is_darwin(platf):
        systemStats["macV"] = platform.mac_ver()

    elif Platform.is_freebsd(platf):
        version = platform.uname()[2]
        systemStats["fbsdV"] = ("freebsd", version, "")  # no codename for FreeBSD

    elif Platform.is_win32(platf):
        systemStats["winV"] = platform.win32_ver()

    return systemStats
Example #15
0
    def _load_conf(self, instance):
        self._excluded_filesystems = instance.get('excluded_filesystems', [])
        self._excluded_disks = instance.get('excluded_disks', [])
        self._tag_by_filesystem = _is_affirmative(
            instance.get('tag_by_filesystem', False))
        # On Windows, we need all_partitions to True by default to collect
        # metrics about remote disks
        # On Linux, we need all_partitions to False to avoid collecting metrics
        # about nodev filesystems
        self._all_partitions = _is_affirmative(
            instance.get('all_partitions', Platform.is_win32()))

        # FIXME: 6.x, drop use_mount option in datadog.conf
        self._load_legacy_option(instance, 'use_mount', False,
                                 operation=_is_affirmative)
        # FIXME: 6.x, drop device_blacklist_re option in datadog.conf
        self._load_legacy_option(instance, 'excluded_disk_re', '^$',
                                 legacy_name='device_blacklist_re',
                                 operation=re.compile)
Example #16
0
    def collect_metrics_psutil(self):
        self._valid_disks = {}
        for part in psutil.disk_partitions(all=True):
            # we check all exclude conditions
            if self._exclude_disk_psutil(part):
                continue
            # Get disk metrics here to be able to exclude on total usage
            try:
                disk_usage = psutil.disk_usage(part.mountpoint)
            except Exception, e:
                self.log.debug("Unable to get disk metrics for %s: %s", part.mountpoint, e)
                continue
            # Exclude disks with total disk size 0
            if disk_usage.total == 0:
                continue
            # For later, latency metrics
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            self.log.debug("Passed: {0}".format(part.device))

            tags = [part.fstype] if self._tag_by_filesystem else []
            device_name = part.mountpoint if self._use_mount else part.device

            # Note: psutil (0.3.0 to at least 3.1.1) calculates in_use as (used / total)
            #       The problem here is that total includes reserved space the user
            #       doesn't have access to. This causes psutil to calculate a misleadng
            #       percentage for in_use; a lower percentage than df shows.

            # Calculate in_use w/o reserved space; consistent w/ df's Use% metric.
            pmets = self._collect_part_metrics(part, disk_usage)
            used = "system.disk.used"
            free = "system.disk.free"
            pmets["system.disk.in_use"] = pmets[used] / (pmets[used] + pmets[free])

            # legacy check names c: vs psutil name C:\\
            if Platform.is_win32():
                device_name = device_name.strip("\\").lower()
            for metric_name, metric_value in pmets.iteritems():
                self.gauge(metric_name, metric_value, tags=tags, device_name=device_name)
Example #17
0
    def _exclude_disk_psutil(self, part):

        return ((Platform.is_win32() and ('cdrom' in part.opts or
                                          part.fstype == '')) or
                self._exclude_disk(part.device, part.fstype))
Example #18
0
def get_jmx_status_path():
    if Platform.is_win32():
        path = os.path.join(_windows_commondata_path(), 'Datadog')
    else:
        path = tempfile.gettempdir()
    return path
Example #19
0
def get_jmx_status_path():
    if Platform.is_win32():
        path = os.path.join(_windows_commondata_path(), 'Datadog')
    else:
        path = tempfile.gettempdir()
    return path
Example #20
0
    def get_process_metrics(self, pids, cpu_check_interval, ignore_denied_access=True):

        # initialize process metrics
        # process metrics available for all versions of psutil
        rss = 0
        vms = 0
        cpu = 0
        thr = 0
        voluntary_ctx_switches = 0
        involuntary_ctx_switches = 0

        # process metrics available for psutil versions 0.6.0 and later
        if Platform.is_win32() or Platform.is_solaris():
            real = None
        else:
            real = 0

        if Platform.is_unix():
            open_file_descriptors = 0
        else:
            open_file_descriptors = None

        # process I/O counters (agent might not have permission to access)
        read_count = 0
        write_count = 0
        read_bytes = 0
        write_bytes = 0

        got_denied = False

        for pid in set(pids):
            try:
                p = psutil.Process(pid)
                try:
                    if real is not None:
                        mem = p.memory_info_ex()
                        real += mem.rss - mem.shared
                    else:
                        mem = p.memory_info()

                    if Platform.is_unix():
                        ctx_switches = p.num_ctx_switches()
                        voluntary_ctx_switches += ctx_switches.voluntary
                        involuntary_ctx_switches += ctx_switches.involuntary

                    rss += mem.rss
                    vms += mem.vms
                    thr += p.num_threads()
                    cpu += p.cpu_percent(cpu_check_interval)

                    if open_file_descriptors is not None:
                        open_file_descriptors += p.num_fds()

                except NotImplementedError:
                    # Handle old Kernels which don't provide this info.
                    voluntary_ctx_switches = None
                    involuntary_ctx_switches = None
                except AttributeError:
                        self.log.debug("process attribute not supported on this platform")
                except psutil.AccessDenied:
                    got_denied = True

                # user agent might not have permission to call io_counters()
                # user agent might have access to io counters for some processes and not others
                if read_count is not None:
                    try:
                        io_counters = p.io_counters()
                        read_count += io_counters.read_count
                        write_count += io_counters.write_count
                        read_bytes += io_counters.read_bytes
                        write_bytes += io_counters.write_bytes
                    except AttributeError:
                        self.log.debug("process attribute not supported on this platform")
                    except psutil.AccessDenied:
                        log_func = self.log.debug if ignore_denied_access else self.log.info
                        log_func('dd-agent user does not have access \
                            to I/O counters for process %d: %s' % (pid, p.name()))
                        read_count = None
                        write_count = None
                        read_bytes = None
                        write_bytes = None

            # Skip processes dead in the meantime
            except psutil.NoSuchProcess:
                self.warning('Process %s disappeared while scanning' % pid)

        if got_denied and not ignore_denied_access:
            self.warning("The Datadog Agent was denied access when trying to get the number of file descriptors")

        #Memory values are in Byte
        return (thr, cpu, rss, vms, real, open_file_descriptors,
            read_count, write_count, read_bytes, write_bytes, voluntary_ctx_switches, involuntary_ctx_switches)
Example #21
0
 def _get_pickle_path(cls):
     if Platform.is_win32():
         path = os.path.join(_windows_commondata_path(), 'Datadog', cls.__name__ + '.pickle')
     else:
         path = os.path.join(tempfile.gettempdir(), cls.__name__ + '.pickle')
     return path
Example #22
0
# stdlib
import time

# 3p
import dns.resolver

# project
from checks import AgentCheck
from util import Platform

# These imports are necessary because otherwise dynamic type
# resolution will fail on windows without it.
# See more here: https://github.com/rthalley/dnspython/issues/39.
if Platform.is_win32():
    from dns.rdtypes.ANY import *  # noqa
    from dns.rdtypes.IN import *  # noqa


class DNSCheck(AgentCheck):
    SERVICE_CHECK_NAME = 'dns.can_resolve'
    DEFAULT_TIMEOUT = 5

    def __init__(self, name, init_config, agentConfig, instances=None):
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)
        self.default_timeout = init_config.get('default_timeout',
                                               self.DEFAULT_TIMEOUT)

    def check(self, instance):
        if 'hostname' not in instance:
            self.log.info("Skipping instance, no hostname found.")
            return
Example #23
0
    def get_process_metrics(self,
                            pids,
                            cpu_check_interval,
                            ignore_denied_access=True):

        # initialize process metrics
        # process metrics available for all versions of psutil
        rss = 0
        vms = 0
        cpu = 0
        thr = 0
        voluntary_ctx_switches = 0
        involuntary_ctx_switches = 0

        # process metrics available for psutil versions 0.6.0 and later
        if Platform.is_win32() or Platform.is_solaris():
            real = None
        else:
            real = 0

        if Platform.is_unix():
            open_file_descriptors = 0
        else:
            open_file_descriptors = None

        # process I/O counters (agent might not have permission to access)
        read_count = 0
        write_count = 0
        read_bytes = 0
        write_bytes = 0

        got_denied = False

        for pid in set(pids):
            try:
                p = psutil.Process(pid)
                try:
                    if real is not None:
                        mem = p.memory_info_ex()
                        real += mem.rss - mem.shared
                    else:
                        mem = p.memory_info()

                    if Platform.is_unix():
                        ctx_switches = p.num_ctx_switches()
                        voluntary_ctx_switches += ctx_switches.voluntary
                        involuntary_ctx_switches += ctx_switches.involuntary

                    rss += mem.rss
                    vms += mem.vms
                    thr += p.num_threads()
                    cpu += p.cpu_percent(cpu_check_interval)

                    if open_file_descriptors is not None:
                        open_file_descriptors += p.num_fds()

                except NotImplementedError:
                    # Handle old Kernels which don't provide this info.
                    voluntary_ctx_switches = None
                    involuntary_ctx_switches = None
                except AttributeError:
                    self.log.debug(
                        "process attribute not supported on this platform")
                except psutil.AccessDenied:
                    got_denied = True

                # user agent might not have permission to call io_counters()
                # user agent might have access to io counters for some processes and not others
                if read_count is not None:
                    try:
                        io_counters = p.io_counters()
                        read_count += io_counters.read_count
                        write_count += io_counters.write_count
                        read_bytes += io_counters.read_bytes
                        write_bytes += io_counters.write_bytes
                    except AttributeError:
                        self.log.debug(
                            "process attribute not supported on this platform")
                    except psutil.AccessDenied:
                        log_func = self.log.debug if ignore_denied_access else self.log.info
                        log_func('dd-agent user does not have access \
                            to I/O counters for process %d: %s' %
                                 (pid, p.name()))
                        read_count = None
                        write_count = None
                        read_bytes = None
                        write_bytes = None

            # Skip processes dead in the meantime
            except psutil.NoSuchProcess:
                self.warning('Process %s disappeared while scanning' % pid)

        if got_denied and not ignore_denied_access:
            self.warning('The Datadog Agent was denied access '
                         'when trying to get the number of file descriptors')

        # Memory values are in Byte
        return (thr, cpu, rss, vms, real, open_file_descriptors, read_count,
                write_count, read_bytes, write_bytes, voluntary_ctx_switches,
                involuntary_ctx_switches)
Example #24
0
# stdlib
import time

# 3p
import dns.resolver

# project
from util import Platform
from checks.network_checks import NetworkCheck, Status


# These imports are necessary because otherwise dynamic type
# resolution will fail on windows without it.
# See more here: https://github.com/rthalley/dnspython/issues/39.
if Platform.is_win32():
    from dns.rdtypes.ANY import *  # noqa
    from dns.rdtypes.IN import *  # noqa

class BadConfException(Exception):
    pass

class DNSCheck(NetworkCheck):
    SERVICE_CHECK_NAME = 'dns.can_resolve'
    DEFAULT_TIMEOUT = 5

    def __init__(self, name, init_config, agentConfig, instances):
        # Now that the DNS check is a Network check, we must provide a `name` for each
        # instance before calling NetworkCheck to make backwards compatible with old yaml.
        for idx, inst in enumerate(instances):
            try:
Example #25
0
    def get_process_metrics(self, pids, psutil, cpu_check_interval):

        # initialize process metrics
        # process metrics available for all versions of psutil
        rss = 0
        vms = 0
        cpu = 0
        thr = 0

        # process metrics available for psutil versions 0.6.0 and later
        extended_metrics_0_6_0 = self.is_psutil_version_later_than((0, 6, 0)) and \
            not Platform.is_win32()
        # On Windows get_ext_memory_info returns different metrics
        if extended_metrics_0_6_0:
            real = 0
            voluntary_ctx_switches = 0
            involuntary_ctx_switches = 0
        else:
            real = None
            voluntary_ctx_switches = None
            involuntary_ctx_switches = None

        # process metrics available for psutil versions 0.5.0 and later on UNIX
        extended_metrics_0_5_0_unix = self.is_psutil_version_later_than((0, 5, 0)) and \
                                Platform.is_unix()
        if extended_metrics_0_5_0_unix:
            open_file_descriptors = 0
        else:
            open_file_descriptors = None

        # process I/O counters (agent might not have permission to access)
        read_count = 0
        write_count = 0
        read_bytes = 0
        write_bytes = 0

        got_denied = False

        for pid in set(pids):
            try:
                p = psutil.Process(pid)
                if extended_metrics_0_6_0:
                    mem = p.get_ext_memory_info()
                    real += mem.rss - mem.shared
                    try:
                        ctx_switches = p.get_num_ctx_switches()
                        voluntary_ctx_switches += ctx_switches.voluntary
                        involuntary_ctx_switches += ctx_switches.involuntary
                    except NotImplementedError:
                        # Handle old Kernels which don't provide this info.
                        voluntary_ctx_switches = None
                        involuntary_ctx_switches = None
                else:
                    mem = p.get_memory_info()

                if extended_metrics_0_5_0_unix:
                    try:
                        open_file_descriptors += p.get_num_fds()
                    except psutil.AccessDenied:
                        got_denied = True

                rss += mem.rss
                vms += mem.vms
                thr += p.get_num_threads()
                cpu += p.get_cpu_percent(cpu_check_interval)

                # user agent might not have permission to call get_io_counters()
                # user agent might have access to io counters for some processes and not others
                if read_count is not None:
                    try:
                        io_counters = p.get_io_counters()
                        read_count += io_counters.read_count
                        write_count += io_counters.write_count
                        read_bytes += io_counters.read_bytes
                        write_bytes += io_counters.write_bytes
                    except psutil.AccessDenied:
                        self.log.info('DD user agent does not have access \
                            to I/O counters for process %d: %s' % (pid, p.name))
                        read_count = None
                        write_count = None
                        read_bytes = None
                        write_bytes = None

            # Skip processes dead in the meantime
            except psutil.NoSuchProcess:
                self.warning('Process %s disappeared while scanning' % pid)
                pass

        if got_denied:
            self.warning("The Datadog Agent was denied access when trying to get the number of file descriptors")

        #Memory values are in Byte
        return (thr, cpu, rss, vms, real, open_file_descriptors,
            read_count, write_count, read_bytes, write_bytes, voluntary_ctx_switches, involuntary_ctx_switches)
Example #26
0
class Disk(AgentCheck):
    """ Collects metrics about the machine's disks. """
    # -T for filesystem info
    DF_COMMAND = ['df', '-T']
    METRIC_DISK = 'system.disk.{0}'
    METRIC_INODE = 'system.fs.inodes.{0}'

    def __init__(self, name, init_config, agentConfig, instances=None):
        if instances is not None and len(instances) > 1:
            raise Exception(
                "Disk check only supports one configured instance.")
        AgentCheck.__init__(self,
                            name,
                            init_config,
                            agentConfig,
                            instances=instances)
        # Get the configuration once for all
        self._load_conf(instances[0])

    def check(self, instance):
        """Get disk space/inode stats"""
        # Windows and Mac will always have psutil
        # (we have packaged for both of them)
        if self._psutil():
            self.collect_metrics_psutil()
        else:
            # FIXME: implement all_partitions (df -a)
            self.collect_metrics_manually()

    @classmethod
    def _psutil(cls):
        return psutil is not None

    def _load_conf(self, instance):
        self._excluded_filesystems = instance.get('excluded_filesystems', [])
        self._excluded_disks = instance.get('excluded_disks', [])
        self._tag_by_filesystem = _is_affirmative(
            instance.get('tag_by_filesystem', False))
        self._all_partitions = _is_affirmative(
            instance.get('all_partitions', False))

        # Force exclusion of CDROM (iso9660) from disk check
        self._excluded_filesystems.append('iso9660')

        # FIXME: 6.x, drop use_mount option in datadog.conf
        self._load_legacy_option(instance,
                                 'use_mount',
                                 False,
                                 operation=_is_affirmative)
        # FIXME: 6.x, drop device_blacklist_re option in datadog.conf
        self._load_legacy_option(instance,
                                 'excluded_disk_re',
                                 '^$',
                                 legacy_name='device_blacklist_re',
                                 operation=re.compile)

    def _load_legacy_option(self,
                            instance,
                            option,
                            default,
                            legacy_name=None,
                            operation=lambda l: l):
        value = instance.get(option, default)
        legacy_name = legacy_name or option

        if value == default and legacy_name in self.agentConfig:
            self.log.warn("Using `{0}` in datadog.conf has been deprecated"
                          " in favor of `{1}` in disk.yaml".format(
                              legacy_name, option))
            value = self.agentConfig.get(legacy_name) or default
        setattr(self, '_{0}'.format(option), operation(value))

    def collect_metrics_psutil(self):
        self._valid_disks = {}
        for part in psutil.disk_partitions(all=True):
            # we check all exclude conditions
            if self._exclude_disk_psutil(part):
                continue
            # Get disk metrics here to be able to exclude on total usage
            try:
                disk_usage = psutil.disk_usage(part.mountpoint)
            except Exception, e:
                self.log.debug("Unable to get disk metrics for %s: %s",
                               part.mountpoint, e)
                continue
            # Exclude disks with total disk size 0
            if disk_usage.total == 0:
                continue
            # For later, latency metrics
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            self.log.debug('Passed: {0}'.format(part.device))

            tags = [part.fstype] if self._tag_by_filesystem else []
            device_name = part.mountpoint if self._use_mount else part.device

            # Note: psutil (0.3.0 to at least 3.1.1) calculates in_use as (used / total)
            #       The problem here is that total includes reserved space the user
            #       doesn't have access to. This causes psutil to calculate a misleadng
            #       percentage for in_use; a lower percentage than df shows.

            # Calculate in_use w/o reserved space; consistent w/ df's Use% metric.
            pmets = self._collect_part_metrics(part, disk_usage)
            used = 'system.disk.used'
            free = 'system.disk.free'
            pmets['system.disk.in_use'] = pmets[used] / (pmets[used] +
                                                         pmets[free])

            # legacy check names c: vs psutil name C:\\
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()
            for metric_name, metric_value in pmets.iteritems():
                self.gauge(metric_name,
                           metric_value,
                           tags=tags,
                           device_name=device_name)
        # And finally, latency metrics, a legacy gift from the old Windows Check
        if Platform.is_win32():
            self.collect_latency_metrics()
Example #27
0
class Disk(AgentCheck):
    DF_COMMAND = ['df', '-T']
    METRIC_DISK = 'system.disk.{0}'
    METRIC_INODE = 'system.fs.inodes.{0}'

    def __init__(self, name, init_config, agentConfig, instances=None):
        if instances is not None and len(instances) > 1:
            raise Exception("Disk check only supports one configured instance.")
        AgentCheck.__init__(self, name, init_config,
                            agentConfig, instances=instances)
        self._load_conf(instances[0])

    def check(self, instance):
        if self._psutil():
            self.collect_metrics_psutil()
        else:
            self.collect_metrics_manually()

    @classmethod
    def _psutil(cls):
        return psutil is not None

    def _load_conf(self, instance):
        self._excluded_filesystems = instance.get('excluded_filesystems', [])
        self._excluded_disks = instance.get('excluded_disks', [])
        self._tag_by_filesystem = _is_affirmative(
            instance.get('tag_by_filesystem', False))
        self._all_partitions = _is_affirmative(
            instance.get('all_partitions', False))

        self._excluded_filesystems.append('iso9660')

        self._load_legacy_option(instance, 'use_mount', False,
                                 operation=_is_affirmative)

        self._load_legacy_option(instance, 'excluded_disk_re', '^$',
                                 legacy_name='device_blacklist_re',
                                 operation=re.compile)

    def _load_legacy_option(self, instance, option, default,
                            legacy_name=None, operation=lambda l: l):
        value = instance.get(option, default)
        legacy_name = legacy_name or option

        if value == default and legacy_name in self.agentConfig:
            self.log.warn(
                "Using `{0}` in datamonitor.conf has been deprecated"
                " in favor of `{1}` in disk.yaml".format(legacy_name, option)
            )
            value = self.agentConfig.get(legacy_name) or default
        setattr(self, '_{0}'.format(option), operation(value))

    def collect_metrics_psutil(self):
        self._valid_disks = {}
        for part in psutil.disk_partitions(all=True):
            if self._exclude_disk_psutil(part):
                continue
            try:
                disk_usage = psutil.disk_usage(part.mountpoint)
            except Exception, e:
                self.log.debug("Unable to get disk metrics for %s: %s",
                               part.mountpoint, e)
                continue

            if disk_usage.total == 0:
                continue
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            self.log.debug('Passed: {0}'.format(part.device))

            tags = [part.fstype] if self._tag_by_filesystem else []
            device_name = part.mountpoint if self._use_mount else part.device

            pmets = self._collect_part_metrics(part, disk_usage)
            used = 'system.disk.used'
            free = 'system.disk.free'
            pmets['system.disk.pct_usage'] = (pmets[used] / (pmets[used] + pmets[free])) * 100

            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()
            for metric_name, metric_value in pmets.iteritems():
                self.gauge(metric_name, metric_value,
                           tags=tags, device_name=device_name)

        if Platform.is_win32():
            self.collect_latency_metrics()