def collect_metrics_psutil(self):
    """Collect disk usage metrics via psutil and report them as gauges.

    Walks every partition, applies the configured exclusion rules, emits the
    per-partition metrics from _collect_part_metrics(), and recomputes
    ``system.disk.in_use`` from used/free so it matches df's Use%.
    """
    # Rebuilt on every run; consumed by collect_latency_metrics() below.
    self._valid_disks = {}
    for part in psutil.disk_partitions(all=True):
        # we check all exclude conditions
        if self._exclude_disk_psutil(part):
            continue
        # Get disk metrics here to be able to exclude on total usage.
        # disk_usage can hang (e.g. on stale network mounts), hence the
        # 5-second timeout wrapper.
        try:
            disk_usage = timeout(5)(psutil.disk_usage)(part.mountpoint)
        except TimeoutException:
            self.log.warn(
                u"Timeout while retrieving the disk usage of `%s` mountpoint. Skipping...",
                part.mountpoint)
            continue
        except Exception as e:
            self.log.warn("Unable to get disk metrics for %s: %s",
                          part.mountpoint, e)
            continue
        # Exclude disks with total disk size 0
        if disk_usage.total == 0:
            continue
        # For later, latency metrics
        self._valid_disks[part.device] = (part.fstype, part.mountpoint)
        self.log.debug('Passed: {0}'.format(part.device))
        tags = [part.fstype] if self._tag_by_filesystem else []
        device_name = part.mountpoint if self._use_mount else part.device
        # Note: psutil (0.3.0 to at least 3.1.1) calculates in_use as (used / total)
        # The problem here is that total includes reserved space the user
        # doesn't have access to. This causes psutil to calculate a misleadng
        # percentage for in_use; a lower percentage than df shows.
        # Calculate in_use w/o reserved space; consistent w/ df's Use% metric.
        pmets = self._collect_part_metrics(part, disk_usage)
        used = 'system.disk.used'
        free = 'system.disk.free'
        # NOTE(review): if these counters are ints this is floor division on
        # Python 2 — confirm _collect_part_metrics yields floats.
        pmets['system.disk.in_use'] = pmets[used] / (pmets[used] + pmets[free])
        # legacy check names c: vs psutil name C:\\
        if Platform.is_win32():
            device_name = device_name.strip('\\').lower()
        for metric_name, metric_value in pmets.iteritems():
            self.gauge(metric_name, metric_value,
                       tags=tags, device_name=device_name)
    # And finally, latency metrics, a legacy gift from the old Windows Check
    if Platform.is_win32():
        self.collect_latency_metrics()
def collect_metrics_psutil(self):
    """Collect disk usage metrics for every eligible psutil partition."""
    self._valid_disks = {}
    for partition in psutil.disk_partitions(all=True):
        # Apply every exclusion rule before doing any work on the partition.
        if self._exclude_disk_psutil(partition):
            continue

        # Fetch usage up front so partitions can also be excluded on size;
        # the call is wrapped in a 5s timeout because it can hang on stale
        # network mounts.
        try:
            usage = timeout(5)(psutil.disk_usage)(partition.mountpoint)
        except TimeoutException:
            self.log.warn(
                u"Timeout while retrieving the disk usage of `%s` mountpoint. Skipping...",
                partition.mountpoint
            )
            continue
        except Exception as exc:
            self.log.warn("Unable to get disk metrics for %s: %s",
                          partition.mountpoint, exc)
            continue

        # A zero-byte device carries no useful signal.
        if usage.total == 0:
            continue

        # Remember the disk for the latency pass at the end.
        self._valid_disks[partition.device] = (partition.fstype, partition.mountpoint)
        self.log.debug('Passed: {0}'.format(partition.device))

        if self._tag_by_filesystem:
            tags = [partition.fstype]
        else:
            tags = []

        if self._use_mount:
            device_name = partition.mountpoint
        else:
            device_name = partition.device

        metrics = self._collect_part_metrics(partition, usage)
        # psutil's own in_use divides by total, which counts reserved space
        # the user cannot touch and therefore reports a lower percentage
        # than df; recompute the ratio from used/free to match df's Use%.
        used = 'system.disk.used'
        free = 'system.disk.free'
        metrics['system.disk.in_use'] = metrics[used] / (metrics[used] + metrics[free])

        # Legacy check reported "c:" where psutil gives "C:\\".
        if Platform.is_win32():
            device_name = device_name.strip('\\').lower()

        for name, value in metrics.iteritems():
            self.gauge(name, value, tags=tags, device_name=device_name)

    # Latency metrics, inherited from the old Windows-only check.
    if Platform.is_win32():
        self.collect_latency_metrics()
def collect_metrics_psutil(self):
    """Collect disk usage metrics via psutil and report them as gauges."""
    self._valid_disks = {}
    for part in psutil.disk_partitions(all=True):
        # we check all exclude conditions
        if self._exclude_disk_psutil(part):
            continue
        # Get disk metrics here to be able to exclude on total usage
        try:
            disk_usage = psutil.disk_usage(part.mountpoint)
        # FIX: `except Exception, e` is Python-2-only syntax (a SyntaxError
        # on Python 3); `except ... as e` is equivalent and valid on 2.6+.
        except Exception as e:
            self.log.debug("Unable to get disk metrics for %s: %s",
                           part.mountpoint, e)
            continue
        # Exclude disks with total disk size 0
        if disk_usage.total == 0:
            continue
        # For later, latency metrics
        self._valid_disks[part.device] = (part.fstype, part.mountpoint)
        self.log.debug('Passed: {0}'.format(part.device))
        tags = [part.fstype] if self._tag_by_filesystem else []
        device_name = part.mountpoint if self._use_mount else part.device
        # legacy check names c: vs psutil name C:\\
        if Platform.is_win32():
            device_name = device_name.strip('\\').lower()
        for metric_name, metric_value in self._collect_part_metrics(part, disk_usage).iteritems():
            self.gauge(metric_name, metric_value,
                       tags=tags, device_name=device_name)
def get_system_stats():
    """Return a dict of basic host facts (machine, platform, cpu count, OS version).

    Keys vary per platform: cpuCores plus one of nixV/macV/fbsdV/winV.
    """
    systemStats = {
        'machine': platform.machine(),
        'platform': sys.platform,
        'processor': platform.processor(),
        'pythonV': platform.python_version(),
    }
    platf = sys.platform

    if Platform.is_linux(platf):
        # Count "model name" lines in /proc/cpuinfo: one per logical core.
        grep = subprocess.Popen(['grep', 'model name', '/proc/cpuinfo'],
                                stdout=subprocess.PIPE, close_fds=True)
        wc = subprocess.Popen(['wc', '-l'], stdin=grep.stdout,
                              stdout=subprocess.PIPE, close_fds=True)
        systemStats['cpuCores'] = int(wc.communicate()[0])

    # FIX: the darwin and freebsd branches were byte-identical duplicates;
    # both read the core count from `sysctl hw.ncpu` ("hw.ncpu: N").
    if Platform.is_darwin(platf) or Platform.is_freebsd(platf):
        systemStats['cpuCores'] = int(
            subprocess.Popen(['sysctl', 'hw.ncpu'],
                             stdout=subprocess.PIPE,
                             close_fds=True).communicate()[0].split(': ')[1])

    if Platform.is_linux(platf):
        systemStats['nixV'] = platform.dist()
    elif Platform.is_darwin(platf):
        systemStats['macV'] = platform.mac_ver()
    elif Platform.is_freebsd(platf):
        version = platform.uname()[2]
        systemStats['fbsdV'] = ('freebsd', version, '')  # no codename for FreeBSD
    elif Platform.is_win32(platf):
        systemStats['winV'] = platform.win32_ver()

    return systemStats
def collect_metrics_psutil(self):
    """Collect disk usage metrics via psutil and report them as gauges."""
    self._valid_disks = {}
    for part in psutil.disk_partitions(all=True):
        # we check all exclude conditions
        if self._exclude_disk_psutil(part):
            continue
        # Get disk metrics here to be able to exclude on total usage
        try:
            disk_usage = psutil.disk_usage(part.mountpoint)
        # FIX: `except Exception, e` is Python-2-only syntax (SyntaxError on
        # Python 3); the `as` form is equivalent and valid from 2.6 on.
        except Exception as e:
            self.log.debug("Unable to get disk metrics for %s: %s",
                           part.mountpoint, e)
            continue
        # Exclude disks with total disk size 0
        if disk_usage.total == 0:
            continue
        # For later, latency metrics
        self._valid_disks[part.device] = (part.fstype, part.mountpoint)
        self.log.debug('Passed: {0}'.format(part.device))
        tags = [part.fstype] if self._tag_by_filesystem else []
        device_name = part.mountpoint if self._use_mount else part.device
        # legacy check names c: vs psutil name C:\\
        if Platform.is_win32():
            device_name = device_name.strip('\\').lower()
        for metric_name, metric_value in self._collect_part_metrics(
                part, disk_usage).iteritems():
            self.gauge(metric_name, metric_value,
                       tags=tags, device_name=device_name)
def collect_metrics_psutil(self):
    """Collect disk usage metrics via psutil and derive pct_usage (0-100)."""
    self._valid_disks = {}
    for part in psutil.disk_partitions(all=True):
        # Apply all configured exclusion rules first.
        if self._exclude_disk_psutil(part):
            continue
        # Fetch usage early so zero-sized disks can also be skipped.
        try:
            disk_usage = psutil.disk_usage(part.mountpoint)
        # FIX: `except Exception, e` is Python-2-only syntax (SyntaxError on
        # Python 3); `as e` is equivalent and valid from 2.6 on.
        except Exception as e:
            self.log.debug("Unable to get disk metrics for %s: %s",
                           part.mountpoint, e)
            continue
        if disk_usage.total == 0:
            continue
        # Remembered for later latency metrics.
        self._valid_disks[part.device] = (part.fstype, part.mountpoint)
        self.log.debug('Passed: {0}'.format(part.device))
        tags = [part.fstype] if self._tag_by_filesystem else []
        device_name = part.mountpoint if self._use_mount else part.device
        pmets = self._collect_part_metrics(part, disk_usage)
        used = 'system.disk.used'
        free = 'system.disk.free'
        # FIX: force float division. If the used/free counters are ints,
        # Python 2's `/` floors and pct_usage collapses to 0 or 100 instead
        # of the intended percentage.
        pmets['system.disk.pct_usage'] = (float(pmets[used]) / (pmets[used] + pmets[free])) * 100
        # Legacy check used names like "c:" vs psutil's "C:\\".
        if Platform.is_win32():
            device_name = device_name.strip('\\').lower()
        for metric_name, metric_value in pmets.iteritems():
            self.gauge(metric_name, metric_value,
                       tags=tags, device_name=device_name)
def _exclude_disk_psutil(self, part):
    """Return whether this psutil partition should be skipped entirely."""
    # On Windows, skip cd-rom drives with no disk in them: querying them may
    # raise ENOENT, pop up a GUI error for a non-ready partition, or hang.
    if Platform.is_win32():
        if 'cdrom' in part.opts or part.fstype == '':
            return True
    # Otherwise defer to the generic device/filesystem exclusion rules.
    return self._exclude_disk(part.device, part.fstype)
def _exclude_disk_psutil(self, part):
    """Return True when the given psutil partition must not be collected."""
    # skip cd-rom drives with no disk in it; they may raise
    # ENOENT, pop-up a Windows GUI error for a non-ready
    # partition or just hang;
    # and all the other excluded disks (device/fstype/mountpoint rules).
    return ((Platform.is_win32() and
             ('cdrom' in part.opts or part.fstype == '')) or
            self._exclude_disk(part.device, part.fstype, part.mountpoint))
def _get_pickle_path(cls):
    """Return the on-disk location of this check's pickle file."""
    filename = cls.__name__ + '.pickle'
    if Platform.is_win32():
        # Windows keeps state under the machine-wide common data directory.
        return os.path.join(_windows_commondata_path(), 'Datadog', filename)
    # Everywhere else the system temp directory is used.
    return os.path.join(tempfile.gettempdir(), filename)
def collect_metrics_psutil(self):
    """Collect disk usage metrics via psutil and report them as gauges.

    This variant tags both with the bare fstype (legacy) and with a
    ``filesystem:<fstype>`` key:value tag.
    """
    # Rebuilt each run; consumed by collect_latency_metrics() below.
    self._valid_disks = {}
    for part in psutil.disk_partitions(all=True):
        # we check all exclude conditions
        if self._exclude_disk_psutil(part):
            continue
        # Get disk metrics here to be able to exclude on total usage.
        # The call is wrapped in a 5-second timeout because it can hang
        # (e.g. on stale network mounts).
        try:
            disk_usage = timeout(5)(psutil.disk_usage)(part.mountpoint)
        except TimeoutException:
            self.log.warn(
                u"Timeout while retrieving the disk usage of `%s` mountpoint. Skipping...",
                part.mountpoint)
            continue
        except Exception as e:
            self.log.warn("Unable to get disk metrics for %s: %s",
                          part.mountpoint, e)
            continue
        # Exclude disks with total disk size 0
        if disk_usage.total == 0:
            continue
        # For later, latency metrics
        self._valid_disks[part.device] = (part.fstype, part.mountpoint)
        self.log.debug('Passed: {0}'.format(part.device))
        # Legacy bare-fstype tag kept alongside the key:value form.
        tags = [part.fstype,
                'filesystem:{}'.format(part.fstype)] if self._tag_by_filesystem else []
        device_name = part.mountpoint if self._use_mount else part.device
        # legacy check names c: vs psutil name C:\\
        if Platform.is_win32():
            device_name = device_name.strip('\\').lower()
        for metric_name, metric_value in self._collect_part_metrics(
                part, disk_usage).iteritems():
            self.gauge(metric_name, metric_value,
                       tags=tags, device_name=device_name)
    # And finally, latency metrics, a legacy gift from the old Windows Check
    if Platform.is_win32():
        self.collect_latency_metrics()
def collect_metrics_psutil(self):
    """Emit disk metrics for each partition psutil reports."""
    self._valid_disks = {}
    for partition in psutil.disk_partitions(all=self._all_partitions):
        # Skip anything matched by the exclusion rules.
        if self._exclude_disk_psutil(partition):
            continue

        # Track the disk so the latency pass below can use it.
        self._valid_disks[partition.device] = (partition.fstype, partition.mountpoint)
        self.log.debug('Passed: {0}'.format(partition.device))

        if self._tag_by_filesystem:
            tags = [partition.fstype]
        else:
            tags = []

        if self._use_mount:
            device_name = partition.mountpoint
        else:
            device_name = partition.device

        # Legacy check reported "c:" where psutil gives "C:\\".
        if Platform.is_win32():
            device_name = device_name.strip('\\').lower()

        for name, value in self._collect_part_metrics(partition).iteritems():
            self.gauge(name, value, tags=tags, device_name=device_name)

    # Latency metrics, a legacy gift from the old Windows-only check.
    if Platform.is_win32():
        self.collect_latency_metrics()
def collect_metrics_psutil(self):
    """Collect disk usage metrics via psutil and report one gauge per metric."""
    # Rebuilt each run; consumed by collect_latency_metrics() below.
    self._valid_disks = {}
    for part in psutil.disk_partitions(all=True):
        # we check all exclude conditions
        if self._exclude_disk_psutil(part):
            continue
        # Get disk metrics here to be able to exclude on total usage.
        # Wrapped in a 5-second timeout because disk_usage can hang
        # (e.g. on stale network mounts).
        try:
            disk_usage = timeout(5)(psutil.disk_usage)(part.mountpoint)
        except TimeoutException:
            self.log.warn(
                u"Timeout while retrieving the disk usage of `%s` mountpoint. Skipping...",
                part.mountpoint
            )
            continue
        except Exception as e:
            self.log.warn("Unable to get disk metrics for %s: %s",
                          part.mountpoint, e)
            continue
        # Exclude disks with total disk size 0
        if disk_usage.total == 0:
            continue
        # For later, latency metrics
        self._valid_disks[part.device] = (part.fstype, part.mountpoint)
        self.log.debug('Passed: {0}'.format(part.device))
        tags = [part.fstype] if self._tag_by_filesystem else []
        device_name = part.mountpoint if self._use_mount else part.device
        # legacy check names c: vs psutil name C:\\
        if Platform.is_win32():
            device_name = device_name.strip('\\').lower()
        for metric_name, metric_value in self._collect_part_metrics(
                part, disk_usage).iteritems():
            self.gauge(metric_name, metric_value,
                       tags=tags, device_name=device_name)
    # And finally, latency metrics, a legacy gift from the old Windows Check
    if Platform.is_win32():
        self.collect_latency_metrics()
def get_system_stats():
    """Gather basic host facts: architecture, platform, core count, OS version."""
    stats = {
        "machine": platform.machine(),
        "platform": sys.platform,
        "processor": platform.processor(),
        "pythonV": platform.python_version(),
    }
    platf = sys.platform

    def _sysctl_ncpu():
        # `sysctl hw.ncpu` prints "hw.ncpu: N"; parse out N.
        out = subprocess.Popen(["sysctl", "hw.ncpu"],
                               stdout=subprocess.PIPE,
                               close_fds=True).communicate()[0]
        return int(out.split(": ")[1])

    if Platform.is_linux(platf):
        # One "model name" line per logical core in /proc/cpuinfo.
        grep = subprocess.Popen(["grep", "model name", "/proc/cpuinfo"],
                                stdout=subprocess.PIPE, close_fds=True)
        wc = subprocess.Popen(["wc", "-l"], stdin=grep.stdout,
                              stdout=subprocess.PIPE, close_fds=True)
        stats["cpuCores"] = int(wc.communicate()[0])
    if Platform.is_darwin(platf):
        stats["cpuCores"] = _sysctl_ncpu()
    if Platform.is_freebsd(platf):
        stats["cpuCores"] = _sysctl_ncpu()

    if Platform.is_linux(platf):
        stats["nixV"] = platform.dist()
    elif Platform.is_darwin(platf):
        stats["macV"] = platform.mac_ver()
    elif Platform.is_freebsd(platf):
        version = platform.uname()[2]
        stats["fbsdV"] = ("freebsd", version, "")  # no codename for FreeBSD
    elif Platform.is_win32(platf):
        stats["winV"] = platform.win32_ver()

    return stats
def _load_conf(self, instance):
    """Read the instance configuration and store it as private attributes."""
    self._excluded_filesystems = instance.get('excluded_filesystems', [])
    self._excluded_disks = instance.get('excluded_disks', [])
    self._tag_by_filesystem = _is_affirmative(
        instance.get('tag_by_filesystem', False))
    # On Windows, we need all_partitions to True by default to collect
    # metrics about remote disks
    # On Linux, we need all_partitions to False to avoid collecting metrics
    # about nodev filesystems
    self._all_partitions = _is_affirmative(
        instance.get('all_partitions', Platform.is_win32()))
    # FIXME: 6.x, drop use_mount option in datadog.conf
    self._load_legacy_option(instance, 'use_mount', False,
                             operation=_is_affirmative)
    # FIXME: 6.x, drop device_blacklist_re option in datadog.conf
    # (stored pre-compiled; '^$' matches nothing but the empty string)
    self._load_legacy_option(instance, 'excluded_disk_re', '^$',
                             legacy_name='device_blacklist_re',
                             operation=re.compile)
def collect_metrics_psutil(self):
    """Collect disk usage metrics via psutil and derive system.disk.in_use."""
    self._valid_disks = {}
    for part in psutil.disk_partitions(all=True):
        # Apply all configured exclusion rules first.
        if self._exclude_disk_psutil(part):
            continue
        # Fetch usage early so zero-sized disks can also be skipped.
        try:
            disk_usage = psutil.disk_usage(part.mountpoint)
        # FIX: `except Exception, e` is Python-2-only syntax (SyntaxError on
        # Python 3); `as e` is equivalent and valid from 2.6 on.
        except Exception as e:
            self.log.debug("Unable to get disk metrics for %s: %s",
                           part.mountpoint, e)
            continue
        # Exclude disks with total disk size 0
        if disk_usage.total == 0:
            continue
        # For later, latency metrics
        self._valid_disks[part.device] = (part.fstype, part.mountpoint)
        self.log.debug("Passed: {0}".format(part.device))
        tags = [part.fstype] if self._tag_by_filesystem else []
        device_name = part.mountpoint if self._use_mount else part.device
        # Note: psutil (0.3.0 to at least 3.1.1) calculates in_use as
        # (used / total), but total includes reserved space the user has no
        # access to, giving a lower percentage than df shows. Recompute
        # from used/free to stay consistent with df's Use%.
        pmets = self._collect_part_metrics(part, disk_usage)
        used = "system.disk.used"
        free = "system.disk.free"
        # FIX: force float division so integer counters cannot floor the
        # ratio to 0 or 1 under Python 2.
        pmets["system.disk.in_use"] = float(pmets[used]) / (pmets[used] + pmets[free])
        # Legacy check used names like "c:" vs psutil's "C:\\".
        if Platform.is_win32():
            device_name = device_name.strip("\\").lower()
        for metric_name, metric_value in pmets.iteritems():
            self.gauge(metric_name, metric_value,
                       tags=tags, device_name=device_name)
def _exclude_disk_psutil(self, part):
    """Return True when the given psutil partition must not be collected."""
    # Windows: skip cd-rom drives with no disk inserted -- querying them can
    # raise ENOENT, pop up a GUI error, or hang outright.
    if Platform.is_win32() and ('cdrom' in part.opts or part.fstype == ''):
        return True
    # Defer to the generic device/filesystem exclusion rules.
    return self._exclude_disk(part.device, part.fstype)
def get_jmx_status_path():
    """Return the directory used to store JMX status files."""
    if Platform.is_win32():
        # Windows keeps state under the machine-wide common data directory.
        return os.path.join(_windows_commondata_path(), 'Datadog')
    # Everywhere else the system temp directory is used.
    return tempfile.gettempdir()
def get_process_metrics(self, pids, cpu_check_interval, ignore_denied_access=True):
    """Aggregate process metrics over all given pids.

    Returns a 12-tuple: (threads, cpu%, rss, vms, real, open_fds,
    read_count, write_count, read_bytes, write_bytes,
    voluntary_ctx_switches, involuntary_ctx_switches).
    Entries that are unavailable on this platform (or denied) come back
    as None. Memory values are in bytes.
    """
    # initialize process metrics
    # process metrics available for all versions of psutil
    rss = 0
    vms = 0
    cpu = 0
    thr = 0
    voluntary_ctx_switches = 0
    involuntary_ctx_switches = 0
    # process metrics available for psutil versions 0.6.0 and later;
    # "real" (rss - shared) cannot be computed on win32/solaris.
    if Platform.is_win32() or Platform.is_solaris():
        real = None
    else:
        real = 0
    if Platform.is_unix():
        open_file_descriptors = 0
    else:
        open_file_descriptors = None
    # process I/O counters (agent might not have permission to access)
    read_count = 0
    write_count = 0
    read_bytes = 0
    write_bytes = 0
    got_denied = False
    for pid in set(pids):
        try:
            p = psutil.Process(pid)
            try:
                if real is not None:
                    mem = p.memory_info_ex()
                    real += mem.rss - mem.shared
                else:
                    mem = p.memory_info()
                if Platform.is_unix():
                    ctx_switches = p.num_ctx_switches()
                    voluntary_ctx_switches += ctx_switches.voluntary
                    involuntary_ctx_switches += ctx_switches.involuntary
                rss += mem.rss
                vms += mem.vms
                thr += p.num_threads()
                cpu += p.cpu_percent(cpu_check_interval)
                if open_file_descriptors is not None:
                    open_file_descriptors += p.num_fds()
            except NotImplementedError:
                # Handle old Kernels which don't provide this info.
                voluntary_ctx_switches = None
                involuntary_ctx_switches = None
            except AttributeError:
                self.log.debug("process attribute not supported on this platform")
            except psutil.AccessDenied:
                got_denied = True
            # user agent might not have permission to call io_counters()
            # user agent might have access to io counters for some processes and not others
            # (once any process denies access, read_count flips to None and
            # I/O aggregation stops for the rest of the scan)
            if read_count is not None:
                try:
                    io_counters = p.io_counters()
                    read_count += io_counters.read_count
                    write_count += io_counters.write_count
                    read_bytes += io_counters.read_bytes
                    write_bytes += io_counters.write_bytes
                except AttributeError:
                    self.log.debug("process attribute not supported on this platform")
                except psutil.AccessDenied:
                    log_func = self.log.debug if ignore_denied_access else self.log.info
                    log_func('dd-agent user does not have access \
to I/O counters for process %d: %s' % (pid, p.name()))
                    read_count = None
                    write_count = None
                    read_bytes = None
                    write_bytes = None
        # Skip processes dead in the meantime
        except psutil.NoSuchProcess:
            self.warning('Process %s disappeared while scanning' % pid)
    if got_denied and not ignore_denied_access:
        self.warning("The Datadog Agent was denied access when trying to get the number of file descriptors")
    # Memory values are in Byte
    return (thr, cpu, rss, vms, real, open_file_descriptors,
            read_count, write_count, read_bytes, write_bytes,
            voluntary_ctx_switches, involuntary_ctx_switches)
# stdlib import time # 3p import dns.resolver # project from checks import AgentCheck from util import Platform # These imports are necessary because otherwise dynamic type # resolution will fail on windows without it. # See more here: https://github.com/rthalley/dnspython/issues/39. if Platform.is_win32(): from dns.rdtypes.ANY import * # noqa from dns.rdtypes.IN import * # noqa class DNSCheck(AgentCheck): SERVICE_CHECK_NAME = 'dns.can_resolve' DEFAULT_TIMEOUT = 5 def __init__(self, name, init_config, agentConfig, instances=None): AgentCheck.__init__(self, name, init_config, agentConfig, instances) self.default_timeout = init_config.get('default_timeout', self.DEFAULT_TIMEOUT) def check(self, instance): if 'hostname' not in instance: self.log.info("Skipping instance, no hostname found.") return
def get_process_metrics(self, pids, cpu_check_interval, ignore_denied_access=True):
    """Aggregate process metrics over all given pids.

    Returns a 12-tuple: (threads, cpu%, rss, vms, real, open_fds,
    read_count, write_count, read_bytes, write_bytes,
    voluntary_ctx_switches, involuntary_ctx_switches).
    Unavailable or denied entries come back as None; memory is in bytes.
    """
    # initialize process metrics
    # process metrics available for all versions of psutil
    rss = 0
    vms = 0
    cpu = 0
    thr = 0
    voluntary_ctx_switches = 0
    involuntary_ctx_switches = 0
    # process metrics available for psutil versions 0.6.0 and later;
    # "real" (rss - shared) cannot be computed on win32/solaris.
    if Platform.is_win32() or Platform.is_solaris():
        real = None
    else:
        real = 0
    if Platform.is_unix():
        open_file_descriptors = 0
    else:
        open_file_descriptors = None
    # process I/O counters (agent might not have permission to access)
    read_count = 0
    write_count = 0
    read_bytes = 0
    write_bytes = 0
    got_denied = False
    for pid in set(pids):
        try:
            p = psutil.Process(pid)
            try:
                if real is not None:
                    mem = p.memory_info_ex()
                    real += mem.rss - mem.shared
                else:
                    mem = p.memory_info()
                if Platform.is_unix():
                    ctx_switches = p.num_ctx_switches()
                    voluntary_ctx_switches += ctx_switches.voluntary
                    involuntary_ctx_switches += ctx_switches.involuntary
                rss += mem.rss
                vms += mem.vms
                thr += p.num_threads()
                cpu += p.cpu_percent(cpu_check_interval)
                if open_file_descriptors is not None:
                    open_file_descriptors += p.num_fds()
            except NotImplementedError:
                # Handle old Kernels which don't provide this info.
                voluntary_ctx_switches = None
                involuntary_ctx_switches = None
            except AttributeError:
                self.log.debug(
                    "process attribute not supported on this platform")
            except psutil.AccessDenied:
                got_denied = True
            # user agent might not have permission to call io_counters()
            # user agent might have access to io counters for some processes and not others
            # (once any process denies access, read_count flips to None and
            # I/O aggregation stops for the rest of the scan)
            if read_count is not None:
                try:
                    io_counters = p.io_counters()
                    read_count += io_counters.read_count
                    write_count += io_counters.write_count
                    read_bytes += io_counters.read_bytes
                    write_bytes += io_counters.write_bytes
                except AttributeError:
                    self.log.debug(
                        "process attribute not supported on this platform")
                except psutil.AccessDenied:
                    log_func = self.log.debug if ignore_denied_access else self.log.info
                    log_func('dd-agent user does not have access \
to I/O counters for process %d: %s' % (pid, p.name()))
                    read_count = None
                    write_count = None
                    read_bytes = None
                    write_bytes = None
        # Skip processes dead in the meantime
        except psutil.NoSuchProcess:
            self.warning('Process %s disappeared while scanning' % pid)
    if got_denied and not ignore_denied_access:
        self.warning('The Datadog Agent was denied access '
                     'when trying to get the number of file descriptors')
    # Memory values are in Byte
    return (thr, cpu, rss, vms, real, open_file_descriptors,
            read_count, write_count, read_bytes, write_bytes,
            voluntary_ctx_switches, involuntary_ctx_switches)
# stdlib import time # 3p import dns.resolver # project from util import Platform from checks.network_checks import NetworkCheck, Status # These imports are necessary because otherwise dynamic type # resolution will fail on windows without it. # See more here: https://github.com/rthalley/dnspython/issues/39. if Platform.is_win32(): from dns.rdtypes.ANY import * # noqa from dns.rdtypes.IN import * # noqa class BadConfException(Exception): pass class DNSCheck(NetworkCheck): SERVICE_CHECK_NAME = 'dns.can_resolve' DEFAULT_TIMEOUT = 5 def __init__(self, name, init_config, agentConfig, instances): # Now that the DNS check is a Network check, we must provide a `name` for each # instance before calling NetworkCheck to make backwards compatible with old yaml. for idx, inst in enumerate(instances): try:
def get_process_metrics(self, pids, psutil, cpu_check_interval):
    """Aggregate process metrics over all given pids (legacy psutil 0.x API).

    `psutil` is passed in explicitly. Returns a 12-tuple:
    (threads, cpu%, rss, vms, real, open_fds, read_count, write_count,
    read_bytes, write_bytes, voluntary_ctx_switches,
    involuntary_ctx_switches); unavailable entries are None.
    Memory values are in bytes.
    """
    # initialize process metrics
    # process metrics available for all versions of psutil
    rss = 0
    vms = 0
    cpu = 0
    thr = 0
    # process metrics available for psutil versions 0.6.0 and later
    extended_metrics_0_6_0 = self.is_psutil_version_later_than((0, 6, 0)) and \
        not Platform.is_win32()
    # On Windows get_ext_memory_info returns different metrics
    if extended_metrics_0_6_0:
        real = 0
        voluntary_ctx_switches = 0
        involuntary_ctx_switches = 0
    else:
        real = None
        voluntary_ctx_switches = None
        involuntary_ctx_switches = None
    # process metrics available for psutil versions 0.5.0 and later on UNIX
    extended_metrics_0_5_0_unix = self.is_psutil_version_later_than((0, 5, 0)) and \
        Platform.is_unix()
    if extended_metrics_0_5_0_unix:
        open_file_descriptors = 0
    else:
        open_file_descriptors = None
    # process I/O counters (agent might not have permission to access)
    read_count = 0
    write_count = 0
    read_bytes = 0
    write_bytes = 0
    got_denied = False
    for pid in set(pids):
        try:
            p = psutil.Process(pid)
            if extended_metrics_0_6_0:
                mem = p.get_ext_memory_info()
                # "real" memory: resident minus shared pages.
                real += mem.rss - mem.shared
                try:
                    ctx_switches = p.get_num_ctx_switches()
                    voluntary_ctx_switches += ctx_switches.voluntary
                    involuntary_ctx_switches += ctx_switches.involuntary
                except NotImplementedError:
                    # Handle old Kernels which don't provide this info.
                    voluntary_ctx_switches = None
                    involuntary_ctx_switches = None
            else:
                mem = p.get_memory_info()
            if extended_metrics_0_5_0_unix:
                try:
                    open_file_descriptors += p.get_num_fds()
                except psutil.AccessDenied:
                    got_denied = True
            rss += mem.rss
            vms += mem.vms
            thr += p.get_num_threads()
            cpu += p.get_cpu_percent(cpu_check_interval)
            # user agent might not have permission to call get_io_counters()
            # user agent might have access to io counters for some processes and not others
            # (once any process denies access, read_count flips to None and
            # I/O aggregation stops for the rest of the scan)
            if read_count is not None:
                try:
                    io_counters = p.get_io_counters()
                    read_count += io_counters.read_count
                    write_count += io_counters.write_count
                    read_bytes += io_counters.read_bytes
                    write_bytes += io_counters.write_bytes
                except psutil.AccessDenied:
                    self.log.info('DD user agent does not have access \
to I/O counters for process %d: %s' % (pid, p.name))
                    read_count = None
                    write_count = None
                    read_bytes = None
                    write_bytes = None
        # Skip processes dead in the meantime
        except psutil.NoSuchProcess:
            self.warning('Process %s disappeared while scanning' % pid)
            pass
    if got_denied:
        self.warning("The Datadog Agent was denied access when trying to get the number of file descriptors")
    # Memory values are in Byte
    return (thr, cpu, rss, vms, real, open_file_descriptors,
            read_count, write_count, read_bytes, write_bytes,
            voluntary_ctx_switches, involuntary_ctx_switches)
class Disk(AgentCheck):
    """
    Collects metrics about the machine's disks.
    """
    # -T for filesystem info
    DF_COMMAND = ['df', '-T']
    METRIC_DISK = 'system.disk.{0}'
    METRIC_INODE = 'system.fs.inodes.{0}'

    def __init__(self, name, init_config, agentConfig, instances=None):
        if instances is not None and len(instances) > 1:
            raise Exception(
                "Disk check only supports one configured instance.")
        AgentCheck.__init__(self, name, init_config,
                            agentConfig, instances=instances)
        # Get the configuration once for all
        self._load_conf(instances[0])

    def check(self, instance):
        """Get disk space/inode stats"""
        # Windows and Mac will always have psutil
        # (we have packaged for both of them)
        if self._psutil():
            self.collect_metrics_psutil()
        else:
            # FIXME: implement all_partitions (df -a)
            self.collect_metrics_manually()

    @classmethod
    def _psutil(cls):
        # psutil is optional; fall back to df parsing when it's absent.
        return psutil is not None

    def _load_conf(self, instance):
        """Read the instance configuration and store it as private attributes."""
        self._excluded_filesystems = instance.get('excluded_filesystems', [])
        self._excluded_disks = instance.get('excluded_disks', [])
        self._tag_by_filesystem = _is_affirmative(
            instance.get('tag_by_filesystem', False))
        self._all_partitions = _is_affirmative(
            instance.get('all_partitions', False))
        # Force exclusion of CDROM (iso9660) from disk check
        self._excluded_filesystems.append('iso9660')
        # FIXME: 6.x, drop use_mount option in datadog.conf
        self._load_legacy_option(instance, 'use_mount', False,
                                 operation=_is_affirmative)
        # FIXME: 6.x, drop device_blacklist_re option in datadog.conf
        self._load_legacy_option(instance, 'excluded_disk_re', '^$',
                                 legacy_name='device_blacklist_re',
                                 operation=re.compile)

    def _load_legacy_option(self, instance, option, default,
                            legacy_name=None, operation=lambda l: l):
        """Read `option` from the instance, falling back to the deprecated
        datadog.conf `legacy_name`, then store operation(value) as
        self._<option>."""
        value = instance.get(option, default)
        legacy_name = legacy_name or option
        if value == default and legacy_name in self.agentConfig:
            self.log.warn("Using `{0}` in datadog.conf has been deprecated"
                          " in favor of `{1}` in disk.yaml".format(
                              legacy_name, option))
            value = self.agentConfig.get(legacy_name) or default
        setattr(self, '_{0}'.format(option), operation(value))

    def collect_metrics_psutil(self):
        """Collect disk usage metrics through psutil."""
        self._valid_disks = {}
        for part in psutil.disk_partitions(all=True):
            # we check all exclude conditions
            if self._exclude_disk_psutil(part):
                continue
            # Get disk metrics here to be able to exclude on total usage
            try:
                disk_usage = psutil.disk_usage(part.mountpoint)
            # FIX: `except Exception, e` is Python-2-only syntax (SyntaxError
            # on Python 3); `as e` is equivalent and valid from 2.6 on.
            except Exception as e:
                self.log.debug("Unable to get disk metrics for %s: %s",
                               part.mountpoint, e)
                continue
            # Exclude disks with total disk size 0
            if disk_usage.total == 0:
                continue
            # For later, latency metrics
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            self.log.debug('Passed: {0}'.format(part.device))
            tags = [part.fstype] if self._tag_by_filesystem else []
            device_name = part.mountpoint if self._use_mount else part.device
            # Note: psutil (0.3.0 to at least 3.1.1) calculates in_use as
            # (used / total), but total includes reserved space the user has
            # no access to, giving a lower percentage than df shows.
            # Recompute from used/free to stay consistent with df's Use%.
            pmets = self._collect_part_metrics(part, disk_usage)
            used = 'system.disk.used'
            free = 'system.disk.free'
            # FIX: force float division so integer counters cannot floor the
            # ratio to 0 or 1 under Python 2.
            pmets['system.disk.in_use'] = float(pmets[used]) / (pmets[used] + pmets[free])
            # legacy check names c: vs psutil name C:\\
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()
            for metric_name, metric_value in pmets.iteritems():
                self.gauge(metric_name, metric_value,
                           tags=tags, device_name=device_name)
        # And finally, latency metrics, a legacy gift from the old Windows Check
        if Platform.is_win32():
            self.collect_latency_metrics()
class Disk(AgentCheck):
    """Collects metrics about the machine's disks."""
    DF_COMMAND = ['df', '-T']
    METRIC_DISK = 'system.disk.{0}'
    METRIC_INODE = 'system.fs.inodes.{0}'

    def __init__(self, name, init_config, agentConfig, instances=None):
        if instances is not None and len(instances) > 1:
            raise Exception("Disk check only supports one configured instance.")
        AgentCheck.__init__(self, name, init_config,
                            agentConfig, instances=instances)
        # Read the configuration once for all runs.
        self._load_conf(instances[0])

    def check(self, instance):
        """Collect disk space/inode stats via psutil when available."""
        if self._psutil():
            self.collect_metrics_psutil()
        else:
            self.collect_metrics_manually()

    @classmethod
    def _psutil(cls):
        # psutil is optional; fall back to df parsing when it's absent.
        return psutil is not None

    def _load_conf(self, instance):
        """Read the instance configuration and store it as private attributes."""
        self._excluded_filesystems = instance.get('excluded_filesystems', [])
        self._excluded_disks = instance.get('excluded_disks', [])
        self._tag_by_filesystem = _is_affirmative(
            instance.get('tag_by_filesystem', False))
        self._all_partitions = _is_affirmative(
            instance.get('all_partitions', False))
        # Always exclude CDROM (iso9660) filesystems.
        self._excluded_filesystems.append('iso9660')
        self._load_legacy_option(instance, 'use_mount', False,
                                 operation=_is_affirmative)
        self._load_legacy_option(instance, 'excluded_disk_re', '^$',
                                 legacy_name='device_blacklist_re',
                                 operation=re.compile)

    def _load_legacy_option(self, instance, option, default,
                            legacy_name=None, operation=lambda l: l):
        """Read `option` from the instance, falling back to the deprecated
        agent-config `legacy_name`, then store operation(value) as
        self._<option>."""
        value = instance.get(option, default)
        legacy_name = legacy_name or option
        if value == default and legacy_name in self.agentConfig:
            self.log.warn(
                "Using `{0}` in datamonitor.conf has been deprecated"
                " in favor of `{1}` in disk.yaml".format(legacy_name, option)
            )
            value = self.agentConfig.get(legacy_name) or default
        setattr(self, '_{0}'.format(option), operation(value))

    def collect_metrics_psutil(self):
        """Collect disk usage metrics through psutil and derive pct_usage."""
        self._valid_disks = {}
        for part in psutil.disk_partitions(all=True):
            # Apply all configured exclusion rules first.
            if self._exclude_disk_psutil(part):
                continue
            # Fetch usage early so zero-sized disks can also be skipped.
            try:
                disk_usage = psutil.disk_usage(part.mountpoint)
            # FIX: `except Exception, e` is Python-2-only syntax (SyntaxError
            # on Python 3); `as e` is equivalent and valid from 2.6 on.
            except Exception as e:
                self.log.debug("Unable to get disk metrics for %s: %s",
                               part.mountpoint, e)
                continue
            if disk_usage.total == 0:
                continue
            # Remembered for the latency metrics below.
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            self.log.debug('Passed: {0}'.format(part.device))
            tags = [part.fstype] if self._tag_by_filesystem else []
            device_name = part.mountpoint if self._use_mount else part.device
            pmets = self._collect_part_metrics(part, disk_usage)
            used = 'system.disk.used'
            free = 'system.disk.free'
            # FIX: force float division. If the used/free counters are ints,
            # Python 2's `/` floors and pct_usage collapses to 0 or 100.
            pmets['system.disk.pct_usage'] = (float(pmets[used]) / (pmets[used] + pmets[free])) * 100
            # Legacy check used names like "c:" vs psutil's "C:\\".
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()
            for metric_name, metric_value in pmets.iteritems():
                self.gauge(metric_name, metric_value,
                           tags=tags, device_name=device_name)
        if Platform.is_win32():
            self.collect_latency_metrics()