def check(self, instance):
    if instance is None:
        instance = {}

    self._excluded_ifaces = instance.get('excluded_interfaces', [])
    self._collect_cx_state = instance.get('collect_connection_state', False)
    self._collect_rate_metrics = instance.get('collect_rate_metrics', True)
    self._collect_count_metrics = instance.get('collect_count_metrics', False)

    # This decides whether we should split or combine connection states,
    # along with a few other things
    self._setup_metrics(instance)

    self._exclude_iface_re = None
    exclude_re = instance.get('excluded_interface_re', None)
    if exclude_re:
        self.log.debug("Excluding network devices matching: %s", exclude_re)
        self._exclude_iface_re = re.compile(exclude_re)

    if Platform.is_linux():
        self._check_linux(instance)
    elif Platform.is_bsd():
        self._check_bsd(instance)
    elif Platform.is_solaris():
        self._check_solaris(instance)
    elif Platform.is_windows():
        self._check_psutil(instance)
def check(self, instance):
    if instance is None:
        instance = {}

    self._excluded_ifaces = instance.get('excluded_interfaces', [])
    if not isinstance(self._excluded_ifaces, list):
        raise ConfigurationError(
            "Expected 'excluded_interfaces' to be a list, got '{}'".format(
                type(self._excluded_ifaces).__name__))

    self._collect_cx_state = instance.get('collect_connection_state', False)
    self._collect_rate_metrics = instance.get('collect_rate_metrics', True)
    self._collect_count_metrics = instance.get('collect_count_metrics', False)

    # This decides whether we should split or combine connection states,
    # along with a few other things
    self._setup_metrics(instance)

    self._exclude_iface_re = None
    exclude_re = instance.get('excluded_interface_re', None)
    if exclude_re:
        self.log.debug("Excluding network devices matching: %s", exclude_re)
        self._exclude_iface_re = re.compile(exclude_re)

    if Platform.is_linux():
        self._check_linux(instance)
    elif Platform.is_bsd():
        self._check_bsd(instance)
    elif Platform.is_solaris():
        self._check_solaris(instance)
    elif Platform.is_windows():
        self._check_psutil(instance)
def psutil_wrapper(self, process, method, accessors, try_sudo, *args, **kwargs):
    """
    A wrapper around psutil that calls
    * psutil.method(*args, **kwargs) and returns the result
    OR
    * psutil.method(*args, **kwargs).accessor for each accessor given in
      a list, with the results indexed in a dictionary by accessor name
    """
    if accessors is None:
        result = None
    else:
        result = {}

    # Ban certain methods that we know fail
    if method == 'num_fds' and not Platform.is_unix():
        return result
    elif method == 'num_handles' and not Platform.is_win32():
        return result

    try:
        res = getattr(process, method)(*args, **kwargs)
        if accessors is None:
            result = res
        else:
            for acc in accessors:
                try:
                    result[acc] = getattr(res, acc)
                except AttributeError:
                    self.log.debug("psutil.%s().%s attribute does not exist", method, acc)
    except (NotImplementedError, AttributeError):
        self.log.debug("psutil method %s not implemented", method)
    except psutil.AccessDenied:
        self.log.debug("psutil was denied access for method %s", method)
        if method == 'num_fds' and Platform.is_unix() and try_sudo:
            try:
                # It is up to the agent's packager to grant the
                # corresponding sudo policy on unix platforms
                ls_args = ['sudo', 'ls', '/proc/{}/fd/'.format(process.pid)]
                process_ls = subprocess.check_output(ls_args)
                result = len(process_ls.splitlines())
            except subprocess.CalledProcessError as e:
                self.log.exception(
                    "trying to retrieve %s with sudo failed with return code %s",
                    method, e.returncode)
            except Exception:
                self.log.exception("trying to retrieve %s with sudo also failed", method)
    except psutil.NoSuchProcess:
        self.warning("Process %s disappeared while scanning", process.pid)

    return result
def _assert_complex_config(aggregator):
    # Test service check
    aggregator.assert_service_check('mysql.can_connect', status=MySql.OK,
                                    tags=tags.SC_TAGS, count=1)
    aggregator.assert_service_check('mysql.replication.slave_running', status=MySql.OK,
                                    tags=tags.SC_TAGS, at_least=1)

    testable_metrics = (
        variables.STATUS_VARS
        + variables.VARIABLES_VARS
        + variables.INNODB_VARS
        + variables.BINLOG_VARS
        + variables.SYSTEM_METRICS
        + variables.SCHEMA_VARS
        + variables.SYNTHETIC_VARS
    )

    if MYSQL_VERSION_PARSED >= parse_version('5.6') and environ.get('MYSQL_FLAVOR') != 'mariadb':
        testable_metrics.extend(variables.PERFORMANCE_VARS)

    # Test metrics
    for mname in testable_metrics:
        # These metrics are currently not guaranteed outside of a Linux
        # environment.
        if mname == 'mysql.performance.user_time' and not Platform.is_linux():
            continue
        if mname == 'mysql.performance.kernel_time' and not Platform.is_linux():
            continue
        if mname == 'mysql.performance.cpu_time' and Platform.is_windows():
            continue

        if mname == 'mysql.performance.query_run_time.avg':
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:testdb'], count=1)
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:mysql'], count=1)
        elif mname == 'mysql.info.schema.size':
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:testdb'], count=1)
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:information_schema'], count=1)
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:performance_schema'], count=1)
        else:
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS, at_least=0)

    # TODO: test this if it is implemented
    # Assert service metadata
    # version_metadata = mysql_check.service_metadata['version']
    # assert len(version_metadata) == 1

    # test custom query metrics
    aggregator.assert_metric('alice.age', value=25)
    aggregator.assert_metric('bob.age', value=20)

    # test optional metrics
    optional_metrics = (
        variables.OPTIONAL_REPLICATION_METRICS
        + variables.OPTIONAL_INNODB_VARS
        + variables.OPTIONAL_STATUS_VARS
        + variables.OPTIONAL_STATUS_VARS_5_6_6
    )
    _test_optional_metrics(aggregator, optional_metrics, 1)

    # Raises when coverage < 100%
    aggregator.assert_all_metrics_covered()
def psutil_wrapper(self, process, method, accessors=None, *args, **kwargs):
    """
    A wrapper around psutil that calls
    * psutil.method(*args, **kwargs) and returns the result
    OR
    * psutil.method(*args, **kwargs).accessor for each accessor given in
      a list, with the results indexed in a dictionary by accessor name
    """
    if accessors is None:
        result = None
    else:
        result = {}

    # Ban certain methods that we know fail
    if method == 'num_fds' and not Platform.is_unix():
        return result
    elif method == 'num_handles' and not Platform.is_win32():
        return result

    # Try running `num_fds` with sudo if possible
    if method == 'num_fds' and self.try_sudo:
        self.log.debug("Running num_fds using sudo")
        try:
            ls_args = ['sudo', 'ls', '/proc/{}/fd/'.format(process.pid)]
            process_ls = subprocess.check_output(ls_args)
            result = len(process_ls.splitlines())
        except Exception as e:
            self.log.exception("Trying to retrieve %s with sudo failed with error: %s", method, e)
    else:
        try:
            res = getattr(process, method)(*args, **kwargs)
            if accessors is None:
                result = res
            else:
                for acc in accessors:
                    try:
                        result[acc] = getattr(res, acc)
                    except AttributeError:
                        self.log.debug("psutil.%s().%s attribute does not exist", method, acc)
        except (NotImplementedError, AttributeError):
            self.log.debug("psutil method %s not implemented", method)
        except psutil.AccessDenied:
            self.log.debug("psutil was denied access for method %s", method)
        except psutil.NoSuchProcess:
            self.log.debug("Process %s disappeared while scanning", process.pid)

    return result
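# A minimal usage sketch, not from the source: `check` stands in for a
# hypothetical ProcessCheck instance with try_sudo disabled.
# psutil.Process.memory_info() exposes `rss` and `vms` attributes on all
# platforms, so the wrapper returns them keyed by accessor name.
import psutil

proc = psutil.Process()  # current process
mem = check.psutil_wrapper(proc, 'memory_info', accessors=['rss', 'vms'])
if mem:
    print(mem.get('rss'), mem.get('vms'))  # resident and virtual sizes, in bytes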
def init_db():
    # Exit if we are not on Linux: that's the only platform where
    # the client successfully installs for version 3.10
    if not Platform.is_linux():
        return

    import aerospike

    # sample Aerospike Python Client code
    # https://www.aerospike.com/docs/client/python/usage/kvs/write.html
    client = aerospike.client({'hosts': [(HOST, PORT)]}).connect()

    key = ('test', 'characters', 'bender')
    bins = {
        'name': 'Bender',
        'serialnum': 2716057,
        'lastsentence': {
            'BBS': "Well, we're boned",
            'TBwaBB': 'I love you, meatbags!',
            'BG': 'Whip harder, Professor!',
            'ltWGY': 'Into the breach, meatbags. Or not, whatever',
        },
        'composition': ['40% zinc', '40% titanium', '30% iron', '40% dolomite'],
        'apartment': bytearray(b'\x24'),
        'quote_cnt': 47,
    }
    client.put(key, bins)

    for _ in range(10):
        client.get(key)

    client.close()
def exclude_disk(self, part):
    # Skip CD-ROM drives with no disk in them; they may raise ENOENT,
    # pop up a Windows GUI error for a non-ready partition, or just hang.
    # Also skip all the other excluded disks.
    skip_win = Platform.is_win32() and ('cdrom' in part.opts or part.fstype == '')
    return skip_win or self._exclude_disk(part.device, part.fstype, part.mountpoint)
def collect_metrics_psutil(self):
    self._valid_disks = {}
    for part in psutil.disk_partitions(all=True):
        # we check all exclude conditions
        if self.exclude_disk(part):
            continue

        # Get disk metrics here to be able to exclude on total usage
        try:
            disk_usage = timeout(5)(psutil.disk_usage)(part.mountpoint)
        except TimeoutException:
            self.log.warning(
                u'Timeout while retrieving the disk usage of `%s` mountpoint. Skipping...',
                part.mountpoint
            )
            continue
        except Exception as e:
            self.log.warning('Unable to get disk metrics for %s: %s', part.mountpoint, e)
            continue

        # Exclude disks with total disk size 0
        if disk_usage.total == 0:
            continue

        # For later, latency metrics
        self._valid_disks[part.device] = (part.fstype, part.mountpoint)
        self.log.debug('Passed: %s', part.device)

        device_name = part.mountpoint if self._use_mount else part.device

        tags = [part.fstype, 'filesystem:{}'.format(part.fstype)] if self._tag_by_filesystem else []
        tags.extend(self._custom_tags)

        # apply device/mountpoint specific tags
        for regex, device_tags in self._device_tag_re:
            if regex.match(device_name):
                tags.extend(device_tags)

        if self.devices_label.get(device_name):
            tags.append(self.devices_label.get(device_name))

        # legacy check names c: vs psutil name C:\\
        if Platform.is_win32():
            device_name = device_name.strip('\\').lower()

        tags.append('device:{}'.format(device_name))
        for metric_name, metric_value in iteritems(self._collect_part_metrics(part, disk_usage)):
            self.gauge(metric_name, metric_value, tags=tags)

        # Add in a disk read write or read only check
        if self._service_check_rw:
            rwro = {'rw', 'ro'} & set(part.opts.split(','))
            if len(rwro) == 1:
                self.service_check(
                    'disk.read_write',
                    AgentCheck.OK if rwro.pop() == 'rw' else AgentCheck.CRITICAL,
                    tags=tags
                )
            else:
                self.service_check('disk.read_write', AgentCheck.UNKNOWN, tags=tags)

    self.collect_latency_metrics()
def set_default_driver_conf():
    if Platform.is_containerized():
        # Use default `./driver_config/odbcinst.ini` when the Agent is running in docker.
        # `freetds` is shipped with the Docker Agent.
        os.environ.setdefault('ODBCSYSINI', DRIVER_CONFIG_DIR)
    else:
        # required when using pyodbc with FreeTDS on Ubuntu 18.04
        # see https://stackoverflow.com/a/22988748/1258743
        os.environ.setdefault('TDSVER', '8.0')
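# Illustration, not from the source: os.environ.setdefault only writes the key
# when it is absent, so a user-provided ODBCSYSINI always wins over the bundled
# default. The paths below are hypothetical.
import os

os.environ['ODBCSYSINI'] = '/etc/custom_odbc'          # user already set it
os.environ.setdefault('ODBCSYSINI', '/opt/driver_config')  # no-op in that case
print(os.environ['ODBCSYSINI'])  # -> /etc/custom_odbc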
def check(self, instance): """Get disk space/inode stats""" if self._tag_by_label and Platform.is_linux(): self.devices_label = self._get_devices_label() # Windows and Mac will always have psutil # (we have packaged for both of them) if self._psutil(): self.collect_metrics_psutil() else: # FIXME: implement all_partitions (df -a) self.collect_metrics_manually()
def uds_path():
    if Platform.is_mac():
        # See: https://github.com/docker/for-mac/issues/483
        pytest.skip('Sharing Unix sockets is not supported by Docker for Mac.')
    if Platform.is_windows():
        pytest.skip('Nginx does not run on Windows.')

    with TempDir() as tmp_dir:
        compose_file = os.path.join(HERE, 'compose', 'uds.yaml')
        uds_filename = 'tmp.sock'
        uds_path = os.path.join(tmp_dir, uds_filename)
        with docker_run(
            compose_file=compose_file,
            env_vars={
                'UDS_HOST_DIRECTORY': tmp_dir,
                'UDS_FILENAME': uds_filename,
            },
        ):
            yield uds_path
def mock_server():
    if Platform.is_windows():
        compose_filename = 'docker-compose-windows.yaml'
    else:
        compose_filename = 'docker-compose.yaml'

    compose_file = os.path.join(common.HERE, 'compose', 'mock_server', compose_filename)
    env_vars = {'MOCK_SERVER_PORT': str(common.MOCK_SERVER_PORT)}
    with docker_run(compose_file, env_vars=env_vars):
        yield
def __init__(self, name, init_config, instances):
    super(ProcessCheck, self).__init__(name, init_config, instances)

    self.name = self.instance.get('name', None)
    self.tags = self.instance.get('tags', [])
    self.exact_match = is_affirmative(self.instance.get('exact_match', True))
    self.search_string = self.instance.get('search_string', None)
    self.ignore_ad = is_affirmative(self.instance.get('ignore_denied_access', True))
    self.pid = self.instance.get('pid')
    self.pid_file = self.instance.get('pid_file')
    self.collect_children = is_affirmative(self.instance.get('collect_children', False))
    self.user = self.instance.get('user', False)
    self.try_sudo = self.instance.get('try_sudo', False)

    # ad stands for access denied
    # We cache the PIDs getting this error and don't iterate on them more often
    # than `access_denied_cache_duration`.
    # This cache is for all PIDs, so it's global, but it should be refreshed by instance.
    self.last_ad_cache_ts = {}
    self.ad_cache = set()
    self.access_denied_cache_duration = int(
        init_config.get('access_denied_cache_duration', DEFAULT_AD_CACHE_DURATION))

    # By default cache the PID list for a while.
    # Sometimes it's not wanted b/c it can mess with no-data monitoring.
    # This cache is indexed per instance.
    self.last_pid_cache_ts = {}
    self.pid_cache = {}
    self.pid_cache_duration = int(
        init_config.get('pid_cache_duration', DEFAULT_PID_CACHE_DURATION))

    self._conflicting_procfs = False
    self._deprecated_init_procfs = False
    if Platform.is_linux():
        procfs_path = init_config.get('procfs_path')
        if procfs_path:
            agent_procfs_path = datadog_agent.get_config('procfs_path')
            if agent_procfs_path and procfs_path != agent_procfs_path.rstrip('/'):
                self._conflicting_procfs = True
            else:
                self._deprecated_init_procfs = True
            psutil.PROCFS_PATH = procfs_path

    # Process cache, indexed by instance
    self.process_cache = defaultdict(dict)

    self.process_list_cache.cache_duration = int(
        init_config.get('shared_process_list_cache_duration', DEFAULT_SHARED_PROCESS_LIST_CACHE_DURATION))
def _collect_part_metrics(self, part, usage):
    metrics = {}
    for name in ['total', 'used', 'free']:
        # For legacy reasons, the standard unit is kB
        metrics[self.METRIC_DISK.format(name)] = getattr(usage, name) / 1024

    # FIXME: 6.x, use percent, a lot more logical than in_use
    metrics[self.METRIC_DISK.format('in_use')] = usage.percent / 100

    if Platform.is_unix():
        metrics.update(self._collect_inodes_metrics(part.mountpoint))

    return metrics
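# Worked example, not from the source: psutil.disk_usage() reports bytes and a
# 0-100 percentage; the namedtuple below mimics its result to show the two
# conversions used above.
from collections import namedtuple

Usage = namedtuple('Usage', ['total', 'used', 'free', 'percent'])
usage = Usage(total=500 * 1024 ** 3, used=200 * 1024 ** 3, free=300 * 1024 ** 3, percent=40.0)

print(usage.total / 1024)   # bytes -> kB: 524288000.0
print(usage.percent / 100)  # the `in_use` ratio: 0.4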
def test_check_real_process_regex(aggregator, dd_run_check):
    "Check to specifically find this python pytest running process using regex."
    from datadog_checks.base.utils.platform import Platform

    instance = {
        'name': 'py',
        'search_string': ['.*python.*pytest'],
        'exact_match': False,
        'ignored_denied_access': True,
        'thresholds': {'warning': [1, 10], 'critical': [1, 100]},
    }
    process = ProcessCheck(common.CHECK_NAME, {}, [instance])
    expected_tags = generate_expected_tags(instance)
    dd_run_check(process)
    for mname in common.PROCESS_METRIC:
        # cases where we don't actually expect some metrics here:
        #  - if io_counters() is not available
        #  - if memory_info_ex() is not available
        #  - first run so no `cpu.pct`
        if (
            (not _PSUTIL_IO_COUNTERS and '.io' in mname)
            or (not _PSUTIL_MEM_SHARED and 'mem.real' in mname)
            or mname == 'system.processes.cpu.pct'
        ):
            continue

        if Platform.is_windows():
            metric = common.UNIX_TO_WINDOWS_MAP.get(mname, mname)
        else:
            metric = mname
        aggregator.assert_metric(metric, at_least=1, tags=expected_tags)

    aggregator.assert_service_check('process.up', count=1, tags=expected_tags + ['process:py'])

    # this requires another run
    dd_run_check(process)
    aggregator.assert_metric('system.processes.cpu.pct', count=1, tags=expected_tags)
    aggregator.assert_metric('system.processes.cpu.normalized_pct', count=1, tags=expected_tags)
def uds_path():
    if Platform.is_mac():
        # See: https://github.com/docker/for-mac/issues/483
        pytest.skip('Sharing Unix sockets is not supported by Docker for Mac.')

    with TempDir() as tmp_dir:
        compose_file = os.path.join(HERE, 'compose', 'uds.yaml')
        uds_filename = 'tmp.sock'
        uds_path = os.path.join(tmp_dir, uds_filename)
        with docker_run(
            compose_file=compose_file,
            env_vars={
                'UDS_HOST_DIRECTORY': tmp_dir,
                'UDS_FILENAME': uds_filename,
            },
            conditions=[WaitFor(lambda: os.path.exists(uds_path))],
            attempts=2,
        ):
            yield uds_path
def get_pagefault_stats(self, pid):
    if not Platform.is_linux():
        return None

    def file_to_string(path):
        with open(path, 'r') as f:
            res = f.read()
        return res

    # http://man7.org/linux/man-pages/man5/proc.5.html
    try:
        data = file_to_string('/{}/{}/stat'.format(psutil.PROCFS_PATH, pid))
    except Exception:
        self.log.debug(
            'error getting proc stats: file_to_string failed for /%s/%s/stat',
            psutil.PROCFS_PATH, pid)
        return None
    return (int(i) for i in data.split()[9:13])
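# Worked example, not from the source (Linux-only): per proc(5), fields 10-13
# of /proc/<pid>/stat (the [9:13] slice after split) are minflt, cminflt,
# majflt and cmajflt. Note that a naive split() can misalign these if the
# process name in field 2 contains spaces.
import os

with open('/proc/{}/stat'.format(os.getpid())) as f:
    fields = f.read().split()

minflt, cminflt, majflt, cmajflt = (int(i) for i in fields[9:13])
print(minflt, majflt)  # minor and major page fault counts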
def __init__(self, name, init_config, instances=None):
    super(ProcessCheck, self).__init__(name, init_config, instances)

    # ad stands for access denied
    # We cache the PIDs getting this error and don't iterate on them more often
    # than `access_denied_cache_duration`.
    # This cache is for all PIDs, so it's global, but it should be refreshed by instance.
    self.last_ad_cache_ts = {}
    self.ad_cache = set()
    self.access_denied_cache_duration = int(
        init_config.get('access_denied_cache_duration', DEFAULT_AD_CACHE_DURATION))

    # By default cache the PID list for a while.
    # Sometimes it's not wanted b/c it can mess with no-data monitoring.
    # This cache is indexed per instance.
    self.last_pid_cache_ts = {}
    self.pid_cache = {}
    self.pid_cache_duration = int(
        init_config.get('pid_cache_duration', DEFAULT_PID_CACHE_DURATION))

    self._conflicting_procfs = False
    self._deprecated_init_procfs = False
    if Platform.is_linux():
        procfs_path = init_config.get('procfs_path')
        if procfs_path:
            agent_procfs_path = datadog_agent.get_config('procfs_path')
            if agent_procfs_path and procfs_path != agent_procfs_path.rstrip('/'):
                self._conflicting_procfs = True
            else:
                self._deprecated_init_procfs = True
            psutil.PROCFS_PATH = procfs_path

    # Process cache, indexed by instance
    self.process_cache = defaultdict(dict)

    self.process_list_cache.cache_duration = int(
        init_config.get('shared_process_list_cache_duration', DEFAULT_SHARED_PROCESS_LIST_CACHE_DURATION))
def test_complex_config_replica(aggregator, instance_complex):
    mysql_check = MySql(common.CHECK_NAME, {}, {})
    config = copy.deepcopy(instance_complex)
    config['port'] = common.SLAVE_PORT

    mysql_check.check(config)

    # self.assertMetricTag('mysql.replication.seconds_behind_master', 'channel:default')

    # Test service check
    aggregator.assert_service_check('mysql.can_connect', status=MySql.OK,
                                    tags=tags.SC_TAGS_REPLICA, count=1)

    # Travis MySQL not running replication - FIX in flavored test.
    aggregator.assert_service_check(
        'mysql.replication.slave_running', status=MySql.OK, tags=tags.SC_TAGS_REPLICA, at_least=1
    )

    testable_metrics = (
        variables.STATUS_VARS
        + variables.VARIABLES_VARS
        + variables.INNODB_VARS
        + variables.BINLOG_VARS
        + variables.SYSTEM_METRICS
        + variables.SCHEMA_VARS
        + variables.SYNTHETIC_VARS
    )

    if MYSQL_VERSION_PARSED >= parse_version('5.6') and environ.get('MYSQL_FLAVOR') != 'mariadb':
        testable_metrics.extend(variables.PERFORMANCE_VARS)

    # Test metrics
    for mname in testable_metrics:
        # These metrics are currently not guaranteed outside of a Linux
        # environment.
        if mname == 'mysql.performance.user_time' and not Platform.is_linux():
            continue
        if mname == 'mysql.performance.kernel_time' and not Platform.is_linux():
            continue
        if mname == 'mysql.performance.cpu_time' and Platform.is_windows():
            continue

        if mname == 'mysql.performance.query_run_time.avg':
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:testdb'], at_least=1)
        elif mname == 'mysql.info.schema.size':
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:testdb'], count=1)
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:information_schema'], count=1)
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:performance_schema'], count=1)
        else:
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS, at_least=0)

    # test custom query metrics
    aggregator.assert_metric('alice.age', value=25)
    aggregator.assert_metric('bob.age', value=20)

    # test optional metrics
    optional_metrics = (
        variables.OPTIONAL_REPLICATION_METRICS
        + variables.OPTIONAL_INNODB_VARS
        + variables.OPTIONAL_STATUS_VARS
        + variables.OPTIONAL_STATUS_VARS_5_6_6
    )
    _test_optional_metrics(aggregator, optional_metrics, 1)

    # Raises when coverage < 100%
    aggregator.assert_all_metrics_covered()
    for name in gauge_metrics:
        aggregator.assert_metric(name, count=0)

    for name in rate_metrics:
        aggregator.assert_metric_has_tag(name, 'device:{}'.format(DEFAULT_DEVICE_NAME))
        aggregator.assert_metric_has_tag(name, 'device_name:{}'.format(DEFAULT_DEVICE_BASE_NAME))

    for name in count_metrics:
        aggregator.assert_metric_has_tag(name, 'device:{}'.format(DEFAULT_DEVICE_NAME))
        aggregator.assert_metric_has_tag(name, 'device_name:{}'.format(DEFAULT_DEVICE_BASE_NAME))

    aggregator.assert_all_metrics_covered()


@pytest.mark.skipif(not Platform.is_linux(), reason='disk labels are only available on Linux')
@pytest.mark.usefixtures('psutil_mocks')
def test_labels_from_blkid_cache_file(
    aggregator, instance_blkid_cache_file, gauge_metrics, rate_metrics, count_metrics
):
    """
    Verify that the disk labels are set when the blkid_cache_file option is set
    """
    c = Disk('disk', {}, [instance_blkid_cache_file])
    c.check(instance_blkid_cache_file)
    for metric in chain(gauge_metrics, rate_metrics, count_metrics):
        aggregator.assert_metric(
            metric, tags=['device:/dev/sda1', 'device_name:sda1', 'label:MYLABEL', 'device_label:MYLABEL']
        )
                    __name__='disk_usage'):
        c.check(instance)

    for name in gauge_metrics:
        aggregator.assert_metric(name, count=0)

    for name in rate_metrics:
        aggregator.assert_metric_has_tag(name, 'device:{}'.format(DEFAULT_DEVICE_NAME))
        aggregator.assert_metric_has_tag(name, 'device_name:{}'.format(DEFAULT_DEVICE_BASE_NAME))

    aggregator.assert_all_metrics_covered()


@pytest.mark.skipif(not Platform.is_linux(), reason='disk labels are only available on Linux')
@pytest.mark.usefixtures('psutil_mocks')
def test_labels_from_blkid_cache_file(aggregator, instance_blkid_cache_file, gauge_metrics, rate_metrics):
    """
    Verify that the disk labels are set when the blkid_cache_file option is set
    """
    c = Disk('disk', {}, [instance_blkid_cache_file])
    c.check(instance_blkid_cache_file)
    for metric in chain(gauge_metrics, rate_metrics):
        aggregator.assert_metric(
            metric, tags=['device:/dev/sda1', 'device_name:sda1', 'label:MYLABEL', 'device_label:MYLABEL']
        )
def _check_bsd(self, instance):
    netstat_flags = ['-i', '-b']

    custom_tags = instance.get('tags', [])

    # FreeBSD's netstat truncates device names unless you pass '-W'
    if Platform.is_freebsd():
        netstat_flags.append('-W')

    try:
        output, _, _ = get_subprocess_output(["netstat"] + netstat_flags, self.log)
        lines = output.splitlines()
        # Name Mtu Network Address Ipkts Ierrs Ibytes Opkts Oerrs Obytes Coll
        # lo0 16384 <Link#1> 318258 0 428252203 318258 0 428252203 0
        # lo0 16384 localhost fe80:1::1 318258 - 428252203 318258 - 428252203 -
        # lo0 16384 127 localhost 318258 - 428252203 318258 - 428252203 -
        # lo0 16384 localhost ::1 318258 - 428252203 318258 - 428252203 -
        # gif0* 1280 <Link#2> 0 0 0 0 0 0 0
        # stf0* 1280 <Link#3> 0 0 0 0 0 0 0
        # en0 1500 <Link#4> 04:0c:ce:db:4e:fa 20801309 0 13835457425 15149389 0 11508790198 0
        # en0 1500 seneca.loca fe80:4::60c:ceff: 20801309 - 13835457425 15149389 - 11508790198 -
        # en0 1500 2001:470:1f 2001:470:1f07:11d 20801309 - 13835457425 15149389 - 11508790198 -
        # en0 1500 2001:470:1f 2001:470:1f07:11d 20801309 - 13835457425 15149389 - 11508790198 -
        # en0 1500 192.168.1 192.168.1.63 20801309 - 13835457425 15149389 - 11508790198 -
        # en0 1500 2001:470:1f 2001:470:1f07:11d 20801309 - 13835457425 15149389 - 11508790198 -
        # p2p0 2304 <Link#5> 06:0c:ce:db:4e:fa 0 0 0 0 0 0 0
        # ham0 1404 <Link#6> 7a:79:05:4d:bf:f5 30100 0 6815204 18742 0 8494811 0
        # ham0 1404 5 5.77.191.245 30100 - 6815204 18742 - 8494811 -
        # ham0 1404 seneca.loca fe80:6::7879:5ff: 30100 - 6815204 18742 - 8494811 -
        # ham0 1404 2620:9b::54 2620:9b::54d:bff5 30100 - 6815204 18742 - 8494811 -
        headers = lines[0].split()

        # Given the irregular structure of the table above, better to parse from the end of each line
        # Verify headers first
        #          -7       -6       -5        -4       -3       -2        -1
        for h in ("Ipkts", "Ierrs", "Ibytes", "Opkts", "Oerrs", "Obytes", "Coll"):
            if h not in headers:
                self.log.error("%s not found in %s; cannot parse", h, headers)
                return False

        current = None
        for l in lines[1:]:
            # Another header row, abort now, this is IPv6 land
            if "Name" in l:
                break

            x = l.split()
            if len(x) == 0:
                break

            iface = x[0]
            if iface.endswith("*"):
                iface = iface[:-1]
            if iface == current:
                # skip multiple lines of same interface
                continue
            else:
                current = iface

            # Filter inactive interfaces
            if self._parse_value(x[-5]) or self._parse_value(x[-2]):
                iface = current
                metrics = {
                    'bytes_rcvd': self._parse_value(x[-5]),
                    'bytes_sent': self._parse_value(x[-2]),
                    'packets_in.count': self._parse_value(x[-7]),
                    'packets_in.error': self._parse_value(x[-6]),
                    'packets_out.count': self._parse_value(x[-4]),
                    'packets_out.error': self._parse_value(x[-3]),
                }
                self._submit_devicemetrics(iface, metrics, custom_tags)
    except SubprocessOutputEmptyError:
        self.log.exception("Error collecting connection stats.")

    try:
        netstat, _, _ = get_subprocess_output(["netstat", "-s", "-p", "tcp"], self.log)
        # 3651535 packets sent
        # 972097 data packets (615753248 bytes)
        # 5009 data packets (2832232 bytes) retransmitted
        # 0 resends initiated by MTU discovery
        # 2086952 ack-only packets (471 delayed)
        # 0 URG only packets
        # 0 window probe packets
        # 310851 window update packets
        # 336829 control packets
        # 0 data packets sent after flow control
        # 3058232 checksummed in software
        # 3058232 segments (571218834 bytes) over IPv4
        # 0 segments (0 bytes) over IPv6
        # 4807551 packets received
        # 1143534 acks (for 616095538 bytes)
        # 165400 duplicate acks
        # ...
        self._submit_regexed_values(netstat, BSD_TCP_METRICS, custom_tags)
    except SubprocessOutputEmptyError:
        self.log.exception("Error collecting TCP stats.")

    proc_location = self.agentConfig.get('procfs_path', '/proc').rstrip('/')

    net_proc_base_location = self._get_net_proc_base_location(proc_location)

    if self._is_collect_cx_state_runnable(net_proc_base_location):
        try:
            self.log.debug("Using `netstat` to collect connection state")
            output_TCP, _, _ = get_subprocess_output(["netstat", "-n", "-a", "-p", "tcp"], self.log)
            output_UDP, _, _ = get_subprocess_output(["netstat", "-n", "-a", "-p", "udp"], self.log)
            lines = output_TCP.splitlines() + output_UDP.splitlines()
            # Active Internet connections (w/o servers)
            # Proto Recv-Q Send-Q Local Address Foreign Address State
            # tcp 0 0 46.105.75.4:80 79.220.227.193:2032 SYN_RECV
            # tcp 0 0 46.105.75.4:143 90.56.111.177:56867 ESTABLISHED
            # tcp 0 0 46.105.75.4:50468 107.20.207.175:443 TIME_WAIT
            # tcp6 0 0 46.105.75.4:80 93.15.237.188:58038 FIN_WAIT2
            # tcp6 0 0 46.105.75.4:80 79.220.227.193:2029 ESTABLISHED
            # udp 0 0 0.0.0.0:123 0.0.0.0:*
            # udp6 0 0 :::41458 :::*

            metrics = self._parse_linux_cx_state(lines[2:], self.tcp_states['netstat'], 5)
            for metric, value in iteritems(metrics):
                self.gauge(metric, value, tags=custom_tags)
        except SubprocessOutputEmptyError:
            self.log.exception("Error collecting connection states.")
# In order to collect connection state we need the `ss` command, which is included in the `iproute2` package
E2E_METADATA = {'start_commands': ['apt-get update', 'apt-get install iproute2 -y']}

EXPECTED_METRICS = [
    'system.net.bytes_rcvd',
    'system.net.bytes_sent',
    'system.net.packets_in.count',
    'system.net.packets_in.error',
    'system.net.packets_out.count',
    'system.net.packets_out.error',
]

if Platform.is_linux() or Platform.is_windows():
    EXPECTED_METRICS.extend([
        'system.net.packets_in.drop',
        'system.net.packets_out.drop',
    ])

E2E_EXPECTED_METRICS = EXPECTED_METRICS + [
    "system.net.tcp4.closing",
    "system.net.tcp4.established",
    "system.net.tcp4.listening",
    "system.net.tcp4.opening",
    "system.net.tcp4.time_wait",
    "system.net.tcp6.closing",
    "system.net.tcp6.established",
    "system.net.tcp6.listening",
    "system.net.tcp6.opening",
def test_complex_config_replica(aggregator, instance_complex):
    config = copy.deepcopy(instance_complex)
    config['port'] = common.SLAVE_PORT
    mysql_check = MySql(common.CHECK_NAME, {}, instances=[config])

    mysql_check.check(config)

    # Test service check
    aggregator.assert_service_check('mysql.can_connect', status=MySql.OK,
                                    tags=tags.SC_TAGS_REPLICA, count=1)

    # Travis MySQL not running replication - FIX in flavored test.
    aggregator.assert_service_check(
        'mysql.replication.slave_running',
        status=MySql.OK,
        tags=tags.SC_TAGS_REPLICA + ['replication_mode:replica'],
        at_least=1,
    )

    testable_metrics = (
        variables.STATUS_VARS
        + variables.COMPLEX_STATUS_VARS
        + variables.VARIABLES_VARS
        + variables.COMPLEX_VARIABLES_VARS
        + variables.INNODB_VARS
        + variables.COMPLEX_INNODB_VARS
        + variables.BINLOG_VARS
        + variables.SYSTEM_METRICS
        + variables.SCHEMA_VARS
        + variables.SYNTHETIC_VARS
        + variables.STATEMENT_VARS
    )

    if MYSQL_VERSION_PARSED >= parse_version('5.6') and environ.get('MYSQL_FLAVOR') != 'mariadb':
        testable_metrics.extend(variables.PERFORMANCE_VARS)

    # Test metrics
    for mname in testable_metrics:
        # These metrics are currently not guaranteed outside of a Linux
        # environment.
        if mname == 'mysql.performance.user_time' and not Platform.is_linux():
            continue
        if mname == 'mysql.performance.kernel_time' and not Platform.is_linux():
            continue
        if mname == 'mysql.performance.cpu_time' and Platform.is_windows():
            continue

        if mname == 'mysql.performance.query_run_time.avg':
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:testdb'], at_least=1)
        elif mname == 'mysql.info.schema.size':
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:testdb'], count=1)
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:information_schema'], count=1)
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:performance_schema'], count=1)
        else:
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS, at_least=0)

    # test custom query metrics
    aggregator.assert_metric('alice.age', value=25)
    aggregator.assert_metric('bob.age', value=20)

    # test optional metrics
    optional_metrics = (
        variables.OPTIONAL_REPLICATION_METRICS
        + variables.OPTIONAL_INNODB_VARS
        + variables.OPTIONAL_STATUS_VARS
        + variables.OPTIONAL_STATUS_VARS_5_6_6
    )
    # Note: this assertion will pass even if some optional metrics are not present.
    # Manual testing is required for optional metrics.
    _test_optional_metrics(aggregator, optional_metrics)

    # Raises when coverage < 100%
    aggregator.assert_all_metrics_covered()
    aggregator.assert_metrics_using_metadata(
        get_metadata_metrics(),
        check_submission_type=True,
        exclude=['alice.age', 'bob.age'] + variables.STATEMENT_VARS,
    )
def check(self, instance):
    """Get disk space/inode stats"""
    if self._tag_by_label and Platform.is_linux():
        self.devices_label = self._get_devices_label()

    self._valid_disks = {}
    for part in psutil.disk_partitions(all=self._include_all_devices):
        # we check all exclude conditions
        if self.exclude_disk(part):
            continue

        # Get disk metrics here to be able to exclude on total usage
        try:
            disk_usage = timeout(self._timeout)(psutil.disk_usage)(part.mountpoint)
        except TimeoutException:
            self.log.warning(
                u'Timeout after %d seconds while retrieving the disk usage of `%s` mountpoint. '
                u'You might want to change the timeout length in the settings.',
                self._timeout,
                part.mountpoint,
            )
            continue
        except Exception as e:
            self.log.warning(
                u'Unable to get disk metrics for %s: %s. '
                u'You can exclude this mountpoint in the settings if it is invalid.',
                part.mountpoint,
                e,
            )
            continue

        # Exclude disks with size less than min_disk_size
        if disk_usage.total <= self._min_disk_size:
            if disk_usage.total > 0:
                self.log.info('Excluding device %s with total disk size %s', part.device, disk_usage.total)
            continue

        # For later, latency metrics
        self._valid_disks[part.device] = (part.fstype, part.mountpoint)
        self.log.debug('Passed: %s', part.device)

        device_name = part.mountpoint if self._use_mount else part.device

        tags = [part.fstype, 'filesystem:{}'.format(part.fstype)] if self._tag_by_filesystem else []
        tags.extend(self._custom_tags)

        # apply device/mountpoint specific tags
        for regex, device_tags in self._device_tag_re:
            if regex.match(device_name):
                tags.extend(device_tags)

        if self.devices_label.get(device_name):
            tags.extend(self.devices_label.get(device_name))

        # legacy check names c: vs psutil name C:\\
        if Platform.is_win32():
            device_name = device_name.strip('\\').lower()

        tags.append('device:{}'.format(device_name))
        tags.append('device_name:{}'.format(_base_device_name(part.device)))

        for metric_name, metric_value in iteritems(self._collect_part_metrics(part, disk_usage)):
            self.gauge(metric_name, metric_value, tags=tags)

        # Add in a disk read write or read only check
        if self._service_check_rw:
            rwro = {'rw', 'ro'} & set(part.opts.split(','))
            if len(rwro) == 1:
                self.service_check(
                    'disk.read_write',
                    AgentCheck.OK if rwro.pop() == 'rw' else AgentCheck.CRITICAL,
                    tags=tags,
                )
            else:
                self.service_check('disk.read_write', AgentCheck.UNKNOWN, tags=tags)

    self.collect_latency_metrics()
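# Worked example, not from the source, of the read-write detection above:
# intersect the mount options with {'rw', 'ro'}; exactly one match yields a
# definitive service check status.
opts = 'rw,relatime,errors=remount-ro'
rwro = {'rw', 'ro'} & set(opts.split(','))
print(rwro)  # -> {'rw'}, so disk.read_write would be submitted as OK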
def set_default_driver_conf():
    if Platform.is_containerized():
        # Use default `./driver_config/odbcinst.ini` when the Agent is running in docker.
        # `freetds` is shipped with the Docker Agent.
        os.environ.setdefault('ODBCSYSINI', DRIVER_CONFIG_DIR)
def _get_net_proc_base_location(proc_location):
    if Platform.is_containerized() and proc_location != "/proc":
        net_proc_base_location = "%s/1" % proc_location
    else:
        net_proc_base_location = proc_location
    return net_proc_base_location
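# Illustration, not from the source: when the Agent runs containerized with the
# host's procfs mounted at a custom path, network stats are read through PID 1's
# view of the host namespaces; otherwise the path is used as-is.
#
#   _get_net_proc_base_location('/host/proc')  # containerized -> '/host/proc/1'
#   _get_net_proc_base_location('/proc')       # default path  -> '/proc'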
import sys
import time

from datadog_checks.base import AgentCheck
from datadog_checks.base.utils.platform import Platform

PY3 = sys.version_info[0] == 3

if PY3:
    # use the higher-precision clock available in Python 3
    time_func = time.perf_counter
else:
    time_func = time.time

# These imports are necessary because otherwise dynamic type
# resolution will fail on Windows without them.
# See more here: https://github.com/rthalley/dnspython/issues/39.
if Platform.is_win32():
    from dns.rdtypes.ANY import *  # noqa
    from dns.rdtypes.IN import *  # noqa

    # For tiny time deltas, time.time on Windows reports the same value
    # of the clock more than once, causing the computed response_time
    # to often be 0; use time.clock, which is more precise.
    if not PY3:
        time_func = time.clock


class BadConfException(Exception):
    pass


class DNSCheck(AgentCheck):
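# A minimal sketch, not from the source, of the timing pattern the check relies
# on (Python 3 branch); time.sleep stands in for the DNS query being measured.
import time

start = time.perf_counter()
time.sleep(0.01)  # stand-in for the resolver call
elapsed = time.perf_counter() - start
print('response_time: {:.1f} ms'.format(elapsed * 1000))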
def _check_bsd(self, instance):
    netstat_flags = ['-i', '-b']

    custom_tags = instance.get('tags', [])

    # FreeBSD's netstat truncates device names unless you pass '-W'
    if Platform.is_freebsd():
        netstat_flags.append('-W')

    try:
        output, _, _ = get_subprocess_output(["netstat"] + netstat_flags, self.log)
        lines = output.splitlines()
        # Name Mtu Network Address Ipkts Ierrs Ibytes Opkts Oerrs Obytes Coll
        # lo0 16384 <Link#1> 318258 0 428252203 318258 0 428252203 0
        # lo0 16384 localhost fe80:1::1 318258 - 428252203 318258 - 428252203 -
        # lo0 16384 127 localhost 318258 - 428252203 318258 - 428252203 -
        # lo0 16384 localhost ::1 318258 - 428252203 318258 - 428252203 -
        # gif0* 1280 <Link#2> 0 0 0 0 0 0 0
        # stf0* 1280 <Link#3> 0 0 0 0 0 0 0
        # en0 1500 <Link#4> 04:0c:ce:db:4e:fa 20801309 0 13835457425 15149389 0 11508790198 0
        # en0 1500 seneca.loca fe80:4::60c:ceff: 20801309 - 13835457425 15149389 - 11508790198 -
        # en0 1500 2001:470:1f 2001:470:1f07:11d 20801309 - 13835457425 15149389 - 11508790198 -
        # en0 1500 2001:470:1f 2001:470:1f07:11d 20801309 - 13835457425 15149389 - 11508790198 -
        # en0 1500 192.168.1 192.168.1.63 20801309 - 13835457425 15149389 - 11508790198 -
        # en0 1500 2001:470:1f 2001:470:1f07:11d 20801309 - 13835457425 15149389 - 11508790198 -
        # p2p0 2304 <Link#5> 06:0c:ce:db:4e:fa 0 0 0 0 0 0 0
        # ham0 1404 <Link#6> 7a:79:05:4d:bf:f5 30100 0 6815204 18742 0 8494811 0
        # ham0 1404 5 5.77.191.245 30100 - 6815204 18742 - 8494811 -
        # ham0 1404 seneca.loca fe80:6::7879:5ff: 30100 - 6815204 18742 - 8494811 -
        # ham0 1404 2620:9b::54 2620:9b::54d:bff5 30100 - 6815204 18742 - 8494811 -
        headers = lines[0].split()

        # Given the irregular structure of the table above, better to parse from the end of each line
        # Verify headers first
        #          -7       -6       -5        -4       -3       -2        -1
        for h in ("Ipkts", "Ierrs", "Ibytes", "Opkts", "Oerrs", "Obytes", "Coll"):
            if h not in headers:
                self.log.error("%s not found in %s; cannot parse", h, headers)
                return False

        current = None
        for l in lines[1:]:
            # Another header row, abort now, this is IPv6 land
            if "Name" in l:
                break

            x = l.split()
            if len(x) == 0:
                break

            iface = x[0]
            if iface.endswith("*"):
                iface = iface[:-1]
            if iface == current:
                # skip multiple lines of same interface
                continue
            else:
                current = iface

            # Filter inactive interfaces
            if self._parse_value(x[-5]) or self._parse_value(x[-2]):
                iface = current
                metrics = {
                    'bytes_rcvd': self._parse_value(x[-5]),
                    'bytes_sent': self._parse_value(x[-2]),
                    'packets_in.count': self._parse_value(x[-7]),
                    'packets_in.error': self._parse_value(x[-6]),
                    'packets_out.count': self._parse_value(x[-4]),
                    'packets_out.error': self._parse_value(x[-3]),
                }
                self._submit_devicemetrics(iface, metrics, custom_tags)
    except SubprocessOutputEmptyError:
        self.log.exception("Error collecting connection stats.")

    try:
        netstat, _, _ = get_subprocess_output(["netstat", "-s", "-p", "tcp"], self.log)
        # 3651535 packets sent
        # 972097 data packets (615753248 bytes)
        # 5009 data packets (2832232 bytes) retransmitted
        # 0 resends initiated by MTU discovery
        # 2086952 ack-only packets (471 delayed)
        # 0 URG only packets
        # 0 window probe packets
        # 310851 window update packets
        # 336829 control packets
        # 0 data packets sent after flow control
        # 3058232 checksummed in software
        # 3058232 segments (571218834 bytes) over IPv4
        # 0 segments (0 bytes) over IPv6
        # 4807551 packets received
        # 1143534 acks (for 616095538 bytes)
        # 165400 duplicate acks
        # ...
        self._submit_regexed_values(netstat, BSD_TCP_METRICS, custom_tags)
    except SubprocessOutputEmptyError:
        self.log.exception("Error collecting TCP stats.")
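# Worked example, not from the source: parsing one of the sample `netstat -i -b`
# rows above from the end of the line, which is what the negative indices in
# _check_bsd implement.
row = "en0 1500 <Link#4> 04:0c:ce:db:4e:fa 20801309 0 13835457425 15149389 0 11508790198 0"
x = row.split()
print(x[-7], x[-6], x[-5])  # Ipkts=20801309, Ierrs=0, Ibytes=13835457425
print(x[-4], x[-3], x[-2])  # Opkts=15149389, Oerrs=0, Obytes=11508790198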
# (C) Datadog, Inc. 2018
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
import os
import sys

from datadog_checks.base.utils.platform import Platform

EMBEDDED_DIR = 'embedded'

if Platform.is_windows():
    EMBEDDED_DIR += str(sys.version_info[0])


def get_ca_certs_path():
    """
    Get a path to the trusted certificates of the system
    """
    for f in _get_ca_certs_paths():
        if os.path.exists(f):
            return f
    return None


def _get_ca_certs_paths():
    """
    Get a list of possible paths containing certificates

    Check is installed via pip to:
    * Windows: embedded/lib/site-packages/datadog_checks/http_check
    * Linux: embedded/lib/python2.7/site-packages/datadog_checks/http_check