Example #1
    def check(self, instance):
        if instance is None:
            instance = {}

        self._excluded_ifaces = instance.get('excluded_interfaces', [])
        self._collect_cx_state = instance.get('collect_connection_state',
                                              False)
        self._collect_rate_metrics = instance.get('collect_rate_metrics', True)
        self._collect_count_metrics = instance.get('collect_count_metrics',
                                                   False)

        # This decides whether we should split or combine connection states,
        # along with a few other things
        self._setup_metrics(instance)

        self._exclude_iface_re = None
        exclude_re = instance.get('excluded_interface_re', None)
        if exclude_re:
            self.log.debug("Excluding network devices matching: %s" %
                           exclude_re)
            self._exclude_iface_re = re.compile(exclude_re)

        if Platform.is_linux():
            self._check_linux(instance)
        elif Platform.is_bsd():
            self._check_bsd(instance)
        elif Platform.is_solaris():
            self._check_solaris(instance)
        elif Platform.is_windows():
            self._check_psutil(instance)
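
A minimal sketch of how the settings gathered above could be applied per interface; the helper name `_is_iface_excluded` is hypothetical, but it relies only on the attributes set in check():

    def _is_iface_excluded(self, iface):
        # Skip interfaces listed in `excluded_interfaces` or matching the
        # compiled `excluded_interface_re` pattern, if any.
        if iface in self._excluded_ifaces:
            return True
        if self._exclude_iface_re and self._exclude_iface_re.match(iface):
            return True
        return False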
Example #2
    def check(self, instance):
        if instance is None:
            instance = {}

        self._excluded_ifaces = instance.get('excluded_interfaces', [])
        if not isinstance(self._excluded_ifaces, list):
            raise ConfigurationError(
                "Expected 'excluded_interfaces' to be a list, got '{}'".format(
                    type(self._excluded_ifaces).__name__))

        self._collect_cx_state = instance.get('collect_connection_state',
                                              False)
        self._collect_rate_metrics = instance.get('collect_rate_metrics', True)
        self._collect_count_metrics = instance.get('collect_count_metrics',
                                                   False)

        # This decides whether we should split or combine connection states,
        # along with a few other things
        self._setup_metrics(instance)

        self._exclude_iface_re = None
        exclude_re = instance.get('excluded_interface_re', None)
        if exclude_re:
            self.log.debug("Excluding network devices matching: %s",
                           exclude_re)
            self._exclude_iface_re = re.compile(exclude_re)

        if Platform.is_linux():
            self._check_linux(instance)
        elif Platform.is_bsd():
            self._check_bsd(instance)
        elif Platform.is_solaris():
            self._check_solaris(instance)
        elif Platform.is_windows():
            self._check_psutil(instance)
Example #3
    def psutil_wrapper(self, process, method, accessors, try_sudo, *args,
                       **kwargs):
        """
        A psutil wrapper that is calling
        * psutil.method(*args, **kwargs) and returns the result
        OR
        * psutil.method(*args, **kwargs).accessor[i] for each accessors
        given in a list, the result being indexed in a dictionary
        by the accessor name
        """

        if accessors is None:
            result = None
        else:
            result = {}

        # Ban certain methods that we know fail
        if method == 'num_fds' and not Platform.is_unix():
            return result
        elif method == 'num_handles' and not Platform.is_win32():
            return result

        try:
            res = getattr(process, method)(*args, **kwargs)
            if accessors is None:
                result = res
            else:
                for acc in accessors:
                    try:
                        result[acc] = getattr(res, acc)
                    except AttributeError:
                        self.log.debug(
                            "psutil.%s().%s attribute does not exist", method,
                            acc)
        except (NotImplementedError, AttributeError):
            self.log.debug("psutil method %s not implemented", method)
        except psutil.AccessDenied:
            self.log.debug("psutil was denied access for method %s", method)
            if method == 'num_fds' and Platform.is_unix() and try_sudo:
                try:
                    # It is up to the agent's packager to grant
                    # the corresponding sudo policy on unix platforms
                    ls_args = [
                        'sudo', 'ls', '/proc/{}/fd/'.format(process.pid)
                    ]
                    process_ls = subprocess.check_output(ls_args)
                    result = len(process_ls.splitlines())

                except subprocess.CalledProcessError as e:
                    self.log.exception(
                        "trying to retrieve %s with sudo failed with return code %s",
                        method, e.returncode)
                except Exception:
                    self.log.exception(
                        "trying to retrieve %s with sudo also failed", method)
        except psutil.NoSuchProcess:
            self.warning("Process %s disappeared while scanning", process.pid)

        return result
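
A usage sketch from inside a check, showing both calling modes (the process handle and accessor names are illustrative):

    import psutil

    proc = psutil.Process()  # e.g. the current process

    # Plain mode: returns process.num_threads() directly
    n_threads = self.psutil_wrapper(proc, 'num_threads', None, False)

    # Accessor mode: returns e.g. {'rss': 12345678, 'vms': 23456789};
    # accessors absent from the result are logged and skipped.
    mem = self.psutil_wrapper(proc, 'memory_info', ['rss', 'vms'], False)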
Example #4
def _assert_complex_config(aggregator):
    # Test service check
    aggregator.assert_service_check('mysql.can_connect', status=MySql.OK, tags=tags.SC_TAGS, count=1)
    aggregator.assert_service_check('mysql.replication.slave_running', status=MySql.OK, tags=tags.SC_TAGS, at_least=1)
    testable_metrics = (
        variables.STATUS_VARS
        + variables.VARIABLES_VARS
        + variables.INNODB_VARS
        + variables.BINLOG_VARS
        + variables.SYSTEM_METRICS
        + variables.SCHEMA_VARS
        + variables.SYNTHETIC_VARS
    )

    if MYSQL_VERSION_PARSED >= parse_version('5.6') and environ.get('MYSQL_FLAVOR') != 'mariadb':
        testable_metrics.extend(variables.PERFORMANCE_VARS)

    # Test metrics
    for mname in testable_metrics:
        # These two are currently not guaranteed outside of a Linux
        # environment.
        if mname == 'mysql.performance.user_time' and not Platform.is_linux():
            continue
        if mname == 'mysql.performance.kernel_time' and not Platform.is_linux():
            continue
        if mname == 'mysql.performance.cpu_time' and Platform.is_windows():
            continue

        if mname == 'mysql.performance.query_run_time.avg':
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:testdb'], count=1)
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:mysql'], count=1)
        elif mname == 'mysql.info.schema.size':
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:testdb'], count=1)
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:information_schema'], count=1)
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:performance_schema'], count=1)
        else:
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS, at_least=0)

    # TODO: test this if it is implemented
    # Assert service metadata
    # version_metadata = mysql_check.service_metadata['version']
    # assert len(version_metadata) == 1

    # test custom query metrics
    aggregator.assert_metric('alice.age', value=25)
    aggregator.assert_metric('bob.age', value=20)

    # test optional metrics
    optional_metrics = (
        variables.OPTIONAL_REPLICATION_METRICS
        + variables.OPTIONAL_INNODB_VARS
        + variables.OPTIONAL_STATUS_VARS
        + variables.OPTIONAL_STATUS_VARS_5_6_6
    )
    _test_optional_metrics(aggregator, optional_metrics, 1)

    # Raises when coverage < 100%
    aggregator.assert_all_metrics_covered()
Example #5
    def psutil_wrapper(self, process, method, accessors=None, *args, **kwargs):
        """
        A psutil wrapper that is calling
        * psutil.method(*args, **kwargs) and returns the result
        OR
        * psutil.method(*args, **kwargs).accessor[i] for each accessors
        given in a list, the result being indexed in a dictionary
        by the accessor name
        """

        if accessors is None:
            result = None
        else:
            result = {}

        # Ban certain methods that we know fail
        if method == 'num_fds' and not Platform.is_unix():
            return result
        elif method == 'num_handles' and not Platform.is_win32():
            return result

        # Try running `num_fds` with sudo if possible
        if method == 'num_fds' and self.try_sudo:
            self.log.debug("Running num_fds using sudo")
            try:
                ls_args = ['sudo', 'ls', '/proc/{}/fd/'.format(process.pid)]
                process_ls = subprocess.check_output(ls_args)
                result = len(process_ls.splitlines())
            except Exception as e:
                self.log.exception(
                    "Trying to retrieve %s with sudo failed with error: %s",
                    method, e)

        else:
            try:
                res = getattr(process, method)(*args, **kwargs)
                if accessors is None:
                    result = res
                else:
                    for acc in accessors:
                        try:
                            result[acc] = getattr(res, acc)
                        except AttributeError:
                            self.log.debug(
                                "psutil.%s().%s attribute does not exist",
                                method, acc)
            except (NotImplementedError, AttributeError):
                self.log.debug("psutil method %s not implemented", method)
            except psutil.AccessDenied:
                self.log.debug("psutil was denied access for method %s",
                               method)
            except psutil.NoSuchProcess:
                self.log.debug("Process %s disappeared while scanning",
                               process.pid)

        return result
Example #6
def init_db():
    # exit if we are not on linux
    # that's the only platform where the client successfully installs for version 3.10
    if not Platform.is_linux():
        return

    import aerospike

    # sample Aerospike Python Client code
    # https://www.aerospike.com/docs/client/python/usage/kvs/write.html
    client = aerospike.client({'hosts': [(HOST, PORT)]}).connect()

    key = ('test', 'characters', 'bender')
    bins = {
        'name': 'Bender',
        'serialnum': 2716057,
        'lastsentence': {
            'BBS': "Well, we're boned",
            'TBwaBB': 'I love you, meatbags!',
            'BG': 'Whip harder, Professor!',
            'ltWGY': 'Into the breach, meatbags. Or not, whatever',
        },
        'composition':
        ['40% zinc', '40% titanium', '30% iron', '40% dolomite'],
        'apartment': bytearray(b'\x24'),
        'quote_cnt': 47,
    }
    client.put(key, bins)

    for _ in range(10):
        client.get(key)

    client.close()
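
A hedged read-back sketch (run before client.close()): the Aerospike client's get() returns a (key, metadata, bins) tuple, so the written bins can be verified directly:

    _, meta, record = client.get(key)
    assert record['name'] == 'Bender'  # bins round-trip as a plain dict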
Example #7
    def exclude_disk(self, part):
        # skip cd-rom drives with no disk in them; they may raise
        # ENOENT, pop up a Windows GUI error for a non-ready
        # partition or just hang;
        # also skip all the other excluded disks
        skip_win = Platform.is_win32() and ('cdrom' in part.opts or part.fstype == '')
        return skip_win or self._exclude_disk(part.device, part.fstype, part.mountpoint)
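
The shared _exclude_disk helper is not shown here; a minimal sketch of the kind of filter it could be, using illustrative attribute names (_excluded_devices, _excluded_filesystems and _excluded_mountpoint_re are assumptions, not the check's real options):

    def _exclude_disk(self, device, fstype, mountpoint):
        # Hypothetical filter; the real option names and semantics may differ.
        if device in self._excluded_devices:
            return True
        if fstype in self._excluded_filesystems:
            return True
        if self._excluded_mountpoint_re and self._excluded_mountpoint_re.match(mountpoint):
            return True
        return False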
Example #8
    def collect_metrics_psutil(self):
        self._valid_disks = {}
        for part in psutil.disk_partitions(all=True):
            # we check all exclude conditions
            if self.exclude_disk(part):
                continue

            # Get disk metrics here to be able to exclude on total usage
            try:
                disk_usage = timeout(5)(psutil.disk_usage)(part.mountpoint)
            except TimeoutException:
                self.log.warning(
                    u'Timeout while retrieving the disk usage of `%s` mountpoint. Skipping...', part.mountpoint
                )
                continue
            except Exception as e:
                self.log.warning('Unable to get disk metrics for %s: %s', part.mountpoint, e)
                continue

            # Exclude disks with total disk size 0
            if disk_usage.total == 0:
                continue

            # For later, latency metrics
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            self.log.debug('Passed: %s', part.device)

            device_name = part.mountpoint if self._use_mount else part.device

            tags = [part.fstype, 'filesystem:{}'.format(part.fstype)] if self._tag_by_filesystem else []
            tags.extend(self._custom_tags)

            # apply device/mountpoint specific tags
            for regex, device_tags in self._device_tag_re:
                if regex.match(device_name):
                    tags.extend(device_tags)

            if self.devices_label.get(device_name):
                tags.append(self.devices_label.get(device_name))

            # legacy check names c: vs psutil name C:\\
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()

            tags.append('device:{}'.format(device_name))
            for metric_name, metric_value in iteritems(self._collect_part_metrics(part, disk_usage)):
                self.gauge(metric_name, metric_value, tags=tags)

            # Add in a disk read write or read only check
            if self._service_check_rw:
                rwro = {'rw', 'ro'} & set(part.opts.split(','))
                if len(rwro) == 1:
                    self.service_check(
                        'disk.read_write', AgentCheck.OK if rwro.pop() == 'rw' else AgentCheck.CRITICAL, tags=tags
                    )
                else:
                    self.service_check('disk.read_write', AgentCheck.UNKNOWN, tags=tags)

        self.collect_latency_metrics()
Example #9
def set_default_driver_conf():
    if Platform.is_containerized():
        # Use default `./driver_config/odbcinst.ini` when Agent is running in docker.
        # `freetds` is shipped with the Docker Agent.
        os.environ.setdefault('ODBCSYSINI', DRIVER_CONFIG_DIR)
    else:
        # required when using pyodbc with FreeTDS on Ubuntu 18.04
        # see https://stackoverflow.com/a/22988748/1258743
        os.environ.setdefault('TDSVER', '8.0')
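
os.environ.setdefault only writes a variable that is not already set, so an operator-supplied value always wins over these defaults; a quick illustration:

    import os

    os.environ['TDSVER'] = '7.4'            # operator override
    os.environ.setdefault('TDSVER', '8.0')  # no effect: already set
    assert os.environ['TDSVER'] == '7.4'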
Example #10
    def check(self, instance):
        """Get disk space/inode stats"""
        if self._tag_by_label and Platform.is_linux():
            self.devices_label = self._get_devices_label()
        # Windows and Mac will always have psutil
        # (we have packaged for both of them)
        if self._psutil():
            self.collect_metrics_psutil()
        else:
            # FIXME: implement all_partitions (df -a)
            self.collect_metrics_manually()
Example #11
def uds_path():
    if Platform.is_mac():
        # See: https://github.com/docker/for-mac/issues/483
        pytest.skip('Sharing Unix sockets is not supported by Docker for Mac.')

    if Platform.is_windows():
        pytest.skip('Nginx does not run on Windows.')

    with TempDir() as tmp_dir:
        compose_file = os.path.join(HERE, 'compose', 'uds.yaml')
        uds_filename = 'tmp.sock'
        uds_path = os.path.join(tmp_dir, uds_filename)
        with docker_run(
                compose_file=compose_file,
                env_vars={
                    "UDS_HOST_DIRECTORY": tmp_dir,
                    'UDS_FILENAME': uds_filename,
                },
        ):
            yield uds_path
Example #12
def mock_server():
    if Platform.is_windows():
        compose_filename = 'docker-compose-windows.yaml'
    else:
        compose_filename = 'docker-compose.yaml'

    compose_file = os.path.join(common.HERE, 'compose', 'mock_server',
                                compose_filename)
    env_vars = {"MOCK_SERVER_PORT": str(common.MOCK_SERVER_PORT)}

    with docker_run(compose_file, env_vars=env_vars):
        yield
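
A usage sketch for this fixture (the test name and body are illustrative): pytest injects it by parameter name, and the Compose environment is torn down when the generator resumes after the test:

    def test_mock_server_reachable(mock_server):
        # The compose services are up for the duration of this test,
        # listening on common.MOCK_SERVER_PORT.
        assert common.MOCK_SERVER_PORT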
Example #13
    def __init__(self, name, init_config, instances):
        super(ProcessCheck, self).__init__(name, init_config, instances)

        self.name = self.instance.get('name', None)
        self.tags = self.instance.get('tags', [])
        self.exact_match = is_affirmative(
            self.instance.get('exact_match', True))
        self.search_string = self.instance.get('search_string', None)
        self.ignore_ad = is_affirmative(
            self.instance.get('ignore_denied_access', True))
        self.pid = self.instance.get('pid')
        self.pid_file = self.instance.get('pid_file')
        self.collect_children = is_affirmative(
            self.instance.get('collect_children', False))
        self.user = self.instance.get('user', False)
        self.try_sudo = self.instance.get('try_sudo', False)

        # ad stands for access denied
        # We cache the PIDs getting this error and don't iterate on them more often than `access_denied_cache_duration`
        # This cache is for all PIDs so it's global, but it should be refreshed by instance
        self.last_ad_cache_ts = {}
        self.ad_cache = set()
        self.access_denied_cache_duration = int(
            init_config.get('access_denied_cache_duration',
                            DEFAULT_AD_CACHE_DURATION))

        # By default cache the PID list for a while
        # Sometimes it's not wanted b/c it can mess with no-data monitoring
        # This cache is indexed per instance
        self.last_pid_cache_ts = {}
        self.pid_cache = {}
        self.pid_cache_duration = int(
            init_config.get('pid_cache_duration', DEFAULT_PID_CACHE_DURATION))

        self._conflicting_procfs = False
        self._deprecated_init_procfs = False
        if Platform.is_linux():
            procfs_path = init_config.get('procfs_path')
            if procfs_path:
                agent_procfs_path = datadog_agent.get_config('procfs_path')
                if agent_procfs_path and procfs_path != agent_procfs_path.rstrip(
                        '/'):
                    self._conflicting_procfs = True
                else:
                    self._deprecated_init_procfs = True
                    psutil.PROCFS_PATH = procfs_path

        # Process cache, indexed by instance
        self.process_cache = defaultdict(dict)

        self.process_list_cache.cache_duration = int(
            init_config.get('shared_process_list_cache_duration',
                            DEFAULT_SHARED_PROCESS_LIST_CACHE_DURATION))
Example #14
    def _collect_part_metrics(self, part, usage):
        metrics = {}

        for name in ['total', 'used', 'free']:
            # For legacy reasons, the standard unit is kB
            metrics[self.METRIC_DISK.format(name)] = getattr(usage, name) / 1024

        # FIXME: 6.x, use percent, a lot more logical than in_use
        metrics[self.METRIC_DISK.format('in_use')] = usage.percent / 100

        if Platform.is_unix():
            metrics.update(self._collect_inodes_metrics(part.mountpoint))

        return metrics
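
_collect_inodes_metrics is not shown; a minimal sketch of how it could derive inode counts on Unix from os.statvfs (the metric names here are illustrative, not necessarily the check's real ones):

    import os

    def _collect_inodes_metrics(self, mountpoint):
        metrics = {}
        stats = os.statvfs(mountpoint)
        if stats.f_files != 0:
            total, free = stats.f_files, stats.f_ffree
            metrics['system.fs.inodes.total'] = total
            metrics['system.fs.inodes.free'] = free
            metrics['system.fs.inodes.used'] = total - free
            metrics['system.fs.inodes.in_use'] = (total - free) / float(total)
        return metrics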
Example #15
def test_check_real_process_regex(aggregator, dd_run_check):
    "Check to specifically find this python pytest running process using regex."
    from datadog_checks.base.utils.platform import Platform

    instance = {
        'name': 'py',
        'search_string': ['.*python.*pytest'],
        'exact_match': False,
        'ignore_denied_access': True,
        'thresholds': {
            'warning': [1, 10],
            'critical': [1, 100]
        },
    }
    process = ProcessCheck(common.CHECK_NAME, {}, [instance])
    expected_tags = generate_expected_tags(instance)
    dd_run_check(process)
    for mname in common.PROCESS_METRIC:
        # cases where we don't actually expect some metrics here:
        #  - if io_counters() is not available
        #  - if memory_info_ex() is not available
        #  - first run so no `cpu.pct`
        if ((not _PSUTIL_IO_COUNTERS and '.io' in mname)
                or (not _PSUTIL_MEM_SHARED and 'mem.real' in mname)
                or mname == 'system.processes.cpu.pct'):
            continue

        if Platform.is_windows():
            metric = common.UNIX_TO_WINDOWS_MAP.get(mname, mname)
        else:
            metric = mname
        aggregator.assert_metric(metric, at_least=1, tags=expected_tags)

    aggregator.assert_service_check('process.up',
                                    count=1,
                                    tags=expected_tags + ['process:py'])

    # this requires another run
    dd_run_check(process)
    aggregator.assert_metric('system.processes.cpu.pct',
                             count=1,
                             tags=expected_tags)
    aggregator.assert_metric('system.processes.cpu.normalized_pct',
                             count=1,
                             tags=expected_tags)
Example #16
def uds_path():
    if Platform.is_mac():
        # See: https://github.com/docker/for-mac/issues/483
        pytest.skip('Sharing Unix sockets is not supported by Docker for Mac.')

    with TempDir() as tmp_dir:
        compose_file = os.path.join(HERE, 'compose', 'uds.yaml')
        uds_filename = 'tmp.sock'
        uds_path = os.path.join(tmp_dir, uds_filename)
        with docker_run(
                compose_file=compose_file,
                env_vars={
                    "UDS_HOST_DIRECTORY": tmp_dir,
                    'UDS_FILENAME': uds_filename,
                },
                conditions=[WaitFor(lambda: os.path.exists(uds_path))],
                attempts=2,
        ):
            yield uds_path
Example #17
    def get_pagefault_stats(self, pid):
        if not Platform.is_linux():
            return None

        def file_to_string(path):
            with open(path, 'r') as f:
                res = f.read()
            return res

        # http://man7.org/linux/man-pages/man5/proc.5.html
        try:
            data = file_to_string('/{}/{}/stat'.format(psutil.PROCFS_PATH,
                                                       pid))
        except Exception:
            self.log.debug(
                'error getting proc stats: file_to_string failed for /%s/%s/stat',
                psutil.PROCFS_PATH, pid)
            return None
        return (int(i) for i in data.split()[9:13])
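
Per the proc(5) man page referenced above, data.split()[9:13] selects fields 10 through 13 of /proc/<pid>/stat: minflt, cminflt, majflt and cmajflt. A usage sketch (the method returns a generator, or None off Linux or on error):

    stats = self.get_pagefault_stats(pid)
    if stats is not None:
        minflt, cminflt, majflt, cmajflt = stats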
Example #18
    def __init__(self, name, init_config, instances=None):
        super(ProcessCheck, self).__init__(name, init_config, instances)

        # ad stands for access denied
        # We cache the PIDs getting this error and don't iterate on them more often than `access_denied_cache_duration`
        # This cache is for all PIDs so it's global, but it should be refreshed by instance
        self.last_ad_cache_ts = {}
        self.ad_cache = set()
        self.access_denied_cache_duration = int(
            init_config.get('access_denied_cache_duration',
                            DEFAULT_AD_CACHE_DURATION))

        # By default cache the PID list for a while
        # Sometimes it's not wanted b/c it can mess with no-data monitoring
        # This cache is indexed per instance
        self.last_pid_cache_ts = {}
        self.pid_cache = {}
        self.pid_cache_duration = int(
            init_config.get('pid_cache_duration', DEFAULT_PID_CACHE_DURATION))

        self._conflicting_procfs = False
        self._deprecated_init_procfs = False
        if Platform.is_linux():
            procfs_path = init_config.get('procfs_path')
            if procfs_path:
                agent_procfs_path = datadog_agent.get_config('procfs_path')
                if agent_procfs_path and procfs_path != agent_procfs_path.rstrip(
                        '/'):
                    self._conflicting_procfs = True
                else:
                    self._deprecated_init_procfs = True
                    psutil.PROCFS_PATH = procfs_path

        # Process cache, indexed by instance
        self.process_cache = defaultdict(dict)

        self.process_list_cache.cache_duration = int(
            init_config.get('shared_process_list_cache_duration',
                            DEFAULT_SHARED_PROCESS_LIST_CACHE_DURATION))
Example #19
def test_complex_config_replica(aggregator, instance_complex):
    mysql_check = MySql(common.CHECK_NAME, {}, {})
    config = copy.deepcopy(instance_complex)
    config['port'] = common.SLAVE_PORT
    mysql_check.check(config)

    # self.assertMetricTag('mysql.replication.seconds_behind_master', 'channel:default')

    # Test service check
    aggregator.assert_service_check('mysql.can_connect', status=MySql.OK, tags=tags.SC_TAGS_REPLICA, count=1)

    # Travis MySQL not running replication - FIX in flavored test.
    aggregator.assert_service_check(
        'mysql.replication.slave_running', status=MySql.OK, tags=tags.SC_TAGS_REPLICA, at_least=1
    )

    testable_metrics = (
        variables.STATUS_VARS
        + variables.VARIABLES_VARS
        + variables.INNODB_VARS
        + variables.BINLOG_VARS
        + variables.SYSTEM_METRICS
        + variables.SCHEMA_VARS
        + variables.SYNTHETIC_VARS
    )

    if MYSQL_VERSION_PARSED >= parse_version('5.6') and environ.get('MYSQL_FLAVOR') != 'mariadb':
        testable_metrics.extend(variables.PERFORMANCE_VARS)

    # Test metrics
    for mname in testable_metrics:
        # These two are currently not guaranteed outside of a Linux
        # environment.
        if mname == 'mysql.performance.user_time' and not Platform.is_linux():
            continue
        if mname == 'mysql.performance.kernel_time' and not Platform.is_linux():
            continue
        if mname == 'mysql.performance.cpu_time' and Platform.is_windows():
            continue
        if mname == 'mysql.performance.query_run_time.avg':
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:testdb'], at_least=1)
        elif mname == 'mysql.info.schema.size':
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:testdb'], count=1)
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:information_schema'], count=1)
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:performance_schema'], count=1)
        else:
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS, at_least=0)

    # test custom query metrics
    aggregator.assert_metric('alice.age', value=25)
    aggregator.assert_metric('bob.age', value=20)

    # test optional metrics
    optional_metrics = (
        variables.OPTIONAL_REPLICATION_METRICS
        + variables.OPTIONAL_INNODB_VARS
        + variables.OPTIONAL_STATUS_VARS
        + variables.OPTIONAL_STATUS_VARS_5_6_6
    )
    _test_optional_metrics(aggregator, optional_metrics, 1)

    # Raises when coverage < 100%
    aggregator.assert_all_metrics_covered()
Example #20
    for name in gauge_metrics:
        aggregator.assert_metric(name, count=0)

    for name in rate_metrics:
        aggregator.assert_metric_has_tag(name, 'device:{}'.format(DEFAULT_DEVICE_NAME))
        aggregator.assert_metric_has_tag(name, 'device_name:{}'.format(DEFAULT_DEVICE_BASE_NAME))

    for name in count_metrics:
        aggregator.assert_metric_has_tag(name, 'device:{}'.format(DEFAULT_DEVICE_NAME))
        aggregator.assert_metric_has_tag(name, 'device_name:{}'.format(DEFAULT_DEVICE_BASE_NAME))

    aggregator.assert_all_metrics_covered()


@pytest.mark.skipif(not Platform.is_linux(), reason='disk labels are only available on Linux')
@pytest.mark.usefixtures('psutil_mocks')
def test_labels_from_blkid_cache_file(
    aggregator, instance_blkid_cache_file, gauge_metrics, rate_metrics, count_metrics
):
    """
    Verify that the disk labels are set when the blkid_cache_file option is set
    """
    c = Disk('disk', {}, [instance_blkid_cache_file])
    c.check(instance_blkid_cache_file)
    for metric in chain(gauge_metrics, rate_metrics, count_metrics):
        aggregator.assert_metric(
            metric, tags=['device:/dev/sda1', 'device_name:sda1', 'label:MYLABEL', 'device_label:MYLABEL']
        )

Example #21
                    __name__='disk_usage'):
        c.check(instance)

    for name in gauge_metrics:
        aggregator.assert_metric(name, count=0)

    for name in rate_metrics:
        aggregator.assert_metric_has_tag(
            name, 'device:{}'.format(DEFAULT_DEVICE_NAME))
        aggregator.assert_metric_has_tag(
            name, 'device_name:{}'.format(DEFAULT_DEVICE_BASE_NAME))

    aggregator.assert_all_metrics_covered()


@pytest.mark.skipif(not Platform.is_linux(),
                    reason='disk labels are only available on Linux')
@pytest.mark.usefixtures('psutil_mocks')
def test_labels_from_blkid_cache_file(aggregator, instance_blkid_cache_file,
                                      gauge_metrics, rate_metrics):
    """
    Verify that the disk labels are set when the blkid_cache_file option is set
    """
    c = Disk('disk', {}, [instance_blkid_cache_file])
    c.check(instance_blkid_cache_file)
    for metric in chain(gauge_metrics, rate_metrics):
        aggregator.assert_metric(metric,
                                 tags=[
                                     'device:/dev/sda1', 'device_name:sda1',
                                     'label:MYLABEL', 'device_label:MYLABEL'
                                 ])
Example #22
    def _check_bsd(self, instance):
        netstat_flags = ['-i', '-b']

        custom_tags = instance.get('tags', [])

        # FreeBSD's netstat truncates device names unless you pass '-W'
        if Platform.is_freebsd():
            netstat_flags.append('-W')

        try:
            output, _, _ = get_subprocess_output(["netstat"] + netstat_flags,
                                                 self.log)
            lines = output.splitlines()
            # Name  Mtu   Network       Address            Ipkts Ierrs     Ibytes    Opkts Oerrs     Obytes  Coll
            # lo0   16384 <Link#1>                        318258     0  428252203   318258     0  428252203     0
            # lo0   16384 localhost   fe80:1::1           318258     -  428252203   318258     -  428252203     -
            # lo0   16384 127           localhost         318258     -  428252203   318258     -  428252203     -
            # lo0   16384 localhost   ::1                 318258     -  428252203   318258     -  428252203     -
            # gif0* 1280  <Link#2>                             0     0          0        0     0          0     0
            # stf0* 1280  <Link#3>                             0     0          0        0     0          0     0
            # en0   1500  <Link#4>    04:0c:ce:db:4e:fa 20801309     0 13835457425 15149389     0 11508790198     0
            # en0   1500  seneca.loca fe80:4::60c:ceff: 20801309     - 13835457425 15149389     - 11508790198     -
            # en0   1500  2001:470:1f 2001:470:1f07:11d 20801309     - 13835457425 15149389     - 11508790198     -
            # en0   1500  2001:470:1f 2001:470:1f07:11d 20801309     - 13835457425 15149389     - 11508790198     -
            # en0   1500  192.168.1     192.168.1.63    20801309     - 13835457425 15149389     - 11508790198     -
            # en0   1500  2001:470:1f 2001:470:1f07:11d 20801309     - 13835457425 15149389     - 11508790198     -
            # p2p0  2304  <Link#5>    06:0c:ce:db:4e:fa        0     0          0        0     0          0     0
            # ham0  1404  <Link#6>    7a:79:05:4d:bf:f5    30100     0    6815204    18742     0    8494811     0
            # ham0  1404  5             5.77.191.245       30100     -    6815204    18742     -    8494811     -
            # ham0  1404  seneca.loca fe80:6::7879:5ff:    30100     -    6815204    18742     -    8494811     -
            # ham0  1404  2620:9b::54 2620:9b::54d:bff5    30100     -    6815204    18742     -    8494811     -

            headers = lines[0].split()

            # Given the irregular structure of the table above, better to parse from the end of each line
            # Verify headers first
            #          -7       -6       -5        -4       -3       -2        -1
            for h in ("Ipkts", "Ierrs", "Ibytes", "Opkts", "Oerrs", "Obytes",
                      "Coll"):
                if h not in headers:
                    self.log.error("%s not found in %s; cannot parse", h,
                                   headers)
                    return False

            current = None
            for l in lines[1:]:
                # Another header row, abort now, this is IPv6 land
                if "Name" in l:
                    break

                x = l.split()
                if len(x) == 0:
                    break

                iface = x[0]
                if iface.endswith("*"):
                    iface = iface[:-1]
                if iface == current:
                    # skip multiple lines of same interface
                    continue
                else:
                    current = iface

                # Filter inactive interfaces
                if self._parse_value(x[-5]) or self._parse_value(x[-2]):
                    iface = current
                    metrics = {
                        'bytes_rcvd': self._parse_value(x[-5]),
                        'bytes_sent': self._parse_value(x[-2]),
                        'packets_in.count': self._parse_value(x[-7]),
                        'packets_in.error': self._parse_value(x[-6]),
                        'packets_out.count': self._parse_value(x[-4]),
                        'packets_out.error': self._parse_value(x[-3]),
                    }
                    self._submit_devicemetrics(iface, metrics, custom_tags)
        except SubprocessOutputEmptyError:
            self.log.exception("Error collecting connection stats.")

        try:
            netstat, _, _ = get_subprocess_output(
                ["netstat", "-s", "-p", "tcp"], self.log)
            # 3651535 packets sent
            #         972097 data packets (615753248 bytes)
            #         5009 data packets (2832232 bytes) retransmitted
            #         0 resends initiated by MTU discovery
            #         2086952 ack-only packets (471 delayed)
            #         0 URG only packets
            #         0 window probe packets
            #         310851 window update packets
            #         336829 control packets
            #         0 data packets sent after flow control
            #         3058232 checksummed in software
            #         3058232 segments (571218834 bytes) over IPv4
            #         0 segments (0 bytes) over IPv6
            # 4807551 packets received
            #         1143534 acks (for 616095538 bytes)
            #         165400 duplicate acks
            #         ...

            self._submit_regexed_values(netstat, BSD_TCP_METRICS, custom_tags)
        except SubprocessOutputEmptyError:
            self.log.exception("Error collecting TCP stats.")

        proc_location = self.agentConfig.get('procfs_path',
                                             '/proc').rstrip('/')

        net_proc_base_location = self._get_net_proc_base_location(
            proc_location)

        if self._is_collect_cx_state_runnable(net_proc_base_location):
            try:
                self.log.debug("Using `netstat` to collect connection state")
                output_TCP, _, _ = get_subprocess_output(
                    ["netstat", "-n", "-a", "-p", "tcp"], self.log)
                output_UDP, _, _ = get_subprocess_output(
                    ["netstat", "-n", "-a", "-p", "udp"], self.log)
                lines = output_TCP.splitlines() + output_UDP.splitlines()
                # Active Internet connections (w/o servers)
                # Proto Recv-Q Send-Q Local Address           Foreign Address         State
                # tcp        0      0 46.105.75.4:80          79.220.227.193:2032     SYN_RECV
                # tcp        0      0 46.105.75.4:143         90.56.111.177:56867     ESTABLISHED
                # tcp        0      0 46.105.75.4:50468       107.20.207.175:443      TIME_WAIT
                # tcp6       0      0 46.105.75.4:80          93.15.237.188:58038     FIN_WAIT2
                # tcp6       0      0 46.105.75.4:80          79.220.227.193:2029     ESTABLISHED
                # udp        0      0 0.0.0.0:123             0.0.0.0:*
                # udp6       0      0 :::41458                :::*

                metrics = self._parse_linux_cx_state(
                    lines[2:], self.tcp_states['netstat'], 5)
                for metric, value in iteritems(metrics):
                    self.gauge(metric, value, tags=custom_tags)
            except SubprocessOutputEmptyError:
                self.log.exception("Error collecting connection states.")
Example #23
# In order to collect connection state we need `ss` command included in `iproute2` package
E2E_METADATA = {
    'start_commands': ['apt-get update', 'apt-get install iproute2 -y']
}

EXPECTED_METRICS = [
    'system.net.bytes_rcvd',
    'system.net.bytes_sent',
    'system.net.packets_in.count',
    'system.net.packets_in.error',
    'system.net.packets_out.count',
    'system.net.packets_out.error',
]

if Platform.is_linux() or Platform.is_windows():
    EXPECTED_METRICS.extend([
        'system.net.packets_in.drop',
        'system.net.packets_out.drop',
    ])

E2E_EXPECTED_METRICS = EXPECTED_METRICS + [
    "system.net.tcp4.closing",
    "system.net.tcp4.established",
    "system.net.tcp4.listening",
    "system.net.tcp4.opening",
    "system.net.tcp4.time_wait",
    "system.net.tcp6.closing",
    "system.net.tcp6.established",
    "system.net.tcp6.listening",
    "system.net.tcp6.opening",
Example #24
def test_complex_config_replica(aggregator, instance_complex):
    config = copy.deepcopy(instance_complex)
    config['port'] = common.SLAVE_PORT
    mysql_check = MySql(common.CHECK_NAME, {}, instances=[config])

    mysql_check.check(config)

    # Test service check
    aggregator.assert_service_check('mysql.can_connect',
                                    status=MySql.OK,
                                    tags=tags.SC_TAGS_REPLICA,
                                    count=1)

    # Travis MySQL not running replication - FIX in flavored test.
    aggregator.assert_service_check(
        'mysql.replication.slave_running',
        status=MySql.OK,
        tags=tags.SC_TAGS_REPLICA + ['replication_mode:replica'],
        at_least=1,
    )

    testable_metrics = (variables.STATUS_VARS + variables.COMPLEX_STATUS_VARS +
                        variables.VARIABLES_VARS +
                        variables.COMPLEX_VARIABLES_VARS +
                        variables.INNODB_VARS + variables.COMPLEX_INNODB_VARS +
                        variables.BINLOG_VARS + variables.SYSTEM_METRICS +
                        variables.SCHEMA_VARS + variables.SYNTHETIC_VARS +
                        variables.STATEMENT_VARS)

    if MYSQL_VERSION_PARSED >= parse_version('5.6') and environ.get(
            'MYSQL_FLAVOR') != 'mariadb':
        testable_metrics.extend(variables.PERFORMANCE_VARS)

    # Test metrics
    for mname in testable_metrics:
        # These two are currently not guaranteed outside of a Linux
        # environment.
        if mname == 'mysql.performance.user_time' and not Platform.is_linux():
            continue
        if (mname == 'mysql.performance.kernel_time'
                and not Platform.is_linux()):
            continue
        if mname == 'mysql.performance.cpu_time' and Platform.is_windows():
            continue
        if mname == 'mysql.performance.query_run_time.avg':
            aggregator.assert_metric(mname,
                                     tags=tags.METRIC_TAGS + ['schema:testdb'],
                                     at_least=1)
        elif mname == 'mysql.info.schema.size':
            aggregator.assert_metric(mname,
                                     tags=tags.METRIC_TAGS + ['schema:testdb'],
                                     count=1)
            aggregator.assert_metric(mname,
                                     tags=tags.METRIC_TAGS +
                                     ['schema:information_schema'],
                                     count=1)
            aggregator.assert_metric(mname,
                                     tags=tags.METRIC_TAGS +
                                     ['schema:performance_schema'],
                                     count=1)
        else:
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS, at_least=0)

    # test custom query metrics
    aggregator.assert_metric('alice.age', value=25)
    aggregator.assert_metric('bob.age', value=20)

    # test optional metrics
    optional_metrics = (variables.OPTIONAL_REPLICATION_METRICS +
                        variables.OPTIONAL_INNODB_VARS +
                        variables.OPTIONAL_STATUS_VARS +
                        variables.OPTIONAL_STATUS_VARS_5_6_6)
    # Note, this assertion will pass even if some metrics are not present.
    # Manual testing is required for optional metrics
    _test_optional_metrics(aggregator, optional_metrics)

    # Raises when coverage < 100%
    aggregator.assert_all_metrics_covered()
    aggregator.assert_metrics_using_metadata(get_metadata_metrics(),
                                             check_submission_type=True,
                                             exclude=['alice.age', 'bob.age'] +
                                             variables.STATEMENT_VARS)
Example #25
    def check(self, instance):
        """Get disk space/inode stats"""
        if self._tag_by_label and Platform.is_linux():
            self.devices_label = self._get_devices_label()

        self._valid_disks = {}
        for part in psutil.disk_partitions(all=self._include_all_devices):
            # we check all exclude conditions
            if self.exclude_disk(part):
                continue

            # Get disk metrics here to be able to exclude on total usage
            try:
                disk_usage = timeout(self._timeout)(psutil.disk_usage)(
                    part.mountpoint)
            except TimeoutException:
                self.log.warning(
                    u'Timeout after %d seconds while retrieving the disk usage of `%s` mountpoint. '
                    u'You might want to change the timeout length in the settings.',
                    self._timeout,
                    part.mountpoint,
                )
                continue
            except Exception as e:
                self.log.warning(
                    u'Unable to get disk metrics for %s: %s. '
                    u'You can exclude this mountpoint in the settings if it is invalid.',
                    part.mountpoint,
                    e,
                )
                continue

            # Exclude disks with size less than min_disk_size
            if disk_usage.total <= self._min_disk_size:
                if disk_usage.total > 0:
                    self.log.info(
                        'Excluding device %s with total disk size %s',
                        part.device, disk_usage.total)
                continue

            # For later, latency metrics
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            self.log.debug('Passed: %s', part.device)

            device_name = part.mountpoint if self._use_mount else part.device

            tags = [part.fstype, 'filesystem:{}'.format(part.fstype)
                    ] if self._tag_by_filesystem else []
            tags.extend(self._custom_tags)

            # apply device/mountpoint specific tags
            for regex, device_tags in self._device_tag_re:
                if regex.match(device_name):
                    tags.extend(device_tags)

            if self.devices_label.get(device_name):
                tags.extend(self.devices_label.get(device_name))

            # legacy check names c: vs psutil name C:\\
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()

            tags.append('device:{}'.format(device_name))
            tags.append('device_name:{}'.format(_base_device_name(
                part.device)))
            for metric_name, metric_value in iteritems(
                    self._collect_part_metrics(part, disk_usage)):
                self.gauge(metric_name, metric_value, tags=tags)

            # Add in a disk read write or read only check
            if self._service_check_rw:
                rwro = {'rw', 'ro'} & set(part.opts.split(','))
                if len(rwro) == 1:
                    self.service_check('disk.read_write',
                                       AgentCheck.OK if rwro.pop() == 'rw' else
                                       AgentCheck.CRITICAL,
                                       tags=tags)
                else:
                    self.service_check('disk.read_write',
                                       AgentCheck.UNKNOWN,
                                       tags=tags)

        self.collect_latency_metrics()
Example #26
def set_default_driver_conf():
    if Platform.is_containerized():
        # Use default `./driver_config/odbcinst.ini` when Agent is running in docker.
        # `freetds` is shipped with the Docker Agent.
        os.environ.setdefault('ODBCSYSINI', DRIVER_CONFIG_DIR)
Example #27
def _get_net_proc_base_location(proc_location):
    if Platform.is_containerized() and proc_location != "/proc":
        net_proc_base_location = "%s/1" % proc_location
    else:
        net_proc_base_location = proc_location
    return net_proc_base_location
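
For example, with the Agent containerized and the host's procfs mounted at /host/proc (an illustrative mount point), network stats are read from PID 1's view of procfs:

    # Platform.is_containerized() True, proc_location '/host/proc':
    #     _get_net_proc_base_location('/host/proc')  ->  '/host/proc/1'
    # Otherwise the path passes through unchanged:
    #     _get_net_proc_base_location('/proc')       ->  '/proc'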
Example #28
from datadog_checks.base import AgentCheck
from datadog_checks.base.utils.platform import Platform

PY3 = sys.version_info[0] == 3

if PY3:
    # use higher precision clock available in Python3
    time_func = time.perf_counter
else:
    time_func = time.time

# These imports are necessary because otherwise dynamic type
# resolution will fail on windows without it.
# See more here: https://github.com/rthalley/dnspython/issues/39.
if Platform.is_win32():
    from dns.rdtypes.ANY import *  # noqa
    from dns.rdtypes.IN import *  # noqa

    # for tiny time deltas, time.time on Windows reports the same value
    # of the clock more than once, causing the computation of response_time
    # to be often 0; let's use time.clock, which is more precise.
    if not PY3:
        time_func = time.clock

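# A sketch (not this check's real code) of how time_func is typically used:
# wrap the actual dnspython query and subtract timestamps, e.g.
#
#     start = time_func()
#     response = do_lookup()  # hypothetical stand-in for the DNS query
#     response_time = time_func() - start
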

class BadConfException(Exception):
    pass


class DNSCheck(AgentCheck):
Example #29
    def _check_bsd(self, instance):
        netstat_flags = ['-i', '-b']

        custom_tags = instance.get('tags', [])

        # FreeBSD's netstat truncates device names unless you pass '-W'
        if Platform.is_freebsd():
            netstat_flags.append('-W')

        try:
            output, _, _ = get_subprocess_output(["netstat"] + netstat_flags,
                                                 self.log)
            lines = output.splitlines()
            # Name  Mtu   Network       Address            Ipkts Ierrs     Ibytes    Opkts Oerrs     Obytes  Coll
            # lo0   16384 <Link#1>                        318258     0  428252203   318258     0  428252203     0
            # lo0   16384 localhost   fe80:1::1           318258     -  428252203   318258     -  428252203     -
            # lo0   16384 127           localhost         318258     -  428252203   318258     -  428252203     -
            # lo0   16384 localhost   ::1                 318258     -  428252203   318258     -  428252203     -
            # gif0* 1280  <Link#2>                             0     0          0        0     0          0     0
            # stf0* 1280  <Link#3>                             0     0          0        0     0          0     0
            # en0   1500  <Link#4>    04:0c:ce:db:4e:fa 20801309     0 13835457425 15149389     0 11508790198     0
            # en0   1500  seneca.loca fe80:4::60c:ceff: 20801309     - 13835457425 15149389     - 11508790198     -
            # en0   1500  2001:470:1f 2001:470:1f07:11d 20801309     - 13835457425 15149389     - 11508790198     -
            # en0   1500  2001:470:1f 2001:470:1f07:11d 20801309     - 13835457425 15149389     - 11508790198     -
            # en0   1500  192.168.1     192.168.1.63    20801309     - 13835457425 15149389     - 11508790198     -
            # en0   1500  2001:470:1f 2001:470:1f07:11d 20801309     - 13835457425 15149389     - 11508790198     -
            # p2p0  2304  <Link#5>    06:0c:ce:db:4e:fa        0     0          0        0     0          0     0
            # ham0  1404  <Link#6>    7a:79:05:4d:bf:f5    30100     0    6815204    18742     0    8494811     0
            # ham0  1404  5             5.77.191.245       30100     -    6815204    18742     -    8494811     -
            # ham0  1404  seneca.loca fe80:6::7879:5ff:    30100     -    6815204    18742     -    8494811     -
            # ham0  1404  2620:9b::54 2620:9b::54d:bff5    30100     -    6815204    18742     -    8494811     -

            headers = lines[0].split()

            # Given the irregular structure of the table above, better to parse from the end of each line
            # Verify headers first
            #          -7       -6       -5        -4       -3       -2        -1
            for h in ("Ipkts", "Ierrs", "Ibytes", "Opkts", "Oerrs", "Obytes",
                      "Coll"):
                if h not in headers:
                    self.log.error("%s not found in %s; cannot parse" %
                                   (h, headers))
                    return False

            current = None
            for l in lines[1:]:
                # Another header row, abort now, this is IPv6 land
                if "Name" in l:
                    break

                x = l.split()
                if len(x) == 0:
                    break

                iface = x[0]
                if iface.endswith("*"):
                    iface = iface[:-1]
                if iface == current:
                    # skip multiple lines of same interface
                    continue
                else:
                    current = iface

                # Filter inactive interfaces
                if self._parse_value(x[-5]) or self._parse_value(x[-2]):
                    iface = current
                    metrics = {
                        'bytes_rcvd': self._parse_value(x[-5]),
                        'bytes_sent': self._parse_value(x[-2]),
                        'packets_in.count': self._parse_value(x[-7]),
                        'packets_in.error': self._parse_value(x[-6]),
                        'packets_out.count': self._parse_value(x[-4]),
                        'packets_out.error': self._parse_value(x[-3]),
                    }
                    self._submit_devicemetrics(iface, metrics, custom_tags)
        except SubprocessOutputEmptyError:
            self.log.exception("Error collecting connection stats.")

        try:
            netstat, _, _ = get_subprocess_output(
                ["netstat", "-s", "-p", "tcp"], self.log)
            # 3651535 packets sent
            #         972097 data packets (615753248 bytes)
            #         5009 data packets (2832232 bytes) retransmitted
            #         0 resends initiated by MTU discovery
            #         2086952 ack-only packets (471 delayed)
            #         0 URG only packets
            #         0 window probe packets
            #         310851 window update packets
            #         336829 control packets
            #         0 data packets sent after flow control
            #         3058232 checksummed in software
            #         3058232 segments (571218834 bytes) over IPv4
            #         0 segments (0 bytes) over IPv6
            # 4807551 packets received
            #         1143534 acks (for 616095538 bytes)
            #         165400 duplicate acks
            #         ...

            self._submit_regexed_values(netstat, BSD_TCP_METRICS, custom_tags)
        except SubprocessOutputEmptyError:
            self.log.exception("Error collecting TCP stats.")
Example #30
# (C) Datadog, Inc. 2018
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
import os
import sys

from datadog_checks.base.utils.platform import Platform

EMBEDDED_DIR = 'embedded'

if Platform.is_windows():
    EMBEDDED_DIR += str(sys.version_info[0])


def get_ca_certs_path():
    """
    Get a path to the trusted certificates of the system
    """
    for f in _get_ca_certs_paths():
        if os.path.exists(f):
            return f
    return None


def _get_ca_certs_paths():
    """
    Get a list of possible paths containing certificates

    Check is installed via pip to:
     * Windows: embedded/lib/site-packages/datadog_checks/http_check
     * Linux: embedded/lib/python2.7/site-packages/datadog_checks/http_check