def _assert_complex_config(aggregator):
    """Assert the service checks, metrics and custom queries produced by a
    complex-config MySQL check run."""
    # Service checks
    aggregator.assert_service_check('mysql.can_connect', status=MySql.OK, tags=tags.SC_TAGS, count=1)
    aggregator.assert_service_check('mysql.replication.slave_running', status=MySql.OK, tags=tags.SC_TAGS, at_least=1)

    # Collect every metric group we expect this configuration to emit.
    testable_metrics = []
    for group in (
        variables.STATUS_VARS,
        variables.VARIABLES_VARS,
        variables.INNODB_VARS,
        variables.BINLOG_VARS,
        variables.SYSTEM_METRICS,
        variables.SCHEMA_VARS,
        variables.SYNTHETIC_VARS,
    ):
        testable_metrics.extend(group)

    if MYSQL_VERSION_PARSED >= parse_version('5.6') and environ.get('MYSQL_FLAVOR') != 'mariadb':
        testable_metrics.extend(variables.PERFORMANCE_VARS)

    # Test metrics
    for mname in testable_metrics:
        # user_time / kernel_time are currently only guaranteed on Linux,
        # and cpu_time is not available on Windows.
        if not Platform.is_linux() and mname in ('mysql.performance.user_time', 'mysql.performance.kernel_time'):
            continue
        if Platform.is_windows() and mname == 'mysql.performance.cpu_time':
            continue

        if mname == 'mysql.performance.query_run_time.avg':
            for schema_tag in ('schema:testdb', 'schema:mysql'):
                aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + [schema_tag], count=1)
        elif mname == 'mysql.info.schema.size':
            for schema_tag in ('schema:testdb', 'schema:information_schema', 'schema:performance_schema'):
                aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + [schema_tag], count=1)
        else:
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS, at_least=0)

    # TODO: test this if it is implemented
    # Assert service metadata
    # version_metadata = mysql_check.service_metadata['version']
    # assert len(version_metadata) == 1

    # test custom query metrics
    aggregator.assert_metric('alice.age', value=25)
    aggregator.assert_metric('bob.age', value=20)

    # test optional metrics
    optional_metrics = []
    for group in (
        variables.OPTIONAL_REPLICATION_METRICS,
        variables.OPTIONAL_INNODB_VARS,
        variables.OPTIONAL_STATUS_VARS,
        variables.OPTIONAL_STATUS_VARS_5_6_6,
    ):
        optional_metrics.extend(group)
    _test_optional_metrics(aggregator, optional_metrics, 1)

    # Raises when coverage < 100%
    aggregator.assert_all_metrics_covered()
    def check(self, instance):
        """Run the network check for one instance.

        Reads the instance options, compiles the optional interface
        exclusion regex, then dispatches to the platform-specific
        collector.
        """
        if instance is None:
            instance = {}

        self._excluded_ifaces = instance.get('excluded_interfaces', [])
        self._collect_cx_state = instance.get('collect_connection_state',
                                              False)
        self._collect_rate_metrics = instance.get('collect_rate_metrics', True)
        self._collect_count_metrics = instance.get('collect_count_metrics',
                                                   False)

        # This decides whether we should split or combine connection states,
        # along with a few other things
        self._setup_metrics(instance)

        self._exclude_iface_re = None
        exclude_re = instance.get('excluded_interface_re', None)
        if exclude_re:
            # Lazy %-style logging args: only format when debug is enabled
            # (was an eager "%" interpolation).
            self.log.debug("Excluding network devices matching: %s",
                           exclude_re)
            self._exclude_iface_re = re.compile(exclude_re)

        if Platform.is_linux():
            self._check_linux(instance)
        elif Platform.is_bsd():
            self._check_bsd(instance)
        elif Platform.is_solaris():
            self._check_solaris(instance)
        elif Platform.is_windows():
            self._check_psutil(instance)
Exemple #3
0
def init_db():
    """Seed the sample Aerospike database used by the tests.

    Does nothing on non-Linux platforms: version 3.10 of the client only
    installs successfully on Linux.
    """
    if not Platform.is_linux():
        return

    import aerospike

    # sample Aerospike Python Client code
    # https://www.aerospike.com/docs/client/python/usage/kvs/write.html
    conn = aerospike.client({'hosts': [(HOST, PORT)]}).connect()

    record_key = ('test', 'characters', 'bender')
    record_bins = {
        'name': 'Bender',
        'serialnum': 2716057,
        'lastsentence': {
            'BBS': "Well, we're boned",
            'TBwaBB': 'I love you, meatbags!',
            'BG': 'Whip harder, Professor!',
            'ltWGY': 'Into the breach, meatbags. Or not, whatever',
        },
        'composition':
        ['40% zinc', '40% titanium', '30% iron', '40% dolomite'],
        'apartment': bytearray(b'\x24'),
        'quote_cnt': 47,
    }
    conn.put(record_key, record_bins)

    # Issue a few reads so the server has some activity to report.
    for _ in range(10):
        conn.get(record_key)

    conn.close()
    def check(self, instance):
        """Run the network check for one instance, validating the config
        and dispatching to the platform-specific collector."""
        if instance is None:
            instance = {}

        # 'excluded_interfaces' must be a list; anything else is a config error.
        self._excluded_ifaces = instance.get('excluded_interfaces', [])
        if not isinstance(self._excluded_ifaces, list):
            raise ConfigurationError(
                "Expected 'excluded_interfaces' to be a list, got '{}'".format(
                    type(self._excluded_ifaces).__name__
                )
            )

        self._collect_cx_state = instance.get('collect_connection_state', False)
        self._collect_rate_metrics = instance.get('collect_rate_metrics', True)
        self._collect_count_metrics = instance.get('collect_count_metrics', False)

        # This decides whether we should split or combine connection states,
        # along with a few other things
        self._setup_metrics(instance)

        self._exclude_iface_re = None
        exclude_pattern = instance.get('excluded_interface_re', None)
        if exclude_pattern:
            self.log.debug("Excluding network devices matching: %s", exclude_pattern)
            self._exclude_iface_re = re.compile(exclude_pattern)

        # Platform dispatch.
        if Platform.is_linux():
            self._check_linux(instance)
        elif Platform.is_bsd():
            self._check_bsd(instance)
        elif Platform.is_solaris():
            self._check_solaris(instance)
        elif Platform.is_windows():
            self._check_psutil(instance)
Exemple #5
0
 def check(self, instance):
     """Collect disk space/inode stats.

     Refreshes device labels on Linux when label tagging is enabled, then
     collects via psutil when available, otherwise falls back to manual
     collection.
     """
     if self._tag_by_label and Platform.is_linux():
         self.devices_label = self._get_devices_label()
     # Windows and Mac will always have psutil
     # (we have packaged for both of them)
     if self._psutil():
         self.collect_metrics_psutil()
     else:
         # FIXME: implement all_partitions (df -a)
         self.collect_metrics_manually()
Exemple #6
0
    def __init__(self, name, init_config, instances):
        """Initialize the process check.

        Reads process-matching options from the instance config, cache
        durations from ``init_config``, and honors a custom procfs path
        on Linux.
        """
        super(ProcessCheck, self).__init__(name, init_config, instances)

        # Process-matching options from the instance config.
        self.name = self.instance.get('name', None)
        self.tags = self.instance.get('tags', [])
        self.exact_match = is_affirmative(
            self.instance.get('exact_match', True))
        self.search_string = self.instance.get('search_string', None)
        self.ignore_ad = is_affirmative(
            self.instance.get('ignore_denied_access', True))
        self.pid = self.instance.get('pid')
        self.pid_file = self.instance.get('pid_file')
        self.collect_children = is_affirmative(
            self.instance.get('collect_children', False))
        self.user = self.instance.get('user', False)
        self.try_sudo = self.instance.get('try_sudo', False)

        # ad stands for access denied
        # We cache the PIDs getting this error and don't iterate on them more often than `access_denied_cache_duration``
        # This cache is for all PIDs so it's global, but it should be refreshed by instance
        self.last_ad_cache_ts = {}
        self.ad_cache = set()
        self.access_denied_cache_duration = int(
            init_config.get('access_denied_cache_duration',
                            DEFAULT_AD_CACHE_DURATION))

        # By default cache the PID list for a while
        # Sometimes it's not wanted b/c it can mess with no-data monitoring
        # This cache is indexed per instance
        self.last_pid_cache_ts = {}
        self.pid_cache = {}
        self.pid_cache_duration = int(
            init_config.get('pid_cache_duration', DEFAULT_PID_CACHE_DURATION))

        self._conflicting_procfs = False
        self._deprecated_init_procfs = False
        if Platform.is_linux():
            # A procfs_path in init_config conflicts with the agent-level
            # setting when both are set and disagree; otherwise it is
            # applied (flagged as deprecated).
            procfs_path = init_config.get('procfs_path')
            if procfs_path:
                agent_procfs_path = datadog_agent.get_config('procfs_path')
                if agent_procfs_path and procfs_path != agent_procfs_path.rstrip(
                        '/'):
                    self._conflicting_procfs = True
                else:
                    self._deprecated_init_procfs = True
                    psutil.PROCFS_PATH = procfs_path

        # Process cache, indexed by instance
        self.process_cache = defaultdict(dict)

        self.process_list_cache.cache_duration = int(
            init_config.get('shared_process_list_cache_duration',
                            DEFAULT_SHARED_PROCESS_LIST_CACHE_DURATION))
Exemple #7
0
    def get_pagefault_stats(self, pid):
        """Read page-fault counters for *pid* from ``/proc/<pid>/stat``.

        Returns a generator of the four integer fields 10-13 of the stat
        line (see proc(5)), or None on non-Linux platforms or read errors.
        """
        if not Platform.is_linux():
            return None

        # http://man7.org/linux/man-pages/man5/proc.5.html
        try:
            stat_path = '/{}/{}/stat'.format(psutil.PROCFS_PATH, pid)
            with open(stat_path, 'r') as stat_file:
                data = stat_file.read()
        except Exception:
            self.log.debug(
                'error getting proc stats: file_to_string failed for /%s/%s/stat',
                psutil.PROCFS_PATH, pid)
            return None
        return (int(i) for i in data.split()[9:13])
Exemple #8
0
    def __init__(self, name, init_config, instances=None):
        """Initialize the process check.

        Sets up the access-denied and PID caches from ``init_config`` and
        honors a custom procfs path on Linux.
        """
        super(ProcessCheck, self).__init__(name, init_config, instances)

        # ad stands for access denied
        # We cache the PIDs getting this error and don't iterate on them more often than `access_denied_cache_duration``
        # This cache is for all PIDs so it's global, but it should be refreshed by instance
        self.last_ad_cache_ts = {}
        self.ad_cache = set()
        self.access_denied_cache_duration = int(
            init_config.get('access_denied_cache_duration',
                            DEFAULT_AD_CACHE_DURATION))

        # By default cache the PID list for a while
        # Sometimes it's not wanted b/c it can mess with no-data monitoring
        # This cache is indexed per instance
        self.last_pid_cache_ts = {}
        self.pid_cache = {}
        self.pid_cache_duration = int(
            init_config.get('pid_cache_duration', DEFAULT_PID_CACHE_DURATION))

        self._conflicting_procfs = False
        self._deprecated_init_procfs = False
        if Platform.is_linux():
            # A procfs_path in init_config conflicts with the agent-level
            # setting when both are set and disagree; otherwise it is
            # applied (flagged as deprecated).
            procfs_path = init_config.get('procfs_path')
            if procfs_path:
                agent_procfs_path = datadog_agent.get_config('procfs_path')
                if agent_procfs_path and procfs_path != agent_procfs_path.rstrip(
                        '/'):
                    self._conflicting_procfs = True
                else:
                    self._deprecated_init_procfs = True
                    psutil.PROCFS_PATH = procfs_path

        # Process cache, indexed by instance
        self.process_cache = defaultdict(dict)

        self.process_list_cache.cache_duration = int(
            init_config.get('shared_process_list_cache_duration',
                            DEFAULT_SHARED_PROCESS_LIST_CACHE_DURATION))
def test_complex_config_replica(aggregator, instance_complex):
    """Run the MySQL check against the replica port and assert the full set
    of service checks, metrics and custom-query metrics it should emit."""
    mysql_check = MySql(common.CHECK_NAME, {}, {})
    config = copy.deepcopy(instance_complex)
    # Point the complex config at the replica instead of the primary.
    config['port'] = common.SLAVE_PORT
    mysql_check.check(config)

    # self.assertMetricTag('mysql.replication.seconds_behind_master', 'channel:default')

    # Test service check
    aggregator.assert_service_check('mysql.can_connect', status=MySql.OK, tags=tags.SC_TAGS_REPLICA, count=1)

    # Travis MySQL not running replication - FIX in flavored test.
    aggregator.assert_service_check(
        'mysql.replication.slave_running', status=MySql.OK, tags=tags.SC_TAGS_REPLICA, at_least=1
    )

    # All metric groups this configuration is expected to emit.
    testable_metrics = (
        variables.STATUS_VARS
        + variables.VARIABLES_VARS
        + variables.INNODB_VARS
        + variables.BINLOG_VARS
        + variables.SYSTEM_METRICS
        + variables.SCHEMA_VARS
        + variables.SYNTHETIC_VARS
    )

    # Performance-schema metrics need MySQL >= 5.6 and are not available on MariaDB.
    if MYSQL_VERSION_PARSED >= parse_version('5.6') and environ.get('MYSQL_FLAVOR') != 'mariadb':
        testable_metrics.extend(variables.PERFORMANCE_VARS)

    # Test metrics
    for mname in testable_metrics:
        # These two are currently not guaranteed outside of a Linux
        # environment.
        if mname == 'mysql.performance.user_time' and not Platform.is_linux():
            continue
        if mname == 'mysql.performance.kernel_time' and not Platform.is_linux():
            continue
        if mname == 'mysql.performance.cpu_time' and Platform.is_windows():
            continue
        if mname == 'mysql.performance.query_run_time.avg':
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:testdb'], at_least=1)
        elif mname == 'mysql.info.schema.size':
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:testdb'], count=1)
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:information_schema'], count=1)
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS + ['schema:performance_schema'], count=1)
        else:
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS, at_least=0)

    # test custom query metrics
    aggregator.assert_metric('alice.age', value=25)
    aggregator.assert_metric('bob.age', value=20)

    # test optional metrics
    optional_metrics = (
        variables.OPTIONAL_REPLICATION_METRICS
        + variables.OPTIONAL_INNODB_VARS
        + variables.OPTIONAL_STATUS_VARS
        + variables.OPTIONAL_STATUS_VARS_5_6_6
    )
    _test_optional_metrics(aggregator, optional_metrics, 1)

    # Raises when coverage < 100%
    aggregator.assert_all_metrics_covered()
                    __name__='disk_usage'):
        c.check(instance)

    for name in gauge_metrics:
        aggregator.assert_metric(name, count=0)

    for name in rate_metrics:
        aggregator.assert_metric_has_tag(
            name, 'device:{}'.format(DEFAULT_DEVICE_NAME))
        aggregator.assert_metric_has_tag(
            name, 'device_name:{}'.format(DEFAULT_DEVICE_BASE_NAME))

    aggregator.assert_all_metrics_covered()


@pytest.mark.skipif(not Platform.is_linux(),
                    reason='disk labels are only available on Linux')
@pytest.mark.usefixtures('psutil_mocks')
def test_labels_from_blkid_cache_file(aggregator, instance_blkid_cache_file,
                                      gauge_metrics, rate_metrics):
    """
    Verify that the disk labels are set when the blkid_cache_file option is set
    """
    check = Disk('disk', {}, [instance_blkid_cache_file])
    check.check(instance_blkid_cache_file)

    expected_tags = [
        'device:/dev/sda1',
        'device_name:sda1',
        'label:MYLABEL',
        'device_label:MYLABEL',
    ]
    for metric in chain(gauge_metrics, rate_metrics):
        aggregator.assert_metric(metric, tags=expected_tags)
Exemple #11
0
    def check(self, instance):
        """Get disk space/inode stats.

        Iterates all partitions, skipping excluded/too-small/unreadable
        ones, tags and submits per-partition metrics, optionally emits the
        read-write service check, then collects latency metrics.
        """
        if self._tag_by_label and Platform.is_linux():
            self.devices_label = self._get_devices_label()

        self._valid_disks = {}
        for part in psutil.disk_partitions(all=True):
            # we check all exclude conditions
            if self.exclude_disk(part):
                continue

            # Get disk metrics here to be able to exclude on total usage
            try:
                disk_usage = timeout(5)(psutil.disk_usage)(part.mountpoint)
            except TimeoutException:
                self.log.warning(
                    u'Timeout while retrieving the disk usage of `%s` mountpoint. Skipping...',
                    part.mountpoint)
                continue
            except Exception as e:
                self.log.warning('Unable to get disk metrics for %s: %s',
                                 part.mountpoint, e)
                continue

            # Exclude disks with size less than min_disk_size
            if disk_usage.total <= self._min_disk_size:
                if disk_usage.total > 0:
                    # Lazy %-style logging args, consistent with the
                    # warnings above (was eager str.format()).
                    self.log.info(
                        'Excluding device %s with total disk size %s',
                        part.device, disk_usage.total)
                continue

            # For later, latency metrics
            self._valid_disks[part.device] = (part.fstype, part.mountpoint)
            self.log.debug('Passed: %s', part.device)

            device_name = part.mountpoint if self._use_mount else part.device

            tags = [part.fstype, 'filesystem:{}'.format(part.fstype)
                    ] if self._tag_by_filesystem else []
            tags.extend(self._custom_tags)

            # apply device/mountpoint specific tags
            for regex, device_tags in self._device_tag_re:
                if regex.match(device_name):
                    tags.extend(device_tags)

            if self.devices_label.get(device_name):
                tags.append(self.devices_label.get(device_name))

            # legacy check names c: vs psutil name C:\\
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()

            tags.append('device:{}'.format(device_name))
            for metric_name, metric_value in iteritems(
                    self._collect_part_metrics(part, disk_usage)):
                self.gauge(metric_name, metric_value, tags=tags)

            # Add in a disk read write or read only check
            if self._service_check_rw:
                rwro = {'rw', 'ro'} & set(part.opts.split(','))
                if len(rwro) == 1:
                    self.service_check('disk.read_write',
                                       AgentCheck.OK if rwro.pop() == 'rw' else
                                       AgentCheck.CRITICAL,
                                       tags=tags)
                else:
                    self.service_check('disk.read_write',
                                       AgentCheck.UNKNOWN,
                                       tags=tags)

        self.collect_latency_metrics()
Exemple #12
0
def test_complex_config_replica(aggregator, instance_complex):
    """Run the MySQL check against the replica port and assert service
    checks, metrics, custom-query metrics and metadata coverage."""
    config = copy.deepcopy(instance_complex)
    # Point the complex config at the replica instead of the primary.
    config['port'] = common.SLAVE_PORT
    mysql_check = MySql(common.CHECK_NAME, {}, instances=[config])

    mysql_check.check(config)

    # Test service check
    aggregator.assert_service_check('mysql.can_connect',
                                    status=MySql.OK,
                                    tags=tags.SC_TAGS_REPLICA,
                                    count=1)

    # Travis MySQL not running replication - FIX in flavored test.
    aggregator.assert_service_check(
        'mysql.replication.slave_running',
        status=MySql.OK,
        tags=tags.SC_TAGS_REPLICA + ['replication_mode:replica'],
        at_least=1,
    )

    # All metric groups this configuration is expected to emit.
    testable_metrics = (variables.STATUS_VARS + variables.COMPLEX_STATUS_VARS +
                        variables.VARIABLES_VARS +
                        variables.COMPLEX_VARIABLES_VARS +
                        variables.INNODB_VARS + variables.COMPLEX_INNODB_VARS +
                        variables.BINLOG_VARS + variables.SYSTEM_METRICS +
                        variables.SCHEMA_VARS + variables.SYNTHETIC_VARS +
                        variables.STATEMENT_VARS)

    # Performance-schema metrics need MySQL >= 5.6 and are not available on MariaDB.
    if MYSQL_VERSION_PARSED >= parse_version('5.6') and environ.get(
            'MYSQL_FLAVOR') != 'mariadb':
        testable_metrics.extend(variables.PERFORMANCE_VARS)

    # Test metrics
    for mname in testable_metrics:
        # These two are currently not guaranteed outside of a Linux
        # environment.
        if mname == 'mysql.performance.user_time' and not Platform.is_linux():
            continue
        if mname == 'mysql.performance.kernel_time' and not Platform.is_linux(
        ):
            continue
        if mname == 'mysql.performance.cpu_time' and Platform.is_windows():
            continue
        if mname == 'mysql.performance.query_run_time.avg':
            aggregator.assert_metric(mname,
                                     tags=tags.METRIC_TAGS + ['schema:testdb'],
                                     at_least=1)
        elif mname == 'mysql.info.schema.size':
            aggregator.assert_metric(mname,
                                     tags=tags.METRIC_TAGS + ['schema:testdb'],
                                     count=1)
            aggregator.assert_metric(mname,
                                     tags=tags.METRIC_TAGS +
                                     ['schema:information_schema'],
                                     count=1)
            aggregator.assert_metric(mname,
                                     tags=tags.METRIC_TAGS +
                                     ['schema:performance_schema'],
                                     count=1)
        else:
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS, at_least=0)

    # test custom query metrics
    aggregator.assert_metric('alice.age', value=25)
    aggregator.assert_metric('bob.age', value=20)

    # test optional metrics
    optional_metrics = (variables.OPTIONAL_REPLICATION_METRICS +
                        variables.OPTIONAL_INNODB_VARS +
                        variables.OPTIONAL_STATUS_VARS +
                        variables.OPTIONAL_STATUS_VARS_5_6_6)
    # Note, this assertion will pass even if some metrics are not present.
    # Manual testing is required for optional metrics
    _test_optional_metrics(aggregator, optional_metrics)

    # Raises when coverage < 100%
    aggregator.assert_all_metrics_covered()
    aggregator.assert_metrics_using_metadata(get_metadata_metrics(),
                                             check_submission_type=True,
                                             exclude=['alice.age', 'bob.age'] +
                                             variables.STATEMENT_VARS)
Exemple #13
0
# In order to collect connection state we need `ss` command included in `iproute2` package
E2E_METADATA = {
    'start_commands': ['apt-get update', 'apt-get install iproute2 -y']
}

# Baseline network metrics asserted on every platform.
EXPECTED_METRICS = [
    'system.net.bytes_rcvd',
    'system.net.bytes_sent',
    'system.net.packets_in.count',
    'system.net.packets_in.error',
    'system.net.packets_out.count',
    'system.net.packets_out.error',
]

# Packet-drop counters are only collected on Linux and Windows.
if Platform.is_linux() or Platform.is_windows():
    EXPECTED_METRICS.extend([
        'system.net.packets_in.drop',
        'system.net.packets_out.drop',
    ])

E2E_EXPECTED_METRICS = EXPECTED_METRICS + [
    "system.net.tcp4.closing",
    "system.net.tcp4.established",
    "system.net.tcp4.listening",
    "system.net.tcp4.opening",
    "system.net.tcp4.time_wait",
    "system.net.tcp6.closing",
    "system.net.tcp6.established",
    "system.net.tcp6.listening",
    "system.net.tcp6.opening",
Exemple #14
0
    for name in gauge_metrics:
        aggregator.assert_metric(name, count=0)

    for name in rate_metrics:
        aggregator.assert_metric_has_tag(name, 'device:{}'.format(DEFAULT_DEVICE_NAME))
        aggregator.assert_metric_has_tag(name, 'device_name:{}'.format(DEFAULT_DEVICE_BASE_NAME))

    for name in count_metrics:
        aggregator.assert_metric_has_tag(name, 'device:{}'.format(DEFAULT_DEVICE_NAME))
        aggregator.assert_metric_has_tag(name, 'device_name:{}'.format(DEFAULT_DEVICE_BASE_NAME))

    aggregator.assert_all_metrics_covered()


@pytest.mark.skipif(not Platform.is_linux(), reason='disk labels are only available on Linux')
@pytest.mark.usefixtures('psutil_mocks')
def test_labels_from_blkid_cache_file(
    aggregator, instance_blkid_cache_file, gauge_metrics, rate_metrics, count_metrics
):
    """
    Verify that the disk labels are set when the blkid_cache_file option is set
    """
    check = Disk('disk', {}, [instance_blkid_cache_file])
    check.check(instance_blkid_cache_file)

    expected_tags = ['device:/dev/sda1', 'device_name:sda1', 'label:MYLABEL', 'device_label:MYLABEL']
    for metric in chain(gauge_metrics, rate_metrics, count_metrics):
        aggregator.assert_metric(metric, tags=expected_tags)

Exemple #15
0
def _assert_complex_config(aggregator, hostname='stubbed.hostname'):
    """Assert service checks and metrics for a complex-config check run.

    Branches on the replication topology (classic vs group) and the MySQL
    version to decide which extra metric groups and service checks to assert.
    """
    # Test service check
    aggregator.assert_service_check('mysql.can_connect',
                                    status=MySql.OK,
                                    tags=tags.SC_TAGS,
                                    hostname=hostname,
                                    count=1)
    if MYSQL_REPLICATION == 'classic':
        aggregator.assert_service_check(
            'mysql.replication.slave_running',
            status=MySql.OK,
            tags=tags.SC_TAGS + ['replication_mode:source'],
            hostname=hostname,
            at_least=1,
        )
    # All metric groups this configuration is expected to emit.
    testable_metrics = (variables.STATUS_VARS + variables.COMPLEX_STATUS_VARS +
                        variables.VARIABLES_VARS +
                        variables.COMPLEX_VARIABLES_VARS +
                        variables.INNODB_VARS + variables.COMPLEX_INNODB_VARS +
                        variables.BINLOG_VARS + variables.SYSTEM_METRICS +
                        variables.SCHEMA_VARS + variables.SYNTHETIC_VARS +
                        variables.STATEMENT_VARS + variables.TABLE_VARS)
    if MYSQL_REPLICATION == 'group':
        # Group replication adds its own metric group and service check.
        testable_metrics.extend(variables.GROUP_REPLICATION_VARS)
        aggregator.assert_service_check(
            'mysql.replication.group.status',
            status=MySql.OK,
            tags=tags.SC_TAGS + [
                'channel_name:group_replication_applier',
                'member_role:PRIMARY', 'member_state:ONLINE'
            ],
            count=1,
        )

    if MYSQL_VERSION_PARSED >= parse_version('5.6'):
        testable_metrics.extend(variables.PERFORMANCE_VARS)

    # Test metrics
    for mname in testable_metrics:
        # These three are currently not guaranteed outside of a Linux
        # environment.
        if mname == 'mysql.performance.user_time' and not Platform.is_linux():
            continue
        if mname == 'mysql.performance.kernel_time' and not Platform.is_linux(
        ):
            continue
        if mname == 'mysql.performance.cpu_time' and Platform.is_windows():
            continue

        if mname == 'mysql.performance.query_run_time.avg':
            aggregator.assert_metric(mname,
                                     tags=tags.METRIC_TAGS + ['schema:testdb'],
                                     count=1)
            aggregator.assert_metric(mname,
                                     tags=tags.METRIC_TAGS + ['schema:mysql'],
                                     count=1)
        elif mname == 'mysql.info.schema.size':
            aggregator.assert_metric(mname,
                                     tags=tags.METRIC_TAGS + ['schema:testdb'],
                                     count=1)
            aggregator.assert_metric(mname,
                                     tags=tags.METRIC_TAGS +
                                     ['schema:information_schema'],
                                     count=1)
            aggregator.assert_metric(mname,
                                     tags=tags.METRIC_TAGS +
                                     ['schema:performance_schema'],
                                     count=1)
        else:
            aggregator.assert_metric(mname, tags=tags.METRIC_TAGS, at_least=0)

    # TODO: test this if it is implemented
    # Assert service metadata
    # version_metadata = mysql_check.service_metadata['version']
    # assert len(version_metadata) == 1

    # test custom query metrics
    aggregator.assert_metric('alice.age', value=25)
    aggregator.assert_metric('bob.age', value=20)

    # test optional metrics
    optional_metrics = (variables.OPTIONAL_REPLICATION_METRICS +
                        variables.OPTIONAL_INNODB_VARS +
                        variables.OPTIONAL_STATUS_VARS +
                        variables.OPTIONAL_STATUS_VARS_5_6_6)
    # Note, this assertion will pass even if some metrics are not present.
    # Manual testing is required for optional metrics
    _test_optional_metrics(aggregator, optional_metrics)

    # Raises when coverage < 100%
    aggregator.assert_all_metrics_covered()
Exemple #16
0
    def check(self, _):
        """Get disk space/inode stats.

        Iterates all partitions, skipping excluded/too-small/unreadable
        ones, tags and submits per-partition metrics, optionally emits the
        read-write service check, then collects latency metrics. The
        instance argument is unused; config is read at init time.
        """
        if self._tag_by_label and Platform.is_linux():
            self.devices_label = self._get_devices_label()

        for part in psutil.disk_partitions(all=self._include_all_devices):
            # we check all exclude conditions
            if self.exclude_disk(part):
                self.log.debug('Excluding device %s', part.device)
                continue

            # Get disk metrics here to be able to exclude on total usage
            try:
                disk_usage = timeout(self._timeout)(psutil.disk_usage)(part.mountpoint)
            except TimeoutException:
                self.log.warning(
                    u'Timeout after %d seconds while retrieving the disk usage of `%s` mountpoint. '
                    u'You might want to change the timeout length in the settings.',
                    self._timeout,
                    part.mountpoint,
                )
                continue
            except Exception as e:
                self.log.warning(
                    u'Unable to get disk metrics for %s: %s. '
                    u'You can exclude this mountpoint in the settings if it is invalid.',
                    part.mountpoint,
                    e,
                )
                continue

            # Exclude disks with size less than min_disk_size
            if disk_usage.total <= self._min_disk_size:
                if disk_usage.total > 0:
                    self.log.info('Excluding device %s with total disk size %s', part.device, disk_usage.total)
                continue

            self.log.debug('Passed: %s', part.device)

            device_name = part.mountpoint if self._use_mount else part.device

            tags = [part.fstype, 'filesystem:{}'.format(part.fstype)] if self._tag_by_filesystem else []
            tags.extend(self._custom_tags)

            # apply device/mountpoint specific tags
            for regex, device_tags in self._device_tag_re:
                if regex.match(device_name):
                    tags.extend(device_tags)

            # apply device labels as tags (from blkid or lsblk).
            # we want to use the real device name and not the device_name (which can be the mountpoint)
            if self.devices_label.get(part.device):
                tags.extend(self.devices_label.get(part.device))

            # legacy check names c: vs psutil name C:\\
            if Platform.is_win32():
                device_name = device_name.strip('\\').lower()

            tags.append('device:{}'.format(device_name))
            tags.append('device_name:{}'.format(_base_device_name(part.device)))
            for metric_name, metric_value in iteritems(self._collect_part_metrics(part, disk_usage)):
                self.gauge(metric_name, metric_value, tags=tags)

            # Add in a disk read write or read only check
            if self._service_check_rw:
                rwro = {'rw', 'ro'} & set(part.opts.split(','))
                if len(rwro) == 1:
                    self.service_check(
                        'disk.read_write', AgentCheck.OK if rwro.pop() == 'rw' else AgentCheck.CRITICAL, tags=tags
                    )
                else:
                    self.service_check('disk.read_write', AgentCheck.UNKNOWN, tags=tags)

        self.collect_latency_metrics()