def run_sampler(self, tags):
     """
     start the sampler thread if not already running
     :param tags:
     :return:
     """
     if not self._enabled:
         self._log.debug("Statement sampler not enabled")
         return
     self._tags = tags
     self._tags_str = ','.join(self._tags)
     for t in self._tags:
         if t.startswith('service:'):
             self._service = t[len('service:'):]
     self._last_check_run = time.time()
     if self._run_sync or is_affirmative(
             os.environ.get('DBM_STATEMENT_SAMPLER_RUN_SYNC', "false")):
         self._log.debug("Running statement sampler synchronously")
         self._collect_statement_samples()
     elif self._collection_loop_future is None or not self._collection_loop_future.running(
     ):
         self._collection_loop_future = PostgresStatementSamples.executor.submit(
             self._collection_loop)
     else:
         self._log.debug(
             "Statement sampler collection loop already running")
Example 2
 def check(self, instance):
     if is_affirmative(instance.get('use_preview', True)):
         self.check_post_v3(instance)
     else:
         self.warning(
             'In the future etcd check will only support ETCD v3+.')
         self.check_pre_v3(instance)
Example 3
    def _get_running_apps(self):
        """
        Determine what mode was specified
        """
        tags = list(self.tags)

        tags.append('spark_cluster:%s' % self.cluster_name)
        if not self._disable_legacy_cluster_tag:
            tags.append('cluster_name:%s' % self.cluster_name)

        if self.cluster_mode == SPARK_STANDALONE_MODE:
            # check for PRE-20
            pre20 = is_affirmative(self.instance.get(SPARK_PRE_20_MODE, False))
            return self._standalone_init(pre20, tags)

        elif self.cluster_mode == SPARK_MESOS_MODE:
            running_apps = self._mesos_init(tags)
            return self._get_spark_app_ids(running_apps, tags)

        elif self.cluster_mode == SPARK_YARN_MODE:
            running_apps = self._yarn_init(tags)
            return self._get_spark_app_ids(running_apps, tags)

        elif self.cluster_mode == SPARK_DRIVER_MODE:
            return self._driver_init(tags)

        else:
            raise Exception('Invalid setting for %s. Received %s.' %
                            (SPARK_CLUSTER_MODE, self.cluster_mode))
Example 4
 def setup_configured_stats(self, instance):
     collect_stats = {}
     for category, prefix in iteritems(metrics.METRIC_CATEGORIES):
         if is_affirmative(
                 instance.get('collect_{}_stats'.format(prefix), True)):
             collect_stats[category] = True
     return collect_stats
Example 5
    def __new__(cls, name, init_config, instances):
        instance = instances[0]

        if is_affirmative(instance.get('use_legacy', True)):
            return HAProxyCheckLegacy(name, init_config, instances)
        else:
            return super(HAProxyCheck, cls).__new__(cls)
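
Examples 5, 10, 11, and 28 share the same dispatch trick: __new__ inspects the first instance's configuration and either returns an object of a legacy implementation or falls through to normal construction of the new class (in which case Python also runs __init__). A minimal, self-contained sketch of the pattern; FooCheck and LegacyFooCheck are hypothetical names, not classes from any integration, and datadog_checks.base is assumed to be installed:

from datadog_checks.base import is_affirmative


class LegacyFooCheck(object):
    def __init__(self, name, init_config, instances):
        self.mode = 'legacy'


class FooCheck(object):
    def __new__(cls, name, init_config, instances):
        instance = instances[0]
        # A truthy `use_legacy` flag short-circuits to the old implementation.
        # Returning a non-instance of cls means FooCheck.__init__ is never called.
        if is_affirmative(instance.get('use_legacy', True)):
            return LegacyFooCheck(name, init_config, instances)
        return super(FooCheck, cls).__new__(cls)

    def __init__(self, name, init_config, instances):
        self.mode = 'new'


assert FooCheck('foo', {}, [{}]).mode == 'legacy'
assert FooCheck('foo', {}, [{'use_legacy': False}]).mode == 'new'
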
Example 6
    def __init__(self, name, init_config, instances):
        super(IbmDb2Check, self).__init__(name, init_config, instances)
        self._db = self.instance.get('db', '')
        self._username = self.instance.get('username', '')
        self._password = self.instance.get('password', '')
        self._host = self.instance.get('host', '')
        self._port = self.instance.get('port', 5000)
        self._tags = self.instance.get('tags', [])
        self._tls_cert = self.instance.get('tls_cert')

        # Add global database tag
        self._tags.append('db:{}'.format(self._db))

        # Track table space state changes
        self._table_space_states = {}

        # We'll connect on the first check run
        self._conn = None

        custom_queries = self.instance.get('custom_queries', [])
        use_global_custom_queries = self.instance.get(
            'use_global_custom_queries', True)

        # Handle overrides
        if use_global_custom_queries == 'extend':
            custom_queries.extend(
                self.init_config.get('global_custom_queries', []))
        elif 'global_custom_queries' in self.init_config and is_affirmative(
                use_global_custom_queries):
            custom_queries = self.init_config.get('global_custom_queries', [])

        # Deduplicate
        self._custom_queries = list(iter_unique(custom_queries))
Example 7
 def run_sampler(self, tags):
     """
     start the sampler thread if not already running & update tag metadata
     :param tags:
     :return:
     """
     if not self._enabled:
         self._log.debug("Statement sampler not enabled")
         return
     self._tags = tags
     self._tags_str = ','.join(tags)
     for t in self._tags:
         if t.startswith('service:'):
             self._service = t[len('service:'):]
     if not self._version_processed and self._check.version:
         self._has_window_functions = self._check.version.version_compatible(
             (8, 0, 0))
         if self._check.version.flavor == "MariaDB" or not self._check.version.version_compatible(
             (5, 7, 0)):
             self._global_status_table = "information_schema.global_status"
         else:
             self._global_status_table = "performance_schema.global_status"
         self._version_processed = True
     self._last_check_run = time.time()
     if self._run_sync or is_affirmative(
             os.environ.get('DBM_STATEMENT_SAMPLER_RUN_SYNC', "false")):
         self._log.debug("Running statement sampler synchronously")
         self._collect_statement_samples()
     elif self._collection_loop_future is None or not self._collection_loop_future.running(
     ):
         self._collection_loop_future = MySQLStatementSamples.executor.submit(
             self.collection_loop)
     else:
         self._log.debug(
             "Statement sampler collection loop already running")
Example 8
    def __init__(self, name, init_config, instances):
        super(SnmpCheck, self).__init__(name, init_config, instances)

        # Set OID batch size
        self.oid_batch_size = int(init_config.get('oid_batch_size', DEFAULT_OID_BATCH_SIZE))

        # Load Custom MIB directory
        self.mibs_path = init_config.get('mibs_folder')
        self.ignore_nonincreasing_oid = is_affirmative(init_config.get('ignore_nonincreasing_oid', False))
        self.profiles = init_config.get('profiles', {})
        self.profiles_by_oid = {}
        confd = get_config('confd_path')
        for profile, profile_data in self.profiles.items():
            filename = profile_data.get('definition_file')
            if filename:
                if not os.path.isabs(filename):
                    filename = os.path.join(confd, 'snmp.d', 'profiles', filename)
                try:
                    with open(filename) as f:
                        data = yaml.safe_load(f)
                except Exception:
                    raise ConfigurationError("Couldn't read profile '{}' in '{}'".format(profile, filename))
            else:
                data = profile_data['definition']
            self.profiles[profile] = {'definition': data}
            sys_object_oid = data.get('sysobjectid')
            if sys_object_oid:
                self.profiles_by_oid[sys_object_oid] = profile

        self.instance['name'] = self._get_instance_key(self.instance)
        self._config = self._build_config(self.instance)
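
Example 8 resolves each profile either from an inline `definition` or from a `definition_file` (relative paths are looked up under <confd>/snmp.d/profiles/), then indexes any profile that declares a `sysobjectid` so a device can later be matched by OID. A hedged sketch of the init_config shape this loop expects; the profile names, file name, and OID below are invented for illustration:

init_config = {
    'oid_batch_size': 10,
    'mibs_folder': '/opt/mibs',
    'profiles': {
        # Loaded from YAML; a relative path resolves under confd/snmp.d/profiles/
        'generic-router': {'definition_file': 'generic-router.yaml'},
        # Defined inline; `sysobjectid` is what feeds profiles_by_oid
        'example-switch': {
            'definition': {
                'sysobjectid': '1.3.6.1.4.1.99999.1.1',
                'metrics': [{'MIB': 'IF-MIB', 'symbol': 'ifNumber'}],
            }
        },
    },
}
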
Example 9
 def run_job_loop(self, tags):
     """
     :param tags:
     :return:
     """
     if not self._enabled:
         self._log.debug("[job=%s] Job not enabled.", self._job_name)
         return
     if not self._db_hostname:
         self._db_hostname = resolve_db_host(self._config_host)
     self._tags = tags
     self._tags_str = ','.join(self._tags)
     self._job_tags = self._tags + ["job:{}".format(self._job_name)]
     self._job_tags_str = ','.join(self._job_tags)
     self._last_check_run = time.time()
     if self._run_sync or is_affirmative(
             os.environ.get('DBM_THREADED_JOB_RUN_SYNC', "false")):
         self._log.debug("Running threaded job synchronously. job=%s",
                         self._job_name)
         self._run_job_rate_limited()
     elif self._job_loop_future is None or not self._job_loop_future.running(
     ):
         self._job_loop_future = DBMAsyncJob.executor.submit(self._job_loop)
     else:
         self._log.debug("Job loop already running. job=%s", self._job_name)
Example 10
    def __new__(cls, name, init_config, instances):
        instance = instances[0]

        if PY2 or is_affirmative(instance.get('legacy_mode', True)):
            return Win32EventLogWMI(name, init_config, instances)
        else:
            return super(Win32EventLogCheck, cls).__new__(cls)
Example 11
    def __new__(cls, name, init_config, instances):
        instance = instances[0]

        if is_affirmative(instance.get('post_0_10_2', False)):
            return super(KafkaCheck, cls).__new__(cls)
        else:
            return LegacyKafkaCheck_0_10_2(name, init_config, instances)
Example 12
    def __init__(self, name, init_config, instances):
        super(SnmpCheck, self).__init__(name, init_config, instances)

        # Set OID batch size
        self.oid_batch_size = int(
            init_config.get('oid_batch_size', DEFAULT_OID_BATCH_SIZE))

        # Load Custom MIB directory
        self.mibs_path = init_config.get('mibs_folder')
        self.ignore_nonincreasing_oid = is_affirmative(
            init_config.get('ignore_nonincreasing_oid', False))
        self.profiles = init_config.get('profiles', {})
        for profile, profile_data in self.profiles.items():
            filename = profile_data.get('definition_file')
            if filename:
                try:
                    with open(filename) as f:
                        data = yaml.safe_load(f)
                except Exception:
                    raise ConfigurationError(
                        "Couldn't read profile '{}' in '{}'".format(
                            profile, filename))
            else:
                data = profile_data['definition']
            self.profiles[profile] = {'definition': data}

        self.instance['name'] = self._get_instance_key(self.instance)
        self._config = InstanceConfig(
            self.instance, self.warning,
            self.init_config.get('global_metrics', []), self.mibs_path,
            self.profiles)
Example 13
    def __init__(self, name, init_config, instances):
        super(Envoy, self).__init__(name, init_config, instances)
        self.unknown_metrics = defaultdict(int)
        self.unknown_tags = defaultdict(int)

        self.custom_tags = self.instance.get('tags', [])
        self.caching_metrics = self.instance.get('cache_metrics', True)

        self.stats_url = self.instance.get('stats_url')
        if self.stats_url is None:
            raise ConfigurationError('Envoy configuration setting `stats_url` is required')

        included_metrics = set(
            re.sub(r'^envoy\\?\.', '', s, 1)
            for s in self.instance.get('included_metrics', self.instance.get('metric_whitelist', []))
        )
        self.config_included_metrics = [re.compile(pattern) for pattern in included_metrics]

        excluded_metrics = set(
            re.sub(r'^envoy\\?\.', '', s, 1)
            for s in self.instance.get('excluded_metrics', self.instance.get('metric_blacklist', []))
        )
        self.config_excluded_metrics = [re.compile(pattern) for pattern in excluded_metrics]

        # The memory implications here are unclear to me. We may want a bloom filter
        # or a data structure that expires elements based on inactivity.
        self.included_metrics_cache = set()
        self.excluded_metrics_cache = set()

        self.caching_metrics = None
        self.parse_unknown_metrics = is_affirmative(self.instance.get('parse_unknown_metrics', False))
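
Example 13 strips a leading "envoy." prefix (optionally escaped), pre-compiles the include/exclude patterns, and keeps two cache sets so metric names seen before can skip the regex scan. A rough sketch of how such a filter could be applied per metric name; the precedence chosen here (exclusion wins, an empty include list means "everything") and the helper itself are assumptions for illustration, not the check's actual submission path:

import re


def should_submit(metric, included_patterns, excluded_patterns, included_cache, excluded_cache):
    # Cached verdicts avoid re-running every regex on hot metric names.
    if metric in excluded_cache:
        return False
    if metric in included_cache:
        return True
    if any(p.search(metric) for p in excluded_patterns):
        excluded_cache.add(metric)
        return False
    if not included_patterns or any(p.search(metric) for p in included_patterns):
        included_cache.add(metric)
        return True
    excluded_cache.add(metric)
    return False


included = [re.compile(r'^cluster\.')]
excluded = [re.compile(r'membership_total$')]
assert should_submit('cluster.upstream_rq_total', included, excluded, set(), set())
assert not should_submit('cluster.membership_total', included, excluded, set(), set())
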
Example 14
    def check(self, instance):
        # Get properties from conf file
        rm_address = instance.get('resourcemanager_uri')
        if rm_address is None:
            raise Exception(
                "The ResourceManager URL must be specified in the instance configuration"
            )

        collect_task_metrics = is_affirmative(
            instance.get('collect_task_metrics', False))

        # Get additional tags from the conf file
        custom_tags = instance.get("tags", [])
        tags = list(set(custom_tags))

        # Get the cluster name from the conf file
        cluster_name = instance.get('cluster_name')
        if cluster_name is None:
            self.warning(
                "The cluster_name must be specified in the instance configuration, "
                "defaulting to '{}'".format(self.DEFAULT_CLUSTER_NAME))
            cluster_name = self.DEFAULT_CLUSTER_NAME

        tags.append('cluster_name:{}'.format(cluster_name))

        # Get the running MR applications from YARN
        running_apps = self._get_running_app_ids(rm_address)

        # Report success after gathering all metrics from ResourceManager
        self.service_check(
            self.YARN_SERVICE_CHECK,
            AgentCheck.OK,
            tags=['url:{}'.format(rm_address)] + custom_tags,
            message='Connection to ResourceManager "{}" was successful'.format(
                rm_address),
        )

        # Get the applications from the application master
        running_jobs = self._mapreduce_job_metrics(running_apps, tags)

        # Get job counter metrics
        self._mapreduce_job_counters_metrics(running_jobs, tags)

        # Get task metrics
        if collect_task_metrics:
            self._mapreduce_task_metrics(running_jobs, tags)

        # Report success after gathering all metrics from Application Master
        if running_jobs:
            job_id, metrics = next(iteritems(running_jobs))
            am_address = self._get_url_base(metrics['tracking_url'])

            self.service_check(
                self.MAPREDUCE_SERVICE_CHECK,
                AgentCheck.OK,
                tags=['url:{}'.format(am_address)] + custom_tags,
                message='Connection to ApplicationManager "{}" was successful'.
                format(am_address),
            )
Example 15
    def check(self, instance):
        """The main entrypoint of the check."""
        self.log.debug("Running legacy Kafka Consumer check.")
        self._zk_consumer_offsets = {}  # Expected format: {(consumer_group, topic, partition): offset}
        self._kafka_consumer_offsets = {}  # Expected format: {(consumer_group, topic, partition): offset}
        self._highwater_offsets = {}  # Expected format: {(topic, partition): offset}

        # For calculating consumer lag, we have to fetch both the consumer offset and the broker highwater offset.
        # There's a potential race condition because whichever one we check first may be outdated by the time we check
        # the other. Better to check consumer offsets before checking broker offsets because worst case is that
        # overstates consumer lag a little. Doing it the other way can understate consumer lag to the point of having
        # negative consumer lag, which just creates confusion because it's theoretically impossible.

        # Fetch consumer group offsets from Zookeeper
        if self._zk_hosts_ports is not None:
            try:
                self._get_zk_consumer_offsets()
            except Exception:
                self.log.exception("There was a problem collecting consumer offsets from Zookeeper.")
                # don't raise because we might get valid broker offsets

        # Fetch consumer group offsets from Kafka
        # Support for storing offsets in Kafka not available until Kafka 0.8.2. Also, for legacy reasons, this check
        # only fetches consumer offsets from Kafka if Zookeeper is omitted or kafka_consumer_offsets is True.
        if self._kafka_client.config.get('api_version') >= (0, 8, 2) and is_affirmative(
            instance.get('kafka_consumer_offsets', self._zk_hosts_ports is None)
        ):
            try:
                self._get_kafka_consumer_offsets()
            except Exception:
                self.log.exception("There was a problem collecting consumer offsets from Kafka.")
                # don't raise because we might get valid broker offsets

        # Fetch the broker highwater offsets
        try:
            self._get_highwater_offsets()
        except Exception:
            self.log.exception('There was a problem collecting the highwater mark offsets')
            # Unlike consumer offsets, fail immediately because we can't calculate consumer lag w/o highwater_offsets
            raise

        total_contexts = sum(
            [len(self._zk_consumer_offsets), len(self._kafka_consumer_offsets), len(self._highwater_offsets)]
        )
        if total_contexts > self._context_limit:
            self.warning(
                """Discovered %s metric contexts - this exceeds the maximum number of %s contexts permitted by the
                check. Please narrow your target by specifying in your kafka_consumer.yaml the consumer groups, topics
                and partitions you wish to monitor.""",
                total_contexts,
                self._context_limit,
            )

        # Report the metrics
        self._report_highwater_offsets()
        self._report_consumer_offsets_and_lag(self._kafka_consumer_offsets)
        # if someone is in the middle of migrating their offset storage from zookeeper to kafka, they need to identify
        # which source is reporting which offsets. So we tag zookeeper with 'source:zk'
        self._report_consumer_offsets_and_lag(self._zk_consumer_offsets, source='zk')
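
The long ordering comment in Example 15 reduces to simple arithmetic: lag is the broker highwater offset minus the consumer's committed offset, so reading the consumer offset first can only overstate lag (the highwater may advance in between), while reading the highwater first can drive the computed lag negative. A small illustration with invented offsets:

# Recommended order: consumer offset first, highwater second.
consumer_offset = 100     # read first
highwater_offset = 112    # read slightly later; may have advanced meanwhile
assert highwater_offset - consumer_offset >= 0   # at worst, lag is overstated

# Reversed order: the consumer commits more offsets between the two reads.
highwater_offset = 100    # read first
consumer_offset = 103     # read later, already past the stale highwater
assert highwater_offset - consumer_offset < 0    # "impossible" negative lag, a measurement artifact
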
Example 16
    def check_health_v1(self, submission_queue, dynamic_tags):
        url = self._api_url + '/sys/health'
        health_data = self.access_api(
            url, ignore_status_codes=SYS_HEALTH_DEFAULT_CODES)
        cluster_name = health_data.get('cluster_name')
        if cluster_name:
            dynamic_tags.append('vault_cluster:{}'.format(cluster_name))
            if not self._disable_legacy_cluster_tag:
                dynamic_tags.append('cluster_name:{}'.format(cluster_name))

        replication_mode = health_data.get('replication_dr_mode', '').lower()
        if replication_mode == 'secondary':
            if self.instance.get("collect_secondary_dr", False):
                self._replication_dr_secondary_mode = False
                self.log.debug(
                    'Detected vault in replication DR secondary mode, but `collect_secondary_dr` is '
                    'enabled, so Prometheus metric collection will still occur.'
                )
            else:
                self._replication_dr_secondary_mode = True
                self.log.debug(
                    'Detected vault in replication DR secondary mode, skipping Prometheus metric collection.'
                )
        else:
            self._replication_dr_secondary_mode = False

        vault_version = health_data.get('version')
        if vault_version:
            dynamic_tags.append('vault_version:{}'.format(vault_version))
            self.set_metadata('version', vault_version)

        unsealed = not is_affirmative(health_data.get('sealed'))
        if unsealed:
            submission_queue.append(lambda tags: self.service_check(
                self.SERVICE_CHECK_UNSEALED, self.OK, tags=tags))
        else:
            submission_queue.append(lambda tags: self.service_check(
                self.SERVICE_CHECK_UNSEALED, self.CRITICAL, tags=tags))

        initialized = is_affirmative(health_data.get('initialized'))
        if initialized:
            submission_queue.append(lambda tags: self.service_check(
                self.SERVICE_CHECK_INITIALIZED, self.OK, tags=tags))
        else:
            submission_queue.append(lambda tags: self.service_check(
                self.SERVICE_CHECK_INITIALIZED, self.CRITICAL, tags=tags))
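
Example 16 never submits the seal/init service checks directly; it appends closures to submission_queue so they can run later, once dynamic_tags has been fully assembled (cluster, version, and so on). A minimal sketch of that deferred-submission idea; the service_check stub, the draining loop, and the names/values used here are illustrative assumptions, not code from the Vault check:

def service_check(name, status, tags):
    # Stand-in for AgentCheck.service_check
    print(name, status, tags)


submission_queue = []
dynamic_tags = ['api_url:http://localhost:8200']

# Queue the check now; the final tag list is bound only when it is invoked.
submission_queue.append(lambda tags: service_check('vault.unsealed', 'OK', tags=tags))

# ... later health fields may keep appending to dynamic_tags ...
dynamic_tags.append('vault_version:1.6.1')

# Drain once all tags are known, so every queued check sees the same tag set.
for submit in submission_queue:
    submit(dynamic_tags)
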
Example 17
    def __init__(self, name, init_config, instances):
        super(KafkaCheck, self).__init__(name, init_config, instances)
        self._context_limit = int(
            init_config.get('max_partition_contexts', CONTEXT_UPPER_BOUND))
        self._custom_tags = self.instance.get('tags', [])
        self._monitor_unlisted_consumer_groups = is_affirmative(
            self.instance.get('monitor_unlisted_consumer_groups', False))
        self._monitor_all_broker_highwatermarks = is_affirmative(
            self.instance.get('monitor_all_broker_highwatermarks', False))
        self._consumer_groups = self.instance.get('consumer_groups', {})

        kafka_version = self.instance.get('kafka_client_api_version')
        if isinstance(kafka_version, str):
            kafka_version = tuple(map(int, kafka_version.split(".")))

        self._kafka_client = self._create_kafka_admin_client(
            api_version=kafka_version)
Example 18
    def _should_process(self, data_dict):
        """if collect_aggregates_only, we process only the aggregates"""
        if is_affirmative(self.collect_aggregates_only):
            return self._is_aggregate(data_dict)
        elif str(self.collect_aggregates_only).lower() == 'both':
            return True

        return data_dict['svname'] != Services.BACKEND
Example 19
 def _collect_replication_metrics(self, db, results, above_560):
     # Get replica stats
     is_mariadb = self.version.flavor == "MariaDB"
     replication_channel = self.config.options.get('replication_channel')
     results.update(self._get_replica_stats(db, is_mariadb, replication_channel))
     nonblocking = is_affirmative(self.config.options.get('replication_non_blocking_status', False))
     results.update(self._get_replica_status(db, above_560, nonblocking))
     return REPLICA_VARS
Example 20
    def check(self, instance):
        try:
            directory = instance['directory']
        except KeyError:
            raise ConfigurationError(
                'DirectoryCheck: missing `directory` in config')

        abs_directory = abspath(directory)
        name = instance.get('name', directory)
        pattern = instance.get('pattern')
        exclude_dirs = instance.get('exclude_dirs', [])
        exclude_dirs_pattern = re_compile(
            '|'.join(exclude_dirs)) if exclude_dirs else None
        dirs_patterns_full = is_affirmative(
            instance.get('dirs_patterns_full', False))
        recursive = is_affirmative(instance.get('recursive', False))
        dirtagname = instance.get('dirtagname', 'name')
        filetagname = instance.get('filetagname', 'filename')
        filegauges = is_affirmative(instance.get('filegauges', False))
        countonly = is_affirmative(instance.get('countonly', False))
        ignore_missing = is_affirmative(instance.get('ignore_missing', False))
        custom_tags = instance.get('tags', [])

        if not exists(abs_directory):
            msg = ("Either directory '{}' doesn't exist or the Agent doesn't "
                   "have permissions to access it, skipping.".format(
                       abs_directory))

            if not ignore_missing:
                raise ConfigurationError(msg)

            self.log.warning(msg)

        self._get_stats(
            abs_directory,
            name,
            dirtagname,
            filetagname,
            filegauges,
            pattern,
            exclude_dirs_pattern,
            dirs_patterns_full,
            recursive,
            countonly,
            custom_tags,
        )
Example 21
    def __init__(self, name, init_config, instances):

        instance = instances[0]
        if is_affirmative(instance.get('use_preview', True)):
            self.HTTP_CONFIG_REMAPPER = {
                'ssl_cert': {
                    'name': 'tls_cert'
                },
                'ssl_private_key': {
                    'name': 'tls_private_key'
                },
                'ssl_ca_cert': {
                    'name': 'tls_ca_cert'
                },
                'ssl_verify': {
                    'name': 'tls_verify'
                },
                'prometheus_timeout': {
                    'name': 'timeout'
                },
            }
        else:
            # For legacy check ensure prometheus_url is set so
            # OpenMetricsBaseCheck instantiation succeeds
            instance.setdefault('prometheus_url', '')
            self.HTTP_CONFIG_REMAPPER = {
                'ssl_keyfile': {
                    'name': 'tls_private_key'
                },
                'ssl_certfile': {
                    'name': 'tls_cert'
                },
                'ssl_cert_validation': {
                    'name': 'tls_verify'
                },
                'ssl_ca_certs': {
                    'name': 'tls_ca_cert'
                },
            }

        super(Etcd, self).__init__(
            name,
            init_config,
            instances,
            default_instances={
                'etcd': {
                    'prometheus_url': 'http://localhost:2379/metrics',
                    'namespace': 'etcd',
                    'metrics': [METRIC_MAP],
                    'send_histograms_buckets': True,
                    'metadata_metric_name': 'etcd_server_version',
                    'metadata_label_map': {
                        'version': 'server_version'
                    },
                }
            },
            default_namespace='etcd',
        )
Example 22
    def _get_config(self, instance):
        # make sure 'rabbitmq_api_url' is present and get parameters
        base_url = instance.get('rabbitmq_api_url', None)
        if not base_url:
            raise Exception('Missing "rabbitmq_api_url" in RabbitMQ config.')
        if not base_url.endswith('/'):
            base_url += '/'

        collect_nodes = is_affirmative(
            instance.get('collect_node_metrics', True))
        custom_tags = instance.get('tags', [])
        parsed_url = urlparse(base_url)
        if not parsed_url.scheme or "://" not in parsed_url.geturl():
            self.log.warning(
                'The rabbit url did not include a protocol, assuming http')
            # urljoin cannot add a protocol to the rest of the url for some reason.
            # This still leaves the potential for errors, but such urls would never have been valid either,
            # and it's not likely to be useful to attempt to catch all possible mistakes people could make.
            # urlparse also has a known issue parsing a url with no scheme but a port in the host section,
            # mistakenly taking the host for the scheme, hence the additional validation
            base_url = 'http://' + base_url
            parsed_url = urlparse(base_url)

        # Limit of queues/nodes to collect metrics from
        max_detailed = {
            EXCHANGE_TYPE:
            int(instance.get('max_detailed_exchanges',
                             MAX_DETAILED_EXCHANGES)),
            QUEUE_TYPE:
            int(instance.get('max_detailed_queues', MAX_DETAILED_QUEUES)),
            NODE_TYPE:
            int(instance.get('max_detailed_nodes', MAX_DETAILED_NODES)),
        }

        # List of queues/nodes to collect metrics from
        specified = {
            EXCHANGE_TYPE: {
                'explicit': instance.get('exchanges', []),
                'regexes': instance.get('exchanges_regexes', []),
            },
            QUEUE_TYPE: {
                'explicit': instance.get('queues', []),
                'regexes': instance.get('queues_regexes', [])
            },
            NODE_TYPE: {
                'explicit': instance.get('nodes', []),
                'regexes': instance.get('nodes_regexes', [])
            },
        }

        for object_type, filters in iteritems(specified):
            for _, filter_objects in iteritems(filters):
                if type(filter_objects) != list:
                    raise TypeError(
                        "{0} / {0}_regexes parameter must be a list".format(
                            object_type))

        return base_url, max_detailed, specified, custom_tags, collect_nodes
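
Example 22 reads its filters straight from the instance config: explicit names and regex lists per object type, plus per-type caps on how many objects receive detailed metrics. A hedged example of an instance that satisfies the type checks at the end of the method; the URL, tag, and object names are invented:

instance = {
    'rabbitmq_api_url': 'http://localhost:15672/api/',
    'collect_node_metrics': True,
    'tags': ['env:sandbox'],
    # Explicit lists and regex lists must both be lists, or the final loop raises TypeError.
    'queues': ['orders', 'payments'],
    'queues_regexes': [r'^celery\..*'],
    'exchanges': [],
    'exchanges_regexes': [],
    'nodes': [],
    'nodes_regexes': [],
    'max_detailed_queues': 200,
}
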
Example 23
    def check(self, instance):
        ssl = self.instance.get('ssl', False)
        if ssl not in SSL_MODES:
            ssl = 'require' if is_affirmative(ssl) else 'disable'

        user = self.instance.get('username', '')
        password = self.instance.get('password', '')

        table_count_limit = self.instance.get('table_count_limit', TABLE_COUNT_LIMIT)
        collect_function_metrics = is_affirmative(self.instance.get('collect_function_metrics', False))
        # Default value for `count_metrics` is True for backward compatibility
        collect_count_metrics = is_affirmative(self.instance.get('collect_count_metrics', True))
        collect_activity_metrics = is_affirmative(self.instance.get('collect_activity_metrics', False))
        collect_database_size_metrics = is_affirmative(self.instance.get('collect_database_size_metrics', True))
        collect_default_db = is_affirmative(self.instance.get('collect_default_database', False))

        custom_metrics = self._get_custom_metrics(instance.get('custom_metrics', []))
        custom_queries = instance.get('custom_queries', [])

        (host, port, dbname) = self.key

        self.log.debug("Custom metrics: %s", custom_metrics)

        tag_replication_role = is_affirmative(self.instance.get('tag_replication_role', False))
        tags = self.tags

        # Collect metrics
        try:
            # Check version
            self._connect(host, port, user, password, dbname, ssl, tags)
            if tag_replication_role:
                tags.extend(["replication_role:{}".format(self._get_replication_role())])
            self.log.debug("Running check against version %s", str(self.version))
            self._collect_stats(
                user,
                tags,
                self.relations,
                custom_metrics,
                table_count_limit,
                collect_function_metrics,
                collect_count_metrics,
                collect_activity_metrics,
                collect_database_size_metrics,
                collect_default_db,
            )
            self._get_custom_queries(tags, custom_queries)
        except (psycopg2.InterfaceError, socket.error):
            self.log.info("Connection error, will retry on next agent run")
            self._clean_state()

        if self.db is not None:
            service_check_tags = self._get_service_check_tags(host, tags)
            message = u'Established connection to postgres://%s:%s/%s' % (host, port, dbname)
            self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.OK, tags=service_check_tags, message=message)
            try:
                # commit to close the current query transaction
                self.db.commit()
            except Exception as e:
                self.log.warning("Unable to commit: %s", e)
        self._version = None  # We don't want to cache versions between runs to capture minor updates for metadata
Example 24
    def __init__(self, check, config):
        self._check = check
        self._db = None
        self._config = config
        self._log = get_check_logger()
        self._activity_last_query_start = None
        self._last_check_run = 0
        self._collection_loop_future = None
        self._cancel_event = threading.Event()
        self._tags = None
        self._tags_str = None
        self._service = "postgres"
        self._db_hostname = resolve_db_host(self._config.host)
        self._enabled = is_affirmative(
            self._config.statement_samples_config.get('enabled', False))
        self._run_sync = is_affirmative(
            self._config.statement_samples_config.get('run_sync', False))
        self._rate_limiter = ConstantRateLimiter(
            float(
                self._config.statement_samples_config.get(
                    'collections_per_second', 1)))
        self._explain_function = self._config.statement_samples_config.get(
            'explain_function', 'datadog.explain_statement')

        # explained_statements_cache: limit how often we try to re-explain the same query
        self._explained_statements_cache = TTLCache(
            maxsize=int(
                self._config.statement_samples_config.get(
                    'explained_statements_cache_maxsize', 5000)),
            ttl=60 * 60 / int(
                self._config.statement_samples_config.get(
                    'explained_statements_per_hour_per_query', 60)),
        )

        # seen_samples_cache: limit the ingestion rate per (query_signature, plan_signature)
        self._seen_samples_cache = TTLCache(
            # assuming ~100 bytes per entry (query & plan signature, key hash, 4 pointers (ordered dict), expiry time)
            # total size: 10k * 100 = 1 Mb
            maxsize=int(
                self._config.statement_samples_config.get(
                    'seen_samples_cache_maxsize', 10000)),
            ttl=60 * 60 / int(
                self._config.statement_samples_config.get(
                    'samples_per_hour_per_query', 15)),
        )
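
Example 24 sizes its two TTLCache instances so the TTL itself acts as a per-key rate limit: with ttl = 3600 / N seconds, a given key can be (re)admitted at most roughly N times per hour. A compact sketch of that pattern with cachetools; the maybe_ingest helper and the ingest stub are hypothetical, only the cache arithmetic mirrors the snippet:

from cachetools import TTLCache

samples_per_hour_per_query = 15
seen_samples = TTLCache(maxsize=10000, ttl=60 * 60 / samples_per_hour_per_query)


def ingest(sample):
    # Stand-in for the real submission path
    pass


def maybe_ingest(query_signature, plan_signature, sample):
    key = (query_signature, plan_signature)
    # While the key is still fresh in the cache, further samples are dropped;
    # once the TTL expires, the next sample is admitted and the window restarts.
    if key in seen_samples:
        return False
    seen_samples[key] = True
    ingest(sample)
    return True
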
Example 25
    def __init__(self, name, init_config, instances):
        super(Win32EventLogWMI, self).__init__(name, init_config, instances)
        # Settings
        self._tag_event_id = is_affirmative(init_config.get('tag_event_id', False))
        self._verbose = init_config.get('verbose', True)
        self._default_event_priority = init_config.get('default_event_priority', 'normal')

        # State
        self.last_ts = {}
Example 26
    def _get_pg_attrs(self, instance):
        if is_affirmative(instance.get('use_psycopg2', False)):
            if psycopg2 is None:
                self.log.error("Unable to import psycopg2, falling back to pg8000")
            else:
                return psycopg2_connect, psycopg2.InterfaceError, psycopg2.ProgrammingError

        # Let's use pg8000
        return pg8000.connect, pg8000.InterfaceError, pg8000.ProgrammingError
Example 27
    def __init__(self, name, init_config, agentConfig, instances=None):
        WinWMICheck.__init__(self, name, init_config, agentConfig, instances=instances)
        # Settings
        self._tag_event_id = is_affirmative(init_config.get('tag_event_id', False))
        self._verbose = init_config.get('verbose', True)
        self._default_event_priority = init_config.get('default_event_priority', 'normal')

        # State
        self.last_ts = {}
Example 28
    def __new__(cls, name, init_config, instances):
        """For backward compatibility reasons, there are two side-by-side implementations of the VSphereCheck.
        Instantiating this class will return an instance of the legacy integration for existing users and
        an instance of the new implementation for new users."""
        if is_affirmative(instances[0].get('use_legacy_check_version', True)):
            from datadog_checks.vsphere.legacy.vsphere_legacy import VSphereLegacyCheck

            return VSphereLegacyCheck(name, init_config, instances)
        return super(VSphereCheck, cls).__new__(cls)
Example 29
 def check(self, instance):
     if is_affirmative(instance.get('use_preview', False)):
         self.check_post_v3(instance)
     else:
         self.warning(
             'In Agent 6.9 this check will only support ETCD v3+. If you '
             'wish to preview the new version, set `use_preview` to `true`.'
         )
         self.check_pre_v3(instance)
Example 30
    def __init__(self, *args, **kwargs):
        # type: (*Any, **Any) -> None
        super(SnmpCheck, self).__init__(*args, **kwargs)

        # Set OID batch size
        self.oid_batch_size = int(self.init_config.get('oid_batch_size', DEFAULT_OID_BATCH_SIZE))

        # Load Custom MIB directory
        self.mibs_path = self.init_config.get('mibs_folder')

        self.optimize_mib_memory_usage = is_affirmative(self.init_config.get('optimize_mib_memory_usage', False))

        self.ignore_nonincreasing_oid = is_affirmative(self.init_config.get('ignore_nonincreasing_oid', False))

        self.profiles = self._load_profiles()
        self.profiles_by_oid = self._get_profiles_mapping()

        self._config = self._build_config(self.instance)