def __init__(self, name, init_config, instances):
        """Reset the check state and read the connection settings.

        ``host``, ``port``, ``dbname``, ``relations`` and ``tags`` are read
        from ``self.instance``; ``self.key`` and ``self.tags`` are derived
        from them.

        Raises:
            ConfigurationError: if ``relations`` is set while ``dbname`` is
                empty.
        """
        AgentCheck.__init__(self, name, init_config, instances)
        self._clean_state()
        self.db = None
        self._version = None
        self.custom_metrics = None

        settings = self.instance

        # custom_metrics is deprecated in favor of custom_queries
        if 'custom_metrics' in settings:
            self.warning(
                "DEPRECATION NOTICE: Please use the new custom_queries option "
                "rather than the now deprecated custom_metrics")

        host = settings.get('host', '')
        raw_port = settings.get('port', '')
        # An unset port stays as the empty string; anything else must be an int
        port = int(raw_port) if raw_port != '' else raw_port
        dbname = settings.get('dbname', 'postgres')

        self.relations = settings.get('relations', [])
        if self.relations and not dbname:
            raise ConfigurationError(
                '"dbname" parameter must be set when using the "relations" parameter.'
            )

        self.key = (host, port, dbname)
        self.tags = self._build_tags(
            settings.get('tags', []), host, port, dbname)
Exemple #2
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Initialize the check from its single configured instance.

        Copies the filtering and tagging options of ``instances[0]`` onto the
        check, resolves ``use_mount`` (through the legacy option loader when
        ``is_agent_6`` is falsy) and compiles the pattern filters.

        Raises:
            ConfigurationError: if more than one instance is configured.
        """
        instance_count = 0 if instances is None else len(instances)
        if instance_count > 1:
            raise ConfigurationError('Disk check only supports one configured instance.')
        AgentCheck.__init__(self, name, init_config, agentConfig, instances=instances)

        conf = instances[0]

        def flag(option, default):
            # Normalize a boolean-like instance option
            return is_affirmative(conf.get(option, default))

        # Boolean switches
        self._all_partitions = flag('all_partitions', False)
        self._tag_by_filesystem = flag('tag_by_filesystem', False)
        self._tag_by_label = flag('tag_by_label', True)
        self._service_check_rw = flag('service_check_rw', False)

        # Whitelists / blacklists
        self._file_system_whitelist = conf.get('file_system_whitelist', [])
        self._file_system_blacklist = conf.get('file_system_blacklist', [])
        self._device_whitelist = conf.get('device_whitelist', [])
        self._device_blacklist = conf.get('device_blacklist', [])
        self._mount_point_whitelist = conf.get('mount_point_whitelist', [])
        self._mount_point_blacklist = conf.get('mount_point_blacklist', [])

        # Tagging
        self._device_tag_re = conf.get('device_tag_re', {})
        self._custom_tags = conf.get('tags', [])

        # TODO Remove this v5/v6 fork when agent 5 will be fully deprecated
        if not is_agent_6:
            # FIXME: 6.x, drop use_mount option in datadog.conf
            self._load_legacy_option(conf, 'use_mount', False, operation=is_affirmative)

            # FIXME: 6.x, drop device_blacklist_re option in datadog.conf
            self._load_legacy_option(
                conf, 'excluded_disk_re', '^$', legacy_name='device_blacklist_re', operation=re.compile
            )
        else:
            self._use_mount = flag('use_mount', False)

        self._compile_pattern_filters(conf)
        self._compile_tag_re()
        # Case-insensitive pattern capturing the quoted value of a LABEL="..." token
        self._blkid_label_re = re.compile('LABEL=\"(.*?)\"', re.I)

        self.devices_label = {}
Exemple #3
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Initialize the check from its single configured instance.

        Copies the filtering, tagging and size options of ``instances[0]``
        onto the check and compiles the pattern filters.

        Raises:
            ConfigurationError: if more than one instance is configured.
        """
        instance_count = 0 if instances is None else len(instances)
        if instance_count > 1:
            raise ConfigurationError(
                'Disk check only supports one configured instance.')
        AgentCheck.__init__(
            self, name, init_config, agentConfig, instances=instances)

        conf = instances[0]

        def flag(option, default):
            # Normalize a boolean-like instance option
            return is_affirmative(conf.get(option, default))

        # Boolean switches
        self._use_mount = flag('use_mount', False)
        self._all_partitions = flag('all_partitions', False)
        self._tag_by_filesystem = flag('tag_by_filesystem', False)
        self._tag_by_label = flag('tag_by_label', True)
        self._service_check_rw = flag('service_check_rw', False)

        # Whitelists / blacklists
        self._file_system_whitelist = conf.get('file_system_whitelist', [])
        self._file_system_blacklist = conf.get('file_system_blacklist', [])
        self._device_whitelist = conf.get('device_whitelist', [])
        self._device_blacklist = conf.get('device_blacklist', [])
        self._mount_point_whitelist = conf.get('mount_point_whitelist', [])
        self._mount_point_blacklist = conf.get('mount_point_blacklist', [])

        # Tagging
        self._device_tag_re = conf.get('device_tag_re', {})
        self._custom_tags = conf.get('tags', [])

        # The configured value is scaled by 1024 twice before being stored
        configured_min_size = conf.get('min_disk_size', 0)
        self._min_disk_size = configured_min_size * 1024 * 1024

        self._compile_pattern_filters(conf)
        self._compile_tag_re()
        # Case-insensitive pattern capturing the quoted value of a LABEL="..." token
        self._blkid_label_re = re.compile('LABEL=\"(.*?)\"', re.I)

        self.devices_label = {}
    def __init__(self, name, init_config, instances):
        """Read the batching/refresh settings and create the per-instance caches.

        Batch sizes and refresh intervals come from ``init_config`` (falling
        back to the module-level defaults). For every instance the cache
        intervals and the event configuration are registered under the key
        returned by ``self._instance_key``.
        """
        AgentCheck.__init__(self, name, init_config, instances)
        self.time_started = time.time()

        # Batch sizes are clamped so they are never negative
        morlist_batch = init_config.get("batch_morlist_size", BATCH_MORLIST_SIZE)
        self.batch_morlist_size = max(morlist_batch, 0)
        collector_batch = init_config.get(
            "batch_property_collector_size", BATCH_COLLECTOR_SIZE)
        self.batch_collector_size = max(collector_batch, 0)

        self.refresh_morlist_interval = init_config.get(
            'refresh_morlist_interval', REFRESH_MORLIST_INTERVAL)
        # Cleaning defaults to twice the refresh interval and is never
        # shorter than the refresh interval itself
        default_clean_interval = 2 * self.refresh_morlist_interval
        self.clean_morlist_interval = max(
            init_config.get('clean_morlist_interval', default_clean_interval),
            self.refresh_morlist_interval)
        self.refresh_metrics_metadata_interval = init_config.get(
            'refresh_metrics_metadata_interval',
            REFRESH_METRICS_METADATA_INTERVAL)

        # Connections open to vCenter instances
        self.server_instances = {}
        self.server_instances_lock = threading.RLock()

        # Event configuration
        self.event_config = {}

        # Host tags exclusion: the first instance overrides init_config
        first_instance = instances[0]
        shared_excluded_tags = init_config.get("excluded_host_tags", [])
        self.excluded_host_tags = first_instance.get(
            "excluded_host_tags", shared_excluded_tags)

        # Caching configuration
        self.cache_config = CacheConfig()

        # build up configurations
        for instance in instances:
            instance_key = self._instance_key(instance)
            # caches
            self.cache_config.set_interval(
                CacheConfig.Morlist, instance_key,
                self.refresh_morlist_interval)
            self.cache_config.set_interval(
                CacheConfig.Metadata, instance_key,
                self.refresh_metrics_metadata_interval)
            # events
            self.event_config[instance_key] = instance.get('event_config')

        # Queue of raw Mor objects to process
        self.mor_objects_queue = ObjectsQueue()

        # Cache of processed Mor objects
        self.mor_cache = MorCache(self.log)

        # managed entity raw view
        self.registry = {}

        # Metrics metadata, for each instance keeps the mapping: perfCounterKey -> {name, group, description}
        self.metadata_cache = MetadataCache()
        self.latest_event_query = {}
        self.exception_printed = 0
Exemple #5
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Record the creation time and wrap the first instance in a ``Config``.

        ``self.config`` stays ``None`` when no instance is configured.
        """
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        self.last_run = datetime.now()

        # Only the first instance is used to build the configuration object
        self.config = Config(instances[0]) if instances else None
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Create the two-level, auto-populating ``host_status`` mapping."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        def _missing_value():
            # Unknown inner keys resolve to None
            return None

        def _inner_mapping():
            # Unknown outer keys resolve to a fresh inner mapping
            return defaultdict(_missing_value)

        # Host status needs to persist across all checks.
        # Keys are created when they are first referenced. See:
        # https://en.wikipedia.org/wiki/Autovivification
        # https://gist.github.com/hrldcpr/2012250
        self.host_status = defaultdict(_inner_mapping)
Exemple #7
0
    def __init__(self, *args, **kwargs):
        """Create the instance cache and apply the legacy default timeout.

        When neither the instance nor ``init_config`` defines ``timeout``,
        the instance timeout is set to 2.
        """
        AgentCheck.__init__(self, *args, **kwargs)
        self.instance_cache = {}

        # preserve backwards compatible default timeouts
        timeout_unset = (
            self.instance
            and self.instance.get('timeout') is None
            and self.init_config.get('timeout') is None
        )
        if timeout_unset:
            self.instance['timeout'] = 2
Exemple #8
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Read the integer tuning options from ``init_config``.

        Each option falls back to its module-level default and is coerced
        with ``int``.
        """
        AgentCheck.__init__(self, name, init_config, agentConfig, instances=instances)

        def int_option(option, default):
            # Read an init_config option and coerce it to an integer
            return int(init_config.get(option, default))

        self._zk_timeout = int_option('zk_timeout', DEFAULT_ZK_TIMEOUT)
        self._kafka_timeout = int_option('kafka_timeout', DEFAULT_KAFKA_TIMEOUT)
        self.context_limit = int_option('max_partition_contexts', CONTEXT_UPPER_BOUND)
        self._broker_retries = int_option('kafka_retries', DEFAULT_KAFKA_RETRIES)

        self._zk_last_ts = {}
        self.kafka_clients = {}
Exemple #9
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Validate the connector settings and pre-compute the metrics per instance.

        The ``connector`` and ``adoprovider`` options are read from
        ``init_config`` and reset to their defaults (with an error log) when
        invalid. For every instance the target database is checked for
        existence; when it exists and no ``stored_procedure`` is configured,
        the list of metrics to collect is built. When it does not exist the
        instance is either disabled (``ignore_missing_database``) or reported
        as an error.

        Errors raised while preparing one instance are logged and do not
        prevent the remaining instances from being processed.

        Args:
            name: check name, forwarded to ``AgentCheck``.
            init_config: shared configuration mapping.
            agentConfig: agent configuration, forwarded to ``AgentCheck``.
            instances: list of instance mappings; ``None`` is treated as no
                instances.
        """
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # Cache connections
        self.connections = {}
        self.failed_connections = {}
        self.instances_metrics = {}
        self.instances_per_type_metrics = defaultdict(dict)
        self.existing_databases = None
        self.do_check = {}
        self.proc_type_mapping = {'gauge': self.gauge, 'rate': self.rate, 'histogram': self.histogram}

        self.connector = init_config.get('connector', 'adodbapi')
        if self.connector.lower() not in self.valid_connectors:
            self.log.error("Invalid database connector %s, defaulting to adodbapi", self.connector)
            self.connector = 'adodbapi'

        self.adoprovider = init_config.get('adoprovider', self.default_adoprovider)
        if self.adoprovider.upper() not in self.valid_adoproviders:
            self.log.error(
                "Invalid ADODB provider string %s, defaulting to %s", self.adoprovider, self.default_adoprovider
            )
            self.adoprovider = self.default_adoprovider

        # Pre-process the list of metrics to collect
        self.custom_metrics = init_config.get('custom_metrics', [])
        # `instances` defaults to None, which must not be iterated
        for instance in instances or []:
            try:
                instance_key = self._conn_key(instance, self.DEFAULT_DB_KEY)
                self.do_check[instance_key] = True

                # check to see if the database exists before we try any connections to it
                with self.open_managed_db_connections(instance, None, db_name=self.DEFAULT_DATABASE):
                    db_exists, context = self._check_db_exists(instance)

                if db_exists:
                    if instance.get('stored_procedure') is None:
                        with self.open_managed_db_connections(instance, self.DEFAULT_DB_KEY):
                            self._make_metric_list_to_collect(instance, self.custom_metrics)
                elif is_affirmative(instance.get("ignore_missing_database", False)):
                    # The missing database is expected: leave checks disabled
                    self.do_check[instance_key] = False
                    self.log.warning("Database %s does not exist. Disabling checks for this instance.", context)
                else:
                    # The missing database is a problem: keep the check enabled
                    self.log.error("Database %s does not exist. Fix issue and restart agent", context)

            except SQLConnectionError:
                self.log.exception("Skipping SQL Server instance")
            except Exception as e:
                # Best effort: one broken instance must not abort initialization
                self.log.exception("Initialization exception %s", e)
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Map each statistic type name to the submission method used for it."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)

     as_gauge = self.gauge
     as_count = self.monotonic_count
     as_rate = self.rate

     # Only CountStatistic and DoubleStatistic are not submitted as gauges
     self.metric_type_mapping = {
         'AverageStatistic': as_gauge,
         'BoundedRangeStatistic': as_gauge,
         'CountStatistic': as_count,
         'DoubleStatistic': as_rate,
         'RangeStatistic': as_gauge,
         'TimeStatistic': as_gauge,
     }
Exemple #11
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Create the per-server state and derive the collection metric names.

        ``collection_metrics_names`` holds the second dot-separated segment
        of every key in ``self.COLLECTION_METRICS``.
        """
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # Members' last replica set states
        self._last_state_by_server = {}

        # List of metrics to collect per instance
        self.metrics_to_collect_by_instance = {}

        self.collection_metrics_names = [
            metric_name.split('.')[1]
            for metric_name in self.COLLECTION_METRICS
        ]
Exemple #12
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Load the configuration of the single supported instance.

        Raises:
            Exception: if more than one instance is configured.
        """
        instance_count = 0 if instances is None else len(instances)
        if instance_count > 1:
            raise Exception(
                'Disk check only supports one configured instance.')
        AgentCheck.__init__(
            self, name, init_config, agentConfig, instances=instances)

        # The configuration is read once, from the first instance
        first_instance = instances[0]
        self._load_conf(first_instance)
        self._compile_tag_re()
Exemple #13
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Append the suffixed statistic names to ``self.keys``.

        Every entry of ``stat_keys``, ``search_latency_keys`` and
        ``vnodeq_keys`` is combined with each suffix of its family as
        ``<key>_<suffix>``, suffix by suffix.
        """
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # NOTE(review): self.keys is not created in this method. If it is a
        # class-level list these appends accumulate across instances; confirm.
        stat_suffixes = ["mean", "median", "95", "99", "100"]
        self.keys.extend(
            [base + "_" + suffix for suffix in stat_suffixes for base in self.stat_keys])

        latency_suffixes = ["min", "max", "mean", "median", "95", "99", "999"]
        self.keys.extend(
            [base + "_" + suffix for suffix in latency_suffixes for base in self.search_latency_keys])

        vnodeq_suffixes = ["min", "max", "mean", "median", "total"]
        self.keys.extend(
            [base + "_" + suffix for suffix in vnodeq_suffixes for base in self.vnodeq_keys])

        self.prev_coord_redirs_total = -1
Exemple #14
0
 def __init__(self, name, init_config, agentConfig, instances):
     """Resolve the ``nfsiostat`` command line stored in ``self.nfs_cmd``.

     The configured ``nfsiostat_path`` is preferred; otherwise the first
     existing known install location is used. The arguments ``1`` and ``2``
     are always appended.

     Raises:
         Exception: if no path is configured and no known location exists.
     """
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)

     # if they set the path, use that
     if init_config.get('nfsiostat_path'):
         self.nfs_cmd = init_config['nfsiostat_path'].split() + ['1', '2']
         return

     # if not, try the opt dir first, then the default place
     known_locations = (
         '/opt/datadog-agent/embedded/sbin/nfsiostat',
         '/usr/local/sbin/nfsiostat',
     )
     for location in known_locations:
         if os.path.exists(location):
             self.nfs_cmd = [location, '1', '2']
             return

     raise Exception(
         'nfsstat check requires nfsiostat be installed, please install it '
         '(through nfs-utils) or set the path to the installed version'
     )
Exemple #15
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Create the empty per-check caches and warn about ``custom_metrics``.

        A single deprecation warning is emitted when at least one instance
        still defines ``custom_metrics``.
        """
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # Mapping-type caches
        self.dbs = {}
        self.versions = {}
        self.instance_metrics = {}
        self.bgw_metrics = {}
        self.archiver_metrics = {}
        self.replication_metrics = {}
        self.activity_metrics = {}
        self.custom_metrics = {}

        # List-type caches
        self.db_bgw_metrics = []
        self.db_archiver_metrics = []

        # Deprecate custom_metrics in favor of custom_queries
        if instances is not None:
            for instance in instances:
                if 'custom_metrics' in instance:
                    self.warning(
                        "DEPRECATION NOTICE: Please use the new custom_queries option "
                        "rather than the now deprecated custom_metrics"
                    )
                    # One warning is enough, whatever the number of instances
                    break
Exemple #16
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Create the PID/process caches and resolve the procfs path option.

        Cache durations are read from ``init_config`` and coerced to ``int``.
        On Linux, an ``init_config`` ``procfs_path`` is applied to
        ``psutil.PROCFS_PATH`` unless it conflicts with the ``procfs_path``
        of ``agentConfig``.
        """
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        def int_option(option, default):
            # Read an init_config option and coerce it to an integer
            return int(init_config.get(option, default))

        # ad stands for access denied
        # PIDs hitting this error are cached and not iterated on more often
        # than `access_denied_cache_duration`
        # This cache is for all PIDs so it's global, but it should be refreshed by instance
        self.last_ad_cache_ts = {}
        self.ad_cache = set()
        self.access_denied_cache_duration = int_option(
            'access_denied_cache_duration', DEFAULT_AD_CACHE_DURATION)

        # By default cache the PID list for a while
        # Sometimes it's not wanted b/c it can mess with no-data monitoring
        # This cache is indexed per instance
        self.last_pid_cache_ts = {}
        self.pid_cache = {}
        self.pid_cache_duration = int_option(
            'pid_cache_duration', DEFAULT_PID_CACHE_DURATION)

        self._conflicting_procfs = False
        self._deprecated_init_procfs = False
        if Platform.is_linux():
            procfs_path = init_config.get('procfs_path')
            if procfs_path:
                # A conflict exists when the agent config also sets a path
                # and it differs once trailing slashes are stripped
                conflicts = (
                    'procfs_path' in agentConfig
                    and procfs_path != agentConfig.get('procfs_path').rstrip('/')
                )
                if conflicts:
                    self._conflicting_procfs = True
                else:
                    self._deprecated_init_procfs = True
                    psutil.PROCFS_PATH = procfs_path

        # Process cache, indexed by instance
        self.process_cache = defaultdict(dict)

        self.process_list_cache.cache_duration = int_option(
            'shared_process_list_cache_duration',
            DEFAULT_SHARED_PROCESS_LIST_CACHE_DURATION)
Exemple #17
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the base check and start with an empty ``counts`` mapping."""
     AgentCheck.__init__(
         self, name, init_config, agentConfig, instances
     )
     # Filled in after construction; nothing is counted yet
     self.counts = {}
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the base check and the ``_masters`` mapping.

     Unknown keys of ``_masters`` resolve to the empty string.
     """
     AgentCheck.__init__(
         self, name, init_config, agentConfig, instances
     )
     # str() returns "", the default value for missing keys
     self._masters = defaultdict(str)
Exemple #19
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the base check, the connection cache and the timestamps.

     Unknown keys of ``last_timestamp_seen`` resolve to ``0``.
     """
     AgentCheck.__init__(
         self, name, init_config, agentConfig, instances
     )
     # Missing keys start at 0
     self.last_timestamp_seen = defaultdict(int)
     self.connections = {}
Exemple #20
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the base check and the empty ``cluster_status`` mapping."""
     AgentCheck.__init__(
         self, name, init_config, agentConfig, instances
     )
     # Status needs to persist across all checks, so it lives on the instance
     self.cluster_status = {}
Exemple #21
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the base check, the alert list and the vhost cache."""
     AgentCheck.__init__(
         self, name, init_config, agentConfig, instances
     )
     self.already_alerted = []
     # this is used to send CRITICAL rabbitmq.aliveness check if the server goes down
     self.cached_vhosts = {}
Exemple #22
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Create the log tailers of every configured instance.

        For each instance the Nagios configuration is parsed (from
        ``nagios_conf``, or from the legacy ``nagios_perf_cfg`` /
        ``nagios_log`` options) and the enabled tailers are stored in
        ``self.nagios_tails`` under the instance key. Instances without any
        usable configuration are skipped with a warning.
        """
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)
        self.nagios_tails = {}
        check_freq = init_config.get("check_freq", 15)

        if instances is None:
            return

        for instance in instances:
            tags = instance.get('tags', [])
            parsed_conf = {}
            key = None

            if 'nagios_conf' in instance:  # conf.d check
                key = instance['nagios_conf']
                parsed_conf = self.parse_nagios_config(key)
            # Retrocompatibility Code
            elif 'nagios_perf_cfg' in instance:
                key = instance['nagios_perf_cfg']
                parsed_conf = self.parse_nagios_config(key)
                # The legacy option implies both kinds of performance data
                instance["collect_host_performance_data"] = True
                instance["collect_service_performance_data"] = True
            if 'nagios_log' in instance:
                parsed_conf["log_file"] = instance['nagios_log']
                if key is None:
                    key = instance['nagios_log']
            # End of retrocompatibility code

            if not parsed_conf:
                self.log.warning("Missing path to nagios_conf")
                continue

            tailers = []

            tail_events = (
                'log_file' in parsed_conf
                and instance.get('collect_events', True)
            )
            if tail_events:
                self.log.debug("Starting to tail the event log")
                event_tailer = NagiosEventLogTailer(
                    log_path=parsed_conf['log_file'],
                    file_template=None,
                    logger=self.log,
                    hostname=self.hostname,
                    tags=tags,
                    event_func=self.event,
                    gauge_func=self.gauge,
                    freq=check_freq,
                    passive_checks=instance.get('passive_checks_events', False),
                )
                tailers.append(event_tailer)

            tail_host_perfdata = (
                'host_perfdata_file' in parsed_conf
                and 'host_perfdata_file_template' in parsed_conf
                and instance.get('collect_host_performance_data', False)
            )
            if tail_host_perfdata:
                self.log.debug("Starting to tail the host_perfdata file")
                host_tailer = NagiosHostPerfDataTailer(
                    log_path=parsed_conf['host_perfdata_file'],
                    file_template=parsed_conf['host_perfdata_file_template'],
                    logger=self.log,
                    hostname=self.hostname,
                    event_func=self.event,
                    gauge_func=self.gauge,
                    freq=check_freq,
                    tags=tags,
                )
                tailers.append(host_tailer)

            tail_service_perfdata = (
                'service_perfdata_file' in parsed_conf
                and 'service_perfdata_file_template' in parsed_conf
                and instance.get('collect_service_performance_data', False)
            )
            if tail_service_perfdata:
                self.log.debug("Starting to tail the service_perfdata file")
                service_tailer = NagiosServicePerfDataTailer(
                    log_path=parsed_conf['service_perfdata_file'],
                    file_template=parsed_conf['service_perfdata_file_template'],
                    logger=self.log,
                    hostname=self.hostname,
                    event_func=self.event,
                    gauge_func=self.gauge,
                    freq=check_freq,
                    tags=tags,
                )
                tailers.append(service_tailer)

            self.nagios_tails[key] = tailers
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the base check and resolve the ``nodetool`` command path."""
     AgentCheck.__init__(
         self, name, init_config, agentConfig, instances
     )
     # Falls back to /usr/bin/nodetool when init_config has no "nodetool" entry
     default_nodetool = "/usr/bin/nodetool"
     self.nodetool_cmd = init_config.get("nodetool", default_nodetool)
Exemple #24
0
    def __init__(self, name, init_config, instances):
        """Create the log tailers of the first configured instance.

        The Nagios configuration is parsed (from ``nagios_conf``, or from the
        legacy ``nagios_perf_cfg`` / ``nagios_log`` options) and the enabled
        tailers are stored in ``self.nagios_tails`` under the instance key.
        When no usable configuration is found a warning is logged and no
        tailer is registered.
        """
        AgentCheck.__init__(self, name, init_config, instances)
        self.nagios_tails = {}

        instance = self.instances[0]
        tags = instance.get('tags', [])
        parsed_conf = {}
        key = None

        if 'nagios_conf' in instance:  # conf.d check
            key = instance['nagios_conf']
            parsed_conf = self.parse_nagios_config(key)
        # Retrocompatibility Code
        elif 'nagios_perf_cfg' in instance:
            key = instance['nagios_perf_cfg']
            parsed_conf = self.parse_nagios_config(key)
            # The legacy option implies both kinds of performance data
            instance["collect_host_performance_data"] = True
            instance["collect_service_performance_data"] = True
        if 'nagios_log' in instance:
            parsed_conf["log_file"] = instance['nagios_log']
            if key is None:
                key = instance['nagios_log']
        # End of retrocompatibility code

        if not parsed_conf:
            self.log.warning("Missing path to nagios_conf")
            return

        tailers = []

        tail_events = 'log_file' in parsed_conf and instance.get('collect_events', True)
        if tail_events:
            self.log.debug("Starting to tail the event log")
            event_tailer = NagiosEventLogTailer(
                log_path=parsed_conf['log_file'],
                logger=self.log,
                hostname=self.hostname,
                event_func=self.event,
                tags=tags,
                passive_checks=instance.get('passive_checks_events', False),
            )
            tailers.append(event_tailer)

        tail_host_perfdata = (
            'host_perfdata_file' in parsed_conf
            and 'host_perfdata_file_template' in parsed_conf
            and instance.get('collect_host_performance_data', False)
        )
        if tail_host_perfdata:
            self.log.debug("Starting to tail the host_perfdata file")
            host_tailer = NagiosPerfDataTailer(
                log_path=parsed_conf['host_perfdata_file'],
                file_template=parsed_conf['host_perfdata_file_template'],
                logger=self.log,
                hostname=self.hostname,
                gauge_func=self.gauge,
                tags=tags,
                perfdata_field='HOSTPERFDATA',
                metric_prefix=_get_host_metric_prefix,
            )
            tailers.append(host_tailer)

        tail_service_perfdata = (
            'service_perfdata_file' in parsed_conf
            and 'service_perfdata_file_template' in parsed_conf
            and instance.get('collect_service_performance_data', False)
        )
        if tail_service_perfdata:
            self.log.debug("Starting to tail the service_perfdata file")
            service_tailer = NagiosPerfDataTailer(
                log_path=parsed_conf['service_perfdata_file'],
                file_template=parsed_conf['service_perfdata_file_template'],
                logger=self.log,
                hostname=self.hostname,
                gauge_func=self.gauge,
                tags=tags,
                perfdata_field='SERVICEPERFDATA',
                metric_prefix=_get_service_metric_prefix,
            )
            tailers.append(service_tailer)

        self.nagios_tails[key] = tailers
Exemple #25
0
    def __init__(self, name, init_config, instances):
        """Resolve the ``gunicorn`` command name.

        The instance value wins over ``init_config``; the literal
        ``'gunicorn'`` is used when neither defines it.
        """
        AgentCheck.__init__(self, name, init_config, instances)

        shared_command = init_config.get('gunicorn', 'gunicorn')
        self.gunicorn_cmd = self.instance.get('gunicorn', shared_command)
 def __init__(self, *args, **kwargs):
     """Forward all arguments to ``AgentCheck`` and create the instance cache."""
     AgentCheck.__init__(
         self, *args, **kwargs
     )
     # Starts empty; populated after construction
     self.instance_cache = {}
Exemple #27
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Create the mapping that lazily builds one state object per key."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        def _new_state():
            # A fresh state object is created the first time a key is read
            return ConsulCheckInstanceState()

        self._instance_states = defaultdict(_new_state)
    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``AgentCheck`` and create the empty caches.

        Both ``_previous_offset`` and ``instance_cache`` start as empty
        mappings.
        """
        AgentCheck.__init__(self, *args, **kwargs)
        # This line was tab-indented, which Python 3 rejects with a TabError
        # when mixed with the space-indented neighbours.
        self._previous_offset = {}
        self.instance_cache = {}
Exemple #29
0
 def __init__(self, name, init_config, instances=None):
     """Initialize the base check and the two empty state mappings."""
     AgentCheck.__init__(
         self, name, init_config, instances
     )
     # Both mappings start empty and are filled after construction
     self.idx_rates = {}
     self._last_state_by_server = {}
Exemple #30
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the base check and the empty ``assumed_url`` mapping."""
     AgentCheck.__init__(
         self, name, init_config, agentConfig, instances
     )
     # Starts empty; populated after construction
     self.assumed_url = {}