def __init__(self, name, init_config, agentConfig, instances=None):
        """Initialize the process check and its caches.

        Sets up two time-based caches (access-denied PIDs and the PID list)
        and, on Linux, reconciles the `procfs_path` from init_config against
        the one in the main agent configuration.
        """
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # ad stands for access denied
        # We cache the PIDs getting this error and don't iterate on them more often than `access_denied_cache_duration``
        # This cache is for all PIDs so it's global, but it should be refreshed by instance
        self.last_ad_cache_ts = {}
        self.ad_cache = set()
        self.access_denied_cache_duration = int(
            init_config.get('access_denied_cache_duration',
                            DEFAULT_AD_CACHE_DURATION))

        # By default cache the PID list for a while
        # Sometimes it's not wanted b/c it can mess with no-data monitoring
        # This cache is indexed per instance
        self.last_pid_cache_ts = {}
        self.pid_cache = {}
        self.pid_cache_duration = int(
            init_config.get('pid_cache_duration', DEFAULT_PID_CACHE_DURATION))

        # Flags surfacing procfs_path configuration problems; presumably read
        # and reported later in the check run — TODO confirm against callers.
        self._conflicting_procfs = False
        self._deprecated_init_procfs = False
        if Platform.is_linux():
            procfs_path = init_config.get('procfs_path')
            if procfs_path:
                # init_config disagrees with the agent-level setting (after
                # trailing-slash normalization): record the conflict instead
                # of silently overriding.
                if 'procfs_path' in agentConfig and procfs_path != agentConfig.get(
                        'procfs_path').rstrip('/'):
                    self._conflicting_procfs = True
                else:
                    # Setting procfs_path here is deprecated but still honored.
                    self._deprecated_init_procfs = True
                    psutil.PROCFS_PATH = procfs_path

        # Process cache, indexed by instance
        self.process_cache = defaultdict(dict)
# Example #2
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Initialize the PDH-based check: build per-instance counter lists.

        For each instance, caches its configured tags and, for every metric,
        an entry of (pdh name, datadog metric name, submit method,
        WinPDHCounter object).

        Fixes: the tag handling called `cfg_tags.join(",")`, which raises
        AttributeError for a list (and for a string would later be exploded
        into single characters by `list()`); the configured tags are now
        stored as a list directly. The debug log also uses lazy %-args.
        """
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)
        self._countersettypes = {}
        self._counters = {}
        self._metrics = {}
        self._tags = {}

        try:
            for instance in instances:
                key = hash_mutable(instance)
                counterset = instance.get('countersetname')

                cfg_tags = instance.get('tags')
                if cfg_tags is not None:
                    # Store the configured tags as-is (empty config -> []).
                    self._tags[key] = list(cfg_tags) if cfg_tags else []

                metrics = instance.get('metrics')
                # list of the metrics.  Each entry is itself an entry,
                # which is the pdh name, datadog metric name, type, and the
                # pdh counter object
                self._metrics[key] = []
                for inst_name, dd_name, mtype in metrics:
                    # Resolve the submit method (e.g. gauge/rate) by name.
                    m = getattr(self, mtype.lower())
                    obj = WinPDHCounter(counterset, inst_name, self.log)
                    if not obj:
                        continue
                    entry = [inst_name, dd_name, m, obj]
                    self.log.debug("entry: %s", str(entry))
                    self._metrics[key].append(entry)

        except Exception as e:
            self.log.debug("Exception in PDH init: %s", str(e))
            raise
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Set up the OpenStack check: auth settings, caches and filters."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        self._ssl_verify = is_affirmative(init_config.get("ssl_verify", True))
        self.keystone_server_url = init_config.get("keystone_server_url")
        self.hypervisor_name_cache = {}

        # `or` (rather than a get() default) so falsy values also fall back.
        self.paginated_server_limit = (
            init_config.get('paginated_server_limit') or DEFAULT_PAGINATED_SERVER_LIMIT
        )
        self.request_timeout = (
            init_config.get('request_timeout') or DEFAULT_API_REQUEST_TIMEOUT
        )

        if not self.keystone_server_url:
            raise IncompleteConfig()

        # Current authentication scopes
        self._parent_scope = None
        self._current_scope = None

        # Rarely-changing values cached between runs
        self._aggregate_list = None

        # Check instance -> associated OpenStack project scope
        self.instance_map = {}

        # Nova-managed server -> tags
        self.external_host_tags = {}

        def _compiled_rules(option):
            # Compile every regex configured under `option` into a set.
            return {re.compile(pattern) for pattern in init_config.get(option, [])}

        self.exclude_network_id_rules = _compiled_rules('exclude_network_ids')
        self.exclude_server_id_rules = _compiled_rules('exclude_server_ids')
        self.include_project_name_rules = _compiled_rules('whitelist_project_names')
        self.exclude_project_name_rules = _compiled_rules('blacklist_project_names')

        if is_affirmative(init_config.get('use_agent_proxy', True)):
            self.proxy_config = self.proxies
        else:
            self.proxy_config = None

        self.backoff = BackOffRetry(self)

        # ISO8601 date time: used to filter the call to get the list of nova servers
        self.changes_since_time = {}

        # Ex: server_details_by_id = {
        #   UUID: {UUID: <value>, etc}
        #   1: {id: 1, name: hostA},
        #   2: {id: 2, name: hostB}
        # }
        self.server_details_by_id = {}
# Example #4
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Initialize the check and pre-parse counter configuration."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # Parse general counters (the old comment wrongly said "job specific")
        self.general_counters = self._parse_general_counters(init_config)

        # Parse job specific counters
        self.job_specific_counters = self._parse_job_specific_counters(init_config)
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Initialize the check with a persistent host-status table.

        Host status needs to survive across check runs; keys autovivify on
        first reference (https://en.wikipedia.org/wiki/Autovivification,
        https://gist.github.com/hrldcpr/2012250).
        """
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        def _status_layer():
            # Inner layer: unknown statuses read back as None.
            return defaultdict(lambda: None)

        self.host_status = defaultdict(_status_layer)
# Example #6
    def __init__(self, name, init_config, instances=None):
        """Initialize the check; switch HTTP auth to digest when configured."""
        AgentCheck.__init__(self, name, init_config, instances)
        self.assumed_url = {}

        # Only the first instance's auth_type is consulted.
        first_instance = instances[0] if instances is not None else {}
        if first_instance.get('auth_type') == 'digest':
            auth = self.http.options['auth']
            self.http.options['auth'] = requests.auth.HTTPDigestAuth(*auth[:2])
# Example #7
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the disk check; exactly one instance is supported."""
     if instances is not None and len(instances) > 1:
         raise Exception("Disk check only supports one configured instance.")
     AgentCheck.__init__(self, name, init_config, agentConfig, instances=instances)
     # Load the configuration a single time, up front.
     # NOTE(review): instances[0] assumes instances is non-empty — confirm callers.
     self._load_conf(instances[0])
     self._compile_tag_re()
# Example #8
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the Cisco ACI check and its per-run caches."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # Caches carried between runs.
     self.external_host_tags = {}
     self.last_events_ts = {}
     self._api_cache = {}
     # Metric definitions and tagging helpers.
     self.tenant_metrics = aci_metrics.make_tenant_metrics()
     self.check_tags = ['cisco']
     self.tagger = CiscoTags(log=self.log)
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the BTRFS check; only a single instance is allowed."""
     AgentCheck.__init__(self, name, init_config, agentConfig,
                         instances=instances)
     # Reject multi-instance configurations outright.
     if instances is not None and len(instances) > 1:
         raise Exception(
             "BTRFS check only supports one configured instance.")
# Example #10
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Initialize the SQL Server check.

        Validates the connector / ADO-provider configuration, then for each
        instance verifies the target database exists and pre-builds the list
        of metrics to collect.

        Fixes: `ignore` is always a bool (is_affirmative returns one, default
        False), so the dead `is not None` test is dropped; the "INitialization"
        typo in the exception log message is corrected.
        """
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # Cache connections
        self.connections = {}
        self.failed_connections = {}
        self.instances_metrics = {}
        self.instances_per_type_metrics = defaultdict(dict)
        self.existing_databases = None
        self.do_check = {}
        self.proc_type_mapping = {'gauge': self.gauge, 'rate': self.rate, 'histogram': self.histogram}
        self.adoprovider = self.default_adoprovider

        # Validate the configured connector, falling back to adodbapi.
        self.connector = init_config.get('connector', 'adodbapi')
        if self.connector.lower() not in self.valid_connectors:
            self.log.error("Invalid database connector %s, defaulting to adodbapi", self.connector)
            self.connector = 'adodbapi'

        # Validate the ADODB provider, falling back to the default.
        self.adoprovider = init_config.get('adoprovider', self.default_adoprovider)
        if self.adoprovider.upper() not in self.valid_adoproviders:
            self.log.error(
                "Invalid ADODB provider string %s, defaulting to %s", self.adoprovider, self.default_adoprovider
            )
            self.adoprovider = self.default_adoprovider

        # Pre-process the list of metrics to collect
        self.custom_metrics = init_config.get('custom_metrics', [])
        for instance in instances:
            try:
                instance_key = self._conn_key(instance, self.DEFAULT_DB_KEY)
                self.do_check[instance_key] = True

                # check to see if the database exists before we try any connections to it
                with self.open_managed_db_connections(instance, None, db_name=self.DEFAULT_DATABASE):
                    db_exists, context = self._check_db_exists(instance)

                if db_exists:
                    if instance.get('stored_procedure') is None:
                        with self.open_managed_db_connections(instance, self.DEFAULT_DB_KEY):
                            self._make_metric_list_to_collect(instance, self.custom_metrics)
                else:
                    # How much do we care that the DB doesn't exist?
                    ignore = is_affirmative(instance.get("ignore_missing_database", False))
                    if ignore:
                        # not much: we expect it, so leave checks disabled
                        self.do_check[instance_key] = False
                        self.log.warning("Database %s does not exist. Disabling checks for this instance.", context)
                    else:
                        # yes we do. Keep trying
                        self.log.error("Database %s does not exist. Fix issue and restart agent", context)

            except SQLConnectionError:
                self.log.exception("Skipping SQL Server instance")
                continue
            except Exception as e:
                self.log.exception("Initialization exception %s", e)
                continue
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the check and warn for instances that disable SSL
     validation while pointing at an https:// URL.

     Fix: the warning now passes the URL as a lazy logging argument instead
     of %-interpolating it eagerly into the message string.
     """
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     self.cluster_name = None
     for instance in instances or []:
         url = instance.get('url', '')
         parsed_url = urlparse(url)
         ssl_verify = not _is_affirmative(instance.get('disable_ssl_validation', False))
         if not ssl_verify and parsed_url.scheme == 'https':
             # Lazy %-args: formatting happens only if the record is emitted.
             self.log.warning('Skipping SSL cert validation for %s based on configuration.', url)
# Example #12
    def __init__(self, name, init_config, agentConfig, instances):
        """Initialize the vSphere check: sizing knobs, caches and
        per-instance cache/event configuration."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)
        self.time_started = time.time()
        self.pool_started = False
        self.exceptionq = Queue()

        # Batch sizes are clamped to be non-negative.
        self.batch_morlist_size = max(
            init_config.get("batch_morlist_size", BATCH_MORLIST_SIZE), 0)
        self.batch_collector_size = max(
            init_config.get("batch_property_collector_size",
                            BATCH_COLLECTOR_SIZE), 0)

        self.refresh_morlist_interval = init_config.get(
            'refresh_morlist_interval', REFRESH_MORLIST_INTERVAL)
        # Cleaning never happens more often than refreshing.
        self.clean_morlist_interval = max(
            init_config.get('clean_morlist_interval',
                            2 * self.refresh_morlist_interval),
            self.refresh_morlist_interval)
        self.refresh_metrics_metadata_interval = init_config.get(
            'refresh_metrics_metadata_interval',
            REFRESH_METRICS_METADATA_INTERVAL)

        # Connections open to vCenter instances
        self.server_instances = {}
        self.server_instances_lock = threading.RLock()

        # Event configuration
        self.event_config = {}

        # Caching configuration
        self.cache_config = CacheConfig()

        # build up configurations
        for instance in instances:
            i_key = self._instance_key(instance)
            # caches
            self.cache_config.set_interval(CacheConfig.Morlist, i_key,
                                           self.refresh_morlist_interval)
            self.cache_config.set_interval(
                CacheConfig.Metadata, i_key,
                self.refresh_metrics_metadata_interval)
            # events
            self.event_config[i_key] = instance.get('event_config')

        # Queue of raw Mor objects to process
        self.mor_objects_queue = ObjectsQueue()

        # Cache of processed Mor objects
        self.mor_cache = MorCache()

        # managed entity raw view
        self.registry = {}

        # Metrics metadata, for each instance keeps the mapping: perfCounterKey -> {name, group, description}
        self.metadata_cache = MetadataCache()
        self.latest_event_query = {}
# Example #13
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the Postgres check's per-instance caches."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # Connections and server versions, keyed per instance.
     self.dbs = {}
     self.versions = {}
     # Metric-definition caches.
     self.instance_metrics = {}
     self.bgw_metrics = {}
     self.archiver_metrics = {}
     self.replication_metrics = {}
     self.custom_metrics = {}
     # Per-database bookkeeping lists.
     self.db_bgw_metrics = []
     self.db_archiver_metrics = []
# Example #14
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Initialize the check and precompute collection metric names.

        Fix: the old loop used dict.iteritems() (Python 2 only) while
        discarding every value; iterating the dict's keys directly is
        equivalent and runs on both Python 2 and 3.
        """
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # Members' last replica set states
        self._last_state_by_server = {}

        # List of metrics to collect per instance
        self.metrics_to_collect_by_instance = {}

        # Second dotted component of each COLLECTION_METRICS key.
        self.collection_metrics_names = [
            key.split('.')[1] for key in self.COLLECTION_METRICS
        ]
# Example #15
    def __init__(self, name, init_config, agentConfig, instances):
        """Initialize the vSphere check: sizing knobs, refresh intervals and
        per-instance cache/event configuration."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)
        self.time_started = time.time()
        self.pool_started = False
        self.exceptionq = Queue()

        # Batch sizes are clamped to be non-negative.
        self.batch_morlist_size = max(
            init_config.get("batch_morlist_size", BATCH_MORLIST_SIZE), 0)
        self.batch_collector_size = max(
            init_config.get("batch_property_collector_size",
                            BATCH_COLLECTOR_SIZE), 0)

        self.refresh_morlist_interval = init_config.get(
            'refresh_morlist_interval', REFRESH_MORLIST_INTERVAL)
        # Cleaning never happens more often than refreshing.
        self.clean_morlist_interval = max(
            init_config.get('clean_morlist_interval',
                            2 * self.refresh_morlist_interval),
            self.refresh_morlist_interval)
        self.refresh_metrics_metadata_interval = init_config.get(
            'refresh_metrics_metadata_interval',
            REFRESH_METRICS_METADATA_INTERVAL)

        # Connections open to vCenter instances
        self.server_instances = {}

        # Event configuration
        self.event_config = {}

        # Caching configuration
        self.cache_config = CacheConfig()

        # build up configurations
        for instance in instances:
            i_key = self._instance_key(instance)
            # caches
            self.cache_config.set_interval(CacheConfig.Morlist, i_key,
                                           self.refresh_morlist_interval)
            self.cache_config.set_interval(
                CacheConfig.Metadata, i_key,
                self.refresh_metrics_metadata_interval)
            # events
            self.event_config[i_key] = instance.get('event_config')

        # managed entity raw view
        self.registry = {}
        # First layer of cache (get entities from the tree)
        self.morlist_raw = {}
        # Second layer, processed from the first one
        self.morlist = {}
        # Metrics metadata, basically perfCounterId -> {name, group, description}
        self.metrics_metadata = {}
        self.latest_event_query = {}
# Example #16
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the Postgres check's caches and server registry."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # Connections and server versions, keyed per instance.
     self.dbs = {}
     self.versions = {}
     # Metric-definition caches.
     self.custom_metrics = {}
     self.instance_metrics = {}
     self.bgw_metrics = {}
     self.archiver_metrics = {}
     self.replication_metrics = {}
     # Per-database bookkeeping lists.
     self.db_bgw_metrics = []
     self.db_archiver_metrics = []
     # keep track of host/port present in any configured instance
     self._known_servers = set()
# Example #17
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Initialize the Riak check, expanding the stat-key suffix matrix."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # Each (base-key group, suffix list) pair expands into one
        # "<base>_<suffix>" entry per combination, suffix-major order.
        suffix_groups = [
            (self.stat_keys, ["mean", "median", "95", "99", "100"]),
            (self.search_latency_keys,
             ["min", "max", "mean", "median", "95", "99", "999"]),
            (self.vnodeq_keys, ["min", "max", "mean", "median", "total"]),
        ]
        for bases, suffixes in suffix_groups:
            self.keys.extend(
                base + "_" + suffix for suffix in suffixes for base in bases)

        self.prev_coord_redirs_total = -1
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Initialize the Kafka check's timeouts, limits and client cache."""
        AgentCheck.__init__(self, name, init_config, agentConfig,
                            instances=instances)

        # All tunables come from init_config and are coerced to int.
        get_option = init_config.get
        self._zk_timeout = int(get_option('zk_timeout', DEFAULT_ZK_TIMEOUT))
        self._kafka_timeout = int(get_option('kafka_timeout', DEFAULT_KAFKA_TIMEOUT))
        self.context_limit = int(get_option('max_partition_contexts', CONTEXT_UPPER_BOUND))
        self._broker_retries = int(get_option('kafka_retries', DEFAULT_KAFKA_RETRIES))

        self._zk_last_ts = {}
        self.kafka_clients = {}
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Initialize the Postgres check caches and warn about the
        deprecated `custom_metrics` option.

        Fix: `any()` now consumes a generator expression instead of first
        materializing a throwaway list.
        """
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)
        self.dbs = {}
        self.versions = {}
        self.instance_metrics = {}
        self.bgw_metrics = {}
        self.archiver_metrics = {}
        self.db_bgw_metrics = []
        self.db_archiver_metrics = []
        self.replication_metrics = {}
        self.activity_metrics = {}
        self.custom_metrics = {}

        # Deprecate custom_metrics in favor of custom_queries
        if instances is not None and any('custom_metrics' in instance for instance in instances):
            self.warning("DEPRECATION NOTICE: Please use the new custom_queries option "
                         "rather than the now deprecated custom_metrics")
# Example #20
 def __init__(self, name, init_config, agentConfig, instances):
     """Initialize the NFS check, locating a usable nfsiostat binary."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # Explicit configuration wins; otherwise probe the embedded install
     # location, then the default system location, in that order.
     configured_path = init_config.get('nfsiostat_path')
     if configured_path:
         nfsiostat = configured_path
     elif os.path.exists('/opt/datadog-agent/embedded/sbin/nfsiostat'):
         nfsiostat = '/opt/datadog-agent/embedded/sbin/nfsiostat'
     elif os.path.exists('/usr/local/sbin/nfsiostat'):
         nfsiostat = '/usr/local/sbin/nfsiostat'
     else:
         raise Exception(
             'nfsstat check requires nfsiostat be installed, please install it '
             '(through nfs-utils) or set the path to the installed version'
         )
     self.nfs_cmd = [nfsiostat, '1', '2']
    def __init__(self, name, init_config, agentConfig, instances):
        """Initialize the vSphere check: connection/event state and the
        per-instance cache timestamp/interval table."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)
        self.time_started = time.time()
        self.pool_started = False
        self.jobs_status = {}
        self.exceptionq = Queue()

        # Connections open to vCenter instances
        self.server_instances = {}

        # Event configuration
        self.event_config = {}
        # Caching resources, timeouts
        # cache_times[i_key][kind] holds a LAST timestamp (0 = never) and the
        # configured refresh INTERVAL for that kind of cache.
        self.cache_times = {}
        for instance in self.instances:
            i_key = self._instance_key(instance)
            self.cache_times[i_key] = {
                MORLIST: {
                    LAST:
                    0,
                    INTERVAL:
                    init_config.get('refresh_morlist_interval',
                                    REFRESH_MORLIST_INTERVAL)
                },
                METRICS_METADATA: {
                    LAST:
                    0,
                    INTERVAL:
                    init_config.get('refresh_metrics_metadata_interval',
                                    REFRESH_METRICS_METADATA_INTERVAL)
                }
            }

            self.event_config[i_key] = instance.get('event_config')

        # managed entity raw view
        self.registry = {}
        # First layer of cache (get entities from the tree)
        self.morlist_raw = {}
        # Second layer, processed from the first one
        self.morlist = {}
        # Metrics metadata, basically perfCounterId -> {name, group, description}
        self.metrics_metadata = {}
        self.latest_event_query = {}
# Example #22
 def __init__(self, *posargs, **options):
     """Forward construction to AgentCheck and attach an instance cache."""
     AgentCheck.__init__(self, *posargs, **options)
     # Per-instance data kept between runs.
     self.instance_cache = {}
# Example #23
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the check's replica-state and index-rate caches."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # Both caches persist across runs.
     self.idx_rates = {}
     self._last_state_by_server = {}
# Example #24
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Initialize the check, remembering the last build per instance."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)
        # instance -> most recently seen build ID
        self.last_build_ids = {}
# Example #25
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the check with a configurable default timeout."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # init_config may override the class-level DEFAULT_TIMEOUT.
     self.default_timeout = init_config.get(
         'default_timeout', self.DEFAULT_TIMEOUT)
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the check and resolve the nodetool command path."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # The nodetool location can be overridden; default to the stock path.
     self.nodetool_cmd = init_config.get("nodetool", "/usr/bin/nodetool")
# Example #27
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Initialize the Couchbase check with autovivified instance state."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)
        # A fresh CouchbaseInstanceState is built on first access per instance;
        # passing the bound callable directly is equivalent to the lambda form.
        self._instance_states = defaultdict(self.CouchbaseInstanceState)
# Example #28
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Initialize the check; host status persists across all check runs."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)
        self.cluster_status = {}
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the check with an empty database-connection cache."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # Connections cached between runs.
     self.dbs = {}
# Example #30
 def __init__(self, name, init_config, agentConfig, instances):
     """Initialize the WMI check's sampler and property caches."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # Both caches persist for the lifetime of the check.
     self.wmi_props = {}
     self.wmi_samplers = {}