class TestKubeutil(unittest.TestCase):
    """Unit tests for the ``KubeUtil`` helper and ``Platform.is_k8s``.

    Payload-based tests load the ``pods_list_1.1.json``, ``pods_list_1.2.json``
    and ``events.json`` fixtures through ``Fixtures``.
    """

    def setUp(self):
        self.kubeutil = KubeUtil()

    @staticmethod
    def _load_json_fixture(file_name):
        """Read a JSON fixture file and return the decoded object."""
        return json.loads(Fixtures.read_file(file_name, string_escape=False))

    @staticmethod
    def _flatten_labels(labels_by_pod):
        """Return the set of all labels found in a {pod: [labels]} mapping."""
        return set(inn for out in labels_by_pod.values() for inn in out)

    @mock.patch('utils.kubeutil.KubeUtil.retrieve_pods_list', side_effect=['foo'])
    @mock.patch('utils.kubeutil.KubeUtil.extract_kube_labels')
    def test_get_kube_labels(self, extract_kube_labels, retrieve_pods_list):
        self.kubeutil.get_kube_labels(excluded_keys='bar')
        # explicit call_count check: Mock.assert_called_once() is missing on
        # older mock releases
        self.assertEqual(retrieve_pods_list.call_count, 1)
        extract_kube_labels.assert_called_once_with('foo', excluded_keys='bar')

    def test_extract_kube_labels(self):
        """
        Test with both 1.1 and 1.2 version payloads
        """
        res = self.kubeutil.extract_kube_labels({}, ['foo'])
        self.assertEqual(len(res), 0)

        pods = self._load_json_fixture("pods_list_1.1.json")
        res = self.kubeutil.extract_kube_labels(pods, ['foo'])
        self.assertEqual(len(self._flatten_labels(res)), 8)
        res = self.kubeutil.extract_kube_labels(pods, ['k8s-app'])
        self.assertEqual(len(self._flatten_labels(res)), 6)

        pods = self._load_json_fixture("pods_list_1.2.json")
        res = self.kubeutil.extract_kube_labels(pods, ['foo'])
        self.assertEqual(len(self._flatten_labels(res)), 3)
        res = self.kubeutil.extract_kube_labels(pods, ['k8s-app'])
        self.assertEqual(len(self._flatten_labels(res)), 3)

    def test_extract_meta(self):
        """
        Test with both 1.1 and 1.2 version payloads
        """
        res = self.kubeutil.extract_meta({}, 'foo')
        self.assertEqual(len(res), 0)

        pods = self._load_json_fixture("pods_list_1.1.json")
        res = self.kubeutil.extract_meta(pods, 'foo')
        self.assertEqual(len(res), 0)
        res = self.kubeutil.extract_meta(pods, 'uid')
        self.assertEqual(len(res), 6)

        pods = self._load_json_fixture("pods_list_1.2.json")
        res = self.kubeutil.extract_meta(pods, 'foo')
        self.assertEqual(len(res), 0)
        res = self.kubeutil.extract_meta(pods, 'uid')
        self.assertEqual(len(res), 4)

    @mock.patch('utils.kubeutil.retrieve_json')
    def test_retrieve_pods_list(self, retrieve_json):
        self.kubeutil.retrieve_pods_list()
        retrieve_json.assert_called_once_with(self.kubeutil.pods_list_url)

    @mock.patch('utils.kubeutil.retrieve_json')
    def test_retrieve_metrics(self, retrieve_json):
        self.kubeutil.retrieve_metrics()
        retrieve_json.assert_called_once_with(self.kubeutil.metrics_url)

    def test_filter_pods_list(self):
        """
        Test with both 1.1 and 1.2 version payloads
        """
        res = self.kubeutil.filter_pods_list({}, 'foo')
        self.assertEqual(len(res.get('items')), 0)

        # the fixture is reloaded before every call so that each call gets a
        # fresh payload
        pods = self._load_json_fixture("pods_list_1.1.json")
        res = self.kubeutil.filter_pods_list(pods, '10.240.0.9')
        self.assertEqual(len(res.get('items')), 5)

        pods = self._load_json_fixture("pods_list_1.1.json")
        res = self.kubeutil.filter_pods_list(pods, 'foo')
        self.assertEqual(len(res.get('items')), 0)

        pods = self._load_json_fixture("pods_list_1.2.json")
        res = self.kubeutil.filter_pods_list(pods, '10.240.0.5')
        self.assertEqual(len(res.get('items')), 1)

        pods = self._load_json_fixture("pods_list_1.2.json")
        res = self.kubeutil.filter_pods_list(pods, 'foo')
        self.assertEqual(len(res.get('items')), 0)

    @mock.patch('utils.kubeutil.requests')
    def test_retrieve_json_auth(self, r):
        self.kubeutil.retrieve_json_auth('url', 'foo_tok')
        r.get.assert_called_once_with(
            'url', verify=False, timeout=10,
            headers={'Authorization': 'Bearer foo_tok'})

        # any existing file can stand in for the CA certificate
        self.kubeutil.CA_CRT_PATH = __file__
        self.kubeutil.retrieve_json_auth('url', 'foo_tok')
        r.get.assert_called_with(
            'url', verify=__file__, timeout=10,
            headers={'Authorization': 'Bearer foo_tok'})

    def test_get_node_info(self):
        with mock.patch('utils.kubeutil.KubeUtil._fetch_host_data') as f:
            self.kubeutil.get_node_info()
            self.assertEqual(f.call_count, 1)

            f.reset_mock()

            # once ip and name are set, the host data must not be fetched again
            self.kubeutil._node_ip = 'foo'
            self.kubeutil._node_name = 'bar'
            ip, name = self.kubeutil.get_node_info()
            self.assertEqual(ip, 'foo')
            self.assertEqual(name, 'bar')
            self.assertEqual(f.call_count, 0)

    def test__fetch_host_data(self):
        """
        Test with both 1.1 and 1.2 version payloads
        """
        with mock.patch('utils.kubeutil.KubeUtil.retrieve_pods_list') as mock_pods:
            self.kubeutil.host_name = 'dd-agent-1rxlh'
            mock_pods.return_value = self._load_json_fixture("pods_list_1.2.json")
            self.kubeutil._fetch_host_data()
            self.assertEqual(self.kubeutil._node_ip, '10.240.0.9')
            self.assertEqual(self.kubeutil._node_name, 'kubernetes-massi-minion-k23m')

            self.kubeutil.host_name = 'heapster-v11-l8sh1'
            mock_pods.return_value = self._load_json_fixture("pods_list_1.1.json")
            self.kubeutil._fetch_host_data()
            self.assertEqual(self.kubeutil._node_ip, '10.240.0.9')
            self.assertEqual(self.kubeutil._node_name, 'gke-cluster-1-8046fdfa-node-ld35')

    def test_get_auth_token(self):
        # patch.object restores the class attribute on exit, so the fake token
        # path does not leak into the other tests
        with mock.patch.object(KubeUtil, 'AUTH_TOKEN_PATH', '/foo/bar', create=True):
            self.assertIsNone(KubeUtil.get_auth_token())

        # any file could do the trick
        with mock.patch.object(KubeUtil, 'AUTH_TOKEN_PATH',
                               Fixtures.file('events.json'), create=True):
            self.assertIsNotNone(KubeUtil.get_auth_token())

    def test_is_k8s(self):
        # os.unsetenv() does not update os.environ, so the variable has to be
        # removed from the mapping itself; patch.dict restores the original
        # environment once the block exits.
        with mock.patch.dict(os.environ):
            os.environ.pop('KUBERNETES_PORT', None)
            self.assertFalse(Platform.is_k8s())
            os.environ['KUBERNETES_PORT'] = '999'
            self.assertTrue(Platform.is_k8s())

    def test_extract_event_tags(self):
        events = self._load_json_fixture("events.json")['items']
        for ev in events:
            tags = KubeUtil().extract_event_tags(ev)
            # there should be 4 tags except for some events where source.host is missing
            self.assertTrue(len(tags) >= 3)

            tag_names = [tag.split(':')[0] for tag in tags]
            self.assertIn('reason', tag_names)
            self.assertIn('namespace', tag_names)
            self.assertIn('object_type', tag_names)
            if len(tags) == 4:
                self.assertIn('node_name', tag_names)
class Kubernetes(AgentCheck):
    """ Collect metrics and events from kubelet """

    # class-level mapping, kept as-is; it is not used by the methods below
    pod_names_by_container = {}

    # extracts the numeric part of a resource quantity string; compiled once
    # instead of once per container
    _RESOURCE_VALUE_RE = re.compile(r'[-+]?\d+[\.]?\d*')

    def __init__(self, name, init_config, agentConfig, instances=None):
        """Build the check and its KubeUtil helper.

        Raises an Exception when more than one instance is configured or when
        KubeUtil could not determine a host.
        """
        if instances is not None and len(instances) > 1:
            raise Exception('Kubernetes check only supports one configured instance.')

        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # an empty instance list is handled like a missing one
        inst = instances[0] if instances else None
        self.kubeutil = KubeUtil(instance=inst)
        if not self.kubeutil.host:
            raise Exception('Unable to retrieve Docker hostname and host parameter is not set')

    def _perform_kubelet_checks(self, url):
        """Query the kubelet health endpoint and emit service checks.

        One service check is sent per parsed line of the response, plus a
        global one on NAMESPACE.kubelet.check which is CRITICAL if any
        individual check failed or if the request itself raised.
        """
        service_check_base = NAMESPACE + '.kubelet.check'
        is_ok = True
        try:
            r = requests.get(url)
            for line in r.iter_lines():
                # avoid noise; this check is expected to fail since we override the container hostname
                if 'hostname' in line:
                    continue

                matches = re.match(r'\[(.)\]([^\s]+) (.*)?', line)
                if not matches or len(matches.groups()) < 2:
                    continue

                service_check_name = service_check_base + '.' + matches.group(2)
                status = matches.group(1)
                if status == '+':
                    self.service_check(service_check_name, AgentCheck.OK)
                else:
                    self.service_check(service_check_name, AgentCheck.CRITICAL)
                    is_ok = False
        except Exception as e:
            self.log.warning('kubelet check %s failed: %s' % (url, str(e)))
            self.service_check(service_check_base, AgentCheck.CRITICAL,
                               message='Kubelet check %s failed: %s' % (url, str(e)))
        else:
            if is_ok:
                self.service_check(service_check_base, AgentCheck.OK)
            else:
                self.service_check(service_check_base, AgentCheck.CRITICAL)

    def check(self, instance):
        """Run the check: kubelet health checks, metrics and optionally events."""
        self.max_depth = instance.get('max_depth', DEFAULT_MAX_DEPTH)
        enabled_gauges = instance.get('enabled_gauges', DEFAULT_ENABLED_GAUGES)
        self.enabled_gauges = ["{0}.{1}".format(NAMESPACE, x) for x in enabled_gauges]
        enabled_rates = instance.get('enabled_rates', DEFAULT_ENABLED_RATES)
        self.enabled_rates = ["{0}.{1}".format(NAMESPACE, x) for x in enabled_rates]

        self.publish_aliases = _is_affirmative(instance.get('publish_aliases', DEFAULT_PUBLISH_ALIASES))
        self.use_histogram = _is_affirmative(instance.get('use_histogram', DEFAULT_USE_HISTOGRAM))
        # the publish functions are looked up in FUNC_MAP and called with the
        # check instance passed explicitly as first argument
        self.publish_rate = FUNC_MAP[RATE][self.use_histogram]
        self.publish_gauge = FUNC_MAP[GAUGE][self.use_histogram]

        pods_list = self.kubeutil.retrieve_pods_list()

        # kubelet health checks
        self._perform_kubelet_checks(self.kubeutil.kube_health_url)

        # kubelet metrics
        self._update_metrics(instance, pods_list)

        # kubelet events
        if _is_affirmative(instance.get('collect_events', DEFAULT_COLLECT_EVENTS)):
            self._process_events(instance, pods_list)

    def _publish_raw_metrics(self, metric, dat, tags, depth=0):
        """Recursively walk `dat` and publish the numbers it contains.

        Numbers are published as a rate or a gauge when `metric` matches one
        of the enabled patterns, dicts are walked key by key (keys lowercased
        and appended to the metric name), and only the last element of a list
        is considered. Recursion stops once `depth` reaches self.max_depth.
        """
        if depth >= self.max_depth:
            self.log.warning('Reached max depth on metric=%s' % metric)
            return

        if isinstance(dat, numbers.Number):
            if self.enabled_rates and any(fnmatch(metric, pat) for pat in self.enabled_rates):
                self.publish_rate(self, metric, float(dat), tags)
            elif self.enabled_gauges and any(fnmatch(metric, pat) for pat in self.enabled_gauges):
                self.publish_gauge(self, metric, float(dat), tags)

        elif isinstance(dat, dict):
            for k, v in dat.items():
                self._publish_raw_metrics(metric + '.%s' % k.lower(), v, tags, depth + 1)

        elif isinstance(dat, list) and dat:
            # an empty list carries no sample: skip it rather than fail on dat[-1]
            self._publish_raw_metrics(metric, dat[-1], tags, depth + 1)

    @staticmethod
    def _shorten_name(name):
        """Truncate every run of 64+ hex characters in `name` to 12 characters."""
        # shorten docker image id
        return re.sub('([0-9a-fA-F]{64,})', lambda x: x.group(1)[0:12], name)

    def _get_alias_tags(self, subcontainer):
        """Return container_alias tags for every alias but the first one."""
        tags = []
        if self.publish_aliases and subcontainer.get("aliases"):
            for alias in subcontainer['aliases'][1:]:
                # we don't add the first alias as it will be the container_name
                tags.append(u"container_alias:%s" % (self._shorten_name(alias)))
        return tags

    def _get_post_1_2_tags(self, cont_labels, subcontainer, kube_labels):
        """Build pod tags when both namespace and pod name labels are present."""
        tags = []

        pod_name = cont_labels[KubeUtil.POD_NAME_LABEL]
        pod_namespace = cont_labels[KubeUtil.NAMESPACE_LABEL]
        tags.append(u"pod_name:{0}/{1}".format(pod_namespace, pod_name))
        tags.append(u"kube_namespace:{0}".format(pod_namespace))

        # kube_labels is looked up with a "<namespace>/<pod name>" key here
        kube_labels_key = "{0}/{1}".format(pod_namespace, pod_name)

        pod_labels = kube_labels.get(kube_labels_key)
        if pod_labels:
            tags += list(pod_labels)

        if "-" in pod_name:
            # everything before the last dash is used as the replication controller name
            replication_controller = "-".join(pod_name.split("-")[:-1])
            tags.append("kube_replication_controller:%s" % replication_controller)

        tags += self._get_alias_tags(subcontainer)
        return tags

    def _get_pre_1_2_tags(self, cont_labels, subcontainer, kube_labels):
        """Build pod tags when only the pod name label is present."""
        tags = []

        pod_name = cont_labels[KubeUtil.POD_NAME_LABEL]
        tags.append(u"pod_name:{0}".format(pod_name))

        pod_labels = kube_labels.get(pod_name)
        if pod_labels:
            tags.extend(list(pod_labels))

        if "-" in pod_name:
            replication_controller = "-".join(pod_name.split("-")[:-1])
            if "/" in replication_controller:
                # the part before the first slash is reported as the namespace
                namespace, replication_controller = replication_controller.split("/", 1)
                tags.append(u"kube_namespace:%s" % namespace)

            tags.append(u"kube_replication_controller:%s" % replication_controller)

        tags += self._get_alias_tags(subcontainer)
        return tags

    def _update_container_metrics(self, instance, subcontainer, kube_labels):
        """Publish the metrics of one subcontainer and return the tags used."""
        tags = list(instance.get('tags', []))  # add support for custom tags

        if len(subcontainer.get('aliases', [])) >= 1:
            # The first alias seems to always match the docker container name
            container_name = subcontainer['aliases'][0]
        else:
            # We default to the container id
            container_name = subcontainer['name']

        tags.append('container_name:%s' % container_name)

        try:
            cont_labels = subcontainer['spec']['labels']
        except KeyError:
            self.log.debug("Subcontainer, doesn't have any labels")
            cont_labels = {}

        # Collect pod names, namespaces, rc...
        if KubeUtil.NAMESPACE_LABEL in cont_labels and KubeUtil.POD_NAME_LABEL in cont_labels:
            # Kubernetes >= 1.2
            tags += self._get_post_1_2_tags(cont_labels, subcontainer, kube_labels)

        elif KubeUtil.POD_NAME_LABEL in cont_labels:
            # Kubernetes <= 1.1
            tags += self._get_pre_1_2_tags(cont_labels, subcontainer, kube_labels)

        else:
            # Those are containers that are not part of a pod.
            # They are top aggregate views and don't have the previous metadata.
            tags.append("pod_name:no_pod")

        stats = subcontainer['stats'][-1]  # take the latest
        self._publish_raw_metrics(NAMESPACE, stats, tags)

        spec = subcontainer.get("spec", {})
        # skip the usage percentage when no filesystem sample was reported or
        # when the capacity is 0, instead of failing the whole container
        if spec.get("has_filesystem") and stats.get('filesystem'):
            fs = stats['filesystem'][-1]
            capacity = float(fs['capacity'])
            if capacity:
                fs_utilization = float(fs['usage']) / capacity
                self.publish_gauge(self, NAMESPACE + '.filesystem.usage_pct', fs_utilization, tags)

        if spec.get("has_network"):
            net = stats['network']
            self.publish_rate(self, NAMESPACE + '.network_errors',
                              sum(float(net[x]) for x in NET_ERRORS),
                              tags)

        return tags

    def _publish_container_resources(self, container, kind, tags):
        """Publish one gauge per entry of container['resources'][kind].

        `kind` is 'limits' or 'requests'. Not every container declares them,
        so a missing section is only logged at debug level.
        """
        c_name = container.get('name')
        try:
            for resource, value_str in container['resources'][kind].items():
                values = [float(s) for s in self._RESOURCE_VALUE_RE.findall(value_str)]
                if len(values) != 1:
                    self.log.warning("Error parsing %s value string: %s", kind, value_str)
                    continue
                self.publish_gauge(self, '{}.{}.{}'.format(NAMESPACE, resource, kind), values[0], tags)
        except (KeyError, AttributeError) as e:
            self.log.debug("Unable to retrieve container %s for %s: %s", kind, c_name, e)
            self.log.debug("Container object for {}: {}".format(c_name, container))

    def _update_metrics(self, instance, pods_list):
        """Publish container metrics, then limits/requests from the pods list.

        Raises an Exception when no metrics could be retrieved.
        """
        metrics = self.kubeutil.retrieve_metrics()
        if not metrics:
            raise Exception('No metrics retrieved from %s' % self.kubeutil.metrics_url)

        excluded_labels = instance.get('excluded_labels')
        kube_labels = self.kubeutil.extract_kube_labels(pods_list, excluded_keys=excluded_labels)

        # container metrics from Cadvisor
        container_tags = {}
        for subcontainer in metrics:
            c_id = subcontainer.get('id')
            try:
                tags = self._update_container_metrics(instance, subcontainer, kube_labels)
                if c_id:
                    container_tags[c_id] = tags
                # also store tags for aliases
                for alias in subcontainer.get('aliases', []):
                    container_tags[alias] = tags
            except Exception as e:
                self.log.error("Unable to collect metrics for container: {0} ({1})".format(c_id, e))

        # container metrics from kubernetes API: limits and requests
        for pod in pods_list['items']:
            try:
                containers = pod['spec']['containers']
                name2id = {}
                for cs in pod['status'].get('containerStatuses', []):
                    # containerID is "<scheme>//<id>": keep the id only
                    c_id = cs.get('containerID', '').split('//')[-1]
                    name = cs.get('name')
                    if name:
                        name2id[name] = c_id
            except KeyError:
                self.log.debug("Pod %s does not have containers specs, skipping...",
                               pod.get('metadata', {}).get('name'))
                continue

            for container in containers:
                c_name = container.get('name')
                # reuse the tags computed above for this container, if any
                _tags = container_tags.get(name2id.get(c_name), [])
                self._publish_container_resources(container, 'limits', _tags)
                self._publish_container_resources(container, 'requests', _tags)

        self._update_pods_metrics(instance, pods_list)
class SDDockerBackend(AbstractSDBackend):
    """Docker-based service discovery"""

    def __init__(self, agentConfig):
        """Set up the docker client, the config store and the variable getters."""
        self.docker_client = DockerUtil().client
        if is_k8s():
            self.kubeutil = KubeUtil()

        try:
            self.config_store = get_config_store(agentConfig=agentConfig)
        except Exception as e:
            log.error('Failed to instantiate the config store client. '
                      'Auto-config only will be used. %s' % str(e))
            # retry without a backend
            agentConfig['sd_config_backend'] = None
            self.config_store = get_config_store(agentConfig=agentConfig)

        # maps the prefix of a template variable to the method computing its value
        self.VAR_MAPPING = {
            'host': self._get_host,
            'port': self._get_ports,
            'tags': self._get_additional_tags,
        }
        AbstractSDBackend.__init__(self, agentConfig)

    def _get_host(self, container_inspect):
        """Extract the host IP from a docker inspect object, or the kubelet API."""
        ip_addr = container_inspect.get('NetworkSettings', {}).get('IPAddress')
        if ip_addr:
            return ip_addr

        if not is_k8s():
            return None

        # kubernetes case
        log.debug("Didn't find the IP address for container %s (%s), using the kubernetes way." %
                  (container_inspect.get('Id', '')[:12], container_inspect.get('Config', {}).get('Image', '')))
        pod_list = self.kubeutil.retrieve_pods_list().get('items', [])
        c_id = container_inspect.get('Id')
        for pod in pod_list:
            pod_ip = pod.get('status', {}).get('podIP')
            if pod_ip is None:
                continue
            c_statuses = pod.get('status', {}).get('containerStatuses', [])
            for status in c_statuses:
                # compare the container id with those of containers in the current pod
                if c_id == status.get('containerID', '').split('//')[-1]:
                    return pod_ip
        return ip_addr

    def _get_ports(self, container_inspect):
        """Extract a list of available ports from a docker inspect object. Sort them numerically."""
        c_id = container_inspect.get('Id', '')
        try:
            # keys are split on '/' and only the leading port number is kept
            ports = [x.split('/')[0] for x in container_inspect['NetworkSettings']['Ports'].keys()]
        except (IndexError, KeyError, AttributeError):
            log.debug("Didn't find the port for container %s (%s), trying the kubernetes way." %
                      (c_id[:12], container_inspect.get('Config', {}).get('Image', '')))
            # first we try to get it from the docker API
            # it works if the image has an EXPOSE instruction
            exposed = container_inspect.get('Config', {}).get('ExposedPorts') or {}
            ports = [x.split('/')[0] for x in exposed.keys()]
            # if it failed, try with the kubernetes API
            if not ports and is_k8s():
                # _get_kube_config returns None when no pod owns the container
                co_statuses = (self._get_kube_config(c_id, 'status') or {}).get('containerStatuses', [])
                c_name = None
                for co in co_statuses:
                    if co.get('containerID', '').split('//')[-1] == c_id:
                        c_name = co.get('name')
                        break
                containers = (self._get_kube_config(c_id, 'spec') or {}).get('containers', [])
                for co in containers:
                    if co.get('name') == c_name:
                        ports = [str(x.get('containerPort')) for x in co.get('ports', [])]
        ports = sorted(ports, key=int)
        return ports

    def get_tags(self, c_inspect):
        """Extract useful tags from docker or platform APIs. These are collected by default."""
        tags = []
        if is_k8s():
            pod_metadata = self._get_kube_config(c_inspect.get('Id'), 'metadata')

            if pod_metadata is None:
                log.warning("Failed to fetch pod metadata for container %s."
                            " Kubernetes tags may be missing." % c_inspect.get('Id', '')[:12])
                return []
            # get labels
            kube_labels = pod_metadata.get('labels', {})
            for label, value in kube_labels.items():
                tags.append('%s:%s' % (label, value))

            # get replication controller
            created_by = json.loads(pod_metadata.get('annotations', {}).get('kubernetes.io/created-by', '{}'))
            if created_by.get('reference', {}).get('kind') == 'ReplicationController':
                tags.append('kube_replication_controller:%s' % created_by.get('reference', {}).get('name'))

            # get kubernetes namespace
            tags.append('kube_namespace:%s' % pod_metadata.get('namespace'))

        return tags

    def _get_additional_tags(self, container_inspect):
        """Return node_name and pod_name tags on kubernetes, an empty list otherwise."""
        tags = []
        if is_k8s():
            pod_metadata = self._get_kube_config(container_inspect.get('Id'), 'metadata')
            pod_spec = self._get_kube_config(container_inspect.get('Id'), 'spec')
            # _get_kube_config returns None when no pod owns the container
            if pod_metadata is None or pod_spec is None:
                log.warning("Failed to fetch pod metadata or pod spec for container %s."
                            " Additional Kubernetes tags may be missing." % container_inspect.get('Id', '')[:12])
                return []
            tags.append('node_name:%s' % pod_spec.get('nodeName'))
            tags.append('pod_name:%s' % pod_metadata.get('name'))
        return tags

    def _get_kube_config(self, c_id, key):
        """Get a part of a pod config from the kubernetes API

        Returns None when no pod in the pods list contains the container.
        """
        pods = self.kubeutil.retrieve_pods_list().get('items', [])
        for pod in pods:
            c_statuses = pod.get('status', {}).get('containerStatuses', [])
            for status in c_statuses:
                if c_id == status.get('containerID', '').split('//')[-1]:
                    return pod.get(key, {})
        return None

    @staticmethod
    def _image_short_name(image):
        """Return the image name without registry, repository path, tag or digest."""
        # drop the path first: a registry can carry a port ("host:5000/img:tag"),
        # so splitting on ':' first would return the registry host instead
        name = (image or '').split('/')[-1]
        return name.split('@')[0].split(':')[0]

    def get_configs(self):
        """Get the config for all docker containers running on the host."""
        configs = {}
        containers = [(
            self._image_short_name(container.get('Image')),
            container.get('Id'), container.get('Labels')
        ) for container in self.docker_client.containers()]

        # used by the configcheck agent command to trace where check configs come from
        trace_config = self.agentConfig.get(TRACE_CONFIG, False)

        for image, cid, labels in containers:
            try:
                check_configs = self._get_check_configs(cid, image, trace_config=trace_config) or []
                for conf in check_configs:
                    if trace_config and conf is not None:
                        source, conf = conf

                    check_name, init_config, instance = conf
                    # build instances list if needed
                    if configs.get(check_name) is None:
                        if trace_config:
                            configs[check_name] = (source, (init_config, [instance]))
                        else:
                            configs[check_name] = (init_config, [instance])
                    else:
                        conflict_init_msg = 'Different versions of `init_config` found for check {0}. ' \
                            'Keeping the first one found.'
                        if trace_config:
                            if configs[check_name][1][0] != init_config:
                                log.warning(conflict_init_msg.format(check_name))
                            configs[check_name][1][1].append(instance)
                        else:
                            if configs[check_name][0] != init_config:
                                log.warning(conflict_init_msg.format(check_name))
                            configs[check_name][1].append(instance)
            except Exception:
                log.exception('Building config for container %s based on image %s using service'
                              ' discovery failed, leaving it alone.' % ((cid or '')[:12], image))
        return configs

    def _get_check_configs(self, c_id, image, trace_config=False):
        """Retrieve configuration templates and fill them with data pulled from docker and tags."""
        inspect = self.docker_client.inspect_container(c_id)
        config_templates = self._get_config_templates(image, trace_config=trace_config)
        if not config_templates:
            log.debug('No config template for container %s with image %s. '
                      'It will be left unconfigured.' % (c_id[:12], image))
            return None

        check_configs = []
        tags = self.get_tags(inspect)
        for config_tpl in config_templates:
            if trace_config:
                source, config_tpl = config_tpl
            check_name, init_config_tpl, instance_tpl, variables = config_tpl

            # insert tags in instance_tpl and process values for template variables
            instance_tpl, var_values = self._fill_tpl(inspect, instance_tpl, variables, tags)

            tpl = self._render_template(init_config_tpl or {}, instance_tpl or {}, var_values)
            if tpl and len(tpl) == 2:
                # the rendered template is unpacked the same way in both modes;
                # the source comes from the template entry unpacked above
                init_config, instance = tpl
                if trace_config:
                    check_configs.append((source, (check_name, init_config, instance)))
                else:
                    check_configs.append((check_name, init_config, instance))

        return check_configs

    def _get_config_templates(self, image_name, trace_config=False):
        """Extract config templates for an image from a K/V store and returns it as a dict object."""
        config_backend = self.agentConfig.get('sd_config_backend')
        templates = []
        if config_backend is None:
            auto_conf = True
            log.warning('No supported configuration backend was provided, using auto-config only.')
        else:
            auto_conf = False

        # format: [('image', {init_tpl}, {instance_tpl})] without trace_config
        # or      [(source, ('image', {init_tpl}, {instance_tpl}))] with trace_config
        raw_tpls = self.config_store.get_check_tpls(image_name, auto_conf=auto_conf, trace_config=trace_config)
        for tpl in raw_tpls:
            if trace_config and tpl is not None:
                # each template can come from either auto configuration or user-supplied templates
                source, tpl = tpl
            if tpl is not None and len(tpl) == 3:
                check_name, init_config_tpl, instance_tpl = tpl
            else:
                log.debug('No template was found for image %s, leaving it alone.' % image_name)
                return None
            try:
                # build a list of all variables to replace in the template
                variables = self.PLACEHOLDER_REGEX.findall(str(init_config_tpl)) + \
                    self.PLACEHOLDER_REGEX.findall(str(instance_tpl))
                variables = [x.strip('%') for x in variables]
                if not isinstance(init_config_tpl, dict):
                    init_config_tpl = json.loads(init_config_tpl or '{}')
                if not isinstance(instance_tpl, dict):
                    instance_tpl = json.loads(instance_tpl or '{}')
            except ValueError:
                # ValueError covers JSON decoding failures in every json
                # implementation; json.JSONDecodeError is not always defined
                log.exception('Failed to decode the JSON template fetched for check {0}. Its configuration'
                              ' by service discovery failed for {1}.'.format(check_name, image_name))
                return None

            if trace_config:
                templates.append((source, (check_name, init_config_tpl, instance_tpl, variables)))
            else:
                templates.append((check_name, init_config_tpl, instance_tpl, variables))

        return templates

    def _fill_tpl(self, inspect, instance_tpl, variables, tags=None):
        """Add container tags to instance templates and build a
        dict from template variable names and their values.

        Returns the (instance_tpl, var_values) pair. `tags` is not modified.
        """
        var_values = {}

        # add default tags to the instance
        if tags:
            tpl_tags = instance_tpl.get('tags') or []
            if not isinstance(tpl_tags, list):
                tpl_tags = [tpl_tags]
            # build a new list: the caller reuses `tags` for every template, so
            # extending it in place would leak tags from one template to the next
            instance_tpl['tags'] = list(set(tags + tpl_tags))

        for v in variables:
            # variables can be suffixed with an index in case a list is found
            var_parts = v.split('_')
            if var_parts[0] in self.VAR_MAPPING:
                try:
                    res = self.VAR_MAPPING[var_parts[0]](inspect)
                    if not res:
                        raise ValueError("Invalid value for variable %s." % var_parts[0])
                    # if an index is found in the variable, use it to select a value
                    if len(var_parts) > 1 and isinstance(res, list) and int(var_parts[-1]) < len(res):
                        var_values[v] = res[int(var_parts[-1])]
                    # if no valid index was found but we have a list, return the last element
                    elif isinstance(res, list):
                        var_values[v] = res[-1]
                    else:
                        var_values[v] = res
                except Exception as ex:
                    log.error("Could not find a value for the template variable %s: %s" % (v, str(ex)))
            else:
                log.error("No method was found to interpolate template variable %s." % v)

        return instance_tpl, var_values
class SDDockerBackend(AbstractSDBackend):
    """Docker-based service discovery"""

    def __init__(self, agentConfig):
        """Set up the docker client, the config store and the variable getters."""
        self.docker_client = DockerUtil().client
        if is_k8s():
            self.kubeutil = KubeUtil()

        try:
            self.config_store = get_config_store(agentConfig=agentConfig)
        except Exception as e:
            log.error('Failed to instantiate the config store client. '
                      'Auto-config only will be used. %s' % str(e))
            # retry without a backend
            agentConfig['sd_config_backend'] = None
            self.config_store = get_config_store(agentConfig=agentConfig)

        # maps the prefix of a template variable to the method computing its
        # value; each method is called with (inspect, template variable)
        self.VAR_MAPPING = {
            'host': self._get_host_address,
            'port': self._get_port,
            'tags': self._get_additional_tags,
        }

        AbstractSDBackend.__init__(self, agentConfig)

    def _get_host_address(self, c_inspect, tpl_var):
        """Extract the container IP from a docker inspect object, or the kubelet API."""
        c_id, c_img = c_inspect.get('Id', ''), c_inspect.get('Config', {}).get('Image', '')
        tpl_parts = tpl_var.split('_')

        # a specifier was given
        if len(tpl_parts) > 1:
            networks = c_inspect.get('NetworkSettings', {}).get('Networks') or {}
            # keep only the networks that actually have an IP address
            ip_dict = {}
            for net_name, net_desc in networks.items():
                ip = net_desc.get('IPAddress')
                if ip:
                    ip_dict[net_name] = ip
            ip_addr = self._extract_ip_from_networks(ip_dict, tpl_var)
            if ip_addr:
                return ip_addr

        # try to get the bridge IP address
        log.debug("No network found for container %s (%s), trying with IPAddress field" % (c_id[:12], c_img))
        ip_addr = c_inspect.get('NetworkSettings', {}).get('IPAddress')
        if ip_addr:
            return ip_addr

        if is_k8s():
            # kubernetes case
            log.debug("Couldn't find the IP address for container %s (%s), "
                      "using the kubernetes way." % (c_id[:12], c_img))
            pod_list = self.kubeutil.retrieve_pods_list().get('items', [])
            for pod in pod_list:
                pod_ip = pod.get('status', {}).get('podIP')
                if pod_ip is None:
                    continue
                c_statuses = pod.get('status', {}).get('containerStatuses', [])
                for status in c_statuses:
                    # compare the container id with those of containers in the current pod
                    if c_id == status.get('containerID', '').split('//')[-1]:
                        return pod_ip

        log.error("No IP address was found for container %s (%s)" % (c_id[:12], c_img))
        return None

    def _extract_ip_from_networks(self, ip_dict, tpl_var):
        """Extract a single IP from a dictionary made of network names and IPs."""
        if not ip_dict:
            return None
        tpl_parts = tpl_var.split('_')

        # no specifier
        if len(tpl_parts) < 2:
            log.warning("No key was passed for template variable %s." % tpl_var)
            return self._get_fallback_ip(ip_dict)

        res = ip_dict.get(tpl_parts[-1])
        if res is None:
            log.warning("The key passed for template variable %s was not found." % tpl_var)
            return self._get_fallback_ip(ip_dict)
        return res

    def _get_fallback_ip(self, ip_dict):
        """try to pick the bridge key, falls back to the value of the last key"""
        if 'bridge' in ip_dict:
            log.warning("Using the bridge network.")
            return ip_dict['bridge']

        # "last" means last in sorted order of the network names
        last_key = max(ip_dict)
        log.warning("Trying with the last key: '%s'." % last_key)
        return ip_dict[last_key]

    def _get_port(self, container_inspect, tpl_var):
        """Extract a port from a container_inspect or the k8s API given a template variable."""
        c_id = container_inspect.get('Id', '')

        try:
            # keys are split on '/' and only the leading port number is kept
            ports = [x.split('/')[0] for x in container_inspect['NetworkSettings']['Ports'].keys()]
        except (IndexError, KeyError, AttributeError):
            # try to get ports from the docker API. Works if the image has an EXPOSE instruction
            exposed = container_inspect.get('Config', {}).get('ExposedPorts') or {}
            ports = [x.split('/')[0] for x in exposed.keys()]

            # if it failed, try with the kubernetes API
            if not ports and is_k8s():
                log.debug("Didn't find the port for container %s (%s), trying the kubernetes way." %
                          (c_id[:12], container_inspect.get('Config', {}).get('Image', '')))
                # _get_kube_config returns None when no pod owns the container
                co_statuses = (self._get_kube_config(c_id, 'status') or {}).get('containerStatuses', [])
                c_name = None
                for co in co_statuses:
                    if co.get('containerID', '').split('//')[-1] == c_id:
                        c_name = co.get('name')
                        break
                containers = (self._get_kube_config(c_id, 'spec') or {}).get('containers', [])
                for co in containers:
                    if co.get('name') == c_name:
                        ports = [str(x.get('containerPort')) for x in co.get('ports', [])]
        ports = sorted(ports, key=int)
        return self._extract_port_from_list(ports, tpl_var)

    def _extract_port_from_list(self, ports, tpl_var):
        """Pick one port from `ports` according to the index suffix of `tpl_var`.

        Returns None for an empty list. The last port is returned when no
        index is given, or when the index is not an integer or out of range.
        """
        if not ports:
            return None

        tpl_parts = tpl_var.split('_')

        if len(tpl_parts) == 1:
            log.debug("No index was passed for template variable %s. "
                      "Trying with the last element." % tpl_var)
            return ports[-1]

        try:
            idx = tpl_parts[-1]
            return ports[int(idx)]
        except ValueError:
            log.error("Port index is not an integer. Using the last element instead.")
        except IndexError:
            log.error("Port index is out of range. Using the last element instead.")
        return ports[-1]

    def get_tags(self, c_inspect):
        """Extract useful tags from docker or platform APIs. These are collected by default."""
        tags = []
        if is_k8s():
            pod_metadata = self._get_kube_config(c_inspect.get('Id'), 'metadata')

            if pod_metadata is None:
                log.warning("Failed to fetch pod metadata for container %s."
                            " Kubernetes tags may be missing." % c_inspect.get('Id', '')[:12])
                return []
            # get labels
            kube_labels = pod_metadata.get('labels', {})
            for label, value in kube_labels.items():
                tags.append('%s:%s' % (label, value))

            # get replication controller
            created_by = json.loads(pod_metadata.get('annotations', {}).get('kubernetes.io/created-by', '{}'))
            if created_by.get('reference', {}).get('kind') == 'ReplicationController':
                tags.append('kube_replication_controller:%s' % created_by.get('reference', {}).get('name'))

            # get kubernetes namespace
            tags.append('kube_namespace:%s' % pod_metadata.get('namespace'))

        return tags

    def _get_additional_tags(self, container_inspect, *args):
        """Return node_name and pod_name tags on kubernetes, an empty list otherwise.

        Extra positional arguments are accepted and ignored so the method can
        be called like the other VAR_MAPPING getters.
        """
        tags = []
        if is_k8s():
            pod_metadata = self._get_kube_config(container_inspect.get('Id'), 'metadata')
            pod_spec = self._get_kube_config(container_inspect.get('Id'), 'spec')
            if pod_metadata is None or pod_spec is None:
                log.warning("Failed to fetch pod metadata or pod spec for container %s."
                            " Additional Kubernetes tags may be missing." % container_inspect.get('Id', '')[:12])
                return []
            tags.append('node_name:%s' % pod_spec.get('nodeName'))
            tags.append('pod_name:%s' % pod_metadata.get('name'))
        return tags

    def _get_kube_config(self, c_id, key):
        """Get a part of a pod config from the kubernetes API

        Returns None when no pod in the pods list contains the container.
        """
        pods = self.kubeutil.retrieve_pods_list().get('items', [])
        for pod in pods:
            c_statuses = pod.get('status', {}).get('containerStatuses', [])
            for status in c_statuses:
                if c_id == status.get('containerID', '').split('//')[-1]:
                    return pod.get(key, {})
        return None

    def get_configs(self):
        """Get the config for all docker containers running on the host."""
        configs = {}
        containers = [(
            container.get('Image'),
            container.get('Id'), container.get('Labels')
        ) for container in self.docker_client.containers()]

        # used by the configcheck agent command to trace where check configs come from
        trace_config = self.agentConfig.get(TRACE_CONFIG, False)

        for image, cid, labels in containers:
            try:
                # value of the DATADOG_ID tag or the image name if the label is missing
                identifier = self.get_config_id(image, labels)
                check_configs = self._get_check_configs(cid, identifier, trace_config=trace_config) or []
                for conf in check_configs:
                    if trace_config and conf is not None:
                        source, conf = conf

                    check_name, init_config, instance = conf
                    # build instances list if needed
                    if configs.get(check_name) is None:
                        if trace_config:
                            configs[check_name] = (source, (init_config, [instance]))
                        else:
                            configs[check_name] = (init_config, [instance])
                    else:
                        conflict_init_msg = 'Different versions of `init_config` found for check {0}. ' \
                            'Keeping the first one found.'
                        if trace_config:
                            if configs[check_name][1][0] != init_config:
                                log.warning(conflict_init_msg.format(check_name))
                            configs[check_name][1][1].append(instance)
                        else:
                            if configs[check_name][0] != init_config:
                                log.warning(conflict_init_msg.format(check_name))
                            configs[check_name][1].append(instance)
            except Exception:
                log.exception('Building config for container %s based on image %s using service '
                              'discovery failed, leaving it alone.' % ((cid or '')[:12], image))
        return configs

    def get_config_id(self, image, labels):
        """Look for a DATADOG_ID label, return its value or the image name if missing"""
        # `labels` can be None when the container has no labels
        return (labels or {}).get(DATADOG_ID) or image

    def _get_check_configs(self, c_id, identifier, trace_config=False):
        """Retrieve configuration templates and fill them with data pulled from docker and tags."""
        inspect = self.docker_client.inspect_container(c_id)
        config_templates = self._get_config_templates(identifier, trace_config=trace_config)
        if not config_templates:
            log.debug('No config template for container %s with identifier %s. '
                      'It will be left unconfigured.' % (c_id[:12], identifier))
            return None

        check_configs = []
        tags = self.get_tags(inspect)
        for config_tpl in config_templates:
            if trace_config:
                source, config_tpl = config_tpl
            check_name, init_config_tpl, instance_tpl, variables = config_tpl

            # insert tags in instance_tpl and process values for template variables
            instance_tpl, var_values = self._fill_tpl(inspect, instance_tpl, variables, tags)

            tpl = self._render_template(init_config_tpl or {}, instance_tpl or {}, var_values)
            if tpl and len(tpl) == 2:
                init_config, instance = tpl
                if trace_config:
                    check_configs.append((source, (check_name, init_config, instance)))
                else:
                    check_configs.append((check_name, init_config, instance))

        return check_configs

    def _get_config_templates(self, identifier, trace_config=False):
        """Extract config templates for an identifier from a K/V store and returns it as a dict object."""
        config_backend = self.agentConfig.get('sd_config_backend')
        templates = []
        if config_backend is None:
            auto_conf = True
            log.warning('No supported configuration backend was provided, using auto-config only.')
        else:
            auto_conf = False

        # format: [('ident', {init_tpl}, {instance_tpl})] without trace_config
        # or      [(source, ('ident', {init_tpl}, {instance_tpl}))] with trace_config
        raw_tpls = self.config_store.get_check_tpls(
            identifier, auto_conf=auto_conf, trace_config=trace_config)
        for tpl in raw_tpls:
            if trace_config and tpl is not None:
                # each template can come from either auto configuration or user-supplied templates
                source, tpl = tpl
            if tpl is not None and len(tpl) == 3:
                check_name, init_config_tpl, instance_tpl = tpl
            else:
                log.debug('No template was found for identifier %s, leaving it alone.' % identifier)
                return None
            try:
                # build a list of all variables to replace in the template
                variables = self.PLACEHOLDER_REGEX.findall(str(init_config_tpl)) + \
                    self.PLACEHOLDER_REGEX.findall(str(instance_tpl))
                variables = [x.strip('%') for x in variables]
                if not isinstance(init_config_tpl, dict):
                    init_config_tpl = json.loads(init_config_tpl or '{}')
                if not isinstance(instance_tpl, dict):
                    instance_tpl = json.loads(instance_tpl or '{}')
            except ValueError:
                # ValueError covers JSON decoding failures in every json
                # implementation; json.JSONDecodeError is not always defined
                log.exception('Failed to decode the JSON template fetched for check {0}. Its configuration'
                              ' by service discovery failed for ident  {1}.'.format(check_name, identifier))
                return None

            if trace_config:
                templates.append((source, (check_name, init_config_tpl, instance_tpl, variables)))
            else:
                templates.append((check_name, init_config_tpl, instance_tpl, variables))

        return templates

    def _fill_tpl(self, inspect, instance_tpl, variables, tags=None):
        """Add container tags to instance templates and build a
           dict from template variable names and their values.

        Returns the (instance_tpl, var_values) pair. `tags` is not modified.
        """
        var_values = {}
        c_id, c_image = inspect.get('Id', ''), inspect.get('Config', {}).get('Image', '')

        # add default tags to the instance
        if tags:
            tpl_tags = instance_tpl.get('tags') or []
            if not isinstance(tpl_tags, list):
                tpl_tags = [tpl_tags]
            # build a new list: the caller reuses `tags` for every template, so
            # extending it in place would leak tags from one template to the next
            instance_tpl['tags'] = list(set(tags + tpl_tags))

        for var in variables:
            # variables can be suffixed with an index in case several values are found
            var_prefix = var.split('_')[0]
            if var_prefix in self.VAR_MAPPING:
                try:
                    res = self.VAR_MAPPING[var_prefix](inspect, var)
                    if res is None:
                        raise ValueError("Invalid value for variable %s." % var)
                    var_values[var] = res
                except Exception as ex:
                    log.error("Could not find a value for the template variable %s for container %s "
                              "(%s): %s" % (var, c_id[:12], c_image, str(ex)))
            else:
                log.error("No method was found to interpolate template variable %s for container %s "
                          "(%s)." % (var, c_id[:12], c_image))

        return instance_tpl, var_values
class Kubernetes(AgentCheck):
    """ Collect metrics and events from kubelet """

    pod_names_by_container = {}

    def __init__(self, name, init_config, agentConfig, instances=None):
        """Set up the check.

        Raises an Exception when more than one instance is configured or
        when the KubeUtil helper could not determine a host.
        """
        if instances is not None and len(instances) > 1:
            raise Exception(
                'Kubernetes check only supports one configured instance.')

        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        self.kubeutil = KubeUtil()
        if not self.kubeutil.host:
            raise Exception(
                'Unable to get default router and host parameter is not set')

    def _perform_kubelet_checks(self, url):
        """Fetch `url` and emit one service check per reported line.

        Each line matching `[<status>]<name> ...` yields a service check
        named `<NAMESPACE>.kubelet.check.<name>`: OK when the status
        character is `+`, CRITICAL otherwise. An aggregate service check
        (`<NAMESPACE>.kubelet.check`) is emitted at the end; it is CRITICAL
        if any line failed or if the request itself raised.
        """
        service_check_base = NAMESPACE + '.kubelet.check'
        is_ok = True
        try:
            r = requests.get(url)
            for line in r.iter_lines():
                # avoid noise; this check is expected to fail since we
                # override the container hostname
                if line.find('hostname') != -1:
                    continue

                matches = re.match(r'\[(.)\]([^\s]+) (.*)?', line)
                if not matches or len(matches.groups()) < 2:
                    continue

                service_check_name = service_check_base + '.' + matches.group(2)
                status = matches.group(1)
                if status == '+':
                    self.service_check(service_check_name, AgentCheck.OK)
                else:
                    self.service_check(service_check_name, AgentCheck.CRITICAL)
                    is_ok = False
        except Exception as e:
            self.log.warning('kubelet check %s failed: %s' % (url, str(e)))
            self.service_check(service_check_base, AgentCheck.CRITICAL,
                               message='Kubelet check %s failed: %s' % (url, str(e)))
        else:
            if is_ok:
                self.service_check(service_check_base, AgentCheck.OK)
            else:
                self.service_check(service_check_base, AgentCheck.CRITICAL)

    def check(self, instance):
        """Read the instance options, then run health checks and metrics."""
        self.max_depth = instance.get('max_depth', DEFAULT_MAX_DEPTH)

        enabled_gauges = instance.get('enabled_gauges', DEFAULT_ENABLED_GAUGES)
        self.enabled_gauges = [
            "{0}.{1}".format(NAMESPACE, x) for x in enabled_gauges
        ]
        enabled_rates = instance.get('enabled_rates', DEFAULT_ENABLED_RATES)
        self.enabled_rates = [
            "{0}.{1}".format(NAMESPACE, x) for x in enabled_rates
        ]

        self.publish_aliases = _is_affirmative(
            instance.get('publish_aliases', DEFAULT_PUBLISH_ALIASES))
        self.use_histogram = _is_affirmative(
            instance.get('use_histogram', DEFAULT_USE_HISTOGRAM))
        # the publishing functions are looked up once per run; they are
        # called below with the check instance passed explicitly
        self.publish_rate = FUNC_MAP[RATE][self.use_histogram]
        self.publish_gauge = FUNC_MAP[GAUGE][self.use_histogram]

        # kubelet health checks
        self._perform_kubelet_checks(self.kubeutil.kube_health_url)

        # kubelet metrics
        self._update_metrics(instance)

    def _publish_raw_metrics(self, metric, dat, tags, depth=0):
        """Recursively walk `dat` and publish every enabled numeric leaf.

        Dict keys are lower-cased and appended to the metric name; for a
        list only the last element is considered. Recursion stops (with a
        warning) once `depth` reaches `self.max_depth`.
        """
        if depth >= self.max_depth:
            self.log.warning('Reached max depth on metric=%s' % metric)
            return

        if isinstance(dat, numbers.Number):
            # rates take precedence over gauges when both patterns match
            if self.enabled_rates and any(
                    fnmatch(metric, pat) for pat in self.enabled_rates):
                self.publish_rate(self, metric, float(dat), tags)
            elif self.enabled_gauges and any(
                    fnmatch(metric, pat) for pat in self.enabled_gauges):
                self.publish_gauge(self, metric, float(dat), tags)

        elif isinstance(dat, dict):
            for k, v in dat.items():
                self._publish_raw_metrics(metric + '.%s' % k.lower(), v, tags,
                                          depth + 1)

        elif isinstance(dat, list):
            # an empty list carries no sample; skipping it avoids an
            # IndexError that would abort the rest of the container's metrics
            if dat:
                self._publish_raw_metrics(metric, dat[-1], tags, depth + 1)

    @staticmethod
    def _shorten_name(name):
        """Truncate any run of 64+ hex characters in `name` to 12 characters."""
        # shorten docker image id
        return re.sub(r'([0-9a-fA-F]{64,})', lambda x: x.group(1)[0:12], name)

    def _get_post_1_2_tags(self, cont_labels, subcontainer, kube_labels):
        """Build pod tags when both the namespace and pod name labels exist.

        `kube_labels` is looked up with the key `<namespace>/<pod name>`.
        """
        tags = []

        pod_name = cont_labels[KubeUtil.POD_NAME_LABEL]
        pod_namespace = cont_labels[KubeUtil.NAMESPACE_LABEL]
        tags.append(u"pod_name:{0}/{1}".format(pod_namespace, pod_name))
        tags.append(u"kube_namespace:{0}".format(pod_namespace))

        kube_labels_key = "{0}/{1}".format(pod_namespace, pod_name)

        pod_labels = kube_labels.get(kube_labels_key)
        if pod_labels:
            tags += list(pod_labels)

        if "-" in pod_name:
            # the controller name is the pod name minus its last dash-suffix
            replication_controller = "-".join(pod_name.split("-")[:-1])
            tags.append("kube_replication_controller:%s" % replication_controller)

        if self.publish_aliases and subcontainer.get("aliases"):
            for alias in subcontainer['aliases'][1:]:
                # we don't add the first alias as it will be the container_name
                tags.append('container_alias:%s' % (self._shorten_name(alias)))

        return tags

    def _get_pre_1_2_tags(self, cont_labels, subcontainer, kube_labels):
        """Build pod tags when only the pod name label exists.

        `kube_labels` is looked up with the pod name itself; a namespace tag
        is only added when the derived controller name contains a `/`.
        """
        tags = []

        pod_name = cont_labels[KubeUtil.POD_NAME_LABEL]
        tags.append(u"pod_name:{0}".format(pod_name))

        pod_labels = kube_labels.get(pod_name)
        if pod_labels:
            tags.extend(list(pod_labels))

        if "-" in pod_name:
            replication_controller = "-".join(pod_name.split("-")[:-1])
            if "/" in replication_controller:
                namespace, replication_controller = replication_controller.split("/", 1)
                tags.append(u"kube_namespace:%s" % namespace)

            tags.append(u"kube_replication_controller:%s" % replication_controller)

        if self.publish_aliases and subcontainer.get("aliases"):
            for alias in subcontainer['aliases'][1:]:
                # we don't add the first alias as it will be the container_name
                tags.append(u"container_alias:%s" % (self._shorten_name(alias)))

        return tags

    def _update_container_metrics(self, instance, subcontainer, kube_labels):
        """Tag and publish the latest stats sample of one subcontainer."""
        tags = list(instance.get('tags', []))  # add support for custom tags

        if len(subcontainer.get('aliases', [])) >= 1:
            # The first alias seems to always match the docker container name
            container_name = subcontainer['aliases'][0]
        else:
            # We default to the container id
            container_name = subcontainer['name']

        tags.append('container_name:%s' % container_name)

        try:
            cont_labels = subcontainer['spec']['labels']
        except KeyError:
            self.log.debug("Subcontainer, doesn't have any labels")
            cont_labels = {}

        # Collect pod names, namespaces, rc...
        if KubeUtil.NAMESPACE_LABEL in cont_labels and KubeUtil.POD_NAME_LABEL in cont_labels:
            # Kubernetes >= 1.2
            tags += self._get_post_1_2_tags(cont_labels, subcontainer, kube_labels)

        elif KubeUtil.POD_NAME_LABEL in cont_labels:
            # Kubernetes <= 1.1
            tags += self._get_pre_1_2_tags(cont_labels, subcontainer, kube_labels)

        else:
            # Those are containers that are not part of a pod.
            # They are top aggregate views and don't have the previous metadata.
            tags.append("pod_name:no_pod")

        stats = subcontainer['stats'][-1]  # take the latest
        self._publish_raw_metrics(NAMESPACE, stats, tags)

        if subcontainer.get("spec", {}).get("has_filesystem"):
            fs = stats['filesystem'][-1]
            fs_utilization = float(fs['usage']) / float(fs['capacity'])
            self.publish_gauge(self, NAMESPACE + '.filesystem.usage_pct',
                               fs_utilization, tags)

        if subcontainer.get("spec", {}).get("has_network"):
            net = stats['network']
            self.publish_rate(self, NAMESPACE + '.network_errors',
                              sum(float(net[x]) for x in NET_ERRORS), tags)

    def _retrieve_metrics(self, url):
        """Return the JSON payload fetched from `url`."""
        return retrieve_json(url)

    def _update_metrics(self, instance):
        """Publish metrics for every subcontainer, then the pod counts.

        Raises an Exception when no metrics could be retrieved. A failure on
        a single subcontainer is logged and does not stop the others.
        """
        pods_list = self.kubeutil.retrieve_pods_list()
        metrics = self._retrieve_metrics(self.kubeutil.metrics_url)

        excluded_labels = instance.get('excluded_labels')
        kube_labels = self.kubeutil.extract_kube_labels(
            pods_list, excluded_keys=excluded_labels)

        if not metrics:
            # report the URL that was actually queried; this class never
            # defines a `metrics_cmd` attribute
            raise Exception('No metrics retrieved cmd=%s' % self.kubeutil.metrics_url)

        for subcontainer in metrics:
            try:
                self._update_container_metrics(instance, subcontainer, kube_labels)
            except Exception as e:
                self.log.error(
                    "Unable to collect metrics for container: {0} ({1})".format(
                        subcontainer.get('name'), e))

        self._update_pods_metrics(instance, pods_list)

    def _update_pods_metrics(self, instance, pods):
        """Publish the number of pods per controller of a supported kind.

        The controller is read from each pod's `kubernetes.io/created-by`
        annotation; pods lacking the expected keys are skipped.
        """
        supported_kinds = [
            "DaemonSet",
            "Deployment",
            "Job",
            "ReplicationController",
            "ReplicaSet",
        ]

        controllers_map = defaultdict(int)
        for pod in pods['items']:
            try:
                created_by = json.loads(
                    pod['metadata']['annotations']['kubernetes.io/created-by'])
                kind = created_by['reference']['kind']
                if kind in supported_kinds:
                    controllers_map[created_by['reference']['name']] += 1
            except KeyError:
                continue

        tags = instance.get('tags', [])
        for ctrl, pod_count in controllers_map.items():
            _tags = tags[:]  # copy base tags
            _tags.append('kube_replication_controller:{0}'.format(ctrl))
            self.publish_gauge(self, NAMESPACE + '.pods.running', pod_count, _tags)
class TestKubeutil(unittest.TestCase):
    """Unit tests for the KubeUtil helper and the is_k8s() platform probe."""

    def setUp(self):
        self.kubeutil = KubeUtil()

    @mock.patch("utils.kubeutil.KubeUtil.retrieve_pods_list", side_effect=["foo"])
    @mock.patch("utils.kubeutil.KubeUtil.extract_kube_labels")
    def test_get_kube_labels(self, extract_kube_labels, retrieve_pods_list):
        """get_kube_labels must feed the retrieved pods list to the extractor."""
        self.kubeutil.get_kube_labels(excluded_keys="bar")
        retrieve_pods_list.assert_called_once()
        extract_kube_labels.assert_called_once_with("foo", excluded_keys="bar")

    def test_extract_kube_labels(self):
        """
        Test with both 1.1 and 1.2 version payloads
        """
        res = self.kubeutil.extract_kube_labels({}, ["foo"])
        self.assertEqual(len(res), 0)

        pods = json.loads(Fixtures.read_file("pods_list_1.1.json", string_escape=False))
        res = self.kubeutil.extract_kube_labels(pods, ["foo"])
        labels = set(inn for out in res.values() for inn in out)
        self.assertEqual(len(labels), 8)
        res = self.kubeutil.extract_kube_labels(pods, ["k8s-app"])
        labels = set(inn for out in res.values() for inn in out)
        self.assertEqual(len(labels), 6)

        pods = json.loads(Fixtures.read_file("pods_list_1.2.json", string_escape=False))
        res = self.kubeutil.extract_kube_labels(pods, ["foo"])
        labels = set(inn for out in res.values() for inn in out)
        self.assertEqual(len(labels), 3)
        res = self.kubeutil.extract_kube_labels(pods, ["k8s-app"])
        labels = set(inn for out in res.values() for inn in out)
        self.assertEqual(len(labels), 3)

    def test_extract_meta(self):
        """
        Test with both 1.1 and 1.2 version payloads
        """
        res = self.kubeutil.extract_meta({}, "foo")
        self.assertEqual(len(res), 0)

        pods = json.loads(Fixtures.read_file("pods_list_1.1.json", string_escape=False))
        res = self.kubeutil.extract_meta(pods, "foo")
        self.assertEqual(len(res), 0)
        res = self.kubeutil.extract_meta(pods, "uid")
        self.assertEqual(len(res), 6)

        pods = json.loads(Fixtures.read_file("pods_list_1.2.json", string_escape=False))
        res = self.kubeutil.extract_meta(pods, "foo")
        self.assertEqual(len(res), 0)
        res = self.kubeutil.extract_meta(pods, "uid")
        self.assertEqual(len(res), 4)

    @mock.patch("utils.kubeutil.retrieve_json")
    def test_retrieve_pods_list(self, retrieve_json):
        self.kubeutil.retrieve_pods_list()
        retrieve_json.assert_called_once_with(self.kubeutil.pods_list_url)

    @mock.patch("utils.kubeutil.retrieve_json")
    def test_retrieve_metrics(self, retrieve_json):
        self.kubeutil.retrieve_metrics()
        retrieve_json.assert_called_once_with(self.kubeutil.metrics_url)

    def test_filter_pods_list(self):
        """
        Test with both 1.1 and 1.2 version payloads
        """
        res = self.kubeutil.filter_pods_list({}, "foo")
        self.assertEqual(len(res.get("items")), 0)

        pods = json.loads(Fixtures.read_file("pods_list_1.1.json", string_escape=False))
        res = self.kubeutil.filter_pods_list(pods, "10.240.0.9")
        self.assertEqual(len(res.get("items")), 5)

        pods = json.loads(Fixtures.read_file("pods_list_1.1.json", string_escape=False))
        res = self.kubeutil.filter_pods_list(pods, "foo")
        self.assertEqual(len(res.get("items")), 0)

        pods = json.loads(Fixtures.read_file("pods_list_1.2.json", string_escape=False))
        res = self.kubeutil.filter_pods_list(pods, "10.240.0.5")
        self.assertEqual(len(res.get("items")), 1)

        pods = json.loads(Fixtures.read_file("pods_list_1.2.json", string_escape=False))
        res = self.kubeutil.filter_pods_list(pods, "foo")
        self.assertEqual(len(res.get("items")), 0)

    @mock.patch("utils.kubeutil.requests")
    def test_retrieve_json_auth(self, r):
        """The CA bundle is only passed to `verify` once CA_CRT_PATH exists."""
        self.kubeutil.retrieve_json_auth("url", "foo_tok")
        r.get.assert_called_once_with(
            "url", verify=False, timeout=10,
            headers={"Authorization": "Bearer foo_tok"})

        # any existing file works as a stand-in for the CA certificate
        self.kubeutil.CA_CRT_PATH = __file__
        self.kubeutil.retrieve_json_auth("url", "foo_tok")
        r.get.assert_called_with(
            "url", verify=__file__, timeout=10,
            headers={"Authorization": "Bearer foo_tok"})

    def test_get_node_info(self):
        """Host data is fetched once, then served from the cached fields."""
        with mock.patch("utils.kubeutil.KubeUtil._fetch_host_data") as f:
            self.kubeutil.get_node_info()
            f.assert_called_once()

            f.reset_mock()

            self.kubeutil._node_ip = "foo"
            self.kubeutil._node_name = "bar"
            ip, name = self.kubeutil.get_node_info()
            self.assertEqual(ip, "foo")
            self.assertEqual(name, "bar")
            f.assert_not_called()

    def test__fetch_host_data(self):
        """
        Test with both 1.1 and 1.2 version payloads
        """
        with mock.patch("utils.kubeutil.KubeUtil.retrieve_pods_list") as mock_pods:
            self.kubeutil.host_name = "dd-agent-1rxlh"
            mock_pods.return_value = json.loads(
                Fixtures.read_file("pods_list_1.2.json", string_escape=False))
            self.kubeutil._fetch_host_data()
            self.assertEqual(self.kubeutil._node_ip, "10.240.0.9")
            self.assertEqual(self.kubeutil._node_name, "kubernetes-massi-minion-k23m")

            self.kubeutil.host_name = "heapster-v11-l8sh1"
            mock_pods.return_value = json.loads(
                Fixtures.read_file("pods_list_1.1.json", string_escape=False))
            self.kubeutil._fetch_host_data()
            self.assertEqual(self.kubeutil._node_ip, "10.240.0.9")
            self.assertEqual(self.kubeutil._node_name, "gke-cluster-1-8046fdfa-node-ld35")

    def test_get_auth_token(self):
        """A missing token file yields None, a readable one yields a value."""
        # patch.object restores the class attribute on exit, so the override
        # does not leak into tests that run afterwards
        with mock.patch.object(KubeUtil, "AUTH_TOKEN_PATH", "/foo/bar", create=True):
            self.assertIsNone(KubeUtil.get_auth_token())

        token_path = Fixtures.file("events.json")  # any file could do the trick
        with mock.patch.object(KubeUtil, "AUTH_TOKEN_PATH", token_path, create=True):
            self.assertIsNotNone(KubeUtil.get_auth_token())

    def test_is_k8s(self):
        """is_k8s() follows the presence of KUBERNETES_PORT in the environment."""
        # patch.dict snapshots os.environ and restores it on exit
        with mock.patch.dict(os.environ):
            # os.unsetenv() does not update the os.environ mapping, so the
            # key has to be removed from the mapping itself
            os.environ.pop("KUBERNETES_PORT", None)
            self.assertFalse(is_k8s())

            os.environ["KUBERNETES_PORT"] = "999"
            self.assertTrue(is_k8s())
class SDDockerBackend(AbstractSDBackend):
    """Docker-based service discovery"""

    def __init__(self, agentConfig):
        """Create the docker/kubernetes clients and the config store.

        When the config store cannot be instantiated, `sd_config_backend`
        is reset to None in `agentConfig` and the store is created again.
        """
        self.docker_client = DockerUtil().client
        if is_k8s():
            self.kubeutil = KubeUtil()

        try:
            self.config_store = get_config_store(agentConfig=agentConfig)
        except Exception as e:
            log.error('Failed to instantiate the config store client. '
                      'Auto-config only will be used. %s' % str(e))
            agentConfig['sd_config_backend'] = None
            self.config_store = get_config_store(agentConfig=agentConfig)

        # template variable name -> method computing its value from a
        # docker inspect object
        self.VAR_MAPPING = {
            'host': self._get_host,
            'port': self._get_ports,
            'tags': self._get_additional_tags,
        }
        AbstractSDBackend.__init__(self, agentConfig)

    def _get_host(self, container_inspect):
        """Extract the host IP from a docker inspect object, or the kubelet API."""
        ip_addr = container_inspect.get('NetworkSettings', {}).get('IPAddress')
        if not ip_addr:
            if not is_k8s():
                return
            # kubernetes case
            log.debug(
                "Didn't find the IP address for container %s (%s), using the kubernetes way."
                % (container_inspect.get('Id', '')[:12],
                   container_inspect.get('Config', {}).get('Image', '')))
            pod_list = self.kubeutil.retrieve_pods_list().get('items', [])
            c_id = container_inspect.get('Id')
            for pod in pod_list:
                pod_ip = pod.get('status', {}).get('podIP')
                if pod_ip is None:
                    continue
                c_statuses = pod.get('status', {}).get('containerStatuses', [])
                for status in c_statuses:
                    # compare the container id with those of containers in the current pod
                    if c_id == status.get('containerID', '').split('//')[-1]:
                        ip_addr = pod_ip

        return ip_addr

    def _get_ports(self, container_inspect):
        """Extract a list of available ports from a docker inspect object. Sort them numerically."""
        c_id = container_inspect.get('Id', '')
        try:
            ports = [p.split('/')[0]
                     for p in container_inspect['NetworkSettings']['Ports'].keys()]
        except (IndexError, KeyError, AttributeError):
            log.debug(
                "Didn't find the port for container %s (%s), trying the kubernetes way."
                % (c_id[:12], container_inspect.get('Config', {}).get('Image', '')))
            # first we try to get it from the docker API
            # it works if the image has an EXPOSE instruction
            # (both `Config` and `ExposedPorts` may be missing or null)
            exposed = (container_inspect.get('Config') or {}).get('ExposedPorts') or {}
            ports = [p.split('/')[0] for p in exposed.keys()]

            # if it failed, try with the kubernetes API
            if not ports and is_k8s():
                # _get_kube_config returns None when no pod owns the container
                co_statuses = (self._get_kube_config(c_id, 'status') or {}).get(
                    'containerStatuses', [])
                c_name = None
                for co in co_statuses:
                    if co.get('containerID', '').split('//')[-1] == c_id:
                        c_name = co.get('name')
                        break
                containers = (self._get_kube_config(c_id, 'spec') or {}).get(
                    'containers', [])
                for co in containers:
                    if co.get('name') == c_name:
                        ports = [str(p.get('containerPort'))
                                 for p in co.get('ports', [])]
        ports = sorted(ports, key=lambda x: int(x))
        return ports

    def get_tags(self, c_inspect):
        """Extract useful tags from docker or platform APIs. These are collected by default."""
        tags = []
        if is_k8s():
            pod_metadata = self._get_kube_config(c_inspect.get('Id'), 'metadata')

            if pod_metadata is None:
                log.warning("Failed to fetch pod metadata for container %s."
                            " Kubernetes tags may be missing." % c_inspect.get('Id', '')[:12])
                return []
            # get labels
            kube_labels = pod_metadata.get('labels', {})
            for label, value in kube_labels.items():
                tags.append('%s:%s' % (label, value))

            # get replication controller
            created_by = json.loads(
                pod_metadata.get('annotations', {}).get('kubernetes.io/created-by', '{}'))
            if created_by.get('reference', {}).get('kind') == 'ReplicationController':
                tags.append('kube_replication_controller:%s'
                            % created_by.get('reference', {}).get('name'))

            # get kubernetes namespace
            tags.append('kube_namespace:%s' % pod_metadata.get('namespace'))

        return tags

    def _get_additional_tags(self, container_inspect):
        """Return the `node_name` and `pod_name` tags for a container.

        Returns an empty list outside kubernetes, or when the pod owning
        the container could not be found.
        """
        tags = []
        if is_k8s():
            c_id = container_inspect.get('Id')
            pod_metadata = self._get_kube_config(c_id, 'metadata')
            pod_spec = self._get_kube_config(c_id, 'spec')
            # same guard as get_tags: no matching pod means nothing to read
            if pod_metadata is None or pod_spec is None:
                log.warning("Failed to fetch pod metadata or pod spec for container %s."
                            " Additional Kubernetes tags may be missing."
                            % container_inspect.get('Id', '')[:12])
                return []
            tags.append('node_name:%s' % pod_spec.get('nodeName'))
            tags.append('pod_name:%s' % pod_metadata.get('name'))
        return tags

    def _get_kube_config(self, c_id, key):
        """Get a part of a pod config from the kubernetes API

        Returns `pod[key]` (or `{}` when the key is absent) for the first
        pod that lists `c_id` among its container statuses, and None when
        no pod matches.
        """
        pods = self.kubeutil.retrieve_pods_list().get('items', [])
        for pod in pods:
            c_statuses = pod.get('status', {}).get('containerStatuses', [])
            for status in c_statuses:
                if c_id == status.get('containerID', '').split('//')[-1]:
                    return pod.get(key, {})

    @staticmethod
    def _short_image_name(image):
        """Strip the registry/repository path, tag and digest from an image reference."""
        # take the last path component first: a registry may carry a port
        # (`host:5000/name:tag`), so splitting on ':' first would return the host
        return image.split('/')[-1].split('@')[0].split(':')[0]

    def get_configs(self):
        """Get the config for all docker containers running on the host."""
        configs = {}
        containers = [(
            self._short_image_name(container.get('Image')),
            container.get('Id'),
            container.get('Labels'),
        ) for container in self.docker_client.containers()]

        # used by the configcheck agent command to trace where check configs come from
        trace_config = self.agentConfig.get(TRACE_CONFIG, False)

        for image, cid, labels in containers:
            try:
                check_configs = self._get_check_configs(
                    cid, image, trace_config=trace_config) or []
                for conf in check_configs:
                    if trace_config and conf is not None:
                        source, conf = conf

                    check_name, init_config, instance = conf
                    # build instances list if needed
                    if configs.get(check_name) is None:
                        if trace_config:
                            configs[check_name] = (source, (init_config, [instance]))
                        else:
                            configs[check_name] = (init_config, [instance])
                    else:
                        conflict_init_msg = 'Different versions of `init_config` found for check {0}. ' \
                            'Keeping the first one found.'
                        if trace_config:
                            if configs[check_name][1][0] != init_config:
                                log.warning(conflict_init_msg.format(check_name))
                            configs[check_name][1][1].append(instance)
                        else:
                            if configs[check_name][0] != init_config:
                                log.warning(conflict_init_msg.format(check_name))
                            configs[check_name][1].append(instance)
            except Exception:
                log.exception(
                    'Building config for container %s based on image %s using service'
                    ' discovery failed, leaving it alone.' % (cid[:12], image))
        return configs

    def _get_check_configs(self, c_id, image, trace_config=False):
        """Retrieve configuration templates and fill them with data pulled from docker and tags.

        Returns a list of `(check_name, init_config, instance)` tuples, each
        wrapped as `(source, (...))` when `trace_config` is set, or None
        when no template exists for `image`.
        """
        inspect = self.docker_client.inspect_container(c_id)
        config_templates = self._get_config_templates(image, trace_config=trace_config)
        if not config_templates:
            log.debug('No config template for container %s with image %s. '
                      'It will be left unconfigured.' % (c_id[:12], image))
            return None

        check_configs = []
        tags = self.get_tags(inspect)
        for config_tpl in config_templates:
            if trace_config:
                source, config_tpl = config_tpl
            check_name, init_config_tpl, instance_tpl, variables = config_tpl

            # insert tags in instance_tpl and process values for template variables
            instance_tpl, var_values = self._fill_tpl(inspect, instance_tpl, variables, tags)

            tpl = self._render_template(init_config_tpl or {}, instance_tpl or {}, var_values)
            if tpl and len(tpl) == 2:
                # NOTE(review): _render_template is defined outside this block;
                # it only receives the two templates (no source), so its result
                # is assumed to be an (init_config, instance) pair in both modes.
                init_config, instance = tpl
                if trace_config:
                    check_configs.append((source, (check_name, init_config, instance)))
                else:
                    check_configs.append((check_name, init_config, instance))

        return check_configs

    def _get_config_templates(self, image_name, trace_config=False):
        """Extract config templates for an image from a K/V store and returns it as a dict object."""
        config_backend = self.agentConfig.get('sd_config_backend')
        templates = []
        if config_backend is None:
            auto_conf = True
            log.warning(
                'No supported configuration backend was provided, using auto-config only.')
        else:
            auto_conf = False

        # format: [('image', {init_tpl}, {instance_tpl})] without trace_config
        # or      [(source, ('image', {init_tpl}, {instance_tpl}))] with trace_config
        raw_tpls = self.config_store.get_check_tpls(
            image_name, auto_conf=auto_conf, trace_config=trace_config)
        for tpl in raw_tpls:
            if trace_config and tpl is not None:
                # each template can come from either auto configuration or user-supplied templates
                source, tpl = tpl
            if tpl is not None and len(tpl) == 3:
                check_name, init_config_tpl, instance_tpl = tpl
            else:
                log.debug('No template was found for image %s, leaving it alone.' % image_name)
                return None
            try:
                # build a list of all variables to replace in the template
                variables = self.PLACEHOLDER_REGEX.findall(str(init_config_tpl)) + \
                    self.PLACEHOLDER_REGEX.findall(str(instance_tpl))
                variables = [v.strip('%') for v in variables]
                if not isinstance(init_config_tpl, dict):
                    init_config_tpl = json.loads(init_config_tpl or '{}')
                if not isinstance(instance_tpl, dict):
                    instance_tpl = json.loads(instance_tpl or '{}')
            except json.JSONDecodeError:
                log.exception(
                    'Failed to decode the JSON template fetched for check {0}. Its configuration'
                    ' by service discovery failed for {1}.'.format(check_name, image_name))
                return None

            if trace_config:
                templates.append((source, (check_name, init_config_tpl, instance_tpl, variables)))
            else:
                templates.append((check_name, init_config_tpl, instance_tpl, variables))

        return templates

    def _fill_tpl(self, inspect, instance_tpl, variables, tags=None):
        """Add container tags to instance templates and build a
        dict from template variable names and their values.

        Returns `(instance_tpl, var_values)`. `instance_tpl` is updated in
        place with the merged tags; the `tags` argument is left untouched.
        """
        var_values = {}

        # add default tags to the instance
        if tags:
            tpl_tags = instance_tpl.get('tags') or []
            if not isinstance(tpl_tags, list):
                tpl_tags = [tpl_tags]
            # build a new list: the caller passes the same `tags` object for
            # every template, so extending it in place would leak one
            # template's tags into the next
            instance_tpl['tags'] = list(set(list(tags) + tpl_tags))

        for v in variables:
            # variables can be suffixed with an index in case a list is found
            var_parts = v.split('_')
            if var_parts[0] in self.VAR_MAPPING:
                try:
                    res = self.VAR_MAPPING[var_parts[0]](inspect)
                    if not res:
                        raise ValueError("Invalid value for variable %s." % var_parts[0])
                    # if an index is found in the variable, use it to select a value
                    if len(var_parts) > 1 and isinstance(res, list) and int(var_parts[-1]) < len(res):
                        var_values[v] = res[int(var_parts[-1])]
                    # if no valid index was found but we have a list, return the last element
                    elif isinstance(res, list):
                        var_values[v] = res[-1]
                    else:
                        var_values[v] = res
                except Exception as ex:
                    log.error("Could not find a value for the template variable %s: %s" % (v, str(ex)))
            else:
                log.error("No method was found to interpolate template variable %s." % v)

        return instance_tpl, var_values
class TestKubeutil(unittest.TestCase):
    """Unit tests for the KubeUtil helper and the Platform.is_k8s() probe."""

    def setUp(self):
        self.kubeutil = KubeUtil()

    @mock.patch('utils.kubeutil.KubeUtil.retrieve_pods_list', side_effect=['foo'])
    @mock.patch('utils.kubeutil.KubeUtil.extract_kube_labels')
    def test_get_kube_labels(self, extract_kube_labels, retrieve_pods_list):
        """get_kube_labels must feed the retrieved pods list to the extractor."""
        self.kubeutil.get_kube_labels(excluded_keys='bar')
        retrieve_pods_list.assert_called_once()
        extract_kube_labels.assert_called_once_with('foo', excluded_keys='bar')

    def test_extract_kube_labels(self):
        """
        Test with both 1.1 and 1.2 version payloads
        """
        res = self.kubeutil.extract_kube_labels({}, ['foo'])
        self.assertEqual(len(res), 0)

        pods = json.loads(Fixtures.read_file("pods_list_1.1.json", string_escape=False))
        res = self.kubeutil.extract_kube_labels(pods, ['foo'])
        labels = set(inn for out in res.values() for inn in out)
        self.assertEqual(len(labels), 8)
        res = self.kubeutil.extract_kube_labels(pods, ['k8s-app'])
        labels = set(inn for out in res.values() for inn in out)
        self.assertEqual(len(labels), 6)

        pods = json.loads(Fixtures.read_file("pods_list_1.2.json", string_escape=False))
        res = self.kubeutil.extract_kube_labels(pods, ['foo'])
        labels = set(inn for out in res.values() for inn in out)
        self.assertEqual(len(labels), 3)
        res = self.kubeutil.extract_kube_labels(pods, ['k8s-app'])
        labels = set(inn for out in res.values() for inn in out)
        self.assertEqual(len(labels), 3)

    def test_extract_meta(self):
        """
        Test with both 1.1 and 1.2 version payloads
        """
        res = self.kubeutil.extract_meta({}, 'foo')
        self.assertEqual(len(res), 0)

        pods = json.loads(Fixtures.read_file("pods_list_1.1.json", string_escape=False))
        res = self.kubeutil.extract_meta(pods, 'foo')
        self.assertEqual(len(res), 0)
        res = self.kubeutil.extract_meta(pods, 'uid')
        self.assertEqual(len(res), 6)

        pods = json.loads(Fixtures.read_file("pods_list_1.2.json", string_escape=False))
        res = self.kubeutil.extract_meta(pods, 'foo')
        self.assertEqual(len(res), 0)
        res = self.kubeutil.extract_meta(pods, 'uid')
        self.assertEqual(len(res), 4)

    @mock.patch('utils.kubeutil.retrieve_json')
    def test_retrieve_pods_list(self, retrieve_json):
        self.kubeutil.retrieve_pods_list()
        retrieve_json.assert_called_once_with(self.kubeutil.pods_list_url)

    @mock.patch('utils.kubeutil.retrieve_json')
    def test_retrieve_metrics(self, retrieve_json):
        self.kubeutil.retrieve_metrics()
        retrieve_json.assert_called_once_with(self.kubeutil.metrics_url)

    def test_filter_pods_list(self):
        """
        Test with both 1.1 and 1.2 version payloads
        """
        res = self.kubeutil.filter_pods_list({}, 'foo')
        self.assertEqual(len(res.get('items')), 0)

        pods = json.loads(Fixtures.read_file("pods_list_1.1.json", string_escape=False))
        res = self.kubeutil.filter_pods_list(pods, '10.240.0.9')
        self.assertEqual(len(res.get('items')), 5)

        pods = json.loads(Fixtures.read_file("pods_list_1.1.json", string_escape=False))
        res = self.kubeutil.filter_pods_list(pods, 'foo')
        self.assertEqual(len(res.get('items')), 0)

        pods = json.loads(Fixtures.read_file("pods_list_1.2.json", string_escape=False))
        res = self.kubeutil.filter_pods_list(pods, '10.240.0.5')
        self.assertEqual(len(res.get('items')), 1)

        pods = json.loads(Fixtures.read_file("pods_list_1.2.json", string_escape=False))
        res = self.kubeutil.filter_pods_list(pods, 'foo')
        self.assertEqual(len(res.get('items')), 0)

    @mock.patch('utils.kubeutil.requests')
    def test_retrieve_json_auth(self, r):
        """The CA bundle is only passed to `verify` once CA_CRT_PATH exists."""
        self.kubeutil.retrieve_json_auth('url', 'foo_tok')
        r.get.assert_called_once_with(
            'url', verify=False, timeout=10,
            headers={'Authorization': 'Bearer foo_tok'})

        # any existing file works as a stand-in for the CA certificate
        self.kubeutil.CA_CRT_PATH = __file__
        self.kubeutil.retrieve_json_auth('url', 'foo_tok')
        r.get.assert_called_with(
            'url', verify=__file__, timeout=10,
            headers={'Authorization': 'Bearer foo_tok'})

    def test_get_node_info(self):
        """Host data is fetched once, then served from the cached fields."""
        with mock.patch('utils.kubeutil.KubeUtil._fetch_host_data') as f:
            self.kubeutil.get_node_info()
            f.assert_called_once()

            f.reset_mock()

            self.kubeutil._node_ip = 'foo'
            self.kubeutil._node_name = 'bar'
            ip, name = self.kubeutil.get_node_info()
            self.assertEqual(ip, 'foo')
            self.assertEqual(name, 'bar')
            f.assert_not_called()

    def test__fetch_host_data(self):
        """
        Test with both 1.1 and 1.2 version payloads
        """
        with mock.patch('utils.kubeutil.KubeUtil.retrieve_pods_list') as mock_pods:
            self.kubeutil.host_name = 'dd-agent-1rxlh'
            mock_pods.return_value = json.loads(
                Fixtures.read_file("pods_list_1.2.json", string_escape=False))
            self.kubeutil._fetch_host_data()
            self.assertEqual(self.kubeutil._node_ip, '10.240.0.9')
            self.assertEqual(self.kubeutil._node_name, 'kubernetes-massi-minion-k23m')

            self.kubeutil.host_name = 'heapster-v11-l8sh1'
            mock_pods.return_value = json.loads(
                Fixtures.read_file("pods_list_1.1.json", string_escape=False))
            self.kubeutil._fetch_host_data()
            self.assertEqual(self.kubeutil._node_ip, '10.240.0.9')
            self.assertEqual(self.kubeutil._node_name, 'gke-cluster-1-8046fdfa-node-ld35')

    def test_get_auth_token(self):
        """A missing token file yields None, a readable one yields a value."""
        # patch.object restores the class attribute on exit, so the override
        # does not leak into tests that run afterwards
        with mock.patch.object(KubeUtil, 'AUTH_TOKEN_PATH', '/foo/bar', create=True):
            self.assertIsNone(KubeUtil.get_auth_token())

        token_path = Fixtures.file('events.json')  # any file could do the trick
        with mock.patch.object(KubeUtil, 'AUTH_TOKEN_PATH', token_path, create=True):
            self.assertIsNotNone(KubeUtil.get_auth_token())

    def test_is_k8s(self):
        """Platform.is_k8s() follows the presence of KUBERNETES_PORT in the environment."""
        # patch.dict snapshots os.environ and restores it on exit
        with mock.patch.dict(os.environ):
            # os.unsetenv() does not update the os.environ mapping, so the
            # key has to be removed from the mapping itself
            os.environ.pop('KUBERNETES_PORT', None)
            self.assertFalse(Platform.is_k8s())

            os.environ['KUBERNETES_PORT'] = '999'
            self.assertTrue(Platform.is_k8s())
class Kubernetes(AgentCheck):
    """
    Collect metrics and events from kubelet

    Each run performs the kubelet health checks, then publishes per-container
    metrics and a running-pods gauge per controller.
    """

    # Class-level dict, hence shared by all instances; not referenced by any
    # method of this class.
    pod_names_by_container = {}

    # Seconds to wait for the kubelet health endpoint before reporting a
    # failure; without a timeout requests.get() may block indefinitely.
    KUBELET_CHECK_TIMEOUT = 10

    def __init__(self, name, init_config, agentConfig, instances=None):
        """
        Build the check and its KubeUtil helper.

        Raises:
            Exception: if more than one instance is configured, or if
                KubeUtil could not determine a host.
        """
        if instances is not None and len(instances) > 1:
            raise Exception('Kubernetes check only supports one configured instance.')

        AgentCheck.__init__(self, name, init_config, agentConfig, instances)
        self.kubeutil = KubeUtil()
        if not self.kubeutil.host:
            raise Exception('Unable to get default router and host parameter is not set')

    def _perform_kubelet_checks(self, url):
        """
        Query the kubelet health endpoint and emit service checks.

        Every response line of the form `[<status>]<name> <rest>` yields a
        `<NAMESPACE>.kubelet.check.<name>` service check: OK when the status
        character is `+`, CRITICAL otherwise. An aggregated
        `<NAMESPACE>.kubelet.check` is then emitted: OK only if every parsed
        line was OK, CRITICAL if any line failed or the request raised.
        """
        service_check_base = NAMESPACE + '.kubelet.check'
        is_ok = True
        try:
            r = requests.get(url, timeout=self.KUBELET_CHECK_TIMEOUT)
            for line in r.iter_lines():
                # avoid noise; this check is expected to fail since we override the container hostname
                if line.find('hostname') != -1:
                    continue

                # raw string: same pattern, without relying on invalid
                # string escapes (`\[`, `\s`) being passed through.
                matches = re.match(r'\[(.)\]([^\s]+) (.*)?', line)
                if not matches or len(matches.groups()) < 2:
                    continue

                service_check_name = service_check_base + '.' + matches.group(2)
                status = matches.group(1)
                if status == '+':
                    self.service_check(service_check_name, AgentCheck.OK)
                else:
                    self.service_check(service_check_name, AgentCheck.CRITICAL)
                    is_ok = False

        except Exception as e:
            self.log.warning('kubelet check failed: %s' % str(e))
            self.service_check(service_check_base, AgentCheck.CRITICAL,
                               message='Kubelet check failed: %s' % str(e))

        else:
            if is_ok:
                self.service_check(service_check_base, AgentCheck.OK)
            else:
                self.service_check(service_check_base, AgentCheck.CRITICAL)

    def check(self, instance):
        """
        Entry point of the check: load the instance options onto `self`, run
        the kubelet health checks, then collect the metrics.
        """
        self.max_depth = instance.get('max_depth', DEFAULT_MAX_DEPTH)
        enabled_gauges = instance.get('enabled_gauges', DEFAULT_ENABLED_GAUGES)
        self.enabled_gauges = ["{0}.{1}".format(NAMESPACE, x) for x in enabled_gauges]
        enabled_rates = instance.get('enabled_rates', DEFAULT_ENABLED_RATES)
        self.enabled_rates = ["{0}.{1}".format(NAMESPACE, x) for x in enabled_rates]

        self.publish_aliases = _is_affirmative(instance.get('publish_aliases', DEFAULT_PUBLISH_ALIASES))
        self.use_histogram = _is_affirmative(instance.get('use_histogram', DEFAULT_USE_HISTOGRAM))
        # FUNC_MAP entries are called with the check passed explicitly as
        # first argument (see the publish_rate/publish_gauge call sites).
        self.publish_rate = FUNC_MAP[RATE][self.use_histogram]
        self.publish_gauge = FUNC_MAP[GAUGE][self.use_histogram]

        # kubelet health checks
        self._perform_kubelet_checks(self.kubeutil.kube_health_url)

        # kubelet metrics
        self._update_metrics(instance)

    def _publish_raw_metrics(self, metric, dat, tags, depth=0):
        """
        Recursively walk `dat` and publish every number found.

        Numbers are published as a rate when `metric` matches one of the
        `enabled_rates` patterns, else as a gauge when it matches one of the
        `enabled_gauges` patterns, else dropped. Dicts are descended with the
        lowercased key appended to the metric name; for lists only the last
        element is considered. Recursion stops at `self.max_depth`.
        """
        if depth >= self.max_depth:
            self.log.warning('Reached max depth on metric=%s' % metric)
            return

        if isinstance(dat, numbers.Number):
            if self.enabled_rates and any(fnmatch(metric, pat) for pat in self.enabled_rates):
                self.publish_rate(self, metric, float(dat), tags)
            elif self.enabled_gauges and any(fnmatch(metric, pat) for pat in self.enabled_gauges):
                self.publish_gauge(self, metric, float(dat), tags)

        elif isinstance(dat, dict):
            for k, v in dat.iteritems():
                self._publish_raw_metrics(metric + '.%s' % k.lower(), v, tags, depth + 1)

        elif isinstance(dat, list) and dat:
            # an empty list carries no sample; indexing it would raise
            # IndexError and abort the metrics of the whole container.
            self._publish_raw_metrics(metric, dat[-1], tags, depth + 1)

    @staticmethod
    def _shorten_name(name):
        """Truncate every run of 64+ hexadecimal characters in `name` to its first 12."""
        # shorten docker image id
        return re.sub('([0-9a-fA-F]{64,})', lambda x: x.group(1)[0:12], name)

    def _get_post_1_2_tags(self, cont_labels, subcontainer, kube_labels):
        """
        Build the pod-related tags for a container carrying both the pod name
        and the namespace labels (Kubernetes >= 1.2).

        `kube_labels` is looked up with the key `<namespace>/<pod_name>`. The
        replication controller name is derived by dropping the last
        dash-separated segment of the pod name.
        """
        tags = []

        pod_name = cont_labels[KubeUtil.POD_NAME_LABEL]
        pod_namespace = cont_labels[KubeUtil.NAMESPACE_LABEL]
        tags.append(u"pod_name:{0}/{1}".format(pod_namespace, pod_name))
        tags.append(u"kube_namespace:{0}".format(pod_namespace))

        kube_labels_key = "{0}/{1}".format(pod_namespace, pod_name)

        pod_labels = kube_labels.get(kube_labels_key)
        if pod_labels:
            tags += list(pod_labels)

        if "-" in pod_name:
            replication_controller = "-".join(pod_name.split("-")[:-1])
            tags.append("kube_replication_controller:%s" % replication_controller)

        if self.publish_aliases and subcontainer.get("aliases"):
            for alias in subcontainer['aliases'][1:]:
                # we don't add the first alias as it will be the container_name
                tags.append('container_alias:%s' % (self._shorten_name(alias)))

        return tags

    def _get_pre_1_2_tags(self, cont_labels, subcontainer, kube_labels):
        """
        Build the pod-related tags for a container carrying only the pod name
        label (Kubernetes <= 1.1).

        `kube_labels` is looked up with the pod name itself. When the derived
        replication controller name contains a `/`, the part before it is
        emitted as `kube_namespace`.
        """
        tags = []

        pod_name = cont_labels[KubeUtil.POD_NAME_LABEL]
        tags.append(u"pod_name:{0}".format(pod_name))

        pod_labels = kube_labels.get(pod_name)
        if pod_labels:
            tags.extend(list(pod_labels))

        if "-" in pod_name:
            replication_controller = "-".join(pod_name.split("-")[:-1])
            if "/" in replication_controller:
                namespace, replication_controller = replication_controller.split("/", 1)
                tags.append(u"kube_namespace:%s" % namespace)

            tags.append(u"kube_replication_controller:%s" % replication_controller)

        if self.publish_aliases and subcontainer.get("aliases"):
            for alias in subcontainer['aliases'][1:]:
                # we don't add the first alias as it will be the container_name
                tags.append(u"container_alias:%s" % (self._shorten_name(alias)))

        return tags

    def _update_container_metrics(self, instance, subcontainer, kube_labels):
        """
        Publish the metrics of one subcontainer.

        Tags are the instance's custom tags, the container name and the pod
        tags (or `pod_name:no_pod`). Only the latest entry of
        `subcontainer['stats']` is published; a filesystem usage ratio and a
        summed network error rate are added when the spec advertises them.
        """
        tags = list(instance.get('tags', []))  # add support for custom tags

        if len(subcontainer.get('aliases', [])) >= 1:
            # The first alias seems to always match the docker container name
            container_name = subcontainer['aliases'][0]
        else:
            # We default to the container id
            container_name = subcontainer['name']

        tags.append('container_name:%s' % container_name)

        try:
            cont_labels = subcontainer['spec']['labels']
        except KeyError:
            self.log.debug("Subcontainer, doesn't have any labels")
            cont_labels = {}

        # Collect pod names, namespaces, rc...
        if KubeUtil.NAMESPACE_LABEL in cont_labels and KubeUtil.POD_NAME_LABEL in cont_labels:
            # Kubernetes >= 1.2
            tags += self._get_post_1_2_tags(cont_labels, subcontainer, kube_labels)

        elif KubeUtil.POD_NAME_LABEL in cont_labels:
            # Kubernetes <= 1.1
            tags += self._get_pre_1_2_tags(cont_labels, subcontainer, kube_labels)

        else:
            # Those are containers that are not part of a pod.
            # They are top aggregate views and don't have the previous metadata.
            tags.append("pod_name:no_pod")

        stats = subcontainer['stats'][-1]  # take the latest
        self._publish_raw_metrics(NAMESPACE, stats, tags)

        if subcontainer.get("spec", {}).get("has_filesystem"):
            fs = stats['filesystem'][-1]
            fs_utilization = float(fs['usage'])/float(fs['capacity'])
            self.publish_gauge(self, NAMESPACE + '.filesystem.usage_pct', fs_utilization, tags)

        if subcontainer.get("spec", {}).get("has_network"):
            net = stats['network']
            self.publish_rate(self, NAMESPACE + '.network_errors',
                              sum(float(net[x]) for x in NET_ERRORS),
                              tags)

    def _retrieve_metrics(self, url):
        """Return the result of retrieve_json() for `url`."""
        return retrieve_json(url)

    def _update_metrics(self, instance):
        """
        Retrieve the pods list and the container metrics, then publish them.

        A failure on a single container is logged and does not prevent the
        other containers from being processed.

        Raises:
            Exception: if no metrics could be retrieved.
        """
        pods_list = self.kubeutil.retrieve_pods_list()
        metrics = self._retrieve_metrics(self.kubeutil.metrics_url)

        excluded_labels = instance.get('excluded_labels')
        kube_labels = self.kubeutil.extract_kube_labels(pods_list, excluded_keys=excluded_labels)

        if not metrics:
            # Report the URL that was actually queried: this class never sets
            # a `metrics_cmd` attribute, so formatting the message with it
            # would fail with AttributeError instead of raising this error.
            raise Exception('No metrics retrieved cmd=%s' % self.kubeutil.metrics_url)

        for subcontainer in metrics:
            try:
                self._update_container_metrics(instance, subcontainer, kube_labels)
            except Exception as e:
                self.log.error("Unable to collect metrics for container: {0} ({1})".format(
                    subcontainer.get('name'), e))

        self._update_pods_metrics(instance, pods_list)

    def _update_pods_metrics(self, instance, pods):
        """
        Publish `<NAMESPACE>.pods.running` per controller.

        Pods are counted by the name found in their
        `kubernetes.io/created-by` annotation when the referenced kind is one
        of the supported kinds; pods without a usable annotation are skipped.
        """
        supported_kinds = [
            "DaemonSet",
            "Deployment",
            "Job",
            "ReplicationController",
            "ReplicaSet",
        ]

        controllers_map = defaultdict(int)
        # `or []`: tolerate a payload whose items are missing or null
        for pod in pods.get('items') or []:
            try:
                created_by = json.loads(pod['metadata']['annotations']['kubernetes.io/created-by'])
                kind = created_by['reference']['kind']
                if kind in supported_kinds:
                    controllers_map[created_by['reference']['name']] += 1
            except (KeyError, TypeError, ValueError):
                # KeyError: annotation or reference field absent.
                # TypeError: a null/non-dict level in the pod or annotation.
                # ValueError: annotation is not valid JSON.
                # One unusable pod must not abort the count for the others.
                continue

        tags = instance.get('tags', [])
        for ctrl, pod_count in controllers_map.iteritems():
            _tags = tags[:]  # copy base tags
            _tags.append('kube_replication_controller:{0}'.format(ctrl))
            self.publish_gauge(self, NAMESPACE + '.pods.running', pod_count, _tags)