コード例 #1
0
    def _process_container_metric(self, type, metric_name, metric, scraper_config):
        """
        Report a simple per-container metric as a rate or a gauge.

        Samples are aggregated through ``_sum_values_by_context`` keyed by
        ``_get_entity_id_if_container_metric``, so when several series are found
        for a given container their values are summed before submission.

        :param type: submission method, "rate" or "gauge"; any other value
            results in nothing being submitted
        :param metric_name: name under which the value is submitted
        :param metric: metric object exposing ``type`` and ``name``
        :param scraper_config: mapping providing the ``custom_tags`` iterable
        """
        if metric.type not in METRIC_TYPES:
            self.log.error("Metric type %s unsupported for metric %s", metric.type, metric.name)
            return

        samples = self._sum_values_by_context(metric, self._get_entity_id_if_container_metric)
        for c_id, sample in iteritems(samples):
            sample_labels = sample[self.SAMPLE_LABELS]
            pod_uid = self._get_pod_uid(sample_labels)
            if self.pod_list_utils.is_excluded(c_id, pod_uid):
                continue

            # Other call sites in this file guard against a falsy tagger result,
            # so do the same here; copying also keeps the custom tags from being
            # appended to the list object handed back by the tagger.
            tags = list(tagger.tag(c_id, tagger.HIGH) or [])
            tags.extend(scraper_config['custom_tags'])

            # FIXME we are forced to do that because the Kubelet PodList isn't updated
            # for static pods, see https://github.com/kubernetes/kubernetes/pull/59948
            pod = self._get_pod_by_metric_label(sample_labels)
            if pod is not None and is_static_pending_pod(pod):
                tags.extend(tagger.tag('kubernetes_pod://%s' % pod["metadata"]["uid"], tagger.HIGH) or [])
                tags.extend(self._get_kube_container_name(sample_labels))
                # Pod-level and container-level tags may overlap.
                tags = list(set(tags))

            val = sample[self.SAMPLE_VALUE]

            if "rate" == type:
                self.rate(metric_name, val, tags)
            elif "gauge" == type:
                self.gauge(metric_name, val, tags)
コード例 #2
0
    def _process_usage_metric(self,
                              m_name,
                              metric,
                              cache,
                              scraper_config,
                              labels=None):
        """
        Submit a per-container usage gauge and remember it for later use.

        For every container sample the value and its tags are stored in
        ``cache`` as ``container_name -> (value, tags)`` so that
        ``_process_limit_metric`` can compute usage_pct, and the same value
        and tags are submitted as a gauge named ``m_name``. Cache entries
        that are not refreshed during this call are removed at the end.

        :param m_name: name of the gauge to submit
        :param metric: metric object holding the samples
        :param cache: dict updated in place
        :param scraper_config: mapping providing the ``custom_tags`` list
        :param labels: optional sample label names to turn into
            ``label:value`` tags when the sample carries a value for them
        """
        extra_labels = [] if labels is None else labels

        # Every key present before this run is stale until a sample refreshes it.
        stale_keys = set(cache)

        summed = self._sum_values_by_context(
            metric, self._get_entity_id_if_container_metric)
        for entity_id, sample in iteritems(summed):
            sample_labels = sample[self.SAMPLE_LABELS]
            container_name = self._get_container_label(sample_labels, 'name')
            if not container_name:
                continue
            if self.pod_list_utils.is_excluded(
                    entity_id, self._get_pod_uid(sample_labels)):
                continue

            tags = scraper_config['custom_tags'][:]
            tags.extend(
                tagger.tag(replace_container_rt_prefix(entity_id), tagger.HIGH)
                or [])

            # FIXME we are forced to do that because the Kubelet PodList isn't updated
            # for static pods, see https://github.com/kubernetes/kubernetes/pull/59948
            pod = self._get_pod_by_metric_label(sample_labels)
            if pod is not None and is_static_pending_pod(pod):
                pod_entity = 'kubernetes_pod_uid://%s' % pod["metadata"]["uid"]
                tags.extend(tagger.tag(pod_entity, tagger.HIGH) or [])
                tags.extend(self._get_kube_container_name(sample_labels))
                tags = list(set(tags))

            for label_name in extra_labels:
                label_value = sample_labels.get(label_name)
                if not label_value:
                    continue
                tags.append('%s:%s' % (label_name, label_value))

            usage = sample[self.SAMPLE_VALUE]
            cache[container_name] = (usage, tags)
            stale_keys.discard(container_name)
            self.gauge(m_name, usage, tags)

        # Drop the containers that were not seen in this run.
        for key in stale_keys:
            del cache[key]
コード例 #3
0
    def _report_pods_running(self, pods, instance_tags):
        """
        Count running pods and running containers, grouped by tag set.

        A container is counted when it has a container id, a "running" entry
        in its state and a non-empty tagger result. A pod is counted when at
        least one of its containers is running, it has a uid and the tagger
        returns tags for it. One gauge per distinct sorted tag set is then
        submitted for ``.pods.running`` and ``.containers.running``.

        :param pods: pod list object
        :param instance_tags: list of tags added to every tag set
        """
        pods_tag_counter = defaultdict(int)
        containers_tag_counter = defaultdict(int)

        for pod in pods.get('items', []):
            running_seen = False

            # Containers reporting
            for status in pod.get('status', {}).get('containerStatuses', []):
                cid = status.get('containerID')
                if not cid:
                    self.log.debug('skipping container with no id')
                    continue
                if "running" in status.get('state', {}):
                    running_seen = True
                    ctr_tags = tagger.tag(replace_container_rt_prefix(cid),
                                          tagger.LOW)
                    if ctr_tags:
                        ctr_tags += instance_tags
                        containers_tag_counter[tuple(sorted(ctr_tags))] += 1

            # Pod reporting
            if not running_seen:
                continue
            pod_uid = pod.get('metadata', {}).get('uid')
            if not pod_uid:
                self.log.debug('skipping pod with no uid')
                continue
            pod_tags = tagger.tag('kubernetes_pod_uid://%s' % pod_uid,
                                  tagger.LOW)
            if pod_tags:
                pod_tags += instance_tags
                pods_tag_counter[tuple(sorted(pod_tags))] += 1

        # Pods are emitted first, then containers.
        emissions = (
            ('.pods.running', pods_tag_counter),
            ('.containers.running', containers_tag_counter),
        )
        for suffix, counter in emissions:
            for tag_key, count in iteritems(counter):
                self.gauge(self.NAMESPACE + suffix, count, list(tag_key))
コード例 #4
0
    def _process_limit_metric(self, m_name, metric, cache, scraper_config, pct_m_name=None):
        """
        Report limit metrics and, optionally, the matching usage percentage.

        The limit is submitted as a gauge when ``m_name`` is not an empty
        string. When ``pct_m_name`` is given and the limit is positive, the
        usage stored in ``cache`` for the container name is divided by the
        limit and submitted as ``pct_m_name`` with the cached tags.

        :param m_name: name of the limit gauge, or an empty string to skip it
        :param metric: metric object holding the samples
        :param cache: dict of ``container_name -> (usage, tags)``
        :param scraper_config: mapping providing the ``custom_tags`` iterable
        :param pct_m_name: optional name of the usage percentage gauge
        """
        samples = self._sum_values_by_context(metric, self._get_entity_id_if_container_metric)
        for c_id, sample in iteritems(samples):
            limit = sample[self.SAMPLE_VALUE]
            pod_uid = self._get_pod_uid(sample[self.SAMPLE_LABELS])
            if self.pod_list_utils.is_excluded(c_id, pod_uid):
                continue

            # Guard against a falsy tagger result (as other call sites in this
            # file do) and work on a copy rather than the returned list.
            tags = list(tagger.tag(c_id, tagger.HIGH) or [])
            tags.extend(scraper_config['custom_tags'])

            if m_name:
                self.gauge(m_name, limit, tags)

            if pct_m_name and limit > 0:
                c_name = self._get_container_label(sample[self.SAMPLE_LABELS], 'name')
                if not c_name:
                    continue
                usage, usage_tags = cache.get(c_name, (None, None))
                # Only a missing cache entry means "no usage"; a cached usage
                # of 0 is a valid value and yields a 0.0 percentage.
                if usage is not None:
                    self.gauge(pct_m_name, float(usage / float(limit)), usage_tags)
                else:
                    self.log.debug(
                        "No corresponding usage found for metric %s and "
                        "container %s, skipping usage_pct for now.",
                        pct_m_name,
                        c_name,
                    )
コード例 #5
0
    def _process_pod_rate(self,
                          metric_name,
                          metric,
                          scraper_config,
                          labels=None):
        """
        Report a simple per-pod metric as a rate.

        Samples are aggregated through ``_sum_values_by_context`` keyed by
        ``_get_pod_uid_if_pod_metric``, so when several series are found for a
        given pod their values are summed before submission. Metrics whose
        name contains ``.network.`` are skipped for pods reported as host
        networked by ``_is_pod_host_networked``.

        :param metric_name: name under which the rate is submitted
        :param metric: metric object exposing ``type`` and ``name``
        :param scraper_config: mapping providing the ``custom_tags`` iterable
        :param labels: optional sample label names to turn into
            ``label:value`` tags when the sample carries a value for them
        """
        if labels is None:
            labels = []

        if metric.type not in METRIC_TYPES:
            self.log.error("Metric type %s unsupported for metric %s",
                           metric.type, metric.name)
            return

        samples = self._sum_values_by_context(metric,
                                              self._get_pod_uid_if_pod_metric)
        for pod_uid, sample in iteritems(samples):
            if '.network.' in metric_name and self._is_pod_host_networked(
                    pod_uid):
                continue
            # Guard against a falsy tagger result (as other call sites in this
            # file do) and work on a copy rather than the returned list.
            tags = list(
                tagger.tag('kubernetes_pod://%s' % pod_uid, tagger.HIGH) or [])
            tags.extend(scraper_config['custom_tags'])
            for label in labels:
                value = sample[self.SAMPLE_LABELS].get(label)
                if value:
                    tags.append('%s:%s' % (label, value))
            val = sample[self.SAMPLE_VALUE]
            self.rate(metric_name, val, tags)
コード例 #6
0
    def _report_ephemeral_storage_usage(self, pod_list, instance_tags):
        """
        Submit the ephemeral storage usage gauge for the pods of ``pod_list``.

        Usage comes from ``_retrieve_stats()`` (``ephemeral-storage.usedBytes``
        indexed by ``podRef.uid``). A pod is reported only when it has a uid,
        a recorded usage and a non-empty tagger result.

        :param pod_list: pod list object with an ``items`` entry
        :param instance_tags: list of tags added to the tagger tags
        """
        stats = self._retrieve_stats()

        # pod uid -> used bytes, keeping only truthy uids and truthy usages
        used_bytes_by_uid = {}
        for entry in stats.get('pods', []):
            uid = entry.get('podRef', {}).get('uid')
            used_bytes = entry.get('ephemeral-storage', {}).get('usedBytes')
            if not uid or not used_bytes:
                continue
            used_bytes_by_uid[uid] = used_bytes

        metric_suffix = '.ephemeral_storage.usage'
        for pod in pod_list['items']:
            uid = pod.get('metadata', {}).get('uid')
            if uid is None:
                continue

            used_bytes = used_bytes_by_uid.get(uid)
            if used_bytes is None:
                continue

            entity = 'kubernetes_pod_uid://{}'.format(uid)
            tags = tagger.tag(entity, tagger.ORCHESTRATOR)
            if tags:
                tags += instance_tags
                self.gauge(self.NAMESPACE + metric_suffix, used_bytes, tags)
コード例 #7
0
    def _update_container_metrics(self, instance, subcontainer, pod_list,
                                  pod_list_utils):
        """
        Tag and publish the latest stats of one subcontainer.

        The subcontainer is handled as a pod (container name "POD" with a pod
        uid), as a container of a static pending pod, or as a standard
        container. Nothing is published when the container is excluded, when
        no tags can be resolved, or when a standard container's pod cannot be
        found in ``pod_list``.

        :param instance: mapping read for its optional ``tags`` list
        :param subcontainer: mapping read for ``id``, ``labels``, ``spec``
            and ``stats`` (the last ``stats`` entry is used)
        :param pod_list: pod list handed to ``get_pod_by_uid``
        :param pod_list_utils: object providing ``get_cid_by_name_tuple``
            and ``is_excluded``
        """
        subcontainer_id = subcontainer.get('id')
        ctr_labels = subcontainer.get('labels', {})
        pod_uid = ctr_labels.get('io.kubernetes.pod.uid')
        k_container_name = ctr_labels.get('io.kubernetes.container.name')

        # We want to collect network metrics at the pod level
        is_pod = bool(k_container_name == "POD" and pod_uid)

        # FIXME we are forced to do that because the Kubelet PodList isn't updated
        # for static pods, see https://github.com/kubernetes/kubernetes/pull/59948
        pod = get_pod_by_uid(pod_uid, pod_list)
        in_static_pod = bool(pod is not None and is_static_pending_pod(pod))

        # Let's see who we have here
        if is_pod:
            tags = tags_for_pod(pod_uid, tagger.HIGH)
        elif in_static_pod and k_container_name:
            # FIXME static pods don't have container statuses so we can't
            # get the container id with the scheme, assuming docker here
            tags = tags_for_docker(subcontainer_id, tagger.HIGH)
            tags += tags_for_pod(pod_uid, tagger.HIGH)
            tags.append("kube_container_name:%s" % k_container_name)
        else:  # Standard container
            if pod is None:
                # Without the pod there is no namespace/name to resolve the
                # container id from, so skip instead of failing on None.
                self.log.debug(
                    "Pod %s not found in the pod list, skipping subcontainer %s",
                    pod_uid, subcontainer_id)
                return
            pod_metadata = pod.get('metadata', {})
            cid = pod_list_utils.get_cid_by_name_tuple(
                (pod_metadata.get('namespace', ""),
                 pod_metadata.get('name', ""), k_container_name))
            if pod_list_utils.is_excluded(cid):
                # %s formatting so that a None cid cannot break the log call.
                self.log.debug("Filtering out %s", cid)
                return
            tags = tagger.tag(cid, tagger.HIGH)

        if not tags:
            self.log.debug(
                "Subcontainer {} doesn't have tags, skipping.".format(
                    subcontainer_id))
            return
        tags = list(set(tags + instance.get('tags', [])))

        stats = subcontainer['stats'][-1]  # take the latest
        self._publish_raw_metrics(NAMESPACE, stats, tags, is_pod)

        spec = subcontainer.get("spec", {})

        if not is_pod and spec.get("has_filesystem") and stats.get('filesystem'):
            fs = stats['filesystem'][-1]
            capacity = float(fs['capacity'])
            if capacity > 0:
                self.gauge(NAMESPACE + '.filesystem.usage_pct',
                           float(fs['usage']) / capacity,
                           tags=tags)
            else:
                # A zero capacity cannot yield a meaningful percentage.
                self.log.debug(
                    "Subcontainer %s reports no filesystem capacity, "
                    "skipping usage_pct", subcontainer_id)

        if is_pod and spec.get("has_network"):
            net = stats['network']
            self.rate(NAMESPACE + '.network_errors',
                      sum(float(net[x]) for x in NET_ERRORS),
                      tags=tags)
コード例 #8
0
    def _report_container_state_metrics(self, pod_list, instance_tags):
        """Reports container state & reasons by looking at container statuses"""
        if pod_list.get('expired_count'):
            self.gauge(self.NAMESPACE + '.pods.expired', pod_list.get('expired_count'), tags=instance_tags)

        for pod in pod_list['items']:
            pod_name = pod.get('metadata', {}).get('name')
            pod_uid = pod.get('metadata', {}).get('uid')

            if not pod_name or not pod_uid:
                continue

            for ctr_status in pod['status'].get('containerStatuses', []):
                c_name = ctr_status.get('name')
                cid = ctr_status.get('containerID')
                if not c_name or not cid:
                    continue

                if self.pod_list_utils.is_excluded(cid, pod_uid):
                    continue

                tags = tagger.tag('%s' % cid, tagger.ORCHESTRATOR) + instance_tags

                restart_count = ctr_status.get('restartCount', 0)
                self.gauge(self.NAMESPACE + '.containers.restarts', restart_count, tags)

                for (metric_name, field_name) in [('state', 'state'), ('last_state', 'lastState')]:
                    c_state = ctr_status.get(field_name, {})

                    for state_name in ['terminated', 'waiting']:
                        state_reasons = WHITELISTED_CONTAINER_STATE_REASONS.get(state_name, [])
                        self._submit_container_state_metric(metric_name, state_name, c_state, state_reasons, tags)
コード例 #9
0
    def check(self, _):
        """
        Resolve the kubelet connection and, in fargate mode, report pod metrics.

        The kubelet URL is taken from ``get_connection_info()`` and used to
        set ``self.pod_list_url`` and ``self.kubelet_credentials``. When
        ``self.fargate_mode`` is set, every pod of the pod list gets a
        ``.pods.running`` heartbeat gauge and, when its capacity annotation
        yields non-zero cpu and memory values, the ``.cpu.capacity`` and
        ``.memory.capacity`` gauges.

        :raises CheckException: when no kubelet URL could be detected
        """
        kubelet_conn_info = get_connection_info()
        endpoint = kubelet_conn_info.get('url')
        if endpoint is None:
            raise CheckException(
                "Unable to detect the kubelet URL automatically: " +
                kubelet_conn_info.get('err', ''))

        self.pod_list_url = endpoint.strip("/") + POD_LIST_PATH
        self.kubelet_credentials = KubeletCredentials(kubelet_conn_info)

        if self.fargate_mode:
            pod_list = self.retrieve_pod_list()
            for pod in pod_list.get('items', []):
                pod_metadata = pod.get('metadata', {})
                pod_id = pod_metadata.get('uid')
                tagger_tags = tagger.tag('kubernetes_pod_uid://%s' % pod_id,
                                         tagger.ORCHESTRATOR) or []
                tagger_tags.extend(self.tags)
                tags = set(tagger_tags)
                # Submit the heartbeat metric for fargate virtual nodes.
                self.gauge(self.NAMESPACE + '.pods.running', 1, tags)
                # A pod may carry no annotations at all; treat that as an
                # empty mapping so the membership test cannot fail on None.
                pod_annotations = pod_metadata.get('annotations') or {}
                if CAPACITY_ANNOTATION_KEY not in pod_annotations:
                    continue
                cpu_val, mem_val = extract_resource_values(
                    pod_annotations.get(CAPACITY_ANNOTATION_KEY))
                if cpu_val == 0 or mem_val == 0:
                    continue
                self.gauge(self.NAMESPACE + '.cpu.capacity', cpu_val, tags)
                self.gauge(self.NAMESPACE + '.memory.capacity', mem_val, tags)
コード例 #10
0
ファイル: common.py プロジェクト: wardd3/integrations-core
def tags_for_docker(cid, cardinality, with_prefix=False):
    """
    Ask the tagger for the tags of a container.

    :param cid: container id; used as the entity as-is when ``with_prefix``
        is true, otherwise prefixed with ``container_id://``
    :param cardinality: cardinality passed through to the tagger
    :param with_prefix: whether ``cid`` already carries its prefix
    :return: string array, empty if the tagger returns nothing
    """
    entity = cid if with_prefix else 'container_id://%s' % cid
    found = tagger.tag(entity, cardinality)
    return found or []
コード例 #11
0
    def _report_container_spec_metrics(self, pod_list, instance_tags):
        """Reports pod requests & limits by looking at pod specs."""
        for pod in pod_list.get('items', []):
            pod_name = pod.get('metadata', {}).get('name')
            pod_phase = pod.get('status', {}).get('phase')
            if self._should_ignore_pod(pod_name, pod_phase):
                continue

            for ctr in pod['spec']['containers']:
                if not ctr.get('resources'):
                    continue

                c_name = ctr.get('name', '')
                cid = None
                for ctr_status in pod['status'].get('containerStatuses', []):
                    if ctr_status.get('name') == c_name:
                        # it is already prefixed with 'runtime://'
                        cid = ctr_status.get('containerID')
                        break
                if not cid:
                    continue

                pod_uid = pod.get('metadata', {}).get('uid')
                if self.pod_list_utils.is_excluded(cid, pod_uid):
                    continue

                tags = tagger.tag(replace_container_rt_prefix(cid),
                                  tagger.HIGH)
                if not tags:
                    continue
                tags += instance_tags

                try:
                    for resource, value_str in iteritems(
                            ctr.get('resources', {}).get('requests', {})):
                        value = self.parse_quantity(value_str)
                        self.gauge(
                            '{}.{}.requests'.format(self.NAMESPACE, resource),
                            value, tags)
                except (KeyError, AttributeError) as e:
                    self.log.debug(
                        "Unable to retrieve container requests for %s: %s",
                        c_name, e)

                try:
                    for resource, value_str in iteritems(
                            ctr.get('resources', {}).get('limits', {})):
                        value = self.parse_quantity(value_str)
                        self.gauge(
                            '{}.{}.limits'.format(self.NAMESPACE, resource),
                            value, tags)
                except (KeyError, AttributeError) as e:
                    self.log.debug(
                        "Unable to retrieve container limits for %s: %s",
                        c_name, e)
コード例 #12
0
    def _create_pod_tags_by_pvc(self, pod_list):
        """
        Return a map, e.g.
            {
                "<kube_namespace>/<persistentvolumeclaim>": [<list_of_pod_tags>],
                "<kube_namespace1>/<persistentvolumeclaim1>": [<list_of_pod_tags1>],
            }
        that can be used to add pod tags to associated volume metrics
        """
        pod_tags_by_pvc = defaultdict(set)
        if pod_list is None:
            return pod_tags_by_pvc
        pods = pod_list.get('items', [])
        for pod in pods:
            # get kubernetes namespace of PVC
            kube_ns = pod.get('metadata', {}).get('namespace')
            if not kube_ns:
                continue

            # get volumes
            volumes = pod.get('spec', {}).get('volumes')
            if not volumes:
                continue

            # get pod id
            pod_id = pod.get('metadata', {}).get('uid')
            if not pod_id:
                self.log.debug('skipping pod with no uid')
                continue

            # get tags from tagger
            tags = tagger.tag('kubernetes_pod_uid://%s' % pod_id,
                              tagger.ORCHESTRATOR) or None
            if not tags:
                continue

            # remove tags that don't apply to PVCs
            for excluded_tag in self.VOLUME_TAG_KEYS_TO_EXCLUDE:
                tags = [
                    t for t in tags if not t.startswith(excluded_tag + ':')
                ]

            # get PVC
            for v in volumes:
                pvc_name = v.get('persistentVolumeClaim', {}).get('claimName')
                if pvc_name:
                    pod_tags_by_pvc['{}/{}'.format(kube_ns,
                                                   pvc_name)].update(tags)

        return pod_tags_by_pvc
コード例 #13
0
 def check(self, instance):
      """
      Report fargate pod heartbeat and capacity metrics.

      When ``self.fargate_mode`` is set, every pod of the pod list gets a
      ``.pods.running`` heartbeat gauge and, when its capacity annotation
      yields non-zero cpu and memory values, the ``.cpu.capacity`` and
      ``.memory.capacity`` gauges.

      :param instance: check instance; not read by this method
      """
      if self.fargate_mode:
          pod_list = self.get_pod_list()
          for pod in pod_list.get('items', []):
              pod_metadata = pod.get('metadata', {})
              pod_id = pod_metadata.get('uid')
              tagger_tags = tagger.tag('kubernetes_pod_uid://%s' % pod_id,
                                       tagger.ORCHESTRATOR) or []
              tagger_tags.extend(self.tags)
              tags = set(tagger_tags)
              # Submit the heartbeat metric for fargate virtual nodes.
              self.gauge(self.NAMESPACE + '.pods.running', 1, tags)
              # A pod may carry no annotations at all; treat that as an
              # empty mapping so the membership test cannot fail on None.
              pod_annotations = pod_metadata.get('annotations') or {}
              if CAPACITY_ANNOTATION_KEY not in pod_annotations:
                  continue
              cpu_val, mem_val = extract_resource_values(
                  pod_annotations.get(CAPACITY_ANNOTATION_KEY))
              if cpu_val == 0 or mem_val == 0:
                  continue
              self.gauge(self.NAMESPACE + '.cpu.capacity', cpu_val, tags)
              self.gauge(self.NAMESPACE + '.memory.capacity', mem_val, tags)
コード例 #14
0
ファイル: common.py プロジェクト: wardd3/integrations-core
def tags_for_pod(pod_id, cardinality):
    """
    Ask the tagger for the tags of a pod.

    :param pod_id: pod uid, queried as ``kubernetes_pod_uid://<pod_id>``
    :param cardinality: cardinality passed through to the tagger
    :return: string array, empty if the tagger returns nothing
    """
    entity = 'kubernetes_pod_uid://%s' % pod_id
    found = tagger.tag(entity, cardinality)
    return found or []
コード例 #15
0
ファイル: common.py プロジェクト: n2taylor/integrations-core
def tags_for_docker(cid, cardinality):
    """
    Query the tagger for a given container id.

    :param cid: container id, queried as ``docker://<cid>``
    :param cardinality: cardinality passed through to the tagger
    :return: string array, empty if container not found
    """
    # Fall back to an empty list so the documented "empty if not found"
    # contract holds even when the tagger returns a falsy value.
    return tagger.tag('docker://%s' % cid, cardinality) or []