コード例 #1
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """
        Initialise the check and create its cadvisor and kubelet scrapers.

        :param name: forwarded unchanged to the parent constructor
        :param init_config: forwarded unchanged to the parent constructor
        :param agentConfig: forwarded unchanged to the parent constructor
        :param instances: optional list of instance configs; at most one
            entry is accepted
        :raises Exception: if more than one instance is configured
        """
        super(KubeletCheck, self).__init__(name, init_config, agentConfig,
                                           instances)

        self.NAMESPACE = 'kubernetes'

        if instances is not None and len(instances) > 1:
            raise Exception(
                'Kubelet check only supports one configured instance.')
        # Fall back to an empty config so that a missing or empty `instances`
        # uses the defaults below instead of failing on `None.get(...)`.
        inst = (instances[0] if instances else None) or {}

        self.cadvisor_legacy_port = inst.get('cadvisor_port',
                                             CADVISOR_DEFAULT_PORT)
        # Stays None until a legacy cAdvisor URL is assigned elsewhere.
        self.cadvisor_legacy_url = None

        self.cadvisor_scraper = CadvisorPrometheusScraper(self)

        self.kubelet_scraper = PrometheusScraper(self)
        self.kubelet_scraper.NAMESPACE = self.NAMESPACE
        # Prometheus metric name -> metric name reported under NAMESPACE.
        self.kubelet_scraper.metrics_mapper = {
            'apiserver_client_certificate_expiration_seconds':
            'apiserver.certificate.expiration',
            'rest_client_requests_total': 'rest.client.requests',
            'kubelet_runtime_operations': 'kubelet.runtime.operations',
            'kubelet_runtime_operations_errors': 'kubelet.runtime.errors',
        }
コード例 #2
0
    def _get_istio_mesh_scraper(self, instance):
        """
        Return the scraper associated with the instance's istio mesh endpoint.

        Scrapers are kept in ``self._scrapers`` keyed by endpoint. When no
        (truthy) entry exists yet, a new scraper is created, stored in the
        dict, configured, and returned.
        """
        mesh_endpoint = instance.get('istio_mesh_endpoint')

        cached_scraper = self._scrapers.get(mesh_endpoint, None)
        if cached_scraper:
            return cached_scraper

        mesh_scraper = PrometheusScraper(self)
        # Register before configuring, so the entry exists from here on.
        self._scrapers[mesh_endpoint] = mesh_scraper
        mesh_scraper.NAMESPACE = self.MESH_NAMESPACE
        mesh_scraper.metrics_mapper = {
            # Metric names exposed by Istio 1.0
            'istio_requests_total': 'request.count',
            'istio_request_duration_seconds': 'request.duration',
            'istio_request_bytes': 'request.size',
            'istio_response_bytes': 'response.size',

            # Metric names exposed by Istio 0.8
            'istio_request_count': 'request.count',
            'istio_request_duration': 'request.duration',
            'istio_request_size': 'request.size',
            'istio_response_size': 'response.size',
        }
        mesh_scraper.label_to_hostname = mesh_endpoint

        return self._shared_scraper_config(mesh_scraper, instance)
コード例 #3
0
def test_credentials_token_noverify():
    """
    Token credentials with ``verify_tls`` set to "false".

    Checks that verification is disabled, no client cert pair is used, and
    the bearer header is produced for https but not for http URLs.
    """
    bearer_headers = {'Authorization': 'Bearer mytoken'}
    conn_info = {
        "verify_tls": "false",
        "ca_cert": "ca_cert",
        "client_crt": "ignore_me",
        "token": "mytoken",
    }
    credentials = KubeletCredentials(conn_info)

    assert credentials.verify() is False
    assert credentials.cert_pair() is None
    assert credentials.headers("https://dummy") == bearer_headers
    # The token must not be sent over plain http
    assert credentials.headers("http://dummy") is None

    prom_scraper = PrometheusScraper(None)
    # Second case: the token must not be sent over plain http either
    cases = (
        ("https://dummy", bearer_headers),
        ("http://dummy", {}),
    )
    for url, wanted_headers in cases:
        credentials.configure_scraper(prom_scraper, url)
        assert prom_scraper.ssl_ca_cert is False
        assert prom_scraper.ssl_cert is None
        assert prom_scraper.ssl_private_key is None
        assert prom_scraper.extra_headers == wanted_headers
コード例 #4
0
def test_credentials_empty():
    """
    Credentials built from an empty dict.

    Checks that verify, cert pair and headers are all None, and that
    configuring a scraper leaves its TLS fields None and extra headers empty.
    """
    target_url = "https://dummy"
    empty_credentials = KubeletCredentials({})

    assert empty_credentials.verify() is None
    assert empty_credentials.cert_pair() is None
    assert empty_credentials.headers(target_url) is None

    prom_scraper = PrometheusScraper(None)
    empty_credentials.configure_scraper(prom_scraper, target_url)
    for tls_attr in ("ssl_ca_cert", "ssl_cert", "ssl_private_key"):
        assert getattr(prom_scraper, tls_attr) is None
    assert prom_scraper.extra_headers == {}
コード例 #5
0
def test_credentials_certificates():
    """
    Certificate credentials with ``verify_tls`` set to "true".

    Checks that the CA cert and client cert pair are used, that no headers
    are produced even though a token is present, and that the scraper is
    configured to match.
    """
    target_url = "https://dummy"
    conn_info = {
        "verify_tls": "true",
        "ca_cert": "ca_cert",
        "client_crt": "crt",
        "client_key": "key",
        "token": "ignore_me",
    }
    credentials = KubeletCredentials(conn_info)

    assert credentials.verify() == "ca_cert"
    assert credentials.cert_pair() == ("crt", "key")
    assert credentials.headers(target_url) is None

    prom_scraper = PrometheusScraper(None)
    credentials.configure_scraper(prom_scraper, target_url)
    wanted = (
        ("ssl_ca_cert", "ca_cert"),
        ("ssl_cert", "crt"),
        ("ssl_private_key", "key"),
        ("extra_headers", {}),
    )
    for attr_name, wanted_value in wanted:
        assert getattr(prom_scraper, attr_name) == wanted_value
コード例 #6
0
class KubeletCheck(AgentCheck, CadvisorScraper):
    """
    Collect metrics from Kubelet.
    """
    def __init__(self, name, init_config, agentConfig, instances=None):
        """
        Initialise the check and create its cadvisor and kubelet scrapers.

        :param name: forwarded unchanged to the parent constructor
        :param init_config: forwarded unchanged to the parent constructor
        :param agentConfig: forwarded unchanged to the parent constructor
        :param instances: optional list of instance configs; at most one
            entry is accepted
        :raises Exception: if more than one instance is configured
        """
        super(KubeletCheck, self).__init__(name, init_config, agentConfig,
                                           instances)

        self.NAMESPACE = 'kubernetes'

        if instances is not None and len(instances) > 1:
            raise Exception(
                'Kubelet check only supports one configured instance.')
        # Fall back to an empty config so that a missing or empty `instances`
        # uses the defaults below instead of failing on `None.get(...)`.
        inst = (instances[0] if instances else None) or {}

        self.cadvisor_legacy_port = inst.get('cadvisor_port',
                                             CADVISOR_DEFAULT_PORT)
        # Stays None until check() manages to detect a legacy cAdvisor URL.
        self.cadvisor_legacy_url = None

        self.cadvisor_scraper = CadvisorPrometheusScraper(self)

        self.kubelet_scraper = PrometheusScraper(self)
        self.kubelet_scraper.NAMESPACE = self.NAMESPACE
        # Prometheus metric name -> metric name reported under NAMESPACE.
        self.kubelet_scraper.metrics_mapper = {
            'apiserver_client_certificate_expiration_seconds':
            'apiserver.certificate.expiration',
            'rest_client_requests_total': 'rest.client.requests',
            'kubelet_runtime_operations': 'kubelet.runtime.operations',
            'kubelet_runtime_operations_errors': 'kubelet.runtime.errors',
        }

    def check(self, instance):
        """
        Run one collection pass against the kubelet.

        Resolves the kubelet endpoint and the metrics URLs, configures both
        scrapers with the kubelet credentials, then reports the kubelet
        service checks, node metrics, running pods, container spec metrics
        and the cadvisor / kubelet metrics.

        :param instance: instance configuration dict
        :raises CheckException: if the kubelet URL cannot be detected
        """
        kubelet_conn_info = get_connection_info()
        endpoint = kubelet_conn_info.get('url')
        if endpoint is None:
            raise CheckException(
                "Unable to detect the kubelet URL automatically.")

        # 'cadvisor_metrics_endpoint' wins over the deprecated
        # 'metrics_endpoint'; the kubelet-derived URL is the last resort.
        if 'cadvisor_metrics_endpoint' in instance:
            self.cadvisor_metrics_url = instance['cadvisor_metrics_endpoint']
        else:
            self.cadvisor_metrics_url = instance.get(
                'metrics_endpoint', urljoin(endpoint, CADVISOR_METRICS_PATH))

        if 'metrics_endpoint' in instance:
            self.log.warning(
                'metrics_endpoint is deprecated, please specify cadvisor_metrics_endpoint instead.'
            )

        self.kubelet_metrics_url = instance.get(
            'kubelet_metrics_endpoint', urljoin(endpoint,
                                                KUBELET_METRICS_PATH))

        self.kube_health_url = urljoin(endpoint, KUBELET_HEALTH_PATH)
        self.node_spec_url = urljoin(endpoint, NODE_SPEC_PATH)
        self.pod_list_url = urljoin(endpoint, POD_LIST_PATH)

        # Kubelet credentials handling
        self.kubelet_credentials = KubeletCredentials(kubelet_conn_info)
        self.kubelet_credentials.configure_scraper(self.cadvisor_scraper,
                                                   self.cadvisor_metrics_url)
        self.kubelet_credentials.configure_scraper(self.kubelet_scraper,
                                                   self.kubelet_metrics_url)

        # Legacy cadvisor support
        try:
            self.cadvisor_legacy_url = self.detect_cadvisor(
                endpoint, self.cadvisor_legacy_port)
        except Exception as e:
            self.log.debug(
                'cAdvisor not found, running in prometheus mode: %s' % str(e))

        # By default we send the buckets; only a value whose string form is
        # 'false' (case-insensitive) turns them off.
        send_buckets = str(
            instance.get('send_histograms_buckets', True)).lower() != 'false'

        try:
            self.pod_list = self.retrieve_pod_list()

            self.container_filter = ContainerFilter(self.pod_list)

            self.instance_tags = instance.get('tags', [])
            self._perform_kubelet_check(self.instance_tags)
            self._report_node_metrics(self.instance_tags)
            self._report_pods_running(self.pod_list, self.instance_tags)
            self._report_container_spec_metrics(self.pod_list,
                                                self.instance_tags)

            if self.cadvisor_legacy_url:  # Legacy cAdvisor
                self.log.debug('processing legacy cadvisor metrics')
                self.process_cadvisor(instance, self.cadvisor_legacy_url,
                                      self.pod_list, self.container_filter)
            elif self.cadvisor_metrics_url:  # Prometheus
                self.log.debug('processing cadvisor metrics')
                self.cadvisor_scraper.process(
                    self.cadvisor_metrics_url,
                    send_histograms_buckets=send_buckets,
                    instance=instance,
                    pod_list=self.pod_list,
                    container_filter=self.container_filter)

            if self.kubelet_metrics_url:  # Prometheus
                self.log.debug('processing kubelet metrics')
                self.kubelet_scraper.process(
                    self.kubelet_metrics_url,
                    send_histograms_buckets=send_buckets,
                    instance=instance,
                    ignore_unmapped=True)
        finally:
            # Free up memory, including when one of the steps above raised,
            # so a failed run does not keep the pod list alive until the next.
            self.pod_list = None
            self.container_filter = None

    def perform_kubelet_query(self, url, verbose=True, timeout=10):
        """
        Issue a GET request to the kubelet and return the response object.

        TLS verification, the client certificate pair and the headers are all
        taken from ``self.kubelet_credentials``.

        :param url: URL to query
        :param verbose: sent as the ``verbose`` query parameter
        :param timeout: request timeout passed to ``requests.get``
        """
        credentials = self.kubelet_credentials
        return requests.get(
            url,
            timeout=timeout,
            verify=credentials.verify(),
            cert=credentials.cert_pair(),
            headers=credentials.headers(url),
            params={'verbose': verbose}
        )

    def retrieve_pod_list(self):
        """
        Query ``self.pod_list_url`` and return the decoded JSON pod list.

        A missing or null ``items`` entry is replaced by an empty list.
        Any exception is logged at debug level and results in ``None``.
        """
        try:
            response = self.perform_kubelet_query(self.pod_list_url)
            pods = response.json()
            # Sanitize input: when no pods are running, 'items' comes back
            # as None rather than an empty list.
            if pods.get("items") is None:
                pods['items'] = []
        except Exception as err:
            self.log.debug(
                'failed to retrieve pod list from the kubelet at %s : %s' %
                (self.pod_list_url, str(err)))
            return None
        else:
            return pods

    def _retrieve_node_spec(self):
        """
        Query ``self.node_spec_url`` and return the decoded JSON node spec.
        """
        # TODO: report allocatable for cpu, mem, and pod capacity
        # if we can get it locally or thru the DCA instead of the /nodes endpoint directly
        response = self.perform_kubelet_query(self.node_spec_url)
        return response.json()

    def _report_node_metrics(self, instance_tags):
        """
        Report the node's cpu and memory capacity as gauges.

        Values come from the ``num_cores`` and ``memory_capacity`` keys of the
        node spec, defaulting to 0 when a key is absent.

        :param instance_tags: tags attached to both gauges
        """
        spec = self._retrieve_node_spec()
        capacities = (
            ('.cpu.capacity', spec.get('num_cores', 0)),
            ('.memory.capacity', spec.get('memory_capacity', 0)),
        )
        for suffix, raw_value in capacities:
            self.gauge(self.NAMESPACE + suffix, float(raw_value),
                       instance_tags)

    def _perform_kubelet_check(self, instance_tags):
        """
        Query the kubelet health URL and emit service checks from its output.

        Each line shaped like ``[<status>]<name> <rest>`` yields a
        ``<NAMESPACE>.kubelet.check.<name>`` service check: OK when the status
        character is ``+``, CRITICAL otherwise. An overall
        ``<NAMESPACE>.kubelet.check`` service check is OK only when every
        parsed line was OK and no exception occurred.

        :param instance_tags: tags attached to every service check
        """
        base_name = self.NAMESPACE + '.kubelet.check'
        health_url = self.kube_health_url
        all_passed = True

        try:
            response = self.perform_kubelet_query(health_url)
            for line in response.iter_lines():
                # avoid noise; this check is expected to fail since we override the container hostname
                if 'hostname' in line:
                    continue

                parsed = re.match(r'\[(.)\]([^\s]+) (.*)?', line)
                if not parsed:
                    continue

                flag = parsed.group(1)
                check_name = base_name + '.' + parsed.group(2)
                if flag == '+':
                    self.service_check(check_name,
                                       AgentCheck.OK,
                                       tags=instance_tags)
                    continue

                self.service_check(check_name,
                                   AgentCheck.CRITICAL,
                                   tags=instance_tags)
                all_passed = False

        except Exception as err:
            self.log.warning('kubelet check %s failed: %s' %
                             (health_url, str(err)))
            self.service_check(base_name,
                               AgentCheck.CRITICAL,
                               message='Kubelet check %s failed: %s' %
                               (health_url, str(err)),
                               tags=instance_tags)
        else:
            overall = AgentCheck.OK if all_passed else AgentCheck.CRITICAL
            self.service_check(base_name, overall, tags=instance_tags)

    def _report_pods_running(self, pods, instance_tags):
        """
        Report the ``<NAMESPACE>.pods.running`` gauge, one value per distinct
        tag set.

        Pods are grouped by the sorted combination of the tags returned by
        ``get_tags`` for the pod and ``instance_tags``; pods for which
        ``get_tags`` returns nothing are skipped.

        :param pods: pod list object (its ``'items'`` entry is iterated)
        :param instance_tags: list of tags
        """
        tag_counter = {}
        for pod in pods['items']:
            pod_id = pod.get('metadata', {}).get('uid')
            pod_tags = get_tags('kubernetes_pod://%s' % pod_id, False)
            if not pod_tags:
                continue
            # Build a fresh list: `+=` would extend the list returned by
            # get_tags in place.
            hash_tags = tuple(sorted(list(pod_tags) + list(instance_tags)))
            # Direct dict lookup instead of scanning `.keys()`.
            tag_counter[hash_tags] = tag_counter.get(hash_tags, 0) + 1
        # `.items()` works on both Python 2 and Python 3 dicts.
        for tags, count in tag_counter.items():
            self.gauge(self.NAMESPACE + '.pods.running', count, list(tags))

    def _report_container_spec_metrics(self, pod_list, instance_tags):
        """
        Report container resource requests and limits found in the pod specs.

        For every container that has a ``resources`` section, a matching
        container status with a container ID, and is not excluded by
        ``self.container_filter``, one gauge is emitted per resource under
        ``<NAMESPACE>.<resource>.requests`` and ``<NAMESPACE>.<resource>.limits``.

        :param pod_list: pod list object (its ``'items'`` entry is iterated)
        :param instance_tags: list of tags appended to the container tags
        """
        # (key under 'resources', metric name template, debug message used
        # when that section cannot be read)
        spec_sections = (
            ('requests', '{}.{}.requests',
             "Unable to retrieve container requests for %s: %s"),
            ('limits', '{}.{}.limits',
             "Unable to retrieve container limits for %s: %s"),
        )

        for pod in pod_list['items']:
            pod_name = pod.get('metadata', {}).get('name')
            if not pod_name:
                continue

            for ctr in pod['spec']['containers']:
                resources = ctr.get('resources')
                if not resources:
                    continue

                c_name = ctr.get('name', '')
                cid = None

                for ctr_status in pod['status'].get('containerStatuses', []):
                    if ctr_status.get('name') == c_name:
                        # it is already prefixed with 'docker://'
                        cid = ctr_status.get('containerID')
                        break
                if not cid:
                    continue

                pod_uid = pod.get('metadata', {}).get('uid')
                if self.container_filter.is_excluded(cid, pod_uid):
                    continue

                tags = get_tags('%s' % cid, True) + instance_tags

                # Each section is handled independently, so a malformed
                # 'requests' section does not prevent reporting 'limits'.
                for section, metric_template, failure_msg in spec_sections:
                    try:
                        # `.items()` works on both Python 2 and Python 3.
                        for resource, value_str in resources.get(
                                section, {}).items():
                            value = self.parse_quantity(value_str)
                            self.gauge(
                                metric_template.format(self.NAMESPACE,
                                                       resource),
                                value, tags)
                    except (KeyError, AttributeError) as e:
                        self.log.debug(failure_msg, c_name, e)

    @staticmethod
    def parse_quantity(string):
        """
        Convert a quantity from a resources spec into a float, e.g.:
        resources:
          requests:
            cpu: "100m"
            memory: "200Mi"
          limits:
            memory: "300Mi"

        The numeric part is multiplied by the factor registered in FACTORS
        for the unit suffix; an unknown or missing suffix uses a factor of 1.
        Values written with a decimal exponent (e.g. "1e3", "12E6") are
        parsed as plain floats.

        :param string: str
        :return: float
        :raises ValueError: if the numeric part cannot be parsed as a float
        """
        # Scientific notation must be recognised up front: the character scan
        # below would merge the exponent digits into the mantissa and turn
        # "1e3" into 13.
        if re.match(r'^([0-9]+\.?[0-9]*|\.[0-9]+)[eE][+-]?[0-9]+$', string):
            return float(string)

        number_chars, unit_chars = [], []
        for char in string:
            if char.isdigit() or char == '.':
                number_chars.append(char)
            else:
                unit_chars.append(char)
        return float(''.join(number_chars)) * FACTORS.get(
            ''.join(unit_chars), 1)
コード例 #7
0
    def _get_mixer_scraper(self, instance):
        """
        Return the scraper associated with the instance's mixer endpoint.

        Scrapers are kept in ``self._scrapers`` keyed by endpoint. When no
        (truthy) entry exists yet, a new scraper is created, stored in the
        dict, configured, and returned.
        """
        mixer_endpoint = instance.get('mixer_endpoint')

        cached_scraper = self._scrapers.get(mixer_endpoint, None)
        if cached_scraper:
            return cached_scraper

        mixer_scraper = PrometheusScraper(self)
        # Register before configuring, so the entry exists from here on.
        self._scrapers[mixer_endpoint] = mixer_scraper
        mixer_scraper.NAMESPACE = self.MIXER_NAMESPACE
        mixer_scraper.metrics_mapper = {
            # Go runtime metrics
            'go_gc_duration_seconds': 'go.gc_duration_seconds',
            'go_goroutines': 'go.goroutines',
            'go_info': 'go.info',
            'go_memstats_alloc_bytes': 'go.memstats.alloc_bytes',
            'go_memstats_alloc_bytes_total': 'go.memstats.alloc_bytes_total',
            'go_memstats_buck_hash_sys_bytes': 'go.memstats.buck_hash_sys_bytes',
            'go_memstats_frees_total': 'go.memstats.frees_total',
            'go_memstats_gc_cpu_fraction': 'go.memstats.gc_cpu_fraction',
            'go_memstats_gc_sys_bytes': 'go.memstats.gc_sys_bytes',
            'go_memstats_heap_alloc_bytes': 'go.memstats.heap_alloc_bytes',
            'go_memstats_heap_idle_bytes': 'go.memstats.heap_idle_bytes',
            'go_memstats_heap_inuse_bytes': 'go.memstats.heap_inuse_bytes',
            'go_memstats_heap_objects': 'go.memstats.heap_objects',
            'go_memstats_heap_released_bytes': 'go.memstats.heap_released_bytes',
            'go_memstats_heap_sys_bytes': 'go.memstats.heap_sys_bytes',
            'go_memstats_last_gc_time_seconds': 'go.memstats.last_gc_time_seconds',
            'go_memstats_lookups_total': 'go.memstats.lookups_total',
            'go_memstats_mallocs_total': 'go.memstats.mallocs_total',
            'go_memstats_mcache_inuse_bytes': 'go.memstats.mcache_inuse_bytes',
            'go_memstats_mcache_sys_bytes': 'go.memstats.mcache_sys_bytes',
            'go_memstats_mspan_inuse_bytes': 'go.memstats.mspan_inuse_bytes',
            'go_memstats_mspan_sys_bytes': 'go.memstats.mspan_sys_bytes',
            'go_memstats_next_gc_bytes': 'go.memstats.next_gc_bytes',
            'go_memstats_other_sys_bytes': 'go.memstats.other_sys_bytes',
            'go_memstats_stack_inuse_bytes': 'go.memstats.stack_inuse_bytes',
            'go_memstats_stack_sys_bytes': 'go.memstats.stack_sys_bytes',
            'go_memstats_sys_bytes': 'go.memstats.sys_bytes',
            'go_threads': 'go.threads',
            # gRPC server metrics
            'grpc_server_handled_total': 'grpc.server.handled_total',
            'grpc_server_handling_seconds': 'grpc.server.handling_seconds',
            'grpc_server_msg_received_total': 'grpc.server.msg_received_total',
            'grpc_server_msg_sent_total': 'grpc.server.msg_sent_total',
            'grpc_server_started_total': 'grpc.server.started_total',
            # Mixer metrics (the 'mixer_' prefix is dropped in the mapped name)
            'mixer_adapter_dispatch_count': 'adapter.dispatch_count',
            'mixer_adapter_dispatch_duration': 'adapter.dispatch_duration',
            'mixer_adapter_old_dispatch_count': 'adapter.old_dispatch_count',
            'mixer_adapter_old_dispatch_duration': 'adapter.old_dispatch_duration',
            'mixer_config_resolve_actions': 'config.resolve_actions',
            'mixer_config_resolve_count': 'config.resolve_count',
            'mixer_config_resolve_duration': 'config.resolve_duration',
            'mixer_config_resolve_rules': 'config.resolve_rules',
            # Process metrics
            'process_cpu_seconds_total': 'process.cpu_seconds_total',
            'process_max_fds': 'process.max_fds',
            'process_open_fds': 'process.open_fds',
            'process_resident_memory_bytes': 'process.resident_memory_bytes',
            'process_start_time_seconds': 'process.start_time_seconds',
            'process_virtual_memory_bytes': 'process.virtual_memory_bytes',
        }
        return self._shared_scraper_config(mixer_scraper, instance)